Using backend: pytorch
2022-02-23 15:51:43.263 | Level 20 | dee.tasks.base_task:logging:196 - ====================Check Setting Validity====================
2022-02-23 15:51:43.264 | Level 20 | dee.tasks.base_task:logging:196 - Setting: {
"data_dir": "./Data",
"model_dir": "./Exps/jiao/Model",
"output_dir": "./Exps/jiao/Output",
"bert_model": "bert",
"train_file_name": "typed_train.json",
"dev_file_name": "typed_dev.json",
"test_file_name": "typed_test.json",
"max_seq_len": 128,
"train_batch_size": 16,
"eval_batch_size": 2,
"learning_rate": 0.0001,
"num_train_epochs": 10,
"warmup_proportion": 0.1,
"no_cuda": false,
"local_rank": -1,
"seed": 99,
"gradient_accumulation_steps": 8,
"optimize_on_cpu": false,
"fp16": false,
"loss_scale": 128,
"cpt_file_name": "Doc2EDAG",
"summary_dir_name": "./Exps/jiao/Summary/Summary",
"event_type_template": "jiao",
"max_sent_len": 128,
"max_sent_num": 64,
"use_lr_scheduler": false,
"lr_scheduler_step": 20,
"use_bert": false,
"use_biaffine_ner": false,
"use_masked_crf": false,
"only_master_logging": true,
"resume_latest_cpt": true,
"remove_last_cpt": false,
"save_best_cpt": false,
"model_type": "Doc2EDAG",
"rearrange_sent": false,
"use_crf_layer": true,
"min_teacher_prob": 0.1,
"schedule_epoch_start": 10,
"schedule_epoch_length": 10,
"loss_lambda": 0.05,
"loss_gamma": 1.0,
"add_greedy_dec": true,
"use_token_role": true,
"seq_reduce_type": "MaxPooling",
"hidden_size": 768,
"dropout": 0.1,
"ff_size": 1024,
"num_tf_layers": 4,
"use_path_mem": true,
"use_scheduled_sampling": true,
"use_doc_enc": true,
"neg_field_loss_scaling": 3.0,
"gcn_layer": 3,
"ner_num_tf_layers": 4,
"num_lstm_layers": 1,
"use_span_lstm": false,
"span_lstm_num_layer": 1,
"use_span_att": false,
"span_att_heads": 4,
"dot_att_head": 4,
"comb_samp_min_num_span": 2,
"comb_samp_num_samp": 100,
"comb_samp_max_samp_times": 1000,
"use_span_lstm_projection": false,
"biaffine_hidden_size": 256,
"triaffine_hidden_size": 150,
"vi_max_iter": 3,
"biaffine_hard_threshold": 0.5,
"event_cls_loss_weight": 1.0,
"smooth_attn_loss_weight": 1.0,
"combination_loss_weight": 1.0,
"comb_cls_loss_weight": 1.0,
"comb_sim_loss_weight": 1.0,
"span_cls_loss_weight": 1.0,
"use_comb_cls_pred": false,
"role_loss_weight": 1.0,
"event_relevant_combination": false,
"run_mode": "full",
"drop_irr_ents": false,
"at_least_one_comb": true,
"include_complementary_ents": true,
"filtered_data_types": "o2o",
"ent_context_window": 20,
"biaffine_grad_clip": false,
"global_grad_clip": false,
"ent_fix_mode": "n",
"span_mention_sum": false,
"add_adj_mat_weight_bias": false,
"optimizer": "adam",
"num_triggers": 1,
"eval_num_triggers": 1,
"with_left_trigger": true,
"with_all_one_trigger_comb": false,
"directed_trigger_graph": false,
"adj_sim_head": 1,
"adj_sim_agg": "mean",
"adj_sim_split_head": false,
"num_triggering_steps": 1,
"use_shared_dropout_proj": false,
"use_layer_norm_b4_biaffine": false,
"remove_mention_type_layer_norm": false,
"use_token_drop": false,
"guessing_decode": false,
"max_clique_decode": true,
"try_to_make_up": false,
"self_loop": false,
"incremental_min_conn": -1,
"use_span_self_att": false,
"use_smooth_span_self_att": false,
"ment_feature_type": "plus",
"ment_type_hidden_size": 32,
"num_mention_lstm_layer": 1,
"gat_alpha": 0.2,
"gat_num_heads": 4,
"gat_num_layers": 2,
"role_by_encoding": false,
"use_mention_lstm": false,
"mlp_before_adj_measure": false,
"use_field_cls_mlp": false,
"build_dense_connected_doc_graph": false,
"stop_gradient": false,
"doc_lang": "zh"
}
2022-02-23 15:51:43.264 | Level 20 | dee.tasks.base_task:logging:196 - ====================Init Device====================
2022-02-23 15:51:43.296 | Level 20 | dee.tasks.base_task:logging:196 - device cuda n_gpu 2 distributed training False
2022-02-23 15:51:43.296 | Level 20 | dee.tasks.base_task:logging:196 - ====================Reset Random Seed to 99====================
2022-02-23 15:51:43.297 | Level 20 | dee.tasks.base_task:logging:196 - Init Summary Writer
/root/anaconda3/envs/zhtorch/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:516: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
_np_qint8 = np.dtype([("qint8", np.int8, 1)])
/root/anaconda3/envs/zhtorch/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:517: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
_np_quint8 = np.dtype([("quint8", np.uint8, 1)])
/root/anaconda3/envs/zhtorch/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:518: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
_np_qint16 = np.dtype([("qint16", np.int16, 1)])
/root/anaconda3/envs/zhtorch/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:519: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
_np_quint16 = np.dtype([("quint16", np.uint16, 1)])
/root/anaconda3/envs/zhtorch/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:520: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
_np_qint32 = np.dtype([("qint32", np.int32, 1)])
/root/anaconda3/envs/zhtorch/lib/python3.6/site-packages/tensorflow/python/framework/dtypes.py:525: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
np_resource = np.dtype([("resource", np.ubyte, 1)])
2022-02-23 15:51:44.384 | Level 20 | dee.tasks.base_task:logging:196 - Writing summary into ./Exps/jiao/Summary/Summary-Feb23_15-51-43
2022-02-23 15:51:44.384 | Level 20 | dee.tasks.base_task:logging:196 - Initializing DEETask
file bert/config.json not found
The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization.
The tokenizer class you load from this checkpoint is 'BertTokenizer'.
The class this function is called from is 'BertTokenizerForDocEE'.
[('Build', ['CompanyName', 'Product', 'Address', 'StartTime', 'Country'], {1: ['CompanyName'], 2: ['CompanyName', 'StartTime'], 3: ['CompanyName', 'Product', 'StartTime'], 4: ['Address', 'CompanyName', 'Product', 'StartTime'], 5: ['Address', 'CompanyName', 'Country', 'Product', 'StartTime'], 'all': ['CompanyName', 'Product', 'Address', 'StartTime', 'Country']}, 5), ('Violated', ['CompanyName', 'Law', 'StartTime', 'Address', 'Character'], {1: ['CompanyName'], 2: ['CompanyName', 'StartTime'], 3: ['Character', 'CompanyName', 'StartTime'], 4: ['Address', 'Character', 'CompanyName', 'StartTime'], 5: ['Address', 'Character', 'CompanyName', 'Law', 'StartTime'], 'all': ['CompanyName', 'Law', 'StartTime', 'Address', 'Character']}, 5)]
2022-02-23 15:51:44.651 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_embedding.token_embedding.weight torch.Size([21128, 768]) 16226304
2022-02-23 15:51:44.651 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_embedding.pos_embedding.weight torch.Size([128, 768]) 98304
2022-02-23 15:51:44.651 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_embedding.layer_norm.gamma torch.Size([768]) 768
2022-02-23 15:51:44.652 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_embedding.layer_norm.beta torch.Size([768]) 768
2022-02-23 15:51:44.652 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.0.self_attn.linears.0.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.652 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.0.self_attn.linears.0.bias torch.Size([768]) 768
2022-02-23 15:51:44.652 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.0.self_attn.linears.1.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.652 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.0.self_attn.linears.1.bias torch.Size([768]) 768
2022-02-23 15:51:44.653 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.0.self_attn.linears.2.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.653 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.0.self_attn.linears.2.bias torch.Size([768]) 768
2022-02-23 15:51:44.653 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.0.self_attn.linears.3.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.653 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.0.self_attn.linears.3.bias torch.Size([768]) 768
2022-02-23 15:51:44.653 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.0.feed_forward.w_1.weight torch.Size([1024, 768]) 786432
2022-02-23 15:51:44.653 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.0.feed_forward.w_1.bias torch.Size([1024]) 1024
2022-02-23 15:51:44.654 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.0.feed_forward.w_2.weight torch.Size([768, 1024]) 786432
2022-02-23 15:51:44.654 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.0.feed_forward.w_2.bias torch.Size([768]) 768
2022-02-23 15:51:44.654 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.0.sublayer.0.norm.gamma torch.Size([768]) 768
2022-02-23 15:51:44.654 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.0.sublayer.0.norm.beta torch.Size([768]) 768
2022-02-23 15:51:44.654 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.0.sublayer.1.norm.gamma torch.Size([768]) 768
2022-02-23 15:51:44.654 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.0.sublayer.1.norm.beta torch.Size([768]) 768
2022-02-23 15:51:44.655 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.1.self_attn.linears.0.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.655 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.1.self_attn.linears.0.bias torch.Size([768]) 768
2022-02-23 15:51:44.655 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.1.self_attn.linears.1.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.655 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.1.self_attn.linears.1.bias torch.Size([768]) 768
2022-02-23 15:51:44.655 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.1.self_attn.linears.2.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.655 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.1.self_attn.linears.2.bias torch.Size([768]) 768
2022-02-23 15:51:44.656 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.1.self_attn.linears.3.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.656 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.1.self_attn.linears.3.bias torch.Size([768]) 768
2022-02-23 15:51:44.656 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.1.feed_forward.w_1.weight torch.Size([1024, 768]) 786432
2022-02-23 15:51:44.656 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.1.feed_forward.w_1.bias torch.Size([1024]) 1024
2022-02-23 15:51:44.656 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.1.feed_forward.w_2.weight torch.Size([768, 1024]) 786432
2022-02-23 15:51:44.656 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.1.feed_forward.w_2.bias torch.Size([768]) 768
2022-02-23 15:51:44.657 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.1.sublayer.0.norm.gamma torch.Size([768]) 768
2022-02-23 15:51:44.657 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.1.sublayer.0.norm.beta torch.Size([768]) 768
2022-02-23 15:51:44.657 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.1.sublayer.1.norm.gamma torch.Size([768]) 768
2022-02-23 15:51:44.657 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.1.sublayer.1.norm.beta torch.Size([768]) 768
2022-02-23 15:51:44.657 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.2.self_attn.linears.0.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.657 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.2.self_attn.linears.0.bias torch.Size([768]) 768
2022-02-23 15:51:44.658 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.2.self_attn.linears.1.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.658 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.2.self_attn.linears.1.bias torch.Size([768]) 768
2022-02-23 15:51:44.658 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.2.self_attn.linears.2.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.658 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.2.self_attn.linears.2.bias torch.Size([768]) 768
2022-02-23 15:51:44.658 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.2.self_attn.linears.3.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.658 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.2.self_attn.linears.3.bias torch.Size([768]) 768
2022-02-23 15:51:44.659 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.2.feed_forward.w_1.weight torch.Size([1024, 768]) 786432
2022-02-23 15:51:44.659 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.2.feed_forward.w_1.bias torch.Size([1024]) 1024
2022-02-23 15:51:44.659 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.2.feed_forward.w_2.weight torch.Size([768, 1024]) 786432
2022-02-23 15:51:44.659 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.2.feed_forward.w_2.bias torch.Size([768]) 768
2022-02-23 15:51:44.659 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.2.sublayer.0.norm.gamma torch.Size([768]) 768
2022-02-23 15:51:44.660 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.2.sublayer.0.norm.beta torch.Size([768]) 768
2022-02-23 15:51:44.660 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.2.sublayer.1.norm.gamma torch.Size([768]) 768
2022-02-23 15:51:44.660 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.2.sublayer.1.norm.beta torch.Size([768]) 768
2022-02-23 15:51:44.660 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.3.self_attn.linears.0.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.660 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.3.self_attn.linears.0.bias torch.Size([768]) 768
2022-02-23 15:51:44.660 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.3.self_attn.linears.1.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.661 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.3.self_attn.linears.1.bias torch.Size([768]) 768
2022-02-23 15:51:44.661 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.3.self_attn.linears.2.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.661 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.3.self_attn.linears.2.bias torch.Size([768]) 768
2022-02-23 15:51:44.661 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.3.self_attn.linears.3.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.661 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.3.self_attn.linears.3.bias torch.Size([768]) 768
2022-02-23 15:51:44.661 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.3.feed_forward.w_1.weight torch.Size([1024, 768]) 786432
2022-02-23 15:51:44.662 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.3.feed_forward.w_1.bias torch.Size([1024]) 1024
2022-02-23 15:51:44.662 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.3.feed_forward.w_2.weight torch.Size([768, 1024]) 786432
2022-02-23 15:51:44.662 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.3.feed_forward.w_2.bias torch.Size([768]) 768
2022-02-23 15:51:44.662 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.3.sublayer.0.norm.gamma torch.Size([768]) 768
2022-02-23 15:51:44.662 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.3.sublayer.0.norm.beta torch.Size([768]) 768
2022-02-23 15:51:44.662 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.3.sublayer.1.norm.gamma torch.Size([768]) 768
2022-02-23 15:51:44.663 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.layers.3.sublayer.1.norm.beta torch.Size([768]) 768
2022-02-23 15:51:44.663 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.norm.gamma torch.Size([768]) 768
2022-02-23 15:51:44.663 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.token_encoder.norm.beta torch.Size([768]) 768
2022-02-23 15:51:44.663 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.crf_layer.trans_mat torch.Size([17, 17]) 289
2022-02-23 15:51:44.663 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.crf_layer.hidden2tag.weight torch.Size([17, 768]) 13056
2022-02-23 15:51:44.663 | INFO | dee.tasks.dee_task:init:377 - Trainable: ner_model.crf_layer.hidden2tag.bias torch.Size([17]) 17
2022-02-23 15:51:44.664 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.0.event_query torch.Size([1, 768]) 768
2022-02-23 15:51:44.664 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.0.event_cls.weight torch.Size([2, 768]) 1536
2022-02-23 15:51:44.664 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.0.event_cls.bias torch.Size([2]) 2
2022-02-23 15:51:44.664 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.0.field_cls_list.0.weight torch.Size([2, 768]) 1536
2022-02-23 15:51:44.664 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.0.field_cls_list.0.bias torch.Size([2]) 2
2022-02-23 15:51:44.664 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.0.field_cls_list.1.weight torch.Size([2, 768]) 1536
2022-02-23 15:51:44.665 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.0.field_cls_list.1.bias torch.Size([2]) 2
2022-02-23 15:51:44.665 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.0.field_cls_list.2.weight torch.Size([2, 768]) 1536
2022-02-23 15:51:44.665 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.0.field_cls_list.2.bias torch.Size([2]) 2
2022-02-23 15:51:44.665 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.0.field_cls_list.3.weight torch.Size([2, 768]) 1536
2022-02-23 15:51:44.665 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.0.field_cls_list.3.bias torch.Size([2]) 2
2022-02-23 15:51:44.665 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.0.field_cls_list.4.weight torch.Size([2, 768]) 1536
2022-02-23 15:51:44.666 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.0.field_cls_list.4.bias torch.Size([2]) 2
2022-02-23 15:51:44.666 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.0.field_queries.0 torch.Size([1, 768]) 768
2022-02-23 15:51:44.666 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.0.field_queries.1 torch.Size([1, 768]) 768
2022-02-23 15:51:44.666 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.0.field_queries.2 torch.Size([1, 768]) 768
2022-02-23 15:51:44.666 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.0.field_queries.3 torch.Size([1, 768]) 768
2022-02-23 15:51:44.666 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.0.field_queries.4 torch.Size([1, 768]) 768
2022-02-23 15:51:44.667 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.1.event_query torch.Size([1, 768]) 768
2022-02-23 15:51:44.667 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.1.event_cls.weight torch.Size([2, 768]) 1536
2022-02-23 15:51:44.667 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.1.event_cls.bias torch.Size([2]) 2
2022-02-23 15:51:44.667 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.1.field_cls_list.0.weight torch.Size([2, 768]) 1536
2022-02-23 15:51:44.667 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.1.field_cls_list.0.bias torch.Size([2]) 2
2022-02-23 15:51:44.667 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.1.field_cls_list.1.weight torch.Size([2, 768]) 1536
2022-02-23 15:51:44.668 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.1.field_cls_list.1.bias torch.Size([2]) 2
2022-02-23 15:51:44.668 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.1.field_cls_list.2.weight torch.Size([2, 768]) 1536
2022-02-23 15:51:44.668 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.1.field_cls_list.2.bias torch.Size([2]) 2
2022-02-23 15:51:44.668 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.1.field_cls_list.3.weight torch.Size([2, 768]) 1536
2022-02-23 15:51:44.668 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.1.field_cls_list.3.bias torch.Size([2]) 2
2022-02-23 15:51:44.668 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.1.field_cls_list.4.weight torch.Size([2, 768]) 1536
2022-02-23 15:51:44.669 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.1.field_cls_list.4.bias torch.Size([2]) 2
2022-02-23 15:51:44.669 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.1.field_queries.0 torch.Size([1, 768]) 768
2022-02-23 15:51:44.669 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.1.field_queries.1 torch.Size([1, 768]) 768
2022-02-23 15:51:44.669 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.1.field_queries.2 torch.Size([1, 768]) 768
2022-02-23 15:51:44.669 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.1.field_queries.3 torch.Size([1, 768]) 768
2022-02-23 15:51:44.669 | INFO | dee.tasks.dee_task:init:377 - Trainable: event_tables.1.field_queries.4 torch.Size([1, 768]) 768
2022-02-23 15:51:44.670 | INFO | dee.tasks.dee_task:init:377 - Trainable: sent_pos_encoder.embedding.weight torch.Size([64, 768]) 49152
2022-02-23 15:51:44.670 | INFO | dee.tasks.dee_task:init:377 - Trainable: sent_pos_encoder.layer_norm.gamma torch.Size([768]) 768
2022-02-23 15:51:44.670 | INFO | dee.tasks.dee_task:init:377 - Trainable: sent_pos_encoder.layer_norm.beta torch.Size([768]) 768
2022-02-23 15:51:44.670 | INFO | dee.tasks.dee_task:init:377 - Trainable: ment_type_encoder.embedding.weight torch.Size([15, 768]) 11520
2022-02-23 15:51:44.670 | INFO | dee.tasks.dee_task:init:377 - Trainable: ment_type_encoder.layer_norm.gamma torch.Size([768]) 768
2022-02-23 15:51:44.670 | INFO | dee.tasks.dee_task:init:377 - Trainable: ment_type_encoder.layer_norm.beta torch.Size([768]) 768
2022-02-23 15:51:44.671 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.0.self_attn.linears.0.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.671 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.0.self_attn.linears.0.bias torch.Size([768]) 768
2022-02-23 15:51:44.671 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.0.self_attn.linears.1.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.671 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.0.self_attn.linears.1.bias torch.Size([768]) 768
2022-02-23 15:51:44.671 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.0.self_attn.linears.2.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.671 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.0.self_attn.linears.2.bias torch.Size([768]) 768
2022-02-23 15:51:44.672 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.0.self_attn.linears.3.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.672 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.0.self_attn.linears.3.bias torch.Size([768]) 768
2022-02-23 15:51:44.672 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.0.feed_forward.w_1.weight torch.Size([1024, 768]) 786432
2022-02-23 15:51:44.672 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.0.feed_forward.w_1.bias torch.Size([1024]) 1024
2022-02-23 15:51:44.672 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.0.feed_forward.w_2.weight torch.Size([768, 1024]) 786432
2022-02-23 15:51:44.673 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.0.feed_forward.w_2.bias torch.Size([768]) 768
2022-02-23 15:51:44.673 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.0.sublayer.0.norm.gamma torch.Size([768]) 768
2022-02-23 15:51:44.673 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.0.sublayer.0.norm.beta torch.Size([768]) 768
2022-02-23 15:51:44.673 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.0.sublayer.1.norm.gamma torch.Size([768]) 768
2022-02-23 15:51:44.673 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.0.sublayer.1.norm.beta torch.Size([768]) 768
2022-02-23 15:51:44.673 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.1.self_attn.linears.0.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.674 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.1.self_attn.linears.0.bias torch.Size([768]) 768
2022-02-23 15:51:44.674 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.1.self_attn.linears.1.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.674 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.1.self_attn.linears.1.bias torch.Size([768]) 768
2022-02-23 15:51:44.674 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.1.self_attn.linears.2.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.674 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.1.self_attn.linears.2.bias torch.Size([768]) 768
2022-02-23 15:51:44.674 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.1.self_attn.linears.3.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.675 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.1.self_attn.linears.3.bias torch.Size([768]) 768
2022-02-23 15:51:44.675 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.1.feed_forward.w_1.weight torch.Size([1024, 768]) 786432
2022-02-23 15:51:44.675 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.1.feed_forward.w_1.bias torch.Size([1024]) 1024
2022-02-23 15:51:44.675 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.1.feed_forward.w_2.weight torch.Size([768, 1024]) 786432
2022-02-23 15:51:44.675 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.1.feed_forward.w_2.bias torch.Size([768]) 768
2022-02-23 15:51:44.675 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.1.sublayer.0.norm.gamma torch.Size([768]) 768
2022-02-23 15:51:44.676 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.1.sublayer.0.norm.beta torch.Size([768]) 768
2022-02-23 15:51:44.676 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.1.sublayer.1.norm.gamma torch.Size([768]) 768
2022-02-23 15:51:44.676 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.1.sublayer.1.norm.beta torch.Size([768]) 768
2022-02-23 15:51:44.676 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.2.self_attn.linears.0.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.676 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.2.self_attn.linears.0.bias torch.Size([768]) 768
2022-02-23 15:51:44.676 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.2.self_attn.linears.1.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.677 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.2.self_attn.linears.1.bias torch.Size([768]) 768
2022-02-23 15:51:44.677 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.2.self_attn.linears.2.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.677 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.2.self_attn.linears.2.bias torch.Size([768]) 768
2022-02-23 15:51:44.677 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.2.self_attn.linears.3.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.677 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.2.self_attn.linears.3.bias torch.Size([768]) 768
2022-02-23 15:51:44.677 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.2.feed_forward.w_1.weight torch.Size([1024, 768]) 786432
2022-02-23 15:51:44.678 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.2.feed_forward.w_1.bias torch.Size([1024]) 1024
2022-02-23 15:51:44.678 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.2.feed_forward.w_2.weight torch.Size([768, 1024]) 786432
2022-02-23 15:51:44.678 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.2.feed_forward.w_2.bias torch.Size([768]) 768
2022-02-23 15:51:44.678 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.2.sublayer.0.norm.gamma torch.Size([768]) 768
2022-02-23 15:51:44.678 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.2.sublayer.0.norm.beta torch.Size([768]) 768
2022-02-23 15:51:44.678 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.2.sublayer.1.norm.gamma torch.Size([768]) 768
2022-02-23 15:51:44.679 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.2.sublayer.1.norm.beta torch.Size([768]) 768
2022-02-23 15:51:44.679 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.3.self_attn.linears.0.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.679 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.3.self_attn.linears.0.bias torch.Size([768]) 768
2022-02-23 15:51:44.679 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.3.self_attn.linears.1.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.679 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.3.self_attn.linears.1.bias torch.Size([768]) 768
2022-02-23 15:51:44.679 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.3.self_attn.linears.2.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.680 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.3.self_attn.linears.2.bias torch.Size([768]) 768
2022-02-23 15:51:44.680 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.3.self_attn.linears.3.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.680 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.3.self_attn.linears.3.bias torch.Size([768]) 768
2022-02-23 15:51:44.680 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.3.feed_forward.w_1.weight torch.Size([1024, 768]) 786432
2022-02-23 15:51:44.680 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.3.feed_forward.w_1.bias torch.Size([1024]) 1024
2022-02-23 15:51:44.680 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.3.feed_forward.w_2.weight torch.Size([768, 1024]) 786432
2022-02-23 15:51:44.681 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.3.feed_forward.w_2.bias torch.Size([768]) 768
2022-02-23 15:51:44.681 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.3.sublayer.0.norm.gamma torch.Size([768]) 768
2022-02-23 15:51:44.681 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.3.sublayer.0.norm.beta torch.Size([768]) 768
2022-02-23 15:51:44.681 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.3.sublayer.1.norm.gamma torch.Size([768]) 768
2022-02-23 15:51:44.681 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.layers.3.sublayer.1.norm.beta torch.Size([768]) 768
2022-02-23 15:51:44.681 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.norm.gamma torch.Size([768]) 768
2022-02-23 15:51:44.682 | INFO | dee.tasks.dee_task:init:377 - Trainable: doc_context_encoder.norm.beta torch.Size([768]) 768
2022-02-23 15:51:44.682 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.0.self_attn.linears.0.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.682 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.0.self_attn.linears.0.bias torch.Size([768]) 768
2022-02-23 15:51:44.682 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.0.self_attn.linears.1.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.682 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.0.self_attn.linears.1.bias torch.Size([768]) 768
2022-02-23 15:51:44.682 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.0.self_attn.linears.2.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.683 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.0.self_attn.linears.2.bias torch.Size([768]) 768
2022-02-23 15:51:44.683 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.0.self_attn.linears.3.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.683 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.0.self_attn.linears.3.bias torch.Size([768]) 768
2022-02-23 15:51:44.683 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.0.feed_forward.w_1.weight torch.Size([1024, 768]) 786432
2022-02-23 15:51:44.683 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.0.feed_forward.w_1.bias torch.Size([1024]) 1024
2022-02-23 15:51:44.683 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.0.feed_forward.w_2.weight torch.Size([768, 1024]) 786432
2022-02-23 15:51:44.684 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.0.feed_forward.w_2.bias torch.Size([768]) 768
2022-02-23 15:51:44.684 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.0.sublayer.0.norm.gamma torch.Size([768]) 768
2022-02-23 15:51:44.684 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.0.sublayer.0.norm.beta torch.Size([768]) 768
2022-02-23 15:51:44.684 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.0.sublayer.1.norm.gamma torch.Size([768]) 768
2022-02-23 15:51:44.684 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.0.sublayer.1.norm.beta torch.Size([768]) 768
2022-02-23 15:51:44.684 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.1.self_attn.linears.0.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.685 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.1.self_attn.linears.0.bias torch.Size([768]) 768
2022-02-23 15:51:44.685 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.1.self_attn.linears.1.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.685 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.1.self_attn.linears.1.bias torch.Size([768]) 768
2022-02-23 15:51:44.685 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.1.self_attn.linears.2.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.685 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.1.self_attn.linears.2.bias torch.Size([768]) 768
2022-02-23 15:51:44.686 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.1.self_attn.linears.3.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.686 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.1.self_attn.linears.3.bias torch.Size([768]) 768
2022-02-23 15:51:44.686 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.1.feed_forward.w_1.weight torch.Size([1024, 768]) 786432
2022-02-23 15:51:44.686 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.1.feed_forward.w_1.bias torch.Size([1024]) 1024
2022-02-23 15:51:44.686 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.1.feed_forward.w_2.weight torch.Size([768, 1024]) 786432
2022-02-23 15:51:44.686 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.1.feed_forward.w_2.bias torch.Size([768]) 768
2022-02-23 15:51:44.687 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.1.sublayer.0.norm.gamma torch.Size([768]) 768
2022-02-23 15:51:44.687 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.1.sublayer.0.norm.beta torch.Size([768]) 768
2022-02-23 15:51:44.687 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.1.sublayer.1.norm.gamma torch.Size([768]) 768
2022-02-23 15:51:44.687 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.1.sublayer.1.norm.beta torch.Size([768]) 768
2022-02-23 15:51:44.687 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.2.self_attn.linears.0.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.687 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.2.self_attn.linears.0.bias torch.Size([768]) 768
2022-02-23 15:51:44.688 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.2.self_attn.linears.1.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.688 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.2.self_attn.linears.1.bias torch.Size([768]) 768
2022-02-23 15:51:44.688 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.2.self_attn.linears.2.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.688 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.2.self_attn.linears.2.bias torch.Size([768]) 768
2022-02-23 15:51:44.688 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.2.self_attn.linears.3.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.688 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.2.self_attn.linears.3.bias torch.Size([768]) 768
2022-02-23 15:51:44.689 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.2.feed_forward.w_1.weight torch.Size([1024, 768]) 786432
2022-02-23 15:51:44.689 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.2.feed_forward.w_1.bias torch.Size([1024]) 1024
2022-02-23 15:51:44.689 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.2.feed_forward.w_2.weight torch.Size([768, 1024]) 786432
2022-02-23 15:51:44.689 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.2.feed_forward.w_2.bias torch.Size([768]) 768
2022-02-23 15:51:44.689 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.2.sublayer.0.norm.gamma torch.Size([768]) 768
2022-02-23 15:51:44.689 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.2.sublayer.0.norm.beta torch.Size([768]) 768
2022-02-23 15:51:44.690 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.2.sublayer.1.norm.gamma torch.Size([768]) 768
2022-02-23 15:51:44.690 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.2.sublayer.1.norm.beta torch.Size([768]) 768
2022-02-23 15:51:44.690 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.3.self_attn.linears.0.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.690 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.3.self_attn.linears.0.bias torch.Size([768]) 768
2022-02-23 15:51:44.690 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.3.self_attn.linears.1.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.690 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.3.self_attn.linears.1.bias torch.Size([768]) 768
2022-02-23 15:51:44.691 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.3.self_attn.linears.2.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.691 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.3.self_attn.linears.2.bias torch.Size([768]) 768
2022-02-23 15:51:44.691 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.3.self_attn.linears.3.weight torch.Size([768, 768]) 589824
2022-02-23 15:51:44.691 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.3.self_attn.linears.3.bias torch.Size([768]) 768
2022-02-23 15:51:44.691 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.3.feed_forward.w_1.weight torch.Size([1024, 768]) 786432
2022-02-23 15:51:44.691 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.3.feed_forward.w_1.bias torch.Size([1024]) 1024
2022-02-23 15:51:44.692 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.3.feed_forward.w_2.weight torch.Size([768, 1024]) 786432
2022-02-23 15:51:44.692 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.3.feed_forward.w_2.bias torch.Size([768]) 768
2022-02-23 15:51:44.692 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.3.sublayer.0.norm.gamma torch.Size([768]) 768
2022-02-23 15:51:44.692 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.3.sublayer.0.norm.beta torch.Size([768]) 768
2022-02-23 15:51:44.692 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.3.sublayer.1.norm.gamma torch.Size([768]) 768
2022-02-23 15:51:44.692 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.layers.3.sublayer.1.norm.beta torch.Size([768]) 768
2022-02-23 15:51:44.693 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.norm.gamma torch.Size([768]) 768
2022-02-23 15:51:44.693 | INFO | dee.tasks.dee_task:init:377 - Trainable: field_context_encoder.norm.beta torch.Size([768]) 768
2022-02-23 15:51:44.693 | INFO | dee.tasks.dee_task:init:389 - #Total Trainable Parameters: 63716682
2022-02-23 15:51:44.693 | INFO | dee.tasks.dee_task:init:390 - #Total Fixed Parameters: 0
2022-02-23 15:51:44.693 | Level 20 | dee.tasks.base_task:logging:196 - ====================Decorate Model====================
Traceback (most recent call last):
File "/home/jiaojiaxin/DocEE/run_dee_task.py", line 208, in <module>
parallel_decorate=in_argv.parallel_decorate,
File "/home/jiaojiaxin/DocEE/dee/tasks/dee_task.py", line 392, in init
self._decorate_model(parallel_decorate=parallel_decorate)
File "/home/jiaojiaxin/DocEE/dee/tasks/base_task.py", line 474, in _decorate_model
self.model.to(self.device)
File "/root/anaconda3/envs/zhtorch/lib/python3.6/site-packages/torch/nn/modules/module.py", line 612, in to
return self._apply(convert)
File "/root/anaconda3/envs/zhtorch/lib/python3.6/site-packages/torch/nn/modules/module.py", line 359, in _apply
module._apply(fn)
File "/root/anaconda3/envs/zhtorch/lib/python3.6/site-packages/torch/nn/modules/module.py", line 359, in _apply
module._apply(fn)
File "/root/anaconda3/envs/zhtorch/lib/python3.6/site-packages/torch/nn/modules/module.py", line 359, in _apply
module._apply(fn)
File "/root/anaconda3/envs/zhtorch/lib/python3.6/site-packages/torch/nn/modules/module.py", line 381, in _apply
param_applied = fn(param)
File "/root/anaconda3/envs/zhtorch/lib/python3.6/site-packages/torch/nn/modules/module.py", line 610, in convert
return t.to(device, dtype if t.is_floating_point() else None, non_blocking)
RuntimeError: CUDA error: out of memory