Hi, I trained a Levenshtein Transformer NMT model for German-to-English following the fairseq instructions, and now I'm trying to use your code to generate translations with constraints, but I get errors (full error output pasted below). I saw that you're using fairseq version 0.8.0, so I thought it might be a problem with incompatible versions, but I tried training with versions 0.10.0 and 0.9.0 as well and still get errors. Version 0.8.0 has no translation_lev task at all, so that didn't work either. What am I missing?
This is the command I used for training:
fairseq-train data-bin/prepared_data \
--save-dir checkpoints \
--ddp-backend=legacy_ddp \
--task translation_lev \
--criterion nat_loss \
--arch levenshtein_transformer \
--noise random_delete \
--share-all-embeddings \
--optimizer adam --adam-betas '(0.9,0.98)' \
--lr 0.0002 --lr-scheduler reduce_lr_on_plateau \
--stop-min-lr '1e-09' --warmup-updates 10000 \
--warmup-init-lr '1e-07' --label-smoothing 0.1 \
--dropout 0.3 --weight-decay 0.01 \
--decoder-learned-pos \
--encoder-learned-pos \
--apply-bert-init \
--log-format 'simple' --log-interval 50 \
--log-file log \
--fixed-validation-seed 7 \
--max-tokens 2048 \
--save-interval-updates 4000 \
--max-update 300000 \
--patience 4 \
--skip-invalid-size-inputs-valid-test
And this is the command I used for constrained generation:
python interactive_with_constraints.py \
data-bin/prepared_data \
-s de -t en \
--input data/test_three.de \
--task translation_lev \
--path checkpoints/checkpoint_best.pt \
--iter-decode-max-iter 9 \
--iter-decode-eos-penalty 0 \
--beam 1 \
--print-step \
--batch-size 400 \
--buffer-size 4000 \
--preserve-constraint
This is the output I get when running the generation command:
Namespace(allow_insertion_constraint=False, beam=1, bpe=None, buffer_size=4000, cpu=False, criterion='cross_entropy', data='/content/drive/MyDrive/susanto_model/data-bin/prepared_data', dataset_impl=None, decoding_format=None, diverse_beam_groups=-1, diverse_beam_strength=0.5, empty_cache_freq=0, force_anneal=None, fp16=False, fp16_init_scale=128, fp16_scale_tolerance=0.0, fp16_scale_window=None, gen_subset='test', input='/content/drive/MyDrive/susanto_model/data/test_three.de', iter_decode_eos_penalty=0.0, iter_decode_force_max_iter=False, iter_decode_max_iter=9, lazy_load=False, left_pad_source='True', left_pad_target='False', lenpen=1, load_alignments=False, log_format=None, log_interval=1000, lr_scheduler='fixed', lr_shrink=0.1, match_source_len=False, max_len_a=0, max_len_b=200, max_sentences=400, max_source_positions=1024, max_target_positions=1024, max_tokens=None, memory_efficient_fp16=False, min_len=1, min_loss_scale=0.0001, model_overrides='{}', momentum=0.99, nbest=1, no_beamable_mm=False, no_early_stop=False, no_progress_bar=False, no_repeat_ngram_size=0, noise='random_delete', num_shards=1, num_workers=1, optimizer='nag', path='/content/drive/MyDrive/susanto_model/checkpoints_susanto/checkpoint_best.pt', prefix_size=0, preserve_constraint=True, print_alignment=False, print_step=True, quiet=False, raw_text=False, remove_bpe=None, replace_unk=None, required_batch_size_multiple=8, results_path=None, sacrebleu=False, sampling=False, sampling_topk=-1, sampling_topp=-1.0, score_reference=False, seed=1, shard_id=0, skip_invalid_size_inputs_valid_test=False, source_lang='de', target_lang='en', task='translation_lev', tbmf_wrapper=False, temperature=1.0, tensorboard_logdir='', threshold_loss_scale=None, tokenizer=None, unkpen=0, unnormalized=False, upsample_primary=1, user_dir=None, warmup_updates=0, weight_decay=0.0)
| [de] dictionary: 8544 types
| [en] dictionary: 8544 types
| loading model(s) from checkpoints/checkpoint_best.pt
Traceback (most recent call last):
File "interactive_with_constraints.py", line 234, in <module>
cli_main()
File "interactive_with_constraints.py", line 230, in cli_main
main(args)
File "interactive_with_constraints.py", line 101, in main
task=task,
File "/content/constrained-levt/fairseq/checkpoint_utils.py", line 167, in load_model_ensemble
ensemble, args, _task = load_model_ensemble_and_task(filenames, arg_overrides, task)
File "/content/constrained-levt/fairseq/checkpoint_utils.py", line 178, in load_model_ensemble_and_task
state = load_checkpoint_to_cpu(filename, arg_overrides)
File "/content/constrained-levt/fairseq/checkpoint_utils.py", line 154, in load_checkpoint_to_cpu
state = _upgrade_state_dict(state)
File "/content/constrained-levt/fairseq/checkpoint_utils.py", line 323, in _upgrade_state_dict
state['args'].task = 'translation'
AttributeError: 'NoneType' object has no attribute 'task'
In another attempt (a separate run) I get this different error instead:
Traceback (most recent call last):
File "interactive_with_constraints.py", line 234, in <module>
cli_main()
File "interactive_with_constraints.py", line 230, in cli_main
main(args)
File "interactive_with_constraints.py", line 101, in main
task=task,
File "/content/constrained-levt/fairseq/checkpoint_utils.py", line 167, in load_model_ensemble
ensemble, args, _task = load_model_ensemble_and_task(filenames, arg_overrides, task)
File "/content/constrained-levt/fairseq/checkpoint_utils.py", line 186, in load_model_ensemble_and_task
model.load_state_dict(state['model'], strict=True)
File "/content/constrained-levt/fairseq/models/fairseq_model.py", line 69, in load_state_dict
return super().load_state_dict(state_dict, strict)
File "/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py", line 1407, in load_state_dict
self.__class__.__name__, "\n\t".join(error_msgs)))
RuntimeError: Error(s) in loading state_dict for LevenshteinTransformerModel:
Missing key(s) in state_dict: "encoder.layers.0.self_attn.in_proj_weight", "encoder.layers.0.self_attn.in_proj_bias", "encoder.layers.1.self_attn.in_proj_weight", "encoder.layers.1.self_attn.in_proj_bias", "encoder.layers.2.self_attn.in_proj_weight", [...], "decoder.layers.5.encoder_attn.in_proj_bias".
Unexpected key(s) in state_dict: "encoder.layers.0.self_attn.k_proj.weight", "encoder.layers.0.self_attn.k_proj.bias", "encoder.layers.0.self_attn.v_proj.weight", "encoder.layers.0.self_attn.v_proj.bias", "encoder.layers.0.self_attn.q_proj.weight", "encoder.layers.0.self_attn.q_proj.bias", "encoder.layers.1.self_attn.k_proj.weight", "encoder.layers.1.self_attn.k_proj.bias", "encoder.layers.1.self_attn.v_proj.weight", "encoder.layers.1.self_attn.v_proj.bias", "encoder.layers.1.self_attn.q_proj.weight", "encoder.layers.1.self_attn.q_proj.bias", [...] "decoder.layers.5.encoder_attn.v_proj.bias", "decoder.layers.5.encoder_attn.q_proj.weight", "decoder.layers.5.encoder_attn.q_proj.bias".