Hi there, I'm trying to quantize a fine-tuned version of GPT-J through the https://github.com/AlpinDale/gptq-gptj repo.
To quantize the model I use this command:
CUDA_VISIBLE_DEVICES=0 python gptj.py ../finetuned6B/checkpoint-3000/ c4 --wbits 4 --save GPTJQ.pt
The process completes successfully and the file GPTJQ.pt is produced. The only warning I get is:
Token indices sequence length is longer than the specified maximum sequence length for this model (3403 > 2048). Running this sequence through the model will result in indexing errors.
When I run inference with this command:
CUDA_VISIBLE_DEVICES=0 python gptj-inference.py EleutherAI/gpt-j-6b --wbits 4 --load GPTJQ.pt --text "Hello"
I get the following error. What am I doing wrong?
Thank you very much for any help!
The error:
CUDA extension not installed.
Loading model ...
Traceback (most recent call last):
File "gptj-inference.py", line 120, in <module>
model = load_quant(args.model, args.load, args.wbits)
File "gptj-inference.py", line 55, in load_quant
model.load_state_dict(torch.load(checkpoint))
File "/home/gianmarco/miniconda3/envs/gpt_env/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1671, in load_state_dict
raise RuntimeError('Error(s) in loading state_dict for {}:\n\t{}'.format(
RuntimeError: Error(s) in loading state_dict for GPTJForCausalLM:
Missing key(s) in state_dict: "transformer.h.0.attn.k_proj.qzeros", "transformer.h.0.attn.k_proj.scales", "transformer.h.0.attn.k_proj.bias", "transformer.h.0.attn.k_proj.qweight", "transformer.h.0.attn.v_proj.qzeros", "transformer.h.0.attn.v_proj.scales", "transformer.h.0.attn.v_proj.bias", "transformer.h.0.attn.v_proj.qweight", "transformer.h.0.attn.q_proj.qzeros", "transformer.h.0.attn.q_proj.scales", "transformer.h.0.attn.q_proj.bias", "transformer.h.0.attn.q_proj.qweight", "transformer.h.0.attn.out_proj.qzeros", "transformer.h.0.attn.out_proj.scales", "transformer.h.0.attn.out_proj.bias", "transformer.h.0.attn.out_proj.qweight", "transformer.h.0.mlp.fc_in.qzeros", "transformer.h.0.mlp.fc_in.scales", "transformer.h.0.mlp.fc_in.qweight", "transformer.h.0.mlp.fc_out.qzeros", "transformer.h.0.mlp.fc_out.scales", "transformer.h.0.mlp.fc_out.qweight", "transformer.h.1.attn.k_proj.qzeros", "transformer.h.1.attn.k_proj.scales", "transformer.h.1.attn.k_proj.bias", "transformer.h.1.attn.k_proj.qweight", "transformer.h.1.attn.v_proj.qzeros", "transformer.h.1.attn.v_proj.scales", "transformer.h.1.attn.v_proj.bias", "transformer.h.1.attn.v_proj.qweight", "transformer.h.1.attn.q_proj.qzeros", "transformer.h.1.attn.q_proj.scales", "transformer.h.1.attn.q_proj.bias", "transformer.h.1.attn.q_proj.qweight", "transformer.h.1.attn.out_proj.qzeros", "transformer.h.1.attn.out_proj.scales", "transformer.h.1.attn.out_proj.bias", "transformer.h.1.attn.out_proj.qweight", "transformer.h.1.mlp.fc_in.qzeros", "transformer.h.1.mlp.fc_in.scales", "transformer.h.1.mlp.fc_in.qweight", "transformer.h.1.mlp.fc_out.qzeros", "transformer.h.1.mlp.fc_out.scales", "transformer.h.1.mlp.fc_out.qweight", "transformer.h.2.attn.k_proj.qzeros", "transformer.h.2.attn.k_proj.scales", "transformer.h.2.attn.k_proj.bias", "transformer.h.2.attn.k_proj.qweight", "transformer.h.2.attn.v_proj.qzeros", "transformer.h.2.attn.v_proj.scales", "transformer.h.2.attn.v_proj.bias", "transformer.h.2.attn.v_proj.qweight", 
"transformer.h.2.attn.q_proj.qzeros", "transformer.h.2.attn.q_proj.scales", "transformer.h.2.attn.q_proj.bias", "transformer.h.2.attn.q_proj.qweight", "transformer.h.2.attn.out_proj.qzeros", "transformer.h.2.attn.out_proj.scales", "transformer.h.2.attn.out_proj.bias", "transformer.h.2.attn.out_proj.qweight", "transformer.h.2.mlp.fc_in.qzeros", "transformer.h.2.mlp.fc_in.scales", "transformer.h.2.mlp.fc_in.qweight", "transformer.h.2.mlp.fc_out.qzeros", "transformer.h.2.mlp.fc_out.scales", "transformer.h.2.mlp.fc_out.qweight", "transformer.h.3.attn.k_proj.qzeros", "transformer.h.3.attn.k_proj.scales", "transformer.h.3.attn.k_proj.bias", "transformer.h.3.attn.k_proj.qweight", "transformer.h.3.attn.v_proj.qzeros", "transformer.h.3.attn.v_proj.scales", "transformer.h.3.attn.v_proj.bias", "transformer.h.3.attn.v_proj.qweight", "transformer.h.3.attn.q_proj.qzeros", "transformer.h.3.attn.q_proj.scales", "transformer.h.3.attn.q_proj.bias", "transformer.h.3.attn.q_proj.qweight", "transformer.h.3.attn.out_proj.qzeros", "transformer.h.3.attn.out_proj.scales", "transformer.h.3.attn.out_proj.bias", "transformer.h.3.attn.out_proj.qweight", "transformer.h.3.mlp.fc_in.qzeros", "transformer.h.3.mlp.fc_in.scales", "transformer.h.3.mlp.fc_in.qweight", "transformer.h.3.mlp.fc_out.qzeros", "transformer.h.3.mlp.fc_out.scales", "transformer.h.3.mlp.fc_out.qweight", "transformer.h.4.attn.k_proj.qzeros", "transformer.h.4.attn.k_proj.scales", "transformer.h.4.attn.k_proj.bias", "transformer.h.4.attn.k_proj.qweight", "transformer.h.4.attn.v_proj.qzeros", "transformer.h.4.attn.v_proj.scales", "transformer.h.4.attn.v_proj.bias", "transformer.h.4.attn.v_proj.qweight", "transformer.h.4.attn.q_proj.qzeros", "transformer.h.4.attn.q_proj.scales", "transformer.h.4.attn.q_proj.bias", "transformer.h.4.attn.q_proj.qweight", "transformer.h.4.attn.out_proj.qzeros", "transformer.h.4.attn.out_proj.scales", "transformer.h.4.attn.out_proj.bias", "transformer.h.4.attn.out_proj.qweight", 
"transformer.h.4.mlp.fc_in.qzeros", "transformer.h.4.mlp.fc_in.scales", "transformer.h.4.mlp.fc_in.qweight", "transformer.h.4.mlp.fc_out.qzeros", "transformer.h.4.mlp.fc_out.scales", "transformer.h.4.mlp.fc_out.qweight", "transformer.h.5.attn.k_proj.qzeros", "transformer.h.5.attn.k_proj.scales", "transformer.h.5.attn.k_proj.bias", "transformer.h.5.attn.k_proj.qweight", "transformer.h.5.attn.v_proj.qzeros", "transformer.h.5.attn.v_proj.scales", "transformer.h.5.attn.v_proj.bias", "transformer.h.5.attn.v_proj.qweight", "transformer.h.5.attn.q_proj.qzeros", "transformer.h.5.attn.q_proj.scales", "transformer.h.5.attn.q_proj.bias", "transformer.h.5.attn.q_proj.qweight", "transformer.h.5.attn.out_proj.qzeros", "transformer.h.5.attn.out_proj.scales", "transformer.h.5.attn.out_proj.bias", "transformer.h.5.attn.out_proj.qweight", "transformer.h.5.mlp.fc_in.qzeros", "transformer.h.5.mlp.fc_in.scales", "transformer.h.5.mlp.fc_in.qweight", "transformer.h.5.mlp.fc_out.qzeros", "transformer.h.5.mlp.fc_out.scales", "transformer.h.5.mlp.fc_out.qweight", "transformer.h.6.attn.k_proj.qzeros", "transformer.h.6.attn.k_proj.scales", "transformer.h.6.attn.k_proj.bias", "transformer.h.6.attn.k_proj.qweight", "transformer.h.6.attn.v_proj.qzeros", "transformer.h.6.attn.v_proj.scales", "transformer.h.6.attn.v_proj.bias", "transformer.h.6.attn.v_proj.qweight", "transformer.h.6.attn.q_proj.qzeros", "transformer.h.6.attn.q_proj.scales", "transformer.h.6.attn.q_proj.bias", "transformer.h.6.attn.q_proj.qweight", "transformer.h.6.attn.out_proj.qzeros", "transformer.h.6.attn.out_proj.scales", "transformer.h.6.attn.out_proj.bias", "transformer.h.6.attn.out_proj.qweight", "transformer.h.6.mlp.fc_in.qzeros", "transformer.h.6.mlp.fc_in.scales", "transformer.h.6.mlp.fc_in.qweight", "transformer.h.6.mlp.fc_out.qzeros", "transformer.h.6.mlp.fc_out.scales", "transformer.h.6.mlp.fc_out.qweight", "transformer.h.7.attn.k_proj.qzeros", "transformer.h.7.attn.k_proj.scales", "transformer.h.7.attn.k_proj.bias", 
"transformer.h.7.attn.k_proj.qweight", "transformer.h.7.attn.v_proj.qzeros", "transformer.h.7.attn.v_proj.scales", "transformer.h.7.attn.v_proj.bias", "transformer.h.7.attn.v_proj.qweight", "transformer.h.7.attn.q_proj.qzeros", "transformer.h.7.attn.q_proj.scales", "transformer.h.7.attn.q_proj.bias", "transformer.h.7.attn.q_proj.qweight", "transformer.h.7.attn.out_proj.qzeros", "transformer.h.7.attn.out_proj.scales", "transformer.h.7.attn.out_proj.bias", "transformer.h.7.attn.out_proj.qweight", "transformer.h.7.mlp.fc_in.qzeros", "transformer.h.7.mlp.fc_in.scales", "transformer.h.7.mlp.fc_in.qweight", "transformer.h.7.mlp.fc_out.qzeros", "transformer.h.7.mlp.fc_out.scales", "transformer.h.7.mlp.fc_out.qweight", "transformer.h.8.attn.k_proj.qzeros", "transformer.h.8.attn.k_proj.scales", "transformer.h.8.attn.k_proj.bias", "transformer.h.8.attn.k_proj.qweight", "transformer.h.8.attn.v_proj.qzeros", "transformer.h.8.attn.v_proj.scales", "transformer.h.8.attn.v_proj.bias", "transformer.h.8.attn.v_proj.qweight", "transformer.h.8.attn.q_proj.qzeros", "transformer.h.8.attn.q_proj.scales", "transformer.h.8.attn.q_proj.bias", "transformer.h.8.attn.q_proj.qweight", "transformer.h.8.attn.out_proj.qzeros", "transformer.h.8.attn.out_proj.scales", "transformer.h.8.attn.out_proj.bias", "transformer.h.8.attn.out_proj.qweight", "transformer.h.8.mlp.fc_in.qzeros", "transformer.h.8.mlp.fc_in.scales", "transformer.h.8.mlp.fc_in.qweight", "transformer.h.8.mlp.fc_out.qzeros", "transformer.h.8.mlp.fc_out.scales", "transformer.h.8.mlp.fc_out.qweight", "transformer.h.9.attn.k_proj.qzeros", "transformer.h.9.attn.k_proj.scales", "transformer.h.9.attn.k_proj.bias", "transformer.h.9.attn.k_proj.qweight", "transformer.h.9.attn.v_proj.qzeros", "transformer.h.9.attn.v_proj.scales", "transformer.h.9.attn.v_proj.bias", "transformer.h.9.attn.v_proj.qweight", "transformer.h.9.attn.q_proj.qzeros", "transformer.h.9.attn.q_proj.scales", "transformer.h.9.attn.q_proj.bias", 
"transformer.h.9.attn.q_proj.qweight", "transformer.h.9.attn.out_proj.qzeros", "transformer.h.9.attn.out_proj.scales", "transformer.h.9.attn.out_proj.bias", "transformer.h.9.attn.out_proj.qweight", "transformer.h.9.mlp.fc_in.qzeros", "transformer.h.9.mlp.fc_in.scales", "transformer.h.9.mlp.fc_in.qweight", "transformer.h.9.mlp.fc_out.qzeros", "transformer.h.9.mlp.fc_out.scales", "transformer.h.9.mlp.fc_out.qweight", "transformer.h.10.attn.k_proj.qzeros", "transformer.h.10.attn.k_proj.scales", "transformer.h.10.attn.k_proj.bias", "transformer.h.10.attn.k_proj.qweight", "transformer.h.10.attn.v_proj.qzeros", "transformer.h.10.attn.v_proj.scales", "transformer.h.10.attn.v_proj.bias", "transformer.h.10.attn.v_proj.qweight", "transformer.h.10.attn.q_proj.qzeros", "transformer.h.10.attn.q_proj.scales", "transformer.h.10.attn.q_proj.bias", "transformer.h.10.attn.q_proj.qweight", "transformer.h.10.attn.out_proj.qzeros", "transformer.h.10.attn.out_proj.scales", "transformer.h.10.attn.out_proj.bias", "transformer.h.10.attn.out_proj.qweight", "transformer.h.10.mlp.fc_in.qzeros", "transformer.h.10.mlp.fc_in.scales", "transformer.h.10.mlp.fc_in.qweight", "transformer.h.10.mlp.fc_out.qzeros", "transformer.h.10.mlp.fc_out.scales", "transformer.h.10.mlp.fc_out.qweight", "transformer.h.11.attn.k_proj.qzeros", "transformer.h.11.attn.k_proj.scales", "transformer.h.11.attn.k_proj.bias", "transformer.h.11.attn.k_proj.qweight", "transformer.h.11.attn.v_proj.qzeros", "transformer.h.11.attn.v_proj.scales", "transformer.h.11.attn.v_proj.bias", "transformer.h.11.attn.v_proj.qweight", "transformer.h.11.attn.q_proj.qzeros", "transformer.h.11.attn.q_proj.scales", "transformer.h.11.attn.q_proj.bias", "transformer.h.11.attn.q_proj.qweight", "transformer.h.11.attn.out_proj.qzeros", "transformer.h.11.attn.out_proj.scales", "transformer.h.11.attn.out_proj.bias", "transformer.h.11.attn.out_proj.qweight", "transformer.h.11.mlp.fc_in.qzeros", "transformer.h.11.mlp.fc_in.scales", 
"transformer.h.11.mlp.fc_in.qweight", "transformer.h.11.mlp.fc_out.qzeros", "transformer.h.11.mlp.fc_out.scales", "transformer.h.11.mlp.fc_out.qweight", "transformer.h.12.attn.k_proj.qzeros", "transformer.h.12.attn.k_proj.scales", "transformer.h.12.attn.k_proj.bias", "transformer.h.12.attn.k_proj.qweight", "transformer.h.12.attn.v_proj.qzeros", "transformer.h.12.attn.v_proj.scales", "transformer.h.12.attn.v_proj.bias", "transformer.h.12.attn.v_proj.qweight", "transformer.h.12.attn.q_proj.qzeros", "transformer.h.12.attn.q_proj.scales", "transformer.h.12.attn.q_proj.bias", "transformer.h.12.attn.q_proj.qweight", "transformer.h.12.attn.out_proj.qzeros", "transformer.h.12.attn.out_proj.scales", "transformer.h.12.attn.out_proj.bias", "transformer.h.12.attn.out_proj.qweight", "transformer.h.12.mlp.fc_in.qzeros", "transformer.h.12.mlp.fc_in.scales", "transformer.h.12.mlp.fc_in.qweight", "transformer.h.12.mlp.fc_out.qzeros", "transformer.h.12.mlp.fc_out.scales", "transformer.h.12.mlp.fc_out.qweight", "transformer.h.13.attn.k_proj.qzeros", "transformer.h.13.attn.k_proj.scales", "transformer.h.13.attn.k_proj.bias", "transformer.h.13.attn.k_proj.qweight", "transformer.h.13.attn.v_proj.qzeros", "transformer.h.13.attn.v_proj.scales", "transformer.h.13.attn.v_proj.bias", "transformer.h.13.attn.v_proj.qweight", "transformer.h.13.attn.q_proj.qzeros", "transformer.h.13.attn.q_proj.scales", "transformer.h.13.attn.q_proj.bias", "transformer.h.13.attn.q_proj.qweight", "transformer.h.13.attn.out_proj.qzeros", "transformer.h.13.attn.out_proj.scales", "transformer.h.13.attn.out_proj.bias", "transformer.h.13.attn.out_proj.qweight", "transformer.h.13.mlp.fc_in.qzeros", "transformer.h.13.mlp.fc_in.scales", "transformer.h.13.mlp.fc_in.qweight", "transformer.h.13.mlp.fc_out.qzeros", "transformer.h.13.mlp.fc_out.scales", "transformer.h.13.mlp.fc_out.qweight", "transformer.h.14.attn.k_proj.qzeros", "transformer.h.14.attn.k_proj.scales", "transformer.h.14.attn.k_proj.bias", 
"transformer.h.14.attn.k_proj.qweight", "transformer.h.14.attn.v_proj.qzeros", "transformer.h.14.attn.v_proj.scales", "transformer.h.14.attn.v_proj.bias", "transformer.h.14.attn.v_proj.qweight", "transformer.h.14.attn.q_proj.qzeros", "transformer.h.14.attn.q_proj.scales", "transformer.h.14.attn.q_proj.bias", "transformer.h.14.attn.q_proj.qweight", "transformer.h.14.attn.out_proj.qzeros", "transformer.h.14.attn.out_proj.scales", "transformer.h.14.attn.out_proj.bias", "transformer.h.14.attn.out_proj.qweight", "transformer.h.14.mlp.fc_in.qzeros", "transformer.h.14.mlp.fc_in.scales", "transformer.h.14.mlp.fc_in.qweight", "transformer.h.14.mlp.fc_out.qzeros", "transformer.h.14.mlp.fc_out.scales", "transformer.h.14.mlp.fc_out.qweight", "transformer.h.15.attn.k_proj.qzeros", "transformer.h.15.attn.k_proj.scales", "transformer.h.15.attn.k_proj.bias", "transformer.h.15.attn.k_proj.qweight", "transformer.h.15.attn.v_proj.qzeros", "transformer.h.15.attn.v_proj.scales", "transformer.h.15.attn.v_proj.bias", "transformer.h.15.attn.v_proj.qweight", "transformer.h.15.attn.q_proj.qzeros", "transformer.h.15.attn.q_proj.scales", "transformer.h.15.attn.q_proj.bias", "transformer.h.15.attn.q_proj.qweight", "transformer.h.15.attn.out_proj.qzeros", "transformer.h.15.attn.out_proj.scales", "transformer.h.15.attn.out_proj.bias", "transformer.h.15.attn.out_proj.qweight", "transformer.h.15.mlp.fc_in.qzeros", "transformer.h.15.mlp.fc_in.scales", "transformer.h.15.mlp.fc_in.qweight", "transformer.h.15.mlp.fc_out.qzeros", "transformer.h.15.mlp.fc_out.scales", "transformer.h.15.mlp.fc_out.qweight", "transformer.h.16.attn.k_proj.qzeros", "transformer.h.16.attn.k_proj.scales", "transformer.h.16.attn.k_proj.bias", "transformer.h.16.attn.k_proj.qweight", "transformer.h.16.attn.v_proj.qzeros", "transformer.h.16.attn.v_proj.scales", "transformer.h.16.attn.v_proj.bias", "transformer.h.16.attn.v_proj.qweight", "transformer.h.16.attn.q_proj.qzeros", "transformer.h.16.attn.q_proj.scales", 
"transformer.h.16.attn.q_proj.bias", "transformer.h.16.attn.q_proj.qweight", "transformer.h.16.attn.out_proj.qzeros", "transformer.h.16.attn.out_proj.scales", "transformer.h.16.attn.out_proj.bias", "transformer.h.16.attn.out_proj.qweight", "transformer.h.16.mlp.fc_in.qzeros", "transformer.h.16.mlp.fc_in.scales", "transformer.h.16.mlp.fc_in.qweight", "transformer.h.16.mlp.fc_out.qzeros", "transformer.h.16.mlp.fc_out.scales", "transformer.h.16.mlp.fc_out.qweight", "transformer.h.17.attn.k_proj.qzeros", "transformer.h.17.attn.k_proj.scales", "transformer.h.17.attn.k_proj.bias", "transformer.h.17.attn.k_proj.qweight", "transformer.h.17.attn.v_proj.qzeros", "transformer.h.17.attn.v_proj.scales", "transformer.h.17.attn.v_proj.bias", "transformer.h.17.attn.v_proj.qweight", "transformer.h.17.attn.q_proj.qzeros", "transformer.h.17.attn.q_proj.scales", "transformer.h.17.attn.q_proj.bias", "transformer.h.17.attn.q_proj.qweight", "transformer.h.17.attn.out_proj.qzeros", "transformer.h.17.attn.out_proj.scales", "transformer.h.17.attn.out_proj.bias", "transformer.h.17.attn.out_proj.qweight", "transformer.h.17.mlp.fc_in.qzeros", "transformer.h.17.mlp.fc_in.scales", "transformer.h.17.mlp.fc_in.qweight", "transformer.h.17.mlp.fc_out.qzeros", "transformer.h.17.mlp.fc_out.scales", "transformer.h.17.mlp.fc_out.qweight", "transformer.h.18.attn.k_proj.qzeros", "transformer.h.18.attn.k_proj.scales", "transformer.h.18.attn.k_proj.bias", "transformer.h.18.attn.k_proj.qweight", "transformer.h.18.attn.v_proj.qzeros", "transformer.h.18.attn.v_proj.scales", "transformer.h.18.attn.v_proj.bias", "transformer.h.18.attn.v_proj.qweight", "transformer.h.18.attn.q_proj.qzeros", "transformer.h.18.attn.q_proj.scales", "transformer.h.18.attn.q_proj.bias", "transformer.h.18.attn.q_proj.qweight", "transformer.h.18.attn.out_proj.qzeros", "transformer.h.18.attn.out_proj.scales", "transformer.h.18.attn.out_proj.bias", "transformer.h.18.attn.out_proj.qweight", "transformer.h.18.mlp.fc_in.qzeros", 
"transformer.h.18.mlp.fc_in.scales", "transformer.h.18.mlp.fc_in.qweight", "transformer.h.18.mlp.fc_out.qzeros", "transformer.h.18.mlp.fc_out.scales", "transformer.h.18.mlp.fc_out.qweight", "transformer.h.19.attn.k_proj.qzeros", "transformer.h.19.attn.k_proj.scales", "transformer.h.19.attn.k_proj.bias", "transformer.h.19.attn.k_proj.qweight", "transformer.h.19.attn.v_proj.qzeros", "transformer.h.19.attn.v_proj.scales", "transformer.h.19.attn.v_proj.bias", "transformer.h.19.attn.v_proj.qweight", "transformer.h.19.attn.q_proj.qzeros", "transformer.h.19.attn.q_proj.scales", "transformer.h.19.attn.q_proj.bias", "transformer.h.19.attn.q_proj.qweight", "transformer.h.19.attn.out_proj.qzeros", "transformer.h.19.attn.out_proj.scales", "transformer.h.19.attn.out_proj.bias", "transformer.h.19.attn.out_proj.qweight", "transformer.h.19.mlp.fc_in.qzeros", "transformer.h.19.mlp.fc_in.scales", "transformer.h.19.mlp.fc_in.qweight", "transformer.h.19.mlp.fc_out.qzeros", "transformer.h.19.mlp.fc_out.scales", "transformer.h.19.mlp.fc_out.qweight", "transformer.h.20.attn.k_proj.qzeros", "transformer.h.20.attn.k_proj.scales", "transformer.h.20.attn.k_proj.bias", "transformer.h.20.attn.k_proj.qweight", "transformer.h.20.attn.v_proj.qzeros", "transformer.h.20.attn.v_proj.scales", "transformer.h.20.attn.v_proj.bias", "transformer.h.20.attn.v_proj.qweight", "transformer.h.20.attn.q_proj.qzeros", "transformer.h.20.attn.q_proj.scales", "transformer.h.20.attn.q_proj.bias", "transformer.h.20.attn.q_proj.qweight", "transformer.h.20.attn.out_proj.qzeros", "transformer.h.20.attn.out_proj.scales", "transformer.h.20.attn.out_proj.bias", "transformer.h.20.attn.out_proj.qweight", "transformer.h.20.mlp.fc_in.qzeros", "transformer.h.20.mlp.fc_in.scales", "transformer.h.20.mlp.fc_in.qweight", "transformer.h.20.mlp.fc_out.qzeros", "transformer.h.20.mlp.fc_out.scales", "transformer.h.20.mlp.fc_out.qweight", "transformer.h.21.attn.k_proj.qzeros", "transformer.h.21.attn.k_proj.scales", 
"transformer.h.21.attn.k_proj.bias", "transformer.h.21.attn.k_proj.qweight", "transformer.h.21.attn.v_proj.qzeros", "transformer.h.21.attn.v_proj.scales", "transformer.h.21.attn.v_proj.bias", "transformer.h.21.attn.v_proj.qweight", "transformer.h.21.attn.q_proj.qzeros", "transformer.h.21.attn.q_proj.scales", "transformer.h.21.attn.q_proj.bias", "transformer.h.21.attn.q_proj.qweight", "transformer.h.21.attn.out_proj.qzeros", "transformer.h.21.attn.out_proj.scales", "transformer.h.21.attn.out_proj.bias", "transformer.h.21.attn.out_proj.qweight", "transformer.h.21.mlp.fc_in.qzeros", "transformer.h.21.mlp.fc_in.scales", "transformer.h.21.mlp.fc_in.qweight", "transformer.h.21.mlp.fc_out.qzeros", "transformer.h.21.mlp.fc_out.scales", "transformer.h.21.mlp.fc_out.qweight", "transformer.h.22.attn.k_proj.qzeros", "transformer.h.22.attn.k_proj.scales", "transformer.h.22.attn.k_proj.bias", "transformer.h.22.attn.k_proj.qweight", "transformer.h.22.attn.v_proj.qzeros", "transformer.h.22.attn.v_proj.scales", "transformer.h.22.attn.v_proj.bias", "transformer.h.22.attn.v_proj.qweight", "transformer.h.22.attn.q_proj.qzeros", "transformer.h.22.attn.q_proj.scales", "transformer.h.22.attn.q_proj.bias", "transformer.h.22.attn.q_proj.qweight", "transformer.h.22.attn.out_proj.qzeros", "transformer.h.22.attn.out_proj.scales", "transformer.h.22.attn.out_proj.bias", "transformer.h.22.attn.out_proj.qweight", "transformer.h.22.mlp.fc_in.qzeros", "transformer.h.22.mlp.fc_in.scales", "transformer.h.22.mlp.fc_in.qweight", "transformer.h.22.mlp.fc_out.qzeros", "transformer.h.22.mlp.fc_out.scales", "transformer.h.22.mlp.fc_out.qweight", "transformer.h.23.attn.k_proj.qzeros", "transformer.h.23.attn.k_proj.scales", "transformer.h.23.attn.k_proj.bias", "transformer.h.23.attn.k_proj.qweight", "transformer.h.23.attn.v_proj.qzeros", "transformer.h.23.attn.v_proj.scales", "transformer.h.23.attn.v_proj.bias", "transformer.h.23.attn.v_proj.qweight", "transformer.h.23.attn.q_proj.qzeros", 
"transformer.h.23.attn.q_proj.scales", "transformer.h.23.attn.q_proj.bias", "transformer.h.23.attn.q_proj.qweight", "transformer.h.23.attn.out_proj.qzeros", "transformer.h.23.attn.out_proj.scales", "transformer.h.23.attn.out_proj.bias", "transformer.h.23.attn.out_proj.qweight", "transformer.h.23.mlp.fc_in.qzeros", "transformer.h.23.mlp.fc_in.scales", "transformer.h.23.mlp.fc_in.qweight", "transformer.h.23.mlp.fc_out.qzeros", "transformer.h.23.mlp.fc_out.scales", "transformer.h.23.mlp.fc_out.qweight", "transformer.h.24.attn.k_proj.qzeros", "transformer.h.24.attn.k_proj.scales", "transformer.h.24.attn.k_proj.bias", "transformer.h.24.attn.k_proj.qweight", "transformer.h.24.attn.v_proj.qzeros", "transformer.h.24.attn.v_proj.scales", "transformer.h.24.attn.v_proj.bias", "transformer.h.24.attn.v_proj.qweight", "transformer.h.24.attn.q_proj.qzeros", "transformer.h.24.attn.q_proj.scales", "transformer.h.24.attn.q_proj.bias", "transformer.h.24.attn.q_proj.qweight", "transformer.h.24.attn.out_proj.qzeros", "transformer.h.24.attn.out_proj.scales", "transformer.h.24.attn.out_proj.bias", "transformer.h.24.attn.out_proj.qweight", "transformer.h.24.mlp.fc_in.qzeros", "transformer.h.24.mlp.fc_in.scales", "transformer.h.24.mlp.fc_in.qweight", "transformer.h.24.mlp.fc_out.qzeros", "transformer.h.24.mlp.fc_out.scales", "transformer.h.24.mlp.fc_out.qweight", "transformer.h.25.attn.k_proj.qzeros", "transformer.h.25.attn.k_proj.scales", "transformer.h.25.attn.k_proj.bias", "transformer.h.25.attn.k_proj.qweight", "transformer.h.25.attn.v_proj.qzeros", "transformer.h.25.attn.v_proj.scales", "transformer.h.25.attn.v_proj.bias", "transformer.h.25.attn.v_proj.qweight", "transformer.h.25.attn.q_proj.qzeros", "transformer.h.25.attn.q_proj.scales", "transformer.h.25.attn.q_proj.bias", "transformer.h.25.attn.q_proj.qweight", "transformer.h.25.attn.out_proj.qzeros", "transformer.h.25.attn.out_proj.scales", "transformer.h.25.attn.out_proj.bias", "transformer.h.25.attn.out_proj.qweight", 
"transformer.h.25.mlp.fc_in.qzeros", "transformer.h.25.mlp.fc_in.scales", "transformer.h.25.mlp.fc_in.qweight", "transformer.h.25.mlp.fc_out.qzeros", "transformer.h.25.mlp.fc_out.scales", "transformer.h.25.mlp.fc_out.qweight", "transformer.h.26.attn.k_proj.qzeros", "transformer.h.26.attn.k_proj.scales", "transformer.h.26.attn.k_proj.bias", "transformer.h.26.attn.k_proj.qweight", "transformer.h.26.attn.v_proj.qzeros", "transformer.h.26.attn.v_proj.scales", "transformer.h.26.attn.v_proj.bias", "transformer.h.26.attn.v_proj.qweight", "transformer.h.26.attn.q_proj.qzeros", "transformer.h.26.attn.q_proj.scales", "transformer.h.26.attn.q_proj.bias", "transformer.h.26.attn.q_proj.qweight", "transformer.h.26.attn.out_proj.qzeros", "transformer.h.26.attn.out_proj.scales", "transformer.h.26.attn.out_proj.bias", "transformer.h.26.attn.out_proj.qweight", "transformer.h.26.mlp.fc_in.qzeros", "transformer.h.26.mlp.fc_in.scales", "transformer.h.26.mlp.fc_in.qweight", "transformer.h.26.mlp.fc_out.qzeros", "transformer.h.26.mlp.fc_out.scales", "transformer.h.26.mlp.fc_out.qweight", "transformer.h.27.attn.k_proj.qzeros", "transformer.h.27.attn.k_proj.scales", "transformer.h.27.attn.k_proj.bias", "transformer.h.27.attn.k_proj.qweight", "transformer.h.27.attn.v_proj.qzeros", "transformer.h.27.attn.v_proj.scales", "transformer.h.27.attn.v_proj.bias", "transformer.h.27.attn.v_proj.qweight", "transformer.h.27.attn.q_proj.qzeros", "transformer.h.27.attn.q_proj.scales", "transformer.h.27.attn.q_proj.bias", "transformer.h.27.attn.q_proj.qweight", "transformer.h.27.attn.out_proj.qzeros", "transformer.h.27.attn.out_proj.scales", "transformer.h.27.attn.out_proj.bias", "transformer.h.27.attn.out_proj.qweight", "transformer.h.27.mlp.fc_in.qzeros", "transformer.h.27.mlp.fc_in.scales", "transformer.h.27.mlp.fc_in.qweight", "transformer.h.27.mlp.fc_out.qzeros", "transformer.h.27.mlp.fc_out.scales", "transformer.h.27.mlp.fc_out.qweight".
Unexpected key(s) in state_dict: "transformer.h.0.attn.k_proj.weight", "transformer.h.0.attn.v_proj.weight", "transformer.h.0.attn.q_proj.weight", "transformer.h.0.attn.out_proj.weight", "transformer.h.0.mlp.fc_in.weight", "transformer.h.0.mlp.fc_out.weight", "transformer.h.1.attn.k_proj.weight", "transformer.h.1.attn.v_proj.weight", "transformer.h.1.attn.q_proj.weight", "transformer.h.1.attn.out_proj.weight", "transformer.h.1.mlp.fc_in.weight", "transformer.h.1.mlp.fc_out.weight", "transformer.h.2.attn.k_proj.weight", "transformer.h.2.attn.v_proj.weight", "transformer.h.2.attn.q_proj.weight", "transformer.h.2.attn.out_proj.weight", "transformer.h.2.mlp.fc_in.weight", "transformer.h.2.mlp.fc_out.weight", "transformer.h.3.attn.k_proj.weight", "transformer.h.3.attn.v_proj.weight", "transformer.h.3.attn.q_proj.weight", "transformer.h.3.attn.out_proj.weight", "transformer.h.3.mlp.fc_in.weight", "transformer.h.3.mlp.fc_out.weight", "transformer.h.4.attn.k_proj.weight", "transformer.h.4.attn.v_proj.weight", "transformer.h.4.attn.q_proj.weight", "transformer.h.4.attn.out_proj.weight", "transformer.h.4.mlp.fc_in.weight", "transformer.h.4.mlp.fc_out.weight", "transformer.h.5.attn.k_proj.weight", "transformer.h.5.attn.v_proj.weight", "transformer.h.5.attn.q_proj.weight", "transformer.h.5.attn.out_proj.weight", "transformer.h.5.mlp.fc_in.weight", "transformer.h.5.mlp.fc_out.weight", "transformer.h.6.attn.k_proj.weight", "transformer.h.6.attn.v_proj.weight", "transformer.h.6.attn.q_proj.weight", "transformer.h.6.attn.out_proj.weight", "transformer.h.6.mlp.fc_in.weight", "transformer.h.6.mlp.fc_out.weight", "transformer.h.7.attn.k_proj.weight", "transformer.h.7.attn.v_proj.weight", "transformer.h.7.attn.q_proj.weight", "transformer.h.7.attn.out_proj.weight", "transformer.h.7.mlp.fc_in.weight", "transformer.h.7.mlp.fc_out.weight", "transformer.h.8.attn.k_proj.weight", "transformer.h.8.attn.v_proj.weight", "transformer.h.8.attn.q_proj.weight", 
"transformer.h.8.attn.out_proj.weight", "transformer.h.8.mlp.fc_in.weight", "transformer.h.8.mlp.fc_out.weight", "transformer.h.9.attn.k_proj.weight", "transformer.h.9.attn.v_proj.weight", "transformer.h.9.attn.q_proj.weight", "transformer.h.9.attn.out_proj.weight", "transformer.h.9.mlp.fc_in.weight", "transformer.h.9.mlp.fc_out.weight", "transformer.h.10.attn.k_proj.weight", "transformer.h.10.attn.v_proj.weight", "transformer.h.10.attn.q_proj.weight", "transformer.h.10.attn.out_proj.weight", "transformer.h.10.mlp.fc_in.weight", "transformer.h.10.mlp.fc_out.weight", "transformer.h.11.attn.k_proj.weight", "transformer.h.11.attn.v_proj.weight", "transformer.h.11.attn.q_proj.weight", "transformer.h.11.attn.out_proj.weight", "transformer.h.11.mlp.fc_in.weight", "transformer.h.11.mlp.fc_out.weight", "transformer.h.12.attn.k_proj.weight", "transformer.h.12.attn.v_proj.weight", "transformer.h.12.attn.q_proj.weight", "transformer.h.12.attn.out_proj.weight", "transformer.h.12.mlp.fc_in.weight", "transformer.h.12.mlp.fc_out.weight", "transformer.h.13.attn.k_proj.weight", "transformer.h.13.attn.v_proj.weight", "transformer.h.13.attn.q_proj.weight", "transformer.h.13.attn.out_proj.weight", "transformer.h.13.mlp.fc_in.weight", "transformer.h.13.mlp.fc_out.weight", "transformer.h.14.attn.k_proj.weight", "transformer.h.14.attn.v_proj.weight", "transformer.h.14.attn.q_proj.weight", "transformer.h.14.attn.out_proj.weight", "transformer.h.14.mlp.fc_in.weight", "transformer.h.14.mlp.fc_out.weight", "transformer.h.15.attn.k_proj.weight", "transformer.h.15.attn.v_proj.weight", "transformer.h.15.attn.q_proj.weight", "transformer.h.15.attn.out_proj.weight", "transformer.h.15.mlp.fc_in.weight", "transformer.h.15.mlp.fc_out.weight", "transformer.h.16.attn.k_proj.weight", "transformer.h.16.attn.v_proj.weight", "transformer.h.16.attn.q_proj.weight", "transformer.h.16.attn.out_proj.weight", "transformer.h.16.mlp.fc_in.weight", "transformer.h.16.mlp.fc_out.weight", 
"transformer.h.17.attn.k_proj.weight", "transformer.h.17.attn.v_proj.weight", "transformer.h.17.attn.q_proj.weight", "transformer.h.17.attn.out_proj.weight", "transformer.h.17.mlp.fc_in.weight", "transformer.h.17.mlp.fc_out.weight", "transformer.h.18.attn.k_proj.weight", "transformer.h.18.attn.v_proj.weight", "transformer.h.18.attn.q_proj.weight", "transformer.h.18.attn.out_proj.weight", "transformer.h.18.mlp.fc_in.weight", "transformer.h.18.mlp.fc_out.weight", "transformer.h.19.attn.k_proj.weight", "transformer.h.19.attn.v_proj.weight", "transformer.h.19.attn.q_proj.weight", "transformer.h.19.attn.out_proj.weight", "transformer.h.19.mlp.fc_in.weight", "transformer.h.19.mlp.fc_out.weight", "transformer.h.20.attn.k_proj.weight", "transformer.h.20.attn.v_proj.weight", "transformer.h.20.attn.q_proj.weight", "transformer.h.20.attn.out_proj.weight", "transformer.h.20.mlp.fc_in.weight", "transformer.h.20.mlp.fc_out.weight", "transformer.h.21.attn.k_proj.weight", "transformer.h.21.attn.v_proj.weight", "transformer.h.21.attn.q_proj.weight", "transformer.h.21.attn.out_proj.weight", "transformer.h.21.mlp.fc_in.weight", "transformer.h.21.mlp.fc_out.weight", "transformer.h.22.attn.k_proj.weight", "transformer.h.22.attn.v_proj.weight", "transformer.h.22.attn.q_proj.weight", "transformer.h.22.attn.out_proj.weight", "transformer.h.22.mlp.fc_in.weight", "transformer.h.22.mlp.fc_out.weight", "transformer.h.23.attn.k_proj.weight", "transformer.h.23.attn.v_proj.weight", "transformer.h.23.attn.q_proj.weight", "transformer.h.23.attn.out_proj.weight", "transformer.h.23.mlp.fc_in.weight", "transformer.h.23.mlp.fc_out.weight", "transformer.h.24.attn.k_proj.weight", "transformer.h.24.attn.v_proj.weight", "transformer.h.24.attn.q_proj.weight", "transformer.h.24.attn.out_proj.weight", "transformer.h.24.mlp.fc_in.weight", "transformer.h.24.mlp.fc_out.weight", "transformer.h.25.attn.k_proj.weight", "transformer.h.25.attn.v_proj.weight", "transformer.h.25.attn.q_proj.weight", 
"transformer.h.25.attn.out_proj.weight", "transformer.h.25.mlp.fc_in.weight", "transformer.h.25.mlp.fc_out.weight", "transformer.h.26.attn.k_proj.weight", "transformer.h.26.attn.v_proj.weight", "transformer.h.26.attn.q_proj.weight", "transformer.h.26.attn.out_proj.weight", "transformer.h.26.mlp.fc_in.weight", "transformer.h.26.mlp.fc_out.weight", "transformer.h.27.attn.k_proj.weight", "transformer.h.27.attn.v_proj.weight", "transformer.h.27.attn.q_proj.weight", "transformer.h.27.attn.out_proj.weight", "transformer.h.27.mlp.fc_in.weight", "transformer.h.27.mlp.fc_out.weight".