Open peterdong168 opened 3 months ago
What is the exact error?
Here is the error message (note: before replacing the stage-1 model, running train_rl.py with the original pretrained model works fine):
RuntimeError: Error(s) in loading state_dict for Transformer_base: Missing key(s) in state_dict: "enc_embedding.value_embedding.tokenConv.weight", "enc_embedding.value_embedding.tokenConv.bias", "enc_embedding.position_embedding.pe", "dec_embedding.value_embedding.tokenConv.weight", "dec_embedding.value_embedding.tokenConv.bias", "dec_embedding.position_embedding.pe", "encoder.attn_layers.0.attention.query_projection.weight", "encoder.attn_layers.0.attention.query_projection.bias", "encoder.attn_layers.0.attention.key_projection.weight", "encoder.attn_layers.0.attention.key_projection.bias", "encoder.attn_layers.0.attention.value_projection.weight", "encoder.attn_layers.0.attention.value_projection.bias", "encoder.attn_layers.0.attention.out_projection.weight", "encoder.attn_layers.0.attention.out_projection.bias", "encoder.attn_layers.0.mhfw.mhfw.0.conv1.weight", "encoder.attn_layers.0.mhfw.mhfw.0.conv1.bias", "encoder.attn_layers.0.mhfw.mhfw.0.conv2.weight", "encoder.attn_layers.0.mhfw.mhfw.0.conv2.bias", "encoder.attn_layers.0.mhfw.mhfw.1.conv1.weight", "encoder.attn_layers.0.mhfw.mhfw.1.conv1.bias", "encoder.attn_layers.0.mhfw.mhfw.1.conv2.weight", "encoder.attn_layers.0.mhfw.mhfw.1.conv2.bias", "encoder.attn_layers.0.mhfw.mhfw.2.conv1.weight", "encoder.attn_layers.0.mhfw.mhfw.2.conv1.bias", "encoder.attn_layers.0.mhfw.mhfw.2.conv2.weight", "encoder.attn_layers.0.mhfw.mhfw.2.conv2.bias", "encoder.attn_layers.0.mhfw.mhfw.3.conv1.weight", "encoder.attn_layers.0.mhfw.mhfw.3.conv1.bias", "encoder.attn_layers.0.mhfw.mhfw.3.conv2.weight", "encoder.attn_layers.0.mhfw.mhfw.3.conv2.bias", "encoder.attn_layers.0.norm1.weight", "encoder.attn_layers.0.norm1.bias", "encoder.attn_layers.0.norm2.weight", "encoder.attn_layers.0.norm2.bias", "encoder.attn_layers.1.attention.query_projection.weight", "encoder.attn_layers.1.attention.query_projection.bias", "encoder.attn_layers.1.attention.key_projection.weight", "encoder.attn_layers.1.attention.key_projection.bias", 
"encoder.attn_layers.1.attention.value_projection.weight", "encoder.attn_layers.1.attention.value_projection.bias", "encoder.attn_layers.1.attention.out_projection.weight", "encoder.attn_layers.1.attention.out_projection.bias", "encoder.attn_layers.1.mhfw.mhfw.0.conv1.weight", "encoder.attn_layers.1.mhfw.mhfw.0.conv1.bias", "encoder.attn_layers.1.mhfw.mhfw.0.conv2.weight", "encoder.attn_layers.1.mhfw.mhfw.0.conv2.bias", "encoder.attn_layers.1.mhfw.mhfw.1.conv1.weight", "encoder.attn_layers.1.mhfw.mhfw.1.conv1.bias", "encoder.attn_layers.1.mhfw.mhfw.1.conv2.weight", "encoder.attn_layers.1.mhfw.mhfw.1.conv2.bias", "encoder.attn_layers.1.mhfw.mhfw.2.conv1.weight", "encoder.attn_layers.1.mhfw.mhfw.2.conv1.bias", "encoder.attn_layers.1.mhfw.mhfw.2.conv2.weight", "encoder.attn_layers.1.mhfw.mhfw.2.conv2.bias", "encoder.attn_layers.1.mhfw.mhfw.3.conv1.weight", "encoder.attn_layers.1.mhfw.mhfw.3.conv1.bias", "encoder.attn_layers.1.mhfw.mhfw.3.conv2.weight", "encoder.attn_layers.1.mhfw.mhfw.3.conv2.bias", "encoder.attn_layers.1.norm1.weight", "encoder.attn_layers.1.norm1.bias", "encoder.attn_layers.1.norm2.weight", "encoder.attn_layers.1.norm2.bias", "encoder.norm.weight", "encoder.norm.bias", "decoder.layers.0.self_attention.query_projection.weight", "decoder.layers.0.self_attention.query_projection.bias", "decoder.layers.0.self_attention.key_projection.weight", "decoder.layers.0.self_attention.key_projection.bias", "decoder.layers.0.self_attention.value_projection.weight", "decoder.layers.0.self_attention.value_projection.bias", "decoder.layers.0.self_attention.out_projection.weight", "decoder.layers.0.self_attention.out_projection.bias", "decoder.layers.0.cross_attention.query_projection.weight", "decoder.layers.0.cross_attention.query_projection.bias", "decoder.layers.0.cross_attention.key_projection.weight", "decoder.layers.0.cross_attention.key_projection.bias", "decoder.layers.0.cross_attention.value_projection.weight", 
"decoder.layers.0.cross_attention.value_projection.bias", "decoder.layers.0.cross_attention.out_projection.weight", "decoder.layers.0.cross_attention.out_projection.bias", "decoder.layers.0.mhfw.mhfw.0.conv1.weight", "decoder.layers.0.mhfw.mhfw.0.conv1.bias", "decoder.layers.0.mhfw.mhfw.0.conv2.weight", "decoder.layers.0.mhfw.mhfw.0.conv2.bias", "decoder.layers.0.mhfw.mhfw.1.conv1.weight", "decoder.layers.0.mhfw.mhfw.1.conv1.bias", "decoder.layers.0.mhfw.mhfw.1.conv2.weight", "decoder.layers.0.mhfw.mhfw.1.conv2.bias", "decoder.layers.0.mhfw.mhfw.2.conv1.weight", "decoder.layers.0.mhfw.mhfw.2.conv1.bias", "decoder.layers.0.mhfw.mhfw.2.conv2.weight", "decoder.layers.0.mhfw.mhfw.2.conv2.bias", "decoder.layers.0.mhfw.mhfw.3.conv1.weight", "decoder.layers.0.mhfw.mhfw.3.conv1.bias", "decoder.layers.0.mhfw.mhfw.3.conv2.weight", "decoder.layers.0.mhfw.mhfw.3.conv2.bias", "decoder.layers.0.norm1.weight", "decoder.layers.0.norm1.bias", "decoder.layers.0.norm2.weight", "decoder.layers.0.norm2.bias", "decoder.layers.0.norm3.weight", "decoder.layers.0.norm3.bias", "decoder.norm.weight", "decoder.norm.bias", "projection_decoder.weight", "projection_decoder.bias". 
Unexpected key(s) in state_dict: "edding.value_embedding.tokenConv.weight", "edding.value_embedding.tokenConv.bias", "edding.position_embedding.pe", ".attn_layers.0.attention.query_projection.weight", ".attn_layers.0.attention.query_projection.bias", ".attn_layers.0.attention.key_projection.weight", ".attn_layers.0.attention.key_projection.bias", ".attn_layers.0.attention.value_projection.weight", ".attn_layers.0.attention.value_projection.bias", ".attn_layers.0.attention.out_projection.weight", ".attn_layers.0.attention.out_projection.bias", ".attn_layers.0.mhfw.mhfw.0.conv1.weight", ".attn_layers.0.mhfw.mhfw.0.conv1.bias", ".attn_layers.0.mhfw.mhfw.0.conv2.weight", ".attn_layers.0.mhfw.mhfw.0.conv2.bias", ".attn_layers.0.mhfw.mhfw.1.conv1.weight", ".attn_layers.0.mhfw.mhfw.1.conv1.bias", ".attn_layers.0.mhfw.mhfw.1.conv2.weight", ".attn_layers.0.mhfw.mhfw.1.conv2.bias", ".attn_layers.0.mhfw.mhfw.2.conv1.weight", ".attn_layers.0.mhfw.mhfw.2.conv1.bias", ".attn_layers.0.mhfw.mhfw.2.conv2.weight", ".attn_layers.0.mhfw.mhfw.2.conv2.bias", ".attn_layers.0.mhfw.mhfw.3.conv1.weight", ".attn_layers.0.mhfw.mhfw.3.conv1.bias", ".attn_layers.0.mhfw.mhfw.3.conv2.weight", ".attn_layers.0.mhfw.mhfw.3.conv2.bias", ".attn_layers.0.norm1.weight", ".attn_layers.0.norm1.bias", ".attn_layers.0.norm2.weight", ".attn_layers.0.norm2.bias", ".attn_layers.1.attention.query_projection.weight", ".attn_layers.1.attention.query_projection.bias", ".attn_layers.1.attention.key_projection.weight", ".attn_layers.1.attention.key_projection.bias", ".attn_layers.1.attention.value_projection.weight", ".attn_layers.1.attention.value_projection.bias", ".attn_layers.1.attention.out_projection.weight", ".attn_layers.1.attention.out_projection.bias", ".attn_layers.1.mhfw.mhfw.0.conv1.weight", ".attn_layers.1.mhfw.mhfw.0.conv1.bias", ".attn_layers.1.mhfw.mhfw.0.conv2.weight", ".attn_layers.1.mhfw.mhfw.0.conv2.bias", ".attn_layers.1.mhfw.mhfw.1.conv1.weight", ".attn_layers.1.mhfw.mhfw.1.conv1.bias", 
".attn_layers.1.mhfw.mhfw.1.conv2.weight", ".attn_layers.1.mhfw.mhfw.1.conv2.bias", ".attn_layers.1.mhfw.mhfw.2.conv1.weight", ".attn_layers.1.mhfw.mhfw.2.conv1.bias", ".attn_layers.1.mhfw.mhfw.2.conv2.weight", ".attn_layers.1.mhfw.mhfw.2.conv2.bias", ".attn_layers.1.mhfw.mhfw.3.conv1.weight", ".attn_layers.1.mhfw.mhfw.3.conv1.bias", ".attn_layers.1.mhfw.mhfw.3.conv2.weight", ".attn_layers.1.mhfw.mhfw.3.conv2.bias", ".attn_layers.1.norm1.weight", ".attn_layers.1.norm1.bias", ".attn_layers.1.norm2.weight", ".attn_layers.1.norm2.bias", ".norm.weight", ".norm.bias", ".layers.0.self_attention.query_projection.weight", ".layers.0.self_attention.query_projection.bias", ".layers.0.self_attention.key_projection.weight", ".layers.0.self_attention.key_projection.bias", ".layers.0.self_attention.value_projection.weight", ".layers.0.self_attention.value_projection.bias", ".layers.0.self_attention.out_projection.weight", ".layers.0.self_attention.out_projection.bias", ".layers.0.cross_attention.query_projection.weight", ".layers.0.cross_attention.query_projection.bias", ".layers.0.cross_attention.key_projection.weight", ".layers.0.cross_attention.key_projection.bias", ".layers.0.cross_attention.value_projection.weight", ".layers.0.cross_attention.value_projection.bias", ".layers.0.cross_attention.out_projection.weight", ".layers.0.cross_attention.out_projection.bias", ".layers.0.mhfw.mhfw.0.conv1.weight", ".layers.0.mhfw.mhfw.0.conv1.bias", ".layers.0.mhfw.mhfw.0.conv2.weight", ".layers.0.mhfw.mhfw.0.conv2.bias", ".layers.0.mhfw.mhfw.1.conv1.weight", ".layers.0.mhfw.mhfw.1.conv1.bias", ".layers.0.mhfw.mhfw.1.conv2.weight", ".layers.0.mhfw.mhfw.1.conv2.bias", ".layers.0.mhfw.mhfw.2.conv1.weight", ".layers.0.mhfw.mhfw.2.conv1.bias", ".layers.0.mhfw.mhfw.2.conv2.weight", ".layers.0.mhfw.mhfw.2.conv2.bias", ".layers.0.mhfw.mhfw.3.conv1.weight", ".layers.0.mhfw.mhfw.3.conv1.bias", ".layers.0.mhfw.mhfw.3.conv2.weight", ".layers.0.mhfw.mhfw.3.conv2.bias", ".layers.0.norm1.weight", 
".layers.0.norm1.bias", ".layers.0.norm2.weight", ".layers.0.norm2.bias", ".layers.0.norm3.weight", ".layers.0.norm3.bias", "ion_decoder.weight", "ion_decoder.bias". Process finished with exit code 1
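One pattern worth noting in the traceback: every "Unexpected key" is the corresponding "Missing key" with its first seven characters cut off ("enc_embedding…" becomes "edding…", "encoder.attn_layers…" becomes ".attn_layers…", "projection_decoder" becomes "ion_decoder"). Seven is exactly the length of "module.", the prefix nn.DataParallel adds to saved keys, so a plausible explanation is that the loading code unconditionally slices seven characters off each checkpoint key while the user's stage-1 checkpoint was saved without that prefix. A minimal sketch (hypothetical, not the repository's actual code) reproducing the mismatch:

```python
# Hypothetical sketch: how unconditionally stripping a 7-character
# "module." prefix mangles keys that never had the prefix.
expected = [
    "enc_embedding.value_embedding.tokenConv.weight",
    "encoder.attn_layers.0.norm1.weight",
    "projection_decoder.weight",
]

# Checkpoint saved WITHOUT "module." (e.g. single-GPU training):
saved = {k: None for k in expected}

# Loader drops the first len("module.") == 7 characters of every key:
loaded = {k[7:]: v for k, v in saved.items()}
# The resulting keys are the truncated "Unexpected key(s)" from the traceback,
# e.g. "edding.value_embedding.tokenConv.weight" and "ion_decoder.weight".
```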
peter
All I can tell is that something went wrong while loading the model weights file...
In this situation, what changes should I make so that running train_rl.py no longer throws this error?
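Assuming the cause is the "module." prefix mismatch described above, one common workaround is to strip the prefix only when it is actually present, so that checkpoints saved with or without nn.DataParallel both load. A hypothetical sketch (the function name and placement are illustrative, not the repository's actual code):

```python
def clean_state_dict(state):
    """Strip a leading "module." (added by nn.DataParallel when saving)
    from each key, but only if the prefix is actually there."""
    prefix = "module."
    return {
        (k[len(prefix):] if k.startswith(prefix) else k): v
        for k, v in state.items()
    }

# Usage sketch: model.load_state_dict(clean_state_dict(torch.load(path)))
```

Alternatively, wrapping the stage-1 model in nn.DataParallel before saving would produce keys with the prefix the loader expects.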
peter
Could you describe how you triggered this error? I will see whether I can reproduce it.
".attn_layers.1.mhfw.mhfw.1.conv2.weight", ".attn_layers.1.mhfw.mhfw.1.conv2.bias", ".attn_layers.1.mhfw.mhfw.2.conv1.weight", ".attn_layers.1.mhfw.mhfw.2.conv1.bias", ".attn_layers.1.mhfw.mhfw.2.conv2.weight", ".attn_layers.1.mhfw.mhfw.2.conv2.bias", ".attn_layers.1.mhfw.mhfw.3.conv1.weight", ".attn_layers.1.mhfw.mhfw.3.conv1.bias", ".attn_layers.1.mhfw.mhfw.3.conv2.weight", ".attn_layers.1.mhfw.mhfw.3.conv2.bias", ".attn_layers.1.norm1.weight", ".attn_layers.1.norm1.bias", ".attn_layers.1.norm2.weight", ".attn_layers.1.norm2.bias", ".norm.weight", ".norm.bias", ".layers.0.self_attention.query_projection.weight", ".layers.0.self_attention.query_projection.bias", ".layers.0.self_attention.key_projection.weight", ".layers.0.self_attention.key_projection.bias", ".layers.0.self_attention.value_projection.weight", ".layers.0.self_attention.value_projection.bias", ".layers.0.self_attention.out_projection.weight", ".layers.0.self_attention.out_projection.bias", ".layers.0.cross_attention.query_projection.weight", ".layers.0.cross_attention.query_projection.bias", ".layers.0.cross_attention.key_projection.weight", ".layers.0.cross_attention.key_projection.bias", ".layers.0.cross_attention.value_projection.weight", ".layers.0.cross_attention.value_projection.bias", ".layers.0.cross_attention.out_projection.weight", ".layers.0.cross_attention.out_projection.bias", ".layers.0.mhfw.mhfw.0.conv1.weight", ".layers.0.mhfw.mhfw.0.conv1.bias", ".layers.0.mhfw.mhfw.0.conv2.weight", ".layers.0.mhfw.mhfw.0.conv2.bias", ".layers.0.mhfw.mhfw.1.conv1.weight", ".layers.0.mhfw.mhfw.1.conv1.bias", ".layers.0.mhfw.mhfw.1.conv2.weight", ".layers.0.mhfw.mhfw.1.conv2.bias", ".layers.0.mhfw.mhfw.2.conv1.weight", ".layers.0.mhfw.mhfw.2.conv1.bias", ".layers.0.mhfw.mhfw.2.conv2.weight", ".layers.0.mhfw.mhfw.2.conv2.bias", ".layers.0.mhfw.mhfw.3.conv1.weight", ".layers.0.mhfw.mhfw.3.conv1.bias", ".layers.0.mhfw.mhfw.3.conv2.weight", ".layers.0.mhfw.mhfw.3.conv2.bias", ".layers.0.norm1.weight", 
".layers.0.norm1.bias", ".layers.0.norm2.weight", ".layers.0.norm2.bias", ".layers.0.norm3.weight", ".layers.0.norm3.bias", "ion_decoder.weight", "ion_decoder.bias". Process finished with exit code 1. All I can tell from this is that loading the model weight file is failing...
I first ran stage one (train_mae.sh, train_pred_long.sh, train_short.sh) to get the .pth files, replaced the three .pth files under the pretrained folder with them, and then running train_rl.py throws the error. (Note: without the replacement, train_rl.py runs fine.)
It seems to be a problem with this piece of code. I don't understand why the author does it this way; loading the parameters directly should be enough.
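A minimal sketch of what "loading the parameters directly" means. TinyModel is a hypothetical stand-in for the project's Transformer_base, and the checkpoint path is illustrative: when a checkpoint is saved without any wrapper, its keys already match the model, so no key renaming is needed.

```python
import torch
import torch.nn as nn

# Hypothetical stand-in for the project's Transformer_base.
class TinyModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.proj = nn.Linear(4, 2)

model = TinyModel()
torch.save(model.state_dict(), "checkpoint.pth")  # saved without any wrapper

# Load the parameters directly -- the keys already match, no renaming needed.
state_dict = torch.load("checkpoint.pth", map_location="cpu")
model.load_state_dict(state_dict)
```

If this raises the same "Missing key(s) / Unexpected key(s)" error, the mismatch was introduced elsewhere (for example by code that rewrites the key names before loading).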
Thanks! I'll find some time to try loading the parameters directly.
I'm hitting the same error. Did you manage to solve it?
Remove Prefix if Necessary: in this code, the first 7 characters of each key in the state dict are stripped (name = k[7:]). This is usually done to remove the "module." prefix that PyTorch adds when a model is wrapped in nn.DataParallel (or nn.parallel.DistributedDataParallel). If the saved model was not saved with such a wrapper, this step causes the key mismatch; verify whether it is actually needed. GPT-4 seems to have found the problem: checkpoints produced by single-GPU training will fail at this point. See StockFormer/code/envs/env_stocktrading_hybrid_control.py, line 513 and StockFormer/code/MySAC/SAC/MAE_SAC.py, line 175.
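Based on that diagnosis, here is a hedged sketch of a safer load: strip the "module." prefix only when it is actually present, instead of unconditionally dropping the first 7 characters of every key, so the same code handles both DataParallel and single-GPU checkpoints. TinyModel and the file names are illustrative, not the project's real classes or paths.

```python
import torch
import torch.nn as nn
from collections import OrderedDict

# Hypothetical stand-in for the project's Transformer_base.
class TinyModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.proj = nn.Linear(4, 2)

def load_flexible(model, path):
    """Strip a leading 'module.' prefix only when it is present,
    rather than blindly taking k[7:] on every key."""
    state_dict = torch.load(path, map_location="cpu")
    cleaned = OrderedDict(
        (k[len("module."):] if k.startswith("module.") else k, v)
        for k, v in state_dict.items()
    )
    model.load_state_dict(cleaned)
    return model

# Works for a checkpoint saved without any wrapper (single-GPU training)...
torch.save(TinyModel().state_dict(), "plain.pth")
load_flexible(TinyModel(), "plain.pth")

# ...and for one saved from a DataParallel wrapper ('module.' prefix on keys).
torch.save(nn.DataParallel(TinyModel()).state_dict(), "wrapped.pth")
load_flexible(TinyModel(), "wrapped.pth")
```

If the project's loading code is changed this way, both the pretrained checkpoints and self-trained single-GPU checkpoints should load without the key mismatch.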
I ran into this problem too. Loading the model directly, as suggested above, didn't solve it for me. Do you have any other approach?
> It seems to be a problem with this piece of code. I don't understand why the author does it this way; loading the parameters directly should be enough.
Hi, may I ask whether the load parts in both env and MAE_SAC need to be changed?
After making your change, a different error is thrown; I'd like to ask whether any other parts also need changing.
Traceback (most recent call last):
File "/content/drive/MyDrive/Colab/StockFormer-main/code/train_rl.py", line 169, in
Error: after stage-one training finishes, using the selected models in train_rl.py fails; train_rl runs fine with the author's pretrained models.