NVIDIA / TransformerEngine

A library for accelerating Transformer models on NVIDIA GPUs, including using 8-bit floating point (FP8) precision on Hopper and Ada GPUs, to provide better performance with lower memory utilization in both training and inference.
https://docs.nvidia.com/deeplearning/transformer-engine/user-guide/index.html
Apache License 2.0

apply_query_key_layer_scaling #1030

Open wccccp opened 1 month ago

wccccp commented 1 month ago

TypeError: TransformerLayer.__init__() got an unexpected keyword argument 'apply_query_key_layer_scaling'

[rank2]: Traceback (most recent call last):
[rank2]:   File "/gpfs01/unifiedcsi/gpfs/csi-dfs-ti-platform-fs/wcp/vllm_test/torchtune/Pai-Megatron-Patch/examples/llama2/pretrain_megatron_llama.py", line 125, in <module>
[rank2]:     pretrain(train_valid_test_datasets_provider, model_provider,
[rank2]:   File "/gpfs01/unifiedcsi/gpfs/csi-dfs-ti-platform-fs/wcp/vllm_test/torchtune/Pai-Megatron-Patch/megatron_patch/training.py", line 116, in pretrain
[rank2]:     model, optimizer, opt_param_scheduler = setup_model_and_optimizer(
[rank2]:   File "/gpfs01/unifiedcsi/gpfs/csi-dfs-ti-platform-fs/wcp/vllm_test/torchtune/Pai-Megatron-Patch/Megatron-LM-240612/megatron/training/training.py", line 501, in setup_model_and_optimizer
[rank2]:     model = get_model(model_provider_func, model_type)
[rank2]:   File "/gpfs01/unifiedcsi/gpfs/csi-dfs-ti-platform-fs/wcp/vllm_test/torchtune/Pai-Megatron-Patch/Megatron-LM-240612/megatron/training/training.py", line 383, in get_model
[rank2]:     model = model_provider_func(
[rank2]:   File "/gpfs01/unifiedcsi/gpfs/csi-dfs-ti-platform-fs/wcp/vllm_test/torchtune/Pai-Megatron-Patch/examples/llama2/pretrain_megatron_llama.py", line 38, in model_provider
[rank2]:     model = GPTModel(
[rank2]:   File "/gpfs01/unifiedcsi/gpfs/csi-dfs-ti-platform-fs/wcp/vllm_test/torchtune/Pai-Megatron-Patch/megatron_patch/model/llama2/gpt_model.py", line 71, in __init__
[rank2]:     self.language_model, self._language_model_key = get_language_model(
[rank2]:   File "/gpfs01/unifiedcsi/gpfs/csi-dfs-ti-platform-fs/wcp/vllm_test/torchtune/Pai-Megatron-Patch/megatron_patch/model/llama2/language_model.py", line 77, in get_language_model
[rank2]:     language_model = TransformerLanguageModel(
[rank2]:   File "/gpfs01/unifiedcsi/gpfs/csi-dfs-ti-platform-fs/wcp/vllm_test/torchtune/Pai-Megatron-Patch/megatron_patch/model/llama2/language_model.py", line 395, in __init__
[rank2]:     self.encoder = ParallelTransformer(
[rank2]:   File "/gpfs01/unifiedcsi/gpfs/csi-dfs-ti-platform-fs/wcp/vllm_test/torchtune/Pai-Megatron-Patch/megatron_patch/model/llama2/transformer.py", line 1633, in __init__
[rank2]:     [build_layer(i + 1 + offset) for i in range(self.num_layers)])
[rank2]:   File "/gpfs01/unifiedcsi/gpfs/csi-dfs-ti-platform-fs/wcp/vllm_test/torchtune/Pai-Megatron-Patch/megatron_patch/model/llama2/transformer.py", line 1633, in <listcomp>
[rank2]:     [build_layer(i + 1 + offset) for i in range(self.num_layers)])
[rank2]:   File "/gpfs01/unifiedcsi/gpfs/csi-dfs-ti-platform-fs/wcp/vllm_test/torchtune/Pai-Megatron-Patch/megatron_patch/model/llama2/transformer.py", line 1559, in build_layer
[rank2]:     return transformer_engine.pytorch.TransformerLayer(
[rank2]: TypeError: TransformerLayer.__init__() got an unexpected keyword argument 'apply_query_key_layer_scaling'

ksivaman commented 1 month ago

Which version of TransformerEngine are you using? The argument apply_query_key_layer_scaling was deprecated very early on and fully removed in v1.0.0. Could you try upgrading to a newer version?
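If upgrading the calling code (Pai-Megatron-Patch here) is not immediately possible, one generic workaround is to drop keyword arguments that the installed library's constructor no longer accepts before calling it. The sketch below is not part of the TransformerEngine API — `filter_supported_kwargs` and `DummyLayer` are hypothetical names used for illustration; in practice `DummyLayer` would stand in for `transformer_engine.pytorch.TransformerLayer`.

```python
import inspect

def filter_supported_kwargs(cls, kwargs):
    """Return a copy of kwargs containing only arguments that
    cls.__init__ actually accepts. Useful when an argument such as
    apply_query_key_layer_scaling was removed in a newer library
    version but older calling code still passes it."""
    params = inspect.signature(cls.__init__).parameters
    # If __init__ takes **kwargs, everything is accepted as-is.
    if any(p.kind is inspect.Parameter.VAR_KEYWORD for p in params.values()):
        return dict(kwargs)
    return {k: v for k, v in kwargs.items() if k in params}

# Hypothetical stand-in for TransformerLayer in TE >= 1.0.0, where
# apply_query_key_layer_scaling no longer exists.
class DummyLayer:
    def __init__(self, hidden_size, num_attention_heads):
        self.hidden_size = hidden_size
        self.num_attention_heads = num_attention_heads

kwargs = {
    "hidden_size": 4096,
    "num_attention_heads": 32,
    "apply_query_key_layer_scaling": True,  # removed argument
}
# Passing kwargs directly would raise the TypeError from the traceback;
# filtering first makes the call succeed.
layer = DummyLayer(**filter_supported_kwargs(DummyLayer, kwargs))
print(layer.hidden_size)  # 4096
```

Note that silently dropping an argument changes numerical behavior if the argument previously had an effect, so upgrading the caller to match the installed TransformerEngine version remains the cleaner fix.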