Zarr-based strategies will not be registered because of missing packages
Traceback (most recent call last):
File "tools/preprocess_data.py", line 23, in <module>
from megatron.tokenizer import build_tokenizer
File "/n/home06/zhentingqi/LLM_safety/Megatron-LM/megatron/__init__.py", line 16, in <module>
from .initialize import initialize_megatron
File "/n/home06/zhentingqi/LLM_safety/Megatron-LM/megatron/initialize.py", line 18, in <module>
from megatron.arguments import parse_args, validate_args
File "/n/home06/zhentingqi/LLM_safety/Megatron-LM/megatron/arguments.py", line 16, in <module>
from megatron.core.models.retro import RetroConfig
File "/n/home06/zhentingqi/LLM_safety/Megatron-LM/megatron/core/models/retro/__init__.py", line 4, in <module>
from .decoder_spec import get_retro_decoder_block_spec
File "/n/home06/zhentingqi/LLM_safety/Megatron-LM/megatron/core/models/retro/decoder_spec.py", line 5, in <module>
from megatron.core.models.gpt.gpt_layer_specs import (
File "/n/home06/zhentingqi/LLM_safety/Megatron-LM/megatron/core/models/gpt/__init__.py", line 1, in <module>
from .gpt_model import GPTModel
File "/n/home06/zhentingqi/LLM_safety/Megatron-LM/megatron/core/models/gpt/gpt_model.py", line 17, in <module>
from megatron.core.transformer.transformer_block import TransformerBlock
File "/n/home06/zhentingqi/LLM_safety/Megatron-LM/megatron/core/transformer/transformer_block.py", line 16, in <module>
from megatron.core.transformer.custom_layers.transformer_engine import (
File "/n/home06/zhentingqi/LLM_safety/Megatron-LM/megatron/core/transformer/custom_layers/transformer_engine.py", line 7, in <module>
import transformer_engine as te
ModuleNotFoundError: No module named 'transformer_engine'
Describe the bug: I am running the data preprocessing script using the following command:
And I run into the following error:
Environment: