Describe the bug
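When using the latest DeepSpeed with GPT-NeoX, training fails with the following error (the traceback output below appears to be interleaved from more than one process):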
Traceback (most recent call last):
File "/home/hatef.4/neox/gpt-neox/train.py", line 35, in <module>
main()
File "/home/hatef.4/neox/gpt-neox/train.py", line 31, in main
pretrain(neox_args=neox_args)
File "/home/hatef.4/neox/gpt-neox/megatron/training.py", line 296, in pretrain
iteration = train(
File "/home/hatef.4/neox/gpt-neox/megatron/training.py", line 1465, in train
loss_dict, skipped_iter = train_step(
File "/home/hatef.4/neox/gpt-neox/megatron/training.py", line 1277, in train_step
reduced_loss = train_step_pipe(
File "/home/hatef.4/neox/gpt-neox/megatron/training.py", line 1374, in train_step_pipe
Traceback (most recent call last):
loss = model.train_batch(data_iter=data_iterator)
File "/home/hatef.4/neox/DeepSpeed/deepspeed/runtime/pipe/engine.py", line 388, in train_batch
self._exec_schedule(sched)
File "/home/hatef.4/neox/DeepSpeed/deepspeed/runtime/pipe/engine.py", line 1420, in _exec_schedule
File "/home/hatef.4/neox/gpt-neox/train.py", line 35, in <module>
main()
File "/home/hatef.4/neox/gpt-neox/train.py", line 31, in main
pretrain(neox_args=neox_args)
File "/home/hatef.4/neox/gpt-neox/megatron/training.py", line 296, in pretrain
iteration = train(
self._exec_instr(**cmd.kwargs)
File "/home/hatef.4/neox/DeepSpeed/deepspeed/runtime/pipe/engine.py", line 1236, in _exec_optimizer_step
File "/home/hatef.4/neox/gpt-neox/megatron/training.py", line 1465, in train
loss_dict, skipped_iter = train_step(
File "/home/hatef.4/neox/gpt-neox/megatron/training.py", line 1277, in train_step
reduced_loss = train_step_pipe(
File "/home/hatef.4/neox/gpt-neox/megatron/training.py", line 1374, in train_step_pipe
loss = model.train_batch(data_iter=data_iterator)
if self.global_steps % self.steps_per_print() == 0:
TypeError: unsupported operand type(s) for %: 'int' and 'NoneType'
File "/home/hatef.4/neox/DeepSpeed/deepspeed/runtime/pipe/engine.py", line 388, in train_batch
self._exec_schedule(sched)
File "/home/hatef.4/neox/DeepSpeed/deepspeed/runtime/pipe/engine.py", line 1420, in _exec_schedule
self._exec_instr(**cmd.kwargs)
File "/home/hatef.4/neox/DeepSpeed/deepspeed/runtime/pipe/engine.py", line 1236, in _exec_optimizer_step
if self.global_steps % self.steps_per_print() == 0:
TypeError: unsupported operand type(s) for %: 'int' and 'NoneType'
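Describe the bug: when using the latest DeepSpeed with GPT-NeoX, training fails in `_exec_optimizer_step` with `TypeError: unsupported operand type(s) for %: 'int' and 'NoneType'`, raised by `self.global_steps % self.steps_per_print()`.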
To Reproduce
Steps to reproduce the behavior:
1-3B.yml
config