Status: Open. Opened by Aishwarya-Kumaran 1 year ago.
File output:
Traceback (most recent call last):
File "/mnt/camelot/Team4/fall23/llama2/Book/OpenChatKit-main/training/dist_clm_train.py", line 478, in <module>
main()
File "/mnt/camelot/Team4/fall23/llama2/Book/OpenChatKit-main/training/dist_clm_train.py", line 397, in main
init_communicators(args)
File "/mnt/camelot/Team4/fall23/llama2/Book/OpenChatKit-main/training/comm/comm_utils.py", line 103, in init_communicators
_PIPELINE_PARALLEL_COMM = NCCLCommunicator(_PIPELINE_PARALLEL_RANK, args.cuda_id, args.pipeline_group_size,
File "/mnt/camelot/Team4/fall23/llama2/Book/OpenChatKit-main/training/comm/nccl_backend.py", line 31, in __init__
cupy.cuda.Device(cuda_id).use()
File "cupy/cuda/device.pyx", line 192, in cupy.cuda.device.Device.use
File "cupy/cuda/device.pyx", line 198, in cupy.cuda.device.Device.use
File "cupy_backends/cuda/api/runtime.pyx", line 375, in cupy_backends.cuda.api.runtime.setDevice
File "cupy_backends/cuda/api/runtime.pyx", line 144, in cupy_backends.cuda.api.runtime.check_status
cupy_backends.cuda.api.runtime.CUDARuntimeError: cudaErrorInvalidDevice: invalid device ordinal
Traceback (most recent call last):
File "/mnt/camelot/Team4/fall23/llama2/Book/OpenChatKit-main/training/dist_clm_train.py", line 478, in <module>
main()
File "/mnt/camelot/Team4/fall23/llama2/Book/OpenChatKit-main/training/dist_clm_train.py", line 397, in main
init_communicators(args)
File "/mnt/camelot/Team4/fall23/llama2/Book/OpenChatKit-main/training/comm/comm_utils.py", line 103, in init_communicators
_PIPELINE_PARALLEL_COMM = NCCLCommunicator(_PIPELINE_PARALLEL_RANK, args.cuda_id, args.pipeline_group_size,
File "/mnt/camelot/Team4/fall23/llama2/Book/OpenChatKit-main/training/comm/nccl_backend.py", line 31, in __init__
cupy.cuda.Device(cuda_id).use()
File "cupy/cuda/device.pyx", line 192, in cupy.cuda.device.Device.use
File "cupy/cuda/device.pyx", line 198, in cupy.cuda.device.Device.use
File "cupy_backends/cuda/api/runtime.pyx", line 375, in cupy_backends.cuda.api.runtime.setDevice
File "cupy_backends/cuda/api/runtime.pyx", line 144, in cupy_backends.cuda.api.runtime.check_status
cupy_backends.cuda.api.runtime.CUDARuntimeError: cudaErrorInvalidDevice: invalid device ordinal
Traceback (most recent call last):
File "/mnt/camelot/Team4/fall23/llama2/Book/OpenChatKit-main/training/dist_clm_train.py", line 478, in <module>
main()
File "/mnt/camelot/Team4/fall23/llama2/Book/OpenChatKit-main/training/dist_clm_train.py", line 397, in main
init_communicators(args)
File "/mnt/camelot/Team4/fall23/llama2/Book/OpenChatKit-main/training/comm/comm_utils.py", line 103, in init_communicators
_PIPELINE_PARALLEL_COMM = NCCLCommunicator(_PIPELINE_PARALLEL_RANK, args.cuda_id, args.pipeline_group_size,
File "/mnt/camelot/Team4/fall23/llama2/Book/OpenChatKit-main/training/comm/nccl_backend.py", line 31, in __init__
cupy.cuda.Device(cuda_id).use()
File "cupy/cuda/device.pyx", line 192, in cupy.cuda.device.Device.use
File "cupy/cuda/device.pyx", line 198, in cupy.cuda.device.Device.use
File "cupy_backends/cuda/api/runtime.pyx", line 375, in cupy_backends.cuda.api.runtime.setDevice
File "cupy_backends/cuda/api/runtime.pyx", line 144, in cupy_backends.cuda.api.runtime.check_status
cupy_backends.cuda.api.runtime.CUDARuntimeError: cudaErrorInvalidDevice: invalid device ordinal
Traceback (most recent call last):
File "/mnt/camelot/Team4/fall23/llama2/Book/OpenChatKit-main/training/dist_clm_train.py", line 478, in <module>
main()
File "/mnt/camelot/Team4/fall23/llama2/Book/OpenChatKit-main/training/dist_clm_train.py", line 397, in main
init_communicators(args)
File "/mnt/camelot/Team4/fall23/llama2/Book/OpenChatKit-main/training/comm/comm_utils.py", line 103, in init_communicators
_PIPELINE_PARALLEL_COMM = NCCLCommunicator(_PIPELINE_PARALLEL_RANK, args.cuda_id, args.pipeline_group_size,
File "/mnt/camelot/Team4/fall23/llama2/Book/OpenChatKit-main/training/comm/nccl_backend.py", line 31, in __init__
cupy.cuda.Device(cuda_id).use()
File "cupy/cuda/device.pyx", line 192, in cupy.cuda.device.Device.use
File "cupy/cuda/device.pyx", line 198, in cupy.cuda.device.Device.use
File "cupy_backends/cuda/api/runtime.pyx", line 375, in cupy_backends.cuda.api.runtime.setDevice
File "cupy_backends/cuda/api/runtime.pyx", line 144, in cupy_backends.cuda.api.runtime.check_status
cupy_backends.cuda.api.runtime.CUDARuntimeError: cudaErrorInvalidDevice: invalid device ordinal
Traceback (most recent call last):
File "/mnt/camelot/Team4/fall23/llama2/Book/OpenChatKit-main/training/dist_clm_train.py", line 478, in <module>
main()
File "/mnt/camelot/Team4/fall23/llama2/Book/OpenChatKit-main/training/dist_clm_train.py", line 397, in main
init_communicators(args)
File "/mnt/camelot/Team4/fall23/llama2/Book/OpenChatKit-main/training/comm/comm_utils.py", line 103, in init_communicators
_PIPELINE_PARALLEL_COMM = NCCLCommunicator(_PIPELINE_PARALLEL_RANK, args.cuda_id, args.pipeline_group_size,
File "/mnt/camelot/Team4/fall23/llama2/Book/OpenChatKit-main/training/comm/nccl_backend.py", line 31, in __init__
cupy.cuda.Device(cuda_id).use()
File "cupy/cuda/device.pyx", line 192, in cupy.cuda.device.Device.use
File "cupy/cuda/device.pyx", line 198, in cupy.cuda.device.Device.use
File "cupy_backends/cuda/api/runtime.pyx", line 375, in cupy_backends.cuda.api.runtime.setDevice
File "cupy_backends/cuda/api/runtime.pyx", line 144, in cupy_backends.cuda.api.runtime.check_status
cupy_backends.cuda.api.runtime.CUDARuntimeError: cudaErrorInvalidDevice: invalid device ordinal
Traceback (most recent call last):
File "/mnt/camelot/Team4/fall23/llama2/Book/OpenChatKit-main/training/dist_clm_train.py", line 478, in <module>
main()
File "/mnt/camelot/Team4/fall23/llama2/Book/OpenChatKit-main/training/dist_clm_train.py", line 397, in main
init_communicators(args)
File "/mnt/camelot/Team4/fall23/llama2/Book/OpenChatKit-main/training/comm/comm_utils.py", line 103, in init_communicators
_PIPELINE_PARALLEL_COMM = NCCLCommunicator(_PIPELINE_PARALLEL_RANK, args.cuda_id, args.pipeline_group_size,
File "/mnt/camelot/Team4/fall23/llama2/Book/OpenChatKit-main/training/comm/nccl_backend.py", line 31, in __init__
cupy.cuda.Device(cuda_id).use()
File "cupy/cuda/device.pyx", line 192, in cupy.cuda.device.Device.use
File "cupy/cuda/device.pyx", line 198, in cupy.cuda.device.Device.use
File "cupy_backends/cuda/api/runtime.pyx", line 375, in cupy_backends.cuda.api.runtime.setDevice
File "cupy_backends/cuda/api/runtime.pyx", line 144, in cupy_backends.cuda.api.runtime.check_status
cupy_backends.cuda.api.runtime.CUDARuntimeError: cudaErrorInvalidDevice: invalid device ordinal
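Attachment: output_and_error.log

**Describe the bug**
Training aborts during communicator setup. The same traceback is repeated six times in the log: `main()` in `training/dist_clm_train.py` calls `init_communicators(args)`, which constructs `NCCLCommunicator(_PIPELINE_PARALLEL_RANK, args.cuda_id, args.pipeline_group_size, ...)`; its `__init__` (`training/comm/nccl_backend.py`, line 31) calls `cupy.cuda.Device(cuda_id).use()`, which raises `cupy_backends.cuda.api.runtime.CUDARuntimeError: cudaErrorInvalidDevice: invalid device ordinal`.
Note: this error generally means the requested device index does not correspond to a CUDA device visible to the process. The value of `args.cuda_id` and the number of visible GPUs are not shown in the log and should be confirmed.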
**To Reproduce**
Steps to reproduce the behavior: (not provided)

**Expected behavior**
A clear and concise description of what you expected to happen. (not provided)

**Screenshots**
If applicable, add screenshots to help explain your problem. (none provided)

**Desktop (please complete the following information):** (not provided)

**Smartphone (please complete the following information):** (not provided)

**Additional context**
Add any other context about the problem here. (none provided)