The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/home/ubuntu/wcl/xlstm/test.py", line 35, in <module>
xlstm_stack = xLSTMBlockStack(cfg)
^^^^^^^^^^^^^^^^^^^^
File "/home/ubuntu/wcl/xlstm/xlstm/xlstm_block_stack.py", line 83, in __init__
self.blocks = self._create_blocks(config=config)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/ubuntu/wcl/xlstm/xlstm/xlstm_block_stack.py", line 104, in _create_blocks
blocks.append(sLSTMBlock(config=config))
^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/ubuntu/wcl/xlstm/xlstm/blocks/slstm/block.py", line 32, in __init__
super().__init__(
File "/home/ubuntu/wcl/xlstm/xlstm/blocks/xlstm_block.py", line 62, in __init__
self.xlstm = sLSTMLayer(config=self.config.slstm)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/ubuntu/wcl/xlstm/xlstm/blocks/slstm/layer.py", line 78, in __init__
self.slstm_cell = sLSTMCell(self.config)
^^^^^^^^^^^^^^^^^^^^^^
File "/home/ubuntu/wcl/xlstm/xlstm/blocks/slstm/cell.py", line 780, in __new__
return sLSTMCell_cuda(config, skip_backend_init=skip_backend_init)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/ubuntu/wcl/xlstm/xlstm/blocks/slstm/cell.py", line 690, in __init__
self.func = sLSTMCellFuncGenerator(self.training, config)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/ubuntu/wcl/xlstm/xlstm/blocks/slstm/cell.py", line 536, in sLSTMCellFuncGenerator
slstm_cuda = sLSTMCellCUDA.instance(config=config)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/ubuntu/wcl/xlstm/xlstm/blocks/slstm/cell.py", line 515, in instance
cls.mod[repr(config)] = load(
^^^^^
File "/home/ubuntu/wcl/xlstm/xlstm/blocks/slstm/src/cuda_init.py", line 84, in load
mod = _load(name + suffix, sources, **myargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/ubuntu/anaconda3/envs/xlstm/lib/python3.11/site-packages/torch/utils/cpp_extension.py", line 1306, in load
return _jit_compile(
^^^^^^^^^^^^^
File "/home/ubuntu/anaconda3/envs/xlstm/lib/python3.11/site-packages/torch/utils/cpp_extension.py", line 1710, in _jit_compile
_write_ninja_file_and_build_library(
File "/home/ubuntu/anaconda3/envs/xlstm/lib/python3.11/site-packages/torch/utils/cpp_extension.py", line 1823, in _write_ninja_file_and_build_library
_run_ninja_build(
File "/home/ubuntu/anaconda3/envs/xlstm/lib/python3.11/site-packages/torch/utils/cpp_extension.py", line 2112, in _run_ninja_build
raise RuntimeError(message) from e
RuntimeError: Error building extension 'slstm_HS128BS8NH4NS4DBfDRbDWbDGbDSbDAfNG4SA1GRCV0GRC0d0FCV0FC0d0'
`Emitting ninja build file /home/ubuntu/.cache/torch_extensions/py311_cu121/slstm_HS128BS8NH4NS4DBfDRbDWbDGbDSbDAfNG4SA1GRCV0GRC0d0FCV0FC0d0/build.ninja... Building extension module slstm_HS128BS8NH4NS4DBfDRbDWbDGbDSbDAfNG4SA1GRCV0GRC0d0FCV0FC0d0... Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N) [1/7] /home/ubuntu/anaconda3/envs/xlstm/bin/nvcc --generate-dependencies-with-compile --dependency-output slstm_backward_cut.cuda.o.d -ccbin /home/ubuntu/anaconda3/envs/xlstm/bin/x86_64-conda-linux-gnu-cc -DTORCH_EXTENSION_NAME=slstm_HS128BS8NH4NS4DBfDRbDWbDGbDSbDAfNG4SA1GRCV0GRC0d0FCV0FC0d0 -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -isystem /home/ubuntu/anaconda3/envs/xlstm/lib/python3.11/site-packages/torch/include -isystem /home/ubuntu/anaconda3/envs/xlstm/lib/python3.11/site-packages/torch/include/torch/csrc/api/include -isystem /home/ubuntu/anaconda3/envs/xlstm/lib/python3.11/site-packages/torch/include/TH -isystem /home/ubuntu/anaconda3/envs/xlstm/lib/python3.11/site-packages/torch/include/THC -isystem /home/ubuntu/anaconda3/envs/xlstm/include -isystem /home/ubuntu/anaconda3/envs/xlstm/include/python3.11 -D_GLIBCXX_USE_CXX11_ABI=0 -DCUDA_NO_HALF_OPERATORS -DCUDA_NO_HALF_CONVERSIONS -DCUDA_NO_BFLOAT16_CONVERSIONS -DCUDA_NO_HALF2_OPERATORS --expt-relaxed-constexpr -gencode=arch=compute_86,code=compute_86 -gencode=arch=compute_86,code=sm_86 --compiler-options '-fPIC' -Xptxas="-v" -gencode arch=compute_80,code=compute_80 -res-usage --use_fast_math -O3 -Xptxas -O3 --extra-device-vectorization -DSLSTM_HIDDEN_SIZE=128 -DSLSTM_BATCH_SIZE=8 -DSLSTM_NUM_HEADS=4 -DSLSTM_NUM_STATES=4 -DSLSTM_DTYPE_B=float -DSLSTM_DTYPE_R=nv_bfloat16 -DSLSTM_DTYPE_W=__nv_bfloat16 -DSLSTM_DTYPE_G=nv_bfloat16 -DSLSTM_DTYPE_S=nv_bfloat16 -DSLSTM_DTYPE_A=float -DSLSTM_NUM_GATES=4 -DSLSTM_SIMPLE_AGG=true 
-DSLSTM_GRADIENT_RECURRENT_CLIPVAL_VALID=false -DSLSTM_GRADIENT_RECURRENT_CLIPVAL=0.0 -DSLSTM_FORWARD_CLIPVAL_VALID=false -DSLSTM_FORWARD_CLIPVAL=0.0 -U__CUDA_NO_HALF_OPERATORS -UCUDA_NO_HALF_CONVERSIONS -UCUDA_NO_BFLOAT16_OPERATORS -UCUDA_NO_BFLOAT16_CONVERSIONS -UCUDA_NO_BFLOAT162_OPERATORS -UCUDA_NO_BFLOAT162_CONVERSIONS -std=c++17 -c /home/ubuntu/wcl/xlstm/xlstm/blocks/slstm/src/cuda/slstm_backward_cut.cu -o slstm_backward_cut.cuda.o FAILED: slstm_backward_cut.cuda.o /home/ubuntu/anaconda3/envs/xlstm/bin/nvcc --generate-dependencies-with-compile --dependency-output slstm_backward_cut.cuda.o.d -ccbin /home/ubuntu/anaconda3/envs/xlstm/bin/x86_64-conda-linux-gnu-cc -DTORCH_EXTENSION_NAME=slstm_HS128BS8NH4NS4DBfDRbDWbDGbDSbDAfNG4SA1GRCV0GRC0d0FCV0FC0d0 -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -isystem /home/ubuntu/anaconda3/envs/xlstm/lib/python3.11/site-packages/torch/include -isystem /home/ubuntu/anaconda3/envs/xlstm/lib/python3.11/site-packages/torch/include/torch/csrc/api/include -isystem /home/ubuntu/anaconda3/envs/xlstm/lib/python3.11/site-packages/torch/include/TH -isystem /home/ubuntu/anaconda3/envs/xlstm/lib/python3.11/site-packages/torch/include/THC -isystem /home/ubuntu/anaconda3/envs/xlstm/include -isystem /home/ubuntu/anaconda3/envs/xlstm/include/python3.11 -D_GLIBCXX_USE_CXX11_ABI=0 -DCUDA_NO_HALF_OPERATORS -DCUDA_NO_HALF_CONVERSIONS -DCUDA_NO_BFLOAT16_CONVERSIONS -DCUDA_NO_HALF2_OPERATORS --expt-relaxed-constexpr -gencode=arch=compute_86,code=compute_86 -gencode=arch=compute_86,code=sm_86 --compiler-options '-fPIC' -Xptxas="-v" -gencode arch=compute_80,code=compute_80 -res-usage --use_fast_math -O3 -Xptxas -O3 --extra-device-vectorization -DSLSTM_HIDDEN_SIZE=128 -DSLSTM_BATCH_SIZE=8 -DSLSTM_NUM_HEADS=4 -DSLSTM_NUM_STATES=4 -DSLSTM_DTYPE_B=float -DSLSTM_DTYPE_R=nv_bfloat16 -DSLSTM_DTYPE_W=nv_bfloat16 -DSLSTM_DTYPE_G=nv_bfloat16 -DSLSTM_DTYPE_S=__nv_bfloat16 
-DSLSTM_DTYPE_A=float -DSLSTM_NUM_GATES=4 -DSLSTM_SIMPLE_AGG=true -DSLSTM_GRADIENT_RECURRENT_CLIPVAL_VALID=false -DSLSTM_GRADIENT_RECURRENT_CLIPVAL=0.0 -DSLSTM_FORWARD_CLIPVAL_VALID=false -DSLSTM_FORWARD_CLIPVAL=0.0 -UCUDA_NO_HALF_OPERATORS -UCUDA_NO_HALF_CONVERSIONS -UCUDA_NO_BFLOAT16_OPERATORS -UCUDA_NO_BFLOAT16_CONVERSIONS -UCUDA_NO_BFLOAT162_OPERATORS -UCUDA_NO_BFLOAT162_CONVERSIONS -std=c++17 -c /home/ubuntu/wcl/xlstm/xlstm/blocks/slstm/src/cuda/slstm_backward_cut.cu -o slstm_backward_cut.cuda.o In file included from /home/ubuntu/anaconda3/envs/xlstm/include/cuda_fp16.h:4019, from /home/ubuntu/anaconda3/envs/xlstm/include/cublas_api.h:77, from /home/ubuntu/anaconda3/envs/xlstm/include/cublas_v2.h:69, from /home/ubuntu/wcl/xlstm/xlstm/blocks/slstm/src/cuda/slstm_backward_cut.cu:22: /home/ubuntu/anaconda3/envs/xlstm/include/cuda_fp16.hpp:65:10: fatal error: nv/target: No such file or directory 65 | #include <nv/target> | ^
~~compilation terminated. [2/7] /home/ubuntu/anaconda3/envs/xlstm/bin/nvcc --generate-dependencies-with-compile --dependency-output slstm_forward.cuda.o.d -ccbin /home/ubuntu/anaconda3/envs/xlstm/bin/x86_64-conda-linux-gnu-cc -DTORCH_EXTENSION_NAME=slstm_HS128BS8NH4NS4DBfDRbDWbDGbDSbDAfNG4SA1GRCV0GRC0d0FCV0FC0d0 -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -isystem /home/ubuntu/anaconda3/envs/xlstm/lib/python3.11/site-packages/torch/include -isystem /home/ubuntu/anaconda3/envs/xlstm/lib/python3.11/site-packages/torch/include/torch/csrc/api/include -isystem /home/ubuntu/anaconda3/envs/xlstm/lib/python3.11/site-packages/torch/include/TH -isystem /home/ubuntu/anaconda3/envs/xlstm/lib/python3.11/site-packages/torch/include/THC -isystem /home/ubuntu/anaconda3/envs/xlstm/include -isystem /home/ubuntu/anaconda3/envs/xlstm/include/python3.11 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS -DCUDA_NO_HALF_CONVERSIONS -DCUDA_NO_BFLOAT16_CONVERSIONS -DCUDA_NO_HALF2_OPERATORS --expt-relaxed-constexpr -gencode=arch=compute_86,code=compute_86 -gencode=arch=compute_86,code=sm_86 --compiler-options '-fPIC' -Xptxas="-v" -gencode arch=compute_80,code=compute_80 -res-usage --use_fast_math -O3 -Xptxas -O3 --extra-device-vectorization -DSLSTM_HIDDEN_SIZE=128 -DSLSTM_BATCH_SIZE=8 -DSLSTM_NUM_HEADS=4 -DSLSTM_NUM_STATES=4 -DSLSTM_DTYPE_B=float -DSLSTM_DTYPE_R=nv_bfloat16 -DSLSTM_DTYPE_W=nv_bfloat16 -DSLSTM_DTYPE_G=nv_bfloat16 -DSLSTM_DTYPE_S=__nv_bfloat16 -DSLSTM_DTYPE_A=float -DSLSTM_NUM_GATES=4 -DSLSTM_SIMPLE_AGG=true -DSLSTM_GRADIENT_RECURRENT_CLIPVAL_VALID=false -DSLSTM_GRADIENT_RECURRENT_CLIPVAL=0.0 -DSLSTM_FORWARD_CLIPVAL_VALID=false -DSLSTM_FORWARD_CLIPVAL=0.0 -UCUDA_NO_HALF_OPERATORS -UCUDA_NO_HALF_CONVERSIONS -UCUDA_NO_BFLOAT16_OPERATORS -UCUDA_NO_BFLOAT16_CONVERSIONS -UCUDA_NO_BFLOAT162_OPERATORS -UCUDA_NO_BFLOAT162_CONVERSIONS -std=c++17 -c 
/home/ubuntu/wcl/xlstm/xlstm/blocks/slstm/src/cuda/slstm_forward.cu -o slstm_forward.cuda.o FAILED: slstm_forward.cuda.o /home/ubuntu/anaconda3/envs/xlstm/bin/nvcc --generate-dependencies-with-compile --dependency-output slstm_forward.cuda.o.d -ccbin /home/ubuntu/anaconda3/envs/xlstm/bin/x86_64-conda-linux-gnu-cc -DTORCH_EXTENSION_NAME=slstm_HS128BS8NH4NS4DBfDRbDWbDGbDSbDAfNG4SA1GRCV0GRC0d0FCV0FC0d0 -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -isystem /home/ubuntu/anaconda3/envs/xlstm/lib/python3.11/site-packages/torch/include -isystem /home/ubuntu/anaconda3/envs/xlstm/lib/python3.11/site-packages/torch/include/torch/csrc/api/include -isystem /home/ubuntu/anaconda3/envs/xlstm/lib/python3.11/site-packages/torch/include/TH -isystem /home/ubuntu/anaconda3/envs/xlstm/lib/python3.11/site-packages/torch/include/THC -isystem /home/ubuntu/anaconda3/envs/xlstm/include -isystem /home/ubuntu/anaconda3/envs/xlstm/include/python3.11 -D_GLIBCXX_USE_CXX11_ABI=0 -DCUDA_NO_HALF_OPERATORS -DCUDA_NO_HALF_CONVERSIONS -DCUDA_NO_BFLOAT16_CONVERSIONS -DCUDA_NO_HALF2_OPERATORS --expt-relaxed-constexpr -gencode=arch=compute_86,code=compute_86 -gencode=arch=compute_86,code=sm_86 --compiler-options '-fPIC' -Xptxas="-v" -gencode arch=compute_80,code=compute_80 -res-usage --use_fast_math -O3 -Xptxas -O3 --extra-device-vectorization -DSLSTM_HIDDEN_SIZE=128 -DSLSTM_BATCH_SIZE=8 -DSLSTM_NUM_HEADS=4 -DSLSTM_NUM_STATES=4 -DSLSTM_DTYPE_B=float -DSLSTM_DTYPE_R=nv_bfloat16 -DSLSTM_DTYPE_W=nv_bfloat16 -DSLSTM_DTYPE_G=__nv_bfloat16 -DSLSTM_DTYPE_S=nv_bfloat16 -DSLSTM_DTYPE_A=float -DSLSTM_NUM_GATES=4 -DSLSTM_SIMPLE_AGG=true -DSLSTM_GRADIENT_RECURRENT_CLIPVAL_VALID=false -DSLSTM_GRADIENT_RECURRENT_CLIPVAL=0.0 -DSLSTM_FORWARD_CLIPVAL_VALID=false -DSLSTM_FORWARD_CLIPVAL=0.0 -UCUDA_NO_HALF_OPERATORS -UCUDA_NO_HALF_CONVERSIONS -UCUDA_NO_BFLOAT16_OPERATORS -UCUDA_NO_BFLOAT16_CONVERSIONS -UCUDA_NO_BFLOAT162_OPERATORS 
-UCUDA_NO_BFLOAT162_CONVERSIONS -std=c++17 -c /home/ubuntu/wcl/xlstm/xlstm/blocks/slstm/src/cuda/slstm_forward.cu -o slstm_forward.cuda.o In file included from /home/ubuntu/anaconda3/envs/xlstm/include/cuda_fp16.h:4019, from /home/ubuntu/anaconda3/envs/xlstm/include/cublas_api.h:77, from /home/ubuntu/anaconda3/envs/xlstm/include/cublas_v2.h:69, from /home/ubuntu/wcl/xlstm/xlstm/blocks/slstm/src/cuda/slstm_forward.cu:22: /home/ubuntu/anaconda3/envs/xlstm/include/cuda_fp16.hpp:65:10: fatal error: nv/target: No such file or directory 65 | #include <nv/target> | ^~~compilation terminated. [3/7] /home/ubuntu/anaconda3/envs/xlstm/bin/nvcc --generate-dependencies-with-compile --dependency-output slstm_backward.cuda.o.d -ccbin /home/ubuntu/anaconda3/envs/xlstm/bin/x86_64-conda-linux-gnu-cc -DTORCH_EXTENSION_NAME=slstm_HS128BS8NH4NS4DBfDRbDWbDGbDSbDAfNG4SA1GRCV0GRC0d0FCV0FC0d0 -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -isystem /home/ubuntu/anaconda3/envs/xlstm/lib/python3.11/site-packages/torch/include -isystem /home/ubuntu/anaconda3/envs/xlstm/lib/python3.11/site-packages/torch/include/torch/csrc/api/include -isystem /home/ubuntu/anaconda3/envs/xlstm/lib/python3.11/site-packages/torch/include/TH -isystem /home/ubuntu/anaconda3/envs/xlstm/lib/python3.11/site-packages/torch/include/THC -isystem /home/ubuntu/anaconda3/envs/xlstm/include -isystem /home/ubuntu/anaconda3/envs/xlstm/include/python3.11 -D_GLIBCXX_USE_CXX11_ABI=0 -DCUDA_NO_HALF_OPERATORS -DCUDA_NO_HALF_CONVERSIONS -DCUDA_NO_BFLOAT16_CONVERSIONS -DCUDA_NO_HALF2_OPERATORS --expt-relaxed-constexpr -gencode=arch=compute_86,code=compute_86 -gencode=arch=compute_86,code=sm_86 --compiler-options '-fPIC' -Xptxas="-v" -gencode arch=compute_80,code=compute_80 -res-usage --use_fast_math -O3 -Xptxas -O3 --extra-device-vectorization -DSLSTM_HIDDEN_SIZE=128 -DSLSTM_BATCH_SIZE=8 -DSLSTM_NUM_HEADS=4 -DSLSTM_NUM_STATES=4 
-DSLSTM_DTYPE_B=float -DSLSTM_DTYPE_R=nv_bfloat16 -DSLSTM_DTYPE_W=__nv_bfloat16 -DSLSTM_DTYPE_G=nv_bfloat16 -DSLSTM_DTYPE_S=nv_bfloat16 -DSLSTM_DTYPE_A=float -DSLSTM_NUM_GATES=4 -DSLSTM_SIMPLE_AGG=true -DSLSTM_GRADIENT_RECURRENT_CLIPVAL_VALID=false -DSLSTM_GRADIENT_RECURRENT_CLIPVAL=0.0 -DSLSTM_FORWARD_CLIPVAL_VALID=false -DSLSTM_FORWARD_CLIPVAL=0.0 -U__CUDA_NO_HALF_OPERATORS -UCUDA_NO_HALF_CONVERSIONS -UCUDA_NO_BFLOAT16_OPERATORS -UCUDA_NO_BFLOAT16_CONVERSIONS -UCUDA_NO_BFLOAT162_OPERATORS -UCUDA_NO_BFLOAT162_CONVERSIONS -std=c++17 -c /home/ubuntu/wcl/xlstm/xlstm/blocks/slstm/src/cuda/slstm_backward.cu -o slstm_backward.cuda.o FAILED: slstm_backward.cuda.o /home/ubuntu/anaconda3/envs/xlstm/bin/nvcc --generate-dependencies-with-compile --dependency-output slstm_backward.cuda.o.d -ccbin /home/ubuntu/anaconda3/envs/xlstm/bin/x86_64-conda-linux-gnu-cc -DTORCH_EXTENSION_NAME=slstm_HS128BS8NH4NS4DBfDRbDWbDGbDSbDAfNG4SA1GRCV0GRC0d0FCV0FC0d0 -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -isystem /home/ubuntu/anaconda3/envs/xlstm/lib/python3.11/site-packages/torch/include -isystem /home/ubuntu/anaconda3/envs/xlstm/lib/python3.11/site-packages/torch/include/torch/csrc/api/include -isystem /home/ubuntu/anaconda3/envs/xlstm/lib/python3.11/site-packages/torch/include/TH -isystem /home/ubuntu/anaconda3/envs/xlstm/lib/python3.11/site-packages/torch/include/THC -isystem /home/ubuntu/anaconda3/envs/xlstm/include -isystem /home/ubuntu/anaconda3/envs/xlstm/include/python3.11 -D_GLIBCXX_USE_CXX11_ABI=0 -DCUDA_NO_HALF_OPERATORS -DCUDA_NO_HALF_CONVERSIONS -DCUDA_NO_BFLOAT16_CONVERSIONS -DCUDA_NO_HALF2_OPERATORS --expt-relaxed-constexpr -gencode=arch=compute_86,code=compute_86 -gencode=arch=compute_86,code=sm_86 --compiler-options '-fPIC' -Xptxas="-v" -gencode arch=compute_80,code=compute_80 -res-usage --use_fast_math -O3 -Xptxas -O3 --extra-device-vectorization -DSLSTM_HIDDEN_SIZE=128 
-DSLSTM_BATCH_SIZE=8 -DSLSTM_NUM_HEADS=4 -DSLSTM_NUM_STATES=4 -DSLSTM_DTYPE_B=float -DSLSTM_DTYPE_R=nv_bfloat16 -DSLSTM_DTYPE_W=nv_bfloat16 -DSLSTM_DTYPE_G=nv_bfloat16 -DSLSTM_DTYPE_S=__nv_bfloat16 -DSLSTM_DTYPE_A=float -DSLSTM_NUM_GATES=4 -DSLSTM_SIMPLE_AGG=true -DSLSTM_GRADIENT_RECURRENT_CLIPVAL_VALID=false -DSLSTM_GRADIENT_RECURRENT_CLIPVAL=0.0 -DSLSTM_FORWARD_CLIPVAL_VALID=false -DSLSTM_FORWARD_CLIPVAL=0.0 -UCUDA_NO_HALF_OPERATORS -UCUDA_NO_HALF_CONVERSIONS -UCUDA_NO_BFLOAT16_OPERATORS -UCUDA_NO_BFLOAT16_CONVERSIONS -UCUDA_NO_BFLOAT162_OPERATORS -UCUDA_NO_BFLOAT162_CONVERSIONS -std=c++17 -c /home/ubuntu/wcl/xlstm/xlstm/blocks/slstm/src/cuda/slstm_backward.cu -o slstm_backward.cuda.o In file included from /home/ubuntu/anaconda3/envs/xlstm/include/cuda_fp16.h:4019, from /home/ubuntu/anaconda3/envs/xlstm/include/cublas_api.h:77, from /home/ubuntu/anaconda3/envs/xlstm/include/cublas_v2.h:69, from /home/ubuntu/wcl/xlstm/xlstm/blocks/slstm/src/cuda/slstm_backward.cu:22: /home/ubuntu/anaconda3/envs/xlstm/include/cuda_fp16.hpp:65:10: fatal error: nv/target: No such file or directory 65 | #include <nv/target> | ^~~compilation terminated. 
[4/7] /home/ubuntu/anaconda3/envs/xlstm/bin/nvcc --generate-dependencies-with-compile --dependency-output blas.cuda.o.d -ccbin /home/ubuntu/anaconda3/envs/xlstm/bin/x86_64-conda-linux-gnu-cc -DTORCH_EXTENSION_NAME=slstm_HS128BS8NH4NS4DBfDRbDWbDGbDSbDAfNG4SA1GRCV0GRC0d0FCV0FC0d0 -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -isystem /home/ubuntu/anaconda3/envs/xlstm/lib/python3.11/site-packages/torch/include -isystem /home/ubuntu/anaconda3/envs/xlstm/lib/python3.11/site-packages/torch/include/torch/csrc/api/include -isystem /home/ubuntu/anaconda3/envs/xlstm/lib/python3.11/site-packages/torch/include/TH -isystem /home/ubuntu/anaconda3/envs/xlstm/lib/python3.11/site-packages/torch/include/THC -isystem /home/ubuntu/anaconda3/envs/xlstm/include -isystem /home/ubuntu/anaconda3/envs/xlstm/include/python3.11 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS -DCUDA_NO_HALF_CONVERSIONS -DCUDA_NO_BFLOAT16_CONVERSIONS -DCUDA_NO_HALF2_OPERATORS --expt-relaxed-constexpr -gencode=arch=compute_86,code=compute_86 -gencode=arch=compute_86,code=sm_86 --compiler-options '-fPIC' -Xptxas="-v" -gencode arch=compute_80,code=compute_80 -res-usage --use_fast_math -O3 -Xptxas -O3 --extra-device-vectorization -DSLSTM_HIDDEN_SIZE=128 -DSLSTM_BATCH_SIZE=8 -DSLSTM_NUM_HEADS=4 -DSLSTM_NUM_STATES=4 -DSLSTM_DTYPE_B=float -DSLSTM_DTYPE_R=nv_bfloat16 -DSLSTM_DTYPE_W=nv_bfloat16 -DSLSTM_DTYPE_G=nv_bfloat16 -DSLSTM_DTYPE_S=__nv_bfloat16 -DSLSTM_DTYPE_A=float -DSLSTM_NUM_GATES=4 -DSLSTM_SIMPLE_AGG=true -DSLSTM_GRADIENT_RECURRENT_CLIPVAL_VALID=false -DSLSTM_GRADIENT_RECURRENT_CLIPVAL=0.0 -DSLSTM_FORWARD_CLIPVAL_VALID=false -DSLSTM_FORWARD_CLIPVAL=0.0 -UCUDA_NO_HALF_OPERATORS -UCUDA_NO_HALF_CONVERSIONS -UCUDA_NO_BFLOAT16_OPERATORS -UCUDA_NO_BFLOAT16_CONVERSIONS -UCUDA_NO_BFLOAT162_OPERATORS -UCUDA_NO_BFLOAT162_CONVERSIONS -std=c++17 -c /home/ubuntu/wcl/xlstm/xlstm/blocks/slstm/src/util/blas.cu -o blas.cuda.o 
FAILED: blas.cuda.o /home/ubuntu/anaconda3/envs/xlstm/bin/nvcc --generate-dependencies-with-compile --dependency-output blas.cuda.o.d -ccbin /home/ubuntu/anaconda3/envs/xlstm/bin/x86_64-conda-linux-gnu-cc -DTORCH_EXTENSION_NAME=slstm_HS128BS8NH4NS4DBfDRbDWbDGbDSbDAfNG4SA1GRCV0GRC0d0FCV0FC0d0 -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -isystem /home/ubuntu/anaconda3/envs/xlstm/lib/python3.11/site-packages/torch/include -isystem /home/ubuntu/anaconda3/envs/xlstm/lib/python3.11/site-packages/torch/include/torch/csrc/api/include -isystem /home/ubuntu/anaconda3/envs/xlstm/lib/python3.11/site-packages/torch/include/TH -isystem /home/ubuntu/anaconda3/envs/xlstm/lib/python3.11/site-packages/torch/include/THC -isystem /home/ubuntu/anaconda3/envs/xlstm/include -isystem /home/ubuntu/anaconda3/envs/xlstm/include/python3.11 -D_GLIBCXX_USE_CXX11_ABI=0 -DCUDA_NO_HALF_OPERATORS -DCUDA_NO_HALF_CONVERSIONS -DCUDA_NO_BFLOAT16_CONVERSIONS -DCUDA_NO_HALF2_OPERATORS --expt-relaxed-constexpr -gencode=arch=compute_86,code=compute_86 -gencode=arch=compute_86,code=sm_86 --compiler-options '-fPIC' -Xptxas="-v" -gencode arch=compute_80,code=compute_80 -res-usage --use_fast_math -O3 -Xptxas -O3 --extra-device-vectorization -DSLSTM_HIDDEN_SIZE=128 -DSLSTM_BATCH_SIZE=8 -DSLSTM_NUM_HEADS=4 -DSLSTM_NUM_STATES=4 -DSLSTM_DTYPE_B=float -DSLSTM_DTYPE_R=nv_bfloat16 -DSLSTM_DTYPE_W=nv_bfloat16 -DSLSTM_DTYPE_G=__nv_bfloat16 -DSLSTM_DTYPE_S=nv_bfloat16 -DSLSTM_DTYPE_A=float -DSLSTM_NUM_GATES=4 -DSLSTM_SIMPLE_AGG=true -DSLSTM_GRADIENT_RECURRENT_CLIPVAL_VALID=false -DSLSTM_GRADIENT_RECURRENT_CLIPVAL=0.0 -DSLSTM_FORWARD_CLIPVAL_VALID=false -DSLSTM_FORWARD_CLIPVAL=0.0 -UCUDA_NO_HALF_OPERATORS -UCUDA_NO_HALF_CONVERSIONS -UCUDA_NO_BFLOAT16_OPERATORS -UCUDA_NO_BFLOAT16_CONVERSIONS -UCUDA_NO_BFLOAT162_OPERATORS -UCUDA_NO_BFLOAT162_CONVERSIONS -std=c++17 -c /home/ubuntu/wcl/xlstm/xlstm/blocks/slstm/src/util/blas.cu -o 
blas.cuda.o In file included from /home/ubuntu/anaconda3/envs/xlstm/include/cuda_fp16.h:4019, from /home/ubuntu/anaconda3/envs/xlstm/include/cublas_api.h:77, from /home/ubuntu/anaconda3/envs/xlstm/include/cublas_v2.h:69, from /home/ubuntu/wcl/xlstm/xlstm/blocks/slstm/src/util/blas.h:23, from /home/ubuntu/wcl/xlstm/xlstm/blocks/slstm/src/util/blas.cu:21: /home/ubuntu/anaconda3/envs/xlstm/include/cuda_fp16.hpp:65:10: fatal error: nv/target: No such file or directory 65 | #include <nv/target> | ^~~compilation terminated. [5/7] /home/ubuntu/anaconda3/envs/xlstm/bin/nvcc --generate-dependencies-with-compile --dependency-output slstm_pointwise.cuda.o.d -ccbin /home/ubuntu/anaconda3/envs/xlstm/bin/x86_64-conda-linux-gnu-cc -DTORCH_EXTENSION_NAME=slstm_HS128BS8NH4NS4DBfDRbDWbDGbDSbDAfNG4SA1GRCV0GRC0d0FCV0FC0d0 -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -isystem /home/ubuntu/anaconda3/envs/xlstm/lib/python3.11/site-packages/torch/include -isystem /home/ubuntu/anaconda3/envs/xlstm/lib/python3.11/site-packages/torch/include/torch/csrc/api/include -isystem /home/ubuntu/anaconda3/envs/xlstm/lib/python3.11/site-packages/torch/include/TH -isystem /home/ubuntu/anaconda3/envs/xlstm/lib/python3.11/site-packages/torch/include/THC -isystem /home/ubuntu/anaconda3/envs/xlstm/include -isystem /home/ubuntu/anaconda3/envs/xlstm/include/python3.11 -D_GLIBCXX_USE_CXX11_ABI=0 -DCUDA_NO_HALF_OPERATORS -DCUDA_NO_HALF_CONVERSIONS -DCUDA_NO_BFLOAT16_CONVERSIONS -DCUDA_NO_HALF2_OPERATORS --expt-relaxed-constexpr -gencode=arch=compute_86,code=compute_86 -gencode=arch=compute_86,code=sm_86 --compiler-options '-fPIC' -Xptxas="-v" -gencode arch=compute_80,code=compute_80 -res-usage --use_fast_math -O3 -Xptxas -O3 --extra-device-vectorization -DSLSTM_HIDDEN_SIZE=128 -DSLSTM_BATCH_SIZE=8 -DSLSTM_NUM_HEADS=4 -DSLSTM_NUM_STATES=4 -DSLSTM_DTYPE_B=float -DSLSTM_DTYPE_R=nv_bfloat16 -DSLSTM_DTYPE_W=__nv_bfloat16 
-DSLSTM_DTYPE_G=nv_bfloat16 -DSLSTM_DTYPE_S=nv_bfloat16 -DSLSTM_DTYPE_A=float -DSLSTM_NUM_GATES=4 -DSLSTM_SIMPLE_AGG=true -DSLSTM_GRADIENT_RECURRENT_CLIPVAL_VALID=false -DSLSTM_GRADIENT_RECURRENT_CLIPVAL=0.0 -DSLSTM_FORWARD_CLIPVAL_VALID=false -DSLSTM_FORWARD_CLIPVAL=0.0 -U__CUDA_NO_HALF_OPERATORS -UCUDA_NO_HALF_CONVERSIONS -UCUDA_NO_BFLOAT16_OPERATORS -UCUDA_NO_BFLOAT16_CONVERSIONS -UCUDA_NO_BFLOAT162_OPERATORS -UCUDA_NO_BFLOAT162_CONVERSIONS -std=c++17 -c /home/ubuntu/wcl/xlstm/xlstm/blocks/slstm/src/cuda/slstm_pointwise.cu -o slstm_pointwise.cuda.o FAILED: slstm_pointwise.cuda.o /home/ubuntu/anaconda3/envs/xlstm/bin/nvcc --generate-dependencies-with-compile --dependency-output slstm_pointwise.cuda.o.d -ccbin /home/ubuntu/anaconda3/envs/xlstm/bin/x86_64-conda-linux-gnu-cc -DTORCH_EXTENSION_NAME=slstm_HS128BS8NH4NS4DBfDRbDWbDGbDSbDAfNG4SA1GRCV0GRC0d0FCV0FC0d0 -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -isystem /home/ubuntu/anaconda3/envs/xlstm/lib/python3.11/site-packages/torch/include -isystem /home/ubuntu/anaconda3/envs/xlstm/lib/python3.11/site-packages/torch/include/torch/csrc/api/include -isystem /home/ubuntu/anaconda3/envs/xlstm/lib/python3.11/site-packages/torch/include/TH -isystem /home/ubuntu/anaconda3/envs/xlstm/lib/python3.11/site-packages/torch/include/THC -isystem /home/ubuntu/anaconda3/envs/xlstm/include -isystem /home/ubuntu/anaconda3/envs/xlstm/include/python3.11 -D_GLIBCXX_USE_CXX11_ABI=0 -DCUDA_NO_HALF_OPERATORS -DCUDA_NO_HALF_CONVERSIONS -DCUDA_NO_BFLOAT16_CONVERSIONS -DCUDA_NO_HALF2_OPERATORS --expt-relaxed-constexpr -gencode=arch=compute_86,code=compute_86 -gencode=arch=compute_86,code=sm_86 --compiler-options '-fPIC' -Xptxas="-v" -gencode arch=compute_80,code=compute_80 -res-usage --use_fast_math -O3 -Xptxas -O3 --extra-device-vectorization -DSLSTM_HIDDEN_SIZE=128 -DSLSTM_BATCH_SIZE=8 -DSLSTM_NUM_HEADS=4 -DSLSTM_NUM_STATES=4 -DSLSTM_DTYPE_B=float 
-DSLSTM_DTYPE_R=nv_bfloat16 -DSLSTM_DTYPE_W=nv_bfloat16 -DSLSTM_DTYPE_G=nv_bfloat16 -DSLSTM_DTYPE_S=__nv_bfloat16 -DSLSTM_DTYPE_A=float -DSLSTM_NUM_GATES=4 -DSLSTM_SIMPLE_AGG=true -DSLSTM_GRADIENT_RECURRENT_CLIPVAL_VALID=false -DSLSTM_GRADIENT_RECURRENT_CLIPVAL=0.0 -DSLSTM_FORWARD_CLIPVAL_VALID=false -DSLSTM_FORWARD_CLIPVAL=0.0 -UCUDA_NO_HALF_OPERATORS -UCUDA_NO_HALF_CONVERSIONS -UCUDA_NO_BFLOAT16_OPERATORS -UCUDA_NO_BFLOAT16_CONVERSIONS -UCUDA_NO_BFLOAT162_OPERATORS -UCUDA_NO_BFLOAT162_CONVERSIONS -std=c++17 -c /home/ubuntu/wcl/xlstm/xlstm/blocks/slstm/src/cuda/slstm_pointwise.cu -o slstm_pointwise.cuda.o In file included from /home/ubuntu/anaconda3/envs/xlstm/include/cuda_fp16.h:4019, from /home/ubuntu/wcl/xlstm/xlstm/blocks/slstm/src/cuda/../util/inline_ops.cuh:248, from /home/ubuntu/wcl/xlstm/xlstm/blocks/slstm/src/cuda/slstm_pointwise.cu:5: /home/ubuntu/anaconda3/envs/xlstm/include/cuda_fp16.hpp:65:10: fatal error: nv/target: No such file or directory 65 | #include <nv/target> | ^~~compilation terminated. 
[6/7] /home/ubuntu/anaconda3/envs/xlstm/bin/x86_64-conda-linux-gnu-c++ -MMD -MF slstm.o.d -DTORCH_EXTENSION_NAME=slstm_HS128BS8NH4NS4DBfDRbDWbDGbDSbDAfNG4SA1GRCV0GRC0d0FCV0FC0d0 -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -isystem /home/ubuntu/anaconda3/envs/xlstm/lib/python3.11/site-packages/torch/include -isystem /home/ubuntu/anaconda3/envs/xlstm/lib/python3.11/site-packages/torch/include/torch/csrc/api/include -isystem /home/ubuntu/anaconda3/envs/xlstm/lib/python3.11/site-packages/torch/include/TH -isystem /home/ubuntu/anaconda3/envs/xlstm/lib/python3.11/site-packages/torch/include/THC -isystem /home/ubuntu/anaconda3/envs/xlstm/include -isystem /home/ubuntu/anaconda3/envs/xlstm/include/python3.11 -D_GLIBCXX_USE_CXX11_ABI=0 -fPIC -std=c++17 -DSLSTM_HIDDEN_SIZE=128 -DSLSTM_BATCH_SIZE=8 -DSLSTM_NUM_HEADS=4 -DSLSTM_NUM_STATES=4 -DSLSTM_DTYPE_B=float -DSLSTM_DTYPE_R=nv_bfloat16 -DSLSTM_DTYPE_W=nv_bfloat16 -DSLSTM_DTYPE_G=__nv_bfloat16 -DSLSTM_DTYPE_S=nv_bfloat16 -DSLSTM_DTYPE_A=float -DSLSTM_NUM_GATES=4 -DSLSTM_SIMPLE_AGG=true -DSLSTM_GRADIENT_RECURRENT_CLIPVAL_VALID=false -DSLSTM_GRADIENT_RECURRENT_CLIPVAL=0.0 -DSLSTM_FORWARD_CLIPVAL_VALID=false -DSLSTM_FORWARD_CLIPVAL=0.0 -UCUDA_NO_HALF_OPERATORS -UCUDA_NO_HALF_CONVERSIONS -UCUDA_NO_BFLOAT16_OPERATORS -UCUDA_NO_BFLOAT16_CONVERSIONS -UCUDA_NO_BFLOAT162_OPERATORS -UCUDA_NO_BFLOAT162_CONVERSIONS -c /home/ubuntu/wcl/xlstm/xlstm/blocks/slstm/src/cuda/slstm.cc -o slstm.o FAILED: slstm.o /home/ubuntu/anaconda3/envs/xlstm/bin/x86_64-conda-linux-gnu-c++ -MMD -MF slstm.o.d -DTORCH_EXTENSION_NAME=slstm_HS128BS8NH4NS4DBfDRbDWbDGbDSbDAfNG4SA1GRCV0GRC0d0FCV0FC0d0 -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -isystem /home/ubuntu/anaconda3/envs/xlstm/lib/python3.11/site-packages/torch/include -isystem 
/home/ubuntu/anaconda3/envs/xlstm/lib/python3.11/site-packages/torch/include/torch/csrc/api/include -isystem /home/ubuntu/anaconda3/envs/xlstm/lib/python3.11/site-packages/torch/include/TH -isystem /home/ubuntu/anaconda3/envs/xlstm/lib/python3.11/site-packages/torch/include/THC -isystem /home/ubuntu/anaconda3/envs/xlstm/include -isystem /home/ubuntu/anaconda3/envs/xlstm/include/python3.11 -D_GLIBCXX_USE_CXX11_ABI=0 -fPIC -std=c++17 -DSLSTM_HIDDEN_SIZE=128 -DSLSTM_BATCH_SIZE=8 -DSLSTM_NUM_HEADS=4 -DSLSTM_NUM_STATES=4 -DSLSTM_DTYPE_B=float -DSLSTM_DTYPE_R=nv_bfloat16 -DSLSTM_DTYPE_W=__nv_bfloat16 -DSLSTM_DTYPE_G=nv_bfloat16 -DSLSTM_DTYPE_S=nv_bfloat16 -DSLSTM_DTYPE_A=float -DSLSTM_NUM_GATES=4 -DSLSTM_SIMPLE_AGG=true -DSLSTM_GRADIENT_RECURRENT_CLIPVAL_VALID=false -DSLSTM_GRADIENT_RECURRENT_CLIPVAL=0.0 -DSLSTM_FORWARD_CLIPVAL_VALID=false -DSLSTM_FORWARD_CLIPVAL=0.0 -U__CUDA_NO_HALF_OPERATORS -UCUDA_NO_HALF_CONVERSIONS -UCUDA_NO_BFLOAT16_OPERATORS -UCUDA_NO_BFLOAT16_CONVERSIONS -UCUDA_NO_BFLOAT162_OPERATORS -U__CUDA_NO_BFLOAT162_CONVERSIONS__ -c /home/ubuntu/wcl/xlstm/xlstm/blocks/slstm/src/cuda/slstm.cc -o slstm.o In file included from /home/ubuntu/anaconda3/envs/xlstm/include/cuda_fp16.h:4019, from /home/ubuntu/anaconda3/envs/xlstm/include/cusparse.h:59, from /home/ubuntu/anaconda3/envs/xlstm/lib/python3.11/site-packages/torch/include/ATen/cuda/CUDAContextLight.h:7, from /home/ubuntu/anaconda3/envs/xlstm/lib/python3.11/site-packages/torch/include/ATen/cuda/CUDAContext.h:3, from /home/ubuntu/wcl/xlstm/xlstm/blocks/slstm/src/cuda/slstm.cc:23: /home/ubuntu/anaconda3/envs/xlstm/include/cuda_fp16.hpp:65:10: fatal error: nv/target: No such file or directory 65 | #include <nv/target> | ^~~compilation terminated. ninja: build stopped: subcommand failed. 
Traceback (most recent call last):
File "/home/ubuntu/anaconda3/envs/xlstm/lib/python3.11/site-packages/torch/utils/cpp_extension.py", line 2096, in _run_ninja_build
subprocess.run(
File "/home/ubuntu/anaconda3/envs/xlstm/lib/python3.11/subprocess.py", line 571, in run
raise CalledProcessError(retcode, process.args,
subprocess.CalledProcessError: Command '['ninja', '-v']' returned non-zero exit status 1.
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/home/ubuntu/wcl/xlstm/test.py", line 35, in <module>
xlstm_stack = xLSTMBlockStack(cfg)
^^^^^^^^^^^^^^^^^^^^
File "/home/ubuntu/wcl/xlstm/xlstm/xlstm_block_stack.py", line 83, in __init__
self.blocks = self._create_blocks(config=config)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/ubuntu/wcl/xlstm/xlstm/xlstm_block_stack.py", line 104, in _create_blocks
blocks.append(sLSTMBlock(config=config))
^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/ubuntu/wcl/xlstm/xlstm/blocks/slstm/block.py", line 32, in __init__
super().__init__(
File "/home/ubuntu/wcl/xlstm/xlstm/blocks/xlstm_block.py", line 62, in __init__
self.xlstm = sLSTMLayer(config=self.config.slstm)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/ubuntu/wcl/xlstm/xlstm/blocks/slstm/layer.py", line 78, in __init__
self.slstm_cell = sLSTMCell(self.config)
^^^^^^^^^^^^^^^^^^^^^^
File "/home/ubuntu/wcl/xlstm/xlstm/blocks/slstm/cell.py", line 780, in __new__
return sLSTMCell_cuda(config, skip_backend_init=skip_backend_init)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/ubuntu/wcl/xlstm/xlstm/blocks/slstm/cell.py", line 690, in __init__
self.func = sLSTMCellFuncGenerator(self.training, config)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/ubuntu/wcl/xlstm/xlstm/blocks/slstm/cell.py", line 536, in sLSTMCellFuncGenerator
slstm_cuda = sLSTMCellCUDA.instance(config=config)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/ubuntu/wcl/xlstm/xlstm/blocks/slstm/cell.py", line 515, in instance
cls.mod[repr(config)] = load(
^^^^^
File "/home/ubuntu/wcl/xlstm/xlstm/blocks/slstm/src/cuda_init.py", line 84, in load
mod = _load(name + suffix, sources, **myargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/ubuntu/anaconda3/envs/xlstm/lib/python3.11/site-packages/torch/utils/cpp_extension.py", line 1306, in load
return _jit_compile(
^^^^^^^^^^^^^
File "/home/ubuntu/anaconda3/envs/xlstm/lib/python3.11/site-packages/torch/utils/cpp_extension.py", line 1710, in _jit_compile
_write_ninja_file_and_build_library(
File "/home/ubuntu/anaconda3/envs/xlstm/lib/python3.11/site-packages/torch/utils/cpp_extension.py", line 1823, in _write_ninja_file_and_build_library
_run_ninja_build(
File "/home/ubuntu/anaconda3/envs/xlstm/lib/python3.11/site-packages/torch/utils/cpp_extension.py", line 2112, in _run_ninja_build
raise RuntimeError(message) from e
RuntimeError: Error building extension 'slstm_HS128BS8NH4NS4DBfDRbDWbDGbDSbDAfNG4SA1GRCV0GRC0d0FCV0FC0d0'