Closed zixiliuUSC closed 1 year ago
/home/liuzixi01/.conda/envs/torch-cuda116/lib/python3.9/site-packages/colossalai/kernel/cuda_native/csrc/multi_tensor_lamb.cu:329:345: required from here
/home/liuzixi01/.conda/envs/torch-cuda116/lib/python3.9/site-packages/colossalai/kernel/cuda_native/csrc/multi_tensor_apply.cuh:104:150: warning: ‘T* at::Tensor::data() const [with T = int]’ is deprecated: Tensor.data<T>() is deprecated. Please use Tensor.data_ptr<T>() instead. [-Wdeprecated-declarations]
multi_tensor_apply_kernel<<<loc_block_info, block_size, 0, stream>>>(
^
/home/liuzixi01/.conda/envs/torch-cuda116/lib/python3.9/site-packages/torch/include/ATen/core/TensorBody.h:235:1: note: declared here
T * data() const {
^ ~~
/home/liuzixi01/.conda/envs/torch-cuda116/lib/python3.9/site-packages/colossalai/kernel/cuda_native/csrc/multi_tensor_apply.cuh: In instantiation of ‘void multi_tensor_apply(int, int, const at::Tensor&, const std::vector<std::vector<at::Tensor> >&, T, ArgTypes ...) [with int depth = 4; T = LAMBStage1Functor<c10::Half>; ArgTypes = {float, float, float, float, float, float, adamMode_t, float, float*, float}]’:
/home/liuzixi01/.conda/envs/torch-cuda116/lib/python3.9/site-packages/colossalai/kernel/cuda_native/csrc/multi_tensor_lamb.cu:329:664: required from here
/home/liuzixi01/.conda/envs/torch-cuda116/lib/python3.9/site-packages/colossalai/kernel/cuda_native/csrc/multi_tensor_apply.cuh:104:150: warning: ‘T* at::Tensor::data() const [with T = int]’ is deprecated: Tensor.data<T>() is deprecated. Please use Tensor.data_ptr<T>() instead. [-Wdeprecated-declarations]
multi_tensor_apply_kernel<<<loc_block_info, block_size, 0, stream>>>(
^
/home/liuzixi01/.conda/envs/torch-cuda116/lib/python3.9/site-packages/torch/include/ATen/core/TensorBody.h:235:1: note: declared here
T * data() const {
^ ~~
/home/liuzixi01/.conda/envs/torch-cuda116/lib/python3.9/site-packages/colossalai/kernel/cuda_native/csrc/multi_tensor_apply.cuh: In instantiation of ‘void multi_tensor_apply(int, int, const at::Tensor&, const std::vector<std::vector<at::Tensor> >&, T, ArgTypes ...) [with int depth = 2; T = LAMBStage2Functor<float>; ArgTypes = {float*, float*, float, float, bool}]’:
/home/liuzixi01/.conda/envs/torch-cuda116/lib/python3.9/site-packages/colossalai/kernel/cuda_native/csrc/multi_tensor_lamb.cu:345:334: required from here
/home/liuzixi01/.conda/envs/torch-cuda116/lib/python3.9/site-packages/colossalai/kernel/cuda_native/csrc/multi_tensor_apply.cuh:104:150: warning: ‘T* at::Tensor::data() const [with T = int]’ is deprecated: Tensor.data<T>() is deprecated. Please use Tensor.data_ptr<T>() instead. [-Wdeprecated-declarations]
multi_tensor_apply_kernel<<<loc_block_info, block_size, 0, stream>>>(
^
/home/liuzixi01/.conda/envs/torch-cuda116/lib/python3.9/site-packages/torch/include/ATen/core/TensorBody.h:235:1: note: declared here
T * data() const {
^ ~~
/home/liuzixi01/.conda/envs/torch-cuda116/lib/python3.9/site-packages/colossalai/kernel/cuda_native/csrc/multi_tensor_apply.cuh: In instantiation of ‘void multi_tensor_apply(int, int, const at::Tensor&, const std::vector<std::vector<at::Tensor> >&, T, ArgTypes ...) [with int depth = 2; T = LAMBStage2Functor<c10::Half>; ArgTypes = {float*, float*, float, float, bool}]’:
/home/liuzixi01/.conda/envs/torch-cuda116/lib/python3.9/site-packages/colossalai/kernel/cuda_native/csrc/multi_tensor_lamb.cu:345:642: required from here
/home/liuzixi01/.conda/envs/torch-cuda116/lib/python3.9/site-packages/colossalai/kernel/cuda_native/csrc/multi_tensor_apply.cuh:104:150: warning: ‘T* at::Tensor::data() const [with T = int]’ is deprecated: Tensor.data<T>() is deprecated. Please use Tensor.data_ptr<T>() instead. [-Wdeprecated-declarations]
multi_tensor_apply_kernel<<<loc_block_info, block_size, 0, stream>>>(
^
/home/liuzixi01/.conda/envs/torch-cuda116/lib/python3.9/site-packages/torch/include/ATen/core/TensorBody.h:235:1: note: declared here
T * data() const {
^ ~~
[6/7] /usr/local/cuda/bin/nvcc -DTORCH_EXTENSION_NAME=fused_optim -DTORCH_API_INCLUDE_EXTENSION_H -DPYBIND11_COMPILER_TYPE=\"_gcc\" -DPYBIND11_STDLIB=\"_libstdcpp\" -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" -I/home/liuzixi01/.conda/envs/torch-cuda116/lib/python3.9/site-packages/colossalai/kernel/cuda_native/csrc/kernels/include -I/usr/local/cuda/include -isystem /home/liuzixi01/.conda/envs/torch-cuda116/lib/python3.9/site-packages/torch/include -isystem /home/liuzixi01/.conda/envs/torch-cuda116/lib/python3.9/site-packages/torch/include/torch/csrc/api/include -isystem /home/liuzixi01/.conda/envs/torch-cuda116/lib/python3.9/site-packages/torch/include/TH -isystem /home/liuzixi01/.conda/envs/torch-cuda116/lib/python3.9/site-packages/torch/include/THC -isystem /usr/local/cuda/include -isystem /home/liuzixi01/.conda/envs/torch-cuda116/include/python3.9 -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_86,code=compute_86 -gencode=arch=compute_86,code=sm_86 --compiler-options '-fPIC' -O3 --use_fast_math -lineinfo -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -std=c++14 -c /home/liuzixi01/.conda/envs/torch-cuda116/lib/python3.9/site-packages/colossalai/kernel/cuda_native/csrc/multi_tensor_l2norm_kernel.cu -o multi_tensor_l2norm_kernel.cuda.o
/home/liuzixi01/.conda/envs/torch-cuda116/lib/python3.9/site-packages/colossalai/kernel/cuda_native/csrc/multi_tensor_l2norm_kernel.cu: In function ‘std::tuple<at::Tensor, at::Tensor> multi_tensor_l2norm_cuda(int, at::Tensor, std::vector<std::vector<at::Tensor> >, c10::optional<bool>)’:
/home/liuzixi01/.conda/envs/torch-cuda116/lib/python3.9/site-packages/colossalai/kernel/cuda_native/csrc/multi_tensor_l2norm_kernel.cu:288:217: warning: ‘T* at::Tensor::data() const [with T = float]’ is deprecated: Tensor.data<T>() is deprecated. Please use Tensor.data_ptr<T>() instead. [-Wdeprecated-declarations]
DISPATCH_FLOAT_AND_HALF(
^
/home/liuzixi01/.conda/envs/torch-cuda116/lib/python3.9/site-packages/torch/include/ATen/core/TensorBody.h:235:1: note: declared here
T * data() const {
^ ~~
/home/liuzixi01/.conda/envs/torch-cuda116/lib/python3.9/site-packages/colossalai/kernel/cuda_native/csrc/multi_tensor_l2norm_kernel.cu:288:265: warning: ‘T* at::Tensor::data() const [with T = float]’ is deprecated: Tensor.data<T>() is deprecated. Please use Tensor.data_ptr<T>() instead. [-Wdeprecated-declarations]
DISPATCH_FLOAT_AND_HALF(
^
/home/liuzixi01/.conda/envs/torch-cuda116/lib/python3.9/site-packages/torch/include/ATen/core/TensorBody.h:235:1: note: declared here
T * data() const {
^ ~~
/home/liuzixi01/.conda/envs/torch-cuda116/lib/python3.9/site-packages/colossalai/kernel/cuda_native/csrc/multi_tensor_l2norm_kernel.cu:288:504: warning: ‘T* at::Tensor::data() const [with T = float]’ is deprecated: Tensor.data<T>() is deprecated. Please use Tensor.data_ptr<T>() instead. [-Wdeprecated-declarations]
DISPATCH_FLOAT_AND_HALF(
^
/home/liuzixi01/.conda/envs/torch-cuda116/lib/python3.9/site-packages/torch/include/ATen/core/TensorBody.h:235:1: note: declared here
T * data() const {
^ ~~
/home/liuzixi01/.conda/envs/torch-cuda116/lib/python3.9/site-packages/colossalai/kernel/cuda_native/csrc/multi_tensor_l2norm_kernel.cu:288:552: warning: ‘T* at::Tensor::data() const [with T = float]’ is deprecated: Tensor.data<T>() is deprecated. Please use Tensor.data_ptr<T>() instead. [-Wdeprecated-declarations]
DISPATCH_FLOAT_AND_HALF(
^
/home/liuzixi01/.conda/envs/torch-cuda116/lib/python3.9/site-packages/torch/include/ATen/core/TensorBody.h:235:1: note: declared here
T * data() const {
^ ~~
/home/liuzixi01/.conda/envs/torch-cuda116/lib/python3.9/site-packages/colossalai/kernel/cuda_native/csrc/multi_tensor_l2norm_kernel.cu:305:115: warning: ‘T* at::Tensor::data() const [with T = float]’ is deprecated: Tensor.data<T>() is deprecated. Please use Tensor.data_ptr<T>() instead. [-Wdeprecated-declarations]
cleanup<<<per_tensor ? ntensors : 1, 512, 0, stream>>>(
^
/home/liuzixi01/.conda/envs/torch-cuda116/lib/python3.9/site-packages/torch/include/ATen/core/TensorBody.h:235:1: note: declared here
T * data() const {
^ ~~
/home/liuzixi01/.conda/envs/torch-cuda116/lib/python3.9/site-packages/colossalai/kernel/cuda_native/csrc/multi_tensor_l2norm_kernel.cu:305:163: warning: ‘T* at::Tensor::data() const [with T = float]’ is deprecated: Tensor.data<T>() is deprecated. Please use Tensor.data_ptr<T>() instead. [-Wdeprecated-declarations]
cleanup<<<per_tensor ? ntensors : 1, 512, 0, stream>>>(
^
/home/liuzixi01/.conda/envs/torch-cuda116/lib/python3.9/site-packages/torch/include/ATen/core/TensorBody.h:235:1: note: declared here
T * data() const {
^ ~~
/home/liuzixi01/.conda/envs/torch-cuda116/lib/python3.9/site-packages/colossalai/kernel/cuda_native/csrc/multi_tensor_l2norm_kernel.cu:305:196: warning: ‘T* at::Tensor::data() const [with T = float]’ is deprecated: Tensor.data<T>() is deprecated. Please use Tensor.data_ptr<T>() instead. [-Wdeprecated-declarations]
cleanup<<<per_tensor ? ntensors : 1, 512, 0, stream>>>(
^
/home/liuzixi01/.conda/envs/torch-cuda116/lib/python3.9/site-packages/torch/include/ATen/core/TensorBody.h:235:1: note: declared here
T * data() const {
^ ~~
/home/liuzixi01/.conda/envs/torch-cuda116/lib/python3.9/site-packages/colossalai/kernel/cuda_native/csrc/multi_tensor_l2norm_kernel.cu:305:241: warning: ‘T* at::Tensor::data() const [with T = float]’ is deprecated: Tensor.data<T>() is deprecated. Please use Tensor.data_ptr<T>() instead. [-Wdeprecated-declarations]
cleanup<<<per_tensor ? ntensors : 1, 512, 0, stream>>>(
^
/home/liuzixi01/.conda/envs/torch-cuda116/lib/python3.9/site-packages/torch/include/ATen/core/TensorBody.h:235:1: note: declared here
T * data() const {
^ ~~
/home/liuzixi01/.conda/envs/torch-cuda116/lib/python3.9/site-packages/colossalai/kernel/cuda_native/csrc/multi_tensor_l2norm_kernel.cu: In function ‘void multi_tensor_norm_out_cuda(int, at::Tensor, std::vector<std::vector<at::Tensor> >, at::Tensor, float, float, int)’:
/home/liuzixi01/.conda/envs/torch-cuda116/lib/python3.9/site-packages/colossalai/kernel/cuda_native/csrc/multi_tensor_l2norm_kernel.cu:348:218: warning: ‘T* at::Tensor::data() const [with T = float]’ is deprecated: Tensor.data<T>() is deprecated. Please use Tensor.data_ptr<T>() instead. [-Wdeprecated-declarations]
DISPATCH_FLOAT_AND_HALF(
^
/home/liuzixi01/.conda/envs/torch-cuda116/lib/python3.9/site-packages/torch/include/ATen/core/TensorBody.h:235:1: note: declared here
T * data() const {
^ ~~
/home/liuzixi01/.conda/envs/torch-cuda116/lib/python3.9/site-packages/colossalai/kernel/cuda_native/csrc/multi_tensor_l2norm_kernel.cu:348:253: warning: ‘T* at::Tensor::data() const [with T = float]’ is deprecated: Tensor.data<T>() is deprecated. Please use Tensor.data_ptr<T>() instead. [-Wdeprecated-declarations]
DISPATCH_FLOAT_AND_HALF(
^
/home/liuzixi01/.conda/envs/torch-cuda116/lib/python3.9/site-packages/torch/include/ATen/core/TensorBody.h:235:1: note: declared here
T * data() const {
^ ~~
/home/liuzixi01/.conda/envs/torch-cuda116/lib/python3.9/site-packages/colossalai/kernel/cuda_native/csrc/multi_tensor_l2norm_kernel.cu:348:475: warning: ‘T* at::Tensor::data() const [with T = float]’ is deprecated: Tensor.data<T>() is deprecated. Please use Tensor.data_ptr<T>() instead. [-Wdeprecated-declarations]
DISPATCH_FLOAT_AND_HALF(
^
/home/liuzixi01/.conda/envs/torch-cuda116/lib/python3.9/site-packages/torch/include/ATen/core/TensorBody.h:235:1: note: declared here
T * data() const {
^ ~~
/home/liuzixi01/.conda/envs/torch-cuda116/lib/python3.9/site-packages/colossalai/kernel/cuda_native/csrc/multi_tensor_l2norm_kernel.cu:348:510: warning: ‘T* at::Tensor::data() const [with T = float]’ is deprecated: Tensor.data<T>() is deprecated. Please use Tensor.data_ptr<T>() instead. [-Wdeprecated-declarations]
DISPATCH_FLOAT_AND_HALF(
^
/home/liuzixi01/.conda/envs/torch-cuda116/lib/python3.9/site-packages/torch/include/ATen/core/TensorBody.h:235:1: note: declared here
T * data() const {
^ ~~
/home/liuzixi01/.conda/envs/torch-cuda116/lib/python3.9/site-packages/colossalai/kernel/cuda_native/csrc/multi_tensor_l2norm_kernel.cu:355:217: warning: ‘T* at::Tensor::data() const [with T = float]’ is deprecated: Tensor.data<T>() is deprecated. Please use Tensor.data_ptr<T>() instead. [-Wdeprecated-declarations]
DISPATCH_FLOAT_AND_HALF(
^
/home/liuzixi01/.conda/envs/torch-cuda116/lib/python3.9/site-packages/torch/include/ATen/core/TensorBody.h:235:1: note: declared here
T * data() const {
^ ~~
/home/liuzixi01/.conda/envs/torch-cuda116/lib/python3.9/site-packages/colossalai/kernel/cuda_native/csrc/multi_tensor_l2norm_kernel.cu:355:252: warning: ‘T* at::Tensor::data() const [with T = float]’ is deprecated: Tensor.data<T>() is deprecated. Please use Tensor.data_ptr<T>() instead. [-Wdeprecated-declarations]
DISPATCH_FLOAT_AND_HALF(
^
/home/liuzixi01/.conda/envs/torch-cuda116/lib/python3.9/site-packages/torch/include/ATen/core/TensorBody.h:235:1: note: declared here
T * data() const {
^ ~~
/home/liuzixi01/.conda/envs/torch-cuda116/lib/python3.9/site-packages/colossalai/kernel/cuda_native/csrc/multi_tensor_l2norm_kernel.cu:355:473: warning: ‘T* at::Tensor::data() const [with T = float]’ is deprecated: Tensor.data<T>() is deprecated. Please use Tensor.data_ptr<T>() instead. [-Wdeprecated-declarations]
DISPATCH_FLOAT_AND_HALF(
^
/home/liuzixi01/.conda/envs/torch-cuda116/lib/python3.9/site-packages/torch/include/ATen/core/TensorBody.h:235:1: note: declared here
T * data() const {
^ ~~
/home/liuzixi01/.conda/envs/torch-cuda116/lib/python3.9/site-packages/colossalai/kernel/cuda_native/csrc/multi_tensor_l2norm_kernel.cu:355:508: warning: ‘T* at::Tensor::data() const [with T = float]’ is deprecated: Tensor.data<T>() is deprecated. Please use Tensor.data_ptr<T>() instead. [-Wdeprecated-declarations]
DISPATCH_FLOAT_AND_HALF(
^
/home/liuzixi01/.conda/envs/torch-cuda116/lib/python3.9/site-packages/torch/include/ATen/core/TensorBody.h:235:1: note: declared here
T * data() const {
^ ~~
/home/liuzixi01/.conda/envs/torch-cuda116/lib/python3.9/site-packages/colossalai/kernel/cuda_native/csrc/multi_tensor_l2norm_kernel.cu:376:101: warning: ‘T* at::Tensor::data() const [with T = float]’ is deprecated: Tensor.data<T>() is deprecated. Please use Tensor.data_ptr<T>() instead. [-Wdeprecated-declarations]
cleanup_v2<<<ntensors, 512, 0, stream>>>(
^
/home/liuzixi01/.conda/envs/torch-cuda116/lib/python3.9/site-packages/torch/include/ATen/core/TensorBody.h:235:1: note: declared here
T * data() const {
^ ~~
/home/liuzixi01/.conda/envs/torch-cuda116/lib/python3.9/site-packages/colossalai/kernel/cuda_native/csrc/multi_tensor_l2norm_kernel.cu:376:136: warning: ‘T* at::Tensor::data() const [with T = float]’ is deprecated: Tensor.data<T>() is deprecated. Please use Tensor.data_ptr<T>() instead. [-Wdeprecated-declarations]
cleanup_v2<<<ntensors, 512, 0, stream>>>(
^
/home/liuzixi01/.conda/envs/torch-cuda116/lib/python3.9/site-packages/torch/include/ATen/core/TensorBody.h:235:1: note: declared here
T * data() const {
^ ~~
/home/liuzixi01/.conda/envs/torch-cuda116/lib/python3.9/site-packages/colossalai/kernel/cuda_native/csrc/multi_tensor_l2norm_kernel.cu:376:157: warning: ‘T* at::Tensor::data() const [with T = float]’ is deprecated: Tensor.data<T>() is deprecated. Please use Tensor.data_ptr<T>() instead. [-Wdeprecated-declarations]
cleanup_v2<<<ntensors, 512, 0, stream>>>(
^
/home/liuzixi01/.conda/envs/torch-cuda116/lib/python3.9/site-packages/torch/include/ATen/core/TensorBody.h:235:1: note: declared here
T * data() const {
^ ~~
/home/liuzixi01/.conda/envs/torch-cuda116/lib/python3.9/site-packages/colossalai/kernel/cuda_native/csrc/multi_tensor_l2norm_kernel.cu:376:178: warning: ‘T* at::Tensor::data() const [with T = float]’ is deprecated: Tensor.data<T>() is deprecated. Please use Tensor.data_ptr<T>() instead. [-Wdeprecated-declarations]
cleanup_v2<<<ntensors, 512, 0, stream>>>(
^
/home/liuzixi01/.conda/envs/torch-cuda116/lib/python3.9/site-packages/torch/include/ATen/core/TensorBody.h:235:1: note: declared here
T * data() const {
^ ~~
/home/liuzixi01/.conda/envs/torch-cuda116/lib/python3.9/site-packages/colossalai/kernel/cuda_native/csrc/multi_tensor_apply.cuh: In instantiation of ‘void multi_tensor_apply(int, int, const at::Tensor&, const std::vector<std::vector<at::Tensor> >&, T, ArgTypes ...) [with int depth = 1; T = L2NormFunctor<float>; ArgTypes = {float*, float*, bool, int}]’:
/home/liuzixi01/.conda/envs/torch-cuda116/lib/python3.9/site-packages/colossalai/kernel/cuda_native/csrc/multi_tensor_l2norm_kernel.cu:288:313: required from here
/home/liuzixi01/.conda/envs/torch-cuda116/lib/python3.9/site-packages/colossalai/kernel/cuda_native/csrc/multi_tensor_apply.cuh:104:150: warning: ‘T* at::Tensor::data() const [with T = int]’ is deprecated: Tensor.data<T>() is deprecated. Please use Tensor.data_ptr<T>() instead. [-Wdeprecated-declarations]
multi_tensor_apply_kernel<<<loc_block_info, block_size, 0, stream>>>(
^
/home/liuzixi01/.conda/envs/torch-cuda116/lib/python3.9/site-packages/torch/include/ATen/core/TensorBody.h:235:1: note: declared here
T * data() const {
^ ~~
/home/liuzixi01/.conda/envs/torch-cuda116/lib/python3.9/site-packages/colossalai/kernel/cuda_native/csrc/multi_tensor_apply.cuh: In instantiation of ‘void multi_tensor_apply(int, int, const at::Tensor&, const std::vector<std::vector<at::Tensor> >&, T, ArgTypes ...) [with int depth = 1; T = L2NormFunctor<c10::Half>; ArgTypes = {float*, float*, bool, int}]’:
/home/liuzixi01/.conda/envs/torch-cuda116/lib/python3.9/site-packages/colossalai/kernel/cuda_native/csrc/multi_tensor_l2norm_kernel.cu:288:600: required from here
/home/liuzixi01/.conda/envs/torch-cuda116/lib/python3.9/site-packages/colossalai/kernel/cuda_native/csrc/multi_tensor_apply.cuh:104:150: warning: ‘T* at::Tensor::data() const [with T = int]’ is deprecated: Tensor.data<T>() is deprecated. Please use Tensor.data_ptr<T>() instead. [-Wdeprecated-declarations]
multi_tensor_apply_kernel<<<loc_block_info, block_size, 0, stream>>>(
^
/home/liuzixi01/.conda/envs/torch-cuda116/lib/python3.9/site-packages/torch/include/ATen/core/TensorBody.h:235:1: note: declared here
T * data() const {
^ ~~
/home/liuzixi01/.conda/envs/torch-cuda116/lib/python3.9/site-packages/colossalai/kernel/cuda_native/csrc/multi_tensor_apply.cuh: In instantiation of ‘void multi_tensor_apply(int, int, const at::Tensor&, const std::vector<std::vector<at::Tensor> >&, T, ArgTypes ...) [with int depth = 1; T = MaxNormFunctor<float>; ArgTypes = {float*, float*, bool, int}]’:
/home/liuzixi01/.conda/envs/torch-cuda116/lib/python3.9/site-packages/colossalai/kernel/cuda_native/csrc/multi_tensor_l2norm_kernel.cu:348:283: required from here
/home/liuzixi01/.conda/envs/torch-cuda116/lib/python3.9/site-packages/colossalai/kernel/cuda_native/csrc/multi_tensor_apply.cuh:104:150: warning: ‘T* at::Tensor::data() const [with T = int]’ is deprecated: Tensor.data<T>() is deprecated. Please use Tensor.data_ptr<T>() instead. [-Wdeprecated-declarations]
multi_tensor_apply_kernel<<<loc_block_info, block_size, 0, stream>>>(
^
/home/liuzixi01/.conda/envs/torch-cuda116/lib/python3.9/site-packages/torch/include/ATen/core/TensorBody.h:235:1: note: declared here
T * data() const {
^ ~~
/home/liuzixi01/.conda/envs/torch-cuda116/lib/python3.9/site-packages/colossalai/kernel/cuda_native/csrc/multi_tensor_apply.cuh: In instantiation of ‘void multi_tensor_apply(int, int, const at::Tensor&, const std::vector<std::vector<at::Tensor> >&, T, ArgTypes ...) [with int depth = 1; T = MaxNormFunctor<c10::Half>; ArgTypes = {float*, float*, bool, int}]’:
/home/liuzixi01/.conda/envs/torch-cuda116/lib/python3.9/site-packages/colossalai/kernel/cuda_native/csrc/multi_tensor_l2norm_kernel.cu:348:540: required from here
/home/liuzixi01/.conda/envs/torch-cuda116/lib/python3.9/site-packages/colossalai/kernel/cuda_native/csrc/multi_tensor_apply.cuh:104:150: warning: ‘T* at::Tensor::data() const [with T = int]’ is deprecated: Tensor.data<T>() is deprecated. Please use Tensor.data_ptr<T>() instead. [-Wdeprecated-declarations]
multi_tensor_apply_kernel<<<loc_block_info, block_size, 0, stream>>>(
^
/home/liuzixi01/.conda/envs/torch-cuda116/lib/python3.9/site-packages/torch/include/ATen/core/TensorBody.h:235:1: note: declared here
T * data() const {
^ ~~
[7/7] c++ colossal_C_frontend.o multi_tensor_sgd_kernel.cuda.o multi_tensor_scale_kernel.cuda.o multi_tensor_adam.cuda.o multi_tensor_l2norm_kernel.cuda.o multi_tensor_lamb.cuda.o -shared -L/home/liuzixi01/.conda/envs/torch-cuda116/lib/python3.9/site-packages/torch/lib -lc10 -lc10_cuda -ltorch_cpu -ltorch_cuda_cu -ltorch_cuda_cpp -ltorch -ltorch_python -L/usr/local/cuda/lib64 -lcudart -o fused_optim.so
Loading extension module fused_optim...
Time to load fused_optim op: 49.78874206542969 seconds
Loading extension module fused_optim...
searching chunk configuration is completed in 3.64 s.
used number: 9421.95 MB, wasted number: 56.38 MB
total wasted percentage is 0.59%
WARNING:torch.distributed.elastic.multiprocessing.api:Sending process 317 closing signal SIGTERM
ERROR:torch.distributed.elastic.multiprocessing.api:failed (exitcode: -9) local_rank: 1 (pid: 318) of binary: /home/liuzixi01/.conda/envs/torch-cuda116/bin/python3.9
Traceback (most recent call last):
File "/home/liuzixi01/.conda/envs/torch-cuda116/bin/torchrun", line 8, in <module>
sys.exit(main())
File "/home/liuzixi01/.conda/envs/torch-cuda116/lib/python3.9/site-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", line 345, in wrapper
return f(*args, **kwargs)
File "/home/liuzixi01/.conda/envs/torch-cuda116/lib/python3.9/site-packages/torch/distributed/run.py", line 761, in main
run(args)
File "/home/liuzixi01/.conda/envs/torch-cuda116/lib/python3.9/site-packages/torch/distributed/run.py", line 752, in run
elastic_launch(
File "/home/liuzixi01/.conda/envs/torch-cuda116/lib/python3.9/site-packages/torch/distributed/launcher/api.py", line 131, in __call__
return launch_agent(self._config, self._entrypoint, list(args))
File "/home/liuzixi01/.conda/envs/torch-cuda116/lib/python3.9/site-packages/torch/distributed/launcher/api.py", line 245, in launch_agent
raise ChildFailedError(
torch.distributed.elastic.multiprocessing.errors.ChildFailedError:
====================================================
./train_glm_demo.py FAILED
----------------------------------------------------
Failures:
<NO_OTHER_FAILURES>
----------------------------------------------------
Root Cause (first observed failure):
[0]:
time : 2023-02-27_17:40:06
host : gzailab-liuzixi01-colossalai-0
rank : 1 (local_rank: 1)
exitcode : -9 (pid: 318)
error_file: <N/A>
traceback : Signal 9 (SIGKILL) received by PID 318
====================================================
Hi, could you set `num_workers=0` in your dataloader and then run it again?
If it fails again, then try with a single GPU to locate the error.
I resolved this problem by reinstalling, so I will close the issue. Thanks!
数据集格式是什么?/ What is the dataset format?
我看别的框架是这样的 / I think other frameworks look like this:
{ "prompt": "长城h3风扇不转。继电器好的。保险丝好的传感器新的风扇也新的这是为什么。就是继电器缺一个信号线", "response": "用电脑能读数据流吗?水温多少", "history": [] } { "prompt": "95", "response": "上下水管温差怎么样啊?空气是不是都排干净了呢?", "history": [ ["长城h3风扇不转。继电器好的。保险丝好的传感器新的风扇也新的这是为什么。就是继电器缺一个信号线", "用电脑能读数据流吗?水温多少"] ] } { "prompt": "是的。上下水管都好的", "response": "那就要检查线路了,一般风扇继电器是由电脑控制吸合的,如果电路存在断路,或者电脑坏了的话会出现继电器不吸合的情况!", "history": [ ["长城h3风扇不转。继电器好的。保险丝好的传感器新的风扇也新的这是为什么。就是继电器缺一个信号线", "用电脑能读数据流吗?水温多少"], ["95", "上下水管温差怎么样啊?空气是不是都排干净了呢?"] ] }
Bot detected the issue body's language is not English, translate it automatically. 👯👭🏻🧑🤝🧑👫🧑🏿🤝🧑🏻👩🏾🤝👨🏿👬🏿
What is the dataset format?
I think other frameworks look like this:
{ "prompt": "Great Wall h3 fan does not turn. The relay is good. The fuse is good. The sensor is new and the fan is new. Why is this? The relay is missing a signal line", "response": "Can I read the data stream with a computer? What is the water temperature", "history": [] } { "prompt": "95", "response": "How is the temperature difference between the upper and lower water pipes? Has the air been drained?", "history": [ ["Great Wall h3 fan does not turn. The relay is good. The fuse is good. The sensor is new and the fan is new. Why is this? The relay is missing a signal line", "Can I read the data stream with a computer? What is the water temperature"] ] } { "prompt": "Yes. Both the upper and lower water pipes are fine", "response": "Then check the circuit. Generally, the fan relay is controlled by the computer. If the circuit is open or the computer is broken, the relay will not be closed!", "history": [ ["Great Wall h3 fan does not turn. The relay is good. The fuse is good. The sensor is new and the fan is new. Why is this? The relay is missing a signal line", "Can I read the data stream with a computer? What is the water temperature"], ["95", "How is the temperature difference between the upper and lower water pipes? Has the air been drained?"] ] }
🐛 Describe the bug
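使用 Hugging Face 的模型接口,加载 GLM,按照 GPT-2 示例的方式运行训练代码,数据集合采用自建数据集,数据集的运行模式如下,仅仅修改了 https://github.com/hpcaitech/ColossalAI/blob/dbc01b9c0479a6fd3fb04450b9dc01b5162d8c0d/examples/language/gpt/gemini/train_gpt_demo.py#L342 这行代码,train_step 函数并无修改。 运行脚本如下:
/ Using the Hugging Face model interface, I loaded GLM and ran the training code in the same way as the GPT-2 example. The dataset is a self-built one, and the way it is used is shown below. I only modified the line at https://github.com/hpcaitech/ColossalAI/blob/dbc01b9c0479a6fd3fb04450b9dc01b5162d8c0d/examples/language/gpt/gemini/train_gpt_demo.py#L342 ; the train_step function is unchanged. The run script is as follows: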
报错如下: / The error output is as follows:
评论区还有剩余报错 / The rest of the error output is in the comments.
Environment
Python 3.9.13, PyTorch 1.13+cu11.6, CUDA 11.6, ColossalAI 0.2.5