Problem:
I receive the error below when running sdne_wiki.py:
Epoch 1/40
2023-06-06 09:11:55.533650: E tensorflow/stream_executor/cuda/cuda_blas.cc:636] failed to run cuBLAS routine cublasSgemm_v2: CUBLAS_STATUS_EXECUTION_FAILED
Traceback (most recent call last):
File "/sda1/wys_file/sda1/wys_file/anaconda3/envs/sikitf15/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1350, in _do_call
return fn(*args)
File "/sda1/wys_file/sda1/wys_file/anaconda3/envs/sikitf15/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1329, in _run_fn
status, run_metadata)
File "/sda1/wys_file/sda1/wys_file/anaconda3/envs/sikitf15/lib/python3.6/site-packages/tensorflow/python/framework/errors_impl.py", line 473, in __exit__
c_api.TF_GetCode(self.status.status))
tensorflow.python.framework.errors_impl.InternalError: Blas GEMM launch failed : a.shape=(5, 50), b.shape=(5, 5), m=50, n=5, k=5
[[Node: loss/1st_loss/MatMul = MatMul[T=DT_FLOAT, transpose_a=true, transpose_b=false, _device="/job:localhost/replica:0/task:0/device:GPU:0"](1st/Relu, _arg_1st_target_0_1/_65)]]
[[Node: loss/1st_loss/Mean_2/_107 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_744_loss/1st_loss/Mean_2", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/sda1/wys_file/siki/ChatterNet/Baselines/DeepCas/dataProcess/test_generate_nodevector.py", line 146, in <module>
sdne_model.train(batch_size=1024, epochs=40, verbose=2)
File "/sda1/wys_file/sda1/wys_file/anaconda3/envs/sikitf15/lib/python3.6/site-packages/ge-0.0.0-py3.6.egg/ge/models/sdne.py", line 111, in train
File "/sda1/wys_file/sda1/wys_file/anaconda3/envs/sikitf15/lib/python3.6/site-packages/tensorflow/python/keras/_impl/keras/engine/training.py", line 1678, in fit
validation_steps=validation_steps)
File "/sda1/wys_file/sda1/wys_file/anaconda3/envs/sikitf15/lib/python3.6/site-packages/tensorflow/python/keras/_impl/keras/engine/training.py", line 1223, in _fit_loop
outs = f(ins_batch)
File "/sda1/wys_file/sda1/wys_file/anaconda3/envs/sikitf15/lib/python3.6/site-packages/tensorflow/python/keras/_impl/keras/backend.py", line 2553, in __call__
fetches=fetches, feed_dict=feed_dict, **self.session_kwargs)
File "/sda1/wys_file/sda1/wys_file/anaconda3/envs/sikitf15/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 895, in run
run_metadata_ptr)
File "/sda1/wys_file/sda1/wys_file/anaconda3/envs/sikitf15/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1128, in _run
feed_dict_tensor, options, run_metadata)
File "/sda1/wys_file/sda1/wys_file/anaconda3/envs/sikitf15/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1344, in _do_run
options, run_metadata)
File "/sda1/wys_file/sda1/wys_file/anaconda3/envs/sikitf15/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1363, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InternalError: Blas GEMM launch failed : a.shape=(5, 50), b.shape=(5, 5), m=50, n=5, k=5
[[Node: loss/1st_loss/MatMul = MatMul[T=DT_FLOAT, transpose_a=true, transpose_b=false, _device="/job:localhost/replica:0/task:0/device:GPU:0"](1st/Relu, _arg_1st_target_0_1/_65)]]
[[Node: loss/1st_loss/Mean_2/_107 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_744_loss/1st_loss/Mean_2", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]
Caused by op 'loss/1st_loss/MatMul', defined at:
File "/sda1/wys_file/siki/ChatterNet/Baselines/DeepCas/dataProcess/test_generate_nodevector.py", line 145, in <module>
sdne_model = SDNE(G, hidden_size=[256, 50], )
File "/sda1/wys_file/sda1/wys_file/anaconda3/envs/sikitf15/lib/python3.6/site-packages/ge-0.0.0-py3.6.egg/ge/models/sdne.py", line 92, in __init__
self.reset_model()
File "/sda1/wys_file/sda1/wys_file/anaconda3/envs/sikitf15/lib/python3.6/site-packages/ge-0.0.0-py3.6.egg/ge/models/sdne.py", line 100, in reset_model
self.model.compile(opt, [l_2nd(self.beta), l_1st(self.alpha)])
File "/sda1/wys_file/sda1/wys_file/anaconda3/envs/sikitf15/lib/python3.6/site-packages/tensorflow/python/keras/_impl/keras/engine/training.py", line 849, in compile
output_loss = weighted_loss(y_true, y_pred, sample_weight, mask)
File "/sda1/wys_file/sda1/wys_file/anaconda3/envs/sikitf15/lib/python3.6/site-packages/tensorflow/python/keras/_impl/keras/engine/training.py", line 454, in weighted
score_array = fn(y_true, y_pred)
File "/sda1/wys_file/sda1/wys_file/anaconda3/envs/sikitf15/lib/python3.6/site-packages/ge-0.0.0-py3.6.egg/ge/models/sdne.py", line 50, in loss_1st
return alpha * 2 * tf.linalg.trace(tf.matmul(tf.matmul(Y, L, transpose_a=True), Y)) / batch_size
File "/sda1/wys_file/sda1/wys_file/anaconda3/envs/sikitf15/lib/python3.6/site-packages/tensorflow/python/ops/math_ops.py", line 2022, in matmul
a, b, transpose_a=transpose_a, transpose_b=transpose_b, name=name)
File "/sda1/wys_file/sda1/wys_file/anaconda3/envs/sikitf15/lib/python3.6/site-packages/tensorflow/python/ops/gen_math_ops.py", line 2516, in _mat_mul
name=name)
File "/sda1/wys_file/sda1/wys_file/anaconda3/envs/sikitf15/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
op_def=op_def)
File "/sda1/wys_file/sda1/wys_file/anaconda3/envs/sikitf15/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 3160, in create_op
op_def=op_def)
File "/sda1/wys_file/sda1/wys_file/anaconda3/envs/sikitf15/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1625, in __init__
self._traceback = self._graph._extract_stack() # pylint: disable=protected-access
Environment: Python 3.6, tensorflow-gpu 1.5
Problem: I receive the error below when running sdne_wiki.py:
Epoch 1/40 2023-06-06 09:11:55.533650: E tensorflow/stream_executor/cuda/cuda_blas.cc:636] failed to run cuBLAS routine cublasSgemm_v2: CUBLAS_STATUS_EXECUTION_FAILED Traceback (most recent call last): File "/sda1/wys_file/sda1/wys_file/anaconda3/envs/sikitf15/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1350, in _do_call return fn(*args) File "/sda1/wys_file/sda1/wys_file/anaconda3/envs/sikitf15/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1329, in _run_fn status, run_metadata) File "/sda1/wys_file/sda1/wys_file/anaconda3/envs/sikitf15/lib/python3.6/site-packages/tensorflow/python/framework/errors_impl.py", line 473, in exit c_api.TF_GetCode(self.status.status)) tensorflow.python.framework.errors_impl.InternalError: Blas GEMM launch failed : a.shape=(5, 50), b.shape=(5, 5), m=50, n=5, k=5 [[Node: loss/1st_loss/MatMul = MatMul[T=DT_FLOAT, transpose_a=true, transpose_b=false, _device="/job:localhost/replica:0/task:0/device:GPU:0"](1st/Relu, _arg_1st_target_0_1/_65)]] [[Node: loss/1st_loss/Mean_2/_107 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_744_loss/1st_loss/Mean_2", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]
During handling of the above exception, another exception occurred:
Traceback (most recent call last): File "/sda1/wys_file/siki/ChatterNet/Baselines/DeepCas/dataProcess/test_generate_nodevector.py", line 146, in <module>
sdne_model.train(batch_size=1024, epochs=40, verbose=2)
File "/sda1/wys_file/sda1/wys_file/anaconda3/envs/sikitf15/lib/python3.6/site-packages/ge-0.0.0-py3.6.egg/ge/models/sdne.py", line 111, in train
File "/sda1/wys_file/sda1/wys_file/anaconda3/envs/sikitf15/lib/python3.6/site-packages/tensorflow/python/keras/_impl/keras/engine/training.py", line 1678, in fit
validation_steps=validation_steps)
File "/sda1/wys_file/sda1/wys_file/anaconda3/envs/sikitf15/lib/python3.6/site-packages/tensorflow/python/keras/_impl/keras/engine/training.py", line 1223, in _fit_loop
outs = f(ins_batch)
File "/sda1/wys_file/sda1/wys_file/anaconda3/envs/sikitf15/lib/python3.6/site-packages/tensorflow/python/keras/_impl/keras/backend.py", line 2553, in __call__
fetches=fetches, feed_dict=feed_dict, **self.session_kwargs)
File "/sda1/wys_file/sda1/wys_file/anaconda3/envs/sikitf15/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 895, in run
run_metadata_ptr)
File "/sda1/wys_file/sda1/wys_file/anaconda3/envs/sikitf15/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1128, in _run
feed_dict_tensor, options, run_metadata)
File "/sda1/wys_file/sda1/wys_file/anaconda3/envs/sikitf15/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1344, in _do_run
options, run_metadata)
File "/sda1/wys_file/sda1/wys_file/anaconda3/envs/sikitf15/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1363, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InternalError: Blas GEMM launch failed : a.shape=(5, 50), b.shape=(5, 5), m=50, n=5, k=5
[[Node: loss/1st_loss/MatMul = MatMul[T=DT_FLOAT, transpose_a=true, transpose_b=false, _device="/job:localhost/replica:0/task:0/device:GPU:0"](1st/Relu, _arg_1st_target_0_1/_65)]]
[[Node: loss/1st_loss/Mean_2/_107 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_744_loss/1st_loss/Mean_2", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]
Caused by op 'loss/1st_loss/MatMul', defined at: File "/sda1/wys_file/siki/ChatterNet/Baselines/DeepCas/dataProcess/test_generate_nodevector.py", line 145, in <module>
sdne_model = SDNE(G, hidden_size=[256, 50], )
File "/sda1/wys_file/sda1/wys_file/anaconda3/envs/sikitf15/lib/python3.6/site-packages/ge-0.0.0-py3.6.egg/ge/models/sdne.py", line 92, in __init__
self.reset_model()
File "/sda1/wys_file/sda1/wys_file/anaconda3/envs/sikitf15/lib/python3.6/site-packages/ge-0.0.0-py3.6.egg/ge/models/sdne.py", line 100, in reset_model
self.model.compile(opt, [l_2nd(self.beta), l_1st(self.alpha)])
File "/sda1/wys_file/sda1/wys_file/anaconda3/envs/sikitf15/lib/python3.6/site-packages/tensorflow/python/keras/_impl/keras/engine/training.py", line 849, in compile
output_loss = weighted_loss(y_true, y_pred, sample_weight, mask)
File "/sda1/wys_file/sda1/wys_file/anaconda3/envs/sikitf15/lib/python3.6/site-packages/tensorflow/python/keras/_impl/keras/engine/training.py", line 454, in weighted
score_array = fn(y_true, y_pred)
File "/sda1/wys_file/sda1/wys_file/anaconda3/envs/sikitf15/lib/python3.6/site-packages/ge-0.0.0-py3.6.egg/ge/models/sdne.py", line 50, in loss_1st
return alpha * 2 * tf.linalg.trace(tf.matmul(tf.matmul(Y, L, transpose_a=True), Y)) / batch_size
File "/sda1/wys_file/sda1/wys_file/anaconda3/envs/sikitf15/lib/python3.6/site-packages/tensorflow/python/ops/math_ops.py", line 2022, in matmul
a, b, transpose_a=transpose_a, transpose_b=transpose_b, name=name)
File "/sda1/wys_file/sda1/wys_file/anaconda3/envs/sikitf15/lib/python3.6/site-packages/tensorflow/python/ops/gen_math_ops.py", line 2516, in _mat_mul
name=name)
File "/sda1/wys_file/sda1/wys_file/anaconda3/envs/sikitf15/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
op_def=op_def)
File "/sda1/wys_file/sda1/wys_file/anaconda3/envs/sikitf15/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 3160, in create_op
op_def=op_def)
File "/sda1/wys_file/sda1/wys_file/anaconda3/envs/sikitf15/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1625, in __init__
self._traceback = self._graph._extract_stack() # pylint: disable=protected-access
InternalError (see above for traceback): Blas GEMM launch failed : a.shape=(5, 50), b.shape=(5, 5), m=50, n=5, k=5 [[Node: loss/1st_loss/MatMul = MatMul[T=DT_FLOAT, transpose_a=true, transpose_b=false, _device="/job:localhost/replica:0/task:0/device:GPU:0"](1st/Relu, _arg_1st_target_0_1/_65)]] [[Node: loss/1st_loss/Mean_2/_107 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_744_loss/1st_loss/Mean_2", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]
Has anyone else run into the same problem? How can I fix it? Any help would be appreciated.