This log file path is /data/paddleX/projects/P0003/T0006/err.log
注意:标志为WARNING/INFO类的仅为警告或提示类信息,非错误信息
/root/miniconda3/lib/python3.7/site-packages/paddle/fluid/executor.py:1070: UserWarning: The following exception is not an EOF exception.
"The following exception is not an EOF exception.")
Process Process-15:
Traceback (most recent call last):
File "/root/miniconda3/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
self.run()
File "/root/miniconda3/lib/python3.7/multiprocessing/process.py", line 99, in run
self._target(*self._args, **self._kwargs)
File "/root/miniconda3/lib/python3.7/site-packages/paddlex_restful/restful/project/operate.py", line 88, in _call_paddlex_train
train(task_path, dataset_path, params['train'])
File "/root/miniconda3/lib/python3.7/site-packages/paddlex_restful/restful/project/train/detection.py", line 205, in train
resume_checkpoint=params.resume_checkpoint)
File "/root/miniconda3/lib/python3.7/site-packages/paddlex/cv/models/yolo_v3.py", line 174, in train
early_stop_patience, resume_checkpoint, False)
File "/root/miniconda3/lib/python3.7/site-packages/paddlex/cv/models/ppyolo.py", line 366, in train
early_stop_patience=early_stop_patience)
File "/root/miniconda3/lib/python3.7/site-packages/paddlex/cv/models/base.py", line 500, in train_loop
fetch_list=list(self.train_outputs.values()))
File "/root/miniconda3/lib/python3.7/site-packages/paddle/fluid/executor.py", line 1071, in run
six.reraise(*sys.exc_info())
File "/root/miniconda3/lib/python3.7/site-packages/six.py", line 703, in reraise
raise value
File "/root/miniconda3/lib/python3.7/site-packages/paddle/fluid/executor.py", line 1066, in run
return_merged=return_merged)
File "/root/miniconda3/lib/python3.7/site-packages/paddle/fluid/executor.py", line 1167, in _run_impl
return_merged=return_merged)
File "/root/miniconda3/lib/python3.7/site-packages/paddle/fluid/executor.py", line 879, in _run_parallel
tensors = exe.run(fetch_var_names, return_merged)._move_to_list()
paddle.fluid.core_avx.EnforceNotMet:
--------------------------------------------
C++ Call Stacks (More useful to developers):
--------------------------------------------
0 std::string paddle::platform::GetTraceBackString<char const*>(char const*&&, char const*, int)
1 paddle::platform::EnforceNotMet::EnforceNotMet(std::__exception_ptr::exception_ptr, char const*, int)
2 paddle::operators::BatchNormKernel<paddle::platform::CUDADeviceContext, float>::Compute(paddle::framework::ExecutionContext const&) const
3 std::_Function_handler<void (paddle::framework::ExecutionContext const&), paddle::framework::OpKernelRegistrarFunctor<paddle::platform::CUDAPlace, false, 0ul, paddle::operators::BatchNormKernel<paddle::platform::CUDADeviceContext, float>, paddle::operators::BatchNormKernel<paddle::platform::CUDADeviceContext, double>, paddle::operators::BatchNormKernel<paddle::platform::CUDADeviceContext, paddle::platform::float16> >::operator()(char const*, char const*, int) const::{lambda(paddle::framework::ExecutionContext const&)#1}>::_M_invoke(std::_Any_data const&, paddle::framework::ExecutionContext const&)
4 paddle::framework::OperatorWithKernel::RunImpl(paddle::framework::Scope const&, paddle::platform::Place const&, paddle::framework::RuntimeContext*) const
5 paddle::framework::OperatorWithKernel::RunImpl(paddle::framework::Scope const&, paddle::platform::Place const&) const
6 paddle::framework::OperatorBase::Run(paddle::framework::Scope const&, paddle::platform::Place const&)
7 paddle::framework::details::ComputationOpHandle::RunImpl()
8 paddle::framework::details::FastThreadedSSAGraphExecutor::RunOpSync(paddle::framework::details::OpHandleBase*)
9 paddle::framework::details::FastThreadedSSAGraphExecutor::RunOp(paddle::framework::details::OpHandleBase*, std::shared_ptr<paddle::framework::BlockingQueue<unsigned long> > const&, unsigned long*)
10 std::_Function_handler<std::unique_ptr<std::__future_base::_Result_base, std::__future_base::_Result_base::_Deleter> (), std::__future_base::_Task_setter<std::unique_ptr<std::__future_base::_Result<void>, std::__future_base::_Result_base::_Deleter>, void> >::_M_invoke(std::_Any_data const&)
11 std::__future_base::_State_base::_M_do_set(std::function<std::unique_ptr<std::__future_base::_Result_base, std::__future_base::_Result_base::_Deleter> ()>&, bool&)
12 ThreadPool::ThreadPool(unsigned long)::{lambda()#1}::operator()() const
------------------------------------------
Python Call Stacks (More useful to users):
------------------------------------------
File "/root/miniconda3/lib/python3.7/site-packages/paddle/fluid/framework.py", line 2610, in append_op
attrs=kwargs.get("attrs", None))
File "/root/miniconda3/lib/python3.7/site-packages/paddle/fluid/layer_helper.py", line 43, in append_op
return self.main_program.current_block().append_op(*args, **kwargs)
File "/root/miniconda3/lib/python3.7/site-packages/paddle/fluid/layers/nn.py", line 4207, in batch_norm
type="batch_norm", inputs=inputs, outputs=outputs, attrs=attrs)
File "/root/miniconda3/lib/python3.7/site-packages/paddlex/cv/nets/mobilenet_v1.py", line 96, in _conv_norm
moving_variance_name=bn_name + '_variance')
File "/root/miniconda3/lib/python3.7/site-packages/paddlex/cv/nets/mobilenet_v1.py", line 156, in __call__
input, 3, int(32 * scale), 2, 1, name=self.prefix_name + "conv1")
File "/root/miniconda3/lib/python3.7/site-packages/paddlex/cv/nets/detection/yolo_v3.py", line 507, in build_net
feats = self.backbone(image)
File "/root/miniconda3/lib/python3.7/site-packages/paddlex/cv/models/ppyolo.py", line 175, in build_net
model_out = model.build_net(inputs)
File "/root/miniconda3/lib/python3.7/site-packages/paddlex/cv/models/base.py", line 105, in build_program
self.train_inputs, self.train_outputs = self.build_net(mode='train')
File "/root/miniconda3/lib/python3.7/site-packages/paddlex/cv/models/ppyolo.py", line 346, in train
self.build_program()
File "/root/miniconda3/lib/python3.7/site-packages/paddlex/cv/models/yolo_v3.py", line 174, in train
early_stop_patience, resume_checkpoint, False)
File "/root/miniconda3/lib/python3.7/site-packages/paddlex_restful/restful/project/train/detection.py", line 205, in train
resume_checkpoint=params.resume_checkpoint)
File "/root/miniconda3/lib/python3.7/site-packages/paddlex_restful/restful/project/operate.py", line 88, in _call_paddlex_train
train(task_path, dataset_path, params['train'])
File "/root/miniconda3/lib/python3.7/multiprocessing/process.py", line 99, in run
self._target(*self._args, **self._kwargs)
File "/root/miniconda3/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
self.run()
File "/root/miniconda3/lib/python3.7/multiprocessing/popen_fork.py", line 74, in _launch
code = process_obj._bootstrap()
File "/root/miniconda3/lib/python3.7/multiprocessing/popen_fork.py", line 20, in __init__
self._launch(process_obj)
File "/root/miniconda3/lib/python3.7/multiprocessing/context.py", line 277, in _Popen
return Popen(process_obj)
File "/root/miniconda3/lib/python3.7/multiprocessing/context.py", line 223, in _Popen
return _default_context.get_context().Process._Popen(process_obj)
File "/root/miniconda3/lib/python3.7/multiprocessing/process.py", line 112, in start
self._popen = self._Popen(self)
File "/root/miniconda3/lib/python3.7/site-packages/paddlex_restful/restful/project/operate.py", line 527, in train_model
p.start()
File "/root/miniconda3/lib/python3.7/site-packages/paddlex_restful/restful/project/task.py", line 402, in start_train_task
p = train_model(path)
File "/root/miniconda3/lib/python3.7/site-packages/paddlex_restful/restful/app.py", line 468, in task_train
ret = start_train_task(data, SD.workspace, SD.monitored_processes)
File "/root/miniconda3/lib/python3.7/site-packages/flask/app.py", line 1936, in dispatch_request
return self.view_functions[rule.endpoint](**req.view_args)
File "/root/miniconda3/lib/python3.7/site-packages/flask/app.py", line 1950, in full_dispatch_request
rv = self.dispatch_request()
File "/root/miniconda3/lib/python3.7/site-packages/flask/app.py", line 2447, in wsgi_app
response = self.full_dispatch_request()
File "/root/miniconda3/lib/python3.7/site-packages/flask/app.py", line 2464, in __call__
return self.wsgi_app(environ, start_response)
File "/root/miniconda3/lib/python3.7/site-packages/werkzeug/serving.py", line 292, in execute
application_iter = app(environ, start_response)
File "/root/miniconda3/lib/python3.7/site-packages/werkzeug/serving.py", line 304, in run_wsgi
execute(self.server.app)
File "/root/miniconda3/lib/python3.7/site-packages/werkzeug/serving.py", line 362, in handle_one_request
return self.run_wsgi()
File "/root/miniconda3/lib/python3.7/http/server.py", line 426, in handle
self.handle_one_request()
File "/root/miniconda3/lib/python3.7/site-packages/werkzeug/serving.py", line 327, in handle
rv = BaseHTTPRequestHandler.handle(self)
File "/root/miniconda3/lib/python3.7/socketserver.py", line 720, in __init__
self.handle()
File "/root/miniconda3/lib/python3.7/socketserver.py", line 360, in finish_request
self.RequestHandlerClass(request, client_address, self)
File "/root/miniconda3/lib/python3.7/socketserver.py", line 650, in process_request_thread
self.finish_request(request, client_address)
File "/root/miniconda3/lib/python3.7/threading.py", line 870, in run
self._target(*self._args, **self._kwargs)
File "/root/miniconda3/lib/python3.7/threading.py", line 926, in _bootstrap_inner
self.run()
File "/root/miniconda3/lib/python3.7/threading.py", line 890, in _bootstrap
self._bootstrap_inner()
----------------------
Error Message Summary:
----------------------
ExternalError: Cudnn error, CUDNN_STATUS_BAD_PARAM at (/paddle/paddle/fluid/operators/batch_norm_op.cu:319)
[operator < batch_norm > error]
问题类型:模型训练
环境:
paddle:
====================
使用数据:目标检测示例数据(昆虫项目)