PaddlePaddle / PaddleOCR

Awesome multilingual OCR toolkit based on PaddlePaddle: a practical, ultra-lightweight OCR system that supports recognition of 80+ languages, provides data annotation and synthesis tools, and supports training and deployment on server, mobile, embedded, and IoT devices.
https://paddlepaddle.github.io/PaddleOCR/
Apache License 2.0

Suddenly tries to allocate 20 GB of GPU memory, causing Out of memory #11215

Closed · xiemeilong closed this issue 5 months ago

xiemeilong commented 1 year ago

C++ Traceback (most recent call last):

0  paddle::AnalysisPredictor::ZeroCopyRun()
1  paddle::framework::NaiveExecutor::Run()
2  paddle::framework::OperatorBase::Run(paddle::framework::Scope const&, phi::Place const&)
3  paddle::framework::OperatorWithKernel::RunImpl(paddle::framework::Scope const&, phi::Place const&) const
4  paddle::framework::OperatorWithKernel::RunImpl(paddle::framework::Scope const&, phi::Place const&, paddle::framework::RuntimeContext) const
5  phi::KernelImpl<…>::KernelCallHelper<…>::Compute(…)  (template arguments elided)
6  phi::fusion::ConvFusionKernel<float, phi::GPUContext>(…)  (argument list elided)
7  phi::DnnWorkspaceHandle::RunFunc(std::function<void (void)> const&, unsigned long)
8  phi::DnnWorkspaceHandle::ReallocWorkspace(unsigned long)
9  paddle::memory::allocation::Allocator::Allocate(unsigned long)
10 paddle::memory::allocation::StatAllocator::AllocateImpl(unsigned long)
11 paddle::memory::allocation::Allocator::Allocate(unsigned long)
12 paddle::memory::allocation::Allocator::Allocate(unsigned long)
13 paddle::memory::allocation::Allocator::Allocate(unsigned long)
14 paddle::memory::allocation::CUDAAllocator::AllocateImpl(unsigned long)
15 std::string phi::enforce::GetCompleteTraceBackString(std::string&&, char const*, int)
16 phi::enforce::GetCurrentTraceBackString[abi:cxx11]


Error Message Summary:

ResourceExhaustedError:

Out of memory error on GPU 0. Cannot allocate 20.409576GB memory on GPU 0, 4.865601GB memory has been allocated and available memory is only 18.784119GB.

Please check whether there is any other process using GPU 0.

  1. If yes, please stop them, or start PaddlePaddle on another GPU.
  2. If no, please decrease the batch size of your model. (at ../paddle/fluid/memory/allocation/cuda_allocator.cc:86)

File "/home/xiemeilong/labs/nameplate/main.py", line 83, in
    ocr_result = ocr.ocr(img, cls=True)
MemoryError:

(The MemoryError then re-prints the same C++ traceback and ResourceExhaustedError summary verbatim; the repeated text is omitted here.)
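
Frames 7–8 of the traceback (DnnWorkspaceHandle::ReallocWorkspace) show that the ~20 GB request is a cuDNN convolution workspace, which grows with the resolution of the input image. A common mitigation is to cap the detector's input size and shrink the recognition batch. The sketch below uses PaddleOCR 2.x-style constructor parameters and an illustrative image path; treat both as assumptions to check against your installed version.

    # Hedged sketch: cap input size and batch to shrink the conv workspace.
    # Parameter names follow PaddleOCR 2.x; verify against your version.
    import cv2
    from paddleocr import PaddleOCR

    ocr = PaddleOCR(
        use_angle_cls=True,
        lang="ch",
        det_limit_side_len=960,   # longer image side is resized down to this for detection
        det_limit_type="max",
        rec_batch_num=4,          # smaller recognition batch -> lower peak GPU memory
    )

    img = cv2.imread("nameplate.jpg")  # hypothetical input image
    h, w = img.shape[:2]
    scale = 1600.0 / max(h, w)
    if scale < 1:                      # pre-shrink very large photos before OCR
        img = cv2.resize(img, (int(w * scale), int(h * scale)))

    ocr_result = ocr.ocr(img, cls=True)
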
sssjc666 commented 12 months ago

Hi OP, has this problem been solved? Could you share how you solved it?

xiemeilong commented 12 months ago

Not solved. I just wrapped it in a try/except and swallowed the error.
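
A minimal sketch of that workaround (variable names are illustrative; per the log above, Paddle surfaces this failure as a Python MemoryError). Note this silences the failure for the offending image rather than fixing it:

    try:
        ocr_result = ocr.ocr(img, cls=True)
    except MemoryError:
        ocr_result = None  # skip images that trigger the huge workspace allocation
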

Hiiamein commented 11 months ago

I've also encountered the same problem.
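
For anyone else hitting this: since the allocation comes from ReallocWorkspace, another knob worth trying is Paddle's cuDNN workspace flags, set as environment variables before paddle is imported. Flag handling is version-dependent, so treat the exact names and units below as assumptions to verify against your Paddle build.

    import os

    # Assumed Paddle FLAGS_* environment overrides (verify on your version):
    # cap the cuDNN conv workspace (in MB) and skip exhaustive algorithm
    # search, both of which can otherwise trigger very large workspace
    # allocations.
    os.environ["FLAGS_conv_workspace_size_limit"] = "512"
    os.environ["FLAGS_cudnn_exhaustive_search"] = "0"

    from paddleocr import PaddleOCR  # import after setting the flags

    ocr = PaddleOCR(use_angle_cls=True, lang="ch")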