xiaolongzhuanshi opened 5 months ago
Got your question; I'll contact the inference team to help take a look.
OK
Once the predictor has been instantiated, it keeps holding part of the memory; that resource is only released when the predictor is destructed.
Sorry to bother you, but how exactly do I destruct it? Explicitly deleting it with del self.predictor doesn't seem to have any effect.
You can try executing both del config and del predictor. We recommend wrapping the inference execution in a function, so everything is destructed automatically once it goes out of the function scope and no manual management is needed. You can refer to the demo: https://github.com/PaddlePaddle/Paddle-Inference-Demo/tree/master/python/gpu/resnet50
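A minimal sketch of the function-scoped pattern suggested above (the file names and input shape are illustrative placeholders, not taken from the demo):

```python
import numpy as np
from paddle import inference

def infer_once(model_file, params_file, batch):
    # config and predictor are local to this function, so both go out
    # of scope when it returns and should be destructed automatically.
    config = inference.Config(model_file, params_file)
    predictor = inference.create_predictor(config)

    input_handle = predictor.get_input_handle(predictor.get_input_names()[0])
    input_handle.copy_from_cpu(batch)
    predictor.run()

    output_handle = predictor.get_output_handle(predictor.get_output_names()[0])
    return output_handle.copy_to_cpu()

# e.g. result = infer_once("inference.pdmodel", "inference.pdiparams",
#                          np.ones((1, 3, 224, 224), dtype=np.float32))
```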
Thanks for the reply. I experimented again and the problem is still not solved: I executed both del config and del predictor, wrapped the inference execution in a function, and deleted them inside the function as well, but that part of the memory is still not released. Below is my code structure; I hope you can help clarify:
```python
def __init__(self, args):
    self.args = args
    self.det_algorithm = args.det_algorithm
    pre_process_list = [{
        'DetResizeForTest': {
            'limit_side_len': args.det_limit_side_len,
            'limit_type': args.det_limit_type
        }
    }, {
        'NormalizeImage': {
            'std': [0.229, 0.224, 0.225],
            'mean': [0.485, 0.456, 0.406],
            'scale': '1./255.',
            'order': 'hwc'
        }
    }, {
        'ToCHWImage': None
    }, {
        'KeepKeys': {
            'keep_keys': ['image', 'shape']
        }
    }]
    postprocess_params = {}
    if self.det_algorithm == "DB":
        postprocess_params['name'] = 'DBPostProcess'
        postprocess_params["thresh"] = args.det_db_thresh
        postprocess_params["box_thresh"] = args.det_db_box_thresh
        postprocess_params["max_candidates"] = 1000
        postprocess_params["unclip_ratio"] = args.det_db_unclip_ratio
        postprocess_params["use_dilation"] = args.use_dilation
        if hasattr(args, "det_db_score_mode"):
            postprocess_params["score_mode"] = args.det_db_score_mode
    elif self.det_algorithm == "EAST":
        postprocess_params['name'] = 'EASTPostProcess'
        postprocess_params["score_thresh"] = args.det_east_score_thresh
        postprocess_params["cover_thresh"] = args.det_east_cover_thresh
        postprocess_params["nms_thresh"] = args.det_east_nms_thresh
    elif self.det_algorithm == "SAST":
        pre_process_list[0] = {
            'DetResizeForTest': {
                'resize_long': args.det_limit_side_len
            }
        }
        postprocess_params['name'] = 'SASTPostProcess'
        postprocess_params["score_thresh"] = args.det_sast_score_thresh
        postprocess_params["nms_thresh"] = args.det_sast_nms_thresh
        self.det_sast_polygon = args.det_sast_polygon
        if self.det_sast_polygon:
            postprocess_params["sample_pts_num"] = 6
            postprocess_params["expand_scale"] = 1.2
            postprocess_params["shrink_ratio_of_width"] = 0.2
        else:
            postprocess_params["sample_pts_num"] = 2
            postprocess_params["expand_scale"] = 1.0
            postprocess_params["shrink_ratio_of_width"] = 0.3
    else:
        logger.info("unknown det_algorithm:{}".format(self.det_algorithm))
        sys.exit(0)
    self.preprocess_op = create_operators(pre_process_list)
    self.postprocess_op = build_post_process(postprocess_params)
    self.predictor, self.input_tensor, self.output_tensors = utility.create_predictor(
        args, 'det', logger)  # paddle.jit.load(args.det_model_dir)
    # self.predictor.eval()

def __call__(self, img):
    ori_im = img.copy()
    data = {'image': img}
    data = transform(data, self.preprocess_op)
    img, shape_list = data
    if img is None:
        return None, 0
    img = np.expand_dims(img, axis=0)
    shape_list = np.expand_dims(shape_list, axis=0)
    img = img.copy()
    starttime = time.time()
    self.input_tensor.copy_from_cpu(img)
    import gc
    import paddle
    import psutil
    # Check initial memory usage
    process = psutil.Process()
    print(f"Initial memory usage: {process.memory_info().rss / 1024 ** 2:.2f} MB")
    # Run inference
    run(self.predictor)
    print(f"After run: {process.memory_info().rss / 1024 ** 2:.2f} MB")
    # Clear intermediate tensors
    self.predictor.clear_intermediate_tensor()
    print(f"After clear_intermediate_tensor: {process.memory_info().rss / 1024 ** 2:.2f} MB")
    # Try to shrink memory
    self.predictor.try_shrink_memory()
    print(f"After try_shrink_memory: {process.memory_info().rss / 1024 ** 2:.2f} MB")
    # Explicitly delete the predictor
    del self.predictor
    print(f"After del self.predictor: {process.memory_info().rss / 1024 ** 2:.2f} MB")
    # Manually trigger garbage collection
    gc.collect()
    print(f"After gc.collect: {process.memory_info().rss / 1024 ** 2:.2f} MB")
    # Free cached GPU memory
    paddle.device.cuda.empty_cache()
    print(f"After cuda.empty_cache: {process.memory_info().rss / 1024 ** 2:.2f} MB")
    return [], []

def run(predictor):
    predictor.run()
    # Note: this only removes the local name binding; the object still
    # referenced by self.predictor is not destructed here.
    del predictor
    return []

def create_predictor(args, mode, logger):
    if mode == "det":
        model_dir = args.det_model_dir
    elif mode == 'cls':
        model_dir = args.cls_model_dir
    elif mode == 'rec':
        model_dir = args.rec_model_dir
    else:
        model_dir = args.e2e_model_dir
    if model_dir is None:
        logger.info("not find {} model file path {}".format(mode, model_dir))
        sys.exit(0)
    model_file_path = model_dir + "/inference.pdmodel"
    params_file_path = model_dir + "/inference.pdiparams"
    if not os.path.exists(model_file_path):
        logger.info("not find model file path {}".format(model_file_path))
        sys.exit(0)
    if not os.path.exists(params_file_path):
        logger.info("not find params file path {}".format(params_file_path))
        sys.exit(0)
    config = inference.Config(model_file_path, params_file_path)
    if args.use_gpu:
        config.enable_use_gpu(args.gpu_mem, 0)
        if args.use_tensorrt:
            config.enable_tensorrt_engine(
                precision_mode=inference.PrecisionType.Half
                if args.use_fp16 else inference.PrecisionType.Float32,
                max_batch_size=args.max_batch_size)
    else:
        config.disable_gpu()
        config.set_cpu_math_library_num_threads(6)
        if args.enable_mkldnn:
            # cache 10 different shapes for mkldnn to avoid memory leak
            config.set_mkldnn_cache_capacity(10)
            config.enable_mkldnn()
            # TODO LDOUBLEV: fix mkldnn bug when batch_size > 1
            # config.set_mkldnn_op({'conv2d', 'depthwise_conv2d', 'pool2d', 'batch_norm'})
            args.rec_batch_num = 1
    # enable memory optim
    config.enable_memory_optim()
    config.disable_glog_info()
    config.delete_pass("conv_transpose_eltwiseadd_bn_fuse_pass")
    config.switch_use_feed_fetch_ops(False)
    config.set_cpu_math_library_num_threads(4)
    # Call summary() and print the config info
    summary_info = config.summary()
    print(summary_info)
    # create predictor
    predictor = inference.create_predictor(config)
    input_names = predictor.get_input_names()
    for name in input_names:
        input_tensor = predictor.get_input_handle(name)
    output_names = predictor.get_output_names()
    output_tensors = []
    for output_name in output_names:
        output_tensor = predictor.get_output_handle(output_name)
        output_tensors.append(output_tensor)
    return predictor, input_tensor, output_tensors
```
Output after running the code:

```
+--------------------------+------------------------------------------------------------------+
| Option                   | Value                                                            |
+--------------------------+------------------------------------------------------------------+
| model_file               | C:\Users\Administrator/.paddleocr/2.1\rec\ch/inference.pdmodel   |
| params_file              | C:\Users\Administrator/.paddleocr/2.1\rec\ch/inference.pdiparams |
+--------------------------+------------------------------------------------------------------+
| cpu_math_thread          | 4                                                                |
| enable_mkldnn            | false                                                            |
| mkldnn_cache_capacity    | 10                                                               |
+--------------------------+------------------------------------------------------------------+
| use_gpu                  | false                                                            |
+--------------------------+------------------------------------------------------------------+
| use_xpu                  | false                                                            |
+--------------------------+------------------------------------------------------------------+
| ir_optim                 | true                                                             |
| ir_debug                 | false                                                            |
| memory_optim             | true                                                             |
| enable_profile           | false                                                            |
| enable_log               | false                                                            |
| collect_shape_range_info | false                                                            |
+--------------------------+------------------------------------------------------------------+
```
```
Initial memory usage: 288.16 MB
After run: 382.55 MB
After clear_intermediate_tensor: 382.55 MB
After try_shrink_memory: 382.55 MB
After del self.predictor: 372.36 MB
After gc.collect: 372.36 MB
After cuda.empty_cache: 372.36 MB
```
> You can try executing both del config and del predictor. We recommend wrapping the inference execution in a function, so everything is destructed automatically once it goes out of the function scope and no manual management is needed. You can refer to the demo: https://github.com/PaddlePaddle/Paddle-Inference-Demo/tree/master/python/gpu/resnet50
I ran this demo locally and still hit the same problem. Here are my modifications to resnet50/infer_resnet.py:
```python
if __name__ == '__main__':
    args = parse_args()
    pred = init_predictor(args)
    img = cv2.imread('E:/PHPStudy/phpstudy_pro/WWW/php_e-invoice/pdf/11223344.png')
    img = preprocess(img)
    # img = np.ones((1, 3, 224, 224)).astype(np.float32)
    import psutil
    import gc
    process = psutil.Process()
    print(f"Initial memory usage: {process.memory_info().rss / 1024 ** 2:.2f} MB")
    result = run(pred, [img])
    print(f"After del self.predictor: {process.memory_info().rss / 1024 ** 2:.2f} MB")
    # Manually trigger garbage collection
    gc.collect()
    print(f"After gc.collect: {process.memory_info().rss / 1024 ** 2:.2f} MB")
    print("class index: ", np.argmax(result[0][0]))
```
Output after execution:
```
I0527 11:09:54.261554 3660 naive_executor.cc:102] --- skip [save_infer_model/scale_0.tmp_1], fetch -> fetch
Initial memory usage: 256.06 MB
I0527 11:09:54.308559 3660 device_context.cc:737] oneDNN v2.5.4
After del self.predictor: 375.02 MB
After gc.collect: 375.02 MB
class index:  916
```
As you can see, the memory that grows after predictor.run() is still not released; at the moment it is only freed when the process is stopped.
Yes, that's because the predictor's lifetime is still active; to release all the resources, the predictor needs to be destructed.
del predictor has no effect. The memory is not released when execution leaves the instance's scope, and ending the thread doesn't release it either; it is only released when the process ends.
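If the memory really is only returned at process exit, one workaround is to isolate inference in a short-lived worker process so the OS reclaims everything when the worker exits. A minimal sketch, assuming a generic single-input model (infer_in_subprocess and _worker are hypothetical helper names; reloading the model on every call trades latency for a bounded footprint):

```python
import multiprocessing as mp
import numpy as np

def _worker(model_file, params_file, batch, queue):
    # Create the predictor inside the child process: all of its memory
    # belongs to the child and is returned to the OS when the child exits.
    from paddle import inference
    config = inference.Config(model_file, params_file)
    predictor = inference.create_predictor(config)
    handle = predictor.get_input_handle(predictor.get_input_names()[0])
    handle.copy_from_cpu(batch)
    predictor.run()
    out = predictor.get_output_handle(predictor.get_output_names()[0])
    queue.put(out.copy_to_cpu())

def infer_in_subprocess(model_file, params_file, batch):
    queue = mp.Queue()
    proc = mp.Process(target=_worker, args=(model_file, params_file, batch, queue))
    proc.start()
    result = queue.get()  # read before join to avoid blocking on a full pipe
    proc.join()
    return result

if __name__ == '__main__':  # guard is required for multiprocessing on Windows
    out = infer_in_subprocess("inference.pdmodel", "inference.pdiparams",
                              np.ones((1, 3, 224, 224), dtype=np.float32))
```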
I ran into the same problem. Model inference runs as an online service, and after running for a while the memory usage gradually grows from 6 GB to 40 GB.
Please ask your question
Here is how I call it:

```python
from paddle import inference
config = inference.Config(model_file_path, params_file_path)
predictor = inference.create_predictor(config)
```

After running the prediction:

```
After run: 527.06 MB
After clear_intermediate_tensor: 527.06 MB
After try_shrink_memory: 527.06 MB
After gc.collect: 527.06 MB
127.0.0.1 - - [23/May/2024 15:36:15] "POST /upload HTTP/1.1" 200 -
```

In other words, the roughly 300 MB of memory added after running the inference model cannot be released no matter what I try, and I don't know how to solve it.
Environment: OS: Windows 10; Python: 3.8; PaddlePaddle: paddlepaddle-2.3.2-cp38-cp38-win_amd64.whl
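One hedged suggestion for long-running CPU services like this, grounded in the create_predictor code quoted earlier: when oneDNN/MKL-DNN is enabled and input shapes vary (typical for OCR), each new shape can add cached primitives, so bounding the shape cache may cap part of the growth. A sketch of that configuration (file names are placeholders):

```python
from paddle import inference

config = inference.Config("inference.pdmodel", "inference.pdiparams")
config.disable_gpu()
config.enable_mkldnn()
# Cache oneDNN primitives for at most 10 distinct input shapes; without a
# bound, every new input shape can enlarge the cache (see the comment in
# create_predictor above: "cache 10 different shapes for mkldnn to avoid
# memory leak").
config.set_mkldnn_cache_capacity(10)
predictor = inference.create_predictor(config)
```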