learning-boy commented 4 years ago

import os
import time

import cv2
import numpy as np
import paddle.fluid as fluid
from PIL import Image
from PIL import ImageDraw

# Runtime configuration for this inference script.
train_parameters = {
    # Disabled in the original script:
    # "label_dict": {0: "apple", 1: "banana", 2: "orange"},
    "label_dict": {0: "NO_MATURE"},  # class id -> display name
    "use_gpu": False,
    # Original note: the stock edge length is 608 and was to be reduced to 448
    # to speed up training and prediction.
    # NOTE(review): the value is still 608 -- confirm which size is intended.
    "input_size": [3, 608, 608],
}

target_size = train_parameters['input_size']  # target size, e.g. 3,504,377
label_dict = train_parameters['label_dict']  # labels kept as a dict

# Disabled in the original script; train_parameters defines none of these keys,
# so enabling them as-is would raise KeyError.
# anchors = train_parameters['anchors']
# anchor_mask = train_parameters['anchor_mask']

print(label_dict[0])

# class_dim = train_parameters['class_dim']
# print("label_dict:{} class dim:{}".format(label_dict, class_dim))

place = fluid.CUDAPlace(0) if train_parameters['use_gpu'] else fluid.CPUPlace()
exe = fluid.Executor(place)

path = "D:/moxing"  # _mobilenet_v1
# inference_program is the program used for prediction.
[inference_program, feed_target_names, fetch_targets] = fluid.io.load_inference_model(
    dirname=path, executor=exe, model_filename='model', params_filename='params')

print("feed_target_names:", feed_target_names)
# The original printed feed_target_names a second time under this label.
print("fetch_targets:", fetch_targets)

class inference():
    """Run the loaded inference model on one image and save an annotated copy.

    The methods rely on module-level state defined earlier in this script:
    ``target_size``, ``label_dict``, ``exe``, ``inference_program``,
    ``feed_target_names`` and ``fetch_targets``.
    """

    # Per-channel mean / standard deviation applied after dividing pixels by 255.
    _CHANNEL_MEAN = (0.485, 0.456, 0.406)
    _CHANNEL_STD = (0.229, 0.224, 0.225)

    def __init__(self):
        print("8888888888")

    def draw_bbox_image(self, img, boxes, labels, scores, save_name):
        """Draw the detected bounding boxes on the image and save it.

        Only detections whose score is greater than 0.1 are drawn.

        :param img: PIL image to draw on (modified in place)
        :param boxes: iterable of boxes, each indexed as xmin, ymin, xmax, ymax
        :param labels: iterable of class ids, used as keys into ``label_dict``
        :param scores: iterable of confidence scores, parallel to ``boxes``
        :param save_name: path the annotated image is written to
        :return: None
        """
        draw = ImageDraw.Draw(img)
        for box, label, score in zip(boxes, labels, scores):
            if not score > 0.1:
                continue
            xmin, ymin, xmax, ymax = box[0], box[1], box[2], box[3]
            draw.rectangle((xmin, ymin, xmax, ymax), None, 'red')
            draw.text((xmin, ymin), label_dict[label], (255, 0, 255))
        img.save(save_name)

    def resize_img(self, img, target_size):
        """Resize the image to the fixed size given by ``target_size[1:]``.

        The image is stretched to that size with bilinear resampling; the
        aspect ratio is NOT preserved.

        :param img: PIL image
        :param target_size: three-element sequence; the first element is
            skipped and the remaining two are passed to PIL as the size
            (PIL interprets a size as (width, height))
        :return: the resized PIL image
        """
        return img.resize(tuple(target_size[1:]), Image.BILINEAR)

    def read_image(self, img_path):
        """Load an image and turn it into a normalized network input.

        :param img_path: path of the image file
        :return: tuple ``(origin, img, resized_img)`` where ``origin`` is the
            image as opened, ``img`` is a float32 array of shape (1, C, H, W)
            and ``resized_img`` is a copy of the resized image taken before
            the RGB conversion
        """
        origin = Image.open(img_path)
        img = self.resize_img(origin, target_size)
        resized_img = img.copy()
        if img.mode != 'RGB':
            img = img.convert('RGB')
        # HWC -> CHW: put the colour channel first.
        img = np.array(img).astype('float32').transpose((2, 0, 1))
        img = img / 255.0
        print("img:", img.shape)
        # Per-channel normalization: subtract the mean, divide by the std.
        for channel, (mean, std) in enumerate(
                zip(self._CHANNEL_MEAN, self._CHANNEL_STD)):
            img[channel, :, :] -= mean
            img[channel, :, :] /= std
        print("img:", img.shape)
        img = img[np.newaxis, :]  # add the batch dimension
        print("img:", img.shape)
        return origin, img, resized_img

    def infer(self, image_path):
        """Run prediction on one image and save the result as a new image.

        The annotated image is written next to the input, with the input's
        extension replaced by ``-result.jpg``.

        :param image_path: path of the image to predict on
        :return: None
        """
        origin, tensor_img, resized_img = self.read_image(image_path)

        input_w, input_h = origin.size
        image_shape = np.array([input_h, input_w], dtype='int32')
        t1 = time.time()

        # The second feed is the original image size, with a batch axis added.
        batch_outputs = exe.run(inference_program,
                                feed={feed_target_names[0]: tensor_img,
                                      feed_target_names[1]: image_shape[np.newaxis, :]},
                                fetch_list=fetch_targets,
                                return_numpy=False)

        period = time.time() - t1
        print("predict cost time:{0}".format("%2.2f sec" % period))

        bboxes = np.array(batch_outputs[0])

        # Each detection row is expected to hold 6 values: label, score, 4 box
        # coordinates. Anything else is treated as "nothing detected".
        if bboxes.ndim != 2 or bboxes.shape[1] != 6:
            print("No object found in {}".format(image_path))
            return
        labels = bboxes[:, 0].astype('int32')
        scores = bboxes[:, 1].astype('float32')
        boxes = bboxes[:, 2:].astype('float32')

        # splitext leaves the path intact when it has no extension; slicing at
        # rfind('.') dropped the last character or cut at a dot in a directory.
        stem, _ = os.path.splitext(image_path)
        out_path = stem + '-result.jpg'
        self.draw_bbox_image(origin, boxes, labels, scores, out_path)

# Script entry point: run prediction on a sample image.
if __name__ == '__main__':
    image_path = "D:/71.jpg"
    a = inference()
    a.infer(image_path)

这是之前加载yolo模型预测的代码，请问在此基础上，怎么调整才能预测使用mask_rcnn训练出来的模型呢

learning-boy commented 4 years ago

使用 [inference_program, feed_target_names, fetch_targets] = fluid.io.load_inference_model(dirname=path, executor=exe, model_filename='model', params_filename='params') 加载模型后，打印出的 feed_target_names 为 ['image', 'im_info', 'im_shape']，随后报错提示：Exception: 'feed_targets' does not have im_shape variable。是不是需要在 exe.run 中指定 im_shape？请问该怎么指定？

jerrywgz commented 4 years ago

https://github.com/PaddlePaddle/PaddleDetection/blob/master/tools/export_model.py mask rcnn 的话需要重新生成模型，不能直接加载通过yolo保存的预测模型，相关文档可以参考 https://github.com/PaddlePaddle/PaddleDetection/blob/master/docs/advanced_tutorials/inference/EXPORT_MODEL.md

learning-boy commented 4 years ago

我原本就是通过tools/export_model.py将训练好的mask_RCNN模型导出的，但没特意指定输入图像的大小，mask_RCNN需要这样做吗？您说“不能直接加载通过yolo保存的预测模型”是什么意思？本身就是mask_RCNN模型呀，是在加载导出的模型时报错。在python端预测，是需要增加预处理吗？还有一个问题想问问您：使用YOLOV3对自定义数据集的mAP可以达到90%多，但是用mask_RCNN只有70%多，请问这是什么原因？

jerrywgz commented 4 years ago

你这里的预测代码是自己写的吧，确实在exe run的时候需要增加im_shape这个输入，这个输入表示原始输入图片的大小，具体代码可以参考https://github.com/PaddlePaddle/PaddleDetection/blob/master/tools/cpp_infer.py#L232

关于精度低的问题，可以首先确认下mask_rcnn的类别数设置是否增加了背景类；另外，yolo在训练过程中增加了很多预处理方式，而mask rcnn中只有随机翻转和归一化，预处理相对简单。还有一点可以改进的是anchor的设置方式，可能yolo目前默认设置的anchor和你这边自定义数据集的任务比较匹配。

learning-boy commented 4 years ago

谢谢您的回复！关于精度中双阶段模型设置背景类：我采用的是coco数据集，在使用labelme标注完成后，使用coco自带的脚本转成coco数据集json格式时，其中有个label.txt的文档，前两行是ignore、background，具体格式如下，其中NO_Mature是目标类别，我的数据集只有一个类别： ``` ignore background NO_Mature ``` 在转成json时，需要带着这两个，否则会报错。然后我在paddle-detection的mask_rcnn配置文件中，class_num设置的是3或者4，这两个训练都不会报错，但设置成2，训练时会报错。可我实际的数据只有一个类别，加上背景类，class_num=2才是对的，这是什么原因？还有就是预处理方式，如果增加的话，是不是需要修改train.py？谢谢您在假期中回复，麻烦您了。

jerrywgz commented 4 years ago

那你这边转换得到的ignore和background在json格式下有什么区别呢？另外如果设置成2报错是什么样的？

关于预处理方式，可以直接在配置文件中增加，个人认为mask-rcnn的效果不理想主要还是和你的类别数和数据有关

learning-boy commented 4 years ago

这是将num_class设置为2，训练所报的错误： 2020-05-06 22:18:04,945-INFO: 160 samples in file /home/aistudio/PaddleDetection-release-0.2/dataset/data_4/annotations/instances_train2017.json 2020-05-06 22:18:04,946-INFO: places would be ommited when DataLoader is not iterable I0506 22:18:05.023321 344 parallel_executor.cc:440] The Program will be executed on CUDA using ParallelExecutor, 1 cards are used, so 1 programs are executed in parallel. I0506 22:18:05.073007 344 build_strategy.cc:365] SeqOnlyAllReduceOps:0, num_trainers:1 I0506 22:18:05.162714 344 parallel_executor.cc:307] Inplace strategy is enabled, when build_strategy.enable_inplace = True I0506 22:18:05.193831 344 parallel_executor.cc:375] Garbage collection strategy is enabled, when FLAGS_eager_delete_tensor_gb = 0 W0506 22:18:05.410173 432 init.cc:209] Warning: PaddlePaddle catches a failure signal, it may not work properly W0506 22:18:05.410246 432 init.cc:211] You could check whether you killed PaddlePaddle thread/process accidentally or report the case to PaddlePaddle W0506 22:18:05.410254 432 init.cc:214] The detail failure signal is:

W0506 22:18:05.410271 432 init.cc:217] W0506 22:18:05.412127 432 init.cc:217] PC: @ W0506 22:18:05.412479 432 init.cc:217] W0506 22:18:05.413931 432 init.cc:217] W0506 22:18:05.415714 432 init.cc:217] W0506 22:18:05.417273 432 init.cc:217] W0506 22:18:05.420707 432 init.cc:217] W0506 22:18:05.422431 432 init.cc:217] W0506 22:18:05.423754 432 init.cc:217] W0506 22:18:05.426916 432 init.cc:217] W0506 22:18:05.428175 432 init.cc:217] W0506 22:18:05.430378 432 init.cc:217] W0506 22:18:05.433609 432 init.cc:217] W0506 22:18:05.435348 432 init.cc:217] W0506 22:18:05.437835 432 init.cc:217] W0506 22:18:05.440111 432 init.cc:217] W0506 22:18:05.442603 432 init.cc:217] W0506 22:18:05.443538 432 init.cc:217] W0506 22:18:05.446483 432 init.cc:217] W0506 22:18:05.449410 432 init.cc:217] W0506 22:18:05.450861 432 init.cc:217] W0506 22:18:05.451678 432 init.cc:217] W0506 22:18:05.454497 432 init.cc:217] W0506 22:18:05.455449 432 init.cc:217] W0506 22:18:05.456723 432 init.cc:217] W0506 22:18:05.457922 432 init.cc:217] W0506 22:18:05.459084 432 init.cc:217] @ Segmentation fault (core dumped) Aborted at 1588774685 (unix time) try "date -d @1588774685" if you are using GNU date 0x0 (unknown) SIGSEGV (@0x100000029) received by PID 344 (TID 0x7fd40f7fe700) from PID 41; stack trace: @ 0x7fd598cea390 (unknown) @ 0x7fd535320171 paddle::memory::detail::MemoryBlock::Merge() @ 0x7fd53531f920 paddle::memory::detail::BuddyAllocator::Free() @ 0x7fd53530ee85 paddle::memory::legacy::Free<>() @ 0x7fd53530fd25 paddle::memory::allocation::NaiveBestFitAllocator::FreeImpl() @ 0x7fd532716659 std::_Sp_counted_base<>::_M_release() @ 0x7fd534a1c2bd paddle::operators::GenerateMaskLabelsKernel<>::Compute() @ 0x7fd534a1d853 _ZNSt17_Function_handlerIFvRKN6paddle9framework16ExecutionContextEEZNKS1_24OpKernelRegistrarFunctorINS0_8platform8CPUPlaceELb0ELm0EINS0_9operators24GenerateMaskLabelsKernelIfEEEEclEPKcSE_iEUlS4_E_E9_M_invokeERKSt9_AnydataS4 @ 0x7fd53526d616 
paddle::framework::OperatorWithKernel::RunImpl() @ 0x7fd53526dde1 paddle::framework::OperatorWithKernel::RunImpl() @ 0x7fd535266f30 paddle::framework::OperatorBase::Run() @ 0x7fd534febc26 paddle::framework::details::ComputationOpHandle::RunImpl() @ 0x7fd534fa2de1 paddle::framework::details::FastThreadedSSAGraphExecutor::RunOpSync() @ 0x7fd534fa1b4f paddle::framework::details::FastThreadedSSAGraphExecutor::RunOp() @ 0x7fd534fa1e14 _ZNSt17_Function_handlerIFvvESt17reference_wrapperISt12_Bind_simpleIFS1_ISt5_BindIFZN6paddle9framework7details28FastThreadedSSAGraphExecutor10RunOpAsyncEPSt13unordered_mapIPNS6_12OpHandleBaseESt6atomicIiESt4hashISA_ESt8equal_toISA_ESaISt4pairIKSA_SC_EEESA_RKSt10shared_ptrINS5_13BlockingQueueImEEEEUlvE_vEEEvEEEE9_M_invokeERKSt9_Any_data @ 0x7fd532a88513 std::_Function_handler<>::_M_invoke() @ 0x7fd532818717 std::future_base::_State_base::_M_do_set() @ 0x7fd598ce7a99 pthread_once_slow @ 0x7fd534f9d302 _ZNSt13__future_base11_Task_stateISt5_BindIFZN6paddle9framework7details28FastThreadedSSAGraphExecutor10RunOpAsyncEPSt13unordered_mapIPNS4_12OpHandleBaseESt6atomicIiESt4hashIS8_ESt8equal_toIS8_ESaISt4pairIKS8_SA_EEES8_RKSt10shared_ptrINS3_13BlockingQueueImEEEEUlvE_vEESaIiEFvvEE6_M_runEv @ 0x7fd53281a944 _ZZN10ThreadPoolC1EmENKUlvE_clEv @ 0x7fd559fd1421 execute_native_thread_routine_compat @ 0x7fd598ce06ba start_thread @ 0x7fd598a1641d clone 0x0 (unknown)

以下是部分配置文件： architecture: MaskRCNN use_gpu: true max_iters: 20000 snapshot_iter: 1000 log_smooth_window: 20 save_dir: output pretrain_weights: https://paddlemodels.bj.bcebos.com/object_detection/mask_rcnn_r50_fpn_1x.tar finetune_exclude_pretrained_params: ['cls_score','bbox_pred','mask_fcn_logits'] metric: COCO weights: output/mask_rcnn_r50_fpn_1x/best_model num_classes: 2

在使用coco内置的脚本将每张照片的json转换成一个整体的json时，其中一个txt文件名称为：labels.txt,里面的内容为： ignore backgeound NO_Mature

以上的三个只有NO_Mature，我的数据集的目标类别只有这一个，也就是1

"root":{6 items "info":{6 items "description":NULL "url":NULL "version":NULL "year":int2020 "contributor":NULL "date_created":string"2020-05-02 23:01:36.555680" } "licenses":[1 item 0:{3 items "url":NULL "id":int0 "name":NULL } ] "images":[20 items 0:{7 items "license":int0 "url":NULL "file_name":string"(24).jpg" "height":int377 "width":int504 "date_captured":NULL "id":int0 } 1:{7 items "license":int0 "url":NULL "file_name":string"(7).jpg" "height":int377 "width":int504 "date_captured":NULL "id":int1 } 2:{7 items "license":int0 "url":NULL "file_name":string"(80).jpg" "height":int377 "width":int504 "date_captured":NULL "id":int2 } 3:{7 items "license":int0 "url":NULL "file_name":string"58.jpg" "height":int377 "width":int504 "date_captured":NULL "id":int3 } 4:{7 items "license":int0 "url":NULL "file_name":string"59.jpg" "height":int377 "width":int504 "date_captured":NULL "id":int4 } 5:{7 items "license":int0 "url":NULL "file_name":string"6.jpg" "height":int377 "width":int504 "date_captured":NULL "id":int5 } 6:{7 items "license":int0 "url":NULL "file_name":string"60.jpg" "height":int377 "width":int504 "date_captured":NULL "id":int6 } 7:{7 items "license":int0 "url":NULL "file_name":string"61.jpg" "height":int377 "width":int504 "date_captured":NULL "id":int7 } 8:{7 items "license":int0 "url":NULL "file_name":string"64.jpg" "height":int377 "width":int504 "date_captured":NULL "id":int8 } 9:{7 items "license":int0 "url":NULL "file_name":string"65.jpg" "height":int377 "width":int504 "date_captured":NULL "id":int9 } 10:{7 items "license":int0 "url":NULL "file_name":string"66.jpg" "height":int377 "width":int504 "date_captured":NULL "id":int10 } 11:{7 items "license":int0 "url":NULL "file_name":string"67.jpg" "height":int377 "width":int504 "date_captured":NULL "id":int11 } 12:{7 items "license":int0 "url":NULL "file_name":string"68.jpg" "height":int377 "width":int504 "date_captured":NULL "id":int12 } 13:{7 items "license":int0 "url":NULL "file_name":string"69.jpg" 
"height":int377 "width":int504 "date_captured":NULL "id":int13 }

以上是复制的部分json文件

jerrywgz commented 4 years ago

看你发的json文件中只包含了image的部分，可以发下annotation的部分吗，怀疑是检测库中错误的将ignore，backgeound，NO_Mature认为是3类，可以结合annotation看下

learning-boy commented 4 years ago

type":string"instances" "annotations":[36 items 0:{7 items "id":int0 "image_id":int0 "category_id":int1 "segmentation":[1 item 0:[34 items 0:float321.87649402390434 1:float174.1035856573705 2:float300.76095617529876 3:float182.47011952191232 4:float286.81673306772905 5:float199.203187250996 6:float285.2231075697211 7:float221.11553784860556 8:float292.79282868525894 9:float245.41832669322707 10:float313.1115537848605 11:float264.1434262948207 12:float329.0478087649402 13:float279.68127490039836 14:float362.91235059760953 15:float282.47011952191235 16:float390.80079681274896 17:float272.9083665338645 18:float404.3466135458167 19:float260.1593625498008 20:float408.3306772908366 21:float237.84860557768923 22:float411.91633466135454 23:float218.7250996015936 24:float411.51792828685257 25:float201.19521912350595 26:float401.95617529880474 27:float186.85258964143424 28:float387.215139442231 29:float176.09561752988046 30:float374.4661354581673 31:float168.5258964143426 32:float350.5617529880478 33:float168.1274900398406 ] ] "area":int11545 "bbox":[4 items 0:int285 1:int168 2:int127 3:int115 ] "iscrowd":int0 } 1:{7 items "id":int1 "image_id":int0 "category_id":int1 "segmentation":[1 item 0:[28 items 0:float260.5219123505976 1:float225.89641434262947 2:float243.78884462151393 3:float237.05179282868525 4:float238.211155378486 5:float249.80079681274898 6:float239.80478087649402 7:float270.91633466135454 8:float246.1792828685259 9:float282.0717131474103 10:float272.07569721115533 11:float293.6254980079681 12:float294.3864541832669 13:float293.6254980079681 14:float310.3227091633466 15:float286.05577689243023 16:float318.68924302788844 17:float275.2988047808765 18:float319.88446215139436 19:float266.53386454183266 20:float317.49402390438246 21:float249.003984063745 22:float311.91633466135454 23:float238.2470119521912 24:float303.1513944223107 25:float230.67729083665336 26:float287.215139442231 27:float221.91235059760953 ] ] "area":int4722 "bbox":[4 items 0:int238 1:int221 
2:int82 3:int73 ] "iscrowd":int0 }

这是annotation部分，麻烦您看一下

learning-boy commented 4 years ago

jerrywgz commented 4 years ago

你的annotation中，category_id一共有几种呢，看你发的这个是1，可以检查下如果你的数据只有一类的话，是否category_id还有其他值

learning-boy commented 4 years ago

我检索了下json文件，category_id都是1，但是在整个json文件的最后有一个background，没有ignore。最后修改了一下，标签数量没问题了。

---原始邮件--- 发件人: "wangguanzhong"<notifications@github.com> 发送时间: 2020年5月9日(周六) 上午10:53 收件人: "PaddlePaddle/PaddleDetection"<PaddleDetection@noreply.github.com>; 抄送: "learning-boy"<1342346303@qq.com>;"Author"<author@noreply.github.com>; 主题: Re: [PaddlePaddle/PaddleDetection] 请问使用fluid.io.load_inference_model加载mask—RCNN训练好的模型，跟加载YOLO相比，还需要改变哪些地方，有代码示例嘛，谢谢 (#591)

你的annotation中，category_id一共有几种呢，看你发的这个是1，可以检查下如果你的数据只有一类的话，是否category_id还有其他值

— You are receiving this because you authored the thread. Reply to this email directly, view it on GitHub, or unsubscribe.

learning-boy commented 4 years ago

我修改数据集后，发现IoU =0.5和0.75的mAP很高，但是那十个IoU平均后，就很低了

learning-boy commented 4 years ago

您好，还有就是用PaddleDetection的模型使用coco数据集，最后做评估的时候，mAP取的是10个IoU阈值取平均后算出的mAP，能不能只取单个IoU算出的mAP？

---原始邮件--- 发件人: "wangguanzhong"<notifications@github.com> 发送时间: 2020年5月9日(周六) 上午10:53 收件人: "PaddlePaddle/PaddleDetection"<PaddleDetection@noreply.github.com>; 抄送: "learning-boy"<1342346303@qq.com>;"Author"<author@noreply.github.com>; 主题: Re: [PaddlePaddle/PaddleDetection] 请问使用fluid.io.load_inference_model加载mask—RCNN训练好的模型，跟加载YOLO相比，还需要改变哪些地方，有代码示例嘛，谢谢 (#591)

你的annotation中，category_id一共有几种呢，看你发的这个是1，可以检查下如果你的数据只有一类的话，是否category_id还有其他值

— You are receiving this because you authored the thread. Reply to this email directly, view it on GitHub, or unsubscribe.

Sencc commented 3 years ago

[inference_program, feed_target_names, fetch_targets] = fluid.io.load_inference_model (dirname=path, executor=exe,model_filename='model', params_filename='params') 打印出feed_target_names，feed_target_names: ['image', 'im_info', 'im_shape'] 报错提示Exception: 'feed_targets' does not have im_shape variable 是不是需要在exe.run指定im_shape，请问怎么指定

[inference_program, feed_target_names, fetch_targets] = fluid.io.load_inference_model (dirname=path, executor=exe,model_filename='model', params_filename='params') 打印出feed_target_names，feed_target_names: ['image', 'im_info', 'im_shape'] 报错提示Exception: 'feed_targets' does not have im_shape variable 是不是需要在exe.run指定im_shape，请问怎么指定

请问你是怎么解决这个问题的？

PaddlePaddle / PaddleDetection

请问使用fluid.io.load_inference_model加载mask—RCNN训练好的模型，跟加载YOLO相比，还需要改变哪些地方，有代码示例嘛，谢谢 #591

"label_dict": {0:"apple",1:"banana",2:"orange"},

anchors = train_parameters['anchors']

anchor_mask = train_parameters['anchor_mask']

class_dim = train_parameters['class_dim']

print("label_dict:{} class dim:{}".format(label_dict, class_dim))