Open Marouane1993 opened 6 years ago
Some of your bounding boxes may be invalid (for example, lying outside the image or having xmax <= xmin).
Use the script below to filter out the bad annotations.
import os from xml.dom import minidom os.environ['GLOG_minloglevel'] = '2' import os, sys, cv2 import argparse import shutil import sys ,os from os import listdir from os.path import isfile, join import multiprocessing from time import sleep
# Class names of interest; an annotation file is only kept when at least one
# of its objects carries one of these names.
CLASSES = ('background', 'A', 'B', 'C')


def _text(node, tag):
    """Return the text of the first ``tag`` element found under ``node``."""
    return node.getElementsByTagName(tag)[0].firstChild.data


def _clamp_box(xmin, ymin, xmax, ymax, width, height):
    """Clamp a box to [1, width] x [1, height].

    Returns the clamped ``(xmin, ymin, xmax, ymax)`` tuple of ints, or
    ``None`` when the clamped box has no positive width or height.
    """
    xmin = max(xmin, 1)
    ymin = max(ymin, 1)
    xmax = min(xmax, width)
    ymax = min(ymax, height)
    # Validate AFTER clamping: checking the raw coordinates first lets boxes
    # such as xmin=-5, xmax=1 through, which then collapse to xmin == xmax.
    if xmin >= xmax or ymin >= ymax:
        return None
    return xmin, ymin, xmax, ymax


def read_xml(xml_path, new_path):
    """Filter the bounding boxes of one annotation file and write the result.

    Parses ``xml_path``, clamps every object's box to the image size recorded
    in the file, drops boxes that are empty after clamping, groups the
    remaining boxes by object name, passes them to ``write_xml`` and writes
    the returned text to ``new_path``.

    Args:
        xml_path: path of the annotation XML file to read.
        new_path: path of the filtered annotation file to write.

    Returns:
        False (and nothing is written) when no object in the file has a name
        listed in ``CLASSES``; True otherwise.

    Raises:
        IndexError / AttributeError: when an expected element (filename,
            size, name, xmin, ...) is missing or empty.
        ValueError: when a coordinate or the image size is not an integer.
    """
    doc = minidom.parse(xml_path)
    objects = doc.getElementsByTagName("object")
    fname = _text(doc, "filename")
    size = doc.getElementsByTagName("size")[0]
    width = _text(size, "width")
    height = _text(size, "height")

    if not any(_text(obj, "name") in CLASSES for obj in objects):
        return False

    img_w = int(width)
    img_h = int(height)
    labelledObjects = {}
    # NOTE(review): objects whose name is not in CLASSES are still copied to
    # the output, as before - confirm whether they should be dropped instead.
    for obj in objects:
        box = _clamp_box(
            int(_text(obj, "xmin")),
            int(_text(obj, "ymin")),
            int(_text(obj, "xmax")),
            int(_text(obj, "ymax")),
            img_w,
            img_h,
        )
        if box is None:
            continue
        # Coordinates are handed on as strings, matching the text values the
        # XML parser produced for them.
        labelledObjects.setdefault(_text(obj, "name"), []).append(
            tuple(str(v) for v in box)
        )

    newxml = write_xml(fname, labelledObjects, width, height)
    with open(new_path, 'w') as g:
        g.write(newxml)
    return True
def write_xml(file_name, objects, width, height):
ann = "
if __name__ == '__main__':
    # Usage: <script> ANNOTATION_DIR OUTPUT_DIR
    # Runs read_xml on every regular file in ANNOTATION_DIR and writes the
    # filtered annotation under the same file name in OUTPUT_DIR.
    mypath = sys.argv[1]
    outputpath = sys.argv[2]
    # Tolerate an existing output directory so the script can be re-run.
    if not os.path.isdir(outputpath):
        os.makedirs(outputpath)

    images = [join(mypath, f) for f in listdir(mypath) if isfile(join(mypath, f))]
    totalCount = len(images)
    count = 0

    # A bounded worker pool instead of one OS process per file: a large
    # dataset would otherwise start thousands of processes at once.
    pool = multiprocessing.Pool()
    try:
        jobs = [
            (image, pool.apply_async(
                read_xml, (image, join(outputpath, os.path.basename(image)))))
            for image in images
        ]
        pool.close()
        for image, job in jobs:
            try:
                if job.get():
                    count = count + 1
            except Exception as err:
                # Best effort: one unreadable file must not abort the whole
                # run, but the failure is reported rather than swallowed.
                sys.stderr.write("failed to process %s: %r\n" % (image, err))
        pool.join()
    finally:
        pool.terminate()

    sys.stdout.write("kept %d of %d annotation files\n" % (count, totalCount))
Hello @rnsandeep. First of all, thank you very much for writing this solution. There is one thing I would like to ask: may I repost this code with the correct indentation? I cannot follow the code as posted because the indentation has been lost. Thank you.
sure please do that.
I have converted the Caltech dataset to the pascal_voc format, but I got the following error:
Traceback (most recent call last): File "./tools/trainval_net.py", line 105, in
imdb, roidb = combined_roidb(args.imdb_name)
File "./tools/trainval_net.py", line 76, in combined_roidb
roidbs = [get_roidb(s) for s in imdb_names.split('+')]
File "./tools/trainval_net.py", line 73, in get_roidb
roidb = get_training_roidb(imdb)
File "/raid/users/mhamda/tf-faster-rcnn-master/tools/../lib/model/train_val.py", line 328, in get_training_roidb
imdb.append_flipped_images()
File "/raid/users/mhamda/tf-faster-rcnn-master/tools/../lib/datasets/imdb.py", line 118, in append_flipped_images
assert (boxes[:, 2] >= boxes[:, 0]).all()
AssertionError