experiencor / image-to-3d-bbox

Build a CNN network to predict 3D bounding box of car from 2D image.
https://experiencor.github.io/sdc_3d.html

run final kitti evaluation to project 3d tracklet to 2d imgs error #4

Closed: Patrick-Woo closed this issue 6 years ago

Patrick-Woo commented 6 years ago

[screenshot: projection result]

I used your code to explore the KITTI data; I want to extract the tracklet .xml file and then project the 3D boxes onto the 2D images.

However, after the "# Prepare the test data" section in the final KITTI evaluation notebook, the result looks like the attached screenshot. It seems the projection has an orientation issue.

I hope you can help me.

If you know of other resources that could satisfy my exploration needs, please also let me know.

Note: I use the tracklet, calibration and image files from the KITTI raw data download page, not from the 3D object detection page.

Patrick-Woo commented 6 years ago

[screenshot: projection result] It also shows the same issue for the Car class.

Patrick-Woo commented 6 years ago

My code is as follows:

```python
import os
import numpy as np
import cv2
import matplotlib.pyplot as plt
import parseTrackletXML as xmlParser  # tracklet parser from the KITTI raw-data devkit; adjust if your import differs

kittiDir = '/data/project-o/kitti-3d-object-detection/dataset/2011_09_26/'
drive = '2011_09_26_drive_0001_sync/'

label_dir = kittiDir + drive + 'label_02/'
image_dir = kittiDir + drive + 'image_02/data/'
calib_dir = kittiDir + drive + 'calib_02/'

# FIGURE OUT THE LABELS
os.system('rm ' + label_dir + '*')
os.system('rm ' + calib_dir + '*')

# Read the transformation matrices
for line in open(kittiDir + drive + 'calib_velo_to_cam.txt'):
    if 'R:' in line:
        R = line.strip().split(' ')
        R = np.asarray([float(number) for number in R[1:]])
        R = np.reshape(R, (3, 3))

    if 'T:' in line:
        T = line.strip().split(' ')
        T = np.asarray([float(number) for number in T[1:]])
        T = np.reshape(T, (3, 1))

for line in open(kittiDir + drive + 'calib_cam_to_cam.txt'):
    #if 'R_rect_00:' in line:  # the raw-data devkit projection formula uses R_rect_00 here
    if 'R_rect_02:' in line:
        R0_rect = line.strip().split(' ')
        R0_rect = np.asarray([float(number) for number in R0_rect[1:]])
        R0_rect = np.reshape(R0_rect, (3, 3))

# pad the rectifying rotation and the velodyne-to-camera transform to 4x4
R0_rect = np.append(R0_rect, np.zeros((3, 1)), axis=1)
R0_rect = np.append(R0_rect, np.zeros((1, 4)), axis=0)
R0_rect[-1, -1] = 1

Tr_velo_to_cam = np.concatenate([R, T], axis=1)
Tr_velo_to_cam = np.append(Tr_velo_to_cam, np.zeros((1, 4)), axis=0)
Tr_velo_to_cam[-1, -1] = 1

transform = np.dot(R0_rect, Tr_velo_to_cam)

print(Tr_velo_to_cam)
print(R0_rect)
print(transform)

# Read the tracklets and write one KITTI-style label file per frame
for trackletObj in xmlParser.parseXML(kittiDir + drive + 'tracklet_labels.xml'):
    for translation, rotation, state, occlusion, truncation, amtOcclusion, amtBorders, absoluteFrameNumber in trackletObj:
        label_file = label_dir + str(absoluteFrameNumber).zfill(10) + '.txt'

        # move the box centre from velodyne to rectified camera coordinates
        translation = np.append(translation, 1)
        translation = np.dot(transform, translation)
        translation = translation[:3] / translation[3]

        with open(label_file, 'a') as file_writer:
            line = [trackletObj.objectType] + [0, 0, rotation[2]] + [0, 0, 0, 0] + list(trackletObj.size) + list(translation) + [rotation[2]]
            line = ' '.join([str(item) for item in line]) + '\n'
            file_writer.write(line)
```
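For reference, the label line written above follows the 15-field KITTI object label layout: type, truncated, occluded, alpha, the four 2D bounding-box coordinates, the dimensions h/w/l, the location x/y/z in camera coordinates, and rotation_y. A small sketch that just makes the field order explicit (the helper name is mine, and the zeroed fields mirror the code above):

```python
def kitti_label_line(obj_type, dims_hwl, location_xyz, rotation_y, alpha=0.0):
    """Format one line of a KITTI object label file.

    Field order: type, truncated, occluded, alpha,
                 bbox left/top/right/bottom, height, width, length,
                 x, y, z (camera coordinates), rotation_y.
    """
    fields = ([obj_type, 0, 0, alpha]   # type, truncated, occluded, alpha
              + [0, 0, 0, 0]            # 2D bounding box, unknown from tracklets alone
              + list(dims_hwl)          # h, w, l
              + list(location_xyz)      # x, y, z
              + [rotation_y])
    return ' '.join(str(f) for f in fields) + '\n'
```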

```python
# FIGURE OUT THE CALIBRATION
for line in open(kittiDir + drive + 'calib_cam_to_cam.txt'):
    if 'P_rect_02' in line:
        line_P2 = line.replace('P_rect_02', 'P2')
        print(line_P2)

for image in os.listdir(image_dir):
    label_file = label_dir + image.split('.')[0] + '.txt'
    calib_file = calib_dir + image.split('.')[0] + '.txt'

    # Create calib files
    with open(calib_file, 'w') as file_writer:
        file_writer.write(line_P2)

    # Fix missing labels
    with open(label_file, 'a') as file_writer:
        file_writer.write('')
```

```python
all_image = sorted(os.listdir(image_dir))
np.random.shuffle(all_image)

for f in all_image:
    image_file = image_dir + f
    calib_file = calib_dir + f.replace('png', 'txt')
    predi_file = predi_dir + f.replace('png', 'txt')  # predi_dir is not defined in this snippet

    # read calibration data
    for line in open(calib_file):
        if 'P2:' in line:
            cam_to_img = line.strip().split(' ')
            cam_to_img = np.asarray([float(number) for number in cam_to_img[1:]])
            cam_to_img = np.reshape(cam_to_img, (3, 4))

        #if 'R0_rect:' in line:
        #    R0_rect = line.strip().split(' ')
        #    R0_rect = np.asarray([float(number) for number in R0_rect[1:]])
        #    R0_rect = np.reshape(R0_rect, (3, 3))

        #if 'Tr_velo_to_cam:' in line:
        #    Tr_velo_to_cam = line.strip().split(' ')
        #    Tr_velo_to_cam = np.asarray([float(number) for number in Tr_velo_to_cam[1:]])
        #    Tr_velo_to_cam = np.reshape(Tr_velo_to_cam, (3, 4))

#R0_rect = np.append(R0_rect, np.zeros((3, 1)), axis=1)
#R0_rect = np.append(R0_rect, np.zeros((1, 4)), axis=0)
#R0_rect[-1, -1] = 1

#Tr_velo_to_cam = np.append(Tr_velo_to_cam, np.zeros((1, 4)), axis=0)
#Tr_velo_to_cam[-1, -1] = 1
```

```python
# Draw 2D boxes and 3D boxes
# (alternative image sources left over from other cells; only the second matches the label file read below)
#image = cv2.imread(image_path + dataset2[index] + '.png')
image = cv2.imread(image_dir + dataset2[50] + '.png')  # dataset2 is not defined in this snippet
#image = (images[21])

cars = []

for line in open(label_dir + dataset2[50] + '.txt').readlines():
    line = line.strip().split(' ')

    if 'Car' in line[0]:
        # Draw 2D bounding box
        x_min, y_min, x_max, y_max = [int(float(number)) for number in line[4:8]]
        #cv2.rectangle(image, (x_min, y_min), (x_max, y_max), (0, 255, 0), 3)

        # Draw 3D bounding box
        dims   = np.asarray([float(number) for number in line[8:11]])
        center = np.asarray([float(number) for number in line[11:14]])

        if np.abs(float(line[3])) < 0.01:
            continue
        print(line[3], center)

        rot_y = float(line[3]) + np.arctan(center[0] / center[2])  #float(line[14])

        box_3d = []

        # generate the eight box corners and project each one onto the image plane
        for i in [1, -1]:
            for j in [1, -1]:
                for k in [0, 1]:
                    point = np.copy(center)
                    point[0] = center[0] + i * dims[1]/2 * np.cos(-rot_y + np.pi/2) + (j*i) * dims[2]/2 * np.cos(-rot_y)
                    point[2] = center[2] + i * dims[1]/2 * np.sin(-rot_y + np.pi/2) + (j*i) * dims[2]/2 * np.sin(-rot_y)
                    point[1] = center[1] - k * dims[0]

                    point = np.append(point, 1)
                    point = np.dot(cam_to_img, point)
                    point = point[:2] / point[2]
                    point = point.astype(np.int16)
                    box_3d.append(point)

        # vertical edges
        for i in range(4):
            point_1_ = box_3d[2*i]
            point_2_ = box_3d[2*i + 1]
            cv2.line(image, (point_1_[0], point_1_[1]), (point_2_[0], point_2_[1]), (255, 0, 0), 2)

        # top and bottom faces
        for i in range(8):
            point_1_ = box_3d[i]
            point_2_ = box_3d[(i + 2) % 8]
            cv2.line(image, (point_1_[0], point_1_[1]), (point_2_[0], point_2_[1]), (255, 0, 0), 1)

fig = plt.figure(figsize=(20, 20))
plt.imshow(image)
plt.show()
```
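As a cross-check for the corner generation above, the KITTI object devkit builds the eight corners in the object frame and rotates them with an explicit yaw matrix. A minimal sketch of that variant (the corner ordering differs from the loop above, so the edge-drawing indices would need adjusting):

```python
import numpy as np

def box3d_corners(center, dims, rot_y):
    """Eight corners of a KITTI-style 3D box in camera coordinates.

    center : (x, y, z) of the bottom-face centre
    dims   : (h, w, l) as stored in the label line
    rot_y  : yaw around the camera Y axis
    """
    h, w, l = dims
    # corners in the object frame: x along length, y downwards, z along width
    x = np.array([ l/2,  l/2, -l/2, -l/2,  l/2,  l/2, -l/2, -l/2])
    y = np.array([ 0.0,  0.0,  0.0,  0.0,   -h,   -h,   -h,   -h])
    z = np.array([ w/2, -w/2, -w/2,  w/2,  w/2, -w/2, -w/2,  w/2])
    # rotation around the Y axis by rot_y
    R = np.array([[ np.cos(rot_y), 0.0, np.sin(rot_y)],
                  [ 0.0,           1.0, 0.0          ],
                  [-np.sin(rot_y), 0.0, np.cos(rot_y)]])
    return (R @ np.vstack([x, y, z])).T + np.asarray(center, dtype=float)
```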

experiencor commented 6 years ago

Hi @Patrick-Woo, I also faced a lot of orientation issues when exploring this dataset, but I don't remember how I got them all right. I do remember that you have to follow their instructions carefully (http://kitti.is.tue.mpg.de/kitti/devkit_raw_data.zip). Make sure that you are able to run their code, then start debugging from there.
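For reference, the raw-data devkit README linked above projects a homogeneous velodyne point X onto camera xx as Y = P_rect_xx * R_rect_00 * (R|T)_velo_to_cam * X. A minimal sketch of that formula for camera 2 (the function name and argument layout are illustrative):

```python
import numpy as np

def project_velo_to_image(X_velo, P_rect_02, R_rect_00, Tr_velo_to_cam):
    """Project one 3D point in velodyne coordinates onto the image plane of
    camera 2, following Y = P_rect_02 * R_rect_00 * (R|T)_velo_to_cam * X.

    P_rect_02      : 3x4 projection matrix from calib_cam_to_cam.txt
    R_rect_00      : 3x3 rectifying rotation from calib_cam_to_cam.txt
    Tr_velo_to_cam : 4x4 [R|T; 0 0 0 1] built from calib_velo_to_cam.txt
    """
    R = np.eye(4)
    R[:3, :3] = R_rect_00                                  # pad the rectifying rotation to 4x4
    X = np.append(np.asarray(X_velo, dtype=float), 1.0)    # homogeneous coordinates
    y = P_rect_02 @ R @ Tr_velo_to_cam @ X
    return y[:2] / y[2]                                    # pixel coordinates (u, v)
```

Comparing its output against the `transform` and `cam_to_img` matrices built in the code above is one way to narrow down where the orientation goes wrong.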

Patrick-Woo commented 6 years ago

Thank you for your prompt reply. Could you tell me how you got the formula `rot_y = float(line[3]) + np.arctan(center[0]/center[2])`? I guess this is used to calculate the rotation of the objects.
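For context, this looks like the standard KITTI relation between the observation angle alpha (the fourth field of a label line) and the global yaw rotation_y: alpha = rotation_y - arctan(x/z), where (x, z) is the object centre in camera coordinates. If line[3] stores alpha, adding arctan(x/z) back recovers rotation_y. A minimal sketch of that conversion (the helper name is mine; arctan2 is used so it also behaves for z <= 0):

```python
import numpy as np

def alpha_to_rot_y(alpha, x, z):
    """Recover the global yaw rotation_y from the observation angle alpha,
    using the KITTI relation alpha = rotation_y - arctan(x / z)."""
    rot_y = alpha + np.arctan2(x, z)
    # wrap to [-pi, pi] for convenience
    return (rot_y + np.pi) % (2 * np.pi) - np.pi
```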

HaoMyelin commented 6 years ago

@Patrick-Woo I am running into the same projection error. Have you solved it? Thank you.

lucasjinreal commented 5 years ago

@HaoMyelin What is this tracklet used for?

jkstyle2 commented 5 years ago

@Patrick-Woo Hi, have you solved the issue? Could you please share the solution and the formula you asked about?