low test accuracy using semantic segmentation

Good afternoon,

First of all, thank you for your work in point clouds and for this great architecture you made.

I don't know if anyone else has run into this, but I am seeing low accuracy results in my testing code and am not sure why. I have modified the code for LiDAR classification and instead of using XYZRGBX'Y'Z', I use XYZ, Intensity, Curvature, height - minimum height per 3m grid, X',Y',Z'. I split the point cloud into 5m grids and split it into 80 percent training, 10 percent validation, and 10 percent testing. The training and validation accuracy is approximately 80 - 90 percent (depending on the number of epochs). When I go to test it on the 'testing' grids, however, I get results around 45 to 50 percent. I'm using 4 classes, ground, building, vegetation, and other.

Here is my testing code:

Note: I use the term 'hashes' for grids (not originally my idea: Some of this is based on work found here. Also I know this is a bit sloppy and incomplete, I am just trying to get it to work right now.

BATCH_SIZE = 50
NUM_POINT = 6000
MODEL = os.path.join(wash_folder,'log')
MODEL_PATH = os.path.join(MODEL,'model.ckpt')
GPU_INDEX = 0
MAX_EPOCH = 30
DUMP_DIR = os.path.join(wash_folder,'dump')
if not os.path.exists(DUMP_DIR): os.mkdir(DUMP_DIR)
LOG_FOUT = open(os.path.join(DUMP_DIR, 'log_evaluate.txt'), 'w')
VISU = False
POINT_DIM = len(channels)
POINT_VISU = os.path.join(wash_folder,'test-pc')
if not os.path.exists(POINT_VISU): os.mkdir(POINT_VISU)
open(FLAGS.room_data_filelist)]
NUM_CLASSES = 4

def log_string(out_str):
    LOG_FOUT.write(out_str+'\n')
    LOG_FOUT.flush()
    print(out_str)

def eval_one_epoch(sess, ops, df_test,test_columns,epoch):
    error_cnt = 0
    is_training = False
    total_correct = 0
    total_seen = 0
    loss_sum = 0
    total_seen_class = [0 for _ in range(NUM_CLASSES)]
    total_correct_class = [0 for _ in range(NUM_CLASSES)]
    num_batches = 0
    for batch_data, batch_label in generator(df,channels,hashes,BATCH_SIZE,NUM_POINT):

''' The generator is used instead of h5py. 
 I take the point cloud, put it into a pandas dataframe, 
and the generator puts it into data, labels 
with shapes [BATCH_SIZE,NUM_POINTS,len(channels)], [BATCH_SIZE, NUM_POINTS], respectively.
 Credit to the link above
'''

        num_batches += 1 * BATCH_SIZE

        feed_dict = {ops['pointclouds_pl']: batch_data,
                    ops['labels_pl']: batch_label,
                    ops['is_training_pl']: is_training}

        loss_val, pred_val = sess.run([ops['loss'], ops['pred_softmax']],
                                      feed_dict=feed_dict)

        pred_label = np.argmax(pred_val, 2) # BxN
        print(pred_label.shape)
#         print(label.shape)
        correct = np.sum(pred_val == batch_label)
#         correct = np.sum(pred_val == batch_label)
        total_correct += correct
        total_seen += (BATCH_SIZE*NUM_POINT)
        loss_sum += (loss_val*BATCH_SIZE)
        for i in range(BATCH_SIZE):
            for j in range(NUM_POINT):
                l = batch_label[i, j]
                total_seen_class[l] += 1
                total_correct_class[l] += (pred_val[i, j] == l)

        for b in range(BATCH_SIZE):
#             pts = batch_data[b, :, :]
            pts = batch_data[b,:,:3]
            l = batch_label[b,:]

#             pts[:,6] *= max_room_x
#             pts[:,7] *= max_room_y
#             pts[:,8] *= max_room_z
#             pts[:,3:6] *= 255.0
            pred = pred_label[b, :]

            if VISU:
                inp = np.hstack((pts,pred[:,None]))
                inp = np.hstack((inp,l[:,None]))
                df_test = df_test.append(pd.DataFrame(inp,columns = test_columns),ignore_index=True)

        correct = np.sum(pred_label == batch_label)
        total_correct += correct
        total_seen += (BATCH_SIZE*NUM_POINT)
        loss_sum += (loss_val*BATCH_SIZE)
        for i in range(BATCH_SIZE):
            for j in range(NUM_POINT):
                l = batch_label[i, j]
                total_seen_class[l] += 1
                total_correct_class[l] += (pred_val[i, j] == l)

    log_string('eval mean loss: %f' % (loss_sum / float(total_seen/NUM_POINT)))
    log_string('eval accuracy: %f'% (total_correct / float(total_seen)))
    print('Correct classes', total_correct_class)
    print('Seen classes', total_seen_class)

    return total_correct, total_seen

def evaluate():
    is_training = False

    with tf.device('/gpu:'+str(GPU_INDEX)):
        pointclouds_pl, labels_pl = placeholder_inputs(BATCH_SIZE, NUM_POINT,POINT_DIM=POINT_DIM)
        is_training_pl = tf.placeholder(tf.bool, shape=())

        # simple model
        pred = get_model(pointclouds_pl, is_training_pl,POINT_DIM=POINT_DIM)
        loss = get_loss(pred, labels_pl)
        pred_softmax = tf.nn.softmax(pred)

        # Add ops to save and restore all the variables.
        saver = tf.train.Saver()

    # Create a session
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.allow_soft_placement = True
    config.log_device_placement = True
    sess = tf.Session(config=config)

    # Restore variables from disk.
    saver.restore(sess, MODEL_PATH)
    log_string("Model restored.")

    ops = {'pointclouds_pl': pointclouds_pl,
           'labels_pl': labels_pl,
           'is_training_pl': is_training_pl,
           'pred': pred,
           'pred_softmax': pred_softmax,
           'loss': loss}

#     ops = {'pointclouds_pl': pointclouds_pl,
#        'labels_pl': labels_pl,
#        'is_training_pl': is_training_pl,
#        'pred': pred,
#        'loss': loss,
#        'train_op': train_op,
#        'merged': merged,
#        'step': batch}

    total_correct = 0
    total_seen = 0
    for epoch in range(MAX_EPOCH):
        a, b = eval_one_epoch(sess, ops, df_test,test_columns,epoch)
        print(a)
        print(b)
        total_correct += a
        total_seen += b
        print('total correct:',a)
        print('total seen',b)

with tf.Graph().as_default():
    evaluate()
LOG_FOUT.close()

Note I'm running this in Jupyter with TF 1.14 and Docker. If I need to clear anything up I certainly will. I realize this isn't the most clear; I've been trying to solve this all day. I can't see any error in the code so maybe it's my method? Again, if I need to be more clear I will. thank you!

charlesq34 / pointnet

low test accuracy using semantic segmentation #226