Open zhirui-gao opened 5 months ago
Hi, you can refer to this script for processing the data. https://github.com/NVlabs/neuralangelo/blob/main/projects/neuralangelo/scripts/convert_tnt_to_json.py
Thanks!
I used the code you provided and made some modifications to reproduce the DTU COLMAP points, but the result differs from the dataset you provided. Could you help me find the problem?
def convert_cam_dict_to_pinhole_dict(camera_dict, pinhole_dict_file, img_names, h=1200, w=1600):
    """Write one pinhole-camera parameter list per image to a JSON file.

    Partially adapted from
    https://github.com/Kai-46/nerfplusplus/blob/master/colmap_runner/run_colmap_posed.py

    Each JSON entry is keyed by image name and holds
    ``[w, h, fx, fy, cx, cy, q0, q1, q2, q3, t0, t1, t2]`` where ``q*`` is the
    output of ``rotmat2qvec`` on the rotation of the inverted pose and ``t*``
    is its translation.

    Args:
        camera_dict: Mapping with ``world_mat_<i>`` and ``scale_mat_<i>``
            entries for i = 0..N-1 (e.g. a loaded ``cameras.npz``).
        pinhole_dict_file: Path of the JSON file to write.
        img_names: Image file names; ``img_names[i]`` must be the image that
            belongs to camera ``i``, so pass them in camera-index order
            (an unsorted ``os.listdir`` result is NOT safe).
        h: Image height in pixels written for every image.
        w: Image width in pixels written for every image.

    Raises:
        IndexError: If there are fewer image names than cameras.
    """
    print('Writing pinhole_dict to: ', pinhole_dict_file)

    # Count the views from the keys themselves instead of assuming a fixed
    # number of entries per view: archives may carry 2, 4 or 6 matrices per
    # image, and ``len(camera_dict) // 6`` silently drops cameras otherwise.
    prefix = 'world_mat_'
    n_views = sum(
        1 for key in camera_dict
        if key.startswith(prefix) and key[len(prefix):].isdigit()
    )
    if len(img_names) < n_views:
        raise IndexError(
            f"Got {len(img_names)} image names for {n_views} cameras in camera_dict"
        )

    pinhole_dict = {}
    for idx in range(n_views):
        world_mat = camera_dict['world_mat_%d' % idx].astype(np.float32)
        scale_mat = camera_dict['scale_mat_%d' % idx].astype(np.float32)
        P = (world_mat @ scale_mat)[:3, :4]
        K, pose_c2w = load_K_Rt_from_P(None, P)
        W2C = np.linalg.inv(pose_c2w)

        # Intrinsics.
        fx = float(K[0, 0])
        fy = float(K[1, 1])
        cx = float(K[0, 2])
        cy = float(K[1, 2])

        # Extrinsics. Convert explicitly to Python floats: NumPy float32
        # scalars are not JSON serializable.
        qvec = rotmat2qvec(W2C[:3, :3])
        quat = [float(qvec[i]) for i in range(4)]
        tvec = [float(x) for x in W2C[:3, 3]]

        pinhole_dict[img_names[idx]] = [w, h, fx, fy, cx, cy, *quat, *tvec]

    with open(pinhole_dict_file, 'w') as fp:
        json.dump(pinhole_dict, fp, indent=2, sort_keys=True)
def init_colmap(args):
    """Build a COLMAP sparse model from known camera poses for every DTU scene.

    For each scene directory under ``args.dtu_path`` this extracts and matches
    SIFT features, converts ``cameras.npz`` into COLMAP initialisation files,
    triangulates points with the given poses, and runs bundle adjustment with
    extrinsics held fixed.

    Args:
        args: Namespace with a ``dtu_path`` attribute pointing at the folder
            that contains one sub-folder per scene. Each scene needs an
            ``image`` folder and a ``cameras.npz`` file.

    Raises:
        Exception: If a scene has no ``image`` folder.
        subprocess.CalledProcessError: If any COLMAP command exits non-zero.
    """
    import subprocess

    assert args.dtu_path, "Provide path to DTU dataset"

    # Sorted for a deterministic processing order.
    for scene in sorted(os.listdir(args.dtu_path)):
        scene_path = os.path.join(args.dtu_path, scene)
        if not os.path.isdir(scene_path):
            # Stray files next to the scene folders are not scenes.
            continue

        image_dir = os.path.join(scene_path, 'image')
        if not os.path.exists(image_dir):
            raise Exception(f"'image' folder cannot be found in {scene_path}. "
                            "Please check the expected folder structure in DATA_PREPROCESSING.md")

        db_file = os.path.join(scene_path, 'database.db')
        sfm_dir = os.path.join(scene_path, 'sparse')

        # Argument lists (no shell) keep paths with spaces intact, and
        # check=True stops the pipeline as soon as a COLMAP step fails instead
        # of continuing on incomplete data.

        # extract features
        subprocess.run([
            "colmap", "feature_extractor",
            "--database_path", db_file,
            "--image_path", image_dir,
            "--ImageReader.camera_model=PINHOLE",
            "--SiftExtraction.use_gpu=true",
            "--SiftExtraction.num_threads=32",
            "--ImageReader.single_camera=true",
        ], check=True)

        # match features
        subprocess.run([
            "colmap", "sequential_matcher",
            "--database_path", db_file,
            "--SiftMatching.use_gpu=true",
        ], check=True)

        # read poses
        camera_dict = np.load(os.path.join(scene_path, 'cameras.npz'))

        # convert to colmap files
        # os.listdir returns names in arbitrary order, but camera i is paired
        # with img_names[i]; sort so image names line up with camera indices.
        img_names = sorted(os.listdir(image_dir))
        pinhole_dict_file = os.path.join(scene_path, 'pinhole_dict.json')
        convert_cam_dict_to_pinhole_dict(camera_dict, pinhole_dict_file, img_names=img_names)
        create_init_files(pinhole_dict_file, db_file, sfm_dir)

        # triangulate points using the known poses
        subprocess.run([
            "colmap", "point_triangulator",
            "--database_path", db_file,
            "--image_path", image_dir,
            "--input_path", sfm_dir,
            "--output_path", sfm_dir,
            "--Mapper.tri_ignore_two_view_tracks=true",
        ], check=True)

        # bundle adjustment (poses stay fixed)
        subprocess.run([
            "colmap", "bundle_adjuster",
            "--input_path", sfm_dir,
            "--output_path", sfm_dir,
            "--BundleAdjustment.refine_extrinsics=false",
        ], check=True)
I noticed that the size of the images is 1554×1162 instead of 1600×1200. Why do the images need to be resized? I would really appreciate it if you could share your DTU preprocessing code. Thanks a lot!
Hi, DTU's intrinsics are not in ideal pinhole format; that is to say, the principal point is not at the image center. So I crop each image to make it an ideal pinhole image before running COLMAP. Here is the code you can refer to:
# Crop every image symmetrically around its principal point so that the
# principal point ends up at the image center (ideal pinhole), then rewrite
# the intrinsics to match the cropped image.
images = [cv2.imread(image_paths[idx]) for idx in range(n_images)]
new_images = []
for idx in range(n_images):
    cx, cy = intrinsics[idx][0, 2], intrinsics[idx][1, 2]
    image = images[idx]
    if image is None:
        # cv2.imread returns None instead of raising when a file is unreadable.
        raise FileNotFoundError(f"Could not read image: {image_paths[idx]}")
    H, W, _ = image.shape
    # Largest half-extents that stay inside the image on both sides of (cx, cy).
    W2 = min((W - cx), cx)
    H2 = min((H - cy), cy)
    crop_box = (
        int(cx - W2),  # left
        int(cx + W2),  # right
        int(cy - H2),  # top
        int(cy + H2),  # bottom
    )
    new_image = image[crop_box[2]:crop_box[3], crop_box[0]:crop_box[1]]
    # Keep the cropped image; the list was previously created but never filled.
    new_images.append(new_image)
    # NOTE(review): int() truncation above can leave a sub-pixel offset between
    # the true principal point and the new center - confirm this is acceptable.
    intrinsics[idx][0, 2] = new_image.shape[1] / 2
    intrinsics[idx][1, 2] = new_image.shape[0] / 2
@hbb1 Hi, have you ever tested using the same way as IDR to load the original size DTU images for training (like gaussian surfels)? Will that affect the performance? Thanks.
@leonwu0108 What do you mean by the original size: the original 1600x1200 or the upscaled resolution? I don't think using more pixels will hurt performance, although you may need to adjust some hyper-parameters accordingly. What really matters is the intrinsics, since both 3DGS and 2DGS only support an ideal pinhole camera.
Thanks for the released code! I notice that you generate SFM point cloud with COLMAP given GT camera poses (cameras.npz). Could you share the scripts of this step as I want to test more DTU scenes? Thanks a lot!