Closed 1ssb closed 1 year ago
Any updates on this? I think the data preparation part is missing in the implementation.
I'm hitting the same issue as the second point. Any updates?
You need to load the .jpg.npy files as arrays and then save them back as .jpg files. You can use the following script:
# Code by 1ssb
# Supported only on Python 3
import os
import cv2
import numpy as np
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm
def process_file(jpg_npy_path, output_path):
    """Convert one saved image array into an image file on disk.

    The array is loaded with ``np.load``, converted from RGB to BGR channel
    order (``cv2.imwrite`` expects BGR), and written to ``output_path``.

    Args:
        jpg_npy_path: Path of the ``.npy`` file that holds the image array.
        output_path: Destination path of the image file to write.

    Returns:
        1 when the image was written successfully.

    Raises:
        OSError: If ``cv2.imwrite`` reports that the image was not written.
    """
    image_data = np.load(jpg_npy_path)
    image_data_bgr = cv2.cvtColor(image_data, cv2.COLOR_RGB2BGR)
    # cv2.imwrite signals most failures (e.g. a missing output directory) by
    # returning False rather than raising, so the result must be checked;
    # otherwise a failed write would be counted as a success.
    if not cv2.imwrite(output_path, image_data_bgr):
        raise OSError(f'Failed to write image to {output_path}')
    return 1  # Return 1 for successful processing
def process_extracted_dir(extracted_dir):
    """Convert the ``.jpg.npy`` files of one ``extracted`` directory to ``.jpg``.

    Each ``<name>.jpg.npy`` in ``extracted_dir`` is converted with
    ``process_file`` and written as ``<name>.jpg`` into the parent of
    ``extracted_dir``. Files whose output already exists are skipped, so an
    interrupted run can be resumed by calling the function again. One line is
    appended to ``progress.txt`` in the parent directory per converted file.

    Args:
        extracted_dir: Directory that contains the ``.jpg.npy`` files.

    Returns:
        None.
    """
    # normpath drops a trailing separator, which would otherwise make
    # dirname() return extracted_dir itself instead of its parent.
    parent_dir = os.path.dirname(os.path.normpath(extracted_dir))
    progress_file_path = os.path.join(parent_dir, 'progress.txt')

    tasks = []
    for name in sorted(os.listdir(extracted_dir)):
        if not name.endswith('.jpg.npy'):
            continue
        # Strip only the trailing '.npy'; str.replace would also rewrite a
        # '.jpg.npy' that happens to occur earlier in the file name.
        output_path = os.path.join(parent_dir, name[:-len('.npy')])
        # Decide per file instead of per directory: a leftover progress file
        # or a few existing .jpg files from an interrupted run must not
        # prevent the remaining files from being converted.
        if not os.path.exists(output_path):
            tasks.append((os.path.join(extracted_dir, name), output_path))

    if not tasks:
        # Nothing left to convert (already complete, or no input files).
        return

    with ThreadPoolExecutor(max_workers=16) as executor, \
            open(progress_file_path, 'a') as progress_file:
        # Process the .jpg.npy files in parallel
        results = executor.map(lambda args: process_file(*args), tasks)
        for result in tqdm(results, total=len(tasks)):
            # Update the progress file after each successful processing;
            # flush so the log survives an interruption.
            progress_file.write(f'Successfully processed {result}\n')
            progress_file.flush()
def process_test_directory(test_directory):
    """Run ``process_extracted_dir`` on every ``<subdir>/extracted`` directory.

    Every immediate subdirectory of ``test_directory`` that contains a
    directory named ``extracted`` is processed; the directories are handled
    in parallel by a thread pool.

    Args:
        test_directory: Root directory whose subdirectories are scanned.

    Returns:
        None.

    Raises:
        RuntimeError: If processing failed for at least one directory. All
            directories are attempted before the error is raised; the first
            underlying exception is attached as the cause.
    """
    extracted_dirs = []
    for entry in os.listdir(test_directory):
        candidate = os.path.join(test_directory, entry, 'extracted')
        # isdir() is False when the entry is not a directory, when it has no
        # 'extracted' child, and when 'extracted' is a plain file.
        if os.path.isdir(candidate):
            extracted_dirs.append(candidate)

    if not extracted_dirs:
        return

    with ThreadPoolExecutor(max_workers=16) as executor:
        # Process the extracted directories in parallel
        futures = [
            (path, executor.submit(process_extracted_dir, path))
            for path in extracted_dirs
        ]

    # Leaving the with-block waits for every task. The outcomes are inspected
    # explicitly because an exception raised in a worker is otherwise
    # discarded without any trace.
    failures = []
    for path, future in futures:
        error = future.exception()
        if error is not None:
            failures.append((path, error))

    if failures:
        failed_paths = ', '.join(path for path, _ in failures)
        raise RuntimeError(
            f'Processing failed for {len(failures)} of '
            f'{len(extracted_dirs)} directories: {failed_paths}'
        ) from failures[0][1]
# Usage: point test_directory at the folder whose subdirectories each contain
# an 'extracted' directory, then run this file as a script.
test_directory = './path/test'
if __name__ == '__main__':
    # Guarded so that importing this module does not start the conversion.
    process_test_directory(test_directory)
Can you explain the file layout that is used by default for the real estate testing? By default, the real estate data is saved under the root directory as:
root/[test or train]/[subdirectory_name]/data.npz/[image_name.jpg.npy]
Is this also the structure that you used? Data preparation is incomplete; kindly specify the complete file structure, i.e. how the files are to be saved for correct loading.