With the introduction of the batching feature extraction (#148), the inputs to rs_subset are likely going to be multiple .tiles files. My current workaround is to combine all of these files prior to running rs_subset with the script below:
import os
import csv
import argparse
from random import shuffle
def _read_tile_rows(directory, ext):
    """Collect the rows of every tile CSV file in *directory*.

    Files are matched by suffix (``file.endswith(ext)``); note this is a
    plain suffix test, so ``--ext tiles`` also matches a file named
    ``mytiles`` — kept for backward compatibility. Files are processed in
    sorted order so the pre-shuffle row order is deterministic across
    platforms. Rows are parsed with the space delimiter / ``|`` quotechar
    convention used by .tiles files.
    """
    rows = []
    for file in sorted(os.listdir(directory)):
        if file.endswith(ext):
            csv_file = os.path.join(directory, file)
            print(f"processing file: {csv_file}")
            with open(csv_file, newline='') as csvfile:
                reader = csv.reader(csvfile, delimiter=' ', quotechar='|')
                rows.extend(reader)
    return rows


def _split_80_10_10(rows):
    """Split *rows* into (train, validation, evaluation) = 80% / 10% / rest.

    The boundaries truncate (``int``), so the evaluation slice absorbs any
    rounding remainder; the three slices always partition *rows* exactly.
    Handles the empty-input case (three empty lists).
    """
    n_train = int(len(rows) * 0.80)
    n_val = int(len(rows) * 0.10)
    return rows[:n_train], rows[n_train:n_train + n_val], rows[n_train + n_val:]


def _write_tiles(path, rows):
    """Write *rows* to *path* in .tiles format (space-delimited, unquoted)."""
    with open(path, 'w', newline='') as f:
        writer = csv.writer(f, delimiter=' ', escapechar=' ',
                            quoting=csv.QUOTE_NONE)
        for row in rows:
            writer.writerow(row)


def main():
    """Combine all tile CSV files in a directory and split them 80/10/10.

    Reads every ``*.<ext>`` file under ``dir``, shuffles the combined rows,
    and writes ``train.tiles`` (80%), ``validation.tiles`` (10%) and
    ``evaluation.tiles`` (remainder) into ``out`` (created if missing).
    """
    parser = argparse.ArgumentParser(
        description="Combine .tiles files and split them 80/10/10.")
    parser.add_argument('dir', type=str,
                        help="directory containing the input tile files")
    parser.add_argument('out', type=str,
                        help="output directory for the split files")
    parser.add_argument('--ext', type=str, default='tiles',
                        help="filename suffix of the input files to combine")
    args = parser.parse_args()
    os.makedirs(args.out, exist_ok=True)

    # "rows" (not "all") — the original name shadowed the builtin all().
    rows = _read_tile_rows(args.dir, args.ext)
    shuffle(rows)
    train, validation, evaluation = _split_80_10_10(rows)

    for name, subset in {"train.tiles": train,
                         "validation.tiles": validation,
                         "evaluation.tiles": evaluation}.items():
        _write_tiles(os.path.join(args.out, name), subset)
# Script entry point: run the combine-and-split workflow only when this
# file is executed directly, not when imported as a module.
if __name__ == "__main__":
main()
Similar to #187
With the introduction of the batching feature extraction (#148), the inputs to rs_subset are likely going to be multiple .tiles files. My current workaround is to combine all of these files prior to running rs_subset with the script above.