mapbox / robosat

Semantic segmentation on aerial and satellite imagery. Extracts features such as: buildings, parking lots, roads, water, clouds
MIT License
2.01k stars 382 forks source link

rs_subset should accept multiple file inputs #189

Open markmester opened 4 years ago

markmester commented 4 years ago

Similar to #187

With the introduction of the batching feature extraction (#148), the inputs to rs_subset are likely going to be multiple .tiles files. My current workaround is to combine all of these files prior to running rs_subset:

import os
import csv
import argparse
from random import shuffle

def _read_tile_rows(paths):
    """Return the non-empty rows of every space-delimited tile file in *paths*."""
    rows = []
    for path in paths:
        print(f"processing file: {path}")

        with open(path, newline='') as handle:
            reader = csv.reader(handle, delimiter=' ', quotechar='|')
            # Blank lines parse to empty rows; they carry no data and would
            # otherwise be counted when computing the split sizes.
            rows.extend(row for row in reader if row)
    return rows


def _split_rows(rows):
    """Slice *rows* into (train, validation, evaluation) at 80% / 10% / rest."""
    train_end = int(len(rows) * 0.80)
    validation_end = train_end + int(len(rows) * 0.10)
    return rows[:train_end], rows[train_end:validation_end], rows[validation_end:]


def main(argv=None):
    """Merge the tile files of a directory and split them into three subsets.

    Every regular file in ``dir`` whose name ends with ``--ext`` (default
    ``tiles``) is read, the combined rows are shuffled, and the result is
    written to ``train.tiles``, ``validation.tiles`` and ``evaluation.tiles``
    inside ``out`` (created if missing). The first 80% of the shuffled rows go
    to train, the next 10% to validation and the remainder to evaluation.

    Args:
        argv: Optional list of command-line arguments. When ``None`` (the
            default) argparse falls back to ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('dir', type=str)
    parser.add_argument('out', type=str)
    parser.add_argument('--ext', type=str, default='tiles')
    args = parser.parse_args(argv)

    os.makedirs(args.out, exist_ok=True)

    split_names = ("train.tiles", "validation.tiles", "evaluation.tiles")
    out_paths = [os.path.join(args.out, name) for name in split_names]

    # When `out` is the same directory as `dir`, the files written by an
    # earlier run would match the extension and be merged in again,
    # duplicating tiles. Exclude this run's own output paths from the inputs.
    own_outputs = {os.path.abspath(path) for path in out_paths}

    inputs = []
    for entry in sorted(os.listdir(args.dir)):
        path = os.path.join(args.dir, entry)
        if not entry.endswith(args.ext) or not os.path.isfile(path):
            continue
        if os.path.abspath(path) in own_outputs:
            continue
        inputs.append(path)

    rows = _read_tile_rows(inputs)
    shuffle(rows)

    for out_path, subset in zip(out_paths, _split_rows(rows)):
        with open(out_path, 'w', newline='') as handle:
            # Mirror the reader's dialect so rows round-trip unchanged. An
            # escapechar equal to the delimiter is ambiguous, and newer
            # CPython releases appear to reject that combination outright.
            writer = csv.writer(handle, delimiter=' ', quotechar='|',
                                quoting=csv.QUOTE_MINIMAL)
            writer.writerows(subset)

# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()