Added multithreading to download the images much faster.

import os
import threading
from concurrent.futures import ThreadPoolExecutor, as_completed

import numpy as np
import pandas as pd
import requests

# Read the pattern table from disk, then keep a plain row array alongside the
# DataFrame for positional access by the rest of the script.
CSV_PATH = 'dress_patterns.csv'
dress_patterns_df = pd.read_csv(CSV_PATH)
dress_patterns = dress_patterns_df.values

# Create a dataset folder with a nested folder for each category.

# Show the working directory before anything is created.
print(os.listdir())

# exist_ok=True lets the script be re-run without failing on folders that a
# previous run already created (os.mkdir would raise FileExistsError).
os.makedirs('dataset_category', exist_ok=True)

# Column 1 of every row holds its category (the download loop reads the
# category from the same position). Collect the distinct names; non-string
# cells such as missing values are skipped because they cannot name a folder.
categories = sorted(
    {row[1] for row in dress_patterns if isinstance(row[1], str)}
)

for cat in categories:
    print(cat)
    os.makedirs('dataset_category/' + cat, exist_ok=True)

# Show the folders that now exist.
print(os.listdir('dataset_category'))

def download_image(url, category, unit_id, i, timeout=30):
    """Download one image and store it in its category folder.

    The response body is written to
    ``dataset_category/<category>/<unit_id>.jpg``. Any failure is reported on
    stdout instead of being raised, so one bad item does not stop the other
    downloads.

    Args:
        url: Address of the image to fetch.
        category: Name of the sub-folder of ``dataset_category`` to write to.
        unit_id: Identifier used as the file name (without extension).
        i: Row index, used only to identify the item in the error message.
        timeout: Seconds to wait for the server before giving up; without a
            timeout a stalled connection would block its thread forever.
    """
    try:
        path = 'dataset_category/' + category + '/' + str(unit_id) + '.jpg'
        r = requests.get(url, allow_redirects=True, timeout=timeout)
        # Do not save an HTTP error page as if it were an image.
        r.raise_for_status()
        # The context manager closes the file even if the write fails.
        with open(path, 'wb') as image_file:
            image_file.write(r.content)
    except Exception:
        # Best-effort boundary for a worker thread: report and carry on.
        # Unlike a bare ``except:``, this lets KeyboardInterrupt and
        # SystemExit propagate.
        print('ERROR at: ', i)

# Save each image in its respective category folder.

# Download every image using a bounded pool of worker threads. A pool starts
# the next download as soon as any worker is free, whereas joining fixed
# batches of threads makes each batch wait for its slowest download.
MAX_WORKERS = 5  # limit the number of simultaneous downloads to 5

total = len(dress_patterns)
with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
    # Row layout used here: column 0 = unit id, column 1 = category,
    # column 3 = image URL. Each future is mapped to its row index so a
    # failure can be attributed to the right row.
    futures = {
        executor.submit(download_image, row[3], row[1], row[0], i): i
        for i, row in enumerate(dress_patterns)
    }

    # Report progress as downloads finish rather than as they are queued.
    for finished, future in enumerate(as_completed(futures), start=1):
        # An exception that escaped the worker is stored on the future;
        # surface it here instead of losing it silently.
        if future.exception() is not None:
            print('ERROR at: ', futures[future])
        if finished % 5 == 0:
            print(finished, '/', total)

# Leaving the ``with`` block waits for any remaining downloads to complete.

aakashjhawar / dress-pattern-recognition-using-CNN

Issues #3

category

create a folder dataset and nested folder of category

save image in respective category folder.

wait for any remaining threads to complete