I have installed CNTK 2.5.1 for GPU and am trying to run a few tests to make sure everything is working. The problem is that I'm behind a corporate proxy and can't download anything from the command line. I'm trying the following test:
When I execute the code that should download and extract the data, I get blocked, so I downloaded the files manually. I put the files under the cntk folder and executed the code again. The code still tries to pull the data from the network, even though it should first check whether the files are available locally (which they are, because I downloaded them).
Here is the relevant code:
# By default, we store data in the Examples/Image directory under CNTK
# If you're running this _outside_ of CNTK, consider changing this
# NOTE: all three paths are relative, so they resolve against the current
# working directory of the process, not against the location of this file.
# Manually downloaded files must be placed under these resolved directories
# to be picked up as "locally cached".
data_root = os.path.join('..', 'Examples', 'Image')
# Root folder under which each dataset gets its own sub-directory.
datasets_path = os.path.join(data_root, 'DataSets')
# Output location; not used by the download helpers visible in this snippet.
output_path = os.path.join('.', 'temp', 'Output')
def ensure_exists(path):
    '''Create directory `path` (and any missing parents) unless something already exists there.'''
    if os.path.exists(path):
        # Nothing to do: the path is already present on disk.
        return
    os.makedirs(path)
def write_to_file(file_path, img_paths, img_labels):
    '''Write a map file with one "<absolute image path> TAB <label>" line per image.

    `img_paths` and `img_labels` are parallel sequences; the label for the
    i-th path is taken from `img_labels[i]`. Any existing file at
    `file_path` is overwritten.
    '''
    line_template = '%s\t%s\n'
    with open(file_path, 'w+') as map_file:
        for index, img_path in enumerate(img_paths):
            map_file.write(line_template % (os.path.abspath(img_path), img_labels[index]))
def download_unless_exists(url, filename, max_retries=3):
    '''Download `url` to `filename` unless that file already exists, with retry.

    The existence check is made against `filename` exactly as given, so a
    relative path is resolved against the current working directory.

    Args:
        url: Source location handed to `urlretrieve`.
        filename: Local destination path; if it exists, nothing is downloaded.
        max_retries: Maximum number of download attempts. Values of 1 or
            less result in a single attempt.

    Raises:
        Whatever exception the last failed `urlretrieve` call raised, once
        the attempts are exhausted.
    '''
    if os.path.exists(filename):
        print('Reusing locally cached: ', filename)
        return

    print('Starting download of {} to {}'.format(url, filename))
    retry_cnt = 0
    while True:
        try:
            urlretrieve(url, filename)
        except Exception:
            # `except Exception` (not a bare except) lets Ctrl-C / SystemExit
            # abort immediately instead of being retried.
            # A failed transfer can leave a truncated file behind; remove it
            # so a later run does not mistake it for a valid cached copy.
            # This is safe because the file did not exist when we started.
            if os.path.exists(filename):
                os.remove(filename)
            retry_cnt += 1
            # `>=` rather than `==` so max_retries <= 0 cannot loop forever.
            if retry_cnt >= max_retries:
                print('Exceeded maximum retry count, aborting.')
                raise
            print('Failed to download, retrying.')
            # Random back-off of 1-9 seconds before the next attempt.
            time.sleep(np.random.randint(1, 10))
        else:
            print('Download completed.')
            return
def download_model(model_root = os.path.join(data_root, 'PretrainedModels')):
    '''Make sure the ResNet_18 model file is present under `model_root` and return its local path.

    The directory is created if needed; the file is downloaded only when it
    is not already present at the returned path.
    '''
    ensure_exists(model_root)
    local_model_path = os.path.join(model_root, 'ResNet_18.model')
    download_unless_exists(
        'https://www.cntk.ai/Models/ResNet/ResNet_18.model',
        local_model_path,
    )
    return local_model_path
def download_flowers_dataset(dataset_root = os.path.join(datasets_path, 'Flowers')):
    '''Fetch the 102-flowers data, unpack it and write the image map files.

    Each step is skipped when its result is already on disk under
    `dataset_root`: the three downloaded files, the `extracted` directory,
    and the map files (keyed on the training map only).

    Args:
        dataset_root: Directory that holds the downloads, the extracted
            images and the generated map files. Created if missing.

    Returns:
        A dict with the keys 'data_folder', 'training_map', 'testing_map'
        and 'validation_map' (paths under `dataset_root`).
    '''
    # Local import: only needed to roll back a failed extraction.
    import shutil

    ensure_exists(dataset_root)
    flowers_uris = [
        'http://www.robots.ox.ac.uk/~vgg/data/flowers/102/102flowers.tgz',
        'http://www.robots.ox.ac.uk/~vgg/data/flowers/102/imagelabels.mat',
        'http://www.robots.ox.ac.uk/~vgg/data/flowers/102/setid.mat'
    ]
    flowers_files = [
        os.path.join(dataset_root, '102flowers.tgz'),
        os.path.join(dataset_root, 'imagelabels.mat'),
        os.path.join(dataset_root, 'setid.mat')
    ]
    for uri, local_file in zip(flowers_uris, flowers_files):
        download_unless_exists(uri, local_file)

    tar_dir = os.path.join(dataset_root, 'extracted')
    if not os.path.exists(tar_dir):
        print('Extracting {} to {}'.format(flowers_files[0], tar_dir))
        os.makedirs(tar_dir)
        extracted = False
        try:
            # NOTE(security): extractall() trusts the member paths inside the
            # archive; acceptable only because the archive comes from the
            # fixed URL above. Do not reuse this on untrusted archives.
            with tarfile.open(flowers_files[0]) as archive:
                archive.extractall(path=tar_dir)
            extracted = True
        finally:
            # The mere existence of tar_dir is what marks the data as
            # extracted, so a failed or interrupted extraction must not
            # leave the directory behind.
            if not extracted:
                shutil.rmtree(tar_dir, ignore_errors=True)
    else:
        print('{} already extracted to {}, using existing version'.format(flowers_files[0], tar_dir))

    flowers_data = {
        'data_folder': dataset_root,
        'training_map': os.path.join(dataset_root, '6k_img_map.txt'),
        'testing_map': os.path.join(dataset_root, '1k_img_map.txt'),
        'validation_map': os.path.join(dataset_root, 'val_map.txt')
    }

    if not os.path.exists(flowers_data['training_map']):
        print('Writing map files ...')
        # get image paths and 0-based image labels
        image_paths = np.array(sorted(glob.glob(os.path.join(tar_dir, 'jpg', '*.jpg'))))
        image_labels = loadmat(flowers_files[1])['labels'][0]
        image_labels -= 1

        # read set information from .mat file (ids are 1-based, hence the -1)
        setid = loadmat(flowers_files[2])
        # Confusingly the dataset's training split ('trnid') contains 1k images
        # and its test split ('tstid') contains 6k images.
        # We swap them, because we want to train on more data: the 6k map is
        # built from 'tstid' and the 1k map from 'trnid'.
        idx_train = setid['tstid'][0] - 1
        idx_test = setid['trnid'][0] - 1
        idx_val = setid['valid'][0] - 1

        write_to_file(flowers_data['training_map'], image_paths[idx_train], image_labels[idx_train])
        write_to_file(flowers_data['testing_map'], image_paths[idx_test], image_labels[idx_test])
        write_to_file(flowers_data['validation_map'], image_paths[idx_val], image_labels[idx_val])
        print('Map files written, dataset download and unpack completed.')
    else:
        print('Using cached map files.')

    return flowers_data
def download_animals_dataset(dataset_root = os.path.join(datasets_path, 'Animals')):
    '''Fetch and unpack the Animals dataset; return its train/test folder paths.

    The zip is downloaded into `dataset_root` unless already present, and is
    unpacked only when `dataset_root/Test` does not exist yet.

    Returns:
        A dict with the keys 'training_folder' and 'testing_folder'.
    '''
    ensure_exists(dataset_root)
    archive_path = os.path.join(dataset_root, 'Animals.zip')
    download_unless_exists('https://www.cntk.ai/DataSets/Animals/Animals.zip', archive_path)

    train_dir = os.path.join(dataset_root, 'Train')
    test_dir = os.path.join(dataset_root, 'Test')

    if os.path.exists(test_dir):
        print('Reusing previously extracted Animals data.')
    else:
        # Unpacked into the parent of dataset_root -- presumably the archive
        # carries its own top-level 'Animals' folder; verify against the zip.
        unpack_target = os.path.join(dataset_root, '..')
        with zipfile.ZipFile(archive_path) as archive:
            print('Extracting {} to {}'.format(archive_path, dataset_root))
            archive.extractall(path=unpack_target)
            print('Extraction completed.')

    return {
        'training_folder': train_dir,
        'testing_folder': test_dir
    }
# Driver: runs as soon as this code is executed. Each helper reuses files
# already present under its dataset directory and only downloads what is missing.
print('Downloading flowers and animals data-set, this might take a while...')
# Dict of flowers map-file paths plus the data folder.
flowers_data = download_flowers_dataset()
# Dict with the Animals training and testing folder paths.
animals_data = download_animals_dataset()
print('All data now available to the notebook!')
I have installed CNTK 2.5.1 for GPU and am trying to run a few tests to make sure everything is working. The problem is that I'm behind a corporate proxy and can't download anything from the command line. I'm trying the following test:
https://cntk.ai/pythondocs/CNTK_301_Image_Recognition_with_Deep_Transfer_Learning.html
When I execute the code that should download and extract the data, I get blocked, so I downloaded the files manually. I put the files under the cntk folder and executed the code again. The code still tries to pull the data from the network, even though it should first check whether the files are available locally (which they are, because I downloaded them).
Here is the relevant code:
Thank you!