hanzhanggit / StackGAN

MIT License
1.86k stars 455 forks source link

UnicodeDecodeError while stage - 1 training #42

Closed Shubham-kale closed 6 years ago

Shubham-kale commented 6 years ago

This is the code I am trying to run in the Anaconda IDE:

Training Step 1: train Stage-I GAN

python stageI/run_exp.py --cfg stageI/cfg/birds.yml --gpu 0

from __future__ import division
from __future__ import print_function

import dateutil import dateutil.tz import datetime import argparse import pprint

from Project.misc.datasets import TextDataset from Project.stageI.model import CondGAN from Project.stageI.trainer import CondGANTrainer from Project.misc.utils import mkdir_p from Project.misc.config import cfg, cfg_from_file

import codecs


# module and function addition by shubham kale : start
def replace_line(file_name, line_num, text):
    """Overwrite a single line of a UTF-8 text file in place.

    The whole file is read into memory, the entry at index ``line_num`` is
    replaced by ``text``, and the file is rewritten.

    Args:
        file_name: Path of the file to modify.
        line_num: Zero-based index of the line to replace (negative indices
            count from the end, as with any list index).
        text: Replacement content. It is written verbatim, so it must carry
            its own trailing newline if one is wanted.

    Raises:
        IndexError: If ``line_num`` is outside the file's line range. The
            file is left untouched in that case because it is only reopened
            for writing after the replacement succeeded.
    """
    # Context managers close the handles even when the index lookup or the
    # decoding fails; the manual close() calls leaked the reader on error.
    with codecs.open(file_name, 'r', encoding='utf-8') as f:
        lines = f.readlines()
    lines[line_num] = text
    with codecs.open(file_name, 'w', encoding='utf-8') as w:
        w.writelines(lines)

End of additions

# Driver script for Stage-I training/evaluation: prints the active config,
# loads the text/image dataset, builds the CondGAN model and its trainer,
# then either trains or evaluates depending on cfg.TRAIN.FLAG.

import sys  # change by shubham kale line -1
from importlib import reload  # line - 2
reload(sys)  # line - 3

# NOTE(review): the reporter's own inline comment calls this meaningless in
# Python 3. sys.setdefaultencoding is a Python 2-era hook, and the posted log
# shows execution continuing past this point, so this line was presumably
# commented out in the script that actually ran - confirm.
sys.setdefaultencoding('utf8')

print('Using config:')
pprint.pprint(cfg)

# Timestamp (local timezone) used to make the checkpoint/log directory unique.
now = datetime.datetime.now(dateutil.tz.tzlocal())
timestamp = now.strftime('%Y%m%d%H%M_%S')

datadir = 'Data/%s' % cfg.DATASET_NAME

# NOTE(review): this assignment replaces the config-derived path above with a
# hard-coded directory, so cfg.DATASET_NAME no longer selects the data that
# is loaded (it is still used for the log directory name below).
datadir = 'Data/flowers'
dataset = TextDataset(datadir, cfg.EMBEDDING_TYPE, 1)
# The test split is loaded unconditionally; the train split only when training.
filename_test = '%s/test' % (datadir)
dataset.test = dataset.get_data(filename_test)
if cfg.TRAIN.FLAG:
    filename_train = '%s/train' % (datadir)
    dataset.train = dataset.get_data(filename_train)
    # Fresh output directory named after dataset, config name and timestamp.
    ckt_logs_dir = "cktlogs/%s/%s%s" %(cfg.DATASET_NAME, cfg.CONFIG_NAME, timestamp)
    mkdir_p(ckt_logs_dir)
else:
    # Reuse the pretrained model's path up to (not including) '.ckpt' as the
    # output directory. str.find returns -1 when '.ckpt' is absent, in which
    # case the slice just drops the last character.
    s_tmp = cfg.TRAIN.PRETRAINED_MODEL
    ckt_logs_dir = s_tmp[:s_tmp.find('.ckpt')]

model = CondGAN( image_shape=dataset.image_shape )

algo = CondGANTrainer( model=model, dataset=dataset, ckt_logs_dir=ckt_logs_dir )
if cfg.TRAIN.FLAG:
    algo.train()
else:
    ''' For every input text embedding/sentence in the training and test datasets, generate cfg.TRAIN.NUM_COPY images with randomness from noise z and conditioning augmentation.'''
    algo.evaluate()

The output error log is:

Using config: {'CONFIG_NAME': '', 'DATASET_NAME': 'birds', 'EMBEDDING_TYPE': 'cnn-rnn', 'GAN': {'DF_DIM': 64, 'EMBEDDING_DIM': 128, 'GF_DIM': 128, 'NETWORK_TYPE': 'default'}, 'GPU_ID': 0, 'TEST': {'BATCH_SIZE': 64, 'CAPTION_PATH': '', 'HR_IMSIZE': 256, 'LR_IMSIZE': 64, 'NUM_COPY': 16, 'PRETRAINED_MODEL': ''}, 'TRAIN': {'BATCH_SIZE': 64, 'B_WRONG': True, 'COEFF': {'KL': 2.0}, 'COND_AUGMENTATION': True, 'DISCRIMINATOR_LR': 0.0002, 'FINETUNE_LR': False, 'FLAG': True, 'FT_LR_RETIO': 0.1, 'GENERATOR_LR': 0.0002, 'LR_DECAY_EPOCH': 50, 'MAX_EPOCH': 600, 'NUM_COPY': 4, 'NUM_EMBEDDING': 4, 'PRETRAINED_EPOCH': 600, 'PRETRAINED_MODEL': '', 'SNAPSHOT_INTERVAL': 2000}, 'Z_DIM': 100} images: (1155, 76, 76, 3)

UnicodeDecodeError Traceback (most recent call last)

in () 42 dataset = TextDataset(datadir, cfg.EMBEDDING_TYPE, 1) 43 filename_test = '%s/test' % (datadir) ---> 44 dataset.test = dataset.get_data(filename_test) 45 if cfg.TRAIN.FLAG: 46 filename_train = '%s/train' % (datadir) ~/Project/misc/datasets.py in get_data(self, pickle_path, aug_flag) 227 images = pickle.load(f) 228 images = np.array(images) --> 229 print('images: ', images.shape) 230 231 with open(pickle_path + self.embedding_filename, 'rb') as f: UnicodeDecodeError: 'ascii' codec can't decode byte 0xc4 in position 0: ordinal not in range(128)
Shubham-kale commented 6 years ago

The error goes away after changing the following lines in the `get_data` method of the `TextDataset` class in misc/datasets.py:

- `embeddings = pickle.load(f)` to `embeddings = pickle.load(f, encoding='latin1')`
- `list_filenames = pickle.load(f)` to `list_filenames = pickle.load(f, encoding='latin1')`
- `class_id = pickle.load(f)` to `class_id = pickle.load(f, encoding='latin1')`