Open mab85 opened 7 years ago
How can I use this code for the cifar10 dataset?
This is my code, adapted to run on cifar10, but as a novice in the GAN field I ran into some mistakes while running it.
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Reshape
from keras.layers.core import Activation
from keras.layers.normalization import BatchNormalization
from keras.layers.convolutional import UpSampling2D
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras.layers.core import Flatten
from keras.optimizers import SGD
from keras.datasets import cifar10
import numpy as np
from PIL import Image
import argparse
import math
# Generator model: produces images from noise.
def generator_model():
    model = Sequential()
    model.add(Dense(1024, input_dim=100))  # fully connected, 100 -> 1024
    model.add(Activation('tanh'))
    model.add(Dense(128*8*8))  # fully connected, 1024 -> 128*8*8
    model.add(BatchNormalization())
    model.add(Activation('tanh'))
    model.add(Reshape((8, 8, 128), input_shape=(128*8*8,)))  # 128*8*8 -> 8x8x128
    model.add(UpSampling2D(size=(2, 2)))  # upsampling (interpolation), 8x8x128 -> 16x16x128
    model.add(Conv2D(64, (5, 5), padding='same'))  # convolution, 16x16x128 -> 16x16x64
    model.add(Activation('tanh'))
    model.add(UpSampling2D(size=(2, 2)))  # upsampling, 16x16x64 -> 32x32x64
    model.add(Conv2D(1, (5, 5), padding='same'))  # convolution, 32x32x64 -> 32x32x1
    model.add(Activation('tanh'))
    return model
# Discriminator model: used to obtain the loss.
def discriminator_model():
    model = Sequential()
    model.add(
        Conv2D(64, (5, 5),
               padding='same',
               input_shape=(32, 32, 3))  # cifar10 image format
    )
    model.add(Activation('tanh'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Conv2D(128, (5, 5)))
    model.add(Activation('tanh'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Flatten())
    model.add(Dense(1024))
    model.add(Activation('tanh'))
    model.add(Dense(1))
    model.add(Activation('sigmoid'))  # decides whether an image comes from the real data or from the generator
    return model

def generator_containing_discriminator(g, d):  # freeze D and train G: G gets its loss through D
    model = Sequential()
    model.add(g)  # g = generator_model()
    d.trainable = False  # D is frozen inside the combined model, so its weights are not updated while G trains
    model.add(d)  # d = discriminator_model()
    return model  # the combined model is G + D
def combine_images(generated_images):  # generated_images = g.predict(noise, verbose=0); tiles a batch of images into one grid
    num = generated_images.shape[0]
    width = int(math.sqrt(num))
    height = int(math.ceil(float(num)/width))
    shape = generated_images.shape[1:3]
    image = np.zeros((height*shape[0], width*shape[1]),
                     dtype=generated_images.dtype)
    for index, img in enumerate(generated_images):
        i = int(index/width)
        j = index % width
        image[i*shape[0]:(i+1)*shape[0], j*shape[1]:(j+1)*shape[1]] = \
            img[:, :, 0]  # note: only channel 0 is copied, so the grid is single-channel
    return image
def train(BATCH_SIZE):
    # x_train, x_test: uint8 arrays of RGB image data with shape (num_samples, 32, 32, 3)
    # y_train, y_test: uint8 arrays of category labels (integers in range 0-9); in short, x is images and y is labels
    (x_train, y_train), (x_test, y_test) = cifar10.load_data()  # load the dataset
    X_train = (x_train.astype(np.float32) - 127.5)/127.5  # scale pixels to [-1, 1]
    # X_train = X_train[:, :, :, None]  # the original MNIST data was [60000, 28, 28, 1]
    # X_test = X_test[:, :, :, None]
    # X_train.shape, y_train.shape, X_test.shape, y_test.shape
    # ((50000, 32, 32, 3), (50000, 1), (10000, 32, 32, 3), (10000, 1))
    # X_train = X_train.reshape((X_train.shape[0], 1) + X_train.shape[1:])  # [batch_size, channel, width, height], e.g. [60000, 1, 28, 28] with the Theano backend
    d = discriminator_model()  # d is the discriminator model
    g = generator_model()  # g is the generator model
    d_on_g = generator_containing_discriminator(g, d)  # combined model (freeze D, train G)
    d_optim = SGD(lr=0.0005, momentum=0.9, nesterov=True)  # optimizer for the discriminator
    g_optim = SGD(lr=0.0005, momentum=0.9, nesterov=True)  # optimizer for the generator
    g.compile(loss='binary_crossentropy', optimizer="SGD")  # compile the generator
    d_on_g.compile(loss='binary_crossentropy', optimizer=g_optim)  # G+D model: D acts as a classification head on top of G
    d.trainable = True  # make the discriminator trainable again
    d.compile(loss='binary_crossentropy', optimizer=d_optim)  # compile the discriminator
    for epoch in range(100):
        print("Epoch is", epoch)
        print("Number of batches", int(X_train.shape[0]/BATCH_SIZE))  # 50000/BATCH_SIZE = batch_num
        for index in range(int(X_train.shape[0]/BATCH_SIZE)):  # index is the batch number
            noise = np.random.uniform(-1, 1, size=(BATCH_SIZE, 100))  # noise: numpy.random.uniform(low=0.0, high=1.0, size=None); this block trains D
            image_batch = X_train[index*BATCH_SIZE:(index+1)*BATCH_SIZE]  # take one batch of real images
            generated_images = g.predict(noise, verbose=0)  # generate images; predict(self, x, batch_size=None, verbose=0, steps=None)
            if index % 20 == 0:  # save a grid of generated images every twenty batches
                image = combine_images(generated_images)  # tile the batch into one grid image
                image = image*127.5+127.5  # map pixels back to the original range (0, 255)
                Image.fromarray(image.astype(np.uint8)).save(  # file name format: epoch_batch.png
                    str(epoch)+"_"+str(index)+".png")
            X = np.concatenate((image_batch, generated_images))  # real images followed by generated images
            y = [1] * BATCH_SIZE + [0] * BATCH_SIZE  # the first BATCH_SIZE real images get label 1, the next BATCH_SIZE fake images get label 0
            d_loss = d.train_on_batch(X, y)  # train the discriminator on this batch
            print("batch %d d_loss : %f" % (index, d_loss))  # print the discriminator loss every batch
            noise = np.random.uniform(-1, 1, (BATCH_SIZE, 100))  # fresh noise
            d.trainable = False  # freeze D: while G trains, D is not updated
            g_loss = d_on_g.train_on_batch(noise, [1] * BATCH_SIZE)  # the generated images are scored against the "real" label
            d.trainable = True  # unfreeze D
            print("batch %d g_loss : %f" % (index, g_loss))  # print the generator loss every batch
            if index % 10 == 9:  # save the weights every ten batches
                g.save_weights('generator', True)
                d.save_weights('discriminator', True)
def generate(BATCH_SIZE, nice=False):  # use the trained generator to produce images
    g = generator_model()
    g.compile(loss='binary_crossentropy', optimizer="SGD")
    g.load_weights('generator')
    if nice:  # keep only the images the discriminator scores highest
        d = discriminator_model()
        d.compile(loss='binary_crossentropy', optimizer="SGD")
        d.load_weights('discriminator')
        noise = np.random.uniform(-1, 1, (BATCH_SIZE*20, 100))
        generated_images = g.predict(noise, verbose=1)
        d_pret = d.predict(generated_images, verbose=1)
        index = np.arange(0, BATCH_SIZE*20)
        index.resize((BATCH_SIZE*20, 1))
        pre_with_index = list(np.append(d_pret, index, axis=1))  # pair each score with its image index
        pre_with_index.sort(key=lambda x: x[0], reverse=True)
        nice_images = np.zeros((BATCH_SIZE,) + generated_images.shape[1:3], dtype=np.float32)
        nice_images = nice_images[:, :, :, None]
        for i in range(BATCH_SIZE):
            idx = int(pre_with_index[i][1])
            nice_images[i, :, :, 0] = generated_images[idx, :, :, 0]
        image = combine_images(nice_images)
    else:
        noise = np.random.uniform(-1, 1, (BATCH_SIZE, 100))
        generated_images = g.predict(noise, verbose=1)
        image = combine_images(generated_images)
    image = image*127.5+127.5
    Image.fromarray(image.astype(np.uint8)).save(
        "generated_image.png")
def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument("--mode", type=str)  # mode = 'train' or 'generate'
    parser.add_argument("--batch_size", type=int, default=128)
    parser.add_argument("--nice", dest="nice", action="store_true")
    parser.set_defaults(nice=False)
    args = parser.parse_args()
    return args

if __name__ == "__main__":
    args = get_args()
    if args.mode == "train":
        train(BATCH_SIZE=args.batch_size)
    elif args.mode == "generate":
        generate(BATCH_SIZE=args.batch_size, nice=args.nice)
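For context, I run the script like this (assuming it is saved as dcgan_cifar10.py; the filename is just an example):

python dcgan_cifar10.py --mode train --batch_size 128
python dcgan_cifar10.py --mode generate --batch_size 128 --nice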
Can you help me to solve it?
Thank you very much.
The image batch you have is of size 32*32*3, but the generated images are 32*32*1, so you won't be able to concatenate these two batches of images. Just changing model.add(Conv2D(1, (5, 5), padding='same')) to model.add(Conv2D(3, (5, 5), padding='same')) should work.
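For reference, a minimal sketch of the tail of generator_model() with that one change applied (everything before these lines stays as in the code above):

    model.add(UpSampling2D(size=(2, 2)))  # upsampling, 16x16x64 -> 32x32x64
    model.add(Conv2D(3, (5, 5), padding='same'))  # 3 output channels to match the 32x32x3 CIFAR-10 images
    model.add(Activation('tanh'))
    return model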
@jacobgil @jusjusjus @jacobsagivtech @laoluzi How can I use this code for another dataset or my own dataset, and how can I access the discriminator network as an image feature extractor?
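One possible way to reuse the trained discriminator as a feature extractor, sketched as an assumption on top of the code above (it relies on discriminator_model() and the saved 'discriminator' weights; `images` is a hypothetical input array): Keras can wrap an intermediate layer's output in a new Model, and in the discriminator defined above the tanh activation after Dense(1024) sits three layers from the end.

from keras.models import Model

d = discriminator_model()
d.load_weights('discriminator')
# Expose the 1024-dim tanh activation (3 layers from the end) as the feature output.
feature_extractor = Model(inputs=d.input, outputs=d.layers[-3].output)
# images: hypothetical array of shape (n, 32, 32, 3), scaled to [-1, 1] like the training data
features = feature_extractor.predict(images)  # shape (n, 1024)

For another dataset, the main things to change are input_shape=(32, 32, 3) in discriminator_model() and the Reshape/Conv2D sizes in generator_model(), so that the generator's output shape matches the new images.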