jacobgil / keras-dcgan

Keras implementation of Deep Convolutional Generative Adversarial Networks
973 stars 413 forks source link

Other dataset #21

Open mab85 opened 7 years ago

mab85 commented 7 years ago

@jacobgil @jusjusjus @jacobsagivtech @laoluzi How can use this code for another dataset or own dataset and how access the discriminator network as feature extractor of image ?

WangNuoWa commented 6 years ago

How can use this code for cifar10 dataset?
this is my code which is run on cifar10,but i encountered some mistakes in the course of running as a novice in GAN field.


from keras.model                 import Sequential  
from keras.layers                import Dense
from keras.layers                import Reshape 
from keras.layers.core           import Activation  
from keras.layers.normalization  import BatchNormalization
from keras.layers.convolutional  import UpSampling2D
from keras.layers.convolutional  import Conv2D, MaxPooling2D
from keras.layers.core           import Flatten
from keras.optimizers            import SGD  
from keras.datasets              import cifar10
import numpy                         as   np
from PIL                         import Image
import argparse
import math

# 生成器模型,主要用来生成图像
def generator_model():
    model = Sequential()
    model.add(Dense(input_dim=100, output_dim=1024))   # 全连接 100 to 1024
    model.add(Activation('tanh'))
    model.add(Dense(128*8*8))                          # 全连接 1024 to 128*8*8
    model.add(BatchNormalization())
    model.add(Activation('tanh'))
    model.add(Reshape((8, 8, 128), input_shape=(128*8*8,))) # 128*8*8 to 8*8*128
    model.add(UpSampling2D(size=(2, 2)))                    # 上采样,就是插值过程8*8*128 to 16*16*128
    model.add(Conv2D(64, (5, 5), padding='same'))           # 卷积,16*16*128  to 16*16*64
    model.add(Activation('tanh'))
    model.add(UpSampling2D(size=(2, 2)))                    # 上采样,16*16*64 to 32*32*64
    model.add(Conv2D(1, (5, 5), padding='same'))            # 卷积,32*32*64 to 32*32*1
    model.add(Activation('tanh'))
    return model

# 判别器模型,主要用来获得损失
def discriminator_model():
    model = Sequential()
    model.add(
            Conv2D(64, (5, 5),
            padding='same',
            input_shape=(32, 32, 3))                        # cifar10 图像格式
            )
    model.add(Activation('tanh'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Conv2D(128, (5, 5)))
    model.add(Activation('tanh'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Flatten())
    model.add(Dense(1024))
    model.add(Activation('tanh'))
    model.add(Dense(1))
    model.add(Activation('sigmoid'))                       # 判别图像是来自真实数据还是来自生成数据
    return model

def generator_containing_discriminator(g, d):   # 固定D而训练G,从D获得损失,主要用来训练G
    model = Sequential()
    model.add(g)                     #  g = generator_model() 
    d.trainable = False              #  设置模型在融合模型中不可训练,在训练时,判别器的参数是不更新的
    model.add(d)                     #  d = discriminator_model()
    return model                     #  此时的模型就是G + D 

def combine_images(generated_images):  # generated_images = g.predict(noise, verbose=0)  要搞清楚生成图像的维度信息
    num = generated_images.shape[0]
    width = int(math.sqrt(num))
    height = int(math.ceil(float(num)/width))
    shape = generated_images.shape[1:3]
    image = np.zeros((height*shape[0], width*shape[1]),
                     dtype=generated_images.dtype)
    for index, img in enumerate(generated_images):
        i = int(index/width)
        j = index % width
        image[i*shape[0]:(i+1)*shape[0], j*shape[1]:(j+1)*shape[1]] = \
            img[:, :, 0]
    return image

def train(BATCH_SIZE):
    # x_train, x_test: uint8 array of RGB image data with shape (num_samples, 3, 32, 32)
    # y_train, y_test: uint8 array of category labels (integers in range 0-9) with shape (num_samples,)  总之,X 为图像 Y 为标签 
    (x_train, y_train), (x_test, y_test) = cifar10.load_data()         # 加载数据集
    X_train = (X_train.astype(np.float32) - 127.5)/127.5               # 将像素缩放到[-1,1]之间,查清楚彩色图像怎么缩放像素
    # X_train = X_train[:, :, :, None]                                 # 原来MNIST的是[60000,28,28,1]
    # X_test = X_test[:, :, :, None]
    # X_train.shape, Y_train.shape, X_test.shape, Y_test.shape 
    # ((50000, 32, 32,3), (50000,), (10000, 32, 32,3), (10000,))
    # X_train = X_train.reshape((X_train.shape[0], 1) + X_train.shape[1:])  [batch_size,channel,width,height] [6000,1,28,28] 默认是theano做后端
    d = discriminator_model()   # d为判别器模型
    g = generator_model()       # g为生成器模型
    d_on_g = generator_containing_discriminator(g, d)       # 联合模型(固定D而训练G)
    d_optim = SGD(lr=0.0005, momentum=0.9, nesterov=True)   # 判别器的优化器
    g_optim = SGD(lr=0.0005, momentum=0.9, nesterov=True)   # 生成器的优化器
    g.compile(loss='binary_crossentropy', optimizer="SGD")  # 编译生成器
    d_on_g.compile(loss='binary_crossentropy', optimizer=g_optim) # G+D模型,把D看做G的一个全连接层来分类
    d.trainable = True                                      # 设置判别器模型可以训练
    d.compile(loss='binary_crossentropy', optimizer=d_optim)# 编译判别器
    for epoch in range(100):
        print("Epoch is", epoch)
        print("Number of batches", int(X_train.shape[0]/BATCH_SIZE))  # 60000/BATCH_SIZE = batch_num 
        for index in range(int(X_train.shape[0]/BATCH_SIZE)):         # index 代表的是每一个批次
            noise = np.random.uniform(-1, 1, size=(BATCH_SIZE, 100))  # 噪声 numpy.random.uniform(low=0.0, high=1.0, size=None) 98-108行训练的是D
            image_batch = X_train[index*BATCH_SIZE:(index+1)*BATCH_SIZE]  # 取出一个批次的图像
            generated_images = g.predict(noise, verbose=0)  # 生成器的预测模型来生成图像predict(self, x, batch_size=None, verbose=0, steps=None)
            if index % 20 == 0:                             # 每二十个批次保存一次生成图像
                image = combine_images(generated_images)    # 理解这个函数的作用是啥?????
                image = image*127.5+127.5                   # 恢复灰度图像的原始像素大小(0,255)
                Image.fromarray(image.astype(np.uint8)).save(  # 生成图像命名格式为:epoch_index(批次).png
                    str(epoch)+"_"+str(index)+".png")
            X = np.concatenate((image_batch, generated_images)) # 训练图像和生成图像的联合
            y = [1] * BATCH_SIZE + [0] * BATCH_SIZE             # 前BATCH_SIZE个真实图像对应的是标签是【1】后BATCH_SIZE个假图像的对应的标签是【0】
            d_loss = d.train_on_batch(X, y)                     # 按批次训练判别器
            print("batch %d d_loss : %f" % (index, d_loss))     # 每个批次打印一次判别器损失
            noise = np.random.uniform(-1, 1, (BATCH_SIZE, 100)) # 噪声
            d.trainable = False                                 # 此时判别器不可训练,训练G时,D不训练
            g_loss = d_on_g.train_on_batch(noise, [1] * BATCH_SIZE) # 生成的图像要和真实的标签做比较
            d.trainable = True                                      # 训练
            print("batch %d g_loss : %f" % (index, g_loss))     # 每个批次输出一次生成器损失
            if index % 10 == 9:                                 # 每十个批次保存一次权重
                g.save_weights('generator', True)
                d.save_weights('discriminator', True)

def generate(BATCH_SIZE, nice=False):   # 主要使用来使训练好的生成器来生成图像
    g = generator_model()
    g.compile(loss='binary_crossentropy', optimizer="SGD")
    g.load_weights('generator')
    if nice:                            # nice
        d = discriminator_model()
        d.compile(loss='binary_crossentropy', optimizer="SGD")
        d.load_weights('discriminator')
        noise = np.random.uniform(-1, 1, (BATCH_SIZE*20, 100))
        generated_images = g.predict(noise, verbose=1)
        d_pret = d.predict(generated_images, verbose=1)
        index = np.arange(0, BATCH_SIZE*20)
        index.resize((BATCH_SIZE*20, 1))
`        pre_with_index.sort(key=lambda x: x[0], reverse=True)
        nice_images = np.zeros((BATCH_SIZE,) + generated_images.shape[1:3], dtype=np.float32)
        nice_images = nice_images[:, :, :, None]
        for i in range(BATCH_SIZE):
            idx = int(pre_with_index[i][1])
            nice_images[i, :, :, 0] = generated_images[idx, :, :, 0]
        image = combine_images(nice_images)
    else:
        noise = np.random.uniform(-1, 1, (BATCH_SIZE, 100))
        generated_images = g.predict(noise, verbose=1)
        image = combine_images(generated_images)
    image = image*127.5+127.5
    Image.fromarray(image.astype(np.uint8)).save(
        "generated_image.png")

def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument("--mode", type=str)  # mode = 'train' or 'generate' 
    parser.add_argument("--batch_size", type=int, default=128)
    parser.add_argument("--nice", dest="nice", action="store_true")
    parser.set_defaults(nice=False)
    args = parser.parse_args()
    return args

if __name__ == "__main__":
    args = get_args()
    if args.mode == "train":
        train(BATCH_SIZE=args.batch_size)
    elif args.mode == "generate":
        generate(BATCH_SIZE=args.batch_size, nice=args.nice)

can you help me to slove it?

thank you very much.

kongyanye commented 5 years ago

How can use this code for cifar10 dataset? this is my code which is run on cifar10,but i encountered some mistakes in the course of running as a novice in GAN field.


from keras.model                 import Sequential  
from keras.layers                import Dense
from keras.layers                import Reshape 
from keras.layers.core           import Activation  
from keras.layers.normalization  import BatchNormalization
from keras.layers.convolutional  import UpSampling2D
from keras.layers.convolutional  import Conv2D, MaxPooling2D
from keras.layers.core           import Flatten
from keras.optimizers            import SGD  
from keras.datasets              import cifar10
import numpy                         as   np
from PIL                         import Image
import argparse
import math

# 生成器模型,主要用来生成图像
def generator_model():
    model = Sequential()
    model.add(Dense(input_dim=100, output_dim=1024))   # 全连接 100 to 1024
    model.add(Activation('tanh'))
    model.add(Dense(128*8*8))                          # 全连接 1024 to 128*8*8
    model.add(BatchNormalization())
    model.add(Activation('tanh'))
    model.add(Reshape((8, 8, 128), input_shape=(128*8*8,))) # 128*8*8 to 8*8*128
    model.add(UpSampling2D(size=(2, 2)))                    # 上采样,就是插值过程8*8*128 to 16*16*128
    model.add(Conv2D(64, (5, 5), padding='same'))           # 卷积,16*16*128  to 16*16*64
    model.add(Activation('tanh'))
    model.add(UpSampling2D(size=(2, 2)))                    # 上采样,16*16*64 to 32*32*64
    model.add(Conv2D(1, (5, 5), padding='same'))            # 卷积,32*32*64 to 32*32*1
    model.add(Activation('tanh'))
    return model

# 判别器模型,主要用来获得损失
def discriminator_model():
    model = Sequential()
    model.add(
            Conv2D(64, (5, 5),
            padding='same',
            input_shape=(32, 32, 3))                        # cifar10 图像格式
            )
    model.add(Activation('tanh'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Conv2D(128, (5, 5)))
    model.add(Activation('tanh'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Flatten())
    model.add(Dense(1024))
    model.add(Activation('tanh'))
    model.add(Dense(1))
    model.add(Activation('sigmoid'))                       # 判别图像是来自真实数据还是来自生成数据
    return model

def generator_containing_discriminator(g, d):   # 固定D而训练G,从D获得损失,主要用来训练G
    model = Sequential()
    model.add(g)                     #  g = generator_model() 
    d.trainable = False              #  设置模型在融合模型中不可训练,在训练时,判别器的参数是不更新的
    model.add(d)                     #  d = discriminator_model()
    return model                     #  此时的模型就是G + D 

def combine_images(generated_images):  # generated_images = g.predict(noise, verbose=0)  要搞清楚生成图像的维度信息
    num = generated_images.shape[0]
    width = int(math.sqrt(num))
    height = int(math.ceil(float(num)/width))
    shape = generated_images.shape[1:3]
    image = np.zeros((height*shape[0], width*shape[1]),
                     dtype=generated_images.dtype)
    for index, img in enumerate(generated_images):
        i = int(index/width)
        j = index % width
        image[i*shape[0]:(i+1)*shape[0], j*shape[1]:(j+1)*shape[1]] = \
            img[:, :, 0]
    return image

def train(BATCH_SIZE):
    # x_train, x_test: uint8 array of RGB image data with shape (num_samples, 3, 32, 32)
    # y_train, y_test: uint8 array of category labels (integers in range 0-9) with shape (num_samples,)  总之,X 为图像 Y 为标签 
    (x_train, y_train), (x_test, y_test) = cifar10.load_data()         # 加载数据集
    X_train = (X_train.astype(np.float32) - 127.5)/127.5               # 将像素缩放到[-1,1]之间,查清楚彩色图像怎么缩放像素
    # X_train = X_train[:, :, :, None]                                 # 原来MNIST的是[60000,28,28,1]
    # X_test = X_test[:, :, :, None]
  # X_train.shape, Y_train.shape, X_test.shape, Y_test.shape 
  # ((50000, 32, 32,3), (50000,), (10000, 32, 32,3), (10000,))
    # X_train = X_train.reshape((X_train.shape[0], 1) + X_train.shape[1:])  [batch_size,channel,width,height] [6000,1,28,28] 默认是theano做后端
    d = discriminator_model()   # d为判别器模型
    g = generator_model()       # g为生成器模型
    d_on_g = generator_containing_discriminator(g, d)       # 联合模型(固定D而训练G)
    d_optim = SGD(lr=0.0005, momentum=0.9, nesterov=True)   # 判别器的优化器
    g_optim = SGD(lr=0.0005, momentum=0.9, nesterov=True)   # 生成器的优化器
    g.compile(loss='binary_crossentropy', optimizer="SGD")  # 编译生成器
    d_on_g.compile(loss='binary_crossentropy', optimizer=g_optim) # G+D模型,把D看做G的一个全连接层来分类
    d.trainable = True                                      # 设置判别器模型可以训练
    d.compile(loss='binary_crossentropy', optimizer=d_optim)# 编译判别器
    for epoch in range(100):
        print("Epoch is", epoch)
        print("Number of batches", int(X_train.shape[0]/BATCH_SIZE))  # 60000/BATCH_SIZE = batch_num 
        for index in range(int(X_train.shape[0]/BATCH_SIZE)):         # index 代表的是每一个批次
            noise = np.random.uniform(-1, 1, size=(BATCH_SIZE, 100))  # 噪声 numpy.random.uniform(low=0.0, high=1.0, size=None) 98-108行训练的是D
            image_batch = X_train[index*BATCH_SIZE:(index+1)*BATCH_SIZE]  # 取出一个批次的图像
            generated_images = g.predict(noise, verbose=0)  # 生成器的预测模型来生成图像predict(self, x, batch_size=None, verbose=0, steps=None)
            if index % 20 == 0:                             # 每二十个批次保存一次生成图像
                image = combine_images(generated_images)    # 理解这个函数的作用是啥?????
                image = image*127.5+127.5                   # 恢复灰度图像的原始像素大小(0,255)
                Image.fromarray(image.astype(np.uint8)).save(  # 生成图像命名格式为:epoch_index(批次).png
                    str(epoch)+"_"+str(index)+".png")
            X = np.concatenate((image_batch, generated_images)) # 训练图像和生成图像的联合
            y = [1] * BATCH_SIZE + [0] * BATCH_SIZE             # 前BATCH_SIZE个真实图像对应的是标签是【1】后BATCH_SIZE个假图像的对应的标签是【0】
            d_loss = d.train_on_batch(X, y)                     # 按批次训练判别器
            print("batch %d d_loss : %f" % (index, d_loss))     # 每个批次打印一次判别器损失
            noise = np.random.uniform(-1, 1, (BATCH_SIZE, 100)) # 噪声
            d.trainable = False                                 # 此时判别器不可训练,训练G时,D不训练
            g_loss = d_on_g.train_on_batch(noise, [1] * BATCH_SIZE) # 生成的图像要和真实的标签做比较
            d.trainable = True                                      # 训练
            print("batch %d g_loss : %f" % (index, g_loss))     # 每个批次输出一次生成器损失
            if index % 10 == 9:                                 # 每十个批次保存一次权重
                g.save_weights('generator', True)
                d.save_weights('discriminator', True)

def generate(BATCH_SIZE, nice=False):   # 主要使用来使训练好的生成器来生成图像
    g = generator_model()
    g.compile(loss='binary_crossentropy', optimizer="SGD")
    g.load_weights('generator')
    if nice:                            # nice
        d = discriminator_model()
        d.compile(loss='binary_crossentropy', optimizer="SGD")
        d.load_weights('discriminator')
        noise = np.random.uniform(-1, 1, (BATCH_SIZE*20, 100))
        generated_images = g.predict(noise, verbose=1)
        d_pret = d.predict(generated_images, verbose=1)
        index = np.arange(0, BATCH_SIZE*20)
        index.resize((BATCH_SIZE*20, 1))
`        pre_with_index.sort(key=lambda x: x[0], reverse=True)
        nice_images = np.zeros((BATCH_SIZE,) + generated_images.shape[1:3], dtype=np.float32)
        nice_images = nice_images[:, :, :, None]
        for i in range(BATCH_SIZE):
            idx = int(pre_with_index[i][1])
            nice_images[i, :, :, 0] = generated_images[idx, :, :, 0]
        image = combine_images(nice_images)
    else:
        noise = np.random.uniform(-1, 1, (BATCH_SIZE, 100))
        generated_images = g.predict(noise, verbose=1)
        image = combine_images(generated_images)
    image = image*127.5+127.5
    Image.fromarray(image.astype(np.uint8)).save(
        "generated_image.png")

def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument("--mode", type=str)  # mode = 'train' or 'generate' 
    parser.add_argument("--batch_size", type=int, default=128)
    parser.add_argument("--nice", dest="nice", action="store_true")
    parser.set_defaults(nice=False)
    args = parser.parse_args()
    return args

if __name__ == "__main__":
    args = get_args()
    if args.mode == "train":
        train(BATCH_SIZE=args.batch_size)
    elif args.mode == "generate":
        generate(BATCH_SIZE=args.batch_size, nice=args.nice)

can you help me to slove it?

thank you very much.

The image batch you have is of size 32*32*3, but the generated image of 32*32*1. You won't be able to concatenate these 2 batches of images and just change model.add(Conv2D(1, (5, 5), padding='same')) to model.add(Conv2D(3, (5, 5), padding='same')) should work.