hit-computer / char-rnn-tf

Character-level language models for text generation, based on LSTM and implemented in Python/TensorFlow.
150 stars 49 forks source link

Seed 是否可以傳一段中文字串 #4

Closed fukuball closed 7 years ago

fukuball commented 7 years ago

您好,我訓練完成後,可以使用單個字作為 seed,但不知如何傳一段中文字串作為 seed,希望可以指點一下迷津,謝謝~

fukuball commented 7 years ago

結果我大概寫出來了,如果沒問題的話,我可以整理一下發 pull request 給你~

#coding:utf-8
import tensorflow as tf
import sys,time
import numpy as np
import cPickle, os
import random

# Session configuration used by the tf.Session created in main(): both
# TensorFlow thread pools are limited to a single thread, and GPU memory is
# grown on demand rather than reserved up front.
config_tf = tf.ConfigProto(
    inter_op_parallelism_threads=1,
    intra_op_parallelism_threads=1,
)
config_tf.gpu_options.allow_growth = True

# ---- Generation settings -------------------------------------------------
model_path = './Model'  # path prefix of the saved model files (.voc, .fig, checkpoints)
save_time = 45  # suffix of the checkpoint to restore: model_path + '-<save_time>'
is_sample = True  # True: sample from the predicted distribution; False: take the argmax
is_beams = True  # whether to decode with beam search
beam_size = 2  # number of hypotheses kept by beam search
len_of_generation = 200  # number of characters generated after the seed text
start_sentence = u'挽著我的手'  # seed text fed to the model before generation starts

# Vocabulary lookup tables (character -> index and index -> character).
# NOTE: pickle can execute arbitrary code while loading, so only point
# model_path at files you trust.
# The file is opened in the same mode as before, but through a context
# manager so the handle is closed as soon as the tables are loaded.
with open(model_path+'.voc', 'r') as voc_file:
    char_to_idx, idx_to_char = cPickle.load(voc_file)

class Config(object):
    """Plain container for the model and training hyper-parameters.

    Every instance starts with the defaults listed in ``_DEFAULTS``; callers
    overwrite individual attributes afterwards (main() below forces
    ``batch_size`` and ``num_steps`` to 1 for generation).
    """

    # (attribute name, default value) pairs applied by __init__.
    _DEFAULTS = (
        ('init_scale', 0.04),      # half-width of the uniform weight initialiser
        ('learning_rate', 0.001),  # learning rate handed to the Adam optimiser
        ('max_grad_norm', 15),     # global-norm threshold for gradient clipping
        ('num_layers', 3),         # number of stacked LSTM layers
        ('num_steps', 25),         # number of steps to unroll the RNN for
        ('hidden_size', 1000),     # size of the hidden layer (also the embedding width)
        ('iteration', 50),         # presumably the number of training iterations - not read in this file
        ('save_freq', 5),          # save the model to disk every `save_freq` iterations
        ('keep_prob', 0.5),        # dropout keep probability (dropout is only applied when training)
        ('batch_size', 32),        # sequences per batch
        ('vocab_size', 0),         # placeholder; presumably set to the real vocabulary size elsewhere
    )

    def __init__(self):
        for attr_name, default in self._DEFAULTS:
            setattr(self, attr_name, default)

class Model(object):
    """Multi-layer LSTM character language model built as a TensorFlow graph.

    The constructor creates the placeholders, the embedding, the unrolled
    RNN, the softmax projection and the loss. With ``is_training=False`` it
    additionally exposes the softmax probabilities as ``_prob`` and stops
    before the optimiser is built, so ``train_op`` is not available then.
    """

    def __init__(self, is_training, config):
        """Build the graph.

        Args:
            is_training: when true, dropout is applied (if ``keep_prob < 1``)
                and the gradient-clipped Adam training op is created.
            config: object providing ``batch_size``, ``num_steps``,
                ``hidden_size``, ``vocab_size``, ``learning_rate``,
                ``keep_prob``, ``num_layers`` and ``max_grad_norm``.
        """
        self.batch_size = batch_size = config.batch_size
        self.num_steps = num_steps = config.num_steps
        size = config.hidden_size
        vocab_size = config.vocab_size
        self.lr = config.learning_rate

        self._input_data = tf.placeholder(tf.int32, [batch_size, num_steps])
        self._targets = tf.placeholder(tf.int32, [batch_size, num_steps]) # declare the input placeholders x (inputs) and y (targets)

        lstm_cell = tf.contrib.rnn.BasicLSTMCell(size, forget_bias=0.0, state_is_tuple=False)
        # Dropout on the cell outputs is only added while training.
        if is_training and config.keep_prob < 1:
            lstm_cell = tf.contrib.rnn.DropoutWrapper(
                lstm_cell, output_keep_prob=config.keep_prob)
        # The list repeats the same cell object num_layers times.
        # NOTE(review): later TensorFlow releases may reject reusing one cell
        # object for several layers - confirm against the TF version in use.
        cell = tf.contrib.rnn.MultiRNNCell([lstm_cell] * config.num_layers, state_is_tuple=False)

        self._initial_state = cell.zero_state(batch_size, tf.float32)

        with tf.device("/cpu:0"):
            embedding = tf.get_variable("embedding", [vocab_size, size]) # size is the embedding dimension
            inputs = tf.nn.embedding_lookup(embedding, self._input_data) # returns a tensor of shape (batch_size, num_steps, size)

        if is_training and config.keep_prob < 1:
            inputs = tf.nn.dropout(inputs, config.keep_prob)

        # Unroll the RNN manually for num_steps steps, threading the state
        # through and sharing the cell variables after the first step.
        outputs = []
        state = self._initial_state
        with tf.variable_scope("RNN"):
            for time_step in range(num_steps):
                if time_step > 0: tf.get_variable_scope().reuse_variables()
                (cell_output, state) = cell(inputs[:, time_step, :], state) # inputs[:, time_step, :] has shape (batch_size, size)
                outputs.append(cell_output)

        output = tf.reshape(tf.concat(outputs, 1), [-1, size])
        # English rendering of the note below: `outputs` is a list of n
        # tensors of shape (batch_size, hidden_size); tf.concat(outputs, 1)
        # yields a (batch_size, n*hidden_size) matrix, which is then reshaped
        # to [-1, size].
        """
        outpus是一个list,n*(batch_size, hidden_size),tf.concat(outputs, 1)返回一个矩阵(batch_size, n*hidden_size)
        reshape(..., [-1, size])
        """
        softmax_w = tf.get_variable("softmax_w", [size, vocab_size])
        softmax_b = tf.get_variable("softmax_b", [vocab_size])
        # logits has shape (batch_size*num_steps, vocab_size); rows are ordered
        # as sequence 0 step 0, sequence 0 step 1, ..., then sequence 1 step 0, ...
        logits = tf.matmul(output, softmax_w) + softmax_b
        # Per-position loss with every position weighted 1.
        loss = tf.contrib.legacy_seq2seq.sequence_loss_by_example(
            [logits],
            [tf.reshape(self._targets, [-1])],
            [tf.ones([batch_size * num_steps])])
        # Total loss over all positions, divided by the batch size.
        self._cost = cost = tf.reduce_sum(loss) / batch_size
        self._final_state = state
        self._logits = logits

        if not is_training:
            # Inference only: expose the softmax probabilities and skip the optimiser.
            self._prob = tf.nn.softmax(logits)
            return

        # Training: clip the gradients by global norm and apply them with Adam.
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars),
                                          config.max_grad_norm)
        optimizer = tf.train.AdamOptimizer(self.lr)
        self._train_op = optimizer.apply_gradients(zip(grads, tvars))

    @property
    def input_data(self):
        """Placeholder for the input indices, shape [batch_size, num_steps]."""
        return self._input_data

    @property
    def targets(self):
        """Placeholder for the target indices, shape [batch_size, num_steps]."""
        return self._targets

    @property
    def initial_state(self):
        """Zero state of the stacked cell for ``batch_size`` sequences."""
        return self._initial_state

    @property
    def cost(self):
        """Summed loss divided by the batch size."""
        return self._cost

    @property
    def final_state(self):
        """RNN state after the last unrolled step."""
        return self._final_state

    @property
    def train_op(self):
        """Training op; only set when the model was built with is_training=True."""
        return self._train_op

def run_epoch(session, m, data, eval_op, state=None):
    """Run the model for one step on a single character index.

    Args:
        session: session object whose ``run`` method executes the graph.
        m: model exposing ``_prob``, ``final_state``, ``input_data`` and
            ``initial_state``.
        data: numpy value holding exactly one index; it is reshaped to (1, 1)
            to match a model built with batch_size == num_steps == 1.
        eval_op: extra op to run alongside the prediction (its result is
            discarded); callers in this file pass ``tf.no_op()``.
        state: RNN state to start from. When omitted, the model's own
            initial (zero) state is evaluated and used, because ``None``
            itself is not a usable feed value for the state tensor.

    Returns:
        A ``(prob, state)`` pair: the value of ``m._prob`` and the value of
        ``m.final_state`` after this step.
    """
    if state is None:
        state = session.run(m.initial_state)
    x = data.reshape((1, 1))
    prob, _state, _ = session.run([m._prob, m.final_state, eval_op],
                                  {m.input_data: x,
                                   m.initial_state: state})
    return prob, _state

def _feed_seed(session, model, seed_chars, eval_op):
    """Warm up the RNN by feeding the seed text one character at a time.

    A seed character that is missing from the vocabulary is replaced, for the
    purpose of feeding the model, by the most probable next character
    according to the prediction made so far.

    Args:
        session: active session.
        model: inference model (batch_size == num_steps == 1).
        seed_chars: list of seed characters.
        eval_op: op passed through to run_epoch.

    Returns:
        ``(prob, state)`` after the last seed character has been consumed.

    Raises:
        ValueError: if ``seed_chars`` is empty.
        KeyError: if the first seed character is not in the vocabulary (there
            is no earlier prediction to fall back on).
    """
    if not seed_chars:
        raise ValueError('start_sentence must contain at least one character')
    state = session.run(model.initial_state)
    prob = None
    for position, char in enumerate(seed_chars):
        try:
            char_index = char_to_idx[char]
        except KeyError:
            if position == 0:
                raise
            # Out-of-vocabulary seed character: feed the model's own best guess.
            char_index = np.argmax(prob.reshape(-1))
        prob, state = run_epoch(session, model, np.int32([char_index]), eval_op, state)
    return prob, state


def _pick_next_index(prob, vocab_size):
    """Choose the next character index by sampling (is_sample) or argmax."""
    flat = prob.reshape(-1)
    if is_sample:
        return np.random.choice(vocab_size, 1, p=flat)[0]
    return np.argmax(flat)


def _expand_beam(score, chars, prob, state, vocab_size):
    """Return up to beam_size successors of one beam hypothesis.

    Each successor is a ``(log_score, chars, last_index, state)`` tuple.
    """
    flat = prob.reshape(-1)
    log_prob = np.log(1e-20 + flat)  # epsilon keeps log() finite for zero probabilities
    if is_sample:
        top_indices = np.random.choice(vocab_size, beam_size, replace=False, p=flat)
    else:
        top_indices = np.argsort(-log_prob)
    return [(score + log_prob[ix], chars + [idx_to_char[ix]], ix, state)
            for ix in top_indices[:beam_size]]


def main(_):
    """Restore the trained model and print text generated from start_sentence.

    The saved configuration is loaded and switched to single-step,
    single-sequence inference, the checkpoint ``model_path-<save_time>`` is
    restored, the seed text is fed through the network, and then
    ``len_of_generation`` characters are produced either greedily/by sampling
    (``is_beams`` false) or with beam search (``is_beams`` true).
    """
    with tf.Graph().as_default(), tf.Session(config=config_tf) as session:
        # NOTE: pickle can execute arbitrary code; only load trusted files.
        with open(model_path+'.fig', 'r') as config_file:
            config = cPickle.load(config_file)
        config.batch_size = 1
        config.num_steps = 1

        initializer = tf.random_uniform_initializer(-config.init_scale,
                                                    config.init_scale)
        with tf.variable_scope("model", reuse=None, initializer=initializer):
            mtest = Model(is_training=False, config=config)

        # No variable initialisation is run: all values come from the checkpoint.
        model_saver = tf.train.Saver()
        print('model loading ...')
        model_saver.restore(session, model_path+'-%d'%save_time)
        print('Done!')

        # Created once and reused, so the graph does not grow with every step.
        eval_op = tf.no_op()

        seed_chars = list(start_sentence)
        prob, state = _feed_seed(session, mtest, seed_chars, eval_op)

        if not is_beams:
            gen_res = list(seed_chars)
            gen = _pick_next_index(prob, config.vocab_size)
            gen_res.append(idx_to_char[gen])
            for step in range(len_of_generation-1):
                prob, state = run_epoch(session, mtest, np.int32(gen), eval_op, state)
                gen = _pick_next_index(prob, config.vocab_size)
                gen_res.append(idx_to_char[gen])
            # Single-argument print keeps the output identical on Python 2
            # (label, one separating space, then the text).
            print('%s %s' % ('Generated Result: ', ''.join(gen_res)))
        else:
            # Start from a single hypothesis holding the seed text (log score 0).
            beams = _expand_beam(0.0, list(seed_chars), prob, state, config.vocab_size)
            beams.sort(key=lambda cand: cand[0], reverse=True)  # decreasing score
            beams = beams[:beam_size]
            for step in range(len_of_generation-1):
                beam_candidates = []
                for b in beams:
                    prob, state = run_epoch(session, mtest, np.int32(b[2]), eval_op, b[3])
                    beam_candidates.extend(
                        _expand_beam(b[0], b[1], prob, state, config.vocab_size))
                beam_candidates.sort(key=lambda cand: cand[0], reverse=True)  # decreasing score
                beams = beam_candidates[:beam_size]  # truncate to get the new beams

            print('%s %s' % ('Generated Result: ', ''.join(beams[0][1])))

# Script entry point: tf.app.run() hands control to the module-level main().
if __name__ == "__main__":
    tf.app.run()
hit-computer commented 7 years ago

您好。非常感谢您对generation.py这部分功能的扩展,我看了一下您的代码并且实际运行测试了一下,没有发现问题。那就麻烦您pull request给我吧,再次感谢您的贡献,谢谢:)

fukuball commented 7 years ago

@hit-computer 好的,我晚一點整理好發 pull request,尚有其他事情忙,應該會需要等我 1 ~ 3 天。

hit-computer commented 7 years ago

@fukuball 没事儿,不着急的。 谢谢啦 :)

fukuball commented 7 years ago

@hit-computer 結果剛剛有空先做好了,已發 pull request,再請你看看囉,這個 issue 就可以關閉了,謝謝~

hit-computer commented 7 years ago

@fukuball 收到啦,感谢咯~