weixianggoh opened this issue 7 years ago
Probably the TensorFlow version. Put this in the terminal: `pip list | grep tensorflow`. What does it say?
@DanielLSM The result is this:

```
(tensorflow) bot@bot-Aspire-V5-471PG:~/tensorflow$ pip list | grep tensorflow
DEPRECATION: The default format will switch to columns in the future. You can use --format=(legacy|columns) (or define a format=(legacy|columns) in your pip.conf under the [list] section) to disable this warning.
tensorflow (1.3.0)
tensorflow-gpu (1.3.0)
tensorflow-tensorboard (0.1.6)
```
You have the newest version of TensorFlow, but the code is from an older TensorFlow version (1.0 or 1.1, I believe). You have two options: either uninstall TensorFlow and install an older version, or look at https://www.tensorflow.org/api_docs/python/tf/contrib/seq2seq, where the method was moved into another sub-module.
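For example, a quick way to check which seq2seq module your installed version actually exposes (a minimal sketch, assuming a TensorFlow 1.x install):

```python
import tensorflow as tf

print(tf.__version__)  # the code in this thread targets 1.0/1.1-era APIs
# In later 1.x releases the old tf.nn.seq2seq helpers live under
# tf.contrib.legacy_seq2seq; this prints True if your install has it.
print(hasattr(tf.contrib, 'legacy_seq2seq'))
```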
Try changing `tf.nn.seq2seq` to `tf.contrib.legacy_seq2seq` in model.py. You also have to change the `sampled_loss` function; try the code below.
```python
def sampled_loss(labels, logits):
    labels = tf.reshape(labels, [-1, 1])
    return tf.nn.sampled_softmax_loss(tf.transpose(w), b, labels, logits,
                                      config.NUM_SAMPLES, config.DEC_VOCAB)
```
I made these changes for TensorFlow 1.3. Cheers!
@DanielLSM I just want to ask: how long is training estimated to take? I started training 12 hours ago and it is now at "Iter 10500: loss 3.0672782254219055, time 3.1186535358428955". What is the maximum number of iterations?
Thanks!
```python
""" A neural chatbot using sequence to sequence model with attentional decoder.

This is based on Google Translate Tensorflow model
https://github.com/tensorflow/models/blob/master/tutorials/rnn/translate/
Sequence to sequence model by Cho et al. (2014)

Created by Chip Huyen as the starter code for assignment 3,
class CS 20SI: "TensorFlow for Deep Learning Research" cs20si.stanford.edu

This file contains the code to do the pre-processing for the
Cornell Movie-Dialogs Corpus.

See readme.md for instruction on how to run the starter code.
"""
from __future__ import print_function

import os
import random
import re

import numpy as np

import config

def get_lines():
    """ Map each line id to its text. """
    id2line = {}
    file_path = os.path.join(config.DATA_PATH, config.LINE_FILE)
    with open(file_path, 'r') as f:
        for line in f.readlines():
            parts = line.split(' +++$+++ ')
            if len(parts) == 5:
                if parts[4][-1] == '\n':
                    parts[4] = parts[4][:-1]
                id2line[parts[0]] = parts[4]
    return id2line

def get_convos():
    """ Get conversations from the raw data """
    file_path = os.path.join(config.DATA_PATH, config.CONVO_FILE)
    convos = []
    with open(file_path, 'r') as f:
        for line in f.readlines():
            parts = line.split(' +++$+++ ')
            if len(parts) == 4:
                convo = []
                for line_id in parts[3][1:-2].split(', '):
                    convo.append(line_id[1:-1])
                convos.append(convo)
    return convos

def question_answers(id2line, convos):
    """ Divide the dataset into two sets: questions and answers. """
    questions, answers = [], []
    for convo in convos:
        for index in range(len(convo) - 1):
            questions.append(id2line[convo[index]])
            answers.append(id2line[convo[index + 1]])
    assert len(questions) == len(answers)
    return questions, answers

def prepare_dataset(questions, answers):
    make_dir(config.PROCESSED_PATH)
    # random convos to create the test set
    test_ids = random.sample([i for i in range(len(questions))], config.TESTSET_SIZE)
    filenames = ['train.enc', 'train.dec', 'test.enc', 'test.dec']
    files = []
    for filename in filenames:
        files.append(open(os.path.join(config.PROCESSED_PATH, filename), 'w'))

    for i in range(len(questions)):
        if i in test_ids:
            files[2].write(questions[i] + '\n')
            files[3].write(answers[i] + '\n')
        else:
            files[0].write(questions[i] + '\n')
            files[1].write(answers[i] + '\n')

    for file in files:
        file.close()

def make_dir(path):
    """ Create a directory if there isn't one already. """
    try:
        os.mkdir(path)
    except OSError:
        pass

def basic_tokenizer(line, normalize_digits=True):
    """ A basic tokenizer to tokenize text into tokens.
    Feel free to change this to suit your need. """
    line = re.sub('<u>', '', line)
    line = re.sub('</u>', '', line)
    line = re.sub(r'\[', '', line)
    line = re.sub(r'\]', '', line)
    words = []
    _WORD_SPLIT = re.compile(r"([.,!?\"'-<>:;)(])")
    _DIGIT_RE = re.compile(r"\d")
    for fragment in line.strip().lower().split():
        for token in re.split(_WORD_SPLIT, fragment):
            if not token:
                continue
            if normalize_digits:
                token = re.sub(_DIGIT_RE, '#', token)
            words.append(token)
    return words

def build_vocab(filename, normalize_digits=True):
    in_path = os.path.join(config.PROCESSED_PATH, filename)
    out_path = os.path.join(config.PROCESSED_PATH, 'vocab.{}'.format(filename[-3:]))

    vocab = {}
    with open(in_path, 'r') as f:
        for line in f.readlines():
            for token in basic_tokenizer(line):
                if not token in vocab:
                    vocab[token] = 0
                vocab[token] += 1

    sorted_vocab = sorted(vocab, key=vocab.get, reverse=True)
    with open(out_path, 'w') as f:
        f.write('<pad>' + '\n')
        f.write('<unk>' + '\n')
        f.write('<s>' + '\n')
        f.write('<\s>' + '\n')
        index = 4
        for word in sorted_vocab:
            if vocab[word] < config.THRESHOLD:
                # record the final vocabulary size in config.py
                with open('config.py', 'a') as cf:
                    if filename[-3:] == 'enc':
                        cf.write('ENC_VOCAB = ' + str(index) + '\n')
                    else:
                        cf.write('DEC_VOCAB = ' + str(index) + '\n')
                break
            f.write(word + '\n')
            index += 1

def load_vocab(vocab_path):
    with open(vocab_path, 'r') as f:
        words = f.read().splitlines()
    return words, {words[i]: i for i in range(len(words))}

def sentence2id(vocab, line):
    return [vocab.get(token, vocab['<unk>']) for token in basic_tokenizer(line)]

def token2id(data, mode):
    """ Convert all the tokens in the data into their corresponding
    index in the vocabulary. """
    vocab_path = 'vocab.' + mode
    in_path = data + '.' + mode
    out_path = data + '_ids.' + mode

    _, vocab = load_vocab(os.path.join(config.PROCESSED_PATH, vocab_path))
    in_file = open(os.path.join(config.PROCESSED_PATH, in_path), 'r')
    out_file = open(os.path.join(config.PROCESSED_PATH, out_path), 'w')

    lines = in_file.read().splitlines()
    for line in lines:
        if mode == 'dec':  # we only care about '<s>' and '<\s>' in the decoder
            ids = [vocab['<s>']]
        else:
            ids = []
        ids.extend(sentence2id(vocab, line))
        # ids.extend([vocab.get(token, vocab['<unk>']) for token in basic_tokenizer(line)])
        if mode == 'dec':
            ids.append(vocab['<\s>'])
        out_file.write(' '.join(str(id_) for id_ in ids) + '\n')

def prepare_raw_data():
    print('Preparing raw data into train set and test set ...')
    id2line = get_lines()
    convos = get_convos()
    questions, answers = question_answers(id2line, convos)
    prepare_dataset(questions, answers)

def process_data():
    print('Preparing data to be model-ready ...')
    build_vocab('train.enc')
    build_vocab('train.dec')
    token2id('train', 'enc')
    token2id('train', 'dec')
    token2id('test', 'enc')
    token2id('test', 'dec')

def load_data(enc_filename, dec_filename, max_training_size=None):
    encode_file = open(os.path.join(config.PROCESSED_PATH, enc_filename), 'r')
    decode_file = open(os.path.join(config.PROCESSED_PATH, dec_filename), 'r')
    encode, decode = encode_file.readline(), decode_file.readline()
    data_buckets = [[] for _ in config.BUCKETS]
    i = 0
    while encode and decode:
        if (i + 1) % 10000 == 0:
            print("Bucketing conversation number", i)
        encode_ids = [int(id_) for id_ in encode.split()]
        decode_ids = [int(id_) for id_ in decode.split()]
        # put the pair in the first bucket that both sides fit into
        for bucket_id, (encode_max_size, decode_max_size) in enumerate(config.BUCKETS):
            if len(encode_ids) <= encode_max_size and len(decode_ids) <= decode_max_size:
                data_buckets[bucket_id].append([encode_ids, decode_ids])
                break
        encode, decode = encode_file.readline(), decode_file.readline()
        i += 1
    return data_buckets

def _pad_input(input_, size):
    return input_ + [config.PAD_ID] * (size - len(input_))

def _reshape_batch(inputs, size, batch_size):
    """ Create batch-major inputs. Batch inputs are just re-indexed inputs """
    batch_inputs = []
    for length_id in range(size):
        batch_inputs.append(np.array([inputs[batch_id][length_id]
                                      for batch_id in range(batch_size)], dtype=np.int32))
    return batch_inputs

def get_batch(data_bucket, bucket_id, batch_size=1):
    """ Return one batch to feed into the model """
    encoder_size, decoder_size = config.BUCKETS[bucket_id]
    encoder_inputs, decoder_inputs = [], []

    for _ in range(batch_size):
        encoder_input, decoder_input = random.choice(data_bucket)
        # pad both encoder and decoder, reverse the encoder
        encoder_inputs.append(list(reversed(_pad_input(encoder_input, encoder_size))))
        decoder_inputs.append(_pad_input(decoder_input, decoder_size))

    # now we create batch-major vectors from the data selected above.
    batch_encoder_inputs = _reshape_batch(encoder_inputs, encoder_size, batch_size)
    batch_decoder_inputs = _reshape_batch(decoder_inputs, decoder_size, batch_size)

    # create decoder_masks to be 0 for decoders that are padding.
    batch_masks = []
    for length_id in range(decoder_size):
        batch_mask = np.ones(batch_size, dtype=np.float32)
        for batch_id in range(batch_size):
            # we set mask to 0 if the corresponding target is a PAD symbol.
            # the corresponding target is decoder_input shifted by 1 forward.
            if length_id < decoder_size - 1:
                target = decoder_inputs[batch_id][length_id + 1]
            if length_id == decoder_size - 1 or target == config.PAD_ID:
                batch_mask[batch_id] = 0.0
        batch_masks.append(batch_mask)
    return batch_encoder_inputs, batch_decoder_inputs, batch_masks

if __name__ == '__main__':
    prepare_raw_data()
    process_data()
```
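For reference, a minimal sketch of how these helpers are driven during training, assuming `process_data()` has already written the `*_ids.*` files under `config.PROCESSED_PATH` (the filenames follow `token2id`'s naming above):

```python
import data

# bucket the id-encoded pairs, then draw one padded, batch-major batch
data_buckets = data.load_data('train_ids.enc', 'train_ids.dec')
enc_batch, dec_batch, decoder_masks = data.get_batch(data_buckets[0], 0, batch_size=64)
```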
I updated data.py (above) to the point where it works, though I am still not 100% sure it does what is needed. My update to model.py is below. Training is running, but it is slow as hell, and the chat output is still nothing recognizable...

I bet training will run until you stop it or it reaches its best score. If I understand correctly, there is a number in config.py for that (THRESHOLD = 2, or maybe LR = 0.5; I am still researching this).

model.py still needs testing:
```python
""" A neural chatbot using sequence to sequence model with attentional decoder.

This is based on Google Translate Tensorflow model
https://github.com/tensorflow/models/blob/master/tutorials/rnn/translate/
Sequence to sequence model by Cho et al. (2014)

Created by Chip Huyen as the starter code for assignment 3,
class CS 20SI: "TensorFlow for Deep Learning Research" cs20si.stanford.edu

This file contains the code to build the model

See readme.md for instruction on how to run the starter code.
"""
from __future__ import print_function

import time

import numpy as np
import tensorflow as tf

import config

class ChatBotModel(object):
    def __init__(self, forward_only, batch_size):
        """forward_only: if set, we do not construct the backward pass in the model. """
        print('Initialize new model')
        self.fw_only = forward_only
        self.batch_size = batch_size

    def _create_placeholders(self):
        # Feeds for inputs. It's a list of placeholders
        print('Create placeholders')
        self.encoder_inputs = [tf.placeholder(tf.int32, shape=[None], name='encoder{}'.format(i))
                               for i in range(config.BUCKETS[-1][0])]
        self.decoder_inputs = [tf.placeholder(tf.int32, shape=[None], name='decoder{}'.format(i))
                               for i in range(config.BUCKETS[-1][1] + 1)]
        self.decoder_masks = [tf.placeholder(tf.float32, shape=[None], name='mask{}'.format(i))
                              for i in range(config.BUCKETS[-1][1] + 1)]

        # Our targets are decoder inputs shifted by one (to ignore <s> symbol)
        self.targets = self.decoder_inputs[1:]

    def _inference(self):
        print('Create inference')
        # If we use sampled softmax, we need an output projection.
        # Sampled softmax only makes sense if we sample less than vocabulary size.
        if config.NUM_SAMPLES > 0 and config.NUM_SAMPLES < config.DEC_VOCAB:
            w = tf.get_variable('proj_w', [config.HIDDEN_SIZE, config.DEC_VOCAB])
            b = tf.get_variable('proj_b', [config.DEC_VOCAB])
            self.output_projection = (w, b)

            def sampled_loss(labels, logits):
                labels = tf.reshape(labels, [-1, 1])
                return tf.nn.sampled_softmax_loss(tf.transpose(w), b, labels, logits,
                                                  config.NUM_SAMPLES, config.DEC_VOCAB)
            self.softmax_loss_function = sampled_loss

        # build a fresh cell per layer; sharing one cell object across layers
        # can trigger variable-reuse errors in newer TF 1.x releases
        self.cell = tf.nn.rnn_cell.MultiRNNCell(
                [tf.nn.rnn_cell.GRUCell(config.HIDDEN_SIZE) for _ in range(config.NUM_LAYERS)])

    def _create_loss(self):
        print('Creating loss... \nIt might take a couple of minutes depending on how many buckets you have.')
        start = time.time()
        def _seq2seq_f(encoder_inputs, decoder_inputs, do_decode):
            return tf.contrib.legacy_seq2seq.embedding_attention_seq2seq(
                    encoder_inputs, decoder_inputs, self.cell,
                    num_encoder_symbols=config.ENC_VOCAB,
                    num_decoder_symbols=config.DEC_VOCAB,
                    embedding_size=config.HIDDEN_SIZE,
                    output_projection=self.output_projection,
                    feed_previous=do_decode)

        if self.fw_only:
            self.outputs, self.losses = tf.contrib.legacy_seq2seq.model_with_buckets(
                                            self.encoder_inputs,
                                            self.decoder_inputs,
                                            self.targets,
                                            self.decoder_masks,
                                            config.BUCKETS,
                                            lambda x, y: _seq2seq_f(x, y, True),
                                            softmax_loss_function=self.softmax_loss_function)
            # If we use output projection, we need to project outputs for decoding.
            if self.output_projection:
                for bucket in range(len(config.BUCKETS)):
                    self.outputs[bucket] = [tf.matmul(output,
                                            self.output_projection[0]) + self.output_projection[1]
                                            for output in self.outputs[bucket]]
        else:
            self.outputs, self.losses = tf.contrib.legacy_seq2seq.model_with_buckets(
                                            self.encoder_inputs,
                                            self.decoder_inputs,
                                            self.targets,
                                            self.decoder_masks,
                                            config.BUCKETS,
                                            lambda x, y: _seq2seq_f(x, y, False),
                                            softmax_loss_function=self.softmax_loss_function)
        print('Time:', time.time() - start)

    def _create_optimizer(self):
        print('Create optimizer... \nIt might take a couple of minutes depending on how many buckets you have.')
        with tf.variable_scope('training'):
            self.global_step = tf.Variable(0, dtype=tf.int32, trainable=False, name='global_step')

            if not self.fw_only:
                self.optimizer = tf.train.GradientDescentOptimizer(config.LR)
                trainables = tf.trainable_variables()
                self.gradient_norms = []
                self.train_ops = []
                start = time.time()
                for bucket in range(len(config.BUCKETS)):
                    clipped_grads, norm = tf.clip_by_global_norm(
                            tf.gradients(self.losses[bucket], trainables),
                            config.MAX_GRAD_NORM)
                    self.gradient_norms.append(norm)
                    self.train_ops.append(self.optimizer.apply_gradients(
                            zip(clipped_grads, trainables), global_step=self.global_step))
                    print('Creating opt for bucket {} took {} seconds'.format(bucket, time.time() - start))
                    start = time.time()

    def _create_summary(self):
        pass

    def build_graph(self):
        self._create_placeholders()
        self._inference()
        self._create_loss()
        self._create_optimizer()
        self._create_summary()
```
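And a minimal sketch of how I understand this class is driven (the module name `model` and the batch size are my assumptions; `forward_only=False` builds the training ops via `build_graph` above):

```python
import tensorflow as tf

import model  # assumed module name for the file above

# build the training graph; a chat/test session would pass forward_only=True
bot = model.ChatBotModel(forward_only=False, batch_size=64)
bot.build_graph()

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
```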
After 24 hours of training, with a loss of approximately 2.2-2.3, it crashed during a test run... I'm done :D
Hello everybody. I am testing this code and I am unable to find the chatbot.py file to run with python3 for interactive mode, to start a conversation with the chatbot. Can anybody help, please? Thanks.
Use the argument `--mode test`, i.e. run `python3 chatbot.py --mode test`.
When I run model.py I get this error: `output_projection=self.output_projection, AttributeError: 'ChatBotModel' object has no attribute 'output_projection'`
Hi everyone, I am new to TensorFlow. Can anyone guide me in solving this error? Is it because of an old version of TensorFlow?
Thank you so much for spending your time. I really appreciate your help.
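One likely cause, judging from the `_inference` code above: `self.output_projection` is only assigned inside the sampled-softmax branch, so if `config.NUM_SAMPLES` is 0 or not smaller than `config.DEC_VOCAB`, the attribute never exists by the time `_create_loss` reads it. A minimal sketch of a guard (my own workaround, not from the original code):

```python
def _inference(self):
    print('Create inference')
    # Default both attributes so _create_loss can always read them, even
    # when the sampled-softmax branch below is skipped.
    self.output_projection = None
    self.softmax_loss_function = None
    if 0 < config.NUM_SAMPLES < config.DEC_VOCAB:
        w = tf.get_variable('proj_w', [config.HIDDEN_SIZE, config.DEC_VOCAB])
        b = tf.get_variable('proj_b', [config.DEC_VOCAB])
        self.output_projection = (w, b)

        def sampled_loss(labels, logits):
            labels = tf.reshape(labels, [-1, 1])
            return tf.nn.sampled_softmax_loss(tf.transpose(w), b, labels, logits,
                                              config.NUM_SAMPLES, config.DEC_VOCAB)
        self.softmax_loss_function = sampled_loss

    self.cell = tf.nn.rnn_cell.MultiRNNCell(
            [tf.nn.rnn_cell.GRUCell(config.HIDDEN_SIZE) for _ in range(config.NUM_LAYERS)])
```

With both attributes defaulting to `None`, `embedding_attention_seq2seq` and `model_with_buckets` fall back to a full softmax, which is slower but still valid.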