somvirs57 / text_generation_tensorflow

This repository contains the dataset and the Python script for text generation using TensorFlow.

Not showing the output. (my code is here) #1

Open madihamymon opened 3 years ago

madihamymon commented 3 years ago

my code is:

```python
from google.colab import drive
drive.mount('/content/drive/')

import tensorflow as tf
import pandas as pd
import numpy as np

with open('/content/drive/My Drive/file.txt') as story:
    story_data = story.read()
print(story_data)
```

```python
import re

def clean_text(text):
    # Escape the regex metacharacters ( ) and . — unescaped, '(' raises a
    # re.error and '.' strips every character from the string.
    text = re.sub(r',', '', text)
    text = re.sub(r'\'', '', text)
    text = re.sub(r'\"', '', text)
    text = re.sub(r'\(', '', text)
    text = re.sub(r'\)', '', text)
    text = re.sub(r'\n', '', text)
    text = re.sub(r'“', '', text)
    text = re.sub(r'”', '', text)
    text = re.sub(r'’', '', text)
    text = re.sub(r'\.', '', text)
    text = re.sub(r';', '', text)
    text = re.sub(r':', '', text)
    text = re.sub(r'-', '', text)
    return text
```
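For reference, the same cleaning can be collapsed into a single substitution with a character class; this is only a sketch of an equivalent one-pass version, not part of the original notebook:

```python
import re

def clean_text(text):
    # One regex pass over the same punctuation handled above (assumed equivalent)
    return re.sub(r'[,\'"()“”’.;:\-\n]', '', text)
```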

```python
lower_data = story_data.lower()        # convert the string to lower case to get uniformity
split_data = lower_data.splitlines()
print(split_data)

final = ''                             # blank string to hold the final cleaned data
for line in split_data:
    line = clean_text(line)
    final += '\n' + line
print(final)

final_data = final.split('\n')         # split again to get a list of cleaned lines ready to be processed
print(final_data)
```

```python
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

max_vocab = 1000000
tokenizer = Tokenizer(num_words=max_vocab)
tokenizer.fit_on_texts(final_data)

word2idx = tokenizer.word_index
print(len(word2idx))
print(word2idx)
vocab_size = len(word2idx) + 1   # add 1 because the index starts from 1, not 0; this keeps it uniform further on
print(vocab_size)
```
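As a quick sanity check (a toy example, not from the original notebook), fitting a `Tokenizer` on two short lines shows that `word_index` starts at 1, which is why 1 is added to `vocab_size`:

```python
from tensorflow.keras.preprocessing.text import Tokenizer

toy = Tokenizer()
toy.fit_on_texts(['i am feeling good', 'i am feeling great'])
print(toy.word_index)   # e.g. {'i': 1, 'am': 2, 'feeling': 3, 'good': 4, 'great': 5}
# index 0 is reserved for padding, hence vocab_size = len(word_index) + 1
```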

```python
input_seq = []
for line in final_data:
    token_list = tokenizer.texts_to_sequences([line])[0]
    for i in range(1, len(token_list)):
        n_gram_seq = token_list[:i+1]
        input_seq.append(n_gram_seq)
print(input_seq)

max_seq_length = max(len(x) for x in input_seq)
print(max_seq_length)
input_seq = np.array(pad_sequences(input_seq, maxlen=max_seq_length, padding='pre'))
print(input_seq)

xs = input_seq[:, :-1]      # every word in the sequence except the last one, used to predict the y value
labels = input_seq[:, -1]   # only the last word of the sequence, which is one-hot encoded as y in the next step
print("xs: ", xs)
print("labels:", labels)
```
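To make the shapes concrete, here is a small illustrative trace (not part of the original code) of one hypothetical tokenized line going through the n-gram, padding, and xs/labels steps:

```python
import numpy as np
from tensorflow.keras.preprocessing.sequence import pad_sequences

token_list = [4, 7, 2, 9]                                        # a hypothetical tokenized line
prefixes = [token_list[:i + 1] for i in range(1, len(token_list))]
# prefixes -> [[4, 7], [4, 7, 2], [4, 7, 2, 9]]
padded = np.array(pad_sequences(prefixes, maxlen=4, padding='pre'))
# padded -> [[0 0 4 7]
#            [0 4 7 2]
#            [4 7 2 9]]
xs, labels = padded[:, :-1], padded[:, -1]
# xs keeps the context words of each row, labels keeps the word each row should predict
```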

```python
from tensorflow.keras.utils import to_categorical
ys = to_categorical(labels, num_classes=vocab_size)
print(ys)

from tensorflow.keras.layers import Input, Dense, Embedding, LSTM, Dropout, Bidirectional, GlobalMaxPooling1D
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Sequential
```

```python
i = Input(shape=(max_seq_length - 1,))   # one less value because the last token is reserved for the predicted word
x = Embedding(vocab_size, 124)(i)
x = Dropout(0.2)(x)
x = LSTM(520, return_sequences=True)(x)
x = Bidirectional(layer=LSTM(340, return_sequences=True))(x)
x = GlobalMaxPooling1D()(x)
x = Dense(1024, activation='relu')(x)
x = Dense(vocab_size, activation='softmax')(x)
model = Model(i, x)
```
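A quick way to confirm the layer shapes line up before training (a suggestion on my part, not something the original post did) is to print the model summary:

```python
model.summary()   # input should be (None, max_seq_length - 1), output (None, vocab_size)
```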

```python
model.compile(optimizer=Adam(learning_rate=0.001),   # `lr` is deprecated in recent TF releases
              loss='categorical_crossentropy',
              metrics=['accuracy'])
r = model.fit(xs, ys, epochs=100)
```

```python
import matplotlib.pyplot as plt
plt.plot(r.history['accuracy'])
```

```python
def predict_words(seed, no_words):
    for i in range(no_words):
        token_list = tokenizer.texts_to_sequences([seed])[0]
        token_list = pad_sequences([token_list], maxlen=max_seq_length - 1, padding='pre')
        # model.predict returns a batch of probabilities; take the argmax of the single row
        predicted = int(np.argmax(model.predict(token_list), axis=1)[0])
        new_word = ''
        for word, index in tokenizer.word_index.items():
            if predicted == index:
                new_word = word
                break
        seed += " " + new_word
    print(seed)
```
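As a side note (my suggestion, not from the original code), the Keras `Tokenizer` also exposes `index_word`, the reverse mapping, so the inner lookup loop can be replaced with a direct dictionary access; a minimal sketch assuming the same `tokenizer`, `model`, and `max_seq_length`:

```python
def predict_words_fast(seed, no_words):
    for _ in range(no_words):
        token_list = tokenizer.texts_to_sequences([seed])[0]
        token_list = pad_sequences([token_list], maxlen=max_seq_length - 1, padding='pre')
        predicted = int(np.argmax(model.predict(token_list), axis=1)[0])
        # index_word maps an integer index back to its word; 0 (padding) is not in the map
        seed += " " + tokenizer.index_word.get(predicted, '')
    print(seed)
```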

```python
seed_text = 'i am feeling good today'
next_words = 10
predict_words(seed_text, next_words)

model.save('poem_generator.h5')
```

somvirs57 commented 3 years ago

Hi @madihamymon, can you also post the error trace so we can check where the error is originating? The code looks fine.
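If it helps, one way to capture a full trace in Colab (a suggestion on my part, not something either poster wrote) is to wrap the failing step and print the traceback explicitly:

```python
import traceback

try:
    predict_words(seed_text, next_words)   # or model.fit(xs, ys, epochs=100), whichever step fails
except Exception:
    traceback.print_exc()                  # paste this output into the issue
```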