simplysameer333 / MachineLearning

1 stars 1 forks source link

check #11

Open simplysameer333 opened 5 years ago

simplysameer333 commented 5 years ago

https://github.com/tensorflow/docs/blob/master/site/en/r2/tutorials/sequences/text_classification_rnn.ipynb

!pip install tensorflow-gpu==2.0.0-alpha0

!pip install tensorflow_datasets

import tensorflow_datasets as tfds import tensorflow as tf import matplotlib.pyplot as plt

BUFFER_SIZE = 10000 BATCH_SIZE = 64

def plotgraphs(history, string): plt.plot(history.history[string]) plt.plot(history.history['val'+string]) plt.xlabel("Epochs") plt.ylabel(string) plt.legend([string, 'val_'+string]) plt.show()

def pad_to_size(vec, size): zeros = [0] * (size - len(vec)) vec.extend(zeros) return vec

def sample_predict(sentence, pad):
    """Encode a raw text string and run the trained classifier on it.

    Args:
        sentence: raw review text to classify.
        pad: if True, zero-pad the encoded token ids out to length 64.

    Returns:
        The model's prediction array for the single example.
    """
    # BUG FIX: encode the `sentence` argument, not the module-level
    # `sample_pred_text` — the parameter was silently ignored before,
    # so every call classified the same global string.
    tokenized_sample_pred_text = tokenizer.encode(sentence)

    if pad:
        tokenized_sample_pred_text = pad_to_size(tokenized_sample_pred_text, 64)

    # expand_dims adds the batch dimension the model expects.
    predictions = model.predict(tf.expand_dims(tokenized_sample_pred_text, 0))

    return predictions

# Load the IMDB reviews dataset together with its pre-built 8k-subword
# tokenizer (stored in the dataset's info object).
dataset, info = tfds.load('imdb_reviews/subwords8k',
                          with_info=True, as_supervised=True)
train_dataset, test_dataset = dataset['train'], dataset['test']
tokenizer = info.features['text'].encoder
print('Vocabulary size: {}'.format(tokenizer.vocab_size))

# Shuffle the training examples, then batch with padding so every sequence
# in a batch is padded to the batch's longest sequence.
# NOTE(review): `output_shapes` as the padded-shapes argument is the
# TF 2.0-alpha API this snippet targets.
train_dataset = train_dataset.shuffle(BUFFER_SIZE)
train_dataset = train_dataset.padded_batch(BATCH_SIZE, train_dataset.output_shapes)
test_dataset = test_dataset.padded_batch(BATCH_SIZE, test_dataset.output_shapes)

Create the model

# Bidirectional-LSTM text classifier:
# subword embedding -> two stacked bidirectional LSTMs -> dense head
# with a single sigmoid unit for binary sentiment.
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(tokenizer.vocab_size, 64),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64, return_sequences=True)),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid'),
])

model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

history = model.fit(train_dataset, epochs=10, validation_data=test_dataset)

test_loss, test_acc = model.evaluate(test_dataset)

print('Test Loss: {}'.format(test_loss))
print('Test Accuracy: {}'.format(test_acc))

Predict on a sample text without padding.

# Classify one hand-written negative review without padding, then plot the
# training curves for both tracked metrics.
sample_pred_text = ('The movie was not good. The animation and the graphics '
                    'were terrible. I would not recommend this movie.')
predictions = sample_predict(sample_pred_text, pad=False)
print(predictions)

plot_graphs(history, 'accuracy')
plot_graphs(history, 'loss')

simplysameer333 commented 5 years ago

http://androidkt.com/tensorflow-text-classification-attention-mechanism/ https://www.tensorflow.org/guide/premade_estimators https://www.kaggle.com/pierrek20/multiclass-iris-prediction-with-tensorflow-keras

simplysameer333 commented 5 years ago

# FIX: deduplicated imports — pandas was imported three times and numpy
# twice in the original paste. Every distinct name stays in scope.
import random
import time

import numpy as np      # linear algebra
import pandas as pd     # data processing, CSV file I/O (e.g. pd.read_csv)
import scipy as sp
import seaborn as sns
import matplotlib.pyplot as plt

import sklearn
from sklearn import preprocessing, model_selection
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.utils import shuffle

from keras.models import Sequential
from keras.layers import Dense
from keras.utils import np_utils
from keras.utils.np_utils import to_categorical

# Load the Iris dataset, drop the synthetic row-id column, and shuffle rows
# so the held-out prediction sample is random.
data = pd.read_csv('../input/Iris.csv')
data = data.drop(['Id'], axis=1)

data = shuffle(data)

# Hold the first 8 shuffled rows out as a final "unseen" prediction set.
i = 8
data_to_predict = data[:i].reset_index(drop=True)
predict_species = np.array(data_to_predict.Species)
prediction = np.array(data_to_predict.drop(['Species'], axis=1))

# Everything after the held-out rows is used for training/testing.
data = data[i:].reset_index(drop=True)

X = np.array(data.drop(['Species'], axis=1))
Y = data['Species']

Transform the species names into numerical values.

# Integer-encode the species names (alphabetical order), then one-hot
# encode them for the softmax output layer.
encoder = LabelEncoder()
encoder.fit(Y)
Y = np_utils.to_categorical(encoder.transform(Y))

print(Y)

We have 3 classes; `to_categorical` places the 1 at the class index, so the one-hot output looks like:

1,0,0 : Class 1

0,1,0 : Class 2

0,0,1 : Class 3

# 90/10 train/test split with a fixed seed for reproducibility.
train_x, test_x, train_y, test_y = model_selection.train_test_split(
    X, Y, test_size=0.1, random_state=0)

# Number of feature columns: every column except 'Species'.
input_dim = len(data.columns) - 1

# Small fully-connected classifier ending in a 3-way softmax.
model = Sequential()
model.add(Dense(8, input_dim=input_dim, activation='relu'))
model.add(Dense(10, activation='relu'))
model.add(Dense(10, activation='relu'))
model.add(Dense(10, activation='relu'))
model.add(Dense(3, activation='softmax'))

model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

model.fit(train_x, train_y, epochs=10, batch_size=2)

scores = model.evaluate(test_x, test_y)
print("\n%s: %.2f%%" % (model.metrics_names[1], scores[1] * 100))

# BUG FIX: restore the underscores that markdown formatting stripped from
# the pasted code: predict_classes, to_categorical, inverse_transform, and
# the `prediction_` variable that the loop below iterates (it was never
# assigned, so the loop raised NameError as written).
predictions = model.predict_classes(prediction)
# to_categorical + argmax round-trips the class indices; kept to preserve
# the original tutorial's flow.
prediction_ = np.argmax(to_categorical(predictions), axis=1)
# Map integer class indices back to species name strings.
prediction_ = encoder.inverse_transform(prediction_)

for i, j in zip(prediction_, predict_species):
    print(" the nn predict {}, and the species to find is {}".format(i, j))