jangjusung / jusung-python

0 stars 0 forks source link

RNN #35

Open jangjusung opened 2 years ago

jangjusung commented 2 years ago

# Two import statements were fused onto one line (invalid syntax); split them.
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer

# Raw corpus used in the original tokenization example.
# (The trailing bare `text` was a Jupyter cell echo — a no-op in a script.)
text = """A barber is a person. a barber is good person. a barber is huge person. he Knew A Secret! The Secret He Kept is huge secret. Huge secret. His barber kept his word. a barber kept his word. His barber kept his secret. But keeping and keeping such a huge secret to himself was driving the barber crazy. the barber went up a huge mountain."""
text

# Pre-tokenized, stop-word-filtered version of `text`: one word list per sentence.
# (The trailing bare `sentences` was a Jupyter cell echo — a no-op in a script.)
sentences = [
    ['barber', 'person'],
    ['barber', 'good', 'person'],
    ['barber', 'huge', 'person'],
    ['knew', 'secret'],
    ['secret', 'kept', 'huge', 'secret'],
    ['huge', 'secret'],
    ['barber', 'kept', 'word'],
    ['barber', 'kept', 'word'],
    ['barber', 'kept', 'secret'],
    ['keeping', 'keeping', 'huge', 'secret', 'driving', 'barber', 'crazy'],
    ['barber', 'went', 'huge', 'mountain'],
]
sentences

# Four statements were fused onto one line (invalid syntax); split them.
# Build a word index from the corpus, then map every sentence to a list of
# integer ids (more frequent words get lower ids in Keras' Tokenizer).
tokenizer = Tokenizer()
tokenizer.fit_on_texts(sentences)
encoded = tokenizer.texts_to_sequences(sentences)
print(encoded)

# Two statements were fused onto one line (invalid syntax); split them.
# Longest encoded sentence — the target length every sequence is padded to.
max_len = max(len(item) for item in encoded)
print(max_len)

# Reconstructed from a whitespace-mangled paste (for/while/append were fused
# onto one line); comments translated from Korean.
# Post-pad each sequence in place with 0s until it reaches max_len, so all
# rows have equal length (0 is reserved by Keras' Tokenizer for padding).
for item in encoded:  # for each sentence
    while len(item) < max_len:  # while shorter than max_len
        item.append(0)

# Two statements were fused onto one line (invalid syntax); split them.
# All rows are now max_len long, so this builds a rectangular 2-D int array.
# (The trailing bare `padded_np` was a Jupyter cell echo — a no-op in a script.)
padded_np = np.array(encoded)
padded_np