import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
# Raw demo corpus: nine sentences, mixed case and punctuation intact.
# The trailing bare `text` is the notebook's REPL echo of the value.
text = (
    "A barber is a person.\n"
    "a barber is good person.\n"
    "a barber is huge person.\n"
    "he Knew A Secret!\n"
    "The Secret He Kept is huge secret.\n"
    "Huge secret. His barber kept his word.\n"
    "a barber kept his word. His barber kept his secret.\n"
    "But keeping and keeping such a huge secret to himself was driving the barber crazy.\n"
    "the barber went up a huge mountain."
)
text
# NOTE(review): this cell duplicates the imports and corpus above (the corpus
# here is the single-line variant, spaces instead of newlines). The notebook
# export fused several statements onto one line, which is a syntax error;
# reformatted below so the file parses. Consider removing this duplicate cell.
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer

text = """A barber is a person. a barber is good person. a barber is huge person. he Knew A Secret! The Secret He Kept is huge secret. Huge secret. His barber kept his word. a barber kept his word. His barber kept his secret. But keeping and keeping such a huge secret to himself was driving the barber crazy. the barber went up a huge mountain."""
text
sentences = [['barber', 'person'], ['barber', 'good', 'person'], ['barber', 'huge', 'person'], ['knew', 'secret'], ['secret', 'kept', 'huge', 'secret'], ['huge', 'secret'], ['barber', 'kept', 'word'], ['barber', 'kept', 'word'], ['barber', 'kept', 'secret'], ['keeping', 'keeping', 'huge', 'secret', 'driving', 'barber', 'crazy'], ['barber', 'went', 'huge', 'mountain']] sentences
tokenizer = Tokenizer() tokenizer.fit_on_texts(sentences) encoded = tokenizer.texts_to_sequences(sentences) print(encoded)
max_len = max(len(item) for item in encoded) print(max_len)
for item in encoded: # 각 문장에 대해서 while len(item) < max_len: # max_len보다 작으면 item.append(0)
padded_np = np.array(encoded) padded_np