jiayihu / gmail-smart-compose

A study implementation of Gmail Smart Compose trained with Keras and used in the browser with TensorFlow.js
MIT License
26 stars 8 forks source link

KeyError after training the model #31

Open rahulkrprajapati opened 8 months ago

rahulkrprajapati commented 8 months ago
def tokenize_text(text):
  """Lower-case *text*, tokenize it, and pad to the encoder input length.

  NOTE(review): the surrounding plain spaces are stripped by the Keras
  Tokenizer's default filters, so no start/end marker survives
  tokenization — presumably these were special tokens (e.g. '\t'/'\n')
  lost when the snippet was pasted; confirm against the training code.
  """
  lowered = ' ' + text.lower() + ' '
  sequence = tokenizer.texts_to_sequences([lowered])
  padded = keras.preprocessing.sequence.pad_sequences(
      sequence, maxlen=max_length_in, padding="post")
  return padded

# Inverse vocabulary lookup: token index -> word
# (tokenizer.word_index maps word -> index).
index_to_word = {index: word for word, index in tokenizer.word_index.items()}

# Given an input string, an encoder model (encoder_model) and a decoder model (inf_model),
def decode_sequence(input_tensor, start_token='\t', end_token='\n'):
    """Greedily decode an output sentence for an encoded input prefix.

    Args:
        input_tensor: padded token-id array produced by tokenize_text().
        start_token: vocabulary token that seeds the decoder.
        end_token: vocabulary token that terminates decoding.

    NOTE(review): the original code did ``tokenizer.word_index['']``,
    which always raises KeyError — the Keras Tokenizer never indexes the
    empty string (this is the crash in the reported stack trace).  It
    also initialised ``curr_word = ""`` and looped ``while curr_word !=
    ""``, so the loop body could never run even if the lookup succeeded.
    Presumably a '\t'/'\n' start/end marker pair was lost when the
    snippet was pasted; the defaults here must match the markers used
    when the tokenizer was fitted — confirm against the training code.

    Returns:
        The decoded continuation as a string of space-separated words
        (each word prefixed with a space).
    """
    # Encode the input prefix into the initial decoder state.
    state = encoder_model.predict(input_tensor)

    target_seq = np.zeros((1, 1))
    target_seq[0, 0] = tokenizer.word_index[start_token]

    # Seed with something != end_token so the loop can run at all.
    curr_word = start_token
    decoded_sentence = ''

    i = 0
    while curr_word != end_token and i < (max_length_out - 1):
        output_tokens, h = inf_model.predict([target_seq, state])

        # Greedy decoding: pick the highest-probability next token.
        curr_token = np.argmax(output_tokens[0, 0])

        # Token 0 is the padding index — nothing more to decode.
        if curr_token == 0:
            break

        curr_word = index_to_word[curr_token]

        decoded_sentence += ' ' + curr_word
        # Feed the chosen token and the new state back into the decoder.
        target_seq[0, 0] = curr_token
        state = h
        i += 1

    return decoded_sentence

def tokens_to_seq(tokens):
  """Join token ids back into a space-separated string.

  Id 0 is the padding index and is rendered as an empty string.
  """
  words = [index_to_word[token] if token != 0 else '' for token in tokens]
  return ' '.join(words)

 texts = [
    'here is',
    'have a',
    'please review',
    'please call me',
    'thanks for',
    'let me',
    'Let me know',
    'Let me know if you',
    'this sounds',
    'is this call going to',
    'can you get',
    'is it okay',
    'it should',
    'call if there\'s',
    'gave her a',
    'i will let',
    'i will be',
    'may i get a copy of all the',
    'how is our trade',
    'this looks like a',
    'i am fine with the changes',
    'please be sure this'
]

output = list(map(lambda text: (text, decode_sequence(tokenize_text(text))), texts))
output_df = pd.DataFrame(output, columns=["input", "output"])
output_df.head(len(output))

Stack trace:


---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
Cell In [37], line 26
      1 texts = [
      2     'here is',
      3     'have a',
   (...)
     23     'please be sure this'
     24 ]
---> 26 output = list(map(lambda text: (text, decode_sequence(tokenize_text(text))), texts))
     27 output_df = pd.DataFrame(output, columns=["input", "output"])
     28 output_df.head(len(output))

Cell In [37], line 26, in <lambda>(text)
      1 texts = [
      2     'here is',
      3     'have a',
   (...)
     23     'please be sure this'
     24 ]
---> 26 output = list(map(lambda text: (text, decode_sequence(tokenize_text(text))), texts))
     27 output_df = pd.DataFrame(output, columns=["input", "output"])
     28 output_df.head(len(output))

Cell In [35], line 16, in decode_sequence(input_tensor)
     13 state = encoder_model.predict(input_tensor)
     15 target_seq = np.zeros((1, 1))
---> 16 target_seq[0, 0] = tokenizer.word_index['']
     17 curr_word = ""
     18 decoded_sentence = ''

KeyError: ''