Key error after training the mode

def tokenize_text(text):
  text = ' ' + text.lower() + ' '
  text_tensor = tokenizer.texts_to_sequences([text])
  text_tensor = keras.preprocessing.sequence.pad_sequences(text_tensor, maxlen=max_length_in, padding="post")
  return text_tensor

# Reversed map from a tokenizer index to a word
index_to_word = dict(map(reversed, tokenizer.word_index.items()))

# Given an input string, an encoder model (infenc_model) and a decoder model (infmodel),
def decode_sequence(input_tensor):
    # Encode the input as state vectors.
    state = encoder_model.predict(input_tensor)

    target_seq = np.zeros((1, 1))
    target_seq[0, 0] = tokenizer.word_index['']
    curr_word = ""
    decoded_sentence = ''

    i = 0
    while curr_word != "" and i < (max_length_out - 1):
        output_tokens, h = inf_model.predict([target_seq, state])

        curr_token = np.argmax(output_tokens[0, 0])

        if (curr_token == 0):
          break;

        curr_word = index_to_word[curr_token]

        decoded_sentence += ' ' + curr_word
        target_seq[0, 0] = curr_token
        state = h
        i += 1

    return decoded_sentence

def tokens_to_seq(tokens):
  words = list(map(lambda token: index_to_word[token] if token != 0 else '', tokens))
  return ' '.join(words)

 texts = [
    'here is',
    'have a',
    'please review',
    'please call me',
    'thanks for',
    'let me',
    'Let me know',
    'Let me know if you',
    'this sounds',
    'is this call going to',
    'can you get',
    'is it okay',
    'it should',
    'call if there\'s',
    'gave her a',
    'i will let',
    'i will be',
    'may i get a copy of all the',
    'how is our trade',
    'this looks like a',
    'i am fine with the changes',
    'please be sure this'
]

output = list(map(lambda text: (text, decode_sequence(tokenize_text(text))), texts))
output_df = pd.DataFrame(output, columns=["input", "output"])
output_df.head(len(output))

Stack trace:


---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
Cell In [37], line 26
      1 texts = [
      2     'here is',
      3     'have a',
   (...)
     23     'please be sure this'
     24 ]
---> 26 output = list(map(lambda text: (text, decode_sequence(tokenize_text(text))), texts))
     27 output_df = pd.DataFrame(output, columns=["input", "output"])
     28 output_df.head(len(output))

Cell In [37], line 26, in <lambda>(text)
      1 texts = [
      2     'here is',
      3     'have a',
   (...)
     23     'please be sure this'
     24 ]
---> 26 output = list(map(lambda text: (text, decode_sequence(tokenize_text(text))), texts))
     27 output_df = pd.DataFrame(output, columns=["input", "output"])
     28 output_df.head(len(output))

Cell In [35], line 16, in decode_sequence(input_tensor)
     13 state = encoder_model.predict(input_tensor)
     15 target_seq = np.zeros((1, 1))
---> 16 target_seq[0, 0] = tokenizer.word_index['']
     17 curr_word = ""
     18 decoded_sentence = ''

KeyError: ''

jiayihu / gmail-smart-compose

Key error after training the mode #31