File "/projects/MonikaDesu-Rebecca/src/data_utils.py", line 137, in prepare_custom_data
data_to_token_ids(train_enc, enc_train_ids_path, enc_vocab_path, tokenizer)
File "/projects/MonikaDesu-Rebecca/src/data_utils.py", line 112, in data_to_token_ids
vocab, _ = initialize_vocabulary(vocabulary_path)
File "/projects/MonikaDesu-Rebecca/src/data_utils.py", line 87, in initialize_vocabulary
rev_vocab.extend(f.readlines())
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/lib/io/file_io.py", line 131, in readlines
s = self.readline()
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/lib/io/file_io.py", line 124, in readline
return compat.as_str_any(self._read_buf.ReadLineAsString())
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/util/compat.py", line 106, in as_str_any
return as_str(value)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/util/compat.py", line 84, in as_text
return bytes_or_text.decode(encoding)
UnicodeDecodeError: 'utf-8' codec can't decode byte 0x92 in position 1: invalid start byte
Backtrace :