Open ZXXSG opened 2 years ago
I have encountered the same problem as you. May I ask how you solved it?
def build_vocab(self, path, data):
print("Creating vocabulary...")
vocab = {}
for pair in data:
for token in pair['sentence']:
if token in vocab:
vocab[token] += 1
else:
vocab[token] = 1
vocab_list = sorted(vocab, key=vocab.get, reverse=True)
if len(vocab_list) > self.FLAGS.voc_size:
vocab_list = vocab_list[:self.FLAGS.voc_size]
vocab_list.append('<unk>')
print("Loading word vectors...")
vectors = {}
word_dim = None # 用于存储词向量的维度
with open('%s' % path) as f:
for line in f:
s = line.strip()
word = s[:s.find(' ')]
vector = s[s.find(' ')+1:].split()
if word_dim is None:
word_dim = len(vector)
vectors[word] = np.array(vector, dtype=np.float32)
embed = []
num_not_found, num_found = 0, 0
for word in vocab_list:
if word in vectors:
vector = vectors[word]
num_found += 1
else:
num_not_found += 1
vector = np.zeros((word_dim,), dtype=np.float32)
embed.append(vector)
print('%s words found in vocab' % num_found)
print('%s words not found in vocab' % num_not_found)
embed = np.array(embed, dtype=np.float32)
return vocab_list, embed, vocab
when i run the code, I meet the following problem. I hope that you can help me. Thanks. (pytorch_gpu) G:\RNN-Capsule>python main.py --rnn_type LSTM --name_model 'LSTM-Capsule' --data_dir ./data-rt-acl --bidirectional Tr ue Namespace(batch_size=64, bidirectional=True, cell_dropout=0.5, data_dir='./data-rt-acl', embed_dropout=0.3, final_dropout=0.5, hidd en_dim=256, iter_num=8192, learning_rate=0.001, lr_word_vector=0.0001, max_length=64, n_label=2, n_layer=2, name_model="'LSTM-Capsu le'", optim_type='Adam', per_checkpoint=32, rnn_type='LSTM', seed=1705216, voc_size=32768, weight_decay=0, word_dim=300, wordvec_na me='glove.840B.300d.txt') Creating vocabulary... Loading word vectors... 16525 words found in vocab 2654 words not found in vocab Traceback (most recent call last): File "main.py", line 85, in
vocab, embed, vocab_dict = datamanager.build_vocab('%s/%s' % (FLAGS.data_dir, FLAGS.wordvec_name), data['train'])
File "G:\RNN-Capsule\datamanager.py", line 59, in build_vocab
embed = np.array(embed, dtype=np.float32)
ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (19179,) + inhomogeneous part.