Open xlHuang0719 opened 3 years ago
Sorry, that's a simple problem in NLP. I have figured it out.
Sorry, that's a simple problem in NLP. I have figured it out.
Hello, how do you solve this problem?
Sorry, that's a simple problem in NLP. I have figured it out.
Hello, how do you solve this problem?
Hi, I constructed the vocab file by the following Python script.
import os
import json

import nltk
import tensorlayer as tl

nltk.download('punkt')

# Special tokens and their indices.
# NOTE(review): the original post's angle-bracket tokens were stripped by the
# issue tracker's HTML rendering; reconstructed here as the conventional
# <pad>/<s>/</s>/<unk> set used by nmtpytorch -- confirm against the vocab file.
TOKENS = {"<pad>": 0, "<s>": 1, "</s>": 2, "<unk>": 3}


def main(src_path, mid_path, dest_path):
    """Build a JSON vocabulary file (token -> "index count") from a tokenized
    transcript.

    src_path:  input transcript, one '<video-id> <sentence>' line per entry
    mid_path:  intermediate word-count file written by tl.nlp.create_vocab
    dest_path: final JSON vocabulary file
    """
    try:
        with open(src_path, 'r', encoding='UTF-8') as f:
            txt = f.readlines()
    except Exception:
        print("Could not open")
        return  # bug fix: the original fell through and used the undefined `txt`

    # Tokenize every transcript line; the leading video id (text before the
    # first space) is dropped.
    processed_capts = []
    for tran in txt:
        vid_idx = tran.find(' ')
        sentences = tran[vid_idx + 1:]
        c = tl.nlp.process_sentence(sentences,
                                    start_word=TOKENS['<s>'],
                                    end_word=TOKENS['</s>'])
        processed_capts.append(c)
    tl.nlp.create_vocab(processed_capts,
                        word_counts_output_file=mid_path,
                        min_word_count=10)

    # Special tokens come first (count 0), then every counted word from the
    # intermediate file that is not already present.
    dict_save = {}
    word_index = 0
    for tok in TOKENS:
        dict_save[tok] = str(word_index) + ' 0'
        word_index += 1
    with open(mid_path, 'r', encoding='UTF-8') as f:
        vocabs = f.readlines()
    for line in vocabs:
        word, count = line.split()[:2]
        if word not in dict_save:
            dict_save[word] = str(word_index) + ' ' + count
            word_index += 1
    with open(dest_path, 'w') as f:
        json.dump(dict_save, f)


if __name__ == "__main__":
    text_path = '/workspace/how2'
    src_path = os.path.join(text_path, 'text_300/sum_train_300/tran.tok.txt')
    mid_path = os.path.join(text_path, 'text_300/sum_train_300/tran.tok.vocab_original.txt')
    dest_path = os.path.join(text_path, 'text_300/sum_train_300/tran.tok.vocab.txt')
    main(src_path, mid_path, dest_path)
Sorry, that's a simple problem in NLP. I have figured it out.
Hello, how do you solve this problem?
Hi, I constructed the vocab file by the following Python script.
import os
import json

import nltk
import tensorlayer as tl

nltk.download('punkt')

# Special tokens and their indices.
# NOTE(review): the angle-bracket tokens were eaten by the issue renderer
# (the dict keys appear empty in the quoted text); reconstructed as the
# conventional <pad>/<s>/</s>/<unk> set -- verify against the vocab file.
TOKENS = {"<pad>": 0, "<s>": 1, "</s>": 2, "<unk>": 3}


def main(src_path, mid_path, dest_path):
    """Build a JSON vocabulary file (token -> "index count") from a tokenized
    transcript.

    src_path:  input transcript, one '<video-id> <sentence>' line per entry
    mid_path:  intermediate word-count file written by tl.nlp.create_vocab
    dest_path: final JSON vocabulary file
    """
    try:
        with open(src_path, 'r', encoding='UTF-8') as f:
            txt = f.readlines()
    except Exception:
        print("Could not open")
        return  # bug fix: the original continued and hit the undefined `txt`

    # Tokenize each line after stripping the leading video id.
    processed_capts = []
    for tran in txt:
        vid_idx = tran.find(' ')
        sentences = tran[vid_idx + 1:]
        c = tl.nlp.process_sentence(sentences,
                                    start_word=TOKENS['<s>'],
                                    end_word=TOKENS['</s>'])
        processed_capts.append(c)
    tl.nlp.create_vocab(processed_capts,
                        word_counts_output_file=mid_path,
                        min_word_count=10)

    # Specials first (count 0), then every new counted word in file order.
    dict_save = {}
    word_index = 0
    for tok in TOKENS:
        dict_save[tok] = str(word_index) + ' 0'
        word_index += 1
    with open(mid_path, 'r', encoding='UTF-8') as f:
        vocabs = f.readlines()
    for line in vocabs:
        word, count = line.split()[:2]
        if word not in dict_save:
            dict_save[word] = str(word_index) + ' ' + count
            word_index += 1
    with open(dest_path, 'w') as f:
        json.dump(dict_save, f)


if __name__ == "__main__":
    text_path = '/workspace/how2'
    src_path = os.path.join(text_path, 'text_300/sum_train_300/tran.tok.txt')
    mid_path = os.path.join(text_path, 'text_300/sum_train_300/tran.tok.vocab_original.txt')
    dest_path = os.path.join(text_path, 'text_300/sum_train_300/tran.tok.vocab.txt')
    main(src_path, mid_path, dest_path)
Thank you very much. I want to know if you encountered many bugs when using nmtpytorch. I run it on torch 1.8.0, but there are always various problems.
Sorry, that's a simple problem in NLP. I have figured it out.
Hello, how do you solve this problem?
Hi, I constructed the vocab file by the following Python script.

```python
import os
import tensorlayer as tl
import nltk
import json

nltk.download('punkt')

TOKENS = {"<pad>": 0, "<s>": 1, "</s>": 2, "<unk>": 3}

def main(src_path, mid_path, dest_path):
    try:
        with open(src_path, 'r', encoding='UTF-8') as f:
            txt = f.readlines()
    except Exception as e:
        print("Could not open")
    processed_capts = []
    for tran in txt:
        vid_idx = tran.find(' ')
        sentences = tran[vid_idx+1:]
        c = tl.nlp.process_sentence(sentences, start_word=TOKENS['<s>'], end_word=TOKENS['</s>'])
        processed_capts.append(c)
    tl.nlp.create_vocab(processed_capts, word_counts_output_file=mid_path, min_word_count=10)
    dict_save = {}
    word_index = 0
    for tok, idx in TOKENS.items():
        dict_save[tok] = str(word_index)+' 0'
        word_index += 1
    with open(mid_path, 'r', encoding='UTF-8') as f:
        vocabs = f.readlines()
    for idx, word in enumerate(vocabs):
        if word.split()[0] not in dict_save.keys():
            dict_save[word.split()[0]] = str(word_index)+' '+word.split()[1]
            word_index += 1
    with open(dest_path, 'w') as f:
        json.dump(dict_save, f)

if __name__ == "__main__":
    text_path = '/workspace/how2'
    src_path = os.path.join(text_path, 'text_300/sum_train_300/tran.tok.txt')
    mid_path = os.path.join(text_path, 'text_300/sum_train_300/tran.tok.vocab_original.txt')
    dest_path = os.path.join(text_path, 'text_300/sum_train_300/tran.tok.vocab.txt')
    main(src_path, mid_path, dest_path)
```

(Note: the angle-bracket special tokens in `TOKENS` were stripped by the issue renderer and are reconstructed above — verify them against your vocab file.)
Thank you very much. I want to know if you encountered many bugs when using nmtpytorch. I run it on torch 1.8.0, but there are always various problems.
Maybe you can try downgrading your PyTorch version to 1.6 or lower.
Hi, when I run the project, I don't know how to create the vocab file as specified in the conf file:

```
[vocabulary]
tran: ${data:root}/text_300/sum_train_300/tran.tok.vocab.txt
desc: ${data:root}/text_300/sum_train_300/tran.tok.vocab.txt
```
I want to know how to create the tran.tok.vocab.txt file, appreciate your reply.