(venv) PS F:\code\transformer-simple> python run.py
Traceback (most recent call last):
File "F:\code\transformer-simple\run.py", line 10, in
from prepare_data import PrepareData
File "F:\code\transformer-simple\prepare_data.py", line 4, in
ModuleNotFoundError: No module named 'nltk'
(venv) PS F:\code\transformer-simple> python run.py
Traceback (most recent call last):
File "F:\code\transformer-simple\run.py", line 87, in
main()
File "F:\code\transformer-simple\run.py", line 45, in main
data = PrepareData()
File "F:\code\transformer-simple\prepare_data.py", line 14, in init
self.train_en, self.train_cn = self.load_data(args.train_file)
File "F:\code\transformer-simple\prepare_data.py", line 33, in load_data
UnicodeDecodeError: 'gbk' codec can't decode byte 0x80 in position 8: illegal multibyte sequence
(venv) PS F:\code\transformer-simple> python run.py
Traceback (most recent call last):
File "F:\code\transformer-simple\run.py", line 87, in
main()
File "F:\code\transformer-simple\run.py", line 45, in main
data = PrepareData()
File "F:\code\transformer-simple\prepare_data.py", line 14, in init
self.train_en, self.train_cn = self.load_data(args.train_file)
File "F:\code\transformer-simple\prepare_data.py", line 33, in load_data
for line in f:
UnicodeDecodeError: 'gbk' codec can't decode byte 0x80 in position 8: illegal multibyte sequence
(venv) PS F:\code\transformer-simple> python run.py --type evaluate
Traceback (most recent call last):
File "F:\code\transformer-simple\run.py", line 87, in
main()
File "F:\code\transformer-simple\run.py", line 45, in main
data = PrepareData()
File "F:\code\transformer-simple\prepare_data.py", line 14, in init
self.train_en, self.train_cn = self.load_data(args.train_file)
File "F:\code\transformer-simple\prepare_data.py", line 33, in load_data
for line in f:
UnicodeDecodeError: 'gbk' codec can't decode byte 0x80 in position 8: illegal multibyte sequence
(venv) PS F:\code\transformer-simple> python run.py
Traceback (most recent call last): python run.py --type evaluate
File "F:\code\transformer-simple\run.py", line 87, in
main()
File "F:\code\transformer-simple\run.py", line 45, in main
data = PrepareData()
File "F:\code\transformer-simple\prepare_data.py", line 14, in init
self.train_en, self.train_cn = self.load_data(args.train_file)
File "F:\code\transformer-simple\prepare_data.py", line 36, in load_data
en.append(["BOS"] + word_tokenize(line[0].lower()) + ["EOS"])
File "C:\Users\Administrator\venv\lib\site-packages\nltk\tokenize__init__.py", line 129, in word_tokenize
sentences = [text] if preserve_line else sent_tokenize(text, language)
File "C:\Users\Administrator\venv\lib\site-packages\nltk\tokenize__init__.py", line 106, in sent_tokenize
tokenizer = load(f"tokenizers/punkt/{language}.pickle")
File "C:\Users\Administrator\venv\lib\site-packages\nltk\data.py", line 750, in load
opened_resource = _open(resource_url)
File "C:\Users\Administrator\venv\lib\site-packages\nltk\data.py", line 876, in open
return find(path, path + [""]).open()
File "C:\Users\Administrator\venv\lib\site-packages\nltk\data.py", line 583, in find
raise LookupError(resource_not_found)
LookupError:
Resource punkt not found.
Please use the NLTK Downloader to obtain the resource:
(venv) PS F:\code\transformer-simple> python run.py
Traceback (most recent call last):
File "F:\code\transformer-simple\run.py", line 87, in
main()
File "F:\code\transformer-simple\run.py", line 45, in main
data = PrepareData()
File "F:\code\transformer-simple\prepare_data.py", line 14, in init
self.train_en, self.train_cn = self.load_data(args.train_file)
File "F:\code\transformer-simple\prepare_data.py", line 36, in load_data
en.append(["BOS"] + word_tokenize(line[0].lower()) + ["EOS"])
File "C:\Users\Administrator\venv\lib\site-packages\nltk\tokenize__init__.py", line 129, in word_tokenize
sentences = [text] if preserve_line else sent_tokenize(text, language)
File "C:\Users\Administrator\venv\lib\site-packages\nltk\tokenize__init__.py", line 106, in sent_tokenize
tokenizer = load(f"tokenizers/punkt/{language}.pickle")
File "C:\Users\Administrator\venv\lib\site-packages\nltk\data.py", line 750, in load
opened_resource = _open(resource_url)
File "C:\Users\Administrator\venv\lib\site-packages\nltk\data.py", line 876, in open
return find(path, path + [""]).open()
File "C:\Users\Administrator\venv\lib\site-packages\nltk\data.py", line 583, in find
raise LookupError(resource_not_found)
LookupError:
Resource punkt not found.
Please use the NLTK Downloader to obtain the resource:
(venv) PS F:\code\transformer-simple> ^C
(venv) PS F:\code\transformer-simple> python run.py
[nltk_data] Downloading package punkt to
[nltk_data] C:\Users\Administrator\AppData\Roaming\nltk_data...
File "F:\code\transformer-simple\run.py", line 10, in
from prepare_data import PrepareData
File "F:\code\transformer-simple\prepare_data.py", line 4, in
from nltk1 import word_tokenize
File "F:\code\transformer-simple\nltk1.py", line 2, in
nltk.download('punkt')
File "C:\Users\Administrator\venv\lib\site-packages\nltk\downloader.py", line 777, in download
for msg in self.incr_download(info_or_id, download_dir, force):
File "C:\Users\Administrator\venv\lib\site-packages\nltk\downloader.py", line 642, in incr_download
File "C:\Users\Administrator\venv\lib\site-packages\nltk\downloader.py", line 695, in _download_package
os.remove(filepath)
PermissionError: [WinError 32] 另一个程序正在使用此文件,进程无法访问。: 'C:\Users\Administrator\AppData\Roaming\nltk_data\tokenizers\punkt.zip'
(venv) PS F:\code\transformer-simple> python run.py
[nltk_data] Downloading package punkt to
[nltk_data] C:\Users\Administrator\AppData\Roaming\nltk_data...
[nltk_data] Package punkt is already up-to-date!
Traceback (most recent call last):
File "F:\code\transformer-simple\run.py", line 10, in
from prepare_data import PrepareData
File "F:\code\transformer-simple\prepare_data.py", line 4, in
from nltk1 import word_tokenize
ImportError: cannot import name 'word_tokenize' from 'nltk1' (F:\code\transformer-simple\nltk1.py)
(venv) PS F:\code\transformer-simple> python run.py
src_vocab 6309
tgt_vocab 3439
start train
C:\Users\Administrator\venv\lib\site-packages\torch\nn_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='sum' instead.
warnings.warn(warning.format(ret))
Epoch 0 Batch: 0 Loss: 8.133185 Tokens per Sec: 2.177150s
Epoch 0 Batch: 50 Loss: 7.429173 Tokens per Sec: 2.355751s
Epoch 0 Batch: 100 Loss: 6.456788 Tokens per Sec: 2.339739s
Epoch 0 Batch: 150 Loss: 5.560229 Tokens per Sec: 2.274967s
Epoch 0 Batch: 200 Loss: 5.631516 Tokens per Sec: 2.376741s
Epoch 0 Batch: 250 Loss: 4.571174 Tokens per Sec: 2.368741s
Epoch 0 Batch: 300 Loss: 5.864964 Tokens per Sec: 2.376392s
Evaluate
Epoch 0 Batch: 0 Loss: 5.005608 Tokens per Sec: 2.080838s
<<<<< Evaluate loss: 4.575314
Epoch 1 Batch: 0 Loss: 4.769076 Tokens per Sec: 2.195853s
Epoch 1 Batch: 50 Loss: 4.747526 Tokens per Sec: 2.317183s
Epoch 1 Batch: 100 Loss: 3.432811 Tokens per Sec: 2.387290s
Epoch 1 Batch: 150 Loss: 3.867074 Tokens per Sec: 2.317557s
Epoch 1 Batch: 200 Loss: 4.345443 Tokens per Sec: 2.395591s
Epoch 1 Batch: 250 Loss: 3.182194 Tokens per Sec: 2.358538s
Epoch 1 Batch: 300 Loss: 4.922453 Tokens per Sec: 2.379280s
Evaluate
Epoch 1 Batch: 0 Loss: 3.726320 Tokens per Sec: 2.088690s
<<<<< Evaluate loss: 3.187353
Epoch 2 Batch: 0 Loss: 3.615676 Tokens per Sec: 2.236484s
Epoch 2 Batch: 50 Loss: 3.564056 Tokens per Sec: 2.385402s
Epoch 2 Batch: 100 Loss: 2.468265 Tokens per Sec: 2.350994s
Epoch 2 Batch: 150 Loss: 2.947180 Tokens per Sec: 2.212929s
Epoch 2 Batch: 200 Loss: 3.374241 Tokens per Sec: 2.410238s
Epoch 2 Batch: 250 Loss: 2.233203 Tokens per Sec: 2.373651s
Epoch 2 Batch: 300 Loss: 4.186131 Tokens per Sec: 2.395608s
Evaluate
Epoch 2 Batch: 0 Loss: 2.723930 Tokens per Sec: 2.075086s
<<<<< Evaluate loss: 2.302255
Epoch 3 Batch: 0 Loss: 2.822041 Tokens per Sec: 2.262297s
Epoch 3 Batch: 50 Loss: 2.664301 Tokens per Sec: 2.366780s
Epoch 3 Batch: 100 Loss: 1.729961 Tokens per Sec: 2.302921s
Epoch 3 Batch: 150 Loss: 2.305430 Tokens per Sec: 2.360787s
Epoch 3 Batch: 200 Loss: 2.605517 Tokens per Sec: 2.181611s
Epoch 3 Batch: 250 Loss: 1.637028 Tokens per Sec: 2.385572s
Epoch 3 Batch: 300 Loss: 3.590002 Tokens per Sec: 2.402328s
return container(name_or_buffer)
File "C:\Users\Administrator\venv\lib\site-packages\torch\serialization.py", line 463, in init
super().init(torch._C.PyTorchFileWriter(self.name))
RuntimeError: Parent directory save does not exist.
(venv) PS F:\code\transformer-simple>
我自己部署了一下,挺好部署的。用 PyCharm 修一下就行。
Windows PowerShell
尝试新的跨平台 PowerShell https://aka.ms/pscore6
(venv) PS F:\code\transformer-simple> !pip install http://download.pytorch.org/whl/cu80/torch-0.3.0.post4-cp36-cp36m-linux_x86_64.whl numpy matplotlib spacy torchtext seaborn 21
!pip : 无法将“!pip”项识别为 cmdlet、函数、脚本文件或可运行程序的名称。请检查名称的拼写,如果包括路径,请确保路径正确,然后再试一次。
(venv) PS F:\code\transformer-simple> pip install http://download.pytorch.org/whl/cu80/torch-0.3.0.post4-cp36-cp36m-linux_x86_64.whl numpy matplotlib spacy torchtext seaborn
ERROR: torch-0.3.0.post4-cp36-cp36m-linux_x86_64.whl is not a supported wheel on this platform.
[notice] A new release of pip available: 22.3.1 -> 23.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip
(venv) PS F:\code\transformer-simple> python3 run.py
python3 : 无法将“python3”项识别为 cmdlet、函数、脚本文件或可运行程序的名称。请检查名称的拼写,如果包括路径,请确保路径正确,然后再试一次。
(venv) PS F:\code\transformer-simple> python run.py
Traceback (most recent call last):
File "F:\code\transformer-simple\run.py", line 10, in
from prepare_data import PrepareData
File "F:\code\transformer-simple\prepare_data.py", line 4, in
ModuleNotFoundError: No module named 'nltk'
(venv) PS F:\code\transformer-simple> python run.py
Traceback (most recent call last):
File "F:\code\transformer-simple\run.py", line 87, in
main()
File "F:\code\transformer-simple\run.py", line 45, in main
data = PrepareData()
File "F:\code\transformer-simple\prepare_data.py", line 14, in init
self.train_en, self.train_cn = self.load_data(args.train_file)
File "F:\code\transformer-simple\prepare_data.py", line 33, in load_data
UnicodeDecodeError: 'gbk' codec can't decode byte 0x80 in position 8: illegal multibyte sequence
(venv) PS F:\code\transformer-simple> python run.py
Traceback (most recent call last):
File "F:\code\transformer-simple\run.py", line 87, in
main()
File "F:\code\transformer-simple\run.py", line 45, in main
data = PrepareData()
File "F:\code\transformer-simple\prepare_data.py", line 14, in init
self.train_en, self.train_cn = self.load_data(args.train_file)
File "F:\code\transformer-simple\prepare_data.py", line 33, in load_data
for line in f:
UnicodeDecodeError: 'gbk' codec can't decode byte 0x80 in position 8: illegal multibyte sequence
(venv) PS F:\code\transformer-simple> python run.py --type evaluate
Traceback (most recent call last):
File "F:\code\transformer-simple\run.py", line 87, in
main()
File "F:\code\transformer-simple\run.py", line 45, in main
data = PrepareData()
File "F:\code\transformer-simple\prepare_data.py", line 14, in init
self.train_en, self.train_cn = self.load_data(args.train_file)
File "F:\code\transformer-simple\prepare_data.py", line 33, in load_data
for line in f:
UnicodeDecodeError: 'gbk' codec can't decode byte 0x80 in position 8: illegal multibyte sequence
(venv) PS F:\code\transformer-simple> python run.py
Traceback (most recent call last): python run.py --type evaluate
File "F:\code\transformer-simple\run.py", line 87, in
main()
File "F:\code\transformer-simple\run.py", line 45, in main
data = PrepareData()
File "F:\code\transformer-simple\prepare_data.py", line 14, in init
self.train_en, self.train_cn = self.load_data(args.train_file)
File "F:\code\transformer-simple\prepare_data.py", line 36, in load_data
en.append(["BOS"] + word_tokenize(line[0].lower()) + ["EOS"])
File "C:\Users\Administrator\venv\lib\site-packages\nltk\tokenize__init__.py", line 129, in word_tokenize
sentences = [text] if preserve_line else sent_tokenize(text, language)
File "C:\Users\Administrator\venv\lib\site-packages\nltk\tokenize__init__.py", line 106, in sent_tokenize
tokenizer = load(f"tokenizers/punkt/{language}.pickle")
File "C:\Users\Administrator\venv\lib\site-packages\nltk\data.py", line 750, in load
opened_resource = _open(resource_url)
File "C:\Users\Administrator\venv\lib\site-packages\nltk\data.py", line 876, in open
return find(path, path + [""]).open()
File "C:\Users\Administrator\venv\lib\site-packages\nltk\data.py", line 583, in find
raise LookupError(resource_not_found)
LookupError:
Resource punkt not found.
Please use the NLTK Downloader to obtain the resource:
For more information see: https://www.nltk.org/data.html
Searched in:
(venv) PS F:\code\transformer-simple> python run.py
Traceback (most recent call last):
File "F:\code\transformer-simple\run.py", line 87, in
main()
File "F:\code\transformer-simple\run.py", line 45, in main
data = PrepareData()
File "F:\code\transformer-simple\prepare_data.py", line 14, in init
self.train_en, self.train_cn = self.load_data(args.train_file)
File "F:\code\transformer-simple\prepare_data.py", line 36, in load_data
en.append(["BOS"] + word_tokenize(line[0].lower()) + ["EOS"])
File "C:\Users\Administrator\venv\lib\site-packages\nltk\tokenize__init__.py", line 129, in word_tokenize
sentences = [text] if preserve_line else sent_tokenize(text, language)
File "C:\Users\Administrator\venv\lib\site-packages\nltk\tokenize__init__.py", line 106, in sent_tokenize
tokenizer = load(f"tokenizers/punkt/{language}.pickle")
File "C:\Users\Administrator\venv\lib\site-packages\nltk\data.py", line 750, in load
opened_resource = _open(resource_url)
File "C:\Users\Administrator\venv\lib\site-packages\nltk\data.py", line 876, in open
return find(path, path + [""]).open()
File "C:\Users\Administrator\venv\lib\site-packages\nltk\data.py", line 583, in find
raise LookupError(resource_not_found)
LookupError:
Resource punkt not found.
Please use the NLTK Downloader to obtain the resource:
For more information see: https://www.nltk.org/data.html
Attempted to load tokenizers/punkt/english.pickle
Searched in:
(venv) PS F:\code\transformer-simple> ^C
(venv) PS F:\code\transformer-simple> python run.py
[nltk_data] Downloading package punkt to
[nltk_data] C:\Users\Administrator\AppData\Roaming\nltk_data...
File "F:\code\transformer-simple\run.py", line 10, in
from prepare_data import PrepareData
File "F:\code\transformer-simple\prepare_data.py", line 4, in
from nltk1 import word_tokenize
File "F:\code\transformer-simple\nltk1.py", line 2, in
nltk.download('punkt')
File "C:\Users\Administrator\venv\lib\site-packages\nltk\downloader.py", line 777, in download
for msg in self.incr_download(info_or_id, download_dir, force):
File "C:\Users\Administrator\venv\lib\site-packages\nltk\downloader.py", line 642, in incr_download
File "C:\Users\Administrator\venv\lib\site-packages\nltk\downloader.py", line 695, in _download_package
os.remove(filepath)
PermissionError: [WinError 32] 另一个程序正在使用此文件,进程无法访问。: 'C:\Users\Administrator\AppData\Roaming\nltk_data\tokenizers\punkt.zip'
(venv) PS F:\code\transformer-simple> python run.py
[nltk_data] Downloading package punkt to
[nltk_data] C:\Users\Administrator\AppData\Roaming\nltk_data...
[nltk_data] Package punkt is already up-to-date!
Traceback (most recent call last):
File "F:\code\transformer-simple\run.py", line 10, in
from prepare_data import PrepareData
File "F:\code\transformer-simple\prepare_data.py", line 4, in
from nltk1 import word_tokenize
ImportError: cannot import name 'word_tokenize' from 'nltk1' (F:\code\transformer-simple\nltk1.py)
(venv) PS F:\code\transformer-simple> python run.py
src_vocab 6309
tgt_vocab 3439