Open shink opened 2 months ago
pip install torchaudio --index-url https://download.pytorch.org/whl/nightly/cpu
上游适配:python 脚本新增 --device 参数,shell 脚本通过环境变量 export BACKEND=npu 指定设备
上游是否修改:是
运行:BACKEND_DEVICE=npu ./run_python_examples.sh
运行所有例子
现状:少许 example 运行失败
上游适配:无
上游是否修改:否
运行:ACCELERATOR=npu python test.py --verbose
运行所有模型的训练测试过程
现状:报错 npu 未注册。原因:测试用例在子进程中执行,继承了主进程的环境变量,而主进程导入 torch_npu 后已将相关环境变量设置为关闭
运行:
apt install libsndfile1
pip install -r examples/pytorch/_tests_requirements.txt
pytest examples/pytorch/test_pytorch_examples.py -v
from transformers.testing_utils import torch_device 自动识别设备
ImportError: libGL.so.1: cannot open shared object file: No such file or directory
test_models.py::test_model_backward[2-swinv2_cr_large_224] Fatal Python error: Segmentation fault
Thread 0x0000fff99f87f120 (most recent call first):
<no Python frame>
Thread 0x0000fffe99d3f120 (most recent call first):
File "/usr/local/python3.9/lib/python3.9/threading.py", line 312 in wait
File "/usr/local/python3.9/lib/python3.9/multiprocessing/queues.py", line 231 in _feed
File "/usr/local/python3.9/lib/python3.9/threading.py", line 917 in run
File "/usr/local/python3.9/lib/python3.9/threading.py", line 980 in _bootstrap_inner
File "/usr/local/python3.9/lib/python3.9/threading.py", line 937 in _bootstrap
Thread 0x0000fffe9c54f120 (most recent call first):
File "/usr/local/python3.9/lib/python3.9/threading.py", line 312 in wait
File "/usr/local/python3.9/lib/python3.9/multiprocessing/queues.py", line 231 in _feed
File "/usr/local/python3.9/lib/python3.9/threading.py", line 917 in run
File "/usr/local/python3.9/lib/python3.9/threading.py", line 980 in _bootstrap_inner
File "/usr/local/python3.9/lib/python3.9/threading.py", line 937 in _bootstrap
Thread 0x0000fffe9ed5f120 (most recent call first):
File "/usr/local/python3.9/lib/python3.9/threading.py", line 312 in wait
File "/usr/local/python3.9/lib/python3.9/multiprocessing/queues.py", line 231 in _feed
File "/usr/local/python3.9/lib/python3.9/threading.py", line 917 in run
File "/usr/local/python3.9/lib/python3.9/threading.py", line 980 in _bootstrap_inner
File "/usr/local/python3.9/lib/python3.9/threading.py", line 937 in _bootstrap
Thread 0x0000fffea156f120 (most recent call first):
File "/usr/local/python3.9/lib/python3.9/threading.py", line 312 in wait
File "/usr/local/python3.9/lib/python3.9/multiprocessing/queues.py", line 231 in _feed
File "/usr/local/python3.9/lib/python3.9/threading.py", line 917 in run
File "/usr/local/python3.9/lib/python3.9/threading.py", line 980 in _bootstrap_inner
File "/usr/local/python3.9/lib/python3.9/threading.py", line 937 in _bootstrap
Thread 0x0000fffe9752f120 (most recent call first):
File "/usr/local/python3.9/lib/python3.9/threading.py", line 312 in wait
File "/usr/local/python3.9/lib/python3.9/multiprocessing/queues.py", line 231 in _feed
File "/usr/local/python3.9/lib/python3.9/threading.py", line 917 in run
File "/usr/local/python3.9/lib/python3.9/threading.py", line 980 in _bootstrap_inner
File "/usr/local/python3.9/lib/python3.9/threading.py", line 937 in _bootstrap
Thread 0x0000fffe94d1f120 (most recent call first):
File "/usr/local/python3.9/lib/python3.9/threading.py", line 312 in wait
File "/usr/local/python3.9/lib/python3.9/multiprocessing/queues.py", line 231 in _feed
File "/usr/local/python3.9/lib/python3.9/threading.py", line 917 in run
File "/usr/local/python3.9/lib/python3.9/threading.py", line 980 in _bootstrap_inner
File "/usr/local/python3.9/lib/python3.9/threading.py", line 937 in _bootstrap
Thread 0x0000fffe9250f120 (most recent call first):
File "/usr/local/python3.9/lib/python3.9/threading.py", line 312 in wait
File "/usr/local/python3.9/lib/python3.9/multiprocessing/queues.py", line 231 in _feed
File "/usr/local/python3.9/lib/python3.9/threading.py", line 917 in run
File "/usr/local/python3.9/lib/python3.9/threading.py", line 980 in _bootstrap_inner
File "/usr/local/python3.9/lib/python3.9/threading.py", line 937 in _bootstrap
Thread 0x0000fffe8fcff120 (most recent call first):
File "/usr/local/python3.9/lib/python3.9/threading.py", line 312 in wait
File "/usr/local/python3.9/lib/python3.9/multiprocessing/queues.py", line 231 in _feed
File "/usr/local/python3.9/lib/python3.9/threading.py", line 917 in run
File "/usr/local/python3.9/lib/python3.9/threading.py", line 980 in _bootstrap_inner
File "/usr/local/python3.9/lib/python3.9/threading.py", line 937 in _bootstrap
Thread 0x0000fffe579bf120 (most recent call first):
File "/usr/local/python3.9/lib/python3.9/multiprocessing/connection.py", line 379 in _recv
File "/usr/local/python3.9/lib/python3.9/multiprocessing/connection.py", line 414 in _recv_bytes
File "/usr/local/python3.9/lib/python3.9/multiprocessing/connection.py", line 250 in recv
File "/usr/local/python3.9/lib/python3.9/multiprocessing/managers.py", line 810 in _callmethod
File "<string>", line 2 in get
File "/usr/local/Ascend/ascend-toolkit/latest/python/site-packages/tbe/common/repository_manager/utils/multiprocess_util.py", line 91 in run
File "/usr/local/python3.9/lib/python3.9/threading.py", line 980 in _bootstrap_inner
File "/usr/local/python3.9/lib/python3.9/threading.py", line 937 in _bootstrap
Thread 0x0000fffdf02bf120 (most recent call first):
File "/usr/local/python3.9/lib/python3.9/threading.py", line 316 in wait
File "/usr/local/python3.9/lib/python3.9/threading.py", line 581 in wait
File "/usr/local/python3.9/lib/python3.9/site-packages/tqdm/_monitor.py", line 60 in run
File "/usr/local/python3.9/lib/python3.9/threading.py", line 980 in _bootstrap_inner
File "/usr/local/python3.9/lib/python3.9/threading.py", line 937 in _bootstrap
Current thread 0x0000ffffb6ca8640 (most recent call first):
File "/usr/local/python3.9/lib/python3.9/site-packages/torch/_tensor_str.py", line 146 in __init__
File "/usr/local/python3.9/lib/python3.9/site-packages/torch/_tensor_str.py", line 357 in _tensor_str
File "/usr/local/python3.9/lib/python3.9/site-packages/torch/_tensor_str.py", line 625 in _str_intern
File "/usr/local/python3.9/lib/python3.9/site-packages/torch/_tensor_str.py", line 708 in _str
File "/usr/local/python3.9/lib/python3.9/site-packages/torch/_tensor.py", line 464 in __repr__
File "/usr/local/python3.9/lib/python3.9/site-packages/_pytest/_io/saferepr.py", line 73 in repr_instance
File "/usr/local/python3.9/lib/python3.9/reprlib.py", line 62 in repr1
File "/usr/local/python3.9/lib/python3.9/reprlib.py", line 52 in repr
File "/usr/local/python3.9/lib/python3.9/site-packages/_pytest/_io/saferepr.py", line 61 in repr
File "/usr/local/python3.9/lib/python3.9/site-packages/_pytest/_io/saferepr.py", line 112 in saferepr
File "/usr/local/python3.9/lib/python3.9/site-packages/_pytest/_code/code.py", line 831 in repr_args
File "/usr/local/python3.9/lib/python3.9/site-packages/_pytest/_code/code.py", line 927 in repr_traceback_entry
File "/usr/local/python3.9/lib/python3.9/site-packages/_pytest/_code/code.py", line 982 in <listcomp>
File "/usr/local/python3.9/lib/python3.9/site-packages/_pytest/_code/code.py", line 981 in repr_traceback
File "/usr/local/python3.9/lib/python3.9/site-packages/_pytest/_code/code.py", line 1057 in repr_excinfo
File "/usr/local/python3.9/lib/python3.9/site-packages/_pytest/_code/code.py", line 698 in getrepr
File "/usr/local/python3.9/lib/python3.9/site-packages/_pytest/nodes.py", line 497 in _repr_failure_py
File "/usr/local/python3.9/lib/python3.9/site-packages/_pytest/python.py", line 1877 in repr_failure
File "/usr/local/python3.9/lib/python3.9/site-packages/_pytest/reports.py", line 364 in from_item_and_call
File "/usr/local/python3.9/lib/python3.9/site-packages/_pytest/runner.py", line 372 in pytest_runtest_makereport
File "/usr/local/python3.9/lib/python3.9/site-packages/pluggy/_callers.py", line 103 in _multicall
File "/usr/local/python3.9/lib/python3.9/site-packages/pluggy/_manager.py", line 120 in _hookexec
File "/usr/local/python3.9/lib/python3.9/site-packages/pluggy/_hooks.py", line 513 in __call__
File "/usr/local/python3.9/lib/python3.9/site-packages/_pytest/runner.py", line 228 in call_and_report
File "/usr/local/python3.9/lib/python3.9/site-packages/_pytest/runner.py", line 133 in runtestprotocol
File "/usr/local/python3.9/lib/python3.9/site-packages/_pytest/runner.py", line 114 in pytest_runtest_protocol
File "/usr/local/python3.9/lib/python3.9/site-packages/pluggy/_callers.py", line 103 in _multicall
File "/usr/local/python3.9/lib/python3.9/site-packages/pluggy/_manager.py", line 120 in _hookexec
File "/usr/local/python3.9/lib/python3.9/site-packages/pluggy/_hooks.py", line 513 in __call__
File "/usr/local/python3.9/lib/python3.9/site-packages/_pytest/main.py", line 351 in pytest_runtestloop
File "/usr/local/python3.9/lib/python3.9/site-packages/pluggy/_callers.py", line 103 in _multicall
File "/usr/local/python3.9/lib/python3.9/site-packages/pluggy/_manager.py", line 120 in _hookexec
File "/usr/local/python3.9/lib/python3.9/site-packages/pluggy/_hooks.py", line 513 in __call__
File "/usr/local/python3.9/lib/python3.9/site-packages/_pytest/main.py", line 326 in _main
File "/usr/local/python3.9/lib/python3.9/site-packages/_pytest/main.py", line 272 in wrap_session
File "/usr/local/python3.9/lib/python3.9/site-packages/_pytest/main.py", line 319 in pytest_cmdline_main
File "/usr/local/python3.9/lib/python3.9/site-packages/pluggy/_callers.py", line 103 in _multicall
File "/usr/local/python3.9/lib/python3.9/site-packages/pluggy/_manager.py", line 120 in _hookexec
File "/usr/local/python3.9/lib/python3.9/site-packages/pluggy/_hooks.py", line 513 in __call__
File "/usr/local/python3.9/lib/python3.9/site-packages/_pytest/config/__init__.py", line 174 in main
File "/usr/local/python3.9/lib/python3.9/site-packages/_pytest/config/__init__.py", line 197 in console_main
File "/usr/local/python3.9/bin/pytest", line 8 in <module>
Segmentation fault (core dumped)
Resource
TODO
--device arg: https://github.com/cosdt/pytorch-examples/issues/1