pytorch / TensorRT

PyTorch/TorchScript/FX compiler for NVIDIA GPUs using TensorRT
https://pytorch.org/TensorRT
BSD 3-Clause "New" or "Revised" License
2.43k stars 337 forks source link

Failed to install in Jetson #942

Closed januarkai closed 2 years ago

januarkai commented 2 years ago

I tried to install Torch-TensorRT and faced this installation error: `/home/nvidia/Torch-TensorRT/cpp/bin/torchtrtc/BUILD:10:10: no such package '@libtorch//': The repository '@libtorch' could not be resolved and referenced by '//cpp/bin/torchtrtc:torchtrtc'`. I am using JetPack 4.5 with the command `bazel build //:libtorchtrt --platforms //toolchains:jetpack_4.5`.

narendasan commented 2 years ago

What does your workspace file look like?

januarkai commented 2 years ago

I have solved the previous error, but now I face a new error: Selection_001 Should I open a new issue for this? Here is what my WORKSPACE looks like: `workspace(name = "Torch-TensorRT")

load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") load("@bazel_tools//tools/build_defs/repo:git.bzl", "git_repository")

http_archive( name = "rules_python", sha256 = "778197e26c5fbeb07ac2a2c5ae405b30f6cb7ad1f5510ea6fdac03bded96cc6f", url = "https://github.com/bazelbuild/rules_python/releases/download/0.2.0/rules_python-0.2.0.tar.gz", )

load("@rules_python//python:pip.bzl", "pip_install")

http_archive( name = "rules_pkg", sha256 = "038f1caa773a7e35b3663865ffb003169c6a71dc995e39bf4815792f385d837d", urls = [ "https://mirror.bazel.build/github.com/bazelbuild/rules_pkg/releases/download/0.4.0/rules_pkg-0.4.0.tar.gz", "https://github.com/bazelbuild/rules_pkg/releases/download/0.4.0/rules_pkg-0.4.0.tar.gz", ], )

load("@rules_pkg//:deps.bzl", "rules_pkg_dependencies")

rules_pkg_dependencies()

git_repository( name = "googletest", commit = "703bd9caab50b139428cea1aaff9974ebee5742e", remote = "https://github.com/google/googletest", shallow_since = "1570114335 -0400", )

External dependency for torch_tensorrt if you already have precompiled binaries.

local_repository( name = "torch_tensorrt", path = "/opt/conda/lib/python3.8/site-packages/torch_tensorrt" )

CUDA should be installed on the system locally

new_local_repository( name = "cuda", build_file = "@//third_party/cuda:BUILD", path = "/usr/local/cuda-10.2/", )

new_local_repository( name = "cublas", build_file = "@//third_party/cublas:BUILD", path = "/usr", ) #############################################################################################################

Tarballs and fetched dependencies (default - use in cases when building from precompiled bin and tarballs)

#############################################################################################################

http_archive(

name = "libtorch",

build_file = "@//third_party/libtorch:BUILD",

sha256 = "190e963e739d5f7c2dcf94b3994de8fcd335706a4ebb333812ea7d8c841beb06",

strip_prefix = "libtorch",

urls = ["https://download.pytorch.org/libtorch/cu113/libtorch-cxx11-abi-shared-with-deps-1.10.0%2Bcu113.zip"],

)

http_archive(

name = "libtorch_pre_cxx11_abi",

build_file = "@//third_party/libtorch:BUILD",

sha256 = "0996a6a4ea8bbc1137b4fb0476eeca25b5efd8ed38955218dec1b73929090053",

strip_prefix = "libtorch",

urls = ["https://download.pytorch.org/libtorch/cu113/libtorch-shared-with-deps-1.10.0%2Bcu113.zip"],

)

Download these tarballs manually from the NVIDIA website

Either place them in the distdir directory in third_party and use the --distdir flag

or modify the urls to "file:///&lt;path/to/tarball&gt;.tar.gz"

http_archive(

name = "cudnn",

build_file = "@//third_party/cudnn/archive:BUILD",

sha256 = "0e5d2df890b9967efa6619da421310d97323565a79f05a1a8cb9b7165baad0d7",

strip_prefix = "cuda",

urls = [

"https://developer.nvidia.com/compute/machine-learning/cudnn/secure/8.2.4/11.4_20210831/cudnn-11.4-linux-x64-v8.2.4.15.tgz",

],

)

http_archive(

name = "tensorrt",

build_file = "@//third_party/tensorrt/archive:BUILD",

sha256 = "da130296ac6636437ff8465812eb55dbab0621747d82dc4fe9b9376f00d214af",

strip_prefix = "TensorRT-8.2.2.1",

urls = [

"https://developer.nvidia.com/compute/machine-learning/tensorrt/secure/8.2.2.1/tars/tensorrt-8.2.2.1.linux.x86_64-gnu.cuda-11.4.cudnn8.2.tar.gz",

],

)

####################################################################################

Locally installed dependencies (use in cases of custom dependencies or aarch64)

####################################################################################

NOTE: In the case you are using just the pre-cxx11-abi path or just the cxx11 abi path

with your local libtorch, just point deps at the same path to satisfy bazel.

NOTE: NVIDIA's aarch64 PyTorch (python) wheel file uses the CXX11 ABI unlike PyTorch's standard

x86_64 python distribution. If using NVIDIA's version just point to the root of the package

for both versions here and do not use --config=pre-cxx11-abi

new_local_repository( name = "libtorch", path = "/home/nvidia/.local/lib/python3.6/site-packages/torch", build_file = "third_party/libtorch/BUILD" )

new_local_repository( name = "libtorch_pre_cxx11_abi", path = "/home/nvidia/.local/lib/python3.6/site-packages/torch", build_file = "third_party/libtorch/BUILD" )

new_local_repository( name = "cudnn", path = "/usr/", build_file = "@//third_party/cudnn/local:BUILD" )

new_local_repository( name = "tensorrt", path = "/usr/", build_file = "@//third_party/tensorrt/local:BUILD" )

##########################################################################

Testing Dependencies (optional - comment out on aarch64)

##########################################################################

pip_install(

name = "torch_tensorrt_py_deps",

requirements = "//py:requirements.txt",

)

pip_install(

name = "py_test_deps",

requirements = "//tests/py:requirements.txt",

)

pip_install( name = "pylinter_deps", requirements = "//tools/linter:requirements.txt", )`

narendasan commented 2 years ago

This is a known issue. The problem is that there are some breaking changes for JetPack 4.5 (specifically, we use APIs introduced in TRT 8.2) in preparation for our next release. I would say try checking out the 1.0 tag if you just need a working version. If you need master, I can provide more information on how to backport.

januarkai commented 2 years ago

I have tried all the tags, and only v0.3.0 successfully installed the C++ library. But I faced a problem when I tried to install the Python API. It shows this kind of error: trtorch/csrc/tensorrt_backend.h:13:15: error: ‘c10::IValue trtorch::backend::TensorRTBackend::preprocess(c10::IValue, c10::impl::GenericDict)’ marked ‘override’, but does not override c10::IValue preprocess(c10::IValue mod, c10::impl::GenericDict method_compile_spec) override; ^~~~~~~~~~ In file included from /home/nvidia/.local/lib/python3.6/site-packages/torch/include/c10/core/StorageImpl.h:6:0, from /home/nvidia/.local/lib/python3.6/site-packages/torch/include/c10/core/Storage.h:3, from /home/nvidia/.local/lib/python3.6/site-packages/torch/include/ATen/core/TensorBody.h:12, from /home/nvidia/.local/lib/python3.6/site-packages/torch/include/ATen/Tensor.h:3, from /home/nvidia/.local/lib/python3.6/site-packages/torch/include/ATen/Context.h:4, from /home/nvidia/.local/lib/python3.6/site-packages/torch/include/ATen/ATen.h:9, from /home/nvidia/.local/lib/python3.6/site-packages/torch/include/torch/csrc/jit/ir/attributes.h:2, from /home/nvidia/.local/lib/python3.6/site-packages/torch/include/torch/csrc/jit/ir/ir.h:3, from /home/nvidia/.local/lib/python3.6/site-packages/torch/include/torch/csrc/jit/passes/lower_graph.h:3, from trtorch/csrc/tensorrt_backend.cpp:1: So is it better for me to upgrade to JetPack 4.6? I am using an AGX Xavier, and I want to run inference from my model on the Jetson. My model has been converted to TensorRT using the latest Torch-TensorRT Docker container on my computer. But when I run inference using v0.3.0 on the AGX Xavier, I face this error: terminate called after throwing an instance of 'c10::Error' what(): __setstate__() Expected a value of type 'str' for argument '_1' but instead found type 'List[str]'. 
Position: 1 Declaration: __setstate__(__torch__.torch.classes.tensorrt.Engine _0, str _1) -> (NoneType _0) Exception raised from checkArg at bazel-out/aarch64-opt/bin/external/libtorch/_virtual_includes/ATen/ATen/core/function_schema_inl.h:184 (most recent call first): frame #0: c10::Error::Error(c10::SourceLocation, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >) + 0xa0 (0x7f81d02508 in /home/nvidia/.local/lib/python3.6/site-packages/torch/lib/libc10.so)

narendasan commented 2 years ago

Yes, using the latest JetPack is preferable, if possible.

januarkai commented 2 years ago

Thank you @narendasan