pytorch / pytorch

Tensors and Dynamic neural networks in Python with strong GPU acceleration
https://pytorch.org
Other
83.88k stars 22.61k forks source link

Build without MKL is not possible when MKL is installed #32407

Open eLvErDe opened 4 years ago

eLvErDe commented 4 years ago

Hello,

I was planning to benchmark different BLAS implementations on CPU, so I built several PyTorch packages with BLAS=Eigen/OpenBLAS/ATLAS/MKL. Sadly, something unexpected occurred: all versions are linked to libmkl!

Here is an example with the Eigen flavor:

python3 -c 'import torch; print(torch.__config__.show())'

PyTorch built with:
  - GCC 8.3
  - Intel(R) Math Kernel Library Version 2019.0.2 Product Build 20190118 for Intel(R) 64 architecture applications
  - OpenMP 201511 (a.k.a. OpenMP 4.5)
  - NNPACK is enabled
  - Build settings: BLAS=Eigen, BUILD_NAMEDTENSOR=OFF, BUILD_TYPE=Release, CXX_FLAGS=-g -O2 -fdebug-prefix-map=/home/acecile/packaging/pytorch/pytorch-1.3.1+debian=. -fstack-protector-strong -Wformat -Werror=format-security -DOMPI_SKIP_MPICXX=1 -I/usr/include/mkl -Wno-deprecated -fvisibility-inlines-hidden -fopenmp -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -O2 -fPIC -Wno-narrowing -Wall -Wextra -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-sign-compare -Wno-unused-parameter -Wno-unused-variable -Wno-unused-function -Wno-unused-result -Wno-strict-overflow -Wno-strict-aliasing -Wno-error=deprecated-declarations -Wno-stringop-overflow -Wno-error=pedantic -Wno-error=redundant-decls -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Wno-stringop-overflow, FORCE_FALLBACK_CUDA_MPI=1, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, USE_CUDA=False, USE_EIGEN_FOR_BLAS=ON, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_MKLDNN=OFF, USE_MPI=ON, USE_NCCL=OFF, USE_NNPACK=1, USE_OPENMP=ON, USE_STATIC_DISPATCH=OFF, 
ldd /usr/lib/python3/dist-packages/torch/lib/libtorch.so | grep -i mkl

    libmkl_intel_lp64.so => /usr/lib/x86_64-linux-gnu/libmkl_intel_lp64.so (0x00007f43c8dea000)
    libmkl_gnu_thread.so => /usr/lib/x86_64-linux-gnu/libmkl_gnu_thread.so (0x00007f43c7596000)
    libmkl_core.so => /usr/lib/x86_64-linux-gnu/libmkl_core.so (0x00007f43c340a000)

I looked at all the environment variables and found none that explicitly disables the use of MKL. Any help would be appreciated!

Regards, Adam.

ssnl commented 4 years ago

Hi, MKL could be linked for nonBLAS/LAPACK functions, e.g., FFT. Could you try the env variable USE_MKL=0?

eLvErDe commented 4 years ago

Hi,

I assumed it might exist, but it did nothing. If you grep the source code, you will see that it does not exist.

ssnl commented 4 years ago

A couple of USE_* and NO_* flags are handled programmatically. It could work. Although given that nowhere in codebase uses that, you should test and see.

eLvErDe commented 4 years ago

Hello,

Sorry, I closed this issue without meaning to... I built it with USE_MKL=0 and NO_MKL=1 but it did not help (btw, I see no reference to them inside the code, so this is not a surprise).

As a workaround, I run the following before building:

find /path/to/src/ -name FindMKL.cmake -print -exec truncate -s 0 {} \;

Which helped get rid of the MKL dependency. My analysis is that you have third-party dependencies (e.g. Eigen) that use their own FindMKL.cmake file and detect MKL no matter what you ask for while building PyTorch.

I understand this is not a top priority feature, but still, it is not expected.

Best regards, Adam.

ssnl commented 4 years ago

reopened due to the comment above

HapeMask commented 4 years ago

I'm running into this issue as well, but I think the problem is more general. It appears that even without MKL installed, if you specify BLAS=Eigen, cmake will still try to find another BLAS library instead of eigen.

I believe the offending line(s) are in cmake/Dependencies.cmake and I have a simple diff I used on my machine to give what seems to be desired behavior:

diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake
index 1b3917c..d9f8a61 100644
--- a/cmake/Dependencies.cmake
+++ b/cmake/Dependencies.cmake
@@ -113,6 +113,7 @@ message(STATUS "Trying to find preferred BLAS backend of choice: " ${BLAS})
 if(BLAS STREQUAL "Eigen")
   # Eigen is header-only and we do not have any dependent libraries
   set(CAFFE2_USE_EIGEN_FOR_BLAS ON)
+  find_package(Eigen3 REQUIRED)
 elseif(BLAS STREQUAL "ATLAS")
   find_package(Atlas REQUIRED)
   include_directories(SYSTEM ${ATLAS_INCLUDE_DIRS})
@@ -156,7 +157,7 @@ if (NOT INTERN_BUILD_MOBILE)
   set(AT_MKL_ENABLED 0)
   set(AT_MKL_MT 0)
   set(USE_BLAS 1)
-  if(NOT (ATLAS_FOUND OR OpenBLAS_FOUND OR MKL_FOUND OR VECLIB_FOUND))
+  if(NOT (EIGEN3_FOUND OR ATLAS_FOUND OR OpenBLAS_FOUND OR MKL_FOUND OR VECLIB_FOUND))
     message(WARNING "Preferred BLAS (" ${BLAS} ") cannot be found, now searching for a general BLAS library")
     find_package(BLAS)
     if (NOT BLAS_FOUND)
rddesmond commented 4 years ago

Good find, @HapeMask! I came to the same conclusion just as I found your diff. Thank you!

rddesmond commented 4 years ago

It looks like the diff from @HapeMask isn't quite right. This is a legacy of the merger between pytorch and caffe2. caffe2 can use Eigen as a BLAS (and uses the special CAFFE2_USE_EIGEN_FOR_BLAS variable to signal it). pytorch cannot use Eigen as a BLAS, but BLAS can be disabled with USE_BLAS=0. The logic above leaves USE_BLAS=1 with Eigen, which breaks linking for libpytorch.so.

This does break what currently works -- using Eigen for Caffe2 and another blas for libpytorch. I don't know why you'd want that though..

# Decide whether libpytorch is built against a BLAS library (USE_BLAS).
#
# - Eigen selected: PyTorch itself cannot use Eigen as a BLAS, so USE_BLAS is
#   turned off; Caffe2 is signalled separately via CAFFE2_USE_EIGEN_FOR_BLAS.
# - A preferred backend was already located: keep it and enable USE_BLAS.
# - Otherwise: fall back to a generic find_package(BLAS) search and record the
#   outcome in USE_BLAS and in the BLAS cache entry.
#
# NOTE(review): EIGEN3_FOUND is assumed to be the result variable of
# find_package(Eigen3), matching the spelling used in the earlier diff in this
# thread — confirm against the Eigen3 find module actually in use.
if(EIGEN3_FOUND)
  # This variable is for PyTorch, which cannot use Eigen.
  # Caffe2 will use CAFFE2_USE_EIGEN_FOR_BLAS.
  set(USE_BLAS 0)
elseif(ATLAS_FOUND OR OpenBLAS_FOUND OR MKL_FOUND OR VECLIB_FOUND OR GENERIC_BLAS_FOUND)
  set(USE_BLAS 1)
else()
  message(WARNING "Preferred BLAS (" ${BLAS} ") cannot be found, now searching for a general BLAS library")
  find_package(BLAS)
  if(NOT BLAS_FOUND)
    set(USE_BLAS 0)
    set(BLAS "" CACHE STRING "Selected BLAS library")
  else()
    set(USE_BLAS 1)
    # Store the value of BLAS_INFO, not the literal text "BLAS_INFO".
    # Presumably BLAS_INFO is filled in by the BLAS find module — TODO confirm.
    # Quoted so that an empty value still yields a well-formed set() call.
    set(BLAS "${BLAS_INFO}" CACHE STRING "Selected BLAS library")
  endif()
endif()

https://github.com/pytorch/pytorch/issues/8561 might make this OBE by cleaning up the logic.

rddesmond commented 4 years ago

I found that this isn't quite enough, because the first thing FIND_PACKAGE(LAPACK) does is look for a BLAS library (if one hasn't already been selected) and check whether it provides LAPACK. Since Eigen hasn't gone through the normal route, it will set USE_LAPACK=1 if it finds a usable BLAS, even if it isn't linked in. My full diff against 1.4.1:

diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake
index 1b3917c..efe9c5d 100644
--- a/cmake/Dependencies.cmake
+++ b/cmake/Dependencies.cmake
@@ -112,6 +112,7 @@ message(STATUS "Trying to find preferred BLAS backend of choice: " ${BLAS})

 if(BLAS STREQUAL "Eigen")
   # Eigen is header-only and we do not have any dependent libraries
+  find_package(Eigen3 REQUIRED)
   set(CAFFE2_USE_EIGEN_FOR_BLAS ON)
 elseif(BLAS STREQUAL "ATLAS")
   find_package(Atlas REQUIRED)
@@ -155,15 +156,20 @@ endif()
 if (NOT INTERN_BUILD_MOBILE)
   set(AT_MKL_ENABLED 0)
   set(AT_MKL_MT 0)
-  set(USE_BLAS 1)
-  if(NOT (ATLAS_FOUND OR OpenBLAS_FOUND OR MKL_FOUND OR VECLIB_FOUND))
+  if(CAFFE2_USE_EIGEN_FOR_BLAS)
+    message(WARNING "Using EIGEN for blas for Caffe2, disabling blas in libpytorch")
+    set(USE_BLAS 0)
+  elseif(ATLAS_FOUND OR OpenBLAS_FOUND OR MKL_FOUND OR VECLIB_FOUND)
+    set(USE_BLAS 1)
+  else()
     message(WARNING "Preferred BLAS (" ${BLAS} ") cannot be found, now searching for a general BLAS library")
     find_package(BLAS)
     if (NOT BLAS_FOUND)
       set(USE_BLAS 0)
       set(BLAS "" CACHE STRING "Selected BLAS library")
     else()
-      set(BLAS BLAS_INFO CACHE STRING "Selected BLAS library")
+      set(USE_BLAS 1)
+      set(BLAS ${BLAS_INFO} CACHE STRING "Selected BLAS library")
     endif()
   endif()

@@ -1359,9 +1291,11 @@ if (NOT INTERN_BUILD_MOBILE)
       CACHE BOOL "Copy the required BLAS DLLs into the TH install dirs")
   ENDIF()

-  FIND_PACKAGE(LAPACK)
-  IF (LAPACK_FOUND)
-    SET(USE_LAPACK 1)
+  IF (NOT CAFFE2_USE_EIGEN_FOR_BLAS)
+    FIND_PACKAGE(LAPACK)
+    IF (LAPACK_FOUND)
+      SET(USE_LAPACK 1)
+    ENDIF()
   ENDIF()

   if (NOT USE_CUDA)
rmast commented 6 months ago

Your selection of an eventual BLAS is forcefully overwritten by these lines of code "if(NOT INTERN_BUILD_MOBILE)" : https://github.com/pytorch/pytorch/blob/34910f87f0a6bf0a2611ba24b1dba0dd6d501263/cmake/Dependencies.cmake#L184

I've been working on a change to this file to respect the preselected version of BLAS and will PR that soon.