facebookarchive / caffe2

Caffe2 is a lightweight, modular, and scalable deep learning framework.
https://caffe2.ai
Apache License 2.0
8.42k stars 1.94k forks source link

My changes for Windows #1469

Open RailWar opened 7 years ago

RailWar commented 7 years ago

These are my changes for building on Windows (I have Windows 10 and Visual Studio 2015). The formatting is strange because it is only a diff log; I don't know how to attach the file.

diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt
index 3a7688c7..1469300e 100644
--- a/caffe2/CMakeLists.txt
+++ b/caffe2/CMakeLists.txt
@@ -218,6 +218,9 @@ if (BUILD_PYTHON)
     elseif (MSVC)
       set_target_properties(caffe2_pybind11_state_gpu PROPERTIES SUFFIX ".pyd")
     endif()
+    if (MSVC)
+      set(CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} /FORCE:MULTIPLE")
+    endif()
     set_target_properties(
         caffe2_pybind11_state_gpu PROPERTIES LIBRARY_OUTPUT_DIRECTORY
         ${CMAKE_BINARY_DIR}/caffe2/python)
diff --git a/caffe2/binaries/CMakeLists.txt b/caffe2/binaries/CMakeLists.txt
index 3ec61dbd..b9170d92 100644
--- a/caffe2/binaries/CMakeLists.txt
+++ b/caffe2/binaries/CMakeLists.txt
@@ -1,3 +1,7 @@
+if(MSVC)
+  set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /FORCE:MULTIPLE")
+ENDIF()
+
 caffe2_binary_target("convert_caffe_image_db.cc")
 caffe2_binary_target("convert_db.cc")
 caffe2_binary_target("db_throughput.cc")
diff --git a/caffe2/binaries/make_image_db.cc b/caffe2/binaries/make_image_db.cc
index 2bdbb53d..7e8a8a34 100644
--- a/caffe2/binaries/make_image_db.cc
+++ b/caffe2/binaries/make_image_db.cc
@@ -127,7 +127,7 @@ class Converter {

       // Add raw file contents to DB if !raw
       if (!caffe2::FLAGS_raw) {
-        std::ifstream image_file_stream(input_folder + pair.first);
+        std::ifstream image_file_stream(input_folder + pair.first, std::ios::in | std::ios::binary);
         if (!image_file_stream) {
           LOG(ERROR) << "Cannot open " << input_folder << pair.first
                      << ". Skipping.";
diff --git a/caffe2/core/blob_serialization.h b/caffe2/core/blob_serialization.h
index 66a1284d..48a36427 100644
--- a/caffe2/core/blob_serialization.h
+++ b/caffe2/core/blob_serialization.h
@@ -584,7 +584,7 @@ void TensorDeserializer<Context>::Deserialize(
       for (int i = 0; i < chunkSize; ++i) {
         temp_blob.Deserialize(proto.string_data(i));
         if (i == 0) {
-          raw_ptr = tensor->template raw_mutable_data(temp_blob.meta());
+          raw_ptr = tensor->raw_mutable_data(temp_blob.meta());
         }
         temp_blob.meta().copy()(
             temp_blob.GetRaw(),
diff --git a/caffe2/core/operator_schema.cc b/caffe2/core/operator_schema.cc
index f6723cb6..77e10caf 100644
--- a/caffe2/core/operator_schema.cc
+++ b/caffe2/core/operator_schema.cc
@@ -248,7 +248,7 @@ OpSchema& OpSchema::ScalarType(::caffe2::TensorProto_DataType dt) {
 }

 OpSchema& OpSchema::CostInferenceFunction(
-    CostInferenceFunctionType&& function) {
+    CostInferenceFunctionType function) {
   cost_inference_function_ =
       caffe2::make_unique<CostInferenceFunctionType>(function);
   return *this;
diff --git a/caffe2/core/operator_schema.h b/caffe2/core/operator_schema.h
index 29c2ca0b..78796e3c 100644
--- a/caffe2/core/operator_schema.h
+++ b/caffe2/core/operator_schema.h
@@ -199,7 +199,7 @@ class OpSchema {
   /**
    * @brief Register the Cost inference function.
    */
-  OpSchema& CostInferenceFunction(CostInferenceFunctionType&& function);
+  OpSchema& CostInferenceFunction(CostInferenceFunctionType function);
   bool HasCostInferenceFunction() const {
     return !!cost_inference_function_;
   }
diff --git a/caffe2/image/image_input_op.h b/caffe2/image/image_input_op.h
index ac295c68..a38aec1d 100644
--- a/caffe2/image/image_input_op.h
+++ b/caffe2/image/image_input_op.h
@@ -256,13 +256,13 @@ ImageInputOp<Context>::ImageInputOp(

   // hard-coded PCA eigenvectors and eigenvalues, based on RBG channel order
   color_lighting_eigvecs_.push_back(
-    std::vector<float>{-144.7125, 183.396, 102.2295});
+    std::vector<float>{-144.7125f, 183.396f, 102.2295f});
   color_lighting_eigvecs_.push_back(
-    std::vector<float>{-148.104, -1.1475, -207.57});
+    std::vector<float>{-148.104f, -1.1475f, -207.57f});
   color_lighting_eigvecs_.push_back(
-    std::vector<float>{-148.818, -177.174, 107.1765});
+    std::vector<float>{-148.818f, -177.174f, 107.1765f});

-  color_lighting_eigvals_ = std::vector<float>{0.2175, 0.0188, 0.0045};
+  color_lighting_eigvals_ = std::vector<float>{0.2175f, 0.0188f, 0.0045f};

   CAFFE_ENFORCE_GT(batch_size_, 0, "Batch size should be nonnegative.");
   if (use_caffe_datum_) {
diff --git a/caffe2/operators/conv_op_cudnn.cc b/caffe2/operators/conv_op_cudnn.cc
index e8996415..775789a8 100644
--- a/caffe2/operators/conv_op_cudnn.cc
+++ b/caffe2/operators/conv_op_cudnn.cc
@@ -149,22 +149,32 @@ class CudnnConvOpBase : public ConvPoolOpBase<CUDAContext> {
     switch (order_) {
       case StorageOrder::NHWC:
         if (size == 4) {
+#if (CUDNN_VERSION_MIN(7,0,0))
           CUDNN_ENFORCE(cudnnSetTensor4dDescriptorEx(
               desc_,
               cudnnTypeWrapper<T>::type,
               N,
-#if CUDNN_VERSION_MIN(7,0,0)
               C,
-#else
-              C / group_,
-#endif
               H,
               W,
               H * W * C,
               1,
               W * C,
               C));
-        } else {
+#else
+         CUDNN_ENFORCE(cudnnSetTensor4dDescriptorEx(
+             desc_,
+             cudnnTypeWrapper<T>::type,
+             N,
+             C / group_,
+             H,
+             W,
+             H * W * C,
+             1,
+             W * C,
+             C));
+#endif
+       } else {
 #if !CUDNN_VERSION_MIN(7,0,0)
           C = C / group_;
 #endif
@@ -180,22 +190,32 @@ class CudnnConvOpBase : public ConvPoolOpBase<CUDAContext> {
         break;
       case StorageOrder::NCHW:
         if (size == 4) {
+#if CUDNN_VERSION_MIN(7,0,0)
           CUDNN_ENFORCE(cudnnSetTensor4dDescriptorEx(
               desc_,
               cudnnTypeWrapper<T>::type,
               N,
-#if CUDNN_VERSION_MIN(7,0,0)
               C,
-#else
-              C / group_,
-#endif
               H,
               W,
               C * H * W,
               H * W,
               W,
               1));
-        } else {
+#else
+         CUDNN_ENFORCE(cudnnSetTensor4dDescriptorEx(
+             desc_,
+             cudnnTypeWrapper<T>::type,
+             N,
+             C / group_,
+             H,
+             W,
+             C * H * W,
+             H * W,
+             W,
+             1));
+#endif
+       } else {
 #if !CUDNN_VERSION_MIN(7,0,0)
           C = C / group_;
 #endif
@@ -365,19 +385,26 @@ bool CudnnConvOp::DoRunWithType() {
     if (filter_changed) {
       cudnn_filter_dims_ = filter.dims();
       if (kernel_.size() == 2) {
-        CUDNN_ENFORCE(cudnnSetFilter4dDescriptor(
+#if CUDNN_VERSION_MIN(7,0,0)
+         CUDNN_ENFORCE(cudnnSetFilter4dDescriptor(
             filter_desc_,
             cudnnTypeWrapper<T_W>::type,
             GetCudnnTensorFormat(order_),
-#if CUDNN_VERSION_MIN(7,0,0)
             M,
-#else
-            M / group_,
-#endif
             C / group_,
             kernel_h(),
             kernel_w()));
-      } else {
+#else
+         CUDNN_ENFORCE(cudnnSetFilter4dDescriptor(
+           filter_desc_,
+           cudnnTypeWrapper<T_W>::type,
+           GetCudnnTensorFormat(order_),
+           M / group_,
+           C / group_,
+           kernel_h(),
+           kernel_w()));
+#endif
+     } else {
         vector<int> dims(filter.dims().begin(), filter.dims().end());
         dims[0] /= group_;
 #if !CUDNN_VERSION_MIN(7,0,0)
@@ -729,19 +756,26 @@ bool CudnnConvGradientOp::DoRunWithType() {
     if (filter_changed) {
       cudnn_filter_dims_ = filter.dims();
       if (kernel_.size() == 2) {
+#if CUDNN_VERSION_MIN(7,0,0)
         CUDNN_ENFORCE(cudnnSetFilter4dDescriptor(
             filter_desc_,
             cudnnTypeWrapper<T_W>::type,
             GetCudnnTensorFormat(order_),
-#if CUDNN_VERSION_MIN(7,0,0)
             M,
-#else
-            M / group_,
-#endif
             C / group_,
             kernel_h(),
             kernel_w()));
-      } else {
+#else
+       CUDNN_ENFORCE(cudnnSetFilter4dDescriptor(
+           filter_desc_,
+           cudnnTypeWrapper<T_W>::type,
+           GetCudnnTensorFormat(order_),
+           M / group_,
+           C / group_,
+           kernel_h(),
+           kernel_w()));
+#endif
+     } else {
         vector<int> dims(filter.dims().begin(), filter.dims().end());
 #if !CUDNN_VERSION_MIN(7,0,0)
         dims[0] /= group_;
diff --git a/caffe2/operators/layer_norm_op.cu b/caffe2/operators/layer_norm_op.cu
index 23ced9c8..84bf0161 100644
--- a/caffe2/operators/layer_norm_op.cu
+++ b/caffe2/operators/layer_norm_op.cu
@@ -277,7 +277,7 @@ bool LayerNormGradientOp<CUDAContext>::DoRunWithType<float>() {
   stats_dims.push_back(1);
   dmean_.Resize(stats_dims);
   dstdev_.Resize(stats_dims);
-  gscratch_.Resize(std::vector<size_t>{left, right});
+  gscratch_.Resize(std::vector<TIndex>{left, right});

   std::vector<int> segs(left + 1);
   std::iota(segs.begin(), segs.end(), 0);
@@ -307,7 +307,7 @@ bool LayerNormGradientOp<CUDAContext>::DoRunWithType<float>() {
       dout.data<float>(),
       gscratch_.mutable_data<float>());

-  dstdev_.Resize(vector<size_t>{left, 1});
+  dstdev_.Resize(vector<TIndex>{left, 1});
   // dstdev = reduce(temp1)
   allocScratchAndReduce(
       gscratch_.data<float>(),
diff --git a/caffe2/operators/recurrent_op_cudnn.cc b/caffe2/operators/recurrent_op_cudnn.cc
index 21b3ed3d..c9a19f11 100644
--- a/caffe2/operators/recurrent_op_cudnn.cc
+++ b/caffe2/operators/recurrent_op_cudnn.cc
@@ -131,10 +131,9 @@ void RecurrentBaseOp<T>::initialize(

   // RNN setup
   {
-    CUDNN_ENFORCE(cudnnSetRNNDescriptor(
 #if CUDNN_MAJOR >= 7
+    CUDNN_ENFORCE(cudnnSetRNNDescriptor(
         cudnn_wrapper_.inline_cudnn_handle(),
-#endif
         rnnDesc_,
         hiddenSize,
         numLayers,
@@ -142,10 +141,19 @@ void RecurrentBaseOp<T>::initialize(
         rnnInput,
         rnnDirection,
         rnnMode,
-#if CUDNN_MAJOR >= 7
         CUDNN_RNN_ALGO_STANDARD, // TODO: verify correctness / efficiency.
-#endif
         cudnnTypeWrapper<T>::type));
+#else  
+   CUDNN_ENFORCE(cudnnSetRNNDescriptor(
+       rnnDesc_,
+       hiddenSize,
+       numLayers,
+       dropoutDesc_,
+       rnnInput,
+       rnnDirection,
+       rnnMode,
+       cudnnTypeWrapper<T>::type));
+#endif
   }
   // X setup
   {
diff --git a/caffe2/utils/GpuBitonicSort.cuh b/caffe2/utils/GpuBitonicSort.cuh
index f52bb508..668d9d80 100644
--- a/caffe2/utils/GpuBitonicSort.cuh
+++ b/caffe2/utils/GpuBitonicSort.cuh
@@ -28,6 +28,8 @@ __device__ inline void bitonicSwap(K& kA, V& vA,
   }
 };

+#define INTEGER_IS_POWER_OF_2(v) (v && !(v & (v - 1)))
+
 template <typename Comparator, typename K, typename V,
           int Power2SortSize,
           int ThreadsPerBlock>
@@ -39,9 +41,9 @@ __device__ inline void bitonicSort(K* keys,
   // Assume the sort is taking place in shared memory
   // static_assert(Power2SortSize * (sizeof(K) + sizeof(V)) < 32768,
   //               "sort data too large (>32768 bytes)");
-  static_assert(math::integerIsPowerOf2(Power2SortSize),
+  static_assert(/*math::integerIsPowerOf2*/INTEGER_IS_POWER_OF_2(Power2SortSize),
                 "sort size must be power of 2");
-  static_assert(math::integerIsPowerOf2(ThreadsPerBlock),
+  static_assert(/*math::integerIsPowerOf2*/INTEGER_IS_POWER_OF_2(ThreadsPerBlock),
                 "threads in block must be power of 2");

   // If what we are sorting is too small, then not all threads
@@ -107,7 +109,7 @@ __device__ inline void warpBitonicSort(K* keys,
   // Smaller sorts should use a warp shuffle sort
   static_assert(Power2SortSize > kWarpSize,
                 "sort not large enough");
-  static_assert(math::integerIsPowerOf2(Power2SortSize),
+  static_assert(/*math::integerIsPowerOf2*/INTEGER_IS_POWER_OF_2(Power2SortSize),
                 "sort size must be power of 2");
   static_assert(Power2SortSize <= kMaxBitonicSortSize,
                 "sort size <= 4096 only supported");
diff --git a/caffe2/utils/proto_utils.cc b/caffe2/utils/proto_utils.cc
index 65d53676..5d08c3cb 100644
--- a/caffe2/utils/proto_utils.cc
+++ b/caffe2/utils/proto_utils.cc
@@ -183,7 +183,7 @@ bool ReadProtoFromBinaryFile(const char* filename, MessageLite* proto) {
 }

 void WriteProtoToBinaryFile(const MessageLite& proto, const char* filename) {
-  int fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC, 0644);
+  int fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0644);
   CAFFE_ENFORCE_NE(
       fd, -1, "File cannot be created: ", filename, " error number: ", errno);
   std::unique_ptr<ZeroCopyOutputStream> raw_output(new FileOutputStream(fd));
diff --git a/cmake/Cuda.cmake b/cmake/Cuda.cmake
index 2425375e..54605ef1 100644
--- a/cmake/Cuda.cmake
+++ b/cmake/Cuda.cmake
@@ -6,7 +6,7 @@
 # Default is set to cuda 9. If we detect the cuda architectores to be less than
 # 9, we will lower it to the corresponding known archs.
 set(Caffe2_known_gpu_archs "30 35 50 52 60 61 70") # for CUDA 9.x
-set(Caffe2_known_gpu_archs8 "20 21(20) 30 35 50 52 60 61") # for CUDA 8.x
+set(Caffe2_known_gpu_archs8 "20 21(20) 30 35 50 52 60 61 62") # for CUDA 8.x
 set(Caffe2_known_gpu_archs7 "20 21(20) 30 35 50 52") # for CUDA 7.x

diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake
index b18288d1..da53e3d4 100644
--- a/cmake/Dependencies.cmake
+++ b/cmake/Dependencies.cmake
@@ -145,7 +145,7 @@ if(USE_LMDB)
   find_package(LMDB)
   if(LMDB_FOUND)
     caffe2_include_directories(${LMDB_INCLUDE_DIR})
-    list(APPEND Caffe2_DEPENDENCY_LIBS ${LMDB_LIBRARIES})
+    list(APPEND Caffe2_DEPENDENCY_LIBS "ntdll.lib" ${LMDB_LIBRARIES})
   else()
     message(WARNING "Not compiling with LMDB. Suppress this warning with -DUSE_LMDB=OFF")
     set(USE_LMDB OFF)
@@ -213,6 +213,7 @@ if(USE_OPENCV)
     # OpenCV 2
     find_package(OpenCV QUIET COMPONENTS core highgui imgproc)
   endif()
+  set(OpenCV_FOUND TRUE)
   if(OpenCV_FOUND)
     caffe2_include_directories(${OpenCV_INCLUDE_DIRS})
     list(APPEND Caffe2_DEPENDENCY_LIBS ${OpenCV_LIBS})
@@ -356,9 +357,9 @@ if(USE_NCCL)
   if(NOT USE_CUDA)
     message(WARNING "If not using cuda, one should not use NCCL either.")
     set(USE_NCCL OFF)
-  elseif(NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Linux")
-    message(WARNING "NCCL is currently only supported under Linux.")
-    set(USE_NCCL OFF)
+  #elseif(NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Linux")
+  #  message(WARNING "NCCL is currently only supported under Linux.")
+  #  set(USE_NCCL OFF)
   else()
     include("cmake/External/nccl.cmake")
     caffe2_include_directories(${NCCL_INCLUDE_DIRS})
diff --git a/cmake/Utils.cmake b/cmake/Utils.cmake
index b804d9a0..11d89c09 100644
--- a/cmake/Utils.cmake
+++ b/cmake/Utils.cmake
@@ -245,6 +245,9 @@ function(caffe2_binary_target target_name_or_src)
   add_executable(${__target} ${__srcs})
   add_dependencies(${__target} ${Caffe2_MAIN_LIBS_ORDER})
   target_link_libraries(${__target} ${Caffe2_MAIN_LIBS} ${Caffe2_DEPENDENCY_LIBS})
+  if (USE_CUDA)
+    target_link_libraries(${__target} ${Caffe2_CUDA_DEPENDENCY_LIBS})
+  endif()
   install(TARGETS ${__target} DESTINATION bin)
 endfunction()
AurusHuang commented 7 years ago

Your post is so badly formatted that I can't figure out what has changed.

Yangqing commented 7 years ago

(I updated your comment a bit - you can mark it as a code section.)

I assume that the current head passes the Windows build - if not, please let us know for sure :) And feel free to send a pull request.