apache / orc

Apache ORC - the smallest, fastest columnar storage for Hadoop workloads
https://orc.apache.org/
Apache License 2.0
665 stars 477 forks source link

ORC-1689: [C++] Generate CMake config file #1889

Closed wgtmac closed 2 months ago

wgtmac commented 2 months ago

What changes were proposed in this pull request?

Generate cmake config file and install it.

Why are the changes needed?

It is good for downstream projects to find ORC library.

How was this patch tested?

Test it locally.

Was this patch authored or co-authored using generative AI tooling?

No.

wgtmac commented 2 months ago

Could you help review this? I'm not sure if we need to generate orcTarget.cmake which I think is unnecesarry at the moment. @kou

wgtmac commented 2 months ago

I did try to generate the target file but was having a hard time to deal with the third-party libraries and make it relocatable. Let me learn it deeper and try again. Thanks @kou!

kou commented 2 months ago

FYI: Apache Arrow creates libarrow_bundled_dependencies.a that contains all *.a built by externalproject_add()/fetchcontent:

We can use only libarrow_bundled_dependencies.a for libarrow.a: https://github.com/apache/arrow/blob/b98763ad079f20e36d82268f9e7cf0db49fdd461/cpp/src/arrow/ArrowConfig.cmake.in#L103-L119

wgtmac commented 2 months ago

FYI: Apache Arrow creates libarrow_bundled_dependencies.a that contains all *.a built by externalproject_add()/fetchcontent:

We can use only libarrow_bundled_dependencies.a for libarrow.a: https://github.com/apache/arrow/blob/b98763ad079f20e36d82268f9e7cf0db49fdd461/cpp/src/arrow/ArrowConfig.cmake.in#L103-L119

Yes, I looked at that and it can be a separate improvement to Apache ORC because Apache Arrow does not use third-party libraries built by Apache ORC.

kou commented 2 months ago

It makes sense.

wgtmac commented 2 months ago

Does the file /tmp/orc/lib/cmake/orc/orcTargets.cmake looks right? @kou

# Generated by CMake

if("${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION}" LESS 2.8)
   message(FATAL_ERROR "CMake >= 2.8.0 required")
endif()
if(CMAKE_VERSION VERSION_LESS "2.8.3")
   message(FATAL_ERROR "CMake >= 2.8.3 required")
endif()
cmake_policy(PUSH)
cmake_policy(VERSION 2.8.3...3.25)
#----------------------------------------------------------------
# Generated CMake target import file.
#----------------------------------------------------------------

# Commands may need to know the format version.
set(CMAKE_IMPORT_FILE_VERSION 1)

# Protect against multiple inclusion, which would fail when already imported targets are added once more.
set(_cmake_targets_defined "")
set(_cmake_targets_not_defined "")
set(_cmake_expected_targets "")
foreach(_cmake_expected_target IN ITEMS orc::orc)
  list(APPEND _cmake_expected_targets "${_cmake_expected_target}")
  if(TARGET "${_cmake_expected_target}")
    list(APPEND _cmake_targets_defined "${_cmake_expected_target}")
  else()
    list(APPEND _cmake_targets_not_defined "${_cmake_expected_target}")
  endif()
endforeach()
unset(_cmake_expected_target)
if(_cmake_targets_defined STREQUAL _cmake_expected_targets)
  unset(_cmake_targets_defined)
  unset(_cmake_targets_not_defined)
  unset(_cmake_expected_targets)
  unset(CMAKE_IMPORT_FILE_VERSION)
  cmake_policy(POP)
  return()
endif()
if(NOT _cmake_targets_defined STREQUAL "")
  string(REPLACE ";" ", " _cmake_targets_defined_text "${_cmake_targets_defined}")
  string(REPLACE ";" ", " _cmake_targets_not_defined_text "${_cmake_targets_not_defined}")
  message(FATAL_ERROR "Some (but not all) targets in this export set were already defined.\nTargets Defined: ${_cmake_targets_defined_text}\nTargets not yet defined: ${_cmake_targets_not_defined_text}\n")
endif()
unset(_cmake_targets_defined)
unset(_cmake_targets_not_defined)
unset(_cmake_expected_targets)

# Compute the installation prefix relative to this file.
get_filename_component(_IMPORT_PREFIX "${CMAKE_CURRENT_LIST_FILE}" PATH)
get_filename_component(_IMPORT_PREFIX "${_IMPORT_PREFIX}" PATH)
get_filename_component(_IMPORT_PREFIX "${_IMPORT_PREFIX}" PATH)
get_filename_component(_IMPORT_PREFIX "${_IMPORT_PREFIX}" PATH)
if(_IMPORT_PREFIX STREQUAL "/")
  set(_IMPORT_PREFIX "")
endif()

# Create imported target orc::orc
add_library(orc::orc STATIC IMPORTED)

set_target_properties(orc::orc PROPERTIES
  INTERFACE_COMPILE_DEFINITIONS "ENABLE_METRICS=0"
  INTERFACE_INCLUDE_DIRECTORIES "${_IMPORT_PREFIX}/include"
  INTERFACE_LINK_LIBRARIES "protobuf::libprotobuf;ZLIB::ZLIB;Snappy::snappy;\$<TARGET_NAME_IF_EXISTS:LZ4::lz4_static>;\$<TARGET_NAME_IF_EXISTS:LZ4::lz4_shared>;\$<TARGET_NAME_IF_EXISTS:zstd::libzstd_static>;\$<TARGET_NAME_IF_EXISTS:zstd::libzstd_shared>"
)

if(CMAKE_VERSION VERSION_LESS 2.8.12)
  message(FATAL_ERROR "This file relies on consumers using CMake 2.8.12 or greater.")
endif()

# Load information for each installed configuration.
file(GLOB _cmake_config_files "${CMAKE_CURRENT_LIST_DIR}/orcTargets-*.cmake")
foreach(_cmake_config_file IN LISTS _cmake_config_files)
  include("${_cmake_config_file}")
endforeach()
unset(_cmake_config_file)
unset(_cmake_config_files)

# Cleanup temporary variables.
set(_IMPORT_PREFIX)

# Loop over all imported files and verify that they actually exist
foreach(_cmake_target IN LISTS _cmake_import_check_targets)
  foreach(_cmake_file IN LISTS "_cmake_import_check_files_for_${_cmake_target}")
    if(NOT EXISTS "${_cmake_file}")
      message(FATAL_ERROR "The imported target \"${_cmake_target}\" references the file
   \"${_cmake_file}\"
but this file does not exist.  Possible reasons include:
* The file was deleted, renamed, or moved to another location.
* An install or uninstall procedure did not complete successfully.
* The installation package was faulty and contained
   \"${CMAKE_CURRENT_LIST_FILE}\"
but not all the files it references.
")
    endif()
  endforeach()
  unset(_cmake_file)
  unset("_cmake_import_check_files_for_${_cmake_target}")
endforeach()
unset(_cmake_target)
unset(_cmake_import_check_targets)

# This file does not depend on other imported targets which have
# been exported from the same project but in a separate export set.

# Commands beyond this point should not need to know the version.
set(CMAKE_IMPORT_FILE_VERSION)
cmake_policy(POP)
kou commented 2 months ago

Yes! But we need to find dependencies such as protobuf::libprotobuf in orcConfig.cmake something like:

diff --git a/c++/orcConfig.cmake.in b/c++/orcConfig.cmake.in
index 561ea3d7..a6b16f8c 100644
--- a/c++/orcConfig.cmake.in
+++ b/c++/orcConfig.cmake.in
@@ -17,6 +17,13 @@

 @PACKAGE_INIT@

+set(ORC_SYSTEM_DEPENDENCIES "@ORC_SYSTEM_DEPENDENCIES@")
+
+include(CMakeFindDependencyMacro)
+foreach(dependency ${ORC_SYSTEM_DEPENDENCIES})
+  find_dependency(${dependency})
+endforeach()
+
 include("${CMAKE_CURRENT_LIST_DIR}/orcTargets.cmake")

 check_required_components(orc)
diff --git a/c++/src/CMakeLists.txt b/c++/src/CMakeLists.txt
index 958da1af..05cd34c1 100644
--- a/c++/src/CMakeLists.txt
+++ b/c++/src/CMakeLists.txt
@@ -203,13 +203,7 @@ add_library (orc STATIC ${SOURCE_FILES})

 target_link_libraries (orc
   INTERFACE
-    $<INSTALL_INTERFACE:protobuf::libprotobuf>
-    $<INSTALL_INTERFACE:ZLIB::ZLIB>
-    $<INSTALL_INTERFACE:Snappy::snappy>
-    $<INSTALL_INTERFACE:$<TARGET_NAME_IF_EXISTS:LZ4::lz4_static>>
-    $<INSTALL_INTERFACE:$<TARGET_NAME_IF_EXISTS:LZ4::lz4_shared>>
-    $<INSTALL_INTERFACE:$<TARGET_NAME_IF_EXISTS:zstd::libzstd_static>>
-    $<INSTALL_INTERFACE:$<TARGET_NAME_IF_EXISTS:zstd::libzstd_shared>>
+    ${ORC_INSTALL_INTERFACE_TARGETS}
   PUBLIC
     $<BUILD_INTERFACE:orc::protobuf>
     $<BUILD_INTERFACE:orc::zlib>
diff --git a/cmake_modules/ThirdpartyToolchain.cmake b/cmake_modules/ThirdpartyToolchain.cmake
index c3d414e8..3b65db36 100644
--- a/cmake_modules/ThirdpartyToolchain.cmake
+++ b/cmake_modules/ThirdpartyToolchain.cmake
@@ -15,6 +15,9 @@
 # specific language governing permissions and limitations
 # under the License.

+set(ORC_SYSTEM_DEPENDENCIES)
+set(ORC_INSTALL_INTERFACE_TARGETS)
+
 set(ORC_FORMAT_VERSION "1.0.0")
 set(LZ4_VERSION "1.9.3")
 set(SNAPPY_VERSION "1.1.7")
@@ -106,9 +109,13 @@ ExternalProject_Add (orc-format_ep
 # Snappy
 if (ORC_PACKAGE_KIND STREQUAL "conan")
   find_package (Snappy REQUIRED CONFIG)
+  list (APPEND ORC_SYSTEM_DEPENDENCIES Snappy)
+  list (APPEND ORC_INSTALL_INTERFACE_TARGETS "$<INSTALL_INTERFACE:Snappy::snappy>")
   add_resolved_library (orc_snappy ${Snappy_LIBRARIES} ${Snappy_INCLUDE_DIR})
 elseif (NOT "${SNAPPY_HOME}" STREQUAL "")
   find_package (Snappy REQUIRED)
+  list (APPEND ORC_SYSTEM_DEPENDENCIES Snappy)
+  list (APPEND ORC_INSTALL_INTERFACE_TARGETS "$<INSTALL_INTERFACE:Snappy::snappy>")
   if (ORC_PREFER_STATIC_SNAPPY AND ${SNAPPY_STATIC_LIB})
     add_resolved_library (orc_snappy ${SNAPPY_STATIC_LIB} ${SNAPPY_INCLUDE_DIR})
   else ()
@@ -141,9 +148,13 @@ add_library (orc::snappy ALIAS orc_snappy)

 if (ORC_PACKAGE_KIND STREQUAL "conan")
   find_package (ZLIB REQUIRED CONFIG)
+  list (APPEND ORC_SYSTEM_DEPENDENCIES ZLIB)
+  list (APPEND ORC_INSTALL_INTERFACE_TARGETS "$<INSTALL_INTERFACE:ZLIB::ZLIB>")
   add_resolved_library (orc_zlib ${ZLIB_LIBRARIES} ${ZLIB_INCLUDE_DIR})
 elseif (NOT "${ZLIB_HOME}" STREQUAL "")
   find_package (ZLIB REQUIRED)
+  list (APPEND ORC_SYSTEM_DEPENDENCIES ZLIB)
+  list (APPEND ORC_INSTALL_INTERFACE_TARGETS "$<INSTALL_INTERFACE:ZLIB::ZLIB>")
   if (ORC_PREFER_STATIC_ZLIB AND ${ZLIB_STATIC_LIB})
     add_resolved_library (orc_zlib ${ZLIB_STATIC_LIB} ${ZLIB_INCLUDE_DIR})
   else ()
@@ -184,9 +195,17 @@ add_library (orc::zlib ALIAS orc_zlib)

 if (ORC_PACKAGE_KIND STREQUAL "conan")
   find_package (ZSTD REQUIRED CONFIG)
+  list (APPEND ORC_SYSTEM_DEPENDENCIES ZSTD)
+  list (APPEND ORC_INSTALL_INTERFACE_TARGETS "$<INSTALL_INTERFACE:$<TARGET_NAME_IF_EXISTS:zstd::libzstd_static>>")
+  list (APPEND ORC_INSTALL_INTERFACE_TARGETS "$<INSTALL_INTERFACE:$<TARGET_NAME_IF_EXISTS:zstd::libzstd_shared>>")
   add_resolved_library (orc_zstd ${zstd_LIBRARIES} ${zstd_INCLUDE_DIR})
 elseif (NOT "${ZSTD_HOME}" STREQUAL "")
   find_package (ZSTD REQUIRED)
+  # Zstandard uses "zstd" as its CMake package name. We don't use our
+  # FindZSTD.cmake for find_package(orc).
+  list (APPEND ORC_SYSTEM_DEPENDENCIES zstd)
+  list (APPEND ORC_INSTALL_INTERFACE_TARGETS "$<INSTALL_INTERFACE:$<TARGET_NAME_IF_EXISTS:zstd::libzstd_static>>")
+  list (APPEND ORC_INSTALL_INTERFACE_TARGETS "$<INSTALL_INTERFACE:$<TARGET_NAME_IF_EXISTS:zstd::libzstd_shared>>")
   if (ORC_PREFER_STATIC_ZSTD AND ${ZSTD_STATIC_LIB})
     add_resolved_library (orc_zstd ${ZSTD_STATIC_LIB} ${ZSTD_INCLUDE_DIR})
   else ()
@@ -233,9 +252,17 @@ add_library (orc::zstd ALIAS orc_zstd)
 # LZ4
 if (ORC_PACKAGE_KIND STREQUAL "conan")
   find_package (LZ4 REQUIRED CONFIG)
+  list (APPEND ORC_SYSTEM_DEPENDENCIES LZ4)
+  list (APPEND ORC_INSTALL_INTERFACE_TARGETS "$<INSTALL_INTERFACE:$<TARGET_NAME_IF_EXISTS:LZ4::lz4_static>>")
+  list (APPEND ORC_INSTALL_INTERFACE_TARGETS "$<INSTALL_INTERFACE:$<TARGET_NAME_IF_EXISTS:LZ4::lz4_shared>>")
   add_resolved_library (orc_lz4 ${lz4_LIBRARIES} ${lz4_INCLUDE_DIR})
 elseif (NOT "${LZ4_HOME}" STREQUAL "")
   find_package (LZ4 REQUIRED)
+  # LZ4 uses "lz4" as its CMake package name. We don't use our
+  # FindLZ4.cmake for find_package(orc).
+  list (APPEND ORC_SYSTEM_DEPENDENCIES lz4)
+  list (APPEND ORC_INSTALL_INTERFACE_TARGETS "$<INSTALL_INTERFACE:$<TARGET_NAME_IF_EXISTS:LZ4::lz4_static>>")
+  list (APPEND ORC_INSTALL_INTERFACE_TARGETS "$<INSTALL_INTERFACE:$<TARGET_NAME_IF_EXISTS:LZ4::lz4_shared>>")
   if (ORC_PREFER_STATIC_LZ4 AND ${LZ4_STATIC_LIB})
     add_resolved_library (orc_lz4 ${LZ4_STATIC_LIB} ${LZ4_INCLUDE_DIR})
   else ()
@@ -379,9 +406,13 @@ endif ()

 if (ORC_PACKAGE_KIND STREQUAL "conan")
   find_package (Protobuf REQUIRED CONFIG)
+  list (APPEND ORC_SYSTEM_DEPENDENCIES Protobuf)
+  list (APPEND ORC_INSTALL_INTERFACE_TARGETS "$<INSTALL_INTERFACE:protobuf::libprotobuf>")
   add_resolved_library (orc_protobuf ${protobuf_LIBRARIES} ${protobuf_INCLUDE_DIR})
 elseif (NOT "${PROTOBUF_HOME}" STREQUAL "")
   find_package (Protobuf REQUIRED)
+  list (APPEND ORC_SYSTEM_DEPENDENCIES Protobuf)
+  list (APPEND ORC_INSTALL_INTERFACE_TARGETS "$<INSTALL_INTERFACE:protobuf::libprotobuf>")

   if (ORC_PREFER_STATIC_PROTOBUF AND ${PROTOBUF_STATIC_LIB})
     add_resolved_library (orc_protobuf ${PROTOBUF_STATIC_LIB} ${PROTOBUF_INCLUDE_DIR})

Note that the above diff doesn't work for dependencies that are installed in custom location such as $HOME/local because we use cmake_modules/FindXXX.cmake on build but we don't use it on find_package(orc). They may find different dependencies.

wgtmac commented 2 months ago

Yes, I am already trying to add find_dependency to the config file. Thanks for the detail help!

kou commented 2 months ago

FYI: Apache Arrow bundles their custom FindXXX.cmake and use them if they are needed: https://github.com/apache/arrow/blob/73386cb806a429875f1846b69e21beccfd339b21/cpp/cmake_modules/ThirdpartyToolchain.cmake#L236-L250

wgtmac commented 2 months ago

FYI: Apache Arrow bundles their custom FindXXX.cmake and use them if they are needed: https://github.com/apache/arrow/blob/73386cb806a429875f1846b69e21beccfd339b21/cpp/cmake_modules/ThirdpartyToolchain.cmake#L236-L250

I saw that. What I tried is to install FindXXX.cmake from Apache ORC but they are called before find_dependency in the config file, so I thought it is not a good idea to use them.

kou commented 2 months ago

Apache Arrow uses FindXXXAlt.cmake when we need to use CONFIG CMake package. e.g.: https://github.com/apache/arrow/blob/main/cpp/cmake_modules/FindSnappyAlt.cmake

wgtmac commented 2 months ago

Finally I make it work for all modes (vendored, providing XXX_HOME, or conan) and this is ready for review. Would you mind taking a look at this again? @kou

wgtmac commented 2 months ago

@deshanxiao Could you help check if this breaks vcpkg? Is it possible to add ORC_PACKAGE_KIND=vcpkg to remove the patch file https://github.com/microsoft/vcpkg/blob/9224b3bbd8df24999d85720b1d005dd6f969ade0/ports/orc/fix-cmake.patch?

wgtmac commented 2 months ago

@h-vetinari @xhochy Could you help check if this breaks conda? I saw there are some patches https://github.com/conda-forge/orc-feedstock/tree/main/recipe/patches. Is it possible to add ORC_PACKAGE_KIND=conda with some code change to remove the patches?

wgtmac commented 2 months ago

cc @dongjoon-hyun @stiga-huang

deshanxiao commented 2 months ago

Hi @wgtmac I checked vcpkg and the previous patch can not apply any more:

-- Downloading https://github.com/wgtmac/orc/archive/ORC-1689.tar.gz -> wgtmac-orc-ORC-1689.tar.gz...
-- Extracting source /home/deshanxiao/vcpkg/downloads/wgtmac-orc-ORC-1689.tar.gz
-- Applying patch fix-cmake.patch
CMake Error at scripts/cmake/z_vcpkg_apply_patches.cmake:34 (message):
  Applying patch failed: Checking patch c++/src/CMakeLists.txt..

This will not break our existing version. Feel free to merge the PR. And I will make a new PR to remove the patch in vcpkg.

h-vetinari commented 2 months ago

@h-vetinari @xhochy Could you help check if this breaks conda?

Thanks for the ping - happy to provide feedback and to try things out!

CC also @nehaljwani, who has been taking care of orc in conda-forge for much longer than I have.

I saw there are some patches https://github.com/conda-forge/orc-feedstock/tree/main/recipe/patches. Is it possible to add ORC_PACKAGE_KIND=conda with some code change to remove the patches?

I just went through the patches in the feedstock (after rebasing them on current main here):

So aside from patch Nr. 2 (if ORC insists to hardcode the STATIC bit), it should be possible to avoid the need for ORC_PACKAGE_KIND=conda in the first place, AFAICT.

wgtmac commented 2 months ago

Thanks for the check and feedback! @h-vetinari

For the 2nd patch, unfortunately it requires some additional change (not just removing STATIC keyword). The only patch on the Conan side since Apache ORC 2.0.0 is doing the similar thing: https://github.com/conan-io/conan-center-index/blob/master/recipes/orc/all/conanfile.py#L135-L137. I will try to support building shared libraries natively but for now let's still patch this.

wgtmac commented 2 months ago

@stiga-huang

On the Apache Impala side, we can remove FindOrc.cmake by doing following things:

wgtmac commented 2 months ago

I have just merged it. Thanks all!