JonathanSalwan / Triton

Triton is a dynamic binary analysis library. Build your own program analysis tools, automate your reverse engineering, perform software verification or just emulate code.
https://triton-library.github.io
Apache License 2.0
3.48k stars 529 forks source link

Consider getting rid of boost #1073

Closed mrexodia closed 2 years ago

mrexodia commented 2 years ago

It seems that Triton only depends on boost for the 512 bit integer types. Perhaps switching to a header-only lightweight alternative would be an idea?

I found https://github.com/ckormanyos/wide-integer after looking a bit, but I'm sure others exist.

SweetVishnya commented 2 years ago

I believe Boost is more stable than some repo on github. Anyway, we should benchmark performance if such changes occur.

mrexodia commented 2 years ago

It's not really about performance, it's about the fact that Boost is extremely large 😅

image

JonathanSalwan commented 2 years ago

What about just extracting boost multiprecision headers?

mrexodia commented 2 years ago

I tried this locally some years ago, but it's still a huge amount of code because the multiprecision headers include a lot of other boost components. I can experiment locally with the wide-integer single header and see if replacing the typedefs is enough to make things work, maybe it's straightforward to provide fake boost types.

mrexodia commented 2 years ago

I ran from the boost directory:

b2.exe --toolset=msvc
mkdir triton
dist\bin\bcp.exe multiprecision/cpp_int.hpp ./triton
dist\bin\bcp.exe numeric/conversion/cast.hpp ./triton

image

It results in 1492 files, 15.4mb

JonathanSalwan commented 2 years ago

but it's still a huge amount of code because the multiprecision headers include a lot of other boost components.

Yep, I've also tried several years ago :).

I can experiment locally with the wide-integer single header and see if replacing the typedefs is enough to make things work,

I'm not sure, as we use some methods of the Boost types, for example convert_to<>.

SweetVishnya commented 2 years ago

Yeah, we do some int-to-string conversions all over the code. Anyway, one can build only a subset of the Boost components.

SweetVishnya commented 2 years ago

The part of our CMake related to Boost:

# Build a pinned Boost release from source with ExternalProject.
#
# Inputs read by this section but not set here (they must be provided by the
# including project or on the command line):
#   BUILD_BOOST        - when true, add the `boost` external project.
#   BUILD_BOOST32      - when true and CMAKE_SIZEOF_VOID_P is 8, additionally
#                        add `boost32`, built with address-model=32.
#   BOOST_ROOT         - install prefix for `boost`.
#   BOOST32_ROOT       - install prefix for `boost32`.
#   ARCH               - value passed to b2 as address-model for `boost`.
#   CLANG              - used as a generator-expression condition, so it must
#                        expand to 0 or 1.
#   BUILD_SCRIPTS_DIR  - directory containing build_boost.sh (Linux branch).
#
# All if() conditions below pass variable NAMES rather than ${...} expansions:
# an unquoted expansion of an empty/unset variable leaves a malformed
# condition (e.g. `if( MATCHES "Debug")`), and a non-empty one may be
# dereferenced a second time if its value names another variable.

# Set required Boost libraries
set(BOOST_COMPONENTS program_options filesystem log iostreams)
list(JOIN BOOST_COMPONENTS "," BOOST_COMPONENTS_COMMA_SEP)

# Source archive shared by every external project below, kept in one place so
# the URL and its checksum cannot drift apart between the variants.
set(BOOST_SOURCE_URL
  https://boostorg.jfrog.io/artifactory/main/release/1.72.0/source/boost_1_72_0.tar.gz)
set(BOOST_SOURCE_SHA256
  c66e88d5786f2ca4dbebb14e06b566fb642a1a6947ad8cc9091f9f445134143f)

# Build Boost
# Turn each component into a `--with-<component>` argument for the build step.
set(BOOST_COMPONENTS_BUILD ${BOOST_COMPONENTS})
list(TRANSFORM BOOST_COMPONENTS_BUILD PREPEND "--with-")
if(CMAKE_SYSTEM_NAME MATCHES "Linux")
  if(BUILD_BOOST)
    ExternalProject_Add(boost
      URL ${BOOST_SOURCE_URL}
      URL_HASH SHA256=${BOOST_SOURCE_SHA256}
      PREFIX ${BOOST_ROOT}-build
      INSTALL_DIR ${BOOST_ROOT}
      BUILD_IN_SOURCE ON
      CONFIGURE_COMMAND ./bootstrap.sh --prefix="${BOOST_ROOT}"
        --with-libraries=${BOOST_COMPONENTS_COMMA_SEP}
        $<${CLANG}:--with-toolset=clang>
      BUILD_COMMAND ${BUILD_SCRIPTS_DIR}/build_boost.sh $<${CLANG}:toolset=clang>
        threading=single
        cflags=-fPIC cxxflags=-fPIC
        ${BOOST_COMPONENTS_BUILD}
        $<$<CONFIG:Debug>:variant=debug>
        link=static
        address-model=${ARCH}
        define=BOOST_LOG_NO_THREADS
        install
      # The build command already passes `install`, so no separate install step.
      INSTALL_COMMAND ""
    )
  endif()
  if(BUILD_BOOST32 AND CMAKE_SIZEOF_VOID_P EQUAL 8)
    ExternalProject_Add(boost32
      URL ${BOOST_SOURCE_URL}
      URL_HASH SHA256=${BOOST_SOURCE_SHA256}
      PREFIX ${BOOST32_ROOT}-build
      INSTALL_DIR ${BOOST32_ROOT}
      BUILD_IN_SOURCE ON
      CONFIGURE_COMMAND ./bootstrap.sh --prefix="${BOOST32_ROOT}"
        --with-libraries=${BOOST_COMPONENTS_COMMA_SEP}
        $<${CLANG}:--with-toolset=clang>
      BUILD_COMMAND ${BUILD_SCRIPTS_DIR}/build_boost.sh $<${CLANG}:toolset=clang>
        threading=single
        cflags=-fPIC cxxflags=-fPIC
        ${BOOST_COMPONENTS_BUILD}
        $<$<CONFIG:Debug>:variant=debug>
        link=static
        address-model=32
        define=BOOST_LOG_NO_THREADS
        install
      # The build command already passes `install`, so no separate install step.
      INSTALL_COMMAND ""
    )
  endif()
elseif(CMAKE_SYSTEM_NAME MATCHES "Windows")
  # The b2 variant is chosen at configure time from CMAKE_BUILD_TYPE; anything
  # that does not match "Debug" (including an empty value) selects release.
  if(CMAKE_BUILD_TYPE MATCHES "Debug")
    set(BOOST_VARIANT variant=debug)
  else()
    set(BOOST_VARIANT variant=release)
  endif()
  if(BUILD_BOOST)
    ExternalProject_Add(boost
      URL ${BOOST_SOURCE_URL}
      URL_HASH SHA256=${BOOST_SOURCE_SHA256}
      PREFIX ${BOOST_ROOT}-build
      INSTALL_DIR ${BOOST_ROOT}
      BUILD_IN_SOURCE ON
      CONFIGURE_COMMAND bootstrap.bat --prefix="${BOOST_ROOT}"
        --with-libraries=${BOOST_COMPONENTS_COMMA_SEP}
      # TODO: spaces in BOOST_ROOT ("" do not work)
      BUILD_COMMAND b2 --prefix=${BOOST_ROOT}
        ${BOOST_COMPONENTS_BUILD}
        ${BOOST_VARIANT}
        link=static
        runtime-link=static
        address-model=${ARCH}
        install
      # The build command already passes `install`, so no separate install step.
      INSTALL_COMMAND ""
    )
  endif()
  if(BUILD_BOOST32 AND CMAKE_SIZEOF_VOID_P EQUAL 8)
    ExternalProject_Add(boost32
      URL ${BOOST_SOURCE_URL}
      URL_HASH SHA256=${BOOST_SOURCE_SHA256}
      PREFIX ${BOOST32_ROOT}-build
      INSTALL_DIR ${BOOST32_ROOT}
      BUILD_IN_SOURCE ON
      CONFIGURE_COMMAND bootstrap.bat --prefix="${BOOST32_ROOT}"
        --with-libraries=${BOOST_COMPONENTS_COMMA_SEP}
      # TODO: spaces in BOOST32_ROOT ("" do not work)
      BUILD_COMMAND b2 --prefix=${BOOST32_ROOT}
        ${BOOST_COMPONENTS_BUILD}
        ${BOOST_VARIANT}
        link=static
        runtime-link=static
        address-model=32
        install
      # The build command already passes `install`, so no separate install step.
      INSTALL_COMMAND ""
    )
  endif()
endif()
illera88 commented 2 years ago

If you use vcpkg to get Boost, you can restrict which parts of Boost are downloaded. I use this command in Ponce to download the dependencies needed to build Triton:

vcpkg install capstone[x86,arm,arm64] z3 boost-multiprecision boost-numeric-conversion boost-math
illera88 commented 2 years ago

On the other hand, after reading through the https://github.com/ckormanyos/wide-integer library, it looks pretty good, and it's true that using Boost requires downloading a large list of Boost headers and files, which is overkill for just using a few types IMO.

Regarding convert_to I think you can just use static_cast to the wanted type with wide-integer-library (Example)

illera88 commented 2 years ago

I created a branch where I've started moving from Boost to the wide-integer library:

https://github.com/illera88/Triton/tree/wide-integer

JonathanSalwan commented 2 years ago

Before doing that are we sure that wide-integer is a good and reliable library? I still do not know if moving to another lib worth it. Is boost a real problem?

mrexodia commented 2 years ago

It’s up to you in the end, but adding numbers isn’t rocket science. I would assume that a dedicated wide-integer library can do arithmetic, but the Triton tests should catch any issues right?

SweetVishnya commented 2 years ago

We personally use Boost in Sydr anyway. However, we can benchmark (accuracy and speed) Triton on our system locally if you consider changing the wide-integer library.

P.S. @JonathanSalwan, our benchmark shows worse results on one binary after recent BSWAP semantics support. Trying to examine it.

SweetVishnya commented 2 years ago

Furthermore, we use the Boost wide-integer API (like convert_to) in order to do some analysis of concrete values (from Triton) for DSE.

JonathanSalwan commented 2 years ago

but the Triton tests should catch any issues right?

@mrexodia, theoretically yep :)

Your benchmark shows worse results on one binary after recent BSWAP semantics support. Trying to examine it.

@SweetVishnya, this is weird because we do not use the bswap operator in semantics.cpp.

illera88 commented 2 years ago

Before doing that are we sure that wide-integer is a good and reliable library? I still do not know if moving to another lib worth it. Is boost a real problem?

The maintainer of wide-integer is the same person as the author and maintainer of boost-multiprecision.

illera88 commented 2 years ago

The branch is still not compiling. Creating 80-bit integers is more complicated in wide-integer than in Boost.

As a reason to stick with boost is that Boost 1.79 which is supposed to roll out this spring will make boost multiprecision kind of standalone (only depending on config and boost math).

You can follow the issue on wide-integer where I'm going back and forth to port triton to wide-integer and the maintainer is super responsive here.

JonathanSalwan commented 2 years ago

As a reason to stick with boost is that Boost 1.79 which is supposed to roll out this spring will make boost multiprecision kind of standalone (only depending on config and boost math).

That's interesting!

illera88 commented 2 years ago

@JonathanSalwan can you check why this branch is not compiling on Linux? I've tested it on Windows and it compiles fine.

For some reason the linker can't find a couple functions I added.

[ 91%] Linking CXX executable taint_reg
/usr/bin/ld: ../../libtriton/libtriton.so: undefined reference to `triton::utils::convert_to_uint512(math::wide_integer::uintwide_t<80u, unsigned short, void, false>)'
/usr/bin/ld: ../../libtriton/libtriton.so: undefined reference to `triton::utils::convert_to_uint80(math::wide_integer::uintwide_t<512u, unsigned int, void, false>)'
collect2: error: ld returned 1 exit status

I would like to get that branch working even if you decide not to merge it in Triton and stick to boost.

Cheers

JonathanSalwan commented 2 years ago

In coreUtils.hpp and coreUtils.cpp remove TRITON_EXPORT and inline if you want to use WIDE_INTEGER_CONSTEXPR. Example:

56     WIDE_INTEGER_CONSTEXPR triton::uint80 convert_to_uint80(triton::uint512 value);
57     WIDE_INTEGER_CONSTEXPR triton::uint512 convert_to_uint512(triton::uint80 value);

I've also executed tests and looks good.

[ 89%] Built target python-triton
................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................
----------------------------------------------------------------------
Ran 496 tests in 359.792s

[...]

40/41 Test #40: UnicornARM32Semantics32 ..........   Passed    0.29 sec
      Start 41: UnicornARM32Semantics33
41/41 Test #41: UnicornARM32Semantics33 ..........   Passed    0.26 sec

100% tests passed, 0 tests failed out of 41

Total Test time (real) =  75.51 sec
[100%] Built target check

I still do not know if I want to merge it but can you make a PR so that we have a backup of this patch?

JonathanSalwan commented 2 years ago

It's merged into master and dev-v1.0. Thanks all!

cmake -DBOOST_INTERFACE=off

JonathanSalwan commented 2 years ago

With 243026c9c1e07a5ca834c4aaf628d1079f6a85ea, when boost headers are not found during the cmake init, we will use wide-integers headers which are shipped with the setup.