parallel-runtimes / lomp

Little OpenMP Library
Apache License 2.0
153 stars 17 forks source link

Perform proper ISA detection for ARM processors #19

Open mjklemm opened 3 years ago

mjklemm commented 3 years ago

Describe the bug

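The CMakeLists.txt hardwires the ARM ISA to armv8.1a (via `-march=armv8.1a`), which breaks code generation for the Raspberry Pi, whose cores implement only the baseline ARMv8.0-A instruction set.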

To Reproduce: Compile the LOMP runtime with the default settings on a 64-bit Raspberry Pi.

Expected behavior: CMake should detect the proper instruction set.

Screenshots: This commit introduced the problem:

commit 60c4ad607334b7021cc5d62f1f7bc07353ecf531
Author: Jim Cownie <jcownie@gmail.com>
Date:   Tue May 4 11:05:50 2021 +0000

    Add -march=armv8.1a by default on aarch64

diff --git a/CMakeLists.txt b/CMakeLists.txt
index aa3a938..b9cf518 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -25,7 +25,8 @@ option(LOMP_ICC_SUPPORT "Build additional entry points for Intel classic compile
 option(LOMP_MICROBM_WITH_LOMP "Use LOMP for micro-benchmarks" OFF)
 option(LOMP_BUILD_MICROBM "Build micro-benchmarks" ON)
 option(LOMP_BUILD_EXAMPLES "Build example programs" ON)
-
+# Assume aarch64 v8.1a architecture by default.
+SET(LOMP_ARM64_ARCHITECTURE "armv8.1a" CACHE STRING "Detailed aarch64 architecture passed to -march=")
 include(CheckCXXCompilerFlag)

 # look for Git
@@ -52,6 +53,12 @@ message(STATUS "Target system: ${LOMP_TARGET_OS}")
 execute_process(COMMAND arch COMMAND tr -d '\n' OUTPUT_VARIABLE LOMP_TARGET_ARCH)
 message(STATUS "Architecture: ${LOMP_TARGET_ARCH}")

+if (${LOMP_TARGET_ARCH} STREQUAL "aarch64")
+   add_compile_options(-march=${LOMP_ARM64_ARCHITECTURE})
+   message("-- LOMP_ARM64_ARCHITECTURE=${LOMP_ARM64_ARCHITECTURE} => -march=${LOMP_ARM64_ARCHITECTURE}")
+   add_compile_definitions(LOMP_ARM64_ARCHITECTURE=${LOMP_ARM64_ARCHITECTURE})
+endif()
+
 # find all LOMP source files
 file(GLOB_RECURSE LOMP_SOURCE_FILES ${PROJECT_SOURCE_DIR}/*.cc
                                     ${PROJECT_SOURCE_DIR}/*.c

Desktop (please complete the following information):

mjklemm commented 2 years ago

There was no time to fix this for version 0.2.