Open vmarkovtsev opened 2 weeks ago
I made it work with the following changes in EFA dkms. Please let me know how I can contribute these changes.
diff -ruN efa-2.6.0/config/efa.cmake efa-2.6.0/config/efa.cmake
--- efa-2.6.0/config/efa.cmake 2024-07-05 21:41:00.045808694 +0200
+++ efa-2.6.0/config/efa.cmake 2024-07-05 21:31:20.625037540 +0200
@@ -11,13 +11,15 @@
set(tmp_dir ${tmp_dir} PARENT_SCOPE)
configure_file(${CMAKE_SOURCE_DIR}/config/main.c.in ${tmp_dir}/main.c @ONLY)
configure_file(${CMAKE_SOURCE_DIR}/config/Makefile ${tmp_dir} COPYONLY)
+ configure_file(${CMAKE_SOURCE_DIR}/config/kbuild.mk ${tmp_dir} COPYONLY)
endfunction()
function(try_compile_prog_test)
set_conf_tmp_dir("" "")
execute_process(COMMAND make -C ${tmp_dir} KERNEL_DIR=${KERNEL_DIR}
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
- OUTPUT_QUIET ERROR_QUIET
+ #OUTPUT_QUIET
+ #ERROR_QUIET
RESULT_VARIABLE res)
if(res)
message(FATAL_ERROR "Conftest failure, kernel headers missing?")
diff -ruN efa-2.6.0/config/kbuild.mk efa-2.6.0/config/kbuild.mk
--- efa-2.6.0/config/kbuild.mk 1970-01-01 01:00:00.000000000 +0100
+++ efa-2.6.0/config/kbuild.mk 2024-07-05 21:31:20.005066674 +0200
@@ -0,0 +1,27 @@
+KVER ?= $(shell uname -r)
+MLNX_ARCH = $(shell uname -m)
+OFA_DIR ?= /usr/src/ofa_kernel/$(MLNX_ARCH)
+OFA := $(shell ( test -d $(OFA_DIR)/$(KVER) && echo $(OFA_DIR)/$(KVER) ) || ( test -d /var/lib/dkms/mlnx-ofed-kernel/ && ls -d /var/lib/dkms/mlnx-ofed-kernel/*/build ) || ( echo $(OFA_DIR) ))
+K_BUILD ?= /lib/modules/$(KVER)/build
+KERNEL_DIR ?= /lib/modules/$(KVER)/build
+KBUILD_EXTRA_SYMBOLS := $(OFA)/Module.symvers
+autoconf_h=$(shell /bin/ls -1 $(K_BUILD)/include/*/autoconf.h 2> /dev/null | head -1)
+kconfig_h=$(shell /bin/ls -1 $(K_BUILD)/include/*/kconfig.h 2> /dev/null | head -1)
+
+LINUXINCLUDE := \
+-include $(kconfig_h) \
+-include $(OFA)/include/linux/compat-2.6.h \
+-I$(PWD) \
+-I$(OFA)/include \
+-I$(OFA)/include/uapi \
+-I$$(srctree)/arch/$$(SRCARCH)/include \
+-Iarch/$$(SRCARCH)/include/generated \
+-Iinclude \
+-I$$(srctree)/arch/$$(SRCARCH)/include/uapi \
+-Iarch/$$(SRCARCH)/include/generated/uapi \
+-I$$(srctree)/include \
+-I$$(srctree)/include/uapi \
+-Iinclude/generated/uapi \
+$$(if $$(KBUILD_SRC),-Iinclude2 -I$$(srctree)/include) \
+-I$$(srctree)/arch/$$(SRCARCH)/include \
+-Iarch/$$(SRCARCH)/include/generated
diff -ruN efa-2.6.0/config/Makefile efa-2.6.0/config/Makefile
--- efa-2.6.0/config/Makefile 2024-07-05 21:40:59.769821663 +0200
+++ efa-2.6.0/config/Makefile 2024-07-05 21:31:20.321051825 +0200
@@ -7,7 +7,8 @@
obj-m += $(DRIVER_NAME).o
$(DRIVER_NAME)-objs := main.o
-KERNEL_DIR ?= /lib/modules/$(shell uname -r)/build
+obj ?= .
+include $(obj)/kbuild.mk
modules:
- $(MAKE) -C $(KERNEL_DIR) M=$(CURDIR) modules
+ $(MAKE) -C $(KERNEL_DIR) M=$(CURDIR) V=1 LINUXINCLUDE='$(LINUXINCLUDE)' modules
diff -ruN efa-2.6.0/dkms.conf efa-2.6.0/dkms.conf
--- efa-2.6.0/dkms.conf 2024-07-05 21:41:10.201331451 +0200
+++ efa-2.6.0/dkms.conf 2024-07-05 21:31:31.188541134 +0200
@@ -3,7 +3,7 @@
CLEAN="cd build; make modules_clean; make clean"
PRE_BUILD="./configure-dkms.sh $kernelver $source_tree"
# Quoted 'make' to suppress DKMS append of KERNELRELEASE
-MAKE="cd build; 'make'"
+MAKE="cd build; make VERBOSE=1"
BUILT_MODULE_NAME[0]="efa"
BUILT_MODULE_LOCATION="build/src/"
DEST_MODULE_LOCATION="/extra"
diff -ruN efa-2.6.0/src/build.mk efa-2.6.0/src/build.mk
--- efa-2.6.0/src/build.mk 1970-01-01 01:00:00.000000000 +0100
+++ efa-2.6.0/src/build.mk 2024-07-05 21:31:23.816887549 +0200
@@ -0,0 +1,4 @@
+include $(M)/kbuild.mk
+
+modules:
+ $(MAKE) -C $(KERNEL_DIR) M=$(M) V=1 LINUXINCLUDE='$(LINUXINCLUDE)' modules
diff -ruN efa-2.6.0/src/CMakeLists.txt efa-2.6.0/src/CMakeLists.txt
--- efa-2.6.0/src/CMakeLists.txt 2024-07-05 21:41:04.721588963 +0200
+++ efa-2.6.0/src/CMakeLists.txt 2024-07-05 21:31:25.680799959 +0200
@@ -13,6 +13,8 @@
string(REPLACE ";" " " efa_sources_string "${efa_sources}")
configure_file(Kbuild.in Kbuild @ONLY)
+configure_file(build.mk build.mk COPYONLY)
+configure_file(../config/kbuild.mk kbuild.mk COPYONLY)
foreach(src ${efa_sources})
configure_file(${src} ${src} COPYONLY)
@@ -25,14 +27,14 @@
message("-- Peer-to-peer memory enabled")
endif()
-set(module_cmd make -C ${KERNEL_DIR} M=${CMAKE_CURRENT_BINARY_DIR})
+set(module_cmd make -C ${CMAKE_CURRENT_BINARY_DIR} -f build.mk M=${CMAKE_CURRENT_BINARY_DIR})
if(GCOV_PROFILE)
set(module_cmd ${module_cmd} GCOV_PROFILE=y)
endif()
add_custom_command(OUTPUT efa.ko
COMMAND ${module_cmd} modules
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
- DEPENDS ${efa_sources} ${CMAKE_CURRENT_BINARY_DIR}/Kbuild ${CMAKE_CURRENT_BINARY_DIR}/config.h
+ DEPENDS ${efa_sources} ${CMAKE_CURRENT_BINARY_DIR}/Kbuild build.mk ${CMAKE_CURRENT_BINARY_DIR}/config.h ../config/kbuild.mk
VERBATIM)
add_custom_target(modules ALL DEPENDS efa.ko)
diff -ruN efa-2.6.0/src/Kbuild.in efa-2.6.0/src/Kbuild.in
--- efa-2.6.0/src/Kbuild.in 2024-07-05 21:41:01.813725613 +0200
+++ efa-2.6.0/src/Kbuild.in 2024-07-05 21:31:22.364955777 +0200
@@ -1,2 +1,4 @@
+include $(obj)/kbuild.mk
+KBUILD_EXTRA_SYMBOLS := $(OFA)/Module.symvers
obj-m := efa.o
efa-y := $(patsubst %.c,%.o, $(filter %.c, @efa_sources_string@))
Preliminary Actions
Driver Type
Linux kernel driver for Elastic Fabric Adapter (EFA)
Driver Tag/Commit
efa_2.6.0-1.amzn1_amd64
Custom Code
No
OS Platform and Distribution
5.15.0-2000-aws #2000 SMP Thu May 23 13:16:55 UTC 2024 x86_64 x86_64 x86_64 GNU/Linux
Stock Ubuntu kernel 5.15.0-1006 with `nvme` moved from built-in to an external module.
Bug description
When built with `rdma` pointing at the Mellanox distribution (i.e., `/usr/src/ofa_kernel/x86_64/5.15.0-2000-aws/include/rdma`, belonging to https://network.nvidia.com/products/infiniband-drivers/linux/mlnx_ofed/), `insmod efa` fails.
The reason we are going through these unusual contortions is that we want GPUDirect in nvme to work together with EFA. Right now, we have to choose one of the two because of the described crash.
Reproduction steps
Expected Behavior
`efa` inserts alongside the existing `ib_core`, `ib_uverbs`, etc. from Mellanox.
Actual Behavior
`insmod` hangs.
Additional Data
Relevant log output
No response
Contact Details
vadim@poolside.ai