Open joerowell opened 2 months ago
@jackkosaian
Thanks for reporting this. I was able to reproduce this locally and resolve it with the following patch. Would you be willing to try this out?
diff --git a/python/cutlass/__init__.py b/python/cutlass/__init__.py
index dfc9b405..6b6130c4 100644
--- a/python/cutlass/__init__.py
+++ b/python/cutlass/__init__.py
@@ -57,6 +57,19 @@ CUTLASS_PATH = os.getenv("CUTLASS_PATH", cutlass_library.source_path)
# Alias CUTLASS_PATH as source_path
source_path = CUTLASS_PATH
+_NVCC_VERSION = None
+def nvcc_version():
+ global _NVCC_VERSION
+ if _NVCC_VERSION is None:
+ import subprocess
+
+ # Attempt to get NVCC version
+ result = subprocess.run(['nvcc', '--version'], capture_output=True)
+ if result.returncode != 0:
+ raise Exception('Unable to run `nvcc --version')
+ _NVCC_VERSION = str(result.stdout).split(" release ")[-1].split(",")[0]
+ return _NVCC_VERSION
+
_CUDA_INSTALL_PATH = None
def cuda_install_path():
"""
diff --git a/python/cutlass/backend/compiler.py b/python/cutlass/backend/compiler.py
index f52b1818..f08c1184 100644
--- a/python/cutlass/backend/compiler.py
+++ b/python/cutlass/backend/compiler.py
@@ -90,7 +90,7 @@ class CompilationOptions:
opts.append(f"--include-path={incl}")
arch_flag = f"-arch=sm_{self.arch}"
- if self.arch == 90:
+ if self.arch == 90 and int(cutlass.nvcc_version().split('.')[0]) >= 12:
arch_flag += "a"
opts.append(arch_flag)
diff --git a/python/cutlass/library_defaults.py b/python/cutlass/library_defaults.py
index 7c16cc68..2a02f61c 100644
--- a/python/cutlass/library_defaults.py
+++ b/python/cutlass/library_defaults.py
@@ -51,6 +51,20 @@ _generator_ccs = [50, 60, 61, 70, 75, 80, 90]
# Strip any additional information from the CUDA version
_cuda_version = __version__.split("rc")[0]
+# Check that Python CUDA version exceeds NVCC version
+_nvcc_version = cutlass.nvcc_version()
+_cuda_list = _cuda_version.split('.')
+_nvcc_list = _cuda_version.split('.')
+for val_cuda, val_nvcc in zip(_cuda_list, _nvcc_list):
+ if int(val_cuda) < int(val_nvcc):
+ raise Exception(f"Python CUDA version of {_cuda_version} must be greater than or equal to NVCC version of {_nvcc_version}")
+
+if len(_nvcc_list) > len(_cuda_list):
+ if len(_nvcc_list) != len(_cuda_list) + 1:
+ raise Exception(f"Malformatted NVCC version of {_nvcc_version}")
+ if _nvcc_list[:-1] == _cuda_list and int(_nvcc_list[-1]) != 0:
+ raise Exception(f"Python CUDA version of {_cuda_version} must be greater than or equal to NVCC version of {_nvcc_version}")
+
class KernelsForDataType:
"""
@@ -278,7 +292,7 @@ class ArchOptions:
]
manifest_args = cutlass_library.generator.define_parser().parse_args(args)
manifest = cutlass_library.manifest.Manifest(manifest_args)
- generate_function(manifest, _cuda_version)
+ generate_function(manifest, _nvcc_version)
if operation_kind not in manifest.operations:
# No kernels generated for this architecture, this could be because the CUDA
Hi: thanks for the diff. I can confirm that it fixes the issue on my side.
This issue has been labeled `inactive-30d` due to no recent activity in the past 30 days. Please close this issue if no further response or action is needed. Otherwise, please respond with a comment indicating any updates or changes to the original issue and/or confirm this issue still needs to be addressed. This issue will be labeled `inactive-90d` if there is no activity in the next 60 days.
**Describe the bug**
The following sample program fails with the latest version of cutlass:
The error given is as follows:
Steps/Code to reproduce bug
-DCUTLASS_NVCC_ARCHS="90"
pip install .
in the root directory of cutlass.

**Expected behavior**
It looks like the CUTLASS Python wrapper assumes that every H100 machine runs CUDA 12 or later. I see, for instance, the following:
https://github.com/NVIDIA/cutlass/blob/3a8c01a18b24c35b216922481ac762496720a99d/python/cutlass/emit/pytorch.py#L691-L694
But that isn't necessarily the case.