JuliaGPU / CUDA.jl

CUDA programming in Julia.
https://juliagpu.org/cuda/
Other
1.22k stars 225 forks source link

Error when call sin in CUDA kernel after updating CUDA.jl to 5.5.2 #2515

Open 0samuraiE opened 2 months ago

0samuraiE commented 2 months ago

Describe the bug

As the title says, I get an error when I call sin function after updating CUDA.jl. It worked fine in the previous version. I tried some case and is seems that this error probably occurs only when all of the following conditions are met.

To reproduce

The Minimal Working Example (MWE) for this bug:

using CUDA
function test()
    sin(0)
    return nothing
end
CUDA.@cuda test()
Manifest.toml

``` julia_version = "1.10.5" manifest_format = "2.0" project_hash = "9ac2773310cf57c3389bea343cefd54d9ef9ded6" [[deps.AbstractFFTs]] deps = ["LinearAlgebra"] git-tree-sha1 = "d92ad398961a3ed262d8bf04a1a2b8340f915fef" uuid = "621f4979-c628-5d54-868e-fcf4e3e8185c" version = "1.5.0" [deps.AbstractFFTs.extensions] AbstractFFTsChainRulesCoreExt = "ChainRulesCore" AbstractFFTsTestExt = "Test" [deps.AbstractFFTs.weakdeps] ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [[deps.Adapt]] deps = ["LinearAlgebra", "Requires"] git-tree-sha1 = "6a55b747d1812e699320963ffde36f1ebdda4099" uuid = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" version = "4.0.4" weakdeps = ["StaticArrays"] [deps.Adapt.extensions] AdaptStaticArraysExt = "StaticArrays" [[deps.ArgTools]] uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f" version = "1.1.1" [[deps.Artifacts]] uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" [[deps.Atomix]] deps = ["UnsafeAtomics"] git-tree-sha1 = "c06a868224ecba914baa6942988e2f2aade419be" uuid = "a9b6321e-bd34-4604-b9c9-b65b8de01458" version = "0.1.0" [[deps.BFloat16s]] deps = ["LinearAlgebra", "Printf", "Random", "Test"] git-tree-sha1 = "2c7cc21e8678eff479978a0a2ef5ce2f51b63dff" uuid = "ab4f0b2a-ad5b-11e8-123f-65d77653426b" version = "0.5.0" [[deps.Base64]] uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" [[deps.CEnum]] git-tree-sha1 = "389ad5c84de1ae7cf0e28e381131c98ea87d54fc" uuid = "fa961155-64e5-5f13-b03f-caf6b980ea82" version = "0.5.0" [[deps.CUDA]] deps = ["AbstractFFTs", "Adapt", "BFloat16s", "CEnum", "CUDA_Driver_jll", "CUDA_Runtime_Discovery", "CUDA_Runtime_jll", "Crayons", "DataFrames", "ExprTools", "GPUArrays", "GPUCompiler", "KernelAbstractions", "LLVM", "LLVMLoopInfo", "LazyArtifacts", "Libdl", "LinearAlgebra", "Logging", "NVTX", "Preferences", "PrettyTables", "Printf", "Random", "Random123", "RandomNumbers", "Reexport", "Requires", "SparseArrays", "StaticArrays", "Statistics", "demumble_jll"] git-tree-sha1 = "e0725a467822697171af4dae15cec10b4fc19053" uuid = "052768ef-5323-5732-b1bb-66c8b64840ba" version = "5.5.2" [deps.CUDA.extensions] ChainRulesCoreExt = "ChainRulesCore" EnzymeCoreExt = "EnzymeCore" SpecialFunctionsExt = "SpecialFunctions" [deps.CUDA.weakdeps] ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" EnzymeCore = "f151be2c-9106-41f4-ab19-57ee4f262869" SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b" [[deps.CUDA_Driver_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] git-tree-sha1 = "ccd1e54610c222fadfd4737dac66bff786f63656" uuid = "4ee394cb-3365-5eb0-8335-949819d2adfc" version = "0.10.3+0" [[deps.CUDA_Runtime_Discovery]] deps = ["Libdl"] git-tree-sha1 = "33576c7c1b2500f8e7e6baa082e04563203b3a45" uuid = "1af6417a-86b4-443c-805f-a4643ffb695f" version = "0.3.5" [[deps.CUDA_Runtime_jll]] deps = ["Artifacts", "CUDA_Driver_jll", "JLLWrappers", "LazyArtifacts", "Libdl", "TOML"] git-tree-sha1 = "e43727b237b2879a34391eeb81887699a26f8f2f" uuid = "76a88914-d11a-5bdc-97e0-2f5a05c973a2" version = "0.15.3+0" [[deps.ColorTypes]] deps = ["FixedPointNumbers", "Random"] git-tree-sha1 = "b10d0b65641d57b8b4d5e234446582de5047050d" uuid = "3da002f7-5984-5a60-b8a6-cbb66c0b333f" version = "0.11.5" [[deps.Colors]] deps = ["ColorTypes", "FixedPointNumbers", "Reexport"] git-tree-sha1 = "362a287c3aa50601b0bc359053d5c2468f0e7ce0" uuid = "5ae59095-9a9b-59fe-a467-6f913c188581" version = "0.12.11" [[deps.Compat]] deps = ["TOML", "UUIDs"] git-tree-sha1 = "8ae8d32e09f0dcf42a36b90d4e17f5dd2e4c4215" uuid = "34da2185-b29b-5c13-b0c7-acf172513d20" version = "4.16.0" weakdeps = ["Dates", "LinearAlgebra"] [deps.Compat.extensions] CompatLinearAlgebraExt = "LinearAlgebra" [[deps.CompilerSupportLibraries_jll]] deps = ["Artifacts", "Libdl"] uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae" version = "1.1.1+0" [[deps.Crayons]] git-tree-sha1 = "249fe38abf76d48563e2f4556bebd215aa317e15" uuid = "a8cc5b0e-0ffa-5ad4-8c14-923d3ee1735f" version = "4.1.1" [[deps.DataAPI]] git-tree-sha1 = "abe83f3a2f1b857aac70ef8b269080af17764bbe" uuid = "9a962f9c-6df0-11e9-0e5d-c546b8b5ee8a" version = "1.16.0" [[deps.DataFrames]] deps = ["Compat", "DataAPI", "DataStructures", "Future", "InlineStrings", "InvertedIndices", "IteratorInterfaceExtensions", "LinearAlgebra", "Markdown", "Missings", "PooledArrays", "PrecompileTools", "PrettyTables", "Printf", "Random", "Reexport", "SentinelArrays", "SortingAlgorithms", "Statistics", "TableTraits", "Tables", "Unicode"] git-tree-sha1 = "fb61b4812c49343d7ef0b533ba982c46021938a6" uuid = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" version = "1.7.0" [[deps.DataStructures]] deps = ["Compat", "InteractiveUtils", "OrderedCollections"] git-tree-sha1 = "1d0a14036acb104d9e89698bd408f63ab58cdc82" uuid = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" version = "0.18.20" [[deps.DataValueInterfaces]] git-tree-sha1 = "bfc1187b79289637fa0ef6d4436ebdfe6905cbd6" uuid = "e2d170a0-9d28-54be-80f0-106bbe20a464" version = "1.0.0" [[deps.Dates]] deps = ["Printf"] uuid = "ade2ca70-3891-5945-98fb-dc099432e06a" [[deps.Distributed]] deps = ["Random", "Serialization", "Sockets"] uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b" [[deps.DocStringExtensions]] deps = ["LibGit2"] git-tree-sha1 = "2fb1e02f2b635d0845df5d7c167fec4dd739b00d" uuid = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" version = "0.9.3" [[deps.Downloads]] deps = ["ArgTools", "FileWatching", "LibCURL", "NetworkOptions"] uuid = "f43a241f-c20a-4ad4-852c-f6b1247861c6" version = "1.6.0" [[deps.ExprTools]] git-tree-sha1 = "27415f162e6028e81c72b82ef756bf321213b6ec" uuid = "e2ba6199-217a-4e67-a87a-7c52f15ade04" version = "0.1.10" [[deps.FileWatching]] uuid = "7b1f6079-737a-58dc-b8bc-7a2ca5c1b5ee" [[deps.FixedPointNumbers]] deps = ["Statistics"] git-tree-sha1 = "05882d6995ae5c12bb5f36dd2ed3f61c98cbb172" uuid = "53c48c17-4a7d-5ca2-90c5-79b7896eea93" version = "0.8.5" [[deps.Future]] deps = ["Random"] uuid = "9fa8497b-333b-5362-9e8d-4d0656e87820" [[deps.GPUArrays]] deps = ["Adapt", "GPUArraysCore", "LLVM", "LinearAlgebra", "Printf", "Random", "Reexport", "Serialization", "Statistics"] git-tree-sha1 = "62ee71528cca49be797076a76bdc654a170a523e" uuid = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7" version = "10.3.1" [[deps.GPUArraysCore]] deps = ["Adapt"] git-tree-sha1 = "ec632f177c0d990e64d955ccc1b8c04c485a0950" uuid = "46192b85-c4d5-4398-a991-12ede77f4527" version = "0.1.6" [[deps.GPUCompiler]] deps = ["ExprTools", "InteractiveUtils", "LLVM", "Libdl", "Logging", "PrecompileTools", "Preferences", "Scratch", "Serialization", "TOML", "TimerOutputs", "UUIDs"] git-tree-sha1 = "1d6f290a5eb1201cd63574fbc4440c788d5cb38f" uuid = "61eb1bfa-7361-4325-ad38-22787b887f55" version = "0.27.8" [[deps.Hwloc_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl"] git-tree-sha1 = "dd3b49277ec2bb2c6b94eb1604d4d0616016f7a6" uuid = "e33a78d0-f292-5ffc-b300-72abe9b543c8" version = "2.11.2+0" [[deps.InlineStrings]] git-tree-sha1 = "45521d31238e87ee9f9732561bfee12d4eebd52d" uuid = "842dd82b-1e85-43dc-bf29-5d0ee9dffc48" version = "1.4.2" [deps.InlineStrings.extensions] ArrowTypesExt = "ArrowTypes" ParsersExt = "Parsers" [deps.InlineStrings.weakdeps] ArrowTypes = "31f734f8-188a-4ce0-8406-c8a06bd891cd" Parsers = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0" [[deps.InteractiveUtils]] deps = ["Markdown"] uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240" [[deps.InvertedIndices]] git-tree-sha1 = "0dc7b50b8d436461be01300fd8cd45aa0274b038" uuid = "41ab1584-1d38-5bbf-9106-f11c6c58b48f" version = "1.3.0" [[deps.IteratorInterfaceExtensions]] git-tree-sha1 = "a3f24677c21f5bbe9d2a714f95dcd58337fb2856" uuid = "82899510-4779-5014-852e-03e436cf321d" version = "1.0.0" [[deps.JLLWrappers]] deps = ["Artifacts", "Preferences"] git-tree-sha1 = "f389674c99bfcde17dc57454011aa44d5a260a40" uuid = "692b3bcd-3c85-4b1f-b108-f13ce0eb3210" version = "1.6.0" [[deps.JuliaNVTXCallbacks_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] git-tree-sha1 = "af433a10f3942e882d3c671aacb203e006a5808f" uuid = "9c1d0b0a-7046-5b2e-a33f-ea22f176ac7e" version = "0.2.1+0" [[deps.KernelAbstractions]] deps = ["Adapt", "Atomix", "InteractiveUtils", "MacroTools", "PrecompileTools", "Requires", "StaticArrays", "UUIDs", "UnsafeAtomics", "UnsafeAtomicsLLVM"] git-tree-sha1 = "5126765c5847f74758c411c994312052eb7117ef" uuid = "63c18a36-062a-441e-b654-da1e3ab1ce7c" version = "0.9.27" [deps.KernelAbstractions.extensions] EnzymeExt = "EnzymeCore" LinearAlgebraExt = "LinearAlgebra" SparseArraysExt = "SparseArrays" [deps.KernelAbstractions.weakdeps] EnzymeCore = "f151be2c-9106-41f4-ab19-57ee4f262869" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" [[deps.LLVM]] deps = ["CEnum", "LLVMExtra_jll", "Libdl", "Preferences", "Printf", "Requires", "Unicode"] git-tree-sha1 = "4ad43cb0a4bb5e5b1506e1d1f48646d7e0c80363" uuid = "929cbde3-209d-540e-8aea-75f648917ca0" version = "9.1.2" weakdeps = ["BFloat16s"] [deps.LLVM.extensions] BFloat16sExt = "BFloat16s" [[deps.LLVMExtra_jll]] deps = ["Artifacts", "JLLWrappers", "LazyArtifacts", "Libdl", "TOML"] git-tree-sha1 = "05a8bd5a42309a9ec82f700876903abce1017dd3" uuid = "dad2f222-ce93-54a1-a47d-0025e8a3acab" version = "0.0.34+0" [[deps.LLVMLoopInfo]] git-tree-sha1 = "2e5c102cfc41f48ae4740c7eca7743cc7e7b75ea" uuid = "8b046642-f1f6-4319-8d3c-209ddc03c586" version = "1.0.0" [[deps.LaTeXStrings]] git-tree-sha1 = "50901ebc375ed41dbf8058da26f9de442febbbec" uuid = "b964fa9f-0449-5b57-a5c2-d3ea65f4040f" version = "1.3.1" [[deps.LazyArtifacts]] deps = ["Artifacts", "Pkg"] uuid = "4af54fe1-eca0-43a8-85a7-787d91b784e3" [[deps.LibCURL]] deps = ["LibCURL_jll", "MozillaCACerts_jll"] uuid = "b27032c2-a3e7-50c8-80cd-2d36dbcbfd21" version = "0.6.4" [[deps.LibCURL_jll]] deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll", "Zlib_jll", "nghttp2_jll"] uuid = "deac9b47-8bc7-5906-a0fe-35ac56dc84c0" version = "8.4.0+0" [[deps.LibGit2]] deps = ["Base64", "LibGit2_jll", "NetworkOptions", "Printf", "SHA"] uuid = "76f85450-5226-5b5a-8eaa-529ad045b433" [[deps.LibGit2_jll]] deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll"] uuid = "e37daf67-58a4-590a-8e99-b0245dd2ffc5" version = "1.6.4+0" [[deps.LibSSH2_jll]] deps = ["Artifacts", "Libdl", "MbedTLS_jll"] uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8" version = "1.11.0+1" [[deps.Libdl]] uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb" [[deps.LinearAlgebra]] deps = ["Libdl", "OpenBLAS_jll", "libblastrampoline_jll"] uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" [[deps.Logging]] uuid = "56ddb016-857b-54e1-b83d-db4d58db5568" [[deps.MPI]] deps = ["Distributed", "DocStringExtensions", "Libdl", "MPICH_jll", "MPIPreferences", "MPItrampoline_jll", "MicrosoftMPI_jll", "OpenMPI_jll", "PkgVersion", "PrecompileTools", "Requires", "Serialization", "Sockets"] git-tree-sha1 = "892676019c58f34e38743bc989b0eca5bce5edc5" uuid = "da04e1cc-30fd-572f-bb4f-1f8673147195" version = "0.20.22" [deps.MPI.extensions] AMDGPUExt = "AMDGPU" CUDAExt = "CUDA" [deps.MPI.weakdeps] AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e" CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" [[deps.MPICH_jll]] deps = ["Artifacts", "CompilerSupportLibraries_jll", "Hwloc_jll", "JLLWrappers", "LazyArtifacts", "Libdl", "MPIPreferences", "TOML"] git-tree-sha1 = "7715e65c47ba3941c502bffb7f266a41a7f54423" uuid = "7cb0a576-ebde-5e09-9194-50597f1243b4" version = "4.2.3+0" [[deps.MPIPreferences]] deps = ["Libdl", "Preferences"] git-tree-sha1 = "c105fe467859e7f6e9a852cb15cb4301126fac07" uuid = "3da0fdf6-3ccc-4f1b-acd9-58baa6c99267" version = "0.1.11" [[deps.MPItrampoline_jll]] deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "LazyArtifacts", "Libdl", "MPIPreferences", "TOML"] git-tree-sha1 = "fde81c9f9c94fe5fbeaed7b3f1330305cf9a327c" uuid = "f1f71cc9-e9ae-5b93-9b94-4fe0e1ad3748" version = "5.5.0+0" [[deps.MacroTools]] deps = ["Markdown", "Random"] git-tree-sha1 = "2fa9ee3e63fd3a4f7a9a4f4744a52f4856de82df" uuid = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09" version = "0.5.13" [[deps.Markdown]] deps = ["Base64"] uuid = "d6f4376e-aef5-505a-96c1-9c027394607a" [[deps.MbedTLS_jll]] deps = ["Artifacts", "Libdl"] uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1" version = "2.28.2+1" [[deps.MicrosoftMPI_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] git-tree-sha1 = "f12a29c4400ba812841c6ace3f4efbb6dbb3ba01" uuid = "9237b28f-5490-5468-be7b-bb81f5f5e6cf" version = "10.1.4+2" [[deps.Missings]] deps = ["DataAPI"] git-tree-sha1 = "ec4f7fbeab05d7747bdf98eb74d130a2a2ed298d" uuid = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28" version = "1.2.0" [[deps.MozillaCACerts_jll]] uuid = "14a3606d-f60d-562e-9121-12d972cd8159" version = "2023.1.10" [[deps.NVTX]] deps = ["Colors", "JuliaNVTXCallbacks_jll", "Libdl", "NVTX_jll"] git-tree-sha1 = "53046f0483375e3ed78e49190f1154fa0a4083a1" uuid = "5da4648a-3479-48b8-97b9-01cb529c0a1f" version = "0.3.4" [[deps.NVTX_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] git-tree-sha1 = "ce3269ed42816bf18d500c9f63418d4b0d9f5a3b" uuid = "e98f9f5b-d649-5603-91fd-7774390e6439" version = "3.1.0+2" [[deps.NetworkOptions]] uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908" version = "1.2.0" [[deps.OpenBLAS_jll]] deps = ["Artifacts", "CompilerSupportLibraries_jll", "Libdl"] uuid = "4536629a-c528-5b80-bd46-f80d51c5b363" version = "0.3.23+4" [[deps.OpenMPI_jll]] deps = ["Artifacts", "CompilerSupportLibraries_jll", "Hwloc_jll", "JLLWrappers", "LazyArtifacts", "Libdl", "MPIPreferences", "TOML", "Zlib_jll"] git-tree-sha1 = "bfce6d523861a6c562721b262c0d1aaeead2647f" uuid = "fe0851c0-eecd-5654-98d4-656369965a5c" version = "5.0.5+0" [[deps.OrderedCollections]] git-tree-sha1 = "dfdf5519f235516220579f949664f1bf44e741c5" uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" version = "1.6.3" [[deps.Pkg]] deps = ["Artifacts", "Dates", "Downloads", "FileWatching", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "Serialization", "TOML", "Tar", "UUIDs", "p7zip_jll"] uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" version = "1.10.0" [[deps.PkgVersion]] deps = ["Pkg"] git-tree-sha1 = "f9501cc0430a26bc3d156ae1b5b0c1b47af4d6da" uuid = "eebad327-c553-4316-9ea0-9fa01ccd7688" version = "0.3.3" [[deps.PooledArrays]] deps = ["DataAPI", "Future"] git-tree-sha1 = "36d8b4b899628fb92c2749eb488d884a926614d3" uuid = "2dfb63ee-cc39-5dd5-95bd-886bf059d720" version = "1.4.3" [[deps.PrecompileTools]] deps = ["Preferences"] git-tree-sha1 = "5aa36f7049a63a1528fe8f7c3f2113413ffd4e1f" uuid = "aea7be01-6a6a-4083-8856-8a6e6704d82a" version = "1.2.1" [[deps.Preferences]] deps = ["TOML"] git-tree-sha1 = "9306f6085165d270f7e3db02af26a400d580f5c6" uuid = "21216c6a-2e73-6563-6e65-726566657250" version = "1.4.3" [[deps.PrettyTables]] deps = ["Crayons", "LaTeXStrings", "Markdown", "PrecompileTools", "Printf", "Reexport", "StringManipulation", "Tables"] git-tree-sha1 = "1101cd475833706e4d0e7b122218257178f48f34" uuid = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d" version = "2.4.0" [[deps.Printf]] deps = ["Unicode"] uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7" [[deps.REPL]] deps = ["InteractiveUtils", "Markdown", "Sockets", "Unicode"] uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb" [[deps.Random]] deps = ["SHA"] uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" [[deps.Random123]] deps = ["Random", "RandomNumbers"] git-tree-sha1 = "4743b43e5a9c4a2ede372de7061eed81795b12e7" uuid = "74087812-796a-5b5d-8853-05524746bad3" version = "1.7.0" [[deps.RandomNumbers]] deps = ["Random"] git-tree-sha1 = "c6ec94d2aaba1ab2ff983052cf6a606ca5985902" uuid = "e6cf234a-135c-5ec9-84dd-332b85af5143" version = "1.6.0" [[deps.Reexport]] git-tree-sha1 = "45e428421666073eab6f2da5c9d310d99bb12f9b" uuid = "189a3867-3050-52da-a836-e630ba90ab69" version = "1.2.2" [[deps.Requires]] deps = ["UUIDs"] git-tree-sha1 = "838a3a4188e2ded87a4f9f184b4b0d78a1e91cb7" uuid = "ae029012-a4dd-5104-9daa-d747884805df" version = "1.3.0" [[deps.SHA]] uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce" version = "0.7.0" [[deps.Scratch]] deps = ["Dates"] git-tree-sha1 = "3bac05bc7e74a75fd9cba4295cde4045d9fe2386" uuid = "6c6a2e73-6563-6170-7368-637461726353" version = "1.2.1" [[deps.SentinelArrays]] deps = ["Dates", "Random"] git-tree-sha1 = "ff11acffdb082493657550959d4feb4b6149e73a" uuid = "91c51154-3ec4-41a3-a24f-3f23e20d615c" version = "1.4.5" [[deps.Serialization]] uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b" [[deps.Sockets]] uuid = "6462fe0b-24de-5631-8697-dd941f90decc" [[deps.SortingAlgorithms]] deps = ["DataStructures"] git-tree-sha1 = "66e0a8e672a0bdfca2c3f5937efb8538b9ddc085" uuid = "a2af1166-a08f-5f64-846c-94a0d3cef48c" version = "1.2.1" [[deps.SparseArrays]] deps = ["Libdl", "LinearAlgebra", "Random", "Serialization", "SuiteSparse_jll"] uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" version = "1.10.0" [[deps.StaticArrays]] deps = ["LinearAlgebra", "PrecompileTools", "Random", "StaticArraysCore"] git-tree-sha1 = "eeafab08ae20c62c44c8399ccb9354a04b80db50" uuid = "90137ffa-7385-5640-81b9-e52037218182" version = "1.9.7" [deps.StaticArrays.extensions] StaticArraysChainRulesCoreExt = "ChainRulesCore" StaticArraysStatisticsExt = "Statistics" [deps.StaticArrays.weakdeps] ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" [[deps.StaticArraysCore]] git-tree-sha1 = "192954ef1208c7019899fbf8049e717f92959682" uuid = "1e83bf80-4336-4d27-bf5d-d5a4f845583c" version = "1.4.3" [[deps.Statistics]] deps = ["LinearAlgebra", "SparseArrays"] uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" version = "1.10.0" [[deps.StringManipulation]] deps = ["PrecompileTools"] git-tree-sha1 = "a6b1675a536c5ad1a60e5a5153e1fee12eb146e3" uuid = "892a3eda-7b42-436c-8928-eab12a02cf0e" version = "0.4.0" [[deps.SuiteSparse_jll]] deps = ["Artifacts", "Libdl", "libblastrampoline_jll"] uuid = "bea87d4a-7f5b-5778-9afe-8cc45184846c" version = "7.2.1+1" [[deps.TOML]] deps = ["Dates"] uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76" version = "1.0.3" [[deps.TableTraits]] deps = ["IteratorInterfaceExtensions"] git-tree-sha1 = "c06b2f539df1c6efa794486abfb6ed2022561a39" uuid = "3783bdb8-4a98-5b6b-af9a-565f29a5fe9c" version = "1.0.1" [[deps.Tables]] deps = ["DataAPI", "DataValueInterfaces", "IteratorInterfaceExtensions", "OrderedCollections", "TableTraits"] git-tree-sha1 = "598cd7c1f68d1e205689b1c2fe65a9f85846f297" uuid = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" version = "1.12.0" [[deps.Tar]] deps = ["ArgTools", "SHA"] uuid = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e" version = "1.10.0" [[deps.Test]] deps = ["InteractiveUtils", "Logging", "Random", "Serialization"] uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [[deps.TimerOutputs]] deps = ["ExprTools", "Printf"] git-tree-sha1 = "5a13ae8a41237cff5ecf34f73eb1b8f42fff6531" uuid = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f" version = "0.5.24" [[deps.UUIDs]] deps = ["Random", "SHA"] uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" [[deps.Unicode]] uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" [[deps.UnsafeAtomics]] git-tree-sha1 = "6331ac3440856ea1988316b46045303bef658278" uuid = "013be700-e6cd-48c3-b4a1-df204f14c38f" version = "0.2.1" [[deps.UnsafeAtomicsLLVM]] deps = ["LLVM", "UnsafeAtomics"] git-tree-sha1 = "2d17fabcd17e67d7625ce9c531fb9f40b7c42ce4" uuid = "d80eeb9a-aca5-4d75-85e5-170c8b632249" version = "0.2.1" [[deps.Zlib_jll]] deps = ["Libdl"] uuid = "83775a58-1f1d-513f-b197-d71354ab007a" version = "1.2.13+1" [[deps.demumble_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl"] git-tree-sha1 = "6498e3581023f8e530f34760d18f75a69e3a4ea8" uuid = "1e29f10c-031c-5a83-9565-69cddfc27673" version = "1.3.0+0" [[deps.libblastrampoline_jll]] deps = ["Artifacts", "Libdl"] uuid = "8e850b90-86db-534c-a0d3-1478176c7d93" version = "5.11.0+0" [[deps.nghttp2_jll]] deps = ["Artifacts", "Libdl"] uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d" version = "1.52.0+1" [[deps.p7zip_jll]] deps = ["Artifacts", "Libdl"] uuid = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0" version = "17.4.0+2" ```

Expected behavior

Error should not occur.

Version info

Details on Julia:

Julia Version 1.10.5
Commit 6f3fdf7b362 (2024-08-27 14:19 UTC)
Build Info:
  Official https://julialang.org/ release
Platform Info:
  OS: Linux (x86_64-linux-gnu)
  CPU: 24 × 13th Gen Intel(R) Core(TM) i7-13700K
  WORD_SIZE: 64
  LIBM: libopenlibm
  LLVM: libLLVM-15.0.7 (ORCJIT, goldmont)
Threads: 24 default, 0 interactive, 12 GC (on 24 virtual cores)
Environment:
  JULIA_PKG_USE_CLI_GIT = true
  LD_LIBRARY_PATH = 
  JULIA_EDITOR = code
  JULIA_NUM_THREADS = 

Details on CUDA:

CUDA runtime 12.6, local installation
CUDA driver 12.6
NVIDIA driver 560.35.3

CUDA libraries: 
- CUBLAS: 12.6.1
- CURAND: 10.3.7
- CUFFT: 11.2.6
- CUSOLVER: 11.6.4
- CUSPARSE: 12.5.3
- CUPTI: 2024.3.1 (API 24.0.0)
- NVML: 12.0.0+560.35.3

Julia packages: 
- CUDA: 5.5.2
- CUDA_Driver_jll: 0.10.3+0
- CUDA_Runtime_jll: 0.15.3+0
- CUDA_Runtime_Discovery: 0.3.5

Toolchain:
- Julia: 1.10.5
- LLVM: 15.0.7

Preferences:
- CUDA_Runtime_jll.local: true

1 device:
  0: NVIDIA GeForce RTX 4070 Ti (sm_89, 10.235 GiB / 11.994 GiB available)

Additional context Actual error and back-trace is

ERROR: InvalidIRError: compiling MethodInstance for test() resulted in invalid LLVM IR
Reason: unsupported call to an unknown function (call to __nv_sin)
Stacktrace:
 [1] #sin
   @ ~/.julia/packages/CUDA/2kjXI/src/device/intrinsics/math.jl:20
 [2] sin
   @ ./math.jl:1578
 [3] test
   @ ~/dev/bug-report/test.jl:4
Hint: catch this exception as `err` and call `code_typed(err; interactive = true)` to introspect the erronous code with Cthulhu.jl
Stacktrace:
  [1] check_ir(job::GPUCompiler.CompilerJob{GPUCompiler.PTXCompilerTarget, CUDA.CUDACompilerParams}, args::LLVM.Module)
    @ GPUCompiler ~/.julia/packages/GPUCompiler/2CW9L/src/validation.jl:147
  [2] macro expansion
    @ ~/.julia/packages/GPUCompiler/2CW9L/src/driver.jl:382 [inlined]
  [3] macro expansion
    @ ~/.julia/packages/TimerOutputs/Lw5SP/src/TimerOutput.jl:253 [inlined]
  [4] macro expansion
    @ ~/.julia/packages/GPUCompiler/2CW9L/src/driver.jl:381 [inlined]
  [5] #emit_llvm#188
    @ ~/.julia/packages/GPUCompiler/2CW9L/src/utils.jl:108
  [6] emit_llvm
    @ ~/.julia/packages/GPUCompiler/2CW9L/src/utils.jl:106 [inlined]
  [7] 
    @ GPUCompiler ~/.julia/packages/GPUCompiler/2CW9L/src/driver.jl:100
  [8] codegen
    @ ~/.julia/packages/GPUCompiler/2CW9L/src/driver.jl:82 [inlined]
  [9] compile(target::Symbol, job::GPUCompiler.CompilerJob; kwargs::@Kwargs{})
    @ GPUCompiler ~/.julia/packages/GPUCompiler/2CW9L/src/driver.jl:79
 [10] compile
    @ ~/.julia/packages/GPUCompiler/2CW9L/src/driver.jl:74 [inlined]
 [11] #1145
    @ ~/.julia/packages/CUDA/2kjXI/src/compiler/compilation.jl:250 [inlined]
 [12] JuliaContext(f::CUDA.var"#1145#1148"{GPUCompiler.CompilerJob{…}}; kwargs::@Kwargs{})
    @ GPUCompiler ~/.julia/packages/GPUCompiler/2CW9L/src/driver.jl:34
 [13] JuliaContext(f::Function)
    @ GPUCompiler ~/.julia/packages/GPUCompiler/2CW9L/src/driver.jl:25
 [14] compile(job::GPUCompiler.CompilerJob)
    @ CUDA ~/.julia/packages/CUDA/2kjXI/src/compiler/compilation.jl:249
 [15] actual_compilation(cache::Dict{…}, src::Core.MethodInstance, world::UInt64, cfg::GPUCompiler.CompilerConfig{…}, compiler::typeof(CUDA.compile), linker::typeof(CUDA.link))
    @ GPUCompiler ~/.julia/packages/GPUCompiler/2CW9L/src/execution.jl:237
 [16] cached_compilation(cache::Dict{…}, src::Core.MethodInstance, cfg::GPUCompiler.CompilerConfig{…}, compiler::Function, linker::Function)
    @ GPUCompiler ~/.julia/packages/GPUCompiler/2CW9L/src/execution.jl:151
 [17] macro expansion
    @ ~/.julia/packages/CUDA/2kjXI/src/compiler/execution.jl:380 [inlined]
 [18] macro expansion
    @ ./lock.jl:267 [inlined]
 [19] cufunction(f::typeof(test), tt::Type{Tuple{}}; kwargs::@Kwargs{})
    @ CUDA ~/.julia/packages/CUDA/2kjXI/src/compiler/execution.jl:375
 [20] cufunction(f::typeof(test), tt::Type{Tuple{}})
    @ CUDA ~/.julia/packages/CUDA/2kjXI/src/compiler/execution.jl:372
 [21] top-level scope
    @ ~/.julia/packages/CUDA/2kjXI/src/compiler/execution.jl:112
Some type information was truncated. Use `show(err)` to see complete types.
maleadt commented 2 months ago

The bug template asks for quite some more information, such as the output of CUDA.versioninfo(). At the very least, you should include the actual error and back-trace, because such a simple kernel is obviously expected to work (and covered by CI, so does work in general).

0samuraiE commented 2 months ago

It appears that it was not displayed due to a syntax error in the markdown. The error has been added. Thank you for your consideration.

maleadt commented 2 months ago

Ah sorry, I had edited your post to fix one syntax issue but hadn't spotted the content at the bottom.

Can you try without a local installation? I'd guess that something is off with the libdevice that's being linked here, which should provide a definition of __nv_sin.

0samuraiE commented 2 months ago

Thank you. Without a local installation, this kernel works. Also,

maleadt commented 2 months ago

So an artifact installation works in any case?

Could you upload the following file here (you'll probably have to zip it for GitHub to allow an upload): CUDA.CUDA_Runtime.libdevice

0samuraiE commented 2 months ago

Thank you.

So an artifact installation works in any case?

Probably yes.

Could you upload the following file here (you'll probably have to zip it for GitHub to allow an upload): CUDA.CUDA_Runtime.libdevice

I uploaded libdevice.10.bc.zip.

maleadt commented 2 months ago

Probably yes.

What do you mean, probably? Does or doesn't it work in the two scenario's you've outlined here (with and without --check-bounds)?

I uploaded libdevice.10.bc.zip.

Are you certain that's the one from your local toolkit? It does contain a valid definition of __nv_sin.

0samuraiE commented 2 months ago

Thank you.

What do you mean, probably?

Sorry to be misleading. Absolutely yes.

Are you certain that's the one from your local toolkit? It does contain a valid definition of __nv_sin.

CUDA.CUDA_Runtime.libdevice is "/opt/cuda/nvvm/libdevice/libdevice.10.bc" so I sent it. I don't know how to be sure. But I am sending you the results of the REPL run for your reference.

julia> using CUDA

julia> CUDA.versioninfo()
CUDA runtime 12.6, local installation
CUDA driver 12.6
NVIDIA driver 560.35.3

CUDA libraries: 
- CUBLAS: 12.6.1
- CURAND: 10.3.7
- CUFFT: 11.2.6
- CUSOLVER: 11.6.4
- CUSPARSE: 12.5.3
- CUPTI: 2024.3.1 (API 24.0.0)
- NVML: 12.0.0+560.35.3

Julia packages: 
- CUDA: 5.5.2
- CUDA_Driver_jll: 0.10.3+0
- CUDA_Runtime_jll: 0.15.3+0
- CUDA_Runtime_Discovery: 0.3.5

Toolchain:
- Julia: 1.10.5
- LLVM: 15.0.7

Preferences:
- CUDA_Runtime_jll.local: true

1 device:
  0: NVIDIA GeForce RTX 4070 Ti (sm_89, 10.878 GiB / 11.994 GiB available)

julia> CUDA.CUDA_Runtime.libdevice
"/opt/cuda/nvvm/libdevice/libdevice.10.bc"

julia> function test()
           sin(0)
           return nothing
       end
test (generic function with 1 method)

julia> CUDA.@cuda test()
ERROR: InvalidIRError: compiling MethodInstance for test() resulted in invalid LLVM IR
Reason: unsupported call to an unknown function (call to __nv_sin)
Stacktrace:
 [1] #sin
   @ ~/.julia/packages/CUDA/2kjXI/src/device/intrinsics/math.jl:20
 [2] sin
   @ ./math.jl:1578
 [3] test
   @ ./REPL[4]:2
Hint: catch this exception as `err` and call `code_typed(err; interactive = true)` to introspect the erronous code with Cthulhu.jl
Stacktrace:
  [1] check_ir(job::GPUCompiler.CompilerJob{GPUCompiler.PTXCompilerTarget, CUDA.CUDACompilerParams}, args::LLVM.Module)
    @ GPUCompiler ~/.julia/packages/GPUCompiler/2CW9L/src/validation.jl:147
  [2] macro expansion
    @ ~/.julia/packages/GPUCompiler/2CW9L/src/driver.jl:382 [inlined]
  [3] macro expansion
    @ ~/.julia/packages/TimerOutputs/Lw5SP/src/TimerOutput.jl:253 [inlined]
  [4] macro expansion
    @ ~/.julia/packages/GPUCompiler/2CW9L/src/driver.jl:381 [inlined]
  [5] #emit_llvm#188
    @ ~/.julia/packages/GPUCompiler/2CW9L/src/utils.jl:108
  [6] emit_llvm
    @ ~/.julia/packages/GPUCompiler/2CW9L/src/utils.jl:106 [inlined]
  [7] 
    @ GPUCompiler ~/.julia/packages/GPUCompiler/2CW9L/src/driver.jl:100
  [8] codegen
    @ ~/.julia/packages/GPUCompiler/2CW9L/src/driver.jl:82 [inlined]
  [9] compile(target::Symbol, job::GPUCompiler.CompilerJob; kwargs::@Kwargs{})
    @ GPUCompiler ~/.julia/packages/GPUCompiler/2CW9L/src/driver.jl:79
 [10] compile
    @ ~/.julia/packages/GPUCompiler/2CW9L/src/driver.jl:74 [inlined]
 [11] #1145
    @ ~/.julia/packages/CUDA/2kjXI/src/compiler/compilation.jl:250 [inlined]
 [12] JuliaContext(f::CUDA.var"#1145#1148"{GPUCompiler.CompilerJob{…}}; kwargs::@Kwargs{})
    @ GPUCompiler ~/.julia/packages/GPUCompiler/2CW9L/src/driver.jl:34
 [13] JuliaContext(f::Function)
    @ GPUCompiler ~/.julia/packages/GPUCompiler/2CW9L/src/driver.jl:25
 [14] compile(job::GPUCompiler.CompilerJob)
    @ CUDA ~/.julia/packages/CUDA/2kjXI/src/compiler/compilation.jl:249
 [15] actual_compilation(cache::Dict{…}, src::Core.MethodInstance, world::UInt64, cfg::GPUCompiler.CompilerConfig{…}, compiler::typeof(CUDA.compile), linker::typeof(CUDA.link))
    @ GPUCompiler ~/.julia/packages/GPUCompiler/2CW9L/src/execution.jl:237
 [16] cached_compilation(cache::Dict{…}, src::Core.MethodInstance, cfg::GPUCompiler.CompilerConfig{…}, compiler::Function, linker::Function)
    @ GPUCompiler ~/.julia/packages/GPUCompiler/2CW9L/src/execution.jl:151
 [17] macro expansion
    @ ~/.julia/packages/CUDA/2kjXI/src/compiler/execution.jl:380 [inlined]
 [18] macro expansion
    @ ./lock.jl:267 [inlined]
 [19] cufunction(f::typeof(test), tt::Type{Tuple{}}; kwargs::@Kwargs{})
    @ CUDA ~/.julia/packages/CUDA/2kjXI/src/compiler/execution.jl:375
 [20] cufunction(f::typeof(test), tt::Type{Tuple{}})
    @ CUDA ~/.julia/packages/CUDA/2kjXI/src/compiler/execution.jl:372
 [21] top-level scope
    @ ~/.julia/packages/CUDA/2kjXI/src/compiler/execution.jl:112
Some type information was truncated. Use `show(err)` to see complete types.

julia> 
maleadt commented 1 month ago

What do you mean, probably?

Sorry to be misleading. Absolutely yes.

OK, so at least you have a workaround.


The issue doesn't seem to be with the device library; using yours on my system works just fine.

Could you do @device_code dir="..." @cuda ... in both cases (once using the local toolkit, once using artifacts, but both cases using the --check-bounds setting that is required to reproduce the issue with the local toolkit) and zip + upload the results here?

julia> @device_code dir="/tmp/foo" @cuda test()

shell> ls /tmp/foo
test_1.asm  test_1.lowered.jl  test_1.opt.ll  test_1.typed.jl  test_1.unopt.ll
0samuraiE commented 1 month ago

Thank you. I tried but failed when --check-bounds=no and with local toolkit. I will send you the ones that did not show any errors. report.zip

julia> @device_code dir="/tmp/foo" @cuda test()
ERROR: InvalidIRError: compiling MethodInstance for test() resulted in invalid LLVM IR
Reason: unsupported call to an unknown function (call to __nv_sin)
Stacktrace:
 [1] #sin
   @ ~/.julia/packages/CUDA/2kjXI/src/device/intrinsics/math.jl:20
 [2] sin
   @ ./math.jl:1578
 [3] test
   @ ~/dev/bug-report/test.jl:7
Hint: catch this exception as `err` and call `code_typed(err; interactive = true)` to introspect the erronous code with Cthulhu.jl
Stacktrace:
  [1] check_ir(job::GPUCompiler.CompilerJob{GPUCompiler.PTXCompilerTarget, CUDA.CUDACompilerParams}, args::LLVM.Module)
    @ GPUCompiler ~/.julia/packages/GPUCompiler/2CW9L/src/validation.jl:147
  [2] macro expansion
    @ ~/.julia/packages/GPUCompiler/2CW9L/src/driver.jl:382 [inlined]
  [3] macro expansion
    @ ~/.julia/packages/TimerOutputs/Lw5SP/src/TimerOutput.jl:253 [inlined]
  [4] macro expansion
    @ ~/.julia/packages/GPUCompiler/2CW9L/src/driver.jl:381 [inlined]
  [5] #emit_llvm#188
    @ ~/.julia/packages/GPUCompiler/2CW9L/src/utils.jl:108
  [6] emit_llvm
    @ ~/.julia/packages/GPUCompiler/2CW9L/src/utils.jl:106 [inlined]
  [7] 
    @ GPUCompiler ~/.julia/packages/GPUCompiler/2CW9L/src/driver.jl:100
  [8] codegen
    @ ~/.julia/packages/GPUCompiler/2CW9L/src/driver.jl:82 [inlined]
  [9] compile(target::Symbol, job::GPUCompiler.CompilerJob; kwargs::@Kwargs{})
    @ GPUCompiler ~/.julia/packages/GPUCompiler/2CW9L/src/driver.jl:79
 [10] compile
    @ ~/.julia/packages/GPUCompiler/2CW9L/src/driver.jl:74 [inlined]
 [11] #1145
    @ ~/.julia/packages/CUDA/2kjXI/src/compiler/compilation.jl:250 [inlined]
 [12] JuliaContext(f::CUDA.var"#1145#1148"{GPUCompiler.CompilerJob{…}}; kwargs::@Kwargs{})
    @ GPUCompiler ~/.julia/packages/GPUCompiler/2CW9L/src/driver.jl:34
 [13] JuliaContext(f::Function)
    @ GPUCompiler ~/.julia/packages/GPUCompiler/2CW9L/src/driver.jl:25
 [14] compile(job::GPUCompiler.CompilerJob)
    @ CUDA ~/.julia/packages/CUDA/2kjXI/src/compiler/compilation.jl:249
 [15] actual_compilation(cache::Dict{…}, src::Core.MethodInstance, world::UInt64, cfg::GPUCompiler.CompilerConfig{…}, compiler::typeof(CUDA.compile), linker::typeof(CUDA.link))
    @ GPUCompiler ~/.julia/packages/GPUCompiler/2CW9L/src/execution.jl:237
 [16] cached_compilation(cache::Dict{…}, src::Core.MethodInstance, cfg::GPUCompiler.CompilerConfig{…}, compiler::Function, linker::Function)
    @ GPUCompiler ~/.julia/packages/GPUCompiler/2CW9L/src/execution.jl:151
 [17] macro expansion
    @ ~/.julia/packages/CUDA/2kjXI/src/compiler/execution.jl:380 [inlined]
 [18] macro expansion
    @ ./lock.jl:267 [inlined]
 [19] cufunction(f::typeof(test), tt::Type{Tuple{}}; kwargs::@Kwargs{})
    @ CUDA ~/.julia/packages/CUDA/2kjXI/src/compiler/execution.jl:375
 [20] cufunction(f::typeof(test), tt::Type{Tuple{}})
    @ CUDA ~/.julia/packages/CUDA/2kjXI/src/compiler/execution.jl:372
 [21] macro expansion
    @ ~/.julia/packages/CUDA/2kjXI/src/compiler/execution.jl:112 [inlined]
 [22] macro expansion
    @ ~/.julia/packages/GPUCompiler/2CW9L/src/reflection.jl:185 [inlined]
 [23] top-level scope
    @ REPL[2]:1
Some type information was truncated. Use `show(err)` to see complete types.