Open yuvalwas opened 1 year ago
This throws an extremely long message that took quite some time to finish
That's ultimately due to the CUDA driver not handling the amount of output we throw at it. It shouldn't crash, but we probably also shouldn't be emitting that much output in the first place.
Maybe we could have a global atomic flag that keeps track of how many exceptions have been reported, so that the signal_exception
method can bail out.
Hi sir, I would be happy to address this issue , According to me the bug could be fixed by following these things for the below provide method. --> adding a global flag variable --> increment flag with the number of exceptions received --> if it crosses a certain thresold value , we exist the method
https://github.com/JuliaGPU/CUDA.jl/blob/master/src/device/runtime.jl#L29-L48
if you agree I can start a PR on it.
Yes, that may work. Also, no need to ask for permission to work on issues!
https://developer.nvidia.com/blog/asynchronous-error-reporting-when-printf-just-wont-do/ may contain some useful information for this
Describe the bug
I'm not sure whether this is helpful, but the error message explicitly asked to be submitted, so here it is. I produced it by mistake.
To reproduce
This throws an extremely long message that took quite some time to finish, VSCode could not fit it all, so this does not include the beginning of the message. Each of the below lines was repeated many times.
And finally
Manifest.toml
``` # This file is machine-generated - editing it directly is not advised julia_version = "1.9.2" manifest_format = "2.0" project_hash = "4f2a4aecbf90fe45c3ba83d86b99f39c4bcb13fb" [[deps.AbstractFFTs]] deps = ["LinearAlgebra"] git-tree-sha1 = "d92ad398961a3ed262d8bf04a1a2b8340f915fef" uuid = "621f4979-c628-5d54-868e-fcf4e3e8185c" version = "1.5.0" [deps.AbstractFFTs.extensions] AbstractFFTsChainRulesCoreExt = "ChainRulesCore" AbstractFFTsTestExt = "Test" [deps.AbstractFFTs.weakdeps] ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [[deps.Adapt]] deps = ["LinearAlgebra", "Requires"] git-tree-sha1 = "76289dc51920fdc6e0013c872ba9551d54961c24" uuid = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" version = "3.6.2" weakdeps = ["StaticArrays"] [deps.Adapt.extensions] AdaptStaticArraysExt = "StaticArrays" [[deps.ArgTools]] uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f" version = "1.1.1" [[deps.Artifacts]] uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" [[deps.Atomix]] deps = ["UnsafeAtomics"] git-tree-sha1 = "c06a868224ecba914baa6942988e2f2aade419be" uuid = "a9b6321e-bd34-4604-b9c9-b65b8de01458" version = "0.1.0" [[deps.BFloat16s]] deps = ["LinearAlgebra", "Printf", "Random", "Test"] git-tree-sha1 = "dbf84058d0a8cbbadee18d25cf606934b22d7c66" uuid = "ab4f0b2a-ad5b-11e8-123f-65d77653426b" version = "0.4.2" [[deps.Base64]] uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" [[deps.BenchmarkTools]] deps = ["JSON", "Logging", "Printf", "Profile", "Statistics", "UUIDs"] git-tree-sha1 = "d9a9701b899b30332bbcb3e1679c41cce81fb0e8" uuid = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" version = "1.3.2" [[deps.CEnum]] git-tree-sha1 = "eb4cb44a499229b3b8426dcfb5dd85333951ff90" uuid = "fa961155-64e5-5f13-b03f-caf6b980ea82" version = "0.4.2" [[deps.CUDA]] deps = ["AbstractFFTs", "Adapt", "BFloat16s", "CEnum", "CUDA_Driver_jll", "CUDA_Runtime_Discovery", "CUDA_Runtime_jll", "ExprTools", "GPUArrays", "GPUCompiler", "KernelAbstractions", "LLVM", "LazyArtifacts", "Libdl", "LinearAlgebra", "Logging", "Preferences", "Printf", "Random", "Random123", "RandomNumbers", "Reexport", "Requires", "SparseArrays", "SpecialFunctions", "UnsafeAtomicsLLVM"] git-tree-sha1 = "968c1365e2992824c3e7a794e30907483f8469a9" uuid = "052768ef-5323-5732-b1bb-66c8b64840ba" version = "4.4.1" [[deps.CUDA_Driver_jll]] deps = ["Artifacts", "JLLWrappers", "LazyArtifacts", "Libdl", "Pkg"] git-tree-sha1 = "498f45593f6ddc0adff64a9310bb6710e851781b" uuid = "4ee394cb-3365-5eb0-8335-949819d2adfc" version = "0.5.0+1" [[deps.CUDA_Runtime_Discovery]] deps = ["Libdl"] git-tree-sha1 = "bcc4a23cbbd99c8535a5318455dcf0f2546ec536" uuid = "1af6417a-86b4-443c-805f-a4643ffb695f" version = "0.2.2" [[deps.CUDA_Runtime_jll]] deps = ["Artifacts", "CUDA_Driver_jll", "JLLWrappers", "LazyArtifacts", "Libdl", "TOML"] git-tree-sha1 = "5248d9c45712e51e27ba9b30eebec65658c6ce29" uuid = "76a88914-d11a-5bdc-97e0-2f5a05c973a2" version = "0.6.0+0" [[deps.CommonSubexpressions]] deps = ["MacroTools", "Test"] git-tree-sha1 = "7b8a93dba8af7e3b42fecabf646260105ac373f7" uuid = "bbf7d656-a473-5ed7-a52c-81e309532950" version = "0.3.0" [[deps.CompilerSupportLibraries_jll]] deps = ["Artifacts", "Libdl"] uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae" version = "1.0.5+0" [[deps.Dates]] deps = ["Printf"] uuid = "ade2ca70-3891-5945-98fb-dc099432e06a" [[deps.DiffResults]] deps = ["StaticArraysCore"] git-tree-sha1 = "782dd5f4561f5d267313f23853baaaa4c52ea621" uuid = "163ba53b-c6d8-5494-b064-1a9d43ac40c5" version = "1.1.0" [[deps.DiffRules]] deps = ["IrrationalConstants", "LogExpFunctions", "NaNMath", "Random", "SpecialFunctions"] git-tree-sha1 = "23163d55f885173722d1e4cf0f6110cdbaf7e272" uuid = "b552c78f-8df3-52c6-915a-8e097449b14b" version = "1.15.1" [[deps.DocStringExtensions]] deps = ["LibGit2"] git-tree-sha1 = "2fb1e02f2b635d0845df5d7c167fec4dd739b00d" uuid = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" version = "0.9.3" [[deps.Downloads]] deps = ["ArgTools", "FileWatching", "LibCURL", "NetworkOptions"] uuid = "f43a241f-c20a-4ad4-852c-f6b1247861c6" version = "1.6.0" [[deps.ExprTools]] git-tree-sha1 = "27415f162e6028e81c72b82ef756bf321213b6ec" uuid = "e2ba6199-217a-4e67-a87a-7c52f15ade04" version = "0.1.10" [[deps.FileWatching]] uuid = "7b1f6079-737a-58dc-b8bc-7a2ca5c1b5ee" [[deps.ForwardDiff]] deps = ["CommonSubexpressions", "DiffResults", "DiffRules", "LinearAlgebra", "LogExpFunctions", "NaNMath", "Preferences", "Printf", "Random", "SpecialFunctions"] git-tree-sha1 = "00e252f4d706b3d55a8863432e742bf5717b498d" uuid = "f6369f11-7733-5829-9624-2563aa707210" version = "0.10.35" weakdeps = ["StaticArrays"] [deps.ForwardDiff.extensions] ForwardDiffStaticArraysExt = "StaticArrays" [[deps.GPUArrays]] deps = ["Adapt", "GPUArraysCore", "LLVM", "LinearAlgebra", "Printf", "Random", "Reexport", "Serialization", "Statistics"] git-tree-sha1 = "2e57b4a4f9cc15e85a24d603256fe08e527f48d1" uuid = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7" version = "8.8.1" [[deps.GPUArraysCore]] deps = ["Adapt"] git-tree-sha1 = "2d6ca471a6c7b536127afccfa7564b5b39227fe0" uuid = "46192b85-c4d5-4398-a991-12ede77f4527" version = "0.1.5" [[deps.GPUCompiler]] deps = ["ExprTools", "InteractiveUtils", "LLVM", "Libdl", "Logging", "Scratch", "TimerOutputs", "UUIDs"] git-tree-sha1 = "72b2e3c2ba583d1a7aa35129e56cf92e07c083e3" uuid = "61eb1bfa-7361-4325-ad38-22787b887f55" version = "0.21.4" [[deps.InteractiveUtils]] deps = ["Markdown"] uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240" [[deps.IrrationalConstants]] git-tree-sha1 = "630b497eafcc20001bba38a4651b327dcfc491d2" uuid = "92d709cd-6900-40b7-9082-c6be49f344b6" version = "0.2.2" [[deps.JLLWrappers]] deps = ["Preferences"] git-tree-sha1 = "abc9885a7ca2052a736a600f7fa66209f96506e1" uuid = "692b3bcd-3c85-4b1f-b108-f13ce0eb3210" version = "1.4.1" [[deps.JSON]] deps = ["Dates", "Mmap", "Parsers", "Unicode"] git-tree-sha1 = "31e996f0a15c7b280ba9f76636b3ff9e2ae58c9a" uuid = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" version = "0.21.4" [[deps.KernelAbstractions]] deps = ["Adapt", "Atomix", "InteractiveUtils", "LinearAlgebra", "MacroTools", "PrecompileTools", "Requires", "SparseArrays", "StaticArrays", "UUIDs", "UnsafeAtomics", "UnsafeAtomicsLLVM"] git-tree-sha1 = "4c5875e4c228247e1c2b087669846941fb6e0118" uuid = "63c18a36-062a-441e-b654-da1e3ab1ce7c" version = "0.9.8" [deps.KernelAbstractions.extensions] EnzymeExt = "EnzymeCore" [deps.KernelAbstractions.weakdeps] EnzymeCore = "f151be2c-9106-41f4-ab19-57ee4f262869" [[deps.LLVM]] deps = ["CEnum", "LLVMExtra_jll", "Libdl", "Printf", "Unicode"] git-tree-sha1 = "8695a49bfe05a2dc0feeefd06b4ca6361a018729" uuid = "929cbde3-209d-540e-8aea-75f648917ca0" version = "6.1.0" [[deps.LLVMExtra_jll]] deps = ["Artifacts", "JLLWrappers", "LazyArtifacts", "Libdl", "TOML"] git-tree-sha1 = "c35203c1e1002747da220ffc3c0762ce7754b08c" uuid = "dad2f222-ce93-54a1-a47d-0025e8a3acab" version = "0.0.23+0" [[deps.LazyArtifacts]] deps = ["Artifacts", "Pkg"] uuid = "4af54fe1-eca0-43a8-85a7-787d91b784e3" [[deps.LibCURL]] deps = ["LibCURL_jll", "MozillaCACerts_jll"] uuid = "b27032c2-a3e7-50c8-80cd-2d36dbcbfd21" version = "0.6.3" [[deps.LibCURL_jll]] deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll", "Zlib_jll", "nghttp2_jll"] uuid = "deac9b47-8bc7-5906-a0fe-35ac56dc84c0" version = "7.84.0+0" [[deps.LibGit2]] deps = ["Base64", "NetworkOptions", "Printf", "SHA"] uuid = "76f85450-5226-5b5a-8eaa-529ad045b433" [[deps.LibSSH2_jll]] deps = ["Artifacts", "Libdl", "MbedTLS_jll"] uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8" version = "1.10.2+0" [[deps.Libdl]] uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb" [[deps.LinearAlgebra]] deps = ["Libdl", "OpenBLAS_jll", "libblastrampoline_jll"] uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" [[deps.LogExpFunctions]] deps = ["DocStringExtensions", "IrrationalConstants", "LinearAlgebra"] git-tree-sha1 = "c3ce8e7420b3a6e071e0fe4745f5d4300e37b13f" uuid = "2ab3a3ac-af41-5b50-aa03-7779005ae688" version = "0.3.24" [deps.LogExpFunctions.extensions] LogExpFunctionsChainRulesCoreExt = "ChainRulesCore" LogExpFunctionsChangesOfVariablesExt = "ChangesOfVariables" LogExpFunctionsInverseFunctionsExt = "InverseFunctions" [deps.LogExpFunctions.weakdeps] ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" ChangesOfVariables = "9e997f8a-9a97-42d5-a9f1-ce6bfc15e2c0" InverseFunctions = "3587e190-3f89-42d0-90ee-14403ec27112" [[deps.Logging]] uuid = "56ddb016-857b-54e1-b83d-db4d58db5568" [[deps.MacroTools]] deps = ["Markdown", "Random"] git-tree-sha1 = "42324d08725e200c23d4dfb549e0d5d89dede2d2" uuid = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09" version = "0.5.10" [[deps.Markdown]] deps = ["Base64"] uuid = "d6f4376e-aef5-505a-96c1-9c027394607a" [[deps.MbedTLS_jll]] deps = ["Artifacts", "Libdl"] uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1" version = "2.28.2+0" [[deps.Mmap]] uuid = "a63ad114-7e13-5084-954f-fe012c677804" [[deps.MozillaCACerts_jll]] uuid = "14a3606d-f60d-562e-9121-12d972cd8159" version = "2022.10.11" [[deps.NaNMath]] deps = ["OpenLibm_jll"] git-tree-sha1 = "0877504529a3e5c3343c6f8b4c0381e57e4387e4" uuid = "77ba4419-2d1f-58cd-9bb1-8ffee604a2e3" version = "1.0.2" [[deps.NetworkOptions]] uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908" version = "1.2.0" [[deps.OpenBLAS_jll]] deps = ["Artifacts", "CompilerSupportLibraries_jll", "Libdl"] uuid = "4536629a-c528-5b80-bd46-f80d51c5b363" version = "0.3.21+4" [[deps.OpenLibm_jll]] deps = ["Artifacts", "Libdl"] uuid = "05823500-19ac-5b8b-9628-191a04bc5112" version = "0.8.1+0" [[deps.OpenSpecFun_jll]] deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "Libdl", "Pkg"] git-tree-sha1 = "13652491f6856acfd2db29360e1bbcd4565d04f1" uuid = "efe28fd5-8261-553b-a9e1-b2916fc3738e" version = "0.5.5+0" [[deps.Parsers]] deps = ["Dates", "PrecompileTools", "UUIDs"] git-tree-sha1 = "a5aef8d4a6e8d81f171b2bd4be5265b01384c74c" uuid = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0" version = "2.5.10" [[deps.Pkg]] deps = ["Artifacts", "Dates", "Downloads", "FileWatching", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "Serialization", "TOML", "Tar", "UUIDs", "p7zip_jll"] uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" version = "1.9.2" [[deps.PrecompileTools]] deps = ["Preferences"] git-tree-sha1 = "259e206946c293698122f63e2b513a7c99a244e8" uuid = "aea7be01-6a6a-4083-8856-8a6e6704d82a" version = "1.1.1" [[deps.Preferences]] deps = ["TOML"] git-tree-sha1 = "7eb1686b4f04b82f96ed7a4ea5890a4f0c7a09f1" uuid = "21216c6a-2e73-6563-6e65-726566657250" version = "1.4.0" [[deps.Printf]] deps = ["Unicode"] uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7" [[deps.Profile]] deps = ["Printf"] uuid = "9abbd945-dff8-562f-b5e8-e1ebf5ef1b79" [[deps.REPL]] deps = ["InteractiveUtils", "Markdown", "Sockets", "Unicode"] uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb" [[deps.Random]] deps = ["SHA", "Serialization"] uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" [[deps.Random123]] deps = ["Random", "RandomNumbers"] git-tree-sha1 = "552f30e847641591ba3f39fd1bed559b9deb0ef3" uuid = "74087812-796a-5b5d-8853-05524746bad3" version = "1.6.1" [[deps.RandomNumbers]] deps = ["Random", "Requires"] git-tree-sha1 = "043da614cc7e95c703498a491e2c21f58a2b8111" uuid = "e6cf234a-135c-5ec9-84dd-332b85af5143" version = "1.5.3" [[deps.Reexport]] git-tree-sha1 = "45e428421666073eab6f2da5c9d310d99bb12f9b" uuid = "189a3867-3050-52da-a836-e630ba90ab69" version = "1.2.2" [[deps.Requires]] deps = ["UUIDs"] git-tree-sha1 = "838a3a4188e2ded87a4f9f184b4b0d78a1e91cb7" uuid = "ae029012-a4dd-5104-9daa-d747884805df" version = "1.3.0" [[deps.SHA]] uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce" version = "0.7.0" [[deps.Scratch]] deps = ["Dates"] git-tree-sha1 = "30449ee12237627992a99d5e30ae63e4d78cd24a" uuid = "6c6a2e73-6563-6170-7368-637461726353" version = "1.2.0" [[deps.Serialization]] uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b" [[deps.Sockets]] uuid = "6462fe0b-24de-5631-8697-dd941f90decc" [[deps.SparseArrays]] deps = ["Libdl", "LinearAlgebra", "Random", "Serialization", "SuiteSparse_jll"] uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" [[deps.SpecialFunctions]] deps = ["IrrationalConstants", "LogExpFunctions", "OpenLibm_jll", "OpenSpecFun_jll"] git-tree-sha1 = "7beb031cf8145577fbccacd94b8a8f4ce78428d3" uuid = "276daf66-3868-5448-9aa4-cd146d93841b" version = "2.3.0" [deps.SpecialFunctions.extensions] SpecialFunctionsChainRulesCoreExt = "ChainRulesCore" [deps.SpecialFunctions.weakdeps] ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" [[deps.StaticArrays]] deps = ["LinearAlgebra", "Random", "StaticArraysCore", "Statistics"] git-tree-sha1 = "8982b3607a212b070a5e46eea83eb62b4744ae12" uuid = "90137ffa-7385-5640-81b9-e52037218182" version = "1.5.25" [[deps.StaticArraysCore]] git-tree-sha1 = "6b7ba252635a5eff6a0b0664a41ee140a1c9e72a" uuid = "1e83bf80-4336-4d27-bf5d-d5a4f845583c" version = "1.4.0" [[deps.Statistics]] deps = ["LinearAlgebra", "SparseArrays"] uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" version = "1.9.0" [[deps.SuiteSparse_jll]] deps = ["Artifacts", "Libdl", "Pkg", "libblastrampoline_jll"] uuid = "bea87d4a-7f5b-5778-9afe-8cc45184846c" version = "5.10.1+6" [[deps.TOML]] deps = ["Dates"] uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76" version = "1.0.3" [[deps.Tar]] deps = ["ArgTools", "SHA"] uuid = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e" version = "1.10.0" [[deps.Test]] deps = ["InteractiveUtils", "Logging", "Random", "Serialization"] uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [[deps.TimerOutputs]] deps = ["ExprTools", "Printf"] git-tree-sha1 = "f548a9e9c490030e545f72074a41edfd0e5bcdd7" uuid = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f" version = "0.5.23" [[deps.UUIDs]] deps = ["Random", "SHA"] uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" [[deps.Unicode]] uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" [[deps.UnsafeAtomics]] git-tree-sha1 = "6331ac3440856ea1988316b46045303bef658278" uuid = "013be700-e6cd-48c3-b4a1-df204f14c38f" version = "0.2.1" [[deps.UnsafeAtomicsLLVM]] deps = ["LLVM", "UnsafeAtomics"] git-tree-sha1 = "323e3d0acf5e78a56dfae7bd8928c989b4f3083e" uuid = "d80eeb9a-aca5-4d75-85e5-170c8b632249" version = "0.1.3" [[deps.Zlib_jll]] deps = ["Libdl"] uuid = "83775a58-1f1d-513f-b197-d71354ab007a" version = "1.2.13+0" [[deps.libblastrampoline_jll]] deps = ["Artifacts", "Libdl"] uuid = "8e850b90-86db-534c-a0d3-1478176c7d93" version = "5.8.0+0" [[deps.nghttp2_jll]] deps = ["Artifacts", "Libdl"] uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d" version = "1.48.0+0" [[deps.p7zip_jll]] deps = ["Artifacts", "Libdl"] uuid = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0" version = "17.4.0+0" ```
Expected behavior
Same as if you drop
CUDA
in the above code.Version info
Details on Julia:
Details on CUDA: