terryzhao127 / tensorflow-windows-build-script

A script to automate building TensorFlow on Windows and solve some problems
GNU General Public License v3.0
102 stars 35 forks source link

Crash in libtensorflow_cc.dll, no PDB symbols and I can't seem to generate them #23

Open Scorillo47 opened 5 years ago

Scorillo47 commented 5 years ago

Hi all,

When attempting to load my custom op I get a crash somewhere in libtensorflow_cc.dll.

However, I don't have the PDB debugger symbols needed to debug it, and all my attempts to generate PDBs using this build script have failed. Bazel seems to fail after a few minutes if I add --compilation_mode=dbg. See https://github.com/guikarist/tensorflow-windows-build-script/issues/12 for my notes on this.

Note: based on the stack frames, the crash appears to be related to a C++ exception caught by VCRUNTIME140.dll.

Note: I verified that the CUDA runtime is on the PATH.

Child-SP RetAddr Call Site

00 000000886bde94d0 00007ff8d3472c6f KERNELBASE!RaiseException+0x68
01 000000886bde95b0 00007ff8e6939bc3 VCRUNTIME140!__CxxCallCatchBlock+0x17f [f:\dd\vctools\crt\vcruntime\src\eh\frame.cpp @ 1186]
02 000000886bde9660 00007ff84c945151 ntdll!RcConsolidateFrames+0x3
03 000000886bdeba90 00007ff8d347c330 libtensorflow_cc!TF_DeletePRunHandle+0xbf01
04 000000886bdebac0 00007ff8d3472c01 VCRUNTIME140!_CallSettingFrame+0x20 [f:\dd\vctools\crt\vcruntime\src\eh\amd64\handlers.asm @ 50]
05 000000886bdebaf0 00007ff8e6939bc3 VCRUNTIME140!__CxxCallCatchBlock+0x111 [f:\dd\vctools\crt\vcruntime\src\eh\frame.cpp @ 1180]
06 000000886bdebba0 00007ff84950208b ntdll!RcConsolidateFrames+0x3
07 000000886bdedfc0 00007ff8495025e6 libtensorflow_cc!TFE_TensorHandleTensorDebugInfo+0x6f5b
08 000000886bdee020 00007ff8494fb7c6 libtensorflow_cc!TFE_TensorHandleTensorDebugInfo+0x74b6
09 000000886bdee050 00007ff8494ffcff libtensorflow_cc!TFE_TensorHandleTensorDebugInfo+0x696
0a 000000886bdee090 00007ff84c6a7b03 libtensorflow_cc!TFE_TensorHandleTensorDebugInfo+0x4bcf
0b 000000886bdee0d0 00007ff864b7a12d libtensorflow_cc!tensorflow::OpDefBuilder::OpDefBuilder+0xf3
0c 000000886bdee130 00007ff864b73bff tf_ZeroOut_gpu3!tensorflow::register_op::OpDefBuilderWrapper<1>::OpDefBuilderWrapper<1>+0x6d [f:\_git\tf_custom_op_windows\external\tf_lib_site-packages_tensorflow_include\tensorflow\core\framework\op.h @ 211]
0d 000000886bdee290 00007ff8648d4219 tf_ZeroOut_gpu3!`dynamic initializer for 'register_op0''+0xef [f:\_git\tf_custom_op_windows\tf.zeroout.gpu3\example.cc @ 11]
0e 000000886bdee770 00007ff864b829cd ucrtbased!_initterm+0x59 [minkernel\crts\ucrt\src\appcrt\startup\initterm.cpp @ 22]
0f 000000886bdee7b0 00007ff864b828a7 tf_ZeroOut_gpu3!dllmain_crt_process_attach+0xbd [f:\dd\vctools\crt\vcstartup\src\startup\dll_dllmain.cpp @ 67]
10 000000886bdee800 00007ff864b82bc5 tf_ZeroOut_gpu3!dllmain_crt_dispatch+0x47 [f:\dd\vctools\crt\vcstartup\src\startup\dll_dllmain.cpp @ 133]
11 000000886bdee840 00007ff864b82dc1 tf_ZeroOut_gpu3!dllmain_dispatch+0x75 [f:\dd\vctools\crt\vcstartup\src\startup\dll_dllmain.cpp @ 190]
12 000000886bdee890 00007ff8e68b389f tf_ZeroOut_gpu3!_DllMainCRTStartup+0x31 [f:\dd\vctools\crt\vcstartup\src\startup\dll_dllmain.cpp @ 249]
13 000000886bdee8c0 00007ff8e69010de ntdll!LdrpCallInitRoutine+0x4b
14 000000886bdee920 00007ff8e6900f2b ntdll!LdrpInitializeNode+0x15a
15 000000886bdeea40 00007ff8e68c8b79 ntdll!LdrpInitializeGraphRecurse+0x73
16 000000886bdeea80 00007ff8e68b3e4d ntdll!LdrpPrepareModuleForExecution+0xc5
17 000000886bdeeac0 00007ff8e68b042d ntdll!LdrpLoadDllInternal+0x18d
18 000000886bdeeb40 00007ff8e68acf4c ntdll!LdrpLoadDll+0xf1
19 000000886bdeece0 00007ff8e3534c6f ntdll!LdrLoadDll+0x8c
1a 000000886bdeede0 00007ff890e606bd KERNELBASE!LoadLibraryExW+0x16f
1b 000000886bdeee50 00007ff890db2bef _pywrap_tensorflow_internal!tensorflow::Env::Default+0x80d
1c 000000886bdeef10 00007ff88dc5084f _pywrap_tensorflow_internal!tensorflow::LoadLibrary+0x59f
1d 000000886bdef0d0 00007ff88da2d224 _pywrap_tensorflow_internal!TF_LoadLibrary+0x3f
1e 000000886bdef110 000000007262ae05 _pywrap_tensorflow_internal+0x26d224
1f 000000886bdef170 000000007262b4d9 python36!_PyCFunction_FastCallDict+0x195 [c:\ci\python_1546215302383\work\objects\methodobject.c @ 236]
20 (Inline Function) ---------------- python36!_PyCFunction_FastCallKeywords+0x1d [c:\ci\python_1546215302383\work\objects\methodobject.c @ 294]
21 000000886bdef1f0 000000007262bf0e python36!call_function+0x89 [c:\ci\python_1546215302383\work\python\ceval.c @ 4851]
22 000000886bdef2d0 000000007262b6a0 python36!_PyEval_EvalFrameDefault+0x2ae [c:\ci\python_1546215302383\work\python\ceval.c @ 3336]
23 (Inline Function) ---------------- python36!PyEval_EvalFrameEx+0x13 [c:\ci\python_1546215302383\work\python\ceval.c @ 754]
24 (Inline Function) ---------------- python36!_PyFunction_FastCall+0x5e [c:\ci\python_1546215302383\work\python\ceval.c @ 4933]
25 (Inline Function) ---------------- python36!fast_function+0xa9 [c:\ci\python_1546215302383\work\python\ceval.c @ 4968]
26 000000886bdef3e0 000000007262bf0e python36!call_function+0x250 [c:\ci\python_1546215302383\work\python\ceval.c @ 4872]
27 000000886bdef4c0 00000000726291f8 python36!_PyEval_EvalFrameDefault+0x2ae [c:\ci\python_1546215302383\work\python\ceval.c @ 3336]
28 (Inline Function) ---------------- python36!PyEval_EvalFrameEx+0x14 [c:\ci\python_1546215302383\work\python\ceval.c @ 754]
29 000000886bdef5d0 00000000725f5973 python36!_PyEval_EvalCodeWithName+0x198 [c:\ci\python_1546215302383\work\python\ceval.c @ 4166]
2a 000000886bdef680 00000000725f58d1 python36!PyEval_EvalCodeEx+0x9b [c:\ci\python_1546215302383\work\python\ceval.c @ 4194]
2b 000000886bdef710 00000000725f587b python36!PyEval_EvalCode+0x2d [c:\ci\python_1546215302383\work\python\ceval.c @ 737]
2c 000000886bdef780 000000007278cf01 python36!run_mod+0x43 [c:\ci\python_1546215302383\work\python\pythonrun.c @ 1026]
2d 000000886bdef7c0 000000007278d72c python36!PyRun_FileExFlags+0xc5 [c:\ci\python_1546215302383\work\python\pythonrun.c @ 978]
2e 000000886bdef830 000000007278cdcf python36!PyRun_SimpleFileExFlags+0x24c [c:\ci\python_1546215302383\work\python\pythonrun.c @ 422]
2f 000000886bdef8a0 00000000726dbdaf python36!PyRun_AnyFileExFlags+0x63 [c:\ci\python_1546215302383\work\python\pythonrun.c @ 82]
30 000000886bdef8d0 000000007264f087 python36!run_file+0xbf [c:\ci\python_1546215302383\work\modules\main.c @ 340]
31 000000886bdef910 000000001c7f1258 python36!Py_Main+0x5dec3 [c:\ci\python_1546215302383\work\modules\main.c @ 811]
32 (Inline Function) ---------------- python!invoke_main+0x22 [f:\dd\vctools\crt\vcstartup\src\startup\exe_common.inl @ 90]
33 000000886bdefa10 00007ff8e4cf84d4 python!__scrt_common_main_seh+0x10c [f:\dd\vctools\crt\vcstartup\src\startup\exe_common.inl @ 288]
34 000000886bdefa50 00007ff8e68fe851 KERNEL32!BaseThreadInitThunk+0x14
35 000000886bdefa80 0000000000000000 ntdll!RtlUserThreadStart+0x21

Scorillo47 commented 5 years ago

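Here is the list of loaded modules and their symbol status in WinDbg. Only the TensorFlow binaries have problems: libtensorflow_cc.dll and _pywrap_tensorflow_internal.pyd are loaded with export symbols only, with no PDB.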

0:000> lml
start end module name
000000001c7f0000 000000001c80b000 python C (private pdb symbols) d:\adi\symcache\python.pdb\13C3E68B6EB140B6A9022F4A44396DCC1\python.pdb
00000000725f0000 0000000072990000 python36 C (private pdb symbols) d:\adi\symcache\python36.pdb\C122DBBC35354CC6A2D153E61A43ADD11\python36.pdb
00007ff8492a0000 00007ff86127e000 libtensorflow_cc C (export symbols) D:\adi\as\test_op\libtensorflow_cc.dll
00007ff864860000 00007ff864a22000 ucrtbased (private pdb symbols) d:\adi\symcache\ucrtbased.pdb\C2CF5FE2AF454E1774172E10CCC7F07D2\ucrtbased.pdb
00007ff864b60000 00007ff864bab000 tf_ZeroOut_gpu3 C (private pdb symbols) d:\adi\symcache\tf.ZeroOut.gpu3.pdb\D78A48A9EAB84BFB9A963BAE61D98D471\tf.ZeroOut.gpu3.pdb
00007ff88d7c0000 00007ff8a63b5000 _pywrap_tensorflow_internal C (export symbols) d:\Data\miniconda\miniconda3\envs\adaptivesoftmax36_ops\lib\site-packages\tensorflow\python\_pywrap_tensorflow_internal.pyd
00007ff8d3470000 00007ff8d3486000 VCRUNTIME140 (private pdb symbols) d:\adi\symcache\vcruntime140.amd64.pdb\06C195E992BE4BC6AC99624BAFFB2C261\vcruntime140.amd64.pdb
00007ff8dfae0000 00007ff8dfaea000 VERSION (pdb symbols) d:\adi\symcache\version.pdb\012578051C4E4FFB991FCF9D65D5143C1\version.pdb
00007ff8e3510000 00007ff8e372d000 KERNELBASE (pdb symbols) d:\adi\symcache\kernelbase.pdb\E26F9607943644BB8CDE6C806006A3F01\kernelbase.pdb
00007ff8e3730000 00007ff8e38b1000 gdi32full (pdb symbols) d:\adi\symcache\gdi32full.pdb\263BE087DF1D44CCAC8FCA6CAE8C7EE31\gdi32full.pdb
00007ff8e38c0000 00007ff8e38de000 win32u (pdb symbols) d:\adi\symcache\win32u.pdb\D746BAF2F0C04D5E9781C9CC9B12A2ED1\win32u.pdb
00007ff8e3980000 00007ff8e39ea000 bcryptPrimitives (pdb symbols) d:\adi\symcache\bcryptprimitives.pdb\7A85B74A87984A47BD39CA15D60335B81\bcryptprimitives.pdb
00007ff8e3c70000 00007ff8e3d64000 ucrtbase (pdb symbols) d:\adi\symcache\ucrtbase.pdb\FE175285B6A74774849AB615D1063C021\ucrtbase.pdb
00007ff8e3e20000 00007ff8e3ebe000 msvcrt (pdb symbols) d:\adi\symcache\msvcrt.pdb\BAFC8C0A0A7648E0A6C2FC97B72A04B11\msvcrt.pdb
00007ff8e42f0000 00007ff8e4324000 GDI32 (pdb symbols) d:\adi\symcache\gdi32.pdb\01F6AC111DBD40AA910292B0D768AD6C1\gdi32.pdb
00007ff8e4340000 00007ff8e43aa000 WS2_32 (pdb symbols) d:\adi\symcache\ws2_32.pdb\C10549D46F604C0697BF8A1527F13B471\ws2_32.pdb
00007ff8e43b0000 00007ff8e44d1000 RPCRT4 (pdb symbols) d:\adi\symcache\rpcrt4.pdb\F955660C379346AC82C0721E31F7F1571\rpcrt4.pdb
00007ff8e44e0000 00007ff8e4532000 SHLWAPI (pdb symbols) d:\adi\symcache\shlwapi.pdb\FB0E36D3E9774E3082EEF8F877CAE8721\shlwapi.pdb
00007ff8e4ad0000 00007ff8e4b72000 ADVAPI32 (pdb symbols) d:\adi\symcache\advapi32.pdb\3D08D5E1E3F84EB18B09F7D83723E0091\advapi32.pdb
00007ff8e4b80000 00007ff8e4ce5000 USER32 (pdb symbols) d:\adi\symcache\user32.pdb\BF7DE23509AA407B85B0E56332D3111F1\user32.pdb
00007ff8e4cf0000 00007ff8e4d9c000 KERNEL32 (pdb symbols) d:\adi\symcache\kernel32.pdb\8FF195CA44C14163911E632B74630FA21\kernel32.pdb
00007ff8e6560000 00007ff8e6825000 combase (private pdb symbols) d:\adi\symcache\combase.pdb\2613AF92FDBA485386C9577FA17EB8CA1\combase.pdb
00007ff8e6830000 00007ff8e6889000 sechost (pdb symbols) d:\adi\symcache\sechost.pdb\3EE0CC142F5246A2A0879E93A04D88341\sechost.pdb
00007ff8e6890000 00007ff8e6a60000 ntdll (pdb symbols) d:\adi\symcache\ntdll.pdb\87DB6E6182D343ABB83394F73BB3973E1\ntdll.pdb
0:000> lml
start end module name
000000001c7f0000 000000001c80b000 python C (private pdb symbols) d:\adi\symcache\python.pdb\13C3E68B6EB140B6A9022F4A44396DCC1\python.pdb
00000000725f0000 0000000072990000 python36 C (private pdb symbols) d:\adi\symcache\python36.pdb\C122DBBC35354CC6A2D153E61A43ADD11\python36.pdb
00007ff8492a0000 00007ff86127e000 libtensorflow_cc C (export symbols) D:\adi\as\test_op\libtensorflow_cc.dll
00007ff864860000 00007ff864a22000 ucrtbased (private pdb symbols) d:\adi\symcache\ucrtbased.pdb\C2CF5FE2AF454E1774172E10CCC7F07D2\ucrtbased.pdb
00007ff864b60000 00007ff864bab000 tf_ZeroOut_gpu3 C (private pdb symbols) d:\adi\symcache\tf.ZeroOut.gpu3.pdb\D78A48A9EAB84BFB9A963BAE61D98D471\tf.ZeroOut.gpu3.pdb
00007ff88d7c0000 00007ff8a63b5000 _pywrap_tensorflow_internal C (export symbols) d:\Data\miniconda\miniconda3\envs\adaptivesoftmax36_ops\lib\site-packages\tensorflow\python\_pywrap_tensorflow_internal.pyd
00007ff8d3470000 00007ff8d3486000 VCRUNTIME140 (private pdb symbols) d:\adi\symcache\vcruntime140.amd64.pdb\06C195E992BE4BC6AC99624BAFFB2C261\vcruntime140.amd64.pdb
00007ff8dfae0000 00007ff8dfaea000 VERSION (pdb symbols) d:\adi\symcache\version.pdb\012578051C4E4FFB991FCF9D65D5143C1\version.pdb
00007ff8e3510000 00007ff8e372d000 KERNELBASE (pdb symbols) d:\adi\symcache\kernelbase.pdb\E26F9607943644BB8CDE6C806006A3F01\kernelbase.pdb
00007ff8e3730000 00007ff8e38b1000 gdi32full (pdb symbols) d:\adi\symcache\gdi32full.pdb\263BE087DF1D44CCAC8FCA6CAE8C7EE31\gdi32full.pdb
00007ff8e38c0000 00007ff8e38de000 win32u (pdb symbols) d:\adi\symcache\win32u.pdb\D746BAF2F0C04D5E9781C9CC9B12A2ED1\win32u.pdb
00007ff8e3980000 00007ff8e39ea000 bcryptPrimitives (pdb symbols) d:\adi\symcache\bcryptprimitives.pdb\7A85B74A87984A47BD39CA15D60335B81\bcryptprimitives.pdb
00007ff8e3c70000 00007ff8e3d64000 ucrtbase (pdb symbols) d:\adi\symcache\ucrtbase.pdb\FE175285B6A74774849AB615D1063C021\ucrtbase.pdb
00007ff8e3e20000 00007ff8e3ebe000 msvcrt (pdb symbols) d:\adi\symcache\msvcrt.pdb\BAFC8C0A0A7648E0A6C2FC97B72A04B11\msvcrt.pdb
00007ff8e42f0000 00007ff8e4324000 GDI32 (pdb symbols) d:\adi\symcache\gdi32.pdb\01F6AC111DBD40AA910292B0D768AD6C1\gdi32.pdb
00007ff8e4340000 00007ff8e43aa000 WS2_32 (pdb symbols) d:\adi\symcache\ws2_32.pdb\C10549D46F604C0697BF8A1527F13B471\ws2_32.pdb
00007ff8e43b0000 00007ff8e44d1000 RPCRT4 (pdb symbols) d:\adi\symcache\rpcrt4.pdb\F955660C379346AC82C0721E31F7F1571\rpcrt4.pdb
00007ff8e44e0000 00007ff8e4532000 SHLWAPI (pdb symbols) d:\adi\symcache\shlwapi.pdb\FB0E36D3E9774E3082EEF8F877CAE8721\shlwapi.pdb
00007ff8e4ad0000 00007ff8e4b72000 ADVAPI32 (pdb symbols) d:\adi\symcache\advapi32.pdb\3D08D5E1E3F84EB18B09F7D83723E0091\advapi32.pdb
00007ff8e4b80000 00007ff8e4ce5000 USER32 (pdb symbols) d:\adi\symcache\user32.pdb\BF7DE23509AA407B85B0E56332D3111F1\user32.pdb
00007ff8e4cf0000 00007ff8e4d9c000 KERNEL32 (pdb symbols) d:\adi\symcache\kernel32.pdb\8FF195CA44C14163911E632B74630FA21\kernel32.pdb
00007ff8e6560000 00007ff8e6825000 combase (private pdb symbols) d:\adi\symcache\combase.pdb\2613AF92FDBA485386C9577FA17EB8CA1\combase.pdb
00007ff8e6830000 00007ff8e6889000 sechost (pdb symbols) d:\adi\symcache\sechost.pdb\3EE0CC142F5246A2A0879E93A04D88341\sechost.pdb
00007ff8e6890000 00007ff8e6a60000 ntdll (pdb symbols) d:\adi\symcache\ntdll.pdb\87DB6E6182D343ABB83394F73BB3973E1\ntdll.pdb