natefoo / slurm-drmaa

DRMAA for Slurm: Implementation of the DRMAA C bindings for Slurm
GNU General Public License v3.0
48 stars 22 forks source link

Segmentation fault when providing --time #14

Closed unode closed 5 years ago

unode commented 6 years ago

I got one more segmentation fault, this time when specifying `--time 1:00:00` (1 hour) in the native specification. It seems to be just an alias problem affecting the long option, since the short form `-t 1:00:00` works.

...
d #e494 [     0.03]  * # Native specification: --cpus-per-task=2 --nodes=1 --mem-per-cpu=50 --partition=htc --time 1:00:00 --tmp=100
t #e494 [     0.03] -> slurmdrmaa_parse_native
t #e494 [     0.03] -> slurmdrmaa_parse_additional_attr
d #e494 [     0.03]  * # cpus_per_task = 2
t #e494 [     0.03] <- slurmdrmaa_parse_additional_attr
t #e494 [     0.03] -> slurmdrmaa_parse_additional_attr
d #e494 [     0.03]  * nodes: 1 ->
d #e494 [     0.03]  * # min_nodes = 1
t #e494 [     0.03] <- slurmdrmaa_parse_additional_attr
t #e494 [     0.03] -> slurmdrmaa_parse_additional_attr
d #e494 [     0.03]  * # pn_min_memory (MEM_PER_CPU) = 50
t #e494 [     0.03] <- slurmdrmaa_parse_additional_attr
t #e494 [     0.03] -> slurmdrmaa_parse_additional_attr
d #e494 [     0.03]  * # partition = htc
t #e494 [     0.03] <- slurmdrmaa_parse_additional_attr
t #e494 [     0.03] -> slurmdrmaa_parse_additional_attr
d #e494 [     0.03]  * # time_limit = (null)
t #e494 [     0.03] -> slurmdrmaa_datetime_parse((null))

Program received signal SIGSEGV, Segmentation fault.
0x00007ffff6b20faf in __strlen_sse42 () from /lib64/libc.so.6
(gdb) bt
#0  0x00007ffff6b20faf in __strlen_sse42 () from /lib64/libc.so.6
#1  0x00007fffed955284 in slurmdrmaa_datetime_parse (string=0x0) at util.c:53
#2  0x00007fffed956295 in slurmdrmaa_add_attribute (job_desc=0x7fffffff9e10, attr=19, value=0x0) at util.c:292
#3  0x00007fffed956c19 in slurmdrmaa_parse_additional_attr (job_desc=0x7fffffff9e10, add_attr=0x7ac3bf "time", clusters_opt=0x7fffffff8af0) at util.c:427
#4  0x00007fffed9570f8 in slurmdrmaa_parse_native (job_desc=0x7fffffff9e10, value=0x79f8b0 "--cpus-per-task=2 --nodes=1 --mem-per-cpu=50 --partition=htc --time 1:00:00 --tmp=100") at util.c:502
#5  0x00007fffed95462e in slurmdrmaa_job_create (session=0x641ad0, jt=0x7e3570, envp=0x7fffffffa0f8, expand=0x771280, job_desc=0x7fffffff9e10) at job.c:701
#6  0x00007fffed952d3b in slurmdrmaa_job_create_req (session=0x641ad0, jt=0x7e3570, envp=0x7fffffffa0f8, job_desc=0x7fffffff9e10) at job.c:302
#7  0x00007fffed954af4 in slurmdrmaa_session_run_bulk (self=0x641ad0, jt=0x7e3570, start=1, end=2, incr=1) at session.c:126
#8  0x00007fffed96facb in drmaa_run_bulk_jobs (job_ids=0x7fffeea84a28, jt=0x7e3570, start=1, end=2, incr=1, error_diagnosis=0x732960 "", error_diag_len=1024) at drmaa_base.c:427
#9  0x00007fffeffed550 in ffi_call_unix64 () at /home/ilan/minonda/conda-bld/python_1494526091235/work/Python-3.6.1/Modules/_ctypes/libffi/src/x86/unix64.S:76
#10 0x00007fffeffeccf5 in ffi_call (cif=<optimized out>, fn=0x7fffed96f8e3 <drmaa_run_bulk_jobs>, rvalue=<optimized out>, avalue=0x7fffffffa330) at /home/ilan/minonda/conda-bld/python_1494526091235/work/Python-3.6.1/Modules/_ctypes/libffi/src/x86/ffi64.c:525
#11 0x00007fffeffe483c in _call_function_pointer (argcount=7, resmem=0x7fffffffa380, restype=<optimized out>, atypes=<optimized out>, avalues=0x7fffffffa330, pProc=0x7fffed96f8e3 <drmaa_run_bulk_jobs>, flags=4353)
    at /home/ilan/minonda/conda-bld/python_1494526091235/work/Python-3.6.1/Modules/_ctypes/callproc.c:809
#12 _ctypes_callproc (pProc=0x7fffed96f8e3 <drmaa_run_bulk_jobs>, argtuple=0x7fffffffa4f0, flags=4353, argtypes=<optimized out>, restype=0x7ffff0236158, checker=0x0) at /home/ilan/minonda/conda-bld/python_1494526091235/work/Python-3.6.1/Modules/_ctypes/callproc.c:1147
#13 0x00007fffeffdcda3 in PyCFuncPtr_call (self=<optimized out>, inargs=<optimized out>, kwds=0x0) at /home/ilan/minonda/conda-bld/python_1494526091235/work/Python-3.6.1/Modules/_ctypes/_ctypes.c:3870
#14 0x00007ffff793fe96 in PyObject_Call (func=0x7fffeea66e58, args=<optimized out>, kwargs=<optimized out>) at Objects/abstract.c:2246
#15 0x00007ffff7a20236 in do_call_core (kwdict=0x0, callargs=<optimized out>, func=0x7fffeea66e58) at Python/ceval.c:5067
#16 _PyEval_EvalFrameDefault (f=<optimized out>, throwflag=<optimized out>) at Python/ceval.c:3366
#17 0x00007ffff7a1aa60 in _PyEval_EvalCodeWithName (_co=0x7ffff0220390, globals=<optimized out>, locals=<optimized out>, args=<optimized out>, argcount=6, kwnames=0x0, kwargs=0x7e61c8, kwcount=0, kwstep=1, defs=0x0, defcount=0, kwdefs=0x0, closure=0x0,
    name=0x7ffff7f66308, qualname=0x7ffff7f66308) at Python/ceval.c:4128
#18 0x00007ffff7a1c48a in fast_function (kwnames=<optimized out>, nargs=6, stack=<optimized out>, func=0x7fffeea7f840) at Python/ceval.c:4939
#19 call_function (pp_stack=0x7fffffffaa08, oparg=<optimized out>, kwnames=<optimized out>) at Python/ceval.c:4819
#20 0x00007ffff7a1f15d in _PyEval_EvalFrameDefault (f=<optimized out>, throwflag=<optimized out>) at Python/ceval.c:3284
#21 0x00007ffff7969e33 in gen_send_ex (gen=0x7fffefd92200, arg=<optimized out>, exc=<optimized out>, closing=<optimized out>) at Objects/genobject.c:189
#22 0x00007ffff7978f16 in listextend (self=0x7fffeea8ee88, b=<optimized out>) at Objects/listobject.c:857
#23 0x00007ffff7979398 in list_init (self=0x7fffeea8ee88, args=<optimized out>, kw=<optimized out>) at Objects/listobject.c:2316
#24 0x00007ffff79add4c in type_call (type=<optimized out>, args=0x7ffff7e8e908, kwds=0x0) at Objects/typeobject.c:915
#25 0x00007ffff793fade in _PyObject_FastCallDict (func=0x7ffff7d5bb40 <PyList_Type>, args=<optimized out>, nargs=<optimized out>, kwargs=0x0) at Objects/abstract.c:2316
#26 0x00007ffff7a1c2bb in call_function (pp_stack=0x7fffffffad48, oparg=<optimized out>, kwnames=0x0) at Python/ceval.c:4822
#27 0x00007ffff7a1f15d in _PyEval_EvalFrameDefault (f=<optimized out>, throwflag=<optimized out>) at Python/ceval.c:3284
#28 0x00007ffff7a1aa60 in _PyEval_EvalCodeWithName (_co=0x7ffff01ff420, globals=<optimized out>, locals=<optimized out>, args=<optimized out>, argcount=1, kwnames=0x7ffff7e9cb58, kwargs=0x7ffff7f8fba8, kwcount=3, kwstep=1, defs=0x0, defcount=0, kwdefs=0x0, closure=0x0,
    name=0x7ffff7ea3d70, qualname=0x7fffefd8f300) at Python/ceval.c:4128
#29 0x00007ffff7a1c48a in fast_function (kwnames=<optimized out>, nargs=1, stack=<optimized out>, func=0x7fffeea84400) at Python/ceval.c:4939
#30 call_function (pp_stack=0x7fffffffafe8, oparg=<optimized out>, kwnames=<optimized out>) at Python/ceval.c:4819
#31 0x00007ffff7a1e8dd in _PyEval_EvalFrameDefault (f=<optimized out>, throwflag=<optimized out>) at Python/ceval.c:3300
#32 0x00007ffff7a1aa60 in _PyEval_EvalCodeWithName (_co=0x7ffff7f1c930, globals=<optimized out>, locals=<optimized out>, args=<optimized out>, argcount=0, kwnames=0x0, kwargs=0x8, kwcount=0, kwstep=2, defs=0x0, defcount=0, kwdefs=0x0, closure=0x0, name=0x0, qualname=0x0)
    at Python/ceval.c:4128
#33 0x00007ffff7a1aee3 in PyEval_EvalCodeEx (_co=<optimized out>, globals=<optimized out>, locals=<optimized out>, args=<optimized out>, argcount=<optimized out>, kws=<optimized out>, kwcount=0, defs=0x0, defcount=0, kwdefs=0x0, closure=0x0) at Python/ceval.c:4149
#34 0x00007ffff7a1af2b in PyEval_EvalCode (co=<optimized out>, globals=<optimized out>, locals=<optimized out>) at Python/ceval.c:695
#35 0x00007ffff7a4d6c0 in run_mod (arena=0x7ffff7f79180, flags=0x7fffffffb340, locals=0x7ffff7f5df30, globals=0x7ffff7f5df30, filename=0x7ffff7ea3970, mod=0x6857d8) at Python/pythonrun.c:980
#36 PyRun_FileExFlags (fp=0x6438d0, filename_str=<optimized out>, start=<optimized out>, globals=0x7ffff7f5df30, locals=0x7ffff7f5df30, closeit=<optimized out>, flags=0x7fffffffb340) at Python/pythonrun.c:933
#37 0x00007ffff7a4ec83 in PyRun_SimpleFileExFlags (fp=0x6438d0, filename=<optimized out>, closeit=1, flags=0x7fffffffb340) at Python/pythonrun.c:396
#38 0x00007ffff7a6a0b5 in run_file (p_cf=0x7fffffffb340, filename=0x603310 L"test_drmaa.py", fp=0x6438d0) at Modules/main.c:338
#39 Py_Main (argc=<optimized out>, argv=<optimized out>) at Modules/main.c:810
#40 0x0000000000400c1d in main (argc=2, argv=<optimized out>) at ./Programs/python.c:69
natefoo commented 5 years ago

The `=` is required for long options that take a value, e.g. `--time=1:00:00` (there can't be a space between the option and its value). However, slurm-drmaa will no longer segfault when the `=` is missing.