Closed unode closed 5 years ago
Using 44cc67e7b:
t #6279 [ 5.70] <- slurmdrmaa_parse_native d #6279 [ 5.70] * job 16484204 submitted t #6279 [ 5.71] -> fsd_job_new(16484204_1) t #6279 [ 5.71] <- fsd_job_new=0xc163b0: ref_cnt=1 [lock 16484204_1] t #6279 [ 5.71] -> fsd_job_set_add(job=0xc163b0, job_id=16484204_1) t #6279 [ 5.71] <- fsd_job_set_add: job->ref_cnt=2 t #6279 [ 5.71] -> fsd_job_release(0xc163b0={job_id=16484204_1, ref_cnt=2}) [unlock 16484204_1] t #6279 [ 5.71] <- fsd_job_release t #6279 [ 5.71] -> fsd_job_new(16484204_2) t #6279 [ 5.71] <- fsd_job_new=0xd6acf0: ref_cnt=1 [lock 16484204_2] t #6279 [ 5.71] -> fsd_job_set_add(job=0xd6acf0, job_id=16484204_2) t #6279 [ 5.71] <- fsd_job_set_add: job->ref_cnt=2 t #6279 [ 5.71] -> fsd_job_release(0xd6acf0={job_id=16484204_2, ref_cnt=2}) [unlock 16484204_2] t #6279 [ 5.71] <- fsd_job_release t #6279 [ 5.71] -> fsd_job_new(16484204_3) t #6279 [ 5.71] <- fsd_job_new=0xdffed0: ref_cnt=1 [lock 16484204_3] t #6279 [ 5.71] -> fsd_job_set_add(job=0xdffed0, job_id=16484204_3) t #6279 [ 5.71] <- fsd_job_set_add: job->ref_cnt=2 t #6279 [ 5.71] -> fsd_job_release(0xdffed0={job_id=16484204_3, ref_cnt=2}) [unlock 16484204_3] t #6279 [ 5.71] <- fsd_job_release t #6279 [ 5.71] -> fsd_job_new(16484204_4) t #6279 [ 5.71] <- fsd_job_new=0xe55a70: ref_cnt=1 [lock 16484204_4] t #6279 [ 5.71] -> fsd_job_set_add(job=0xe55a70, job_id=16484204_4) t #6279 [ 5.71] <- fsd_job_set_add: job->ref_cnt=2 t #6279 [ 5.71] -> fsd_job_release(0xe55a70={job_id=16484204_4, ref_cnt=2}) [unlock 16484204_4] t #6279 [ 5.71] <- fsd_job_release t #6279 [ 5.71] -> slurmdrmaa_free_job_desc t #6279 [ 5.71] <- slurmdrmaa_free_job_desc t #6279 [ 5.71] <- drmaa_run_bulk_jobs =0 d #6279 [ 5.71] * fsd_exc_new(1006,Vector have no more elements.,0) t #6279 [ 5.71] <- drmaa_get_next_job_id=25: Vector have no more elements. 
t #6279 [ 5.71] -> drmaa_delete_job_template(0xe24e30) t #6279 [ 5.71] <- drmaa_delete_job_template =0 t #6279 [ 70.34] -> drmaa_job_ps(job_id=16484204_2) t #6279 [ 70.34] -> fsd_job_set_get(job_id=16484204_2) t #6279 [ 70.34] <- fsd_job_set_get(job_id=16484204_2) =0xd6acf0: ref_cnt=2 [lock 16484204_2] d #6279 [ 70.34] * job->last_update_time = 0 d #6279 [ 70.34] * updating status of job: 16484204_2 t #6279 [ 70.34] -> slurmdrmaa_job_update_status({job_id=16484204_2}) t #6279 [ 70.34] -> slurmdrmaa_set_job_id({job_id=16484204_2}) t #6279 [ 70.34] <- slurmdrmaa_set_job_id; job_id=16484204_2 E #6279 [ 70.34] * fsd_exc_new(1003,not an number: 16484204_2,1) t #6279 [ 70.34] -> slurmdrmaa_unset_job_id({job_id=(null)}) t #6279 [ 70.34] <- slurmdrmaa_unset_job_id; job_id=16484204_2 t #6279 [ 70.34] -> fsd_job_release(0xd6acf0={job_id=16484204_2, ref_cnt=2}) [unlock 16484204_2] t #6279 [ 70.34] <- fsd_job_release t #6279 [ 70.34] <- drmaa_job_ps=4: not an number: 16484204_2
This causes DRMAA to drop these jobs.
The code seems to assume that job IDs have to be numeric. SLURM uses array job IDs of the form `<ArrayJobID>_<ArrayTaskID>` (e.g. `16484204_2`), which in this case isn't being handled properly.
Also noticed the line `t #6279 [ 5.71] <- drmaa_run_bulk_jobs =0`. Shouldn't it be `=1` in this case?
t #6279 [ 5.71] <- drmaa_run_bulk_jobs =0
=1
Thanks for all the fixes!
Using 44cc67e7b:
This causes DRMAA to drop these jobs.
The code seems to assume that job IDs have to be numeric. SLURM uses array job IDs of the form `<ArrayJobID>_<ArrayTaskID>` (e.g. `16484204_2`), which in this case isn't being handled properly.
Also noticed the line `t #6279 [ 5.71] <- drmaa_run_bulk_jobs =0`. Shouldn't it be `=1` in this case?