Closed mscook closed 5 years ago
That should not happen :)
Can you please paste the output of qstat --version
and pbsnodes -a
here?
(I've only been using saga-python for a few hours) so it might be me...
qstat --version
pbs_version = PBSPro_11.3.0.121723
and the bottom of
pbsnodes -a
b01a07
Mom = b01a07.barrine.hpcu.uq.edu.au
ntype = PBS
state = <various>
pcpus = 16
jobs = 1417589[2].paroo3/0, 1417589[2].paroo3/1, 1417589[2].paroo3/2, 1417589[2].paroo3/3, 1417589[2].paroo3/0, 1417724.paroo3/1, 1417724.paroo3/2, 1417724.paroo3/3, 1417724.paroo3/3
resv_enable = True
sharing = <various>
resources_available.arch = linux_cpuset
resources_available.host = b01a07
resources_available.mem = 24602260kb
resources_available.ncpus = 8
resources_available.NodeType = medium
resources_available.router = b01a07,med-04,ib-04,barrine
resources_available.schedclass = flex,normal,large,constrain,reserv
resources_available.schedmins = 19938
resources_available.vmem = 32794252kb
resources_available.vnode = <various>
resources_available.accelerator_memory = 0kb
resources_available.naccelerators = 0
resources_available.netwins = 0
resources_available.scratch = 241526648kb
resources_assigned.mem = 23068672kb
resources_assigned.ncpus = 8
resources_assigned.vmem = 0kb
resources_assigned.accelerator_memory = 0kb
resources_assigned.naccelerators = 0
resources_assigned.netwins = 0
b02b35
Mom = b02b35.barrine.hpcu.uq.edu.au
ntype = PBS
state = <various>
pcpus = <various>
jobs = 1416574.paroo3/1, 1416574.paroo3/2, 1416574.paroo3/3, 1416574.paroo3/3, 1417623[1109].paroo3/0, 1417623[1034].paroo3/1
resv_enable = True
sharing = <various>
resources_available.arch = linux_cpuset
resources_available.host = b02b35
resources_available.mem = 24600820kb
resources_available.ncpus = 8
resources_available.NodeType = medium
resources_available.router = "b02b35,ib-04,barrine"
resources_available.schedclass = flex,normal,large,constrain,reserv
resources_available.schedmins = 17984
resources_available.vmem = 0kb
resources_available.vnode = <various>
resources_available.accelerator_memory = 0kb
resources_available.naccelerators = 0
resources_available.netwins = 0
resources_available.scratch = 241524724kb
resources_assigned.mem = 14680064kb
resources_assigned.ncpus = 6
resources_assigned.vmem = 0kb
resources_assigned.accelerator_memory = 0kb
resources_assigned.naccelerators = 0
resources_assigned.netwins = 0
b10a13
Mom = b10a13.barrine.hpcu.uq.edu.au
ntype = PBS
state = free
pcpus = <various>
resv_enable = True
sharing = <various>
resources_available.arch = linux_cpuset
resources_available.host = b10a13
resources_available.mem = 74243324kb
resources_available.ncpus = 8
resources_available.NodeType = large
resources_available.router = "b10a13,ib-05,barrine"
resources_available.schedclass = flex,normal,large,constrain,reserv
resources_available.schedmins = 40320
resources_available.vmem = 0kb
resources_available.vnode = <various>
resources_available.accelerator_memory = 0kb
resources_available.naccelerators = 0
resources_available.netwins = 0
resources_available.scratch = 241522968kb
resources_assigned.mem = 0kb
resources_assigned.ncpus = 0
resources_assigned.vmem = 0kb
resources_assigned.accelerator_memory = 0kb
resources_assigned.naccelerators = 0
resources_assigned.netwins = 0
b07b35
Mom = b07b35.barrine.hpcu.uq.edu.au
ntype = PBS
state = free
pcpus = <various>
jobs = 1417623[884].paroo3/0, 1417071[9].paroo3/1, 1417623[1015].paroo3/2, 1417623[1015].paroo3/0, 1417071[9].paroo3/0, 1417627.paroo3/1
resv_enable = True
sharing = <various>
resources_available.arch = linux_cpuset
resources_available.host = b07b35
resources_available.mem = 24600820kb
resources_available.ncpus = 8
resources_available.NodeType = medium
resources_available.router = "b07b35,ib-05,barrine"
resources_available.schedclass = flex,normal,large,constrain,reserv
resources_available.schedmins = 6854
resources_available.vmem = 0kb
resources_available.vnode = <various>
resources_available.accelerator_memory = 0kb
resources_available.naccelerators = 0
resources_available.netwins = 0
resources_available.scratch = 241462844kb
resources_assigned.mem = 24117248kb
resources_assigned.ncpus = 4
resources_assigned.vmem = 0kb
resources_assigned.accelerator_memory = 0kb
resources_assigned.naccelerators = 0
resources_assigned.netwins = 0
b10b18
Mom = b10b18.barrine.hpcu.uq.edu.au
ntype = PBS
state = state-unknown,offline
pcpus = 1
comment = EACHAM Windows Cluster :20130312 09:51 dannys
resv_enable = True
sharing = default_shared
resources_available.host = b10b18
resources_available.mem = 0kb
resources_available.ncpus = 0
resources_available.NodeType = large
resources_available.schedclass = flex,normal,large,constrain,reserv
resources_available.schedmins = 40320
resources_available.vmem = 0kb
resources_available.vnode = <various>
resources_available.accelerator_memory = 0kb
resources_available.naccelerators = 0
resources_available.netwins = 0
resources_assigned.mem = 0kb
resources_assigned.ncpus = 0
resources_assigned.vmem = 0kb
resources_assigned.accelerator_memory = 0kb
resources_assigned.naccelerators = 0
resources_assigned.netwins = 0
b10a06
Mom = b10a06.barrine.hpcu.uq.edu.au
ntype = PBS
state = free
pcpus = <various>
jobs = 1417387[3].paroo3/0, 1417387[3].paroo3/1, 1417387[3].paroo3/2, 1417387[3].paroo3/0
resv_enable = True
sharing = <various>
resources_available.arch = linux_cpuset
resources_available.host = b10a06
resources_available.mem = 74243316kb
resources_available.ncpus = 8
resources_available.NodeType = large
resources_available.router = "b10a06,ib-05,barrine"
resources_available.schedclass = flex,normal,large,constrain,reserv
resources_available.schedmins = 8424
resources_available.vmem = 0kb
resources_available.vnode = <various>
resources_available.accelerator_memory = 0kb
resources_available.naccelerators = 0
resources_available.netwins = 0
resources_available.scratch = 240796124kb
resources_assigned.mem = 52428800kb
resources_assigned.ncpus = 3
resources_assigned.vmem = 0kb
resources_assigned.accelerator_memory = 0kb
resources_assigned.naccelerators = 0
resources_assigned.netwins = 0
b01a29
Mom = b01a29.barrine.hpcu.uq.edu.au
ntype = PBS
state = <various>
pcpus = 16
jobs = 1417589[3].paroo3/0, 1417589[3].paroo3/1, 1417589[3].paroo3/2, 1417589[3].paroo3/3, 1417589[3].paroo3/0, 1417725.paroo3/1, 1417725.paroo3/2, 1417725.paroo3/3, 1417725.paroo3/3
resv_enable = True
sharing = <various>
resources_available.arch = linux_cpuset
resources_available.host = b01a29
resources_available.mem = 24602260kb
resources_available.ncpus = 8
resources_available.NodeType = medium
resources_available.router = b01a29,med-04,ib-04,barrine
resources_available.schedclass = flex,normal,large,constrain,reserv
resources_available.schedmins = 40320
resources_available.vmem = 32794252kb
resources_available.vnode = <various>
resources_available.accelerator_memory = 0kb
resources_available.naccelerators = 0
resources_available.netwins = 0
resources_available.scratch = 241529652kb
resources_assigned.mem = 23068672kb
resources_assigned.ncpus = 8
resources_assigned.vmem = 0kb
resources_assigned.accelerator_memory = 0kb
resources_assigned.naccelerators = 0
resources_assigned.netwins = 0
b07b02
Mom = b07b02.barrine.hpcu.uq.edu.au
ntype = PBS
state = down
pcpus = <various>
comment = node down: communication closed
resv_enable = True
sharing = <various>
resources_available.arch = linux_cpuset
resources_available.host = b07b02
resources_available.mem = 24600820kb
resources_available.ncpus = 8
resources_available.NodeType = medium
resources_available.router = "b07b02,ib-05,barrine"
resources_available.schedclass = flex,normal,large,constrain,reserv
resources_available.schedmins = 40320
resources_available.vmem = 0kb
resources_available.vnode = <various>
resources_available.accelerator_memory = 0kb
resources_available.naccelerators = 0
resources_available.netwins = 0
resources_available.scratch = 241481804kb
resources_assigned.mem = 0kb
resources_assigned.ncpus = 0
resources_assigned.vmem = 0kb
resources_assigned.accelerator_memory = 0kb
resources_assigned.naccelerators = 0
resources_assigned.netwins = 0
b10a04
Mom = b10a04.barrine.hpcu.uq.edu.au
ntype = PBS
state = free
pcpus = <various>
resv_enable = True
sharing = <various>
resources_available.arch = linux_cpuset
resources_available.host = b10a04
resources_available.mem = 74243324kb
resources_available.ncpus = 8
resources_available.NodeType = large
resources_available.router = b10a04,larg,ib-04,barrine
resources_available.schedclass = flex,normal,large,constrain,reserv
resources_available.schedmins = 40320
resources_available.vmem = 0kb
resources_available.vnode = <various>
resources_available.accelerator_memory = 0kb
resources_available.naccelerators = 0
resources_available.netwins = 0
resources_available.scratch = 241524956kb
resources_assigned.mem = 0kb
resources_assigned.ncpus = 0
resources_assigned.vmem = 0kb
resources_assigned.accelerator_memory = 0kb
resources_assigned.naccelerators = 0
resources_assigned.netwins = 0
b02b33
Mom = b02b33.barrine.hpcu.uq.edu.au
ntype = PBS
state = free
pcpus = <various>
jobs = 1417335[7].paroo3/0, 1417335[7].paroo3/0, 1417623[1097].paroo3/1, 1417623[1076].paroo3/2
resv_enable = True
sharing = <various>
resources_available.arch = linux_cpuset
resources_available.host = b02b33
resources_available.mem = 24600820kb
resources_available.ncpus = 8
resources_available.NodeType = medium
resources_available.router = b02b33,med-04,ib-04,barrine
resources_available.schedclass = flex,normal,large,constrain,reserv
resources_available.schedmins = 40320
resources_available.vmem = 0kb
resources_available.vnode = <various>
resources_available.accelerator_memory = 0kb
resources_available.naccelerators = 0
resources_available.netwins = 0
resources_available.scratch = 241528360kb
resources_assigned.mem = 23068672kb
resources_assigned.ncpus = 3
resources_assigned.vmem = 0kb
resources_assigned.accelerator_memory = 0kb
resources_assigned.naccelerators = 0
resources_assigned.netwins = 0
OK, I see what the problem is. I will get back to you. In the meantime, you might want to look into how to install from source/branch instead of PyPI (if you haven't already), so that you can test the change once I commit it.
The exception._type
problem has been fixed. Mark, any update on the PBS layer problem?
The
exception._type
problem has been fixed.
What was the issue? How did you fix it?
I have an issue with PBS Pro 14.1.2.
Hi @alexsalex, the original thread is somewhat outdated by now. Would you mind opening a new issue with a description of the problem you are facing? Thank you!
Closing this ticket as there has been no update on this thread.
Hi,
I'm using this example, modified slightly for our HPC resource (http://saga-python.readthedocs.org/en/latest/adaptors/saga.adaptor.pbsjob.html)
SAGA_VERBOSE=5 (with initial lines deliberately missing).