Closed mturilli closed 10 years ago
Hmm... Is it really `my_bag-size`, not `my_bag_size`? If so, please send me your config files...
Thanks, Andre.
# config_application.json
{
# Application-level configuration: the per-experiment variables, the
# session id template built from them, and app-specific additions to
# the troy resource configuration.
# NOTE(review): keys mix '_' and '-' (e.g. "my_bag-size"); placeholders
# such as %(my_bag-size)s presumably have to match the key spelling
# exactly -- confirm against the placeholder expansion code.
# variables we want to vary for each experiment run
"my_steps" : 256,
"my_bag-size" : 64,
"my_dci" : "X000",
"my_resource" : "ST00",
"my_pilot-system": "SP00",
"my_binding" : "E000",
"my_w-scheduler" : "WRR0",
"my_o-scheduler" : "ORR0",
"my_concurrency" : "0100",
"my_n-workloads" : "0001",
"my_n-pilots" : "0001",
"my_kernel" : "GROM",
"my_run" : "0001",
# Build up a unique session id from those variables. This
# ID will be used by troy to identify this run.
# With the values above, and assuming plain %(name)s substitution
# (TODO confirm), the template below expands to:
# X000ST00SP00-E000WRR0ORR00100-000100010064-GROM0001-
"session_id": "%(my_dci)s%(my_resource)s%(my_pilot-system)s-%(my_binding)s%(my_w-scheduler)s%(my_o-scheduler)s%(my_concurrency)s-%(my_n-workloads)s%(my_n-pilots)s00%(my_bag-size)s-%(my_kernel)s%(my_run)s-",
# We add some additional, app specific information to the
# troy resource configuration, so that we can use placeholders
# like '%(mdrun)s' in our workload descriptions.
# This section *must* be named `resources`.
"resources" : {
# NOTE(review): this entry defines no "home", unlike "stampede.*";
# confirm no workload that uses %(home)s is run against it.
"*.futuregrid.org" : {
"username": "mturilli",
"mdrun" : "/N/u/marksant/bin/mdrun"
},
"stampede.*" : {
"username": "mturilli",
"home" : "/home1/02855/mturilli",
"mdrun" : "/home1/01740/marksant/bin/mdrun"
},
# localhost has mdrun in path
"localhost" : {
"mdrun" : "mdrun"
}
}
}
# config_troy.json
{
# troy configuration: a few frequently changed top-level variables,
# followed by plugin selection and per-plugin settings that refer
# back to those variables through %(name)s placeholders.
# frequently changing variables
"hosts" : "slurm+ssh://stampede.tacc.utexas.edu",
"concurrency" : "100",
"pilot_backend" : "sagapilot",
"troy_strategy" : "basic_early_binding",
"troy_timing" : "store",
"troy_timing_db": "mongodb://ec2-184-72-89-141.compute-1.amazonaws.com:27017/timing/",
# troy plugin selection & plugin configurations
"planner": {
"plugin_planner_derive": "concurrent",
"derive": {
"concurrent": {
"concurrency": "%(concurrency)s"
}
}
},
"overlay_manager": {
# plugin selection for overlay manager
"plugin_overlay_scheduler" : "round_robin",
"plugin_overlay_provisioner" : "%(pilot_backend)s",
# plugin configuration for overlay manager
# NOTE(review): presumably only the section named by pilot_backend
# ("sagapilot" here) is used -- confirm.
"overlay_provisioner": {
"bigjob": {
# NOTE(review): %(redis_passwd)s is not defined in this file;
# presumably it is supplied elsewhere -- confirm.
"coordination_url": "redis://%(redis_passwd)s@gw68.quarry.iu.teragrid.org:6379"
},
"sagapilot": {
"coordination_url": "mongodb://ec2-184-72-89-141.compute-1.amazonaws.com:27017/"
}
},
"overlay_scheduler": {
"round_robin": {
"resources": "%(hosts)s"
}
}
},
"workload_manager": {
# plugin selection for workload manager
"plugin_workload_scheduler" : "round_robin",
"plugin_workload_dispatcher": "%(pilot_backend)s",
# plugin configuration for workload manager; the coordination URLs
# repeat the overlay_provisioner values above
"workload_dispatcher": {
"bigjob": {
"coordination_url" : "redis://%(redis_passwd)s@gw68.quarry.iu.teragrid.org:6379"
},
"sagapilot": {
"coordination_url" : "mongodb://ec2-184-72-89-141.compute-1.amazonaws.com:27017/"
}
}
}
}
# workload_gromacs.json
{
  "tasks" : [
    {
      "tag" : "stager",
      "cardinality" : 1,
      "executable" : "true",
      "walltime" : 1,
      "working_directory" : "%(home)s/AIMES-SC2014-experiments/",
      "inputs" : ["input/topol.tpr > topol.tpr"]
    },
    {
      "tag" : "worker",
      "cardinality" : "%(my_bag-size)s",
      "executable" : "/bin/sh",
      "walltime" : 20,
      "arguments" : ["-c", "cd %(working_directory)s && ln -s ../topol.tpr . && %(mdrun)s"],
      "working_directory" : "%(home)s/AIMES-SC2014-experiments/bag-%(my_bag-size)s_run-%(my_run)s_task-%(cardinal)s/",
      "outputs" : [
        "output/%(session_id)s_state.cpt.%(cardinal)s < state.cpt",
        "output/%(session_id)s_confout.gro.%(cardinal)s < confout.gro",
        "output/%(session_id)s_ener.edr.%(cardinal)s < ener.edr",
        "output/%(session_id)s_traj.trr.%(cardinal)s < traj.trr",
        "output/%(session_id)s_md.log.%(cardinal)s < md.log"
      ]
    }
  ],
  "relations" : [
    {
      "head" : "stager",
      "tail" : "worker",
      "relation_time" : "SEQUENTIAL_END",
      "relation_space" : ""
    }
  ]
}
This turned out to be a typo, so the ticket can be closed. However, it demonstrates the need for better error reporting on placeholder replacements...
In the workload configuration I have:
In pilot working directory on the remote resource I see:
And in AGENT.LOG I see:
It appears that the macros in:
are not expanded.