radical-cybertools / radical.owms

Tiered Resource OverlaY
Other
0 stars 1 forks source link

File staging: Macros are not expanded #79

Closed mturilli closed 10 years ago

mturilli commented 10 years ago

In the workload configuration I have:

{
  "tasks" : [
    {
      "tag"               : "stager",
      "cardinality"       : 1,
      "executable"        : "true",
      "walltime"          : 1,
      "working_directory" : "%(home)s/AIMES-SC2014-experiments/",
      "inputs"            : ["input/topol.tpr > topol.tpr"]
    },
    {
      "tag"               : "worker",
      "cardinality"       : "%(my_bag-size)s",
      "executable"        : "/bin/sh",
      "walltime"          : 20,
      "arguments"         : ["-c", "cd %(working_directory)s && ln -s ../topol.tpr . && %(mdrun)s"],
      "working_directory" : "%(home)s/AIMES-SC2014-experiments/bag-%(my_bag-size)s_run-%(run)s_task-%(cardinal)s/",
[...]

In the pilot's working directory on the remote resource I see:

$ ls -latr
total 124
-rwx------  1 mturilli G-801782  6370 Mar 12 20:23 bootstrap-and-run-agent
-rw-------  1 mturilli G-801782 42005 Mar 12 20:23 sagapilot-agent.py
-rw-------  1 mturilli G-801782   396 Mar 12 21:58 STDERR
-rw-------  1 mturilli G-801782  8067 Mar 12 21:59 STDOUT
drwx------  3 mturilli G-801782  4096 Mar 12 21:59 %(home)s
-rw-------  1 mturilli G-801782 38721 Mar 12 21:59 AGENT.LOG
drwx------  3 mturilli G-801782  4096 Mar 12 21:59 .
drwx------ 38 mturilli G-801782 12288 Mar 13 00:32 ..

And in AGENT.LOG I see:

2014-03-12 21:59:04,338 - sagapilot.agent - INFO - Launching task 5321087820a6411c3d3adcf0 via /usr/bin/ssh c548-603  cd /home1/02855/mturilli/AIMES-SC2014-experiments/ &&  true  (env: None) in /home1/02855/mturilli/AIMES-SC2014-experiments/
[...]
2014-03-12 21:59:10,722 - sagapilot.agent - INFO - Launching task 53211efb20a6411c3d3adcf1 via /usr/bin/ssh c548-603  cd %(home)s/AIMES-SC2014-experiments/bag-%(my_bag-size)s_run-%(run)s_task-%(cardinal)s/ &&  /bin/sh  -c  cd %(home)s/AIMES-SC2014-experiments/bag-%(my_bag-size)s_run-%(run)s_task-%(cardinal)s/ && ln -s ../topol.tpr . && /home1/01740/marksant/bin/mdrun  (env: None) in %(home)s/AIMES-SC2014-experiments/bag-%(my_bag-size)s_run-%(run)s_task-%(cardinal)s/

It appears that the macros in:

"arguments"         : ["-c", "cd %(working_directory)s && ln -s ../topol.tpr . && %(mdrun)s"]

are not expanded.

andre-merzky commented 10 years ago

Hmm....

Is it really my_bag-size, not my_bag_size? If so, please send me your config files...

Thanks, Andre.

mturilli commented 10 years ago
# config_application.json
{
    # variables we want to vary for each experiment run
    "my_steps"       : 256,
    "my_bag-size"    : 64,

    "my_dci"         : "X000",
    "my_resource"    : "ST00",
    "my_pilot-system": "SP00",
    "my_binding"     : "E000",
    "my_w-scheduler" : "WRR0",
    "my_o-scheduler" : "ORR0",
    "my_concurrency" : "0100",
    "my_n-workloads" : "0001",
    "my_n-pilots"    : "0001",
    "my_kernel"      : "GROM",
    "my_run"         : "0001",

    # build up a unique session id from those variables.  This
    # ID will be used by troy to identify this run.
    # X000ST00SP00-L000WRR0ORR0100-000100010064-GROM0001
    "session_id": "%(my_dci)s%(my_resource)s%(my_pilot-system)s-%(my_binding)s%(my_w-scheduler)s%(my_o-scheduler)s%(my_concurrency)s-%(my_n-workloads)s%(my_n-pilots)s00%(my_bag-size)s-%(my_kernel)s%(my_run)s-",

    # We add some additional, app-specific information to the
    # troy resource configuration, so that we can use placeholders
    # like '%(mdrun)s' in our workload descriptions.
    # This section *must* be named `resources`.
    "resources" : {
        "*.futuregrid.org" : {
            "username": "mturilli",
            "mdrun"   : "/N/u/marksant/bin/mdrun"
        },
        "stampede.*" : {
            "username": "mturilli",
            "home"    : "/home1/02855/mturilli",
            "mdrun"   : "/home1/01740/marksant/bin/mdrun"
        },
        # localhost has mdrun in path
        "localhost" : {
            "mdrun"        : "mdrun"
        }
    }
}
# config_troy.json
{
    # frequently changing variables
    "hosts"         : "slurm+ssh://stampede.tacc.utexas.edu",
    "concurrency"   : "100",
    "pilot_backend" : "sagapilot",
    "troy_strategy" : "basic_early_binding",
    "troy_timing"   : "store",
    "troy_timing_db": "mongodb://ec2-184-72-89-141.compute-1.amazonaws.com:27017/timing/",

    # troy plugin selection & plugin configurations
    "planner": {
        "plugin_planner_derive": "concurrent",
        "derive": {
            "concurrent": {
                "concurrency": "%(concurrency)s"
            }
        }
    },

    "overlay_manager": {

        # plugin selection for overlay manager
        "plugin_overlay_scheduler"   : "round_robin",
        "plugin_overlay_provisioner" : "%(pilot_backend)s",

        # plugin configuration for overlay manager
        "overlay_provisioner": {
            "bigjob": {
                "coordination_url": "redis://%(redis_passwd)s@gw68.quarry.iu.teragrid.org:6379"
            },
            "sagapilot": {
                "coordination_url": "mongodb://ec2-184-72-89-141.compute-1.amazonaws.com:27017/"
            }
        },
        "overlay_scheduler": {
            "round_robin": {
                "resources": "%(hosts)s"
            }
        }
    },

    "workload_manager": {
        "plugin_workload_scheduler" : "round_robin",
        "plugin_workload_dispatcher": "%(pilot_backend)s",
        "workload_dispatcher": {
            "bigjob": {
                "coordination_url"  : "redis://%(redis_passwd)s@gw68.quarry.iu.teragrid.org:6379"
            },
            "sagapilot": {
                "coordination_url"  : "mongodb://ec2-184-72-89-141.compute-1.amazonaws.com:27017/"
            }
        }
    }
}
# workload_gromacs.json
{
  "tasks" : [
    {
      "tag"               : "stager",
      "cardinality"       : 1,
      "executable"        : "true",
      "walltime"          : 1,
      "working_directory" : "%(home)s/AIMES-SC2014-experiments/",
      "inputs"            : ["input/topol.tpr > topol.tpr"]
    },
    {
      "tag"               : "worker",
      "cardinality"       : "%(my_bag-size)s",
      "executable"        : "/bin/sh",
      "walltime"          : 20,
      "arguments"         : ["-c", "cd %(working_directory)s && ln -s ../topol.tpr . && %(mdrun)s"],
      "working_directory" : "%(home)s/AIMES-SC2014-experiments/bag-%(my_bag-size)s_run-%(run)s_task-%(cardinal)s/",
      "outputs"           : ["output/%(session_id)s_state.cpt.%(cardinal)s   < state.cpt",
                             "output/%(session_id)s_confout.gro.%(cardinal)s < confout.gro",
                             "output/%(session_id)s_ener.edr.%(cardinal)s    < ener.edr",
                             "output/%(session_id)s_traj.trr.%(cardinal)s    < traj.trr",
                             "output/%(session_id)s_md.log.%(cardinal)s      < md.log"]
    }
  ], 
  "relations" : [
    {
        "head"           : "stager",
        "tail"           : "worker",
        "relation_time"  : "SEQUENTIAL_END",
        "relation_space" : ""
    }
  ]
}
andre-merzky commented 10 years ago

Turned out to be a typo, and can be closed. But that ticket demonstrates the need for better error reporting on placeholder replacements...