DataBiosphere / toil

A scalable, efficient, cross-platform (Linux/macOS) and easy-to-use workflow engine in pure Python.
http://toil.ucsc-cgl.org/.
Apache License 2.0
900 stars 240 forks source link

WDL has trouble with JSON file imports due to normalization #5120

Closed stxue1 closed 1 month ago

stxue1 commented 1 month ago
version 1.1

# Reproduction workflow: in each declaration section (input, body, output) a
# File is declared from the string "foo.data", the sibling ".index" path is
# derived from a File with sub(), and that derived path is read with
# read_string().
workflow wf {
  input {

#    File out = "foo.index"
    # Default value; the inputs JSON in this report supplies "wf.data_file"
    # as an absolute path instead.
    File data_file = "foo.data"
    # Swap the trailing ".data" for ".index" and read the resulting file.
    # NOTE(review): the traceback in this report fails while evaluating the
    # workflow inputs inside a standard-library call, presumably this one.
    String index = read_string(sub(data_file, "\\.data$", ".index"))
    # Same substitution without the read, so the derived path string itself
    # can be inspected.
    String test = sub(data_file, "\\.data$", ".index")
  }
  # Body-level variants of the input declarations above.
  File data_file_body = "foo.data"
  # Note: these derive from the input `data_file`, not from `data_file_body`.
  String index_body = read_string(sub(data_file, "\\.data$", ".index"))
  String test_body = sub(data_file, "\\.data$", ".index")
  output {
    # Output-level variants.
    File data_file_out = "foo.data"
    # Reads via a string literal rather than a File-typed value.
    String data = read_string("foo.data")
    # Derives the index path from the output-section File.
    String index_out = read_string(sub(data_file_out, "\\.data$", ".index"))
    # Re-exports the value read in the input section.
    String index_input_out = index
    # Coerces the output File back to a String.
    String data_file_out_as_str = data_file_out
#    String out_as_str = out
  }
}
{
  "wf.data_file": "/home/heaucques/Documents/wdl-conformance-tests/foo.data"
}

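I think the defer virtualization PR inherently runs Toil.normalize_uri on all file imports from JSON, resulting in an improper use of os.path.join when we try to get the absolute path. The path passed in is already an absolute path, but it appears to reach os.path.join as a `file:` URI rather than a plain path, so it is appended to the base directory instead of replacing it (note the `.../wdl-conformance-tests/file:/home/...` path in the error below).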

Traceback (most recent call last):
  File "/home/heaucques/Documents/toil/venv3.12/lib/python3.12/site-packages/WDL/StdLib.py", line 277, in _call_eager
    ans: Value.Base = self.F(*argument_values)
                      ^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/heaucques/Documents/toil/src/toil/wdl/wdltoil.py", line 1022, in _f
    setattr(file, "virtualized_value", self._virtualize_filename(file.value))
                                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/heaucques/Documents/toil/src/toil/wdl/wdltoil.py", line 1269, in _virtualize_filename
    file_id = self._file_store.writeGlobalFile(abs_filename)
              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/heaucques/Documents/toil/src/toil/fileStores/nonCachingFileStore.py", line 128, in writeGlobalFile
    fileStoreID = self.jobStore.write_file(absLocalFileName, creatorID, cleanup)
                  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/heaucques/Documents/toil/src/toil/jobStores/fileJobStore.py", line 474, in write_file
    atomic_copy(local_path, absPath)
  File "/home/heaucques/Documents/toil/src/toil/lib/io.py", line 146, in atomic_copy
    executable = os.stat(src_path).st_mode & stat.S_IXUSR != 0
                 ^^^^^^^^^^^^^^^^^
FileNotFoundError: [Errno 2] No such file or directory: '/home/heaucques/Documents/wdl-conformance-tests/file:/home/heaucques/Documents/wdl-conformance-tests/foo.index'

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/home/heaucques/Documents/toil/src/toil/worker.py", line 439, in workerScript
    job._runner(jobGraph=None, jobStore=job_store, fileStore=fileStore, defer=defer)
  File "/home/heaucques/Documents/toil/src/toil/job.py", line 3008, in _runner
    returnValues = self._run(jobGraph=None, fileStore=fileStore)
                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/heaucques/Documents/toil/src/toil/job.py", line 2919, in _run
    return self.run(fileStore)
           ^^^^^^^^^^^^^^^^^^^
  File "/home/heaucques/Documents/toil/src/toil/wdl/wdltoil.py", line 169, in decorated
    return decoratee(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/heaucques/Documents/toil/src/toil/wdl/wdltoil.py", line 3531, in run
    bindings = evaluate_decls_to_bindings(self._workflow.inputs, bindings, standard_library, include_previous=True)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/heaucques/Documents/toil/src/toil/wdl/wdltoil.py", line 719, in evaluate_decls_to_bindings
    output_value = evaluate_defaultable_decl(each_decl, all_bindings, standard_library)
                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/heaucques/Documents/toil/src/toil/wdl/wdltoil.py", line 1661, in evaluate_defaultable_decl
    return evaluate_decl(node, environment, stdlib)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/heaucques/Documents/toil/src/toil/wdl/wdltoil.py", line 1621, in evaluate_decl
    return evaluate_named_expression(node, node.name, node.type, node.expr, environment, stdlib)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/heaucques/Documents/toil/src/toil/wdl/wdltoil.py", line 1602, in evaluate_named_expression
    value = expression.eval(environment, stdlib)
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/heaucques/Documents/toil/venv3.12/lib/python3.12/site-packages/WDL/Expr.py", line 129, in eval
    ans = self._eval(env, stdlib)
          ^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/heaucques/Documents/toil/venv3.12/lib/python3.12/site-packages/WDL/Expr.py", line 1150, in _eval
    return f(self, env, stdlib)
           ^^^^^^^^^^^^^^^^^^^^
  File "/home/heaucques/Documents/toil/venv3.12/lib/python3.12/site-packages/WDL/StdLib.py", line 233, in __call__
    return self._call_eager(expr, [arg.eval(env, stdlib=stdlib) for arg in expr.arguments])
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/heaucques/Documents/toil/venv3.12/lib/python3.12/site-packages/WDL/StdLib.py", line 282, in _call_eager
    raise Error.EvalError(expr, msg) from exn
WDL.Error.EvalError: function evaluation failed, [Errno 2] No such file or directory: '/home/heaucques/Documents/wdl-conformance-tests/file:/home/heaucques/Documents/wdl-conformance-tests/foo.index'

┆Issue is synchronized with this Jira Story ┆Issue Number: TOIL-1657