molgenis / molgenis-compute

MOLGENIS Compute is a framework for bioinformatics which enables large scale data and computational workflow management in a distributed execution environment.
http://wiki.gcc.rug.nl/wiki/ComputeStart
GNU Lesser General Public License v3.0
4 stars 16 forks source link

Invalid parameter remapping in workflow.csv does not throw errors #282

Open mmterpstra opened 4 years ago

mmterpstra commented 4 years ago

Parameter remapping in workflow.csv does not throw an error when the target parameter does not exist. Take the example from https://rawgit.com/molgenis/molgenis-compute/master/molgenis-compute-core/README.html with this workflow.csv:

step,protocol,parameterMapping
step1,protocols/step1.sh,in=input
step2,protocols/step2.sh,wf=workflowName;date=creationDate;strings=step1.out

You can do this and it happily runs (molgenis never complains about pinkunicornsdancingonrainbows, even though it is never set anywhere):

step,protocol,parameterMapping
step1,protocols/step1.sh,in=input;pinkunicornsdancingonrainbows=input
step2,protocols/step2.sh,wf=workflowName;date=creationDate;strings=step1.out

For archival reasons

mmterpstra commented 4 years ago

Workaround (partial): it should actually check the protocols, instead of the parameter files, for the parameters to remap:

...oldcode...

edit: now works on my pc:

#!/usr/bin/env python
import sys
import time

# Open the workflow CSV named by the first command-line argument.
# The handle is kept in `workflow` for the parsing code further down.
if len(sys.argv) < 2:
    sys.stderr.write("Usage: %s workflow.csv\n" % sys.argv[0])
    sys.exit(1)
try:
    workflow = open(sys.argv[1])
except IOError:
    # Stop here: without the workflow there is nothing to analyse. The
    # previous bare `exit` expression was a no-op and let the script run on.
    print('Die cannot open file ' + sys.argv[1])
    sys.exit(1)

def info(info):
    """Write a timestamped INFO line for *info* to standard error."""
    # sys.stderr.write behaves the same on Python 2 and 3, unlike the
    # Python 2-only `print >> stream` statement.
    sys.stderr.write("## %s ## INFO ## %s\n" % (time.asctime(), info))

def errornofail(error):
    """Write a timestamped ERROR line for *error* to standard error.

    Unlike a fatal error, this only reports; it does not terminate the
    program.
    """
    # sys.stderr.write behaves the same on Python 2 and 3, unlike the
    # Python 2-only `print >> stream` statement.
    sys.stderr.write("## %s ## ERROR ## %s\n" % (time.asctime(), error))

def error(error):
    """Write a timestamped ERROR line for *error* to standard error, then exit.

    Raises:
        SystemExit: always, with exit status 1.
    """
    sys.stderr.write("## %s ## ERROR ## %s\n" % (time.asctime(), error))
    # sys.exit is the supported way to terminate a program; the bare
    # `exit` builtin is only installed by the `site` module.
    sys.exit(1)

info("Analysing " + sys.argv[1])

# Protocol script paths, one per workflow step row, in file order.
protocolfiles = []
# Step name -> number of rows that use that step name.
steps = {}
# Left-hand name of every "name=value" entry in a step's parameterMapping
# column. Each name is registered with a counter of 0 here.
parameterseen = {}

# `with` closes the workflow file even if parsing fails part-way.
with workflow:
    for i, line in enumerate(workflow):
        if i == 0:
            # Skip the header row (step,protocol,parameterMapping).
            continue
        line = line.rstrip()
        if not line:
            # Blank lines carry no step; indexing them would fail.
            continue
        fields = line.split(",")
        step = fields[0]
        if step.startswith("#"):
            # Commented-out step.
            continue
        if len(fields) < 2:
            error("Malformed workflow line %d: '%s'" % (i + 1, line))
        steps[step] = steps.get(step, 0) + 1
        protocolfiles.append(fields[1])
        # The mapping column is optional; treat a missing one as empty.
        mapping = fields[2] if len(fields) > 2 else ""
        for element in mapping.split(";"):
            name, sep, _value = element.partition("=")
            # Strip stray whitespace so " in=input" registers "in".
            name = name.strip()
            if sep and name:
                parameterseen[name] = 0

#for parameterfile in sys.argv[2::]:
#    info('processing ' + parameterfile)
#    try:
#        parameters = open(parameterfile)
#    except:
#        print('Die cannot open file ' + parameterfile)
#        exit
#    try:
#        for i, line in enumerate(parameters):
#            line = line.rstrip()
#            csv = line.split(",")
#            if(csv[0] in parameterseen):
#                parameterseen[csv[0]]=parameterseen[csv[0]]+1
#    finally:
#        parameters.close()

# Scan every protocol for its "#string" / "#list" header lines and count
# how often each remapped parameter name is declared there.
for protocolfile in protocolfiles:
    info('processing ' + protocolfile)
    try:
        protocol = open(protocolfile)
    except IOError:
        # Stop here: the previous bare `exit` expression was a no-op, so the
        # loop carried on with an undefined or stale file handle.
        print('Die cannot open file ' + protocolfile)
        sys.exit(1)
    # `with` closes the protocol file even if scanning fails part-way.
    with protocol:
        for line in protocol:
            line = line.rstrip()
            if not line.startswith(("#list", "#string")):
                continue
            # Everything after the keyword is a comma-separated name list.
            parts = line.split(None, 1)
            if len(parts) < 2:
                # Keyword without any parameter name.
                continue
            for val in parts[1].split(","):
                val = val.strip()
                if val in parameterseen:
                    parameterseen[val] += 1

# Report every remapped parameter whose counter is still below 1 and exit
# with status 1 if there is at least one. Names are sorted so the report
# order is the same on every run.
# NOTE(review): the message says "parameter files", but the active code
# above counts declarations in protocol files - confirm the wording.
missing = sorted(key for key in parameterseen if parameterseen[key] < 1)
for key in missing:
    errornofail("Parameter '" + key + "' not in parameter files")
fail = 1 if missing else 0
if fail == 1:
    # sys.exit is the supported way to terminate a program; the bare
    # `exit` builtin is only installed by the `site` module.
    sys.exit(1)