cdanielmachado / carveme

CarveMe: genome-scale metabolic model reconstruction
Other
159 stars 54 forks source link

Multi-process processing error #211

Open green34567 opened 1 month ago

green34567 commented 1 month ago

When I run `carve -r ./*.faa ...`, an error occurs: `AttributeError: Can't pickle local object 'main.<locals>.f'`. This shows that CarveMe's code hits a serialization problem during parallel processing: the error is raised when multiprocessing tries to send a local function object (the `f` nested inside `main`) to the worker processes. I tried modifying the end of carve.py (the part after `def main()`) as shown below, and that solved the problem!

def f(x, input_type, outputfile, diamond_args, universe, universe_file, ensemble_size, verbose, flavor, gapfill, init, mediadb, default_score, uptake_score, soft_score, soft, hard, reference, ref_score, blind_gapfill=False):
    """Run ``maincall`` for one input file in recursive (bulk) mode.

    This worker lives at module level rather than inside ``main`` so that
    ``multiprocessing`` can pickle it when handing work to the pool; a function
    nested in ``main`` fails with
    ``AttributeError: Can't pickle local object 'main.<locals>.f'``.

    Args:
        x: Input file for this call; passed to ``maincall`` as ``inputfile``.
        input_type, outputfile, diamond_args, universe, universe_file,
        ensemble_size, verbose, flavor, gapfill, init, mediadb, default_score,
        uptake_score, soft_score, soft, hard, reference, ref_score:
            Forwarded unchanged to the ``maincall`` keyword argument of the
            same name.
        blind_gapfill: Forwarded to ``maincall``. Defaults to ``False``, the
            value that used to be hard-coded here, so existing 19-argument
            callers keep their behaviour.
    """
    maincall(
        inputfile=x,
        input_type=input_type,
        outputfile=outputfile,
        diamond_args=diamond_args,
        universe=universe,
        universe_file=universe_file,
        ensemble_size=ensemble_size,
        verbose=verbose,
        flavor=flavor,
        gapfill=gapfill,
        blind_gapfill=blind_gapfill,
        init=init,
        mediadb=mediadb,
        default_score=default_score,
        uptake_score=uptake_score,
        soft_score=soft_score,
        soft=soft,
        hard=hard,
        reference=reference,
        ref_score=ref_score,
        recursive_mode=True
    )


def main():
    """Parse the ``carve`` command line and run the requested reconstruction(s).

    Without ``-r`` exactly one input is accepted and ``maincall`` is invoked
    once in the current process. With ``-r`` every input is dispatched to a
    ``multiprocessing`` pool through the module-level worker ``f``.

    Invalid option combinations are reported through ``parser.error``, which
    prints the message and exits.
    """
    parser = argparse.ArgumentParser(description="Reconstruct a metabolic model using CarveMe",
                                     formatter_class=argparse.RawTextHelpFormatter)

    parser.add_argument('input', metavar='INPUT', nargs='+',
                        help="Input (protein fasta file by default, see other options for details).\n" +
                             "When used with -r an input pattern with wildcards can also be used.\n" +
                             "When used with --refseq an NCBI RefSeq assembly accession is expected."
                        )

    # At most one explicit input type may be given; none means protein fasta.
    input_type_args = parser.add_mutually_exclusive_group()
    input_type_args.add_argument('--dna', action='store_true', help="Build from DNA fasta file")
    input_type_args.add_argument('--egg', action='store_true', help="Build from eggNOG-mapper output file")
    input_type_args.add_argument('--diamond', action='store_true', help=argparse.SUPPRESS)
    input_type_args.add_argument('--refseq', action='store_true', help="Download genome from NCBI RefSeq and build")

    parser.add_argument('--diamond-args', help="Additional arguments for running diamond")
    parser.add_argument('-r', '--recursive', action='store_true', dest='recursive', help="Bulk reconstruction from folder with genome files")
    parser.add_argument('-o', '--output', dest='output', help="SBML output file (or output folder if -r is used)")

    univ = parser.add_mutually_exclusive_group()
    univ.add_argument('-u', '--universe', dest='universe', help="Pre-built universe model (default: bacteria)")
    univ.add_argument('--universe-file', dest='universe_file', help="Reaction universe file (SBML format)")

    sbml = parser.add_mutually_exclusive_group()
    sbml.add_argument('--cobra', action='store_true', help="Output SBML in old cobra format")
    sbml.add_argument('--fbc2', action='store_true', help="Output SBML in sbml-fbc2 format")

    parser.add_argument('-n', '--ensemble', type=int, dest='ensemble', help="Build model ensemble with N models")
    parser.add_argument('-g', '--gapfill', dest='gapfill', help="Gap fill model for given media")
    parser.add_argument('-i', '--init', dest='init', help="Initialize model with given medium")
    parser.add_argument('--mediadb', help="Media database file")
    parser.add_argument('-v', '--verbose', action='store_true', dest='verbose', help="Switch to verbose mode")
    parser.add_argument('-d', '--debug', action='store_true', dest='debug', help="Debug mode: writes intermediate results into output files")
    parser.add_argument('--soft', help="Soft constraints file")
    parser.add_argument('--hard', help="Hard constraints file")
    parser.add_argument('--reference', help="Manually curated model of a close reference species.")
    parser.add_argument('--solver', help="Select MILP solver. Available options: cplex [default], gurobi.")
    # Hidden options (help suppressed).
    parser.add_argument('--default-score', type=float, default=-1.0, help=argparse.SUPPRESS)
    parser.add_argument('--uptake-score', type=float, default=0.0, help=argparse.SUPPRESS)
    parser.add_argument('--soft-score', type=float, default=1.0, help=argparse.SUPPRESS)
    parser.add_argument('--reference-score', type=float, default=0.0, help=argparse.SUPPRESS)
    parser.add_argument('--blind-gapfill', action='store_true', help=argparse.SUPPRESS)

    args = parser.parse_args()

    # Reject option combinations that are unsupported before doing any work.
    if args.gapfill and args.ensemble:
        parser.error('Gap fill and ensemble generation cannot currently be combined (not implemented yet).')

    if (args.soft or args.hard) and args.ensemble:
        parser.error('Soft/hard constraints and ensemble generation cannot currently be combined (not implemented yet).')

    if args.mediadb and not args.gapfill:
        parser.error('--mediadb can only be used with --gapfill')

    if args.recursive and args.refseq:
        parser.error('-r cannot be combined with --refseq')

    if args.egg:
        input_type = 'eggnog'
    elif args.dna:
        input_type = 'dna'
    elif args.diamond:
        input_type = 'diamond'
    elif args.refseq:
        input_type = 'refseq'
    else:
        input_type = 'protein'

    # An explicit flag wins; otherwise fall back to the configured default.
    if args.fbc2:
        flavor = 'fbc2'
    elif args.cobra:
        flavor = 'cobra'
    else:
        flavor = config.get('sbml', 'default_flavor')

    if args.solver:
        set_default_solver(args.solver)

    first_run_check()

    if not args.recursive:
        if len(args.input) > 1:
            parser.error('Use -r when specifying more than one input file')

        maincall(
            inputfile=args.input[0],
            input_type=input_type,
            outputfile=args.output,
            diamond_args=args.diamond_args,
            universe=args.universe,
            universe_file=args.universe_file,
            ensemble_size=args.ensemble,
            verbose=args.verbose,
            debug=args.debug,
            flavor=flavor,
            gapfill=args.gapfill,
            # Honour --blind-gapfill; it used to be parsed but then ignored
            # because False was hard-coded here.
            blind_gapfill=args.blind_gapfill,
            init=args.init,
            mediadb=args.mediadb,
            default_score=args.default_score,
            uptake_score=args.uptake_score,
            soft_score=args.soft_score,
            soft=args.soft,
            hard=args.hard,
            reference=args.reference,
            ref_score=args.reference_score
        )

    else:
        # Positional arguments shared by every worker call, in the exact order
        # of f's parameters after ``x``. Note that --debug is not forwarded in
        # recursive mode.
        shared_args = (
            input_type, args.output, args.diamond_args, args.universe, args.universe_file,
            args.ensemble, args.verbose, flavor, args.gapfill, args.init, args.mediadb,
            args.default_score, args.uptake_score, args.soft_score, args.soft, args.hard,
            args.reference, args.reference_score, args.blind_gapfill,
        )
        with Pool() as p:
            p.starmap(f, [(x,) + shared_args for x in args.input])

# Script entry point: run the command-line interface only when this file is
# executed directly, not when it is imported as a module.
if __name__ == '__main__':
    main()
xiangjianmao commented 1 month ago

That's great! I faced the same error and solved the problem by following your idea. Thanks!