hammerlab / biokepi

Bioinformatics Ketrew Pipelines
Apache License 2.0
27 stars 4 forks source link

Installation of Mosaik is broken #146

Open smondet opened 8 years ago

smondet commented 8 years ago

On CentOS 6.x I run into https://github.com/wanpinglee/MOSAIK/issues/17

On CentOS 7.x I run into https://github.com/wanpinglee/MOSAIK/issues/23

On both I run into https://github.com/wanpinglee/MOSAIK/issues/22, which breaks this: https://github.com/hammerlab/biokepi/blob/f364d90975a1fa87b4bdf2b29d32d2ef0325a270/src/lib/tool_providers.ml#L75 (chaining the commands with `&&` is not enough to detect build failures).

Things to consider:

smondet commented 8 years ago

I got at least this working on Linux:

  (** [mosaik_binary_tool ~host ~meta_playground] describes a MOSAIK tool
      installed under [meta_playground // "mosaik-linux-64"].

      The [ensure] node downloads the Linux x64 binary tarball and the source
      tarball, then copies the [Mosaik*] executables and the paired-end /
      single-end [.ann] network files into the install directory. The tool's
      [init] program exports [PATH], [MOSAIK_PE_ANN] and [MOSAIK_SE_ANN]
      pointing into that directory. *)
  let mosaik_binary_tool ~host ~meta_playground =
    let open Ketrew.EDSL in
    let open Biokepi.Run_environment in
    let install_path = meta_playground // "mosaik-linux-64" in
    (* Path of a file expected directly inside the install directory. *)
    let installed file = install_path // file in
    let binary_url =
      "https://storage.googleapis.com/google-code-archive-downloads/v2/\
       code.google.com/mosaik-aligner/MOSAIK-2.2.3-Linux-x64.tar" in
    (* The binary distribution does not ship the `.ann` network files, so
       the source tarball is fetched as well. *)
    let network_files_url =
      "https://mosaik-aligner.googlecode.com/files/MOSAIK-2.2.3-source.tar" in
    (* Download [url], unpack it, and `cd` into the directory named after the
       archive minus its extension. *)
    let fetch_and_enter url =
      let tarball = Filename.basename url in
      let unpacked_dir = Filename.chop_extension tarball in
      Program.(
        Biokepi.Tool_providers.download_url_program url
        && shf "tar xvf %s" tarball
        && shf "cd %s" unpacked_dir
      ) in
    let ensure =
      (* Files whose presence marks the installation as done. *)
      let product =
        list_of_files ~host [
          installed "MosaikAligner";
          installed "MosaikBuild";
          installed "MosaikJump";
          installed "MosaikText";
          installed "pe.ann";
          installed "se.ann";
        ] in
      (* On failure, activate the [rm_path] node for the install directory. *)
      let edges = [
        on_failure_activate
          (Biokepi.Tool_providers.rm_path ~host install_path);
      ] in
      (* Stage 1: create the install directory and work from inside it. *)
      let enter_install_dir =
        Program.(
          shf "mkdir -p %s" install_path
          && shf "cd %s" install_path
        ) in
      (* Stage 2: unpack the binaries, copy them one level up, step back. *)
      let install_binaries =
        Program.(
          fetch_and_enter binary_url
          && sh "cp Mosaik* ../"
          && sh "cd .."
        ) in
      (* Stage 3: unpack the sources and copy the network files one level up
         under fixed names. *)
      let install_network_files =
        Program.(
          fetch_and_enter network_files_url
          && sh "cp networkFile/*pe.ann ../pe.ann"
          && sh "cp networkFile/*se.ann ../se.ann"
        ) in
      let make =
        daemonize ~using:`Python_daemon ~host
          Program.(
            enter_install_dir
            && install_binaries
            && install_network_files
            && sh "echo Done"
          ) in
      workflow_node product
        ~name:"Install MOSAIK (Linux 64 binary)"
        ~edges
        ~make
    in
    (* Environment set up before the tool is used. *)
    let init =
      Program.(
        shf "export PATH=%s:$PATH" install_path
        && shf "export MOSAIK_PE_ANN=%s/pe.ann" install_path
        && shf "export MOSAIK_SE_ANN=%s/se.ann" install_path
      ) in
    Tool.create Tool.Default.mosaik ~ensure ~init

And then augmenting the default toolkit:

    let toolkit =
      (* Build Biokepi's default toolkit first, then put the MOSAIK tool at
         the head of its tool list. *)
      let stock_kit =
        Biokepi.Tool_providers.default_toolkit ()
          ~host
          ~meta_playground
          ~gatk_jar_location:(fun () -> gatk_jar_location)
          ~mutect_jar_location:(fun () -> mutect_jar_location)
      in
      let mosaik_tool = mosaik_binary_tool ~host ~meta_playground in
      let tools = mosaik_tool :: stock_kit.Tool.Kit.tools in
      Tool.Kit.{ tools } in