Create rule to download and set up panX data

This will be based on the following code.

# Download the Escherichia coli gene alignments from panX and unpack the
# tarball straight from the stream (nothing is written to disk compressed)
wget http://pangenome.tuebingen.mpg.de/dataset/Escherichia_coli/all_gene_alignments.tar.gz -O - | tar xzf -
# Stop if the archive did not unpack: everything below deletes or moves files
# relative to the current directory and must never run anywhere else
cd all_gene_alignments || exit 1
# delete all protein alignments
# -exec ... {} + hands the names to rm in large batches and passes them
# verbatim, so odd characters in a file name cannot be misparsed; -- stops a
# name from being read as an option
find . -name '*_aa_*' -exec rm -- {} +
# filter into subfolders

#######################################
# Move every *.fa.gz file in the current directory into numbered
# subdirectories (0, 1, 2, ...), starting a new subdirectory each time the
# current one has received a full batch of files.
# Arguments: $1 - number of files per subdirectory (optional, default 4000)
# Outputs:   an error message on stderr if the batch size is invalid
# Returns:   0 on success; 2 if the batch size is not a positive integer;
#            the failing status of mkdir or mv otherwise
#######################################
batch_into_subdirs() {
   local batch_size=${1:-4000}
   # outnum generates the name of the output directory
   local outnum=0
   # n is the number of files moved into the current output directory
   local n=0
   local outdir f

   # Reject 0, negatives, and non-numbers (a zero batch would divide by zero)
   if ! [[ "$batch_size" =~ ^[1-9][0-9]*$ ]]; then
      printf 'batch size must be a positive integer: %s\n' "$batch_size" >&2
      return 2
   fi

   # Go through all fasta files in the current directory
   for f in *.fa.gz; do
      # A glob with no matches is left as the literal pattern; skip it so no
      # empty directory is created and mv is not run on a non-existent file
      [[ -e "$f" ]] || continue

      # Create new output directory if first of a new batch
      if (( n == 0 )); then
         outdir="$outnum"
         # -p tolerates a directory left behind by an earlier, partial run
         mkdir -p -- "$outdir" || return
         outnum=$(( outnum + 1 ))
      fi

      # Move the file to the new subdirectory
      mv -- "$f" "$outdir/" || return

      # Count how many we have moved there; wrapping to 0 once the batch is
      # full makes the next iteration start a new output directory
      n=$(( (n + 1) % batch_size ))
   done
}

batch_into_subdirs

mbhall88 / pandora_simulations

Create rule to download and set up panX data #1