Fnyasimi / MARS-Update-Pipeline

0 stars 2 forks source link

bed2chipseq.sh #1

Open kipkurui opened 4 years ago

kipkurui commented 4 years ago

`#!/usr/bin/env bash

function get_fasta(){

#check the size of the input sequence of reduce the processing time form large files (assumes sorted file)

lenbed=$(wc -l $bed_in | cut -f1 -d " ")
lenuse=$(($lenbed/10))
if [ "$lenbed" -gt 5000 ];
    then
        cut -f 1,2,3 $bed_in | head -$lenuse | bed-widen -width $len  >$bed_wide.bed
 else
        cut -f 1,2,3 $bed_in | bed-widen -width 100  >$bed_wide.bed
fi

#Extract negative bed and the fasta
python extractnegative.py $bed_wide.bed $bed_wide.negbed 500

fastaFromBed -tab -fi $hg -bed $bed_wide.bed -fo $bed_wide.fas
fastaFromBed -tab -fi $hg -bed $bed_wide.negbed -fo $bed_wide.negfas

cut -f 4 $bed_in >/tmp/f1
cut -f 1 $bed_wide.fas >/tmp/f2
cut -f 2 $bed_wide.fas >/tmp/f3
paste /tmp/f2 /tmp/f1 /tmp/f3  >$bed_wide.fas

cut -f 4 $bed_in >/tmp/f1
cut -f 1 $bed_wide.negfas >/tmp/f2
cut -f 2 $bed_wide.negfas >/tmp/f3
paste /tmp/f2 /tmp/f1 /tmp/f3  >$bed_wide.negfas

python removemasked.py $bed_wide.fas $bed_wide.fa
python removemasked.py $bed_wide.negfas $bed_wide.negfa

# use the length of the available sequences to determine the size of test and negative sequences

lenfa=$(wc -l $bed_wide.fa | cut -f1 -d " ")
len_negfa=$(wc -l $bed_wide.negfa | cut -f1 -d " ")
lenuse=$(($lenbed/20))

if [ $lenuse -gt 500 ];
    then
        lenuse=$lenuse
 else
        lenuse=500
 fi

if [ "$lenfa" -gt $lenuse ] &&  [ "$len_negfa" -gt $lenuse ];
    then
        cutoff=$lenuse
elif [ "$lenfa" -lt "$len_negfa" ] ;
    then
        cutoff=$lenfa
 else
        cutoff=$len_negfa
fi

head -$cutoff $bed_wide.fa >$bed_wide.posneg
head -$cutoff $bed_wide.negfa >>$bed_wide.posneg

#clean up the temporary files
#rm $bed_wide.neg*
#rm $bed_wide.fa*
rm /tmp/f*

} bed_in=$1 bed_wide=$2 hg=$3 len=${4:-100} #Use a default f 100 if not provided get_fasta`