Open ruicatxiao opened 4 years ago
guppy_basecaller -i raw_multi -s guppy4fastq \ -c dna_r9.4.1_450bps_hac.cfg \ --device auto \ --gpu_runners_per_device 5 \ --cpu_threads_per_caller 4 \ --fast5_out
nanopolish index -d raw_multi/ dri13.fastq -v echo "Indexing done" echo " "
minimap2 -a -x map-ont dri13.fa dri13.fastq | samtools sort -T tmp -o "$NAME.sorted.bam" -@ 30 samtools index "$NAME.sorted.bam" module del SAMtools/1.8-foss-2018a module del minimap2/2.17-foss-2018a echo "Alignment done" echo " "
nanopolish call-methylation -t 20 -r "$NAME.fastq" \ -b "$NAME.sorted.bam" -g dri13.fa \ --methylation=cpg > "$NAME.cpg.tsv"
nanopolish call-methylation -t 20 -r "$NAME.fastq" \ -b "$NAME.sorted.bam" -g dri13.fa \ --methylation=gpc > "$NAME.gpc.tsv"
nanopolish call-methylation -t 20 -r "$NAME.fastq" \ -b "$NAME.sorted.bam" -g dri13.fa \ --methylation=dam > "$NAME.dam.tsv"
nanopolish call-methylation -t 20 -r "$NAME.fastq" \ -b "$NAME.sorted.bam" -g dri13.fa \ --methylation=dcm > "$NAME.dcm.tsv"
calculate_methylation_frequency.py "$NAME.cpg.tsv" > "$NAME.cpgfrequency.tsv"
calculate_methylation_frequency.py "$NAME.gpc.tsv" > "$NAME.gpcfrequency.tsv"
calculate_methylation_frequency.py "$NAME.dam.tsv" > "$NAME.damfrequency.tsv"
calculate_methylation_frequency.py "$NAME.dcm.tsv" > "$NAME.dcmfrequency.tsv"
echo "Average CpG methylation % is " tail -n +2 "$NAME.cpgfrequency.tsv" | awk -F '\t' '{sum+=$7 } END {print(sum/NR)}'
echo "Average GpC methylation % is " tail -n +2 "$NAME.gpcfrequency.tsv" | awk -F '\t' '{sum+=$7 } END {print(sum/NR)}'
echo "Average Dam methylation % is " tail -n +2 "$NAME.damfrequency.tsv" | awk -F '\t' '{sum+=$7 } END {print(sum/NR)}'
echo "Average Dcm methylation % is " tail -n +2 "$NAME.dcmfrequency.tsv" | awk -F '\t' '{sum+=$7 } END {print(sum/NR)}
awk -F '\t' '$5>30' OFS='\t' DRI13_NANOPOLISH.dam_freq.tsv > dri13_nanopolish_dam_filtered_1.tsv
tail -n +2 dri13_nanopolish_dam_filtered_1.tsv | awk -F '\t' '{sum+=$7 } END {print(sum/NR)}'
tail -n +2 dri13_nanopolish_dam_filtered_1.tsv | awk -F '\t' '{sum+=$5 } END {print(sum/NR)}'
tail -n +2 dri13_nanopolish_dam_filtered_1.tsv | awk -F '\t' '{sum+=$4 } END {print(sum)}'
tail -n+2 dri13_nanopolish_dam_filtered_1.tsv | awk -v OFS='\t' '{ print $1, $2, $3, $7}' > dri13_nanopolish_dam_filtered_1.bed
bedGraphToBigWig dri13_nanopolish_dam_filtered_1.bed ../chrm.sizes dri13_nanopolish_dam.bw
awk -F '\t' '{a[$7]++;} END{for(i in a) print a[i]" "i}' dri13.gff
gff2bed < dri13.gff > dri13.bed awk -F '\t' -v OFS='\t' '{ print $1, $2, $3, $6, $8}' dri13.bed > dri13_slim.bed
awk -F '\t' '$5 == "ncRNA"' dri13.gff | wc -l
awk -F '\t' '$5 == "CRISPR"' dri13_slim.bed > dri13_crispr.bed
awk -F '\t' '$4 == "+"' dri13_CDS.gff | wc -l
awk -F '\t' '$4 == "+"' dri13_slim.bed > dri13_fwd.bed
awk -F '\t' '$4 == "+"' dri13_repeat.bed > dri13_repeat_fwd.bed awk -F '\t' '$4 == "-"' dri13_repeat.bed > dri13_repeat_rev.bed
multiBigwigSummary bins -bs 100000 \ -b dri13_nanopolish_cpg.bw dri13_nanopolish_dcm.bw dri13_nanopolish_dam.bw \ -o dri13_cg_dcm_dam_100kb.npz -p 4 --outRawCounts dri13__cg_dcm_dam_100kb.tab -v
multiBigwigSummary bins -bs 100000 \ -b dri13_tombo_chh_plus.bw dri13_tombo_chh_minus.bw \ -o dri13_chh_100kb.npz -p 4 --outRawCounts dri13_chh_100kb.tab -v
plotCoverage -b DRI13_NANOPOLISH.sorted.bam \ --plotFile dri13_coverage.svg \ -n 100000 \ --plotTitle "100kbp Coverage" \ --outRawCounts dri13_coverage.tab \ --ignoreDuplicates \ --skipZeros -p 4
computeMatrix scale-regions \ -S dri13_nanopolish_cpg.bw dri13_nanopolish_dcm.bw dri13_nanopolish_dam.bw \ -R ../bed/dri13_CDS.bed \ -b 500 -a 500 -m 1000 -bs 25 \ -o results_dri13_metaplot1.gz \ --skipZeros -p 8 \ --missingDataAsZero
plotProfile \ -m results_dri13_metaplot1.gz --dpi 600 \ -out results_dri13_metaplot1.png --yMin 0 --yMax 0.02 \ --colors "#fb0106" "#107f40" "#0000fe"
computeMatrix scale-regions \ -S dri13_CHH_rev.bw \ -R ../bed/dri13_CDS_rev.bed \ -b 500 -a 500 -m 1000 -bs 25 \ -o results_dri13_metaplot3.gz \ --skipZeros -p 8 \ --missingDataAsZero
plotProfile \ -m results_dri13_metaplot2.gz --dpi 600 \ -out results_dri13_metaplot2.png --yMin 0 --yMax 0.02 \ --colors "#20fefe"
tombo detect_modifications de_novo \ --fast5-basedirs raw_single \ --statistics-file-basename dri13_denovo \ --coverage-dampen-counts 2 0 \ --processes 8 \ --minimum-test-reads 3
tombo text_output browser_files \ --file-types valid_coverage --statistics-filename dri13_denovo.tombo.stats \ --genome-fasta dri13.fa --browser-file-basename dri13 \ --fast5-basedirs raw_single
tail -n+3 dri13.valid_coverage.plus.wig | awk '{sum+=$2 } END {print(sum/NR)}'
tombo text_output browser_files \ --statistics-filename dri13_denovo.tombo.stats \ --browser-file-basename dri13 \ --genome-fasta dri13.fa \ --file-types dampened_fraction \ --motif-description CHH:1:CHH
tail -n+3 dri13.dampened_fraction_modified_reads.CHH.plus.wig | awk '{sum+=$2 } END {print(sum/NR)}' tail -n+3 dri13.dampened_fraction_modified_reads.CHH.minus.wig | awk '{sum+=$2 } END {print(sum/NR)}'
tombo text_output signif_sequence_context \ --statistics-filename dri13_denovo.tombo.stats \ --genome-fasta dri13.fa --num-regions 1000 --num-bases 100
tombo plot motif_with_stats \ --fast5-basedirs raw_single --statistics-filename dri13_denovo.tombo.stats \ --genome-fasta dri13.fa --motif CG \ --plot-standard-model --pdf-filename dri13_cg_plot.pdf \ --overplot-threshold 30
multiBigwigSummary BED-file \ -b dri13_nanopolish_cpg.bw dri13_nanopolish_dcm.bw dri13_nanopolish_dam.bw dri13_tombo_chh_plus.bw dri13_tombo_chh_minus.bw \ --BED ../dri13_CDS.bed \ -out dri13_geneanchor_allmet.npz --outRawCounts dri13_geneanchor_allmet.tab
"#fb0106" red cpg
"#107f40" moss dcm
"#20fefe" cyan chh fwd
"#7f007f" purple chh rev
"#0000fe" blue dam
This is the collection of all microbial methylome analysis sources codes.
(Saved for linking to future publications)