Open bradfordcondon opened 6 years ago
Using two SRR files:
SRR1734725 - fully expanded leaves (size = 367.8Mb)
SRR1734724 - mature ovules (size = 392.8Mb)
fastqc:
# Run FastQC on every raw FASTQ file, one background job per file.
# For an input <base>.fastq the report goes to the directory <base>.fastQC
# and FastQC's stdout/stderr are captured in <base>.fastQC.out.
for f in ../raw_reads/*.fastq
do
    # When the glob matches nothing the loop sees the literal pattern; skip it.
    [ -e "$f" ] || continue
    filename=$(basename "$f")
    # Drop everything from the first ".fastq" onwards.
    base="${filename%%.fastq*}"
    echo "filename $filename base $base"
    # -p: a re-run over an existing report directory must not fail.
    mkdir -p "$base.fastQC"
    /staton/software/FastQC-v0.11.5/FastQC/fastqc -o "$base.fastQC" "$f" > "$base.fastQC.out" 2>&1 &
done
trimmomatic:
# Trim every raw FASTQ file with Trimmomatic in single-end (SE) mode.
# For an input <name>.fastq the trimmed reads are written to
# <name>.trim.fastq in the current directory.

# The Java module is the same for every file, so load it once.
module load java/jdk8u181

for f in ../raw_reads/*.fastq
do
    # When the glob matches nothing the loop sees the literal pattern; skip it.
    [ -e "$f" ] || continue
    filename=$(basename "$f")
    # Strip the ".fastq" suffix literally; the earlier unanchored sed pattern
    # 's/.fastq*//g' treated "." as "any character" and could match mid-name.
    BASE="${filename%%.fastq*}"
    echo "f $f"
    echo "BASE $BASE"
    java -jar /staton/software/Trimmomatic-0.38/trimmomatic-0.38.jar SE -phred33 \
        "$f" "$BASE.trim.fastq" \
        ILLUMINACLIP:/staton/software/Trimmomatic-0.38/adapters/all.fa:2:40:15 \
        LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:36
done
RCorrector:
# Run Rcorrector on the two trimmed single-end libraries.
# Both read files are passed to -s as one comma-separated argument;
# -od names the directory that receives the corrected reads.
perl /staton/software/rcorrector/run_rcorrector.pl \
    -s /staton/projects/undergrads/ginkgo_biloba/2_trimmomatic/SRR1734725_1.trim.fastq,/staton/projects/undergrads/ginkgo_biloba/2_trimmomatic/SRR1734724_1.trim.fastq \
    -od /staton/projects/undergrads/ginkgo_biloba/3_RCorrector
trinity:
# De novo assembly of the corrected single-end reads with Trinity.
# bowtie2 is appended to PATH and Java is loaded before Trinity is launched.
export PATH="${PATH}:/staton/software/bowtie2-2.2.7/"
module load java/jdk8u181

# Both corrected libraries are given to --single as one comma-separated list.
/staton/software/trinityrnaseq-Trinity-v2.4.0/Trinity \
    --seqType fq \
    --single /staton/projects/undergrads/ginkgo_biloba/3_RCorrector/SRR1734724_1.trim.cor.fq,/staton/projects/undergrads/ginkgo_biloba/3_RCorrector/SRR1734725_1.trim.cor.fq \
    --CPU 6 \
    --max_memory 20G \
    --output se_trinity_out_dir
Skewer:
# Adapter-trim every raw FASTQ file with skewer, one background job per file.
# For an input <sample>_1.fastq, <sample> is passed to -o as the output
# prefix and skewer's stdout/stderr are captured in <sample>.trim_output.
for R in /staton/projects/undergrads/ginkgo_biloba/raw_reads/*.fastq
do
    # When the glob matches nothing the loop sees the literal pattern; skip it.
    [ -e "$R" ] || continue
    filename=$(basename "$R")
    # Strip the "_1.fastq" suffix literally; the earlier unanchored sed pattern
    # 's/_1.fastq*//g' treated "." as "any character" and could match mid-name.
    BASE="${filename%%_1.fastq*}"
    echo "R $R"
    echo "BASE $BASE"
    /staton/software/skewer/skewer \
        -x /staton/software/Trimmomatic-0.38/adapters/all.fa \
        -l 30 \
        "$R" \
        -o "$BASE" \
        > "$BASE.trim_output" 2>&1 &
done
indexing:
# Build a STAR index from the Trinity assembly, in the background.
# STAR does not create the --genomeDir directory itself; it has to exist
# (and be writable) before genomeGenerate is started.
mkdir -p genomeDir
# NOTE: the job is backgrounded; it must have finished before any alignment
# against genomeDir is started.
/staton/software/STAR-2.6.1a/bin/Linux_x86_64/STAR \
    --runMode genomeGenerate \
    --genomeDir genomeDir \
    --genomeFastaFiles ../4_Trinity/se_trinity_out_dir/Trinity.fasta &
Alignment:
# Align each trimmed library to the STAR index, one background job per library.
# Libraries are discovered through the .log files in ../5_skewer; for
# <base>.log the reads are taken from ../5_skewer/<base>.fastq and all STAR
# output files are prefixed with "<base>.".
# NOTE(review): the indexing step calls STAR from
# /staton/software/STAR-2.6.1a/bin/Linux_x86_64/STAR, while this step uses
# /staton/software/STAR-2.6.1a/STAR — confirm both resolve to the same binary.
for f in ../5_skewer/*.log
do
    # When the glob matches nothing the loop sees the literal pattern; skip it.
    [ -e "$f" ] || continue
    filename=$(basename "$f")
    # Strip the ".log" suffix literally. The earlier sed pattern 's/.log*//g'
    # meant "any character, then 'lo', then zero or more 'g'", so it also
    # deleted pieces such as "_lo" from the middle of a sample name.
    BASE="${filename%%.log*}"
    echo "BASE $BASE"
    /staton/software/STAR-2.6.1a/STAR \
        --genomeDir genomeDir \
        --readFilesIn "../5_skewer/$BASE.fastq" \
        --outSAMtype BAM SortedByCoordinate \
        --outFileNamePrefix "$BASE." &
done
HTseq:
# Count reads per gene feature with htseq-count, one background job per BAM.
# For each BAM in ../6_star the counts go to <base>.counts.txt and
# htseq-count's stderr goes to <base>.out.
for bam in ../6_star/*.bam
do
    # When the glob matches nothing the loop sees the literal pattern; skip it.
    [ -e "$bam" ] || continue
    filename=$(basename "$bam")
    # Derive the sample name by removing the BAM suffix. ".sorted.bam" is
    # removed when present (the original behaviour); otherwise the plain
    # ".bam" extension is removed. STAR's coordinate-sorted output is named
    # <prefix>Aligned.sortedByCoord.out.bam, which the old ".sorted.bam"
    # pattern never matched, leaving ".bam" inside every output file name.
    base="${filename%.sorted.bam}"
    base="${base%.bam}"
    echo "bam $bam"
    echo "base $base"
    echo "--"
    /staton/software/htseq-count \
        --format=bam \
        --order=pos \
        --stranded=no \
        --type=gene \
        --idattr=ID \
        "$bam" \
        ../4_Trinity/se_trinity_out_dir/Trinity.fasta.gff3 \
        > "$base.counts.txt" \
        2> "$base.out" &
    echo "-------"
done
blast refinement:
# Search the TransDecoder longest-ORF peptides against Swiss-Prot with blastp.
# Tabular output (-outfmt 6) is redirected to blastp.outfmt6 in the current
# directory; at most one target per query is kept, with an e-value cutoff
# of 1e-5 and 10 threads.
/staton/software/ncbi-blast-2.7.1+/bin/blastp \
    -query Trinity.fasta.transdecoder_dir/longest_orfs.pep \
    -db /staton/libraries/uniprot/uniprot_sprot.fasta \
    -max_target_seqs 1 \
    -outfmt 6 \
    -evalue 1e-5 \
    -num_threads 10 \
    > blastp.outfmt6
TransDecoder2:
# Run TransDecoder.Predict on the Trinity assembly in the background,
# passing the blastp hits file via --retain_blastp_hits.
/staton/software/TransDecoder-3.0.0/TransDecoder.Predict \
    -t ../4_Trinity/se_trinity_out_dir/Trinity.fasta \
    --retain_blastp_hits blastp.outfmt6 &
Transcriptome assembled, no issues so far!
Organism Page: https://www.hardwoodgenomics.org/organism/Ginkgo/biloba
Publication: https://www.hardwoodgenomics.org/Publication/3732396
Transcriptome assembly: https://www.hardwoodgenomics.org/Transcriptome-assembly/3732397?tripal_pane=group_summary_tripalpane
InterProScan annotation: https://www.hardwoodgenomics.org/InterProScan-annotation/3732398
Swissprot annotation: https://www.hardwoodgenomics.org/BLAST-annotation/3732399
Trembl annotation: https://www.hardwoodgenomics.org/BLAST-annotation/3732400
CDS fasta loader: https://www.hardwoodgenomics.org/admin/tripal/tripal_jobs/view/828387
Peptide fasta loader: https://www.hardwoodgenomics.org/admin/tripal/tripal_jobs/view/828388
Publish tripal content: https://www.hardwoodgenomics.org/admin/tripal/tripal_jobs/view/828769
Trembl results loader: https://www.hardwoodgenomics.org/admin/tripal/tripal_jobs/view/831432
Swissprot results loader: https://www.hardwoodgenomics.org/admin/tripal/tripal_jobs/view/831436
IPS results loader: https://www.hardwoodgenomics.org/admin/tripal/tripal_jobs/view/831440
Swissprot:
#PBS -N swissprot_BLAST
#PBS -S /bin/bash
#PBS -j oe
#PBS -A ACF-UTK0011
#PBS -t 1-200
#PBS -l nodes=1:ppn=2
#PBS -l walltime=04:00:00

# PBS array job (tasks 1-200): task N runs blastx on the query chunk
# Trinity.fasta.fixed.cds.N against Swiss-Prot and writes XML (-outfmt 5)
# to g_biloba.N.xml.

# Stop if the submission directory is unavailable rather than running elsewhere.
cd "$PBS_O_WORKDIR" || exit 1
module load blast

# -num_threads matches the ppn=2 request above; without it blastx uses a
# single thread and the second reserved core sits idle.
blastx \
    -query "/lustre/haven/gamma/staton/projects/undergrads/ginkgo_biloba/raw_data/BLAST_split/Trinity.fasta.fixed.cds.$PBS_ARRAYID" \
    -db /lustre/haven/gamma/staton/library/uniprot/uniprot_sprot.fasta \
    -out "/lustre/haven/gamma/staton/projects/undergrads/ginkgo_biloba/BLAST/swissprot/g_biloba.$PBS_ARRAYID.xml" \
    -evalue 1e-5 \
    -outfmt 5 \
    -num_threads 2
trembl:
#PBS -N trembl_BLAST
#PBS -S /bin/bash
#PBS -j oe
#PBS -A ACF-UTK0011
#PBS -t 1-200
#PBS -l nodes=1:ppn=2
#PBS -l walltime=12:00:00

# PBS array job (tasks 1-200): task N runs blastx on the query chunk
# Trinity.fasta.fixed.cds.N against the TrEMBL plants database and writes
# XML (-outfmt 5) to g_biloba_trembl.N.xml.

# Stop if the submission directory is unavailable rather than running elsewhere.
cd "$PBS_O_WORKDIR" || exit 1
module load blast

# -num_threads matches the ppn=2 request above; without it blastx uses a
# single thread and the second reserved core sits idle.
blastx \
    -query "/lustre/haven/gamma/staton/projects/undergrads/ginkgo_biloba/raw_data/BLAST_split/Trinity.fasta.fixed.cds.$PBS_ARRAYID" \
    -db /lustre/haven/gamma/staton/library/uniprot/uniprot_trembl_plants_July_2018.fasta \
    -out "/lustre/haven/gamma/staton/projects/undergrads/ginkgo_biloba/BLAST/trembl/g_biloba_trembl.$PBS_ARRAYID.xml" \
    -evalue 1e-5 \
    -outfmt 5 \
    -num_threads 2
IPS:
#PBS -N g_biloba_ips
#PBS -A ACF-UTK0011
#PBS -S /bin/bash
#PBS -t 1-200
#PBS -j oe
#PBS -l nodes=1:ppn=4
#PBS -l walltime=6:30:00

# PBS array job (tasks 1-200): task N runs InterProScan on the peptide chunk
# Trinity_noAst.pep.N, writes XML results into IPS/xmls, and captures
# InterProScan's stdout in IPS/xmls/TMP/N.out.

# Stop if the submission directory is unavailable rather than running elsewhere.
cd "$PBS_O_WORKDIR" || exit 1
module load python3

# The shell opens the stdout redirect target before launching InterProScan,
# so a missing directory would abort the task without running anything.
# NOTE(review): stdout goes to IPS/xmls/TMP while --tempdir is IPS/TMP —
# confirm the two different TMP locations are intentional.
mkdir -p /lustre/haven/gamma/staton/projects/undergrads/ginkgo_biloba/IPS/xmls/TMP

/lustre/haven/gamma/staton/software/interproscan-5.34-73.0/interproscan.sh \
    -i "/lustre/haven/gamma/staton/projects/undergrads/ginkgo_biloba/raw_data/IPS_split/Trinity_noAst.pep.$PBS_ARRAYID" \
    -f XML \
    -d /lustre/haven/gamma/staton/projects/undergrads/ginkgo_biloba/IPS/xmls \
    --disable-precalc \
    --iprlookup \
    --goterms \
    --pathways \
    --tempdir /lustre/haven/gamma/staton/projects/undergrads/ginkgo_biloba/IPS/TMP \
    > "/lustre/haven/gamma/staton/projects/undergrads/ginkgo_biloba/IPS/xmls/TMP/$PBS_ARRAYID.out"
https://www.researchgate.net/publication/299471846_Global_comparative_analysis_of_expressed_genes_in_ovules_and_leaves_of_Ginkgo_biloba_L