Open RaymondS1 opened 4 years ago
Cherry to Peach Blast
spack load blast-plus@2.9.0
blastp \
-query /staton/projects/alternative_splicing/prunus_avium/raw_reads/Prunus_avium_v1.0.a1_protein.fasta \
-db /staton/projects/alternative_splicing/prunus_persica/raw_reads/Ppersica_298_v2.1.protein.fa \
-out avium_to_persica_blast.txt \
-evalue 1e-5 \
-outfmt 6 \
-max_target_seqs 1 \
-num_threads 10
Peach to cherry blast
spack load blast-plus@2.9.0
blastp \
-query /staton/projects/alternative_splicing/prunus_persica/raw_reads/Ppersica_298_v2.1.protein.fa \
-db /staton/projects/alternative_splicing/prunus_avium/raw_reads/Prunus_avium_v1.0.a1_protein.fasta \
-out persica_to_avium_blast.txt \
-evalue 1e-5 \
-outfmt 6 \
-max_target_seqs 1 \
-num_threads 10 &
Counting Genes
import re
import sys
a_file = sys.argv[1]
output_file = sys.argv[2]
gene_list =[]
numb_lines = 0
with open(a_file, "r") as f:
for line in f:
try:
gene = re.findall('Prupe.\dG\d+_v2.0', line)[0]
gene_list.append(gene)
numb_lines += 1
except:
gene = None
gene_list = list(dict.fromkeys(gene_list))
print("number of lines:", numb_lines)
print("number of gene:", len(gene_list))
my_file = open(output_file, "w")
my_file.write(str(gene_list))
my_file.close
for f in *ioe
do
base=$(basename $f | sed 's/.ioe//g')
echo "filename $f base $base"
python gene_counts.py $f $base.gene_list.txt
done
Cherry Data ftp://ftp.bioinfo.wsu.edu/www.rosaceae.org/Prunus_avium/Prunus_avium-genome.v1.0.a1/genes
Peach Data https://genome.jgi.doe.gov/portal/pages/dynamicOrganismDownload.jsf?organism=Ppersica