Open DCGenomics opened 6 years ago
The objective would be to generate a matrix for datasets and plasmids?
magicblast -db test.fasta -sra SRR6227128 -perc_identity 90 > testout.foo &
magicblast -db test.fasta -sra SRR6227128 -no_unaligned -splice F -score 50 -outfmt tabular
blastn -db S_aureus_gapless.fasta -query /panfs/pan1.be-md.ncbi.nlm.nih.gov/product_manager_research_projects/richard_copin/contigs/matching_contigs_AB254848.fasta -parse_deflines -evalue .0000000001 -outfmt 6 -out sample_contigs_into_gapless_S_aureus_genomes.blastout
genome_getter.sh
# Download the GenBank genomic FASTA for every assembly listed in
# S_aureus_gapless_assemblies, then decompress the downloads.
#
# NOTE(review): assumes the list file holds one assembly query/accession
# per line — confirm against how the file was generated.
mapfile -t assemblies < S_aureus_gapless_assemblies

for i in "${assemblies[@]}"; do
  # Skip blank lines so we never send an empty query.
  [[ -n "$i" ]] || continue

  # Look up the assembly record, pull its GenBank FTP directory, and append
  # "<last path component>_genomic.fna.gz" to form the download URL(s).
  mapfile -t urls < <(
    esearch -db assembly -query "$i" \
      | efetch -format docsum \
      | xtract -pattern DocumentSummary -element FtpPath_GenBank \
      | awk -F"/" '{print $0"/"$NF"_genomic.fna.gz"}'
  )

  if (( ${#urls[@]} == 0 )); then
    printf 'warning: no GenBank FTP path found for %s\n' "$i" >&2
    continue
  fi

  wget -- "${urls[@]}"
done

gunzip -- *.gz
https://www.ncbi.nlm.nih.gov/genome/plasmids/154?
awk -F"\t" '{print $7}' genomes_plasmids.txt   # use $7 for GenBank, $6 for RefSeq
esearch -db nucleotide -query CP007178 | efetch -format fasta > test.fasta
makeblastdb -dbtype nucl -in test.fasta