Perhaps a personal wish, but I often use the -taxidlist option to limit the blast to a certain taxonomical section of the database (and we used it for mussels). I see you have the negative taxids option, but not the "positive" option. Suggest adding this functionality.
example of previous blast function, in case it helps
> blast.min.bas2
function(infasta,refdb,blast_exec="blastn",wait=T,taxidlimit=NULL,inverse=F,ncbiTaxDir=NULL,overWrite=F,out=NULL,
opts=c("-task","blastn","-outfmt", "6 qseqid evalue pident qcovs saccver staxid ssciname sseq","-num_threads", 64,
"-max_target_seqs", 100, "-max_hsps",1,"-word_size", 11, "-perc_identity", 50,
"-qcov_hsp_perc", 98, "-gapopen", 0, "-gapextend", 2, "-reward", 1, "-penalty", -1)){
t1<-Sys.time()
require(processx)
if(!is.null(taxidlimit)) if(is.null(ncbiTaxDir)) stop("to use taxidlimit, ncbiTaxDir must be supplied")
if(is.null(out)) out<-paste0(gsub(".fasta", ".blast.txt",infasta))
outdircheck<-
if(overWrite==F) if(file.exists(out)) stop("The following file already exists ", out, "Use overWrite=T to overwrite")
if(!is.null(taxidlimit)){
h<-list()
#generate children of taxids and store in file
taxid.list<-list()
taxids_fileA<-paste0("taxids",as.numeric(Sys.time()),".txt")
for(i in 1:length(taxidlimit)){
system2(command = "taxonkit",args = c("list", "--ids", taxidlimit[i], "--indent", '""',"--data-dir",ncbiTaxDir)
,wait=T,stdout = taxids_fileA)
taxid.list[i]<-read.table(taxids_fileA)
}
write.table(unlist(taxid.list),taxids_fileA,row.names = F,quote = F,col.names = F,)
#change options to include taxidlimit
opts<-c(opts,"-taxidlist",taxids_fileA)
if(inverse) opts<-gsub("-taxidlimit","-negative_taxids",opts)
}
#run BLAST
error.log.file<-paste0("blast.error.temp.processx.file",as.numeric(Sys.time()),".txt")
h<-process$new(command = blast_exec, args=c("-query", infasta, "-db",refdb,opts, "-out", out),echo_cmd = T,
stderr = error.log.file)
Sys.sleep(time = 2)
#report PID
message(paste("PID:",h$get_pid()))
#check immediate exit status
exits<-h$get_exit_status()
if(1 %in% exits){
message("************
There was a problem with ", infasta[match(1,exits)], ", aborting blast
************")
print(grep("Error",readLines(error.log.file),value = T))
h$kill()
}
if(wait==T){
h$wait()
message(readLines(error.log.file))
message("exit_status=",exits)
file.remove(error.log.file)
if(!is.null(taxidlimit)) file.remove(taxids_fileA)
}
headers<-paste0(paste("'1i",paste(unlist(strsplit(opts[match("-outfmt",opts)+1]," "))[-1],collapse = "\t"),collapse = "\t"),"'")
system2("sed",c("-i", headers, out),wait = T)
t2<-Sys.time()
t3<-round(difftime(t2,t1,units = "mins"),digits = 2)
message(c("All blasts complete in ",t3," mins."))
return(h)
}
Perhaps a personal wish, but I often use the -taxidlist option to limit the blast to a certain taxonomical section of the database (and we used it for mussels). I see you have the negative taxids option, but not the "positive" option. Suggest adding this functionality.
example of previous blast function, in case it helps