Open markziemann opened 2 years ago
library("tictoc")
library(XML)
library(reutils)

# Query the NCBI SRA database through reutils, pull the document summaries
# for every hit, and collect the per-run XML attributes into one object.
# tic()/toc() bracket the pipeline so the elapsed time is reported at the end.
tic()

# NOTE(review): Entrez help asks for uppercase Boolean operators ("AND");
# the query string is kept exactly as written -- confirm it matches as
# intended.
# NOTE(review): retmax = 999000 is very large; verify the service honours it.
eres <- esearch(
  "Escherichia coli[orgn] and transcriptomic[Source] and public[Access] ",
  db = "sra",
  retmax = 999000
)

# Print the structure of the returned UID vector as a quick sanity check.
str(uid(eres))

esum <- esummary(eres)
econtent <- content(esum, "parsed")

# Doubling the brackets at every "><" junction lets strsplit() cut between
# neighbouring XML elements while each piece keeps its own closing ">" and
# opening "<" (presumably one <Run .../> element per piece -- confirm
# against the actual `Runs` field).
runvec <- unlist(strsplit(gsub("><", ">><<", econtent$Runs), "><"))

# Convert each XML fragment to a plain vector, then stack them row-wise.
runs <- do.call(
  rbind,
  lapply(runvec, function(run_xml) as.vector(xmlToList(run_xml)))
)

toc()
A simpler approach, using the pysradb command-line tool:
# Search SRA with pysradb and save the result table to ecoli.tsv.
pysradb search \
  --organism="Escherichia coli" \
  --source="transcriptomic" \
  --max=999000 \
  > ecoli.tsv

# Keep only the third-from-last whitespace-separated field of each line
# (presumably the run accession column -- confirm against the pysradb output).
awk '{print $(NF-2)}' ecoli.tsv \
  > ecoli_runs.tsv
This looks like a more stable alternative: https://github.com/saketkc/pysradb