Closed markziemann closed 3 years ago
Example efetch for SRA run info (transcriptomic source, Homo sapiens):
wget -O /mnt/md0/dee2/sradb/test.csv 'http://trace.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?save=efetch&db=sra&rettype=runinfo&term="transcriptomic"[Source] AND "Homo sapiens"[Organism]'
For GEO (E. coli example, using E-utilities esearch followed by esummary):
# Query GEO (db=gds) through the NCBI E-utilities for Escherichia coli
# (txid562) high-throughput-sequencing GSM records and write a two-column,
# tab-separated table to ecoli_geo.tsv.
# Files written in the current directory: esearch.xml, results.xml,
# ecoli_geo.tsv.

# -f: a missing file (e.g. on the first run) is not an error.
rm -f esearch.xml
# usehistory=y asks esearch to keep the result set on the NCBI history server;
# the response carries a WebEnv token that the esummary request below reuses.
wget -O esearch.xml 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=gds&term=(("Escherichia coli"[porgn:__txid562] AND "gsm"[Filter])) AND "high throughput sequencing"[Platform Technology Type] &retmax=5000&usehistory=y'

# Pull out the text between <WebEnv> and the next '<'.
MCID=$(sed -n 's/.*<WebEnv>\([^<]*\)<.*/\1/p' esearch.xml | head -n 1)
# Stop here if the search failed: without a token the esummary request and the
# resulting table would be meaningless.
: "${MCID:?no WebEnv token found in esearch.xml (did the esearch request fail?)}"
printf '%s\n' "$MCID"

rm -f results.xml
# query_key=1 refers to the first (and only) query stored under this WebEnv.
wget -O results.xml "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=gds&version=2.0&query_key=1&WebEnv=${MCID}"
wc -l results.xml

# Build the table:
#   1. take each line mentioning Accession plus the 5 lines after it, and keep
#      only those that mention Accession or GSE;
#   2. keep the text between the first '>' and the following '<' on each line;
#   3. join consecutive lines pairwise with a tab (paste - -);
#   4. prefix the second column with "GSE" and swap the two columns.
# NOTE(review): presumably this yields "GSE<number><TAB>GSM accession" per row;
# that depends on the esummary XML layout -- confirm against results.xml.
grep -A5 Accession results.xml \
  | grep -E '(Accession|GSE)' \
  | cut -d '>' -f2 \
  | cut -d '<' -f1 \
  | paste - - \
  | sed 's/\t/\tGSE/' \
  | awk '{OFS="\t"} {print $2,$1}' \
  > ecoli_geo.tsv
SRA metadata reports (presumably the source of the NCBI_SRA_Metadata_Full tarball used below): ftp://ftp.ncbi.nlm.nih.gov/sra/reports/Metadata/
# Stream the contents of the SRA metadata tarball to stdout (-O, so nothing is
# unpacked to disk) and print every line containing DATE with 3 lines of
# context on either side.
bsdtar -x -O -f ./../dee2/frontend/server/db_utils/NCBI_SRA_Metadata_Full_20201006.tar.gz \
  | grep -C 3 DATE