Open drseb opened 7 years ago
@drseb A quick and dirty zeroth approximation:
ack-grep "'url':" dipper/sources/*.py
dipper/sources/AnimalQTLdb.py
55: 'url': AQDL + '/tmp/QTL_Btau_4.6.gff.txt.gz'},
59: # 'url': AQDL + '/tmp/QTL_UMD_3.1.gff.txt.gz'},
62: 'url': AQDL + '/export/KSUI8GFHOT6/cattle_QTLdata.txt'},
65: 'url': AQDL + '/tmp/QTL_GG_4.0.gff.txt.gz'},
68: 'url': AQDL + '/export/KSUI8GFHOT6/chicken_QTLdata.txt'},
71: 'url': AQDL + '/tmp/QTL_SS_10.2.gff.txt.gz'},
74: 'url': AQDL + '/export/KSUI8GFHOT6/pig_QTLdata.txt'},
77: 'url': AQDL + '/tmp/QTL_OAR_3.1.gff.txt.gz'},
80: 'url': AQDL + '/export/KSUI8GFHOT6/sheep_QTLdata.txt'},
83: 'url': AQDL + '/tmp/QTL_EquCab2.0.gff.txt.gz'},
86: 'url': AQDL + '/export/KSUI8GFHOT6/horse_QTLdata.txt'},
89: 'url': AQDL + '/export/KSUI8GFHOT6/rainbow_trout_QTLdata.txt'},
93: 'url': AQDL + '/export/trait_mappings.csv'}
dipper/sources/BioGrid.py
31: 'url': BGDL + '/BIOGRID-ALL-LATEST.mitab.zip'},
34: 'url': BGDL + '/BIOGRID-IDENTIFIERS-LATEST.tab.zip'}
dipper/sources/ClinVar.py
33: 'url': CVDL + '/variant_summary.txt.gz'
37: 'url': CVDL + '/var_citations.txt'
dipper/sources/ClinVarXML_alpha.py
51: 'url': 'ftp://ftp.ncbi.nlm.nih.gov/pub/clinvar/xml/ClinVarFullRelease_00-latest.xml.gz'}
dipper/sources/CTD.py
64: 'url': 'http://ctdbase.org/reports/CTD_chemicals_diseases.tsv.gz'
68: 'url': 'http://ctdbase.org/reports/CTD_genes_pathways.tsv.gz'
72: 'url': 'http://ctdbase.org/reports/CTD_genes_diseases.tsv.gz'
dipper/sources/Decipher.py
36: 'url': 'https://decipher.sanger.ac.uk/files/ddd/ddg2p.zip'}
dipper/sources/EOM.py
48: 'url': 'https://raw.githubusercontent.com/obophenotype/human-phenotype-ontology/master/src/mappings/hp-to-eom-mapping.tsv'
dipper/sources/FlyBase.py
70: 'url': 'ftp://ftp.flybase.net/releases/current/precomputed_files/human_disease/allele_human_disease_model_data_fb_*.tsv.gz'
dipper/sources/GeneOntology.py
45: 'url': GOGA+'/goa_dog.gaf.gz'},
48: 'url': GOGA+'/gene_association.fb.gz'},
51: 'url': GOGA+'/gene_association.zfin.gz'},
54: 'url': GOGA+'/gene_association.mgi.gz'},
57: 'url': GOGA+'/gene_association.rgd.gz'},
60: 'url': GOGA+'/gene_association.wb.gz'},
63: 'url': GOGA+'/goa_pig.gaf.gz'},
66: 'url': GOGA+'/goa_chicken.gaf.gz'},
69: 'url': GOGA+'/goa_human.gaf.gz'},
72: 'url': GOGA+'/goa_cow.gaf.gz'},
76: # 'url': 'ftp://ftp.ebi.ac.uk/pub/databases/GO/goa/UNIPROT/gene_association.goa_uniprot.gz'},
79: 'url': 'http://www.geneontology.org/doc/GO.references'},
82: 'url': 'ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/idmapping_selected.tab.gz'
dipper/sources/GeneReviews.py
55: 'url': GRDL + '/NBKid_shortname_OMIM.txt'},
57: 'url': GRDL + '/GRtitle_shortname_NBKid.txt'}
248: 'url': ''}
dipper/sources/GWASCatalog.py
53: 'url': 'ftp://ftp.ebi.ac.uk/pub/databases/gwas/releases/latest/gwas-catalog-associations_ontology-annotated.tsv'},
56: 'url': 'http://www.ebi.ac.uk/efo/efo.owl'},
59: 'url': 'http://purl.obolibrary.org/obo/so.owl'}
dipper/sources/HGNC.py
28: 'url': 'ftp://ftp.ebi.ac.uk/pub/databases/genenames/new/tsv/hgnc_complete_set.txt'},
dipper/sources/HPOAnnotations.py
56: 'url': HPOADL + '/phenotype_annotation.tab'},
59: 'url': HPOADL + '/data_version.txt'},
62: # 'url': HPOADL + '/negative_phenotype_annotation.tab'},
65: 'url': 'https://raw.githubusercontent.com/monarch-initiative/human-disease-ontology/master/src/ontology/doid-edit.owl'
dipper/sources/IMPC.py
65: # 'url': IMPCDL + '/IMPC_genotype_phenotype.csv.gz'},
68: # 'url': IMPCDL + '/EuroPhenome_genotype_phenotype.csv.gz'},
71: # 'url': IMPCDL + '/MGP_genotype_phenotype.csv.gz'},
74: # 'url': IMPCDL + '/3I_genotype_phenotype.csv.gz'},
77: 'url': IMPCDL + '/ALL_genotype_phenotype.csv.gz'},
80: 'url': IMPCDL + '/checksum.md5'},
dipper/sources/KEGG.py
24: 'url': 'http://rest.genome.jp/list/disease'},
27: 'url': 'http://rest.genome.jp/list/pathway'},
30: 'url': 'http://rest.genome.jp/list/hsa'},
33: 'url': 'http://rest.genome.jp/list/orthology'},
36: 'url': 'http://rest.kegg.jp/link/disease/hsa'},
39: 'url': 'http://rest.genome.jp/link/disease/omim'},
42: 'url': 'http://rest.genome.jp/link/omim/hsa'},
45: 'url': 'http://rest.genome.jp/conv/ncbi-geneid/hsa'},
48: 'url': 'http://rest.kegg.jp/link/pathway/hsa'},
51: 'url': 'http://rest.kegg.jp/link/orthology/hsa'},
54: 'url': 'http://rest.kegg.jp/link/orthology/mmu'},
57: 'url': 'http://rest.kegg.jp/link/orthology/rno'},
60: 'url': 'http://rest.kegg.jp/link/orthology/dme'},
63: 'url': 'http://rest.kegg.jp/link/orthology/dre'},
66: 'url': 'http://rest.kegg.jp/link/orthology/cel'},
69: 'url': 'http://rest.kegg.jp/link/pathway/pubmed'},
72: 'url': 'http://rest.kegg.jp/link/pathway/ds'},
75: # 'url': 'http://rest.kegg.jp/link/pathway/pathway'},
78: 'url': 'http://rest.kegg.jp/link/pathway/ko'},
dipper/sources/MMRRC.py
44: 'url': 'https://www.mmrrc.org/about/mmrrc_catalog_data.csv'},
dipper/sources/Monochrom.py
74: 'url': MCDL + '/hg19/database/cytoBand.txt.gz',
80: 'url': MCDL + '/mm10/database/cytoBandIdeo.txt.gz',
88: 'url': MCDL + '/danRer10/database/cytoBandIdeo.txt.gz',
94: 'url': MCDL + '/rn6/database/cytoBandIdeo.txt.gz',
100: 'url': MCDL + '/bosTau7/database/cytoBandIdeo.txt.gz',
106: 'url': MCDL + '/galGal4/database/cytoBandIdeo.txt.gz',
112: 'url': MCDL + '/susScr3/database/cytoBandIdeo.txt.gz',
118: 'url': MCDL + '/oviAri3/database/cytoBandIdeo.txt.gz',
124: 'url': MCDL + '/equCab2/database/cytoBandIdeo.txt.gz',
dipper/sources/MPD.py
48: 'url': mdpdl+'/ontology_mappings.csv'},
51: 'url': mdpdl+'/straininfo.csv'},
54: 'url': mdpdl+'/measurements.csv'},
57: 'url': mdpdl+'/strainmeans.csv.gz'},
60: # 'url': mdpdl+'/mpd_datasets_metadata.xml.gz'},
dipper/sources/NCBIGene.py
54: 'url': 'http://ftp.ncbi.nih.gov/gene/DATA/gene_info.gz'
58: 'url': 'http://ftp.ncbi.nih.gov/gene/DATA/gene_history.gz'
62: 'url': 'http://ftp.ncbi.nih.gov/gene/DATA/gene2pubmed.gz'
66: 'url': 'http://ftp.ncbi.nih.gov/gene/DATA/gene_group.gz'}
dipper/sources/OMIA.py
57: 'url': 'http://omia.angis.org.au/dumps/omia.xml.gz'},
dipper/sources/OMIM.py
58: 'url': 'http://omim.org/static/omim/data/mim2gene.txt'},
61: 'url': OMIMFTP + '/morbidmap.txt'},
64: 'url': 'http://www.omim.org/phenotypicSeriesTitle/all?format=tsv',
dipper/sources/Orphanet.py
27: 'url': 'http://www.orphadata.org/data/xml/en_product6.xml'}
dipper/sources/Panther.py
47: 'url': PNTHDL+'/RefGenomeOrthologs.tar.gz'},
50: 'url': PNTHDL+'/Orthologs_HCOP.tar.gz'}
dipper/sources/Reactome.py
22: 'url': PANTHER_BASE+'Ensembl2Reactome.txt'},
dipper/sources/UCSCBands.py
68: 'url': 'http://hgdownload.cse.ucsc.edu/goldenPath/hg19/database/cytoBand.txt.gz',
74: 'url': 'http://hgdownload.cse.ucsc.edu/goldenPath/mm10/database/cytoBandIdeo.txt.gz',
82: 'url': 'http://hgdownload.cse.ucsc.edu/goldenPath/danRer10/database/cytoBandIdeo.txt.gz',
88: 'url': 'http://hgdownload.cse.ucsc.edu/goldenPath/bosTau7/database/cytoBandIdeo.txt.gz',
94: 'url': 'http://hgdownload.cse.ucsc.edu/goldenPath/galGal4/database/cytoBandIdeo.txt.gz',
100: 'url': 'http://hgdownload.cse.ucsc.edu/goldenPath/susScr3/database/cytoBandIdeo.txt.gz',
106: 'url': 'http://hgdownload.cse.ucsc.edu/goldenPath/oviAri3/database/cytoBandIdeo.txt.gz',
112: 'url': 'http://hgdownload.cse.ucsc.edu/goldenPath/equCab2/database/cytoBandIdeo.txt.gz',
dipper/sources/WormBase.py
53: 'url': wbprod + species +
57: # 'url': wbdev + species +
61: 'url': wbprod + '/ONTOLOGY/phenotype_association.WSNUMBER.wb'},
64: 'url': wbprod + '/ONTOLOGY/rnai_phenotypes.WSNUMBER.wb'},
67: 'url': 'http://tazendra.caltech.edu/~azurebrd/cgi-bin/forms/generic.cgi?action=WpaXref'},
70: 'url': wbprod + species +
74: 'url': 'ftp://ftp.sanger.ac.uk/pub/wormbase/releases/WSNUMBER/ONTOLOGY/disease_association.WSNUMBER.wb'},
80: # 'url': wbdev+'/ONTOLOGY/anatomy_association.WS249.wb'},
83: # 'url': wbdev + species +
87: # 'url': wbdev + species +
91: 'url': wbprod + species +
95: # 'url': wbprod + '/letter.WSNUMBER'},
98: 'url': wbprod + '/CHECKSUMS'}
dipper/sources/ZFIN.py
66: 'url': ZFDL + '/genotype_features.txt'},
69: 'url': ZFDL + '/phenotype_fish.txt'},
72: 'url': ZFDL + '/zfinpubs.txt'},
75: 'url': 'http://compbio.charite.de/hudson/job/zp-owl-new/lastSuccessfulBuild/artifact/zp.annot_sourceinfo'},
78: 'url': ZFDL + '/Morpholinos.txt'},
80: # 'url': ZFDL + '/pheno_environment.txt'},
83: 'url': ZFDL+'/pheno_environment_fish.txt'},
86: 'url': 'http://zfin.org/Downloads/stage_ontology.txt'},
89: # 'url': ZFDL + '/wildtype-expression.txt'},
92: 'url': ZFDL + '/mappings.txt'},
95: 'url': ZFDL + '/genotype_backgrounds.txt'},
98: 'url': ZFDL + '/genbank.txt'},
101: 'url': ZFDL + '/uniprot.txt'},
104: 'url': 'http://zfin.org/downloads/gene.txt'},
107: # 'url': ZFDL+'/wildtypes.txt'},
110: 'url': ZFDL + '/wildtypes_fish.txt'},
113: 'url': ZFDL + '/human_orthos.txt'},
116: 'url': ZFDL + '/features.txt'},
119: 'url': ZFDL + '/features-affected-genes.txt'},
122: 'url': ZFDL+'/gene_marker_relationship.txt'},
125: 'url': ZFDL + '/CRISPR.txt'},
128: 'url': ZFDL + '/TALEN.txt'},
131: 'url': ZFDL + '/pub_to_pubmed_id_translation.txt'},
134: 'url': ZFDL + '/E_zfin_gene_alias.gff3'
138: 'url': ZFDL + '/fish_model_disease.txt'
142: 'url': ZFDL + '/fish_components_fish.txt'
dipper/sources/ZFINSlim.py
21: 'url': 'https://zfin.org/downloads/phenoGeneCleanData_fish.txt'
25: 'url': 'http://compbio.charite.de/hudson/job/zp-owl-new/lastSuccessfulBuild/artifact/zp.annot_sourceinfo'
One would still need to look in the code for the constant's definition when the url is shortened.
N.B. this cannot guarantee that there are no other import vectors (I am not aware of any).
Another way is to concatenate all the foo_dataset.ttl files from https://data.monarchinitiative.org/ttl/
and query for `?s dcat:accessURL ?url`.
Can we do this now? Are the dataset metadata files loaded into SciGraph?
@cmungall didn't know we were pulling this: 'http://compbio.charite.de/hudson/job/zp-owl-new/lastSuccessfulBuild/artifact/zp.annot_sourceinfo'
I haven't built this in almost a year... good to know that you are pulling this
thanks a lot @TomConlin
I would really like to have a way to explore which files (URLs) are actually put into dipper.