Open jbusecke opened 6 months ago
An example (requires installing the pangeo-forge-esgf PR branch):
from pangeo_forge_esgf.async_client import ESGFAsyncClient;import asyncio
import intake
def zstore_to_iid(zstore: str) -> str:
    """Derive a dot-separated instance id (iid) from a zarr store path.

    The path is normalised by dropping the ``gs://`` scheme and the
    ``.zarr`` suffix and by treating ``.`` and ``/`` as the same separator.
    Two layouts are then tried in order:

    1. the ten components that precede the final one (this matches a path
       whose last component is empty because it ends in ``/``);
    2. the last ten components, used when the first attempt does not start
       with ``CMIP6``.

    NOTE(review): presumably (1) is the "old" directory-style store and (2)
    the "new" ``<iid>.zarr`` store mentioned by the original author; confirm
    against the actual catalogs.

    Args:
        zstore: Store location, e.g. a ``gs://...`` URL.

    Returns:
        The iid as a ``.``-joined string of (up to) ten components.
    """
    # Normalise once instead of repeating the replace/split chain per attempt.
    parts = (
        zstore.replace('gs://', '')
        .replace('.zarr', '')
        .replace('.', '/')
        .split('/')
    )
    iid = '.'.join(parts[-11:-1])
    if not iid.startswith('CMIP6'):
        # First window was misaligned; fall back to the trailing ten parts.
        iid = '.'.join(parts[-10:])
    return iid
def search_iids(col_url: str):
    """Return the requested iids that are present in one intake-esm catalog.

    Opens the catalog at ``col_url``, converts every entry of its ``zstore``
    column to an iid with ``zstore_to_iid`` and keeps those that appear in
    the module-level ``iids_requested`` collection.

    Args:
        col_url: URL of the catalog JSON passed to
            ``intake.open_esm_datastore``.

    Returns:
        A list of matching iids, in catalog order (duplicates preserved).
    """
    col = intake.open_esm_datastore(col_url)
    iids_all = [zstore_to_iid(z) for z in col.df['zstore'].tolist()]
    # Build the lookup set once so each membership test is O(1) rather than
    # a scan of the whole requested list per catalog entry.
    requested = set(iids_requested)
    return [iid for iid in iids_all if iid in requested]
async with ESGFAsyncClient() as client:
iids_requested = await client.expand_iids(["CMIP6.*.*.*.[historical, ssp245].*.[Amon,mon].tas.*.*"])
url_dict = {
'qc':"https://storage.googleapis.com/cmip6/cmip6-pgf-ingestion-test/catalog/catalog.json",
'non-qc':"https://storage.googleapis.com/cmip6/cmip6-pgf-ingestion-test/catalog/catalog_noqc.json",
'retracted':"https://storage.googleapis.com/cmip6/cmip6-pgf-ingestion-test/catalog/catalog_retracted.json"
}
iids_found = []
for catalog,url in url_dict.items():
iids = search_iids(url)
iids_found.extend(iids)
print(f"Found in {catalog=}: {iids=}\n")
missing_iids = list(set(iids_requested) - set(iids_found))
print(f"\n\nStill missing {len(missing_iids)} of {len(iids_requested)}: \n{missing_iids=}")
This is waiting on the open pull request at https://github.com/jbusecke/pangeo-forge-esgf/pulls
Once that is merged, I want to update the instructions in a few places:
All of these should support passing wildcard and square-bracket strings.