Closed hannes-ucsc closed 3 years ago
Index: src/azul/plugins/metadata/hca/contributor_matrices.py
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
diff --git a/src/azul/plugins/metadata/hca/contributor_matrices.py b/src/azul/plugins/metadata/hca/contributor_matrices.py
--- a/src/azul/plugins/metadata/hca/contributor_matrices.py (revision 60e49827c76bfd3722555beb3455468d7a55109e)
+++ b/src/azul/plugins/metadata/hca/contributor_matrices.py (revision 2c017c453bf7f310a4c8d4d210583ff105adc9cd)
@@ -18,8 +18,7 @@
)
-def make_stratification_tree(files: Sequence[Mapping[str, str]],
- ) -> JSON:
+def make_stratification_tree(files: Sequence[Mapping[str, str]]) -> JSON:
"""
>>> from azul.doctests import assert_json
>>> def f(files):
@@ -34,7 +33,8 @@
{
"uuid": "u",
"version": "v",
- "name": "n"
+ "name": "n",
+ "url": null
}
]
}
@@ -52,12 +52,14 @@
{
"uuid": "u1",
"version": "v1",
- "name": "n1"
+ "name": "n1",
+ "url": null
},
{
"uuid": "u2",
"version": "v2",
- "name": "n2"
+ "name": "n2",
+ "url": null
}
]
}
@@ -75,14 +77,16 @@
{
"uuid": "u1",
"version": "v1",
- "name": "n1"
+ "name": "n1",
+ "url": null
}
],
"7": [
{
"uuid": "u2",
"version": "v2",
- "name": "n2"
+ "name": "n2",
+ "url": null
}
]
}
@@ -93,7 +97,8 @@
{
"uuid": "u1",
"version": "v1",
- "name": "n1"
+ "name": "n1",
+ "url": null
}
]
}
@@ -104,7 +109,8 @@
{
"uuid": "u2",
"version": "v2",
- "name": "n2"
+ "name": "n2",
+ "url": null
}
]
}
@@ -138,6 +144,7 @@
)))
for stratum in file['strata'].split('\n')
)),
+ "url": None, # request injection by caller
}
for file in files
]
Index: src/azul/service/hca_response_v5.py
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
diff --git a/src/azul/service/hca_response_v5.py b/src/azul/service/hca_response_v5.py
--- a/src/azul/service/hca_response_v5.py (revision 60e49827c76bfd3722555beb3455468d7a55109e)
+++ b/src/azul/service/hca_response_v5.py (revision 2c017c453bf7f310a4c8d4d210583ff105adc9cd)
@@ -323,6 +323,7 @@
"size": _file.get("size"),
"uuid": _file.get("uuid"),
"version": _file.get("version"),
+ "url": None # to be injected later in post-processing
}
files.append(translated_file)
return files
Index: src/azul/service/index_query_service.py
IDEA additional info:
Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP
<+>UTF-8
===================================================================
diff --git a/src/azul/service/index_query_service.py b/src/azul/service/index_query_service.py
--- a/src/azul/service/index_query_service.py (revision 60e49827c76bfd3722555beb3455468d7a55109e)
+++ b/src/azul/service/index_query_service.py (revision 2c017c453bf7f310a4c8d4d210583ff105adc9cd)
@@ -3,7 +3,6 @@
)
from typing import (
Optional,
- Union,
)
from elasticsearch_dsl.response import (
@@ -25,10 +24,9 @@
Pagination,
)
from azul.types import (
+ AnyMutableJSON,
JSON,
- JSONs,
MutableJSON,
- MutableJSONs,
)
from azul.uuids import (
validate_uuid,
@@ -63,47 +61,47 @@
:return: The Elasticsearch JSON response
"""
filters = self.parse_filters(filters)
+
if item_id is not None:
validate_uuid(item_id)
filters['entryId'] = {'is': [item_id]}
+
response = self.transform_request(catalog=catalog,
filters=filters,
pagination=pagination,
entity_type=entity_type)
- # FIXME: Generalize file URL injection.
- # https://github.com/DataBiosphere/azul/issues/2545
- if entity_type in ('files', 'bundles'):
- # Inject URLs to data files
- for hit in response['hits']:
- for file in hit['files']:
- file['url'] = file_url_func(catalog=catalog,
- file_uuid=file['uuid'],
- version=file['version'])
- elif entity_type == 'projects':
- # Similarly, inject URLs to matrix files in stratification trees
- def transform(node: Union[JSONs, JSON]) -> Union[MutableJSONs, MutableJSON]:
- if isinstance(node, dict):
- return {k: transform(v) for k, v in node.items()}
- elif isinstance(node, list):
- return [
- {
- 'name': file['name'],
- 'url': file_url_func(catalog=catalog,
- file_uuid=file['uuid'],
- version=file['version'])
- }
- for file in node
- ]
- else:
- assert False
+
+ def inject_file_urls(node: AnyMutableJSON) -> None:
+ if node is None:
+ pass
+ elif isinstance(node, (str, int, float, bool)):
+ pass
+ elif isinstance(node, dict):
+ try:
+ url = node['url']
+ version = node['version']
+ uuid = node['uuid']
+ except KeyError:
+ for child in node.values():
+ inject_file_urls(child)
+ else:
+ if url is None:
+ node['url'] = file_url_func(catalog=catalog,
+ file_uuid=uuid,
+ version=version)
+ elif isinstance(node, list):
+ for child in node:
+ inject_file_urls(child)
+ elif isinstance(node, (str, int, float, bool)):
+ pass
+ else:
+ assert False
- for hit in response['hits']:
- for project in hit['projects']:
- for key in 'matrices', 'contributorMatrices':
- project[key] = transform(project[key])
+ inject_file_urls(response['hits'])
if item_id is not None:
response = one(response['hits'], too_short=EntityNotFoundError(entity_type, item_id))
+
return response
def get_summary(self, catalog: CatalogName, filters):
The above is for inspiration. The crucial aspect of that implementation is that the plugin sets the url
property to None
, serving as a signal for the service to fill in the URL, assuming that the sibling properties uuid
and version
exist. The implementation above didn't pass all tests and was inefficient in that it traversed the entire response, down to going over lists of primitives. We should short-circuit the recursion to make it less expensive.
This is ok but we should try to make this even more generic. I'm thinking of something that can scan the entire response for file references and inject a URL to the file. A file reference could be a dictionary containing both
file_uuid
andfile_version
key. A recursive traversal would solve this new use case and the preexisting one right above this section. The only issue is that the file references for the preexisting use case use theuuid
andversion
keys instead offile_uuid
andfile_version
but I'm sure that can be fixed._Originally posted by @hannes-ucsc in https://github.com/DataBiosphere/azul/pull/2432#discussion_r530748940_