The get_files function does many different things; split it into smaller ones.
def get_files(
    self, bundle: str = None, tags: List[str] = None, version: int = None
) -> Query:
    """Fetch files from the store, narrowed by whichever filters are given.

    Starts from the base file query and applies each provided filter in a
    fixed order: bundle name, then tags, then version id. A filter whose
    argument is falsy (``None``, empty string, empty list, ``0``) is skipped.

    Args:
        bundle (str, optional): Name of the bundle to fetch files from.
        tags (List[str], optional): List of tags to filter files by.
        version (int, optional): ID of the version to fetch files from.

    Returns:
        Query: A query that matches the specified filters.
    """
    query = self._get_file_query()
    if bundle:
        query = self._filter_files_by_bundle_name(query, bundle)
    if tags:
        query = self._filter_files_by_tags(query, tags)
    if version:
        query = self._filter_files_by_version_id(query, version)
    return query


def _filter_files_by_bundle_name(self, query: Query, bundle: str) -> Query:
    """Join the file query to version and bundle, then filter on bundle name."""
    LOG.info(f"Fetching files from bundle {bundle}")
    return apply_bundle_filter(
        bundles=query.join(self.File.version, self.Version.bundle),
        filter_functions=[BundleFilters.FILTER_BY_NAME],
        bundle_name=bundle,
    )


def _filter_files_by_tags(self, query: Query, tags: List[str]) -> Query:
    """Join the file query to its tags, then filter on the given tag names."""
    formatted_tags = ",".join(tags)
    LOG.info(f"Fetching files with tags in [{formatted_tags}]")
    # NOTE(review): this join uses the module-level ``File`` while the other
    # helpers use ``self.File`` -- presumably the same model; confirm and unify.
    return apply_file_tag_filter(
        files_tags=query.join(File.tags),
        filter_functions=[FileTagFilter.FILTER_FILES_BY_TAGS],
        tags=tags,
    )


def _filter_files_by_version_id(self, query: Query, version: int) -> Query:
    """Join the file query to version, then filter on the version id."""
    LOG.info(f"Fetching files from version {version}")
    # NOTE(review): when a bundle filter was applied first, File.version has
    # already been joined once -- verify the ORM tolerates the repeated join.
    return apply_version_filter(
        versions=query.join(self.File.version),
        filter_functions=[VersionFilters.FILTER_BY_ID],
        version_id=version,
    )
Related to https://github.com/Clinical-Genomics/housekeeper/issues/105
The get_files function does many different things; split it into smaller ones.