StrongResearch / dimble

Nimble Digital Imaging IO for Medicine
BSD 3-Clause "New" or "Revised" License
7 stars 2 forks source link

Support for Sequence (SQ) VRs #2

Closed StrongChris closed 1 year ago

StrongChris commented 1 year ago

Currently elements with the SQ VR are ignored by dimble. Ideally they are fully supported.

sluijs commented 1 year ago

SQs are essential for all IODs, but especially for enhanced IODs. Converting "classic" DICOMs into enhanced formats and back is something I've looked into for Voxel as well. In traditional CTs/MRs there's so much duplicated data saved, because each slice is stored separately.

Here's a simple snippet that I used a while back to deduplicate top-level tags:

def _compress_value(value: List):
    val = np.array([json.dumps(v) for v in value], dtype="object")

    if np.all(val == val[0]):
        return [value[0]]

    return value

def compress_headers(headers: List[dict]):
    """Top-level header compression ("tlc") for a list of DICOM+JSON headers.

    The headers are first merged by ``_concat_headers``; every merged element's
    "Value" entry is then passed through ``_compress_value``. Only top-level
    attributes are compressed, as sequences are arrays with undetermined order
    in JSON.

    NB: attributes without "Value" properties are not preserved (eg, empty items,
    inlineBinary), making this a lossy compression algorithm.

    Args:
        headers (List[Dict]): Headers in DICOM+JSON format.
    Returns:
        Dict: a compressed representation of a list of DICOM+JSON headers, with
        two extra bookkeeping entries: "__compressor__" (always "tlc") and
        "__len__" (the number of input headers).
    """
    compressed = _concat_headers(headers)

    # Only values of existing keys are replaced here, so the key set stays
    # stable while iterating.
    for element in compressed.values():
        element["Value"] = _compress_value(element["Value"])

    # Bookkeeping entries are added after the loop so they are not treated as
    # DICOM elements above.
    compressed["__compressor__"] = "tlc"
    compressed["__len__"] = len(headers)

    return compressed

def _decompress_value(value: List, repeats: int):
    if len(value) == 1:
        return value * repeats

    return value

def decompress_header(compressed_header: Dict) -> List[Dict]:
    """Expand a top-level compressed header back into a list of DICOM+JSON headers.

    The number of headers to produce is read from the "__len__" entry. Every
    other entry (except "__compressor__") is treated as an element carrying "vr"
    and "Value"; a "Value" list of length one is reused for every header,
    otherwise the i-th item goes to the i-th header.

    Args:
        compressed_header (Dict): top-level compressed header.
    Returns:
        headers (List[Dict]): Headers in DICOM+JSON format.
    """
    bookkeeping_keys = ("__len__", "__compressor__")

    # One empty header per original input header.
    count = compressed_header.get("__len__")
    result = [{} for _ in range(count)]

    for tag, element in compressed_header.items():
        if tag in bookkeeping_keys:
            continue

        # essential elements
        vr = element.get("vr")
        values = element.get("Value")

        for position, header in enumerate(result):
            # A single stored item means the value was identical across headers.
            source = position if len(values) != 1 else 0
            header[tag] = {"vr": vr, "Value": values[source]}

    return result

class DatasetProxy:
    """Performant partial implementation of the pydicom.FileDataset metadata interface.

    This class is a wrapper around a DICOM+JSON dict that acts as pydicom.FileDataset. It supports
    accessing attributes by DICOM keywords. The wrapped dict is keyed by 8-character upper-case
    hexadecimal tags; keywords are translated to such tags on every access.

    Note that iteration and ``len`` operate on the wrapped dict directly, so iterating yields
    hexadecimal tag strings, not keywords.

    Example:
    >>> DatasetProxy(dicom_json_header).RescaleIntercept
    Args:
        header (Dict): Header in DICOM+JSON format. It is stored by reference, so writes through
            this proxy modify the dict that was passed in.
    """

    def __init__(self, header: Dict):
        super().__init__()

        self._dict: MutableMapping[str, Dict] = header

    def _keyword_to_json_tag(self, keyword: str) -> str:
        """Convert a DICOM keyword to its hex tag without requiring it to be in the header.

        Raises:
            AttributeError: if the keyword is not in the data dictionary.
        """

        tag = tag_for_keyword(keyword)
        if tag is None:
            raise AttributeError(f"Keyword `{keyword}` was not found in the data dictionary.")

        return hex(tag)[2:].zfill(8).upper()

    def _format_json_tag(self, keyword: str) -> str:
        """Convert a DICOM tag's keyword to its corresponding hex value.

        Raises:
            AttributeError: if the keyword is unknown or its tag is absent from this header.
        """

        json_tag = self._keyword_to_json_tag(keyword)
        if json_tag not in self._dict:
            raise AttributeError(f"Tag `{json_tag}` was not found in this header.")

        return json_tag

    def _set_json_tag(self, keyword: str, value):
        """Set the value of a DICOM+JSON tag, adding the element if it is not present yet.

        Non-list values are wrapped in a single-item list before being stored under "Value".

        Raises:
            AttributeError: if the keyword is not in the data dictionary.
        """

        # Only the keyword has to be valid here; requiring the tag to exist already would make
        # it impossible to add new elements through ``proxy[keyword] = value``.
        json_tag = self._keyword_to_json_tag(keyword)

        vr = dictionary_VR(keyword)
        value = value if isinstance(value, list) else [value]
        self._dict[json_tag] = {"vr": vr, "Value": value}

    def __contains__(self, keyword: str):
        """Return True when ``keyword`` is a known keyword whose tag is in this header."""
        try:
            self._format_json_tag(keyword)
        except AttributeError:
            return False
        return True

    def __getattr__(self, keyword: str):
        """Return the value stored for ``keyword``; single-item lists are unwrapped.

        Only invoked for names that normal attribute lookup does not find.

        Raises:
            AttributeError: if the keyword is unknown or its tag is absent from this header.
            KeyError: if the element exists but has no "Value" entry.
        """
        json_tag = self._format_json_tag(keyword)
        value = self._dict[json_tag]['Value']

        if isinstance(value, list) and len(value) == 1:
            return value[0]

        return value

    def __getitem__(self, keyword: str):
        """Dict-style access by keyword; a missing element raises KeyError."""
        try:
            return self.__getattr__(keyword)
        except AttributeError as err:
            raise KeyError(keyword) from err

    def __delitem__(self, keyword: str):
        """Remove the element for ``keyword``; a missing element raises KeyError."""
        try:
            json_tag = self._format_json_tag(keyword)
        except AttributeError as err:
            raise KeyError(keyword) from err

        del self._dict[json_tag]

    def __iter__(self):
        """Iterate over the keys of the wrapped dict (hex tags, not keywords)."""
        return iter(self._dict)

    def __len__(self) -> int:
        """Return the number of entries in the wrapped dict."""
        return len(self._dict)

    def __setitem__(self, keyword: str, value):
        """Dict-style assignment by keyword; see ``_set_json_tag``."""
        self._set_json_tag(keyword, value)

    def __array__(self):
        """Return ``np.asarray`` of the wrapped dict."""
        return np.asarray(self._dict)

    def get(self, keyword: str, value: Optional[Any] = None):
        """Return the value for ``keyword``, or ``value`` when the lookup raises KeyError."""
        try:
            return self.__getitem__(keyword)
        except KeyError:
            return value

    def __repr__(self):
        return f"{self.__class__.__name__}(elements={len(self)})"
StrongChris commented 1 year ago

Thanks for the input! This will be a useful reference for when we get to that.

StrongChris commented 1 year ago

Sequence types are now supported