Closed StrongChris closed 1 year ago
SQs are essential for all IODs, but especially for enhanced IODs. Converting "classic" DICOMs into enhanced formats and back is something I've looked into for Voxel as well. In traditional CT/MR series, a lot of duplicated data is saved because each slice is stored separately.
Here's a simple snippet that I used a while back to deduplicate top-level tags:
def _compress_value(value: List):
val = np.array([json.dumps(v) for v in value], dtype="object")
if np.all(val == val[0]):
return [value[0]]
return value
def compress_headers(headers: List[dict]):
    """Top-level header compression for a list of DICOM+JSON headers.

    The headers are merged with ``_concat_headers`` and each merged element's
    "Value" is passed through ``_compress_value``. Two bookkeeping entries are
    then added: ``"__compressor__"`` (always ``"tlc"``) and ``"__len__"`` (the
    number of input headers).

    Only top-level attributes are compressed, as sequences are arrays with
    undetermined order in JSON.

    NB: attributes without "Value" properties are not preserved (eg, empty items,
    inlineBinary), making this a lossy compression algorithm.

    Args:
        headers (List[Dict]): Headers in DICOM+JSON format.

    Returns:
        Dict: a compressed representation of a list of DICOM+JSON headers.
    """
    # NOTE(review): presumably `_concat_headers` returns a dict mapping each tag to
    # an element whose "Value" holds one entry per header -- confirm against its
    # definition, which is not part of this snippet.
    merged = _concat_headers(headers)
    for tag in merged:
        element = merged[tag]
        element["Value"] = _compress_value(element["Value"])
    # Bookkeeping entries are added after the loop so they are never compressed.
    merged.update({"__compressor__": "tlc", "__len__": len(headers)})
    return merged
def _decompress_value(value: List, repeats: int):
if len(value) == 1:
return value * repeats
return value
def decompress_header(compressed_header: Dict) -> List[Dict]:
    """Decompress a top-level compressed header into a list of DICOM+JSON headers.

    The number of headers is read from the ``"__len__"`` entry. The ``"__len__"``
    and ``"__compressor__"`` entries are bookkeeping and are not copied to the
    output. For every other entry, a "Value" list of length one is treated as
    shared by all headers; otherwise the i-th item belongs to the i-th header.

    Every header receives its own deep copy of its value, so editing one
    decompressed header in place affects neither the other headers nor
    ``compressed_header``.

    Args:
        compressed_header (Dict): top-level compressed header.

    Returns:
        headers (List[Dict]): Headers in DICOM+JSON format.
    """
    import copy  # function-scope import keeps this block self-contained

    # create empty headers
    n_headers = compressed_header.get("__len__")
    headers: List[Dict] = [{} for _ in range(n_headers)]

    for key, element in compressed_header.items():
        if key in ("__len__", "__compressor__"):
            continue

        # essential elements
        vr = element.get("vr")
        value = element.get("Value")
        # Decide once per element whether the value was deduplicated.
        shared = len(value) == 1
        for i, header in enumerate(headers):
            item = value[0] if shared else value[i]
            # Copy so that headers never alias each other's (or the input's) lists.
            header[key] = {"vr": vr, "Value": copy.deepcopy(item)}
    return headers
class DatasetProxy:
    """Performant partial implementation of the pydicom.FileDataset metadata interface.

    This class is a wrapper around a DICOM+JSON dict that acts as pydicom.FileDataset. It supports
    accessing attributes by DICOM keywords. Elements are looked up in the wrapped dict under the
    keyword's tag, formatted as an 8-character, zero-padded, uppercase hexadecimal string.

    Example:
        >>> DatasetProxy(dicom_json_header).RescaleIntercept

    Args:
        header (Dict): Header in DICOM+JSON format. The dict is wrapped, not copied, so writes
            and deletions through the proxy modify it.
    """

    def __init__(self, header: Dict):
        super().__init__()
        self._dict: MutableMapping[str, Dict] = header

    def _format_json_tag(self, keyword: str) -> str:
        """Convert a DICOM tag's keyword to its corresponding hex value.

        Raises:
            AttributeError: If the keyword is not in the data dictionary, or if its tag is not
                present in this header.
        """
        tag = tag_for_keyword(keyword)
        if tag is None:
            raise AttributeError(f"Keyword `{keyword}` was not found in the data dictionary.")
        json_tag = hex(tag)[2:].zfill(8).upper()
        if json_tag not in self._dict:
            raise AttributeError(f"Tag `{json_tag}` was not found in this header.")
        return json_tag

    def _set_json_tag(self, keyword: str, value):
        """Set the value of a DICOM+JSON tag.

        NOTE: the tag is resolved with ``_format_json_tag``, so only elements already present
        in the header can be set; otherwise AttributeError is raised.
        """
        json_tag = self._format_json_tag(keyword)
        vr = dictionary_VR(keyword)
        # DICOM+JSON stores values as lists, so wrap scalars.
        wrapped = value if isinstance(value, list) else [value]
        self._dict[json_tag] = {"vr": vr, "Value": wrapped}

    def __contains__(self, keyword: str):
        """Return True if the keyword resolves to a tag present in this header."""
        try:
            self._format_json_tag(keyword)
        except AttributeError:
            return False
        return True

    def __getattr__(self, keyword: str):
        """Return the element's value; a single-item list is unwrapped to its only item.

        Raises:
            AttributeError: If the keyword cannot be resolved, or the element has no "Value".
        """
        json_tag = self._format_json_tag(keyword)
        try:
            value = self._dict[json_tag]["Value"]
        except KeyError:
            # __getattr__ must signal failure with AttributeError so that hasattr() and
            # getattr(obj, name, default) behave correctly.
            raise AttributeError(f"Tag `{json_tag}` has no `Value` in this header.") from None
        if isinstance(value, list) and len(value) == 1:
            return value[0]
        return value

    def __getitem__(self, keyword: str):
        """Same lookup as attribute access, but failures are reported as KeyError."""
        try:
            return self.__getattr__(keyword)
        except AttributeError as err:
            raise KeyError(keyword) from err

    def __delitem__(self, keyword: str):
        """Remove the element for ``keyword``; raise KeyError if it cannot be resolved."""
        try:
            json_tag = self._format_json_tag(keyword)
        except AttributeError as err:
            raise KeyError(keyword) from err
        del self._dict[json_tag]

    def __iter__(self):
        """Iterate over the keys of the wrapped dict (hex tag strings, not keywords)."""
        return iter(self._dict)

    def __len__(self) -> int:
        """Return the number of entries in the wrapped dict."""
        return len(self._dict)

    def __setitem__(self, keyword: str, value):
        """Set an existing element's value; see ``_set_json_tag``."""
        self._set_json_tag(keyword, value)

    def __array__(self):
        """Return ``np.asarray`` of the wrapped dict."""
        return np.asarray(self._dict)

    def get(self, keyword: str, value: Optional[Any] = None):
        """Return the element's value, or ``value`` if the lookup raises KeyError."""
        try:
            return self.__getitem__(keyword)
        except KeyError:
            return value

    def __repr__(self):
        return f"{self.__class__.__name__}(elements={len(self)})"
Thanks for the input! This will be a useful reference for when we get to that.
Sequence types are now supported
Currently, elements with the SQ VR are ignored by dimble. Ideally, they would be fully supported.