KitwareMedical / dicom-anonymizer

Tool to anonymize DICOM files according to the DICOM standard
BSD 3-Clause "New" or "Revised" License
104 stars 47 forks source link

Update tag profile #60

Closed mkzia closed 10 months ago

mkzia commented 1 year ago

The following code was used to automatically generate the tag profiles from Table E.1-1 of the DICOM standard (PS3.15, Annex E):

from collections import defaultdict
import requests

from bs4 import BeautifulSoup

# Generates ``dicomfields.py`` (the tag profile lists) from Table E.1-1 of the
# DICOM standard. Run as a script; the network fetch and file write happen
# only in ``main()``.

URL = "https://dicom.nema.org/medical/dicom/current/output/chtml/part15/chapter_e.html"

# ``id`` of the anchor element that precedes the table in the HTML page.
TABLE_ANCHOR_ID = 'table_E.1-1'

# File written by ``main()``, relative to the current working directory.
OUTPUT_PATH = 'dicomfields.py'

# Table columns that are read; every other column is ignored.
TAG_COLUMN = 'Tag'
NAME_COLUMN = 'Attribute Name'
PROFILE_COLUMN = 'Basic Prof.'

# Attribute names that are left out of the generated lists.
FIELDS_TO_SKIP = {
    'Private Attributes',
}

# Attributes whose table tag is not a plain "(gggg,eeee)" pair and is
# therefore replaced by a hard-coded 4-tuple.
# NOTE(review): presumably (group, element, group mask, element mask) as
# consumed by dicom-anonymizer -- confirm against the consuming code.
MASKED_TAGS = {
    'Curve Data': '(0x5000, 0x0000, 0xFF00, 0x0000)',
    'Overlay Comments': '(0x6000, 0x4000, 0xFF00, 0xFFFF)',
    'Overlay Data': '(0x6000, 0x3000, 0xFF00, 0xFFFF)',
}

# (profile code in the table, generated list name, comment line emitted
# above the list), in the order the lists are written.
SECTIONS = (
    ('D', 'D_TAGS', '# Replaced tags'),
    ('Z', 'Z_TAGS', "# Replaced with empty values (0, '', ...)"),
    ('X', 'X_TAGS', '# Deleted tags'),
    ('U', 'U_TAGS', '# Replace UID'),

    ('Z/D', 'Z_D_TAGS', '# Replace element according to the VR'),
    ('X/Z', 'X_Z_TAGS', '# Set the value to empty according to the VR'),
    ('X/D', 'X_D_TAGS', "# Replace element according to the VR"),

    ('X/Z/D', 'X_Z_D_TAGS', '# Replace element according to the VR'),
    ('X/Z/U*', 'X_Z_U_STAR_TAGS',
     '# Replace element with UI as VR, else replace according to VR with empty values'),
)

HEADER = """# Tags anonymized in DICOM standard
# Documentation for groups meaning can be found in default associated actions.
# https://dicom.nema.org/medical/dicom/current/output/chtml/part15/chapter_e.html

"""

FOOTER = """# Contains all previous tags into one array
ALL_TAGS = []
ALL_TAGS.extend(D_TAGS)
ALL_TAGS.extend(Z_TAGS)
ALL_TAGS.extend(X_TAGS)
ALL_TAGS.extend(U_TAGS)
ALL_TAGS.extend(Z_D_TAGS)
ALL_TAGS.extend(X_Z_TAGS)
ALL_TAGS.extend(X_D_TAGS)
ALL_TAGS.extend(X_Z_D_TAGS)
ALL_TAGS.extend(X_Z_U_STAR_TAGS)
"""


def fetch_rows(url=URL, timeout=30):
    """Download the page at *url* and return the rows of the profile table.

    Args:
        url: Address of the HTML page containing the table.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list of ``(tag, attribute_name, basic_profile)`` string tuples,
        one per table row, each value stripped of surrounding whitespace.
        Rows that lack any of the three columns are dropped.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        RuntimeError: If the table cannot be located in the page.
    """
    page = requests.get(url, timeout=timeout)
    # Fail here rather than parse an error page and crash later on None.
    page.raise_for_status()
    soup = BeautifulSoup(page.content, "html.parser")

    # The id sits on an anchor next to the table, so look the table up
    # through the anchor's parent (done once, reused for head and body).
    anchor = soup.find(attrs={'id': TABLE_ANCHOR_ID})
    table = anchor.parent.find('table') if anchor is not None else None
    if table is None:
        raise RuntimeError(f'Could not find {TABLE_ANCHOR_ID} in {url}')

    headers = [th.text for th in table.find('thead').find_all('strong')]

    rows = []
    for tr in table.find('tbody').find_all('tr'):
        cells = {
            key: td.text.strip()
            for key, td in zip(headers, tr.find_all('td'))
        }
        row = (
            cells.get(TAG_COLUMN),
            cells.get(NAME_COLUMN),
            cells.get(PROFILE_COLUMN),
        )
        # An incomplete row would put None into the sort key and into the
        # tag formatting; skip it instead of failing there.
        if None in row:
            continue
        rows.append(row)
    return rows


def format_tag(tag, name):
    """Return the Python tuple source text for one attribute.

    Args:
        tag: Tag as written in the table, e.g. ``'(0010,0010)'``.
        name: Attribute name, already stripped of zero-width spaces.

    Returns:
        The hard-coded entry from ``MASKED_TAGS`` when *name* is listed
        there, otherwise *tag* rewritten as ``'(0xgggg, 0xeeee)'``.
    """
    masked = MASKED_TAGS.get(name)
    if masked is not None:
        return masked
    # "(gggg,eeee)" -> "(0xgggg, 0xeeee)": index 1 is the first group digit,
    # index 6 the first element digit (right after the comma).
    return f'{tag[:1]}0x{tag[1:6]} 0x{tag[6:]}'


def group_by_profile(rows):
    """Group formatted tag lines by their basic-profile code.

    Args:
        rows: Iterable of ``(tag, attribute_name, basic_profile)`` tuples.

    Returns:
        A ``defaultdict(list)`` mapping each profile code to its lines of
        the form ``'<tuple>, # <attribute name>'``, ordered by attribute
        name. Names listed in ``FIELDS_TO_SKIP`` are omitted.
    """
    profiles = defaultdict(list)
    for tag, name, profile in sorted(rows, key=lambda row: (row[2], row[1])):
        # Clean the name before comparing it, so a zero-width space or a
        # line break inside the cell cannot defeat the lookups below.
        name = name.replace('\u200b', '').replace('\n', '')
        if name in FIELDS_TO_SKIP:
            continue
        profiles[profile].append(f'{format_tag(tag, name)}, # {name}')
    return profiles


def render_module(profiles):
    """Return the source text of the generated ``dicomfields`` module.

    Args:
        profiles: Mapping of profile code to formatted tag lines, as
            returned by ``group_by_profile``.

    Returns:
        The module text: header comment, one list per entry of
        ``SECTIONS`` and the ``ALL_TAGS`` footer. A profile code with no
        rows produces an empty list, so every name used by the footer is
        always defined.
    """
    parts = [HEADER]
    for code, list_name, comment in SECTIONS:
        parts.append(f'{comment}\n{list_name} = [\n')
        parts.extend(f'    {entry}\n' for entry in profiles.get(code, ()))
        parts.append(']\n\n')
    parts.append(FOOTER)
    return ''.join(parts)


def main():
    """Fetch the table and write the generated module to ``OUTPUT_PATH``."""
    content = render_module(group_by_profile(fetch_rows()))
    # Explicit encoding: attribute names come from a web page and must not
    # depend on the platform's default codec.
    with open(OUTPUT_PATH, 'w', encoding='utf-8') as file:
        file.write(content)


if __name__ == '__main__':
    main()
smjoshiatglobus commented 1 year ago

Nice! How about adding the above code to the repository as a new utility script?

pchoisel commented 10 months ago

Hi @mkzia,

Thank you for your contribution. Would you mind if I added your script to the repository?

mkzia commented 10 months ago

> Hi @mkzia,
>
> Thank you for your contribution. Would you mind if I added your script in the repository ?

I will not mind at all. Please go ahead and add the code. Thanks!