dotimplement / HealthChain

Simplify testing and validating AI and NLP applications in a healthcare context 💫 🏥
https://dotimplement.github.io/HealthChain/
Apache License 2.0
25 stars 15 forks source link

Use Jinja templates for CDA parser #88

Open jenniferjiangkells opened 1 month ago

jenniferjiangkells commented 1 month ago

Description

CDA sections are currently hard-coded as dictionaries inside functions. We could look into using Jinja to load them from templates instead, which would also make them more configurable.

Possible Implementation

from jinja2 import Environment, FileSystemLoader
import xmltodict
from pydantic import BaseModel
from typing import List, Dict

class ProblemConcept(BaseModel):
    """A coded problem entry, validated by Pydantic.

    Instances are passed to ``CdaAnnotator.add_problem``, which stores
    their ``model_dump()`` output in the annotator's ``problems`` list.
    """
    # Code identifying the problem (the coding system is not specified here).
    code: str
    # Name shown for the code, e.g. 'Hypertension' in the usage example.
    display_name: str
    # other fields...

class CdaAnnotator:
    """Build, modify and export a CDA document rendered from a Jinja template.

    Section data lives in ``self.data`` as a plain dict; that dict is the
    rendering context handed to the template in :meth:`export`.
    """

    def __init__(self, template_dir: str):
        """Load ``cda_template.xml`` from *template_dir* with empty sections.

        Args:
            template_dir: Directory searched by the Jinja ``FileSystemLoader``.

        Raises:
            jinja2.TemplateNotFound: If ``cda_template.xml`` is not in
                *template_dir*.
        """
        # Autoescape so that values containing '&', '<' or '>' (for example
        # free-text display names) cannot produce malformed XML or inject
        # markup into the rendered document.
        self.env = Environment(
            loader=FileSystemLoader(template_dir),
            autoescape=True,
        )
        self.template = self.env.get_template('cda_template.xml')
        self.data = {
            'problems': [],
            'medications': [],
            'allergies': [],
        }

    @classmethod
    def from_xml(cls, xml_string: str, template_dir: str) -> "CdaAnnotator":
        """Create an annotator pre-populated from an existing CDA XML string.

        Args:
            xml_string: XML text whose root element is ``ClinicalDocument``.
            template_dir: Directory containing ``cda_template.xml``.

        Returns:
            A new annotator whose ``data`` dict holds the validated document
            fields on top of the default empty section lists.

        Raises:
            KeyError: If the parsed XML has no ``ClinicalDocument`` root.
        """
        annotator = cls(template_dir)
        parsed_data = xmltodict.parse(xml_string)
        # Use Pydantic to validate and convert parsed data
        document = ClinicalDocument(**parsed_data['ClinicalDocument'])
        # Keep ``data`` a dict (as set up in __init__) so that add_problem()
        # and export() work the same way whether or not from_xml() was used.
        # NOTE(review): assumes ClinicalDocument is a Pydantic v2 model
        # exposing model_dump(), like ProblemConcept -- confirm.
        annotator.data.update(document.model_dump())
        return annotator

    def add_problem(self, problem: ProblemConcept):
        """Append *problem* to the ``problems`` section as a plain dict.

        Args:
            problem: The problem entry; the Pydantic model has already
                validated its fields on construction.
        """
        self.data['problems'].append(problem.model_dump())

    def export(self) -> str:
        """Render the template with the current data and return the XML.

        The rendered text is parsed back with ``xmltodict`` and passed to
        ``ClinicalDocument`` so that malformed or structurally invalid output
        raises here instead of being returned.

        Returns:
            The document as a pretty-printed XML string.
        """
        # Render the template with the current data
        rendered_xml = self.template.render(self.data)
        # Parse the rendered XML to ensure it's well-formed
        parsed_xml = xmltodict.parse(rendered_xml)
        # Validation only: the constructed model is discarded on purpose
        ClinicalDocument(**parsed_xml['ClinicalDocument'])
        # Convert back to XML string
        return xmltodict.unparse(parsed_xml, pretty=True)

# Usage: create an annotator, add one problem, then export the document.
# 'path/to/templates' is a placeholder for the directory that contains
# cda_template.xml.
annotator = CdaAnnotator('path/to/templates')
# Illustrative problem entry; the code and display name are sample values.
problem = ProblemConcept(code='123', display_name='Hypertension')
annotator.add_problem(problem)
# export() renders the template with the collected data and returns XML text.
xml_output = annotator.export()