Open Zethson opened 3 years ago
java -jar synthea-with-dependencies.jar -p 5 -c synthea.properties
⋊> ~/Desktop cat synthea.properties
exporter.ccda.export = false
exporter.fhir.export = true
exporter.csv.export = true
This generates CSV files.
This could also be a cool option: https://github.com/ratschlab/HIRID-ICU-Benchmark/blob/master/icu_benchmarks/synthetic_data/generate_simple_fake_data.py
import random
import uuid
from datetime import datetime, timedelta
from faker import Faker
fake = Faker()
# Field names used by each example FHIR resource type.
patient_fields = [
    'id',
    'birthDate',
    'gender',
    'address',
    'phone',
]
observation_fields = [
    'id',
    'status',
    'category',
    'code',
    'subject',
    'issued',
    'valueQuantity',
]
condition_fields = [
    'id',
    'clinicalStatus',
    'category',
    'code',
    'subject',
    'onsetDateTime',
]

# Pools of example values the generators draw from at random.
gender_values = [
    'male',
    'female',
    'other',
]
category_values = [
    'vital-signs',
    'laboratory',
    'imaging',
    'procedure',
]
code_values = [
    '1000001',
    '1000002',
    '1000003',
    '1000004',
    '1000005',
]
clinical_status_values = [
    'active',
    'resolved',
    'remission',
    'relapse',
]
def generate_patient():
    """
    Generate a fake FHIR Patient resource.

    Returns:
        dict: a Patient-shaped dictionary with a random UUID ``id``, a
        Faker-generated birth date, address and phone number, and a
        ``gender`` drawn from ``gender_values``.
    """
    patient = {
        'resourceType': 'Patient',
        'id': str(uuid.uuid4()),
        # Emit the birth date as an ISO-8601 string rather than a ``date``
        # object so the resource can be serialised to JSON, matching the
        # other date/time fields produced by these generators.
        'birthDate': fake.date_of_birth().isoformat(),
        'gender': random.choice(gender_values),
        'address': {
            'line': fake.street_address(),
            'city': fake.city(),
            'state': fake.state_abbr(),
            'postalCode': fake.zipcode()
        },
        'phone': fake.phone_number()
    }
    return patient
def generate_observation(patient_id):
    """
    Build a fake FHIR Observation resource for one patient.

    Args:
        patient_id: identifier interpolated into the ``Patient/...``
            subject reference.

    Returns:
        dict: an Observation with status ``'final'``, a random category
        and code, the current timestamp as ``issued`` and a random
        integer quantity between 1 and 100 expressed in mmHg.
    """
    # The pieces are built in the same order the resource lists them so the
    # sequence of random draws stays the same.
    resource_id = str(uuid.uuid4())
    category_coding = {
        'system': 'http://terminology.hl7.org/CodeSystem/observation-category',
        'code': random.choice(category_values)
    }
    loinc_coding = {
        'system': 'http://loinc.org',
        'code': random.choice(code_values),
        'display': 'Example Code'
    }
    subject_ref = {'reference': f'Patient/{patient_id}'}
    issued_at = datetime.now().isoformat()
    quantity = {
        'value': random.randint(1, 100),
        'unit': 'mmHg',
        'system': 'http://unitsofmeasure.org',
        'code': 'mm[Hg]'
    }

    return {
        'resourceType': 'Observation',
        'id': resource_id,
        'status': 'final',
        'category': {'coding': [category_coding]},
        'code': {'coding': [loinc_coding]},
        'subject': subject_ref,
        'issued': issued_at,
        'valueQuantity': quantity
    }
def generate_condition(patient_id):
    """
    Build a fake FHIR Condition resource for one patient.

    Args:
        patient_id: identifier interpolated into the ``Patient/...``
            subject reference.

    Returns:
        dict: a Condition with a random clinical status, category and
        code, and an onset timestamp between 1 and 3650 days before now.
    """
    # The pieces are built in the same order the resource lists them so the
    # sequence of random draws stays the same.
    resource_id = str(uuid.uuid4())
    status = random.choice(clinical_status_values)
    category_coding = {
        'system': 'http://terminology.hl7.org/CodeSystem/condition-category',
        'code': random.choice(category_values)
    }
    snomed_coding = {
        'system': 'http://snomed.info/sct',
        'code': random.choice(code_values),
        'display': 'Example Code'
    }
    subject_ref = {'reference': f'Patient/{patient_id}'}
    now = datetime.now()
    days_back = random.randint(1, 3650)
    onset = (now - timedelta(days=days_back)).isoformat()

    return {
        'resourceType': 'Condition',
        'id': resource_id,
        'clinicalStatus': status,
        'category': {'coding': [category_coding]},
        'code': {'coding': [snomed_coding]},
        'subject': subject_ref,
        'onsetDateTime': onset
    }
# Generate some example FHIR data: one patient plus an observation and a
# condition that both reference that patient's id.
patient = generate_patient()
observation = generate_observation(patient['id'])
condition = generate_condition(patient['id'])

# Show every generated resource, including the condition.
print(patient)
print(observation)
print(condition)
A more complex version:
import random
import uuid
from datetime import datetime, timedelta
from faker import Faker
from itertools import cycle
fake = Faker()
# Field names used by each example FHIR resource type.
patient_fields = ['id', 'birthDate', 'gender', 'address', 'phone']
observation_fields = [
    'id', 'status', 'category', 'code', 'subject', 'issued', 'valueQuantity',
]
condition_fields = [
    'id', 'clinicalStatus', 'category', 'code', 'subject', 'onsetDateTime',
]
encounter_fields = [
    'id', 'status', 'class', 'type', 'subject', 'period', 'participant',
]
medication_request_fields = [
    'id', 'status', 'medication', 'subject', 'authoredOn', 'dosageInstruction',
]
medication_dispense_fields = [
    'id', 'status', 'medicationReference', 'subject', 'whenPrepared',
    'dosageInstruction',
]
procedure_fields = [
    'id', 'status', 'code', 'subject', 'performedDateTime', 'performer',
]

# Pools of example values the generators draw from at random.
gender_values = ['male', 'female', 'other']
race_values = ['white', 'black', 'asian', 'hispanic', 'other']
ethnicity_values = ['nonhispanic', 'hispanic', 'unknown']
category_values = ['vital-signs', 'laboratory', 'imaging', 'procedure']
code_values = ['1000001', '1000002', '1000003', '1000004', '1000005']
clinical_status_values = ['active', 'resolved', 'remission', 'relapse']
encounter_status_values = [
    'planned', 'arrived', 'triaged', 'in-progress', 'on-leave', 'finished',
    'cancelled',
]
encounter_class_values = [
    'ambulatory', 'emergency', 'inpatient', 'outpatient', 'urgentcare',
]
encounter_type_values = [
    'office-visit', 'emergency', 'inpatient', 'outpatient', 'urgentcare',
]
medication_status_values = [
    'active', 'completed', 'cancelled', 'on-hold', 'stopped', 'draft',
]
procedure_status_values = [
    'preparation', 'in-progress', 'not-done', 'on-hold', 'stopped',
    'completed', 'entered-in-error', 'unknown',
]
procedure_code_values = ['1010001', '1010002', '1010003', '1010004', '1010005']
procedure_performer_values = ['primary', 'assistant', 'nurse', 'technician']

# Example medications and their dosages. Every entry is one tablet per dose
# with a period of 'day'; only the frequency differs between medications.
medication_values = {
    drug_name: {
        'dosage': {
            'quantity': 1,
            'unit': 'tablet',
            'frequency': doses_per_period,
            'period': 'day'
        }
    }
    for drug_name, doses_per_period in (
        ('atorvastatin', 1),
        ('metoprolol', 2),
        ('lisinopril', 1),
    )
}
def generate_patient():
    """
    Generate a fake FHIR Patient resource.

    Returns:
        dict: a Patient-shaped dictionary with a random UUID ``id``,
        ``meta`` and ``text`` sections, an ISO-formatted birth date, a
        ``gender`` drawn from ``gender_values``, one home address and one
        home phone entry, all filled with Faker-generated values.
    """
    patient = {
        'resourceType': 'Patient',
        'id': str(uuid.uuid4()),
        'meta': {
            'versionId': '1',
            # NOTE(review): this entry was cut off in the source (key with
            # no value); the current timestamp is assumed to be the
            # intended value -- confirm.
            'lastUpdated': datetime.now().isoformat()
        },
        'text': {
            'status': 'generated',
            'div': '<div xmlns="http://www.w3.org/1999/xhtml"></div>'
        },
        'birthDate': fake.date_of_birth().isoformat(),
        'gender': random.choice(gender_values),
        'address': [{
            'use': 'home',
            'type': 'postal',
            'text': fake.address(),
            'line': [fake.street_address()],
            'city': fake.city(),
            'district': fake.state(),
            'postalCode': fake.postcode(),
            'country': fake.country()
        }],
        'phone': [{
            'system': 'phone',
            'value': fake.phone_number(),
            'use': 'home'
        }]
    }
    return patient
def generate_observation(patient_id, category=None, code=None, value=None):
    """
    Generate a fake FHIR Observation resource.

    Args:
        patient_id: identifier interpolated into the ``Patient/...``
            subject reference.
        category: observation category code; a random entry of
            ``category_values`` is used when omitted or empty.
        code: observation code; a random entry of ``code_values`` is used
            when omitted or empty.
        value: numeric value for ``valueQuantity``; a random float between
            1 and 100 (rounded to two decimals) is used when ``None``.

    Returns:
        dict: an Observation with status ``'final'``, the current timestamp
        as ``issued`` and a randomly chosen unit.
    """
    if not category:
        category = random.choice(category_values)
    if not code:
        code = random.choice(code_values)
    # Compare against None so that an explicit measurement of 0 is kept
    # instead of being replaced by a random value.
    if value is None:
        value = round(random.uniform(1, 100), 2)
    observation = {
        'resourceType': 'Observation',
        'id': str(uuid.uuid4()),
        'status': 'final',
        'category': {
            'coding': [{
                'system': 'http://terminology.hl7.org/CodeSystem/observation-category',
                'code': category,
                'display': category.capitalize()
            }]
        },
        'code': {
            'coding': [{
                'system': 'http://loinc.org',
                'code': code,
                'display': fake.word()
            }]
        },
        'subject': {
            'reference': f'Patient/{patient_id}'
        },
        'issued': datetime.now().isoformat(),
        'valueQuantity': {
            'value': value,
            'unit': random.choice(['mg/dL', 'mmol/L', 'kg', 'cm', 'mmHg', 'bpm'])
        }
    }
    return observation
def generate_condition(patient_id, category=None, code=None, onset=None):
    """
    Build a fake FHIR Condition resource for one patient.

    Args:
        patient_id: identifier interpolated into the ``Patient/...``
            subject reference.
        category: condition category code; a random entry of
            ``category_values`` is used when omitted or empty.
        code: condition code; a random entry of ``code_values`` is used
            when omitted or empty.
        onset: value stored as ``onsetDateTime``; when omitted or empty, a
            Faker date-time between 50 years ago and now is used in ISO
            format.

    Returns:
        dict: a Condition with a random clinical status.
    """
    # Fall back to random values for any argument that was not supplied.
    category = category or random.choice(category_values)
    code = code or random.choice(code_values)
    onset = onset or fake.date_time_between(start_date='-50y', end_date='now').isoformat()

    resource_id = str(uuid.uuid4())
    status = random.choice(clinical_status_values)
    category_coding = {
        'system': 'http://terminology.hl7.org/CodeSystem/condition-category',
        'code': category,
        'display': category.capitalize()
    }
    snomed_coding = {
        'system': 'http://snomed.info/sct',
        'code': code,
        'display': fake.word()
    }

    return {
        'resourceType': 'Condition',
        'id': resource_id,
        'clinicalStatus': status,
        'category': {'coding': [category_coding]},
        'code': {'coding': [snomed_coding]},
        'subject': {'reference': f'Patient/{patient_id}'},
        'onsetDateTime': onset
    }
def generate_encounter(patient_id, type=None, start=None, end=None):
    """
    Build a fake FHIR Encounter resource for one patient.

    Args:
        patient_id: identifier interpolated into the ``Patient/...``
            references (both ``subject`` and the single participant).
        type: encounter type code; a random entry of
            ``encounter_type_values`` is used when omitted or empty.
        start: ISO-format start timestamp; when omitted or empty, a Faker
            date-time between 50 years ago and now is used.
        end: end timestamp; when omitted or empty, it is ``start`` plus a
            random 10-360 minutes, in ISO format.

    Returns:
        dict: an Encounter with status ``'finished'`` and the fixed
        ambulatory (``AMB``) class.
    """
    # Fall back to random values for any argument that was not supplied.
    type = type or random.choice(encounter_type_values)
    start = start or fake.date_time_between(start_date='-50y', end_date='now').isoformat()
    if not end:
        began = datetime.fromisoformat(start)
        duration = timedelta(minutes=random.randint(10, 360))
        end = (began + duration).isoformat()

    resource_id = str(uuid.uuid4())
    encounter_class = {
        'system': 'http://terminology.hl7.org/CodeSystem/v3-ActCode',
        'code': 'AMB',
        'display': 'ambulatory'
    }
    type_coding = {
        'system': 'http://snomed.info/sct',
        'code': type,
        'display': type.capitalize()
    }
    patient_reference = f'Patient/{patient_id}'

    return {
        'resourceType': 'Encounter',
        'id': resource_id,
        'status': 'finished',
        'class': encounter_class,
        'type': [{'coding': [type_coding]}],
        'subject': {'reference': patient_reference},
        'period': {'start': start, 'end': end},
        'participant': [{'individual': {'reference': patient_reference}}]
    }
def generate_medication_request(patient_id, medication=None, status=None, intent=None):
    """
    Generate a fake FHIR MedicationRequest resource.

    Args:
        patient_id: identifier interpolated into the ``Patient/...``
            subject reference.
        medication: medication code; a random key of ``medication_values``
            is used when omitted or empty.
        status: request status; a random entry of
            ``medication_status_values`` is used when omitted or empty.
        intent: request intent; a random FHIR intent code is used when
            omitted or empty.

    Returns:
        dict: a MedicationRequest with one dosage instruction whose text,
        timing and route are randomly generated.
    """
    # Intent codes from the FHIR MedicationRequest intent value set; the
    # module defines no list for them, so they are kept local here.
    intent_codes = (
        'proposal', 'plan', 'order', 'original-order',
        'reflex-order', 'filler-order', 'instance-order', 'option',
    )
    if not medication:
        # random.choice() indexes its argument by position, which raises
        # KeyError on a dict, so choose from the list of medication names.
        medication = random.choice(list(medication_values))
    if not status:
        status = random.choice(medication_status_values)
    if not intent:
        intent = random.choice(intent_codes)
    medication_request = {
        'resourceType': 'MedicationRequest',
        'id': str(uuid.uuid4()),
        'status': status,
        'intent': intent,
        'subject': {
            'reference': f'Patient/{patient_id}'
        },
        'medicationCodeableConcept': {
            'coding': [{
                'system': 'http://www.nlm.nih.gov/research/umls/rxnorm',
                'code': medication,
                'display': fake.word()
            }]
        },
        'dosageInstruction': [{
            'sequence': 1,
            'text': fake.sentence(nb_words=6),
            'timing': {
                'repeat': {
                    'frequency': random.randint(1, 3),
                    'period': random.randint(1, 5),
                    'periodUnit': 'd'
                }
            },
            'route': {
                'coding': [{
                    'system': 'http://snomed.info/sct',
                    'code': random.choice(['26643006', '255560000', '254790003']),
                    'display': random.choice(['Oral', 'Injection', 'Topical'])
                }]
            }
        }]
    }
    return medication_request
def generate_all_resources(num_patients):
    """
    Generate a list of all FHIR resources for the specified number of patients.

    For each patient this creates one Patient plus 5-20 Observations,
    1-5 Conditions, 1-3 Encounters and 1-5 MedicationRequests, all of which
    reference that patient's generated ``id``.

    Args:
        num_patients: number of patients to generate.

    Returns:
        list: all resources, grouped by type in the order patients,
        observations, conditions, encounters, medication requests.
    """
    patients = []
    observations = []
    conditions = []
    encounters = []
    medication_requests = []
    for _ in range(num_patients):
        patient = generate_patient()
        # Link child resources to the patient's real id (a UUID) rather
        # than the loop counter, so 'Patient/<id>' references resolve.
        patient_id = patient['id']
        patients.append(patient)
        for _ in range(random.randint(5, 20)):
            observations.append(generate_observation(patient_id))
        for _ in range(random.randint(1, 5)):
            conditions.append(generate_condition(patient_id))
        for _ in range(random.randint(1, 3)):
            encounters.append(generate_encounter(patient_id))
        for _ in range(random.randint(1, 5)):
            medication_requests.append(generate_medication_request(patient_id))
    resources = patients + observations + conditions + encounters + medication_requests
    return resources
import pytest
def test_generate_all_resources():
    """Check the number and basic shape of the resources generated for 10 patients."""
    num_patients = 10
    resources = generate_all_resources(num_patients)

    # Per patient: 1 Patient, 5-20 Observations, 1-5 Conditions,
    # 1-3 Encounters and 1-5 MedicationRequests. The counts are random, so
    # only the overall bounds can be asserted, not an exact total.
    min_per_patient = 1 + 5 + 1 + 1 + 1
    max_per_patient = 1 + 20 + 5 + 3 + 5
    assert num_patients * min_per_patient <= len(resources) <= num_patients * max_per_patient

    for resource in resources:
        assert resource['resourceType'] in ['Patient', 'Observation', 'Condition', 'Encounter', 'MedicationRequest']
        if resource['resourceType'] == 'Patient':
            assert resource.get('birthDate') is not None
            assert resource.get('gender') is not None
        elif resource['resourceType'] == 'Observation':
            assert resource.get('valueQuantity') is not None
            assert resource.get('code') is not None
        elif resource['resourceType'] == 'Condition':
            assert resource.get('code') is not None
            assert resource.get('subject') is not None
        elif resource['resourceType'] == 'Encounter':
            assert resource.get('class') is not None
            assert resource.get('subject') is not None
        elif resource['resourceType'] == 'MedicationRequest':
            # The generator stores the drug under 'medicationCodeableConcept';
            # it never emits a 'medicationReference' key.
            assert resource.get('medicationCodeableConcept') is not None
            assert resource.get('subject') is not None
There is a pipeline that first uses synthea to generate CSV files and then a synthea R package to create OMOP files
https://github.com/synthetichealth/synthea
We should try to wrap it in Python (checking that a JDK is installed, etc.).
The default output should be CSV, but other formats should be supported as well.
CC #102