Closed jkiddo closed 9 months ago
Hi @jkiddo, thanks for creating this issue!
Which artifact are you using? If it is the library runtime, we can probably shade it in if there is a conflict.
HAPI FHIR 6.10.x (transitively used by Pathling v6.4.2) uses Jackson 2.15.3, but the underlying Scala module from Pathling is not happy with that version; sticking to it causes the following error:
Caused by: com.fasterxml.jackson.databind.JsonMappingException: Scala module 2.15.3 requires Jackson Databind version >= 2.15.0 and < 2.16.0 - Found jackson-databind version 2.13.4
com.fasterxml.jackson.core.StreamReadConstraints
was introduced in Jackson core 2.15 and is used by HAPI.
I'm using the encoders and library-api artifacts from Pathling.
I think it might just be a case of switching to the library-runtime artifact instead of library-api.
The library runtime excludes and relocates certain dependencies to make it play nice when included in environments where there are other ambient and overlapping dependencies, e.g. Spark clusters and other Java apps.
I'll update the documentation to reflect this advice.
I got a simple app working along the lines of what you are describing:
package org.example;
import au.csiro.pathling.config.HttpClientCachingConfiguration;
import au.csiro.pathling.config.TerminologyConfiguration;
import au.csiro.pathling.encoders.FhirEncoders;
import au.csiro.pathling.library.PathlingContext;
import au.csiro.pathling.library.io.source.NdjsonSource;
import ca.uhn.fhir.context.FhirContext;
import ca.uhn.fhir.validation.FhirValidator;
import ca.uhn.fhir.validation.IValidatorModule;
import ca.uhn.fhir.validation.SingleValidationMessage;
import ca.uhn.fhir.validation.ValidationResult;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.hl7.fhir.common.hapi.validation.validator.FhirInstanceValidator;
import org.hl7.fhir.instance.model.api.IBaseResource;
import org.hl7.fhir.r4.model.Enumerations.ResourceType;
/**
 * Small demonstration application: runs a Pathling extract query over NDJSON data and then
 * validates one encoded Patient resource with the HAPI FHIR instance validator.
 */
public class App {

  /**
   * Sets up a local Spark session and a Pathling context, displays the result of an extract query,
   * and prints the validation messages for the first Patient resource that is read.
   *
   * @param args command line arguments (not used)
   */
  public static void main(final String[] args) {
    final SparkSession session = SparkSession.builder()
        .master("local[*]")
        .getOrCreate();

    final HttpClientCachingConfiguration cachingConfiguration =
        HttpClientCachingConfiguration.builder().overrideExpiry(2628000).build();
    final TerminologyConfiguration terminologyConfiguration = TerminologyConfiguration.builder()
        .serverUrl("https://r4.ontoserver.csiro.au/fhir")
        .cache(cachingConfiguration)
        .build();

    final PathlingContext pathling = PathlingContext.create(session, terminologyConfiguration);
    final NdjsonSource source = pathling.read()
        .ndjson("/Users/gri306/Library/CloudStorage/OneDrive-CSIRO/Data/synthea/1k_20190829/fhir");

    showPatientExtract(source);
    validateFirstPatient(source);
  }

  /**
   * Builds the Patient extract query (names, phone number, heart disease flag, filtered on
   * COVID-19 vaccination) and shows its first 10 rows without truncating the values.
   */
  private static void showPatientExtract(final NdjsonSource source) {
    final Dataset<Row> rows = source.extract(ResourceType.PATIENT)
        .column("name.first().given.first()", "Given name")
        .column("name.first().family", "Family name")
        .column("telecom.where(system = 'phone').value", "Phone number")
        .column(
            "reverseResolve(Condition.subject).exists(code.subsumedBy(http://snomed.info/sct|56265001))",
            "Heart disease")
        .filter(
            "(reverseResolve(Immunization.patient).vaccineCode.memberOf('https://aehrc.csiro.au/fhir/ValueSet/covid-19-vaccines') contains true).not()")
        .execute();
    rows.show(10, false);
  }

  /**
   * Reads the Patient resources, takes the first one and prints the location and message of each
   * validation message produced for it.
   */
  private static void validateFirstPatient(final NdjsonSource source) {
    final FhirEncoders fhirEncoders = FhirEncoders.forR4().getOrCreate();
    final IBaseResource firstPatient = source.read(ResourceType.PATIENT)
        .as(fhirEncoders.of("Patient"))
        .collectAsList().get(0);

    // The validator is created from the same FHIR context that the encoders use.
    final FhirContext fhirContext = fhirEncoders.getContext();
    final FhirValidator fhirValidator = fhirContext.newValidator();
    final IValidatorModule instanceValidator = new FhirInstanceValidator(fhirContext);
    fhirValidator.registerValidatorModule(instanceValidator);

    final ValidationResult outcome = fhirValidator.validateWithResult(firstPatient);
    outcome.getMessages().forEach((final SingleValidationMessage message) ->
        System.out.println(message.getLocationString() + " " + message.getMessage()));
  }
}
And here is the pom.xml:
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<!-- Build for a standalone Java application that uses the Pathling library-runtime artifact
     together with the HAPI FHIR validation artifacts. -->
<modelVersion>4.0.0</modelVersion>
<groupId>org.example</groupId>
<artifactId>pathling-java-app</artifactId>
<version>1.0-SNAPSHOT</version>
<packaging>jar</packaging>
<name>pathling-java-app</name>
<!-- Versions referenced by the dependency declarations below. -->
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<spark.version>3.4.2</spark.version>
<hadoop.version>3.3.4</hadoop.version>
<pathling.version>6.4.2</pathling.version>
<hapi.version>6.10.0</hapi.version>
</properties>
<dependencies>
<!-- Spark (Scala 2.12 builds) and Hadoop are declared explicitly here.
     NOTE(review): presumably because library-runtime does not bring them in transitively, as
     library-api does; confirm against the Pathling artifacts. -->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.12</artifactId>
<version>${spark.version}</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_2.12</artifactId>
<version>${spark.version}</version>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-catalyst_2.12</artifactId>
<version>${spark.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>${hadoop.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
<version>${hadoop.version}</version>
</dependency>
<!-- Pathling library runtime artifact, used here in place of library-api. -->
<dependency>
<groupId>au.csiro.pathling</groupId>
<artifactId>library-runtime</artifactId>
<version>${pathling.version}</version>
</dependency>
<!-- HAPI FHIR base, R4 structures and validation artifacts, all at hapi.version. -->
<dependency>
<groupId>ca.uhn.hapi.fhir</groupId>
<artifactId>hapi-fhir-base</artifactId>
<version>${hapi.version}</version>
</dependency>
<dependency>
<groupId>ca.uhn.hapi.fhir</groupId>
<artifactId>hapi-fhir-structures-r4</artifactId>
<version>${hapi.version}</version>
</dependency>
<dependency>
<groupId>ca.uhn.hapi.fhir</groupId>
<artifactId>hapi-fhir-validation</artifactId>
<version>${hapi.version}</version>
</dependency>
<dependency>
<groupId>ca.uhn.hapi.fhir</groupId>
<artifactId>hapi-fhir-validation-resources-r4</artifactId>
<version>${hapi.version}</version>
</dependency>
</dependencies>
<build>
<plugins>
<!-- Compile with Java 11 as both the source and the target level. -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.8.1</version>
<configuration>
<source>11</source>
<target>11</target>
</configuration>
</plugin>
</plugins>
</build>
</project>
Give it a try and let me know how you go.
I found another way. Instead of using library-runtime, I stuck with library-api (thereby not having to 'restate' the Spark dependencies, which come along transitively when using library-api) and instead controlled the Jackson version (<jackson-bom.version>2.15.3</jackson-bom.version>).
I got it to work (to add to the mix, it's also a Spring Boot application) with the POM below. This will work as long as the Jackson version used by HAPI FHIR is within the range >= 2.15.0 and < 2.16.0, which for my case is OK for now.
<?xml version="1.0" encoding="UTF-8"?>
<!-- Build for a Spring Boot application that uses the Pathling encoders and library-api artifacts
     and pins the Jackson BOM version through a property. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.trifok</groupId>
<artifactId>nd2parquet</artifactId>
<version>1.0-SNAPSHOT</version>
<!-- Inherits from the Spring Boot starter parent. -->
<parent>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-parent</artifactId>
<version>2.7.18</version>
</parent>
<properties>
<maven.compiler.source>11</maven.compiler.source>
<maven.compiler.target>11</maven.compiler.target>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<!-- Jackson version control: the Scala module 2.15.3 reports that it requires jackson-databind
     >= 2.15.0 and < 2.16.0.
     NOTE(review): presumably this property overrides the Jackson version managed by the Spring
     Boot parent; confirm. -->
<jackson-bom.version>2.15.3</jackson-bom.version>
</properties>
<build>
<finalName>nd2parquet</finalName>
<plugins>
<!-- Runs the repackage goal of the Spring Boot Maven plugin. -->
<plugin>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-maven-plugin</artifactId>
<executions>
<execution>
<goals>
<goal>repackage</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
<dependencies>
<!-- No explicit version: NOTE(review) presumably managed by the Spring Boot parent; confirm. -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter</artifactId>
</dependency>
<!-- Pathling encoders and library-api, both at 6.4.2; no Spark dependencies are declared here. -->
<dependency>
<groupId>au.csiro.pathling</groupId>
<artifactId>encoders</artifactId>
<version>6.4.2</version>
</dependency>
<dependency>
<groupId>au.csiro.pathling</groupId>
<artifactId>library-api</artifactId>
<version>6.4.2</version>
</dependency>
</dependencies>
</project>
Good to hear that you got it working!
The FHIR server implementation within Pathling is also a Spring Boot + HAPI + Spark application, so its POM (+ the parent POM) might provide an example if you require any further guidance.
https://github.com/aehrc/pathling/blob/main/fhir-server/pom.xml
I have a project where I use Pathling (v6.4.2) as well as parsing some FHIR NDJSON files (using
FhirContext.forR4().newNDJsonParser().parseResource
for doing some integrity checks). It does create some issues, as there are conflicting Jackson dependencies
between the HAPI FHIR deps and Pathling. Is it possible to get the Jackson dependencies bumped to a newer version in Pathling?