zazuko / rdf-validate-shacl

Validate RDF data purely in JavaScript. An implementation of the W3C SHACL specification on top of the RDFJS stack.
MIT License
95 stars 12 forks source link

Error with your playground example: MaxCountConstraintComponent #118

Closed benjaminaaron closed 8 months ago

benjaminaaron commented 8 months ago

I am getting Error: Cannot find validator for constraint component http://www.w3.org/ns/shacl#MaxCountConstraintComponent when running your playground example:

Screenshot 2023-10-26 at 11 24 58

This is my script.js:

const fs = require("fs")

/**
 * Dynamically imports rdf-ext, @rdfjs/parser-n3 and rdf-validate-shacl in
 * parallel and returns the default export of each.
 *
 * NOTE(review): dynamic import() is presumably used because these packages
 * are ESM-only while this script loads `fs` via require() — confirm.
 *
 * @returns {Promise<{factory: object, ParserN3: Function, SHACLValidator: Function}>}
 *   rdf-ext's default export (exposed as `factory`), the N3 parser class and
 *   the SHACL validator class.
 */
async function importDependencies() {
    // Destructure the `default` export of each module, in the same order as the import() calls below.
    const [{ default: rdfExt }, { default: ParserN3 }, { default: SHACLValidator }] = await Promise.all([
        import("rdf-ext"),
        import("@rdfjs/parser-n3"),
        import("rdf-validate-shacl")
    ])
    return { factory: rdfExt, ParserN3, SHACLValidator }
}

/**
 * Streams the file at `filePath` through the N3 parser and imports the
 * parsed quads into a new dataset created by the rdf-ext factory.
 *
 * @param {string} filePath - path handed to fs.createReadStream.
 * @returns {Promise<object>} whatever `dataset().import(...)` returns; callers await it.
 */
async function loadDataset(filePath) {
    const { factory, ParserN3 } = await importDependencies()
    const stream = fs.createReadStream(filePath)
    // The parser is constructed without options, i.e. it is NOT given the
    // rdf-ext factory that is later used for the dataset and the validator.
    const parser = new ParserN3()
    return factory.dataset().import(parser.import(stream))
}

/**
 * Loads "shapes.ttl" and "data.ttl", validates the data against the shapes
 * with SHACLValidator and logs the resulting report object.
 */
async function run() {
    const { factory, SHACLValidator } = await importDependencies()
    // The two files are loaded one after the other, each via loadDataset().
    const shapes = await loadDataset("shapes.ttl")
    const data = await loadDataset("data.ttl")

    // The validator receives the rdf-ext factory explicitly.
    const validator = new SHACLValidator(shapes, { factory })
    const report = await validator.validate(data)

    console.log(report)
}

// Start the script; the returned promise is neither awaited nor given a catch handler.
run()

And this is my package.json:

{
  "dependencies": {
    "@rdfjs/parser-n3": "^2.0.1",
    "rdf-ext": "^2.4.0",
    "rdf-validate-shacl": "^0.5.1"
  }
}

I run it like this: node script.js.

👉 However, if I rewrite the SHACL shapes to not include blank nodes, it works:

@prefix sh: <http://www.w3.org/ns/shacl#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix schema: <http://schema.org/> .

schema:PersonNamePropertyShape a sh:PropertyShape ;
    sh:path schema:name ;
    sh:minCount 1 ;
    sh:maxCount 1 ;
.

schema:PersonAgePropertyShape a sh:PropertyShape ;
    sh:path schema:age ;
    sh:minCount 1 ;
    sh:minInclusive 18 ;
.

schema:Person
    a rdfs:Class, sh:NodeShape ;
    sh:property schema:PersonNamePropertyShape, schema:PersonAgePropertyShape ;
.

That makes me think that you might have an internal bug regarding blank nodes? Or am I doing something wrong? 🤔

benjaminaaron commented 8 months ago

One additional observation. If I reduce the conditions from 2 to 1 in the example it works ([] instead of [],[]). So maybe the problem has to do with blank node in combination with multiple conditions?

tpluscode commented 8 months ago

Thank you for getting in touch about this. I was able to replicate your issue

You were right about blank nodes. Internally, the shapes graph is combined with the SHACL triples themselves and this is where blank nodes get mixed up.

It is a common problem with the RDF/JS stack, which has no easy solution. The easiest possible fix for your snippet would be to ensure that parsing happens using the same factory you later pass to the SHACL engine:

async function loadDataset(filePath) {
    const { factory, ParserN3 } = await importDependencies()
    const stream = fs.createReadStream(filePath)
-   const parser = new ParserN3()
+   const parser = new ParserN3({ factory })
    return factory.dataset().import(parser.import(stream))
}

Otherwise, I might suggest simplifying this code significantly by using the RDF Environment. You could keep rdf-ext, or try @zazuko/env-node, which comes with some extra bells and whistles.

import SHACLValidator from 'rdf-validate-shacl'
import factory from '@zazuko/env-node'

/**
 * Parses "shapes.ttl" and "data.ttl" through the same `factory` that is
 * passed to SHACLValidator, validates the data and prints the report
 * dataset serialized as 'text/n3'.
 */
async function run() {
    // use factory to parse
    const shapes = await factory.dataset().import(factory.fromFile("shapes.ttl"))
    const data = await factory.dataset().import(factory.fromFile("data.ttl"))

    // Print the parsed shapes graph serialized as 'text/n3'.
    console.log(await shapes.serialize({ format: 'text/n3' }))

    const validator = new SHACLValidator(shapes, { factory })
    const report = await validator.validate(data)

    // Print the validation report's dataset serialized as 'text/n3'.
    console.log(await report.dataset.serialize({ format: 'text/n3' }))
}

// Start the script; the returned promise is neither awaited nor given a catch handler.
run()

factory.fromFile and dataset.serialize are additions only available in @zazuko/env-node. Otherwise, the interface closely resembles that of rdf-ext.

benjaminaaron commented 8 months ago

Great, thanks a lot @tpluscode! Your suggestion indeed makes it work 👍 I will also try your simplified approach, that looks much more compact 🤩