digitalbazaar / jsonld.js

A JSON-LD Processor and API implementation in JavaScript
https://json-ld.org/
Other
1.66k stars 195 forks source link

Blank nodes defined in RDF are converted as Relative IRI Node #474

Open Yocote0111 opened 2 years ago

Yocote0111 commented 2 years ago
const jsonld = require('jsonld');
const N3 = require('n3');
const { Readable } = require('stream');

/**
 * Parses an RDF document and imports the resulting quads into this store.
 *
 * @param {*} doc - RDF source; passed straight to `Readable.from`, so anything
 *   that function accepts (e.g. a string) works.
 * @param {string} [format='Turtle'] - Format name given to `N3.StreamParser`.
 * @returns {Promise} Resolves with this store when the imported stream emits
 *   'end'; rejects with the first error from the input or the parser stream.
 */
N3.Store.prototype.readDocAsync = function (doc, format = 'Turtle') {
    return new Promise((resolve, reject) => {
        // pipe() does not forward errors to its destination, so the input
        // stream needs its own handler or a failure there would never reject.
        const input = Readable.from(doc).on('error', reject);
        const quads = input.pipe(new N3.StreamParser({ format }));
        this.import(quads)
            .on('error', reject)
            .on('end', () => resolve(this));
    });
};

const rdfDoc = `
@prefix dcterms: <http://purl.org/dc/terms/> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@base <http://example.org> .
<#book1>
    dcterms:title "book 1" ;
    dcterms:description "this is book 1";
    dcterms:hasPart [ # blank node 1
        dcterms:title "section 1"; 
    ], [  # blank node 2
        dcterms:title "section 2";
    ]
    .
`;
// const rdfDoc = `
// <http://example.org#book1> <http://purl.org/dc/terms/description> "this is book 1" .
// <http://example.org#book1> <http://purl.org/dc/terms/hasPart> _:b0 .
// <http://example.org#book1> <http://purl.org/dc/terms/hasPart> _:b1 .
// <http://example.org#book1> <http://purl.org/dc/terms/title> "book 1" .
// _:b0 <http://purl.org/dc/terms/title> "section 1" .
// _:b1 <http://purl.org/dc/terms/title> "section 2" .
// `;

// Round-trip demo: Turtle text -> N3 store -> JSON-LD -> N-Quads.
(async () => {
  const dataset = await new N3.Store().readDocAsync(rdfDoc);

  // Encode: hand the parsed quads to jsonld and print the JSON-LD result.
  const jsondoc = await jsonld.fromRDF(dataset);
  console.log("<<Encode:RDF2JSONLD>>");
  console.log(JSON.stringify(jsondoc, null, 2)); // output1

  // Decode: serialize the JSON-LD back to N-Quads for comparison with the input.
  const rdfDataset = await jsonld.toRDF(jsondoc, {format: 'application/n-quads'});
  console.log("<<Decode:JSONLD2RDF>>");
  console.log(rdfDataset);
})();

results in

<<Encode:RDF2JSONLD>>
[
  {
    "@id": "http://example.org#book1",
    "http://purl.org/dc/terms/title": [
      {
        "@value": "book 1"
      }
    ],
    "http://purl.org/dc/terms/description": [
      {
        "@value": "this is book 1"
      }
    ],
    "http://purl.org/dc/terms/hasPart": [
      {
        "@id": "n3-0"  // blank node 1 is converted as relative IRI node (not  blank node)
      },
      {
        "@id": "n3-1" // blank node 2 is converted as relative IRI node (not blank node)
      }
    ]
  },
  {
    "@id": "n3-0", 
    "http://purl.org/dc/terms/title": [
      {
        "@value": "section 1"
      }
    ]
  },
  {
    "@id": "n3-1",
    "http://purl.org/dc/terms/title": [
      {
        "@value": "section 2"
      }
    ]
  }
]

and the re-decoded RDF does not match the original one.

<<Decode:JSONLD2RDF>>
<http://example.org#book1> <http://purl.org/dc/terms/description> "this is book 1" .
<http://example.org#book1> <http://purl.org/dc/terms/title> "book 1" .

Why are blank nodes in RDF encoded as relative IRI nodes? Is there any way to encode blank nodes in RDF as objects with an "@id" of the form "_:xxx"?

Yocote0111 commented 2 years ago

When I tried with the option {format: 'application/n-quads'}, it worked well.

Specifically,

const jsonld = require('jsonld');

const rdfDoc = `
<http://example.org#book1> <http://purl.org/dc/terms/description> "this is book 1" .
<http://example.org#book1> <http://purl.org/dc/terms/hasPart> _:b0 .
<http://example.org#book1> <http://purl.org/dc/terms/hasPart> _:b1 .
<http://example.org#book1> <http://purl.org/dc/terms/title> "book 1" .
_:b0 <http://purl.org/dc/terms/title> "section 1" .
_:b1 <http://purl.org/dc/terms/title> "section 2" .
`;

// Round-trip demo: N-Quads text -> JSON-LD -> N-Quads, with jsonld parsing the text itself.
(async () => {
  // Encode: jsonld parses the N-Quads string directly (no N3 terms involved).
  const jsondoc = await jsonld.fromRDF(rdfDoc, {format: 'application/n-quads'});
  console.log("<<Encode:RDF2JSONLD>>");
  console.log(JSON.stringify(jsondoc, null, 2)); // output1

  // Decode: serialize the JSON-LD back to N-Quads for comparison with the input.
  const rdfDataset = await jsonld.toRDF(jsondoc, {format: 'application/n-quads'});
  console.log("<<Decode:JSONLD2RDF>>");
  console.log(rdfDataset);
})();

results in

<<Encode:RDF2JSONLD>>
[
  {
    "@id": "_:b0",
    "http://purl.org/dc/terms/title": [
      {
        "@value": "section 1"
      }
    ]
  },
  {
    "@id": "_:b1",
    "http://purl.org/dc/terms/title": [
      {
        "@value": "section 2"
      }
    ]
  },
  {
    "@id": "http://example.org#book1",
    "http://purl.org/dc/terms/description": [
      {
        "@value": "this is book 1"
      }
    ],
    "http://purl.org/dc/terms/hasPart": [
      {
        "@id": "_:b0"
      },
      {
        "@id": "_:b1"
      }
    ],
    "http://purl.org/dc/terms/title": [
      {
        "@value": "book 1"
      }
    ]
  }
]

and the re-decoded RDF does match the original one.

<<Decode:JSONLD2RDF>>
<http://example.org#book1> <http://purl.org/dc/terms/description> "this is book 1" .
<http://example.org#book1> <http://purl.org/dc/terms/hasPart> _:b0 .
<http://example.org#book1> <http://purl.org/dc/terms/hasPart> _:b1 .
<http://example.org#book1> <http://purl.org/dc/terms/title> "book 1" .
_:b0 <http://purl.org/dc/terms/title> "section 1" .
_:b1 <http://purl.org/dc/terms/title> "section 2" .
Yocote0111 commented 2 years ago

I found that using {Term}.value to get the value of "@id" in the following code causes this result. https://github.com/digitalbazaar/jsonld.js/blob/20e2286d198ce7d376320306f9df3667f48a4544/lib/fromRdf.js#L57-L119

According to the description of BlankNode interface in RDF/JS Spec,
the "value" of BlankNode will sometimes take non-"_:xxx" form.

value blank node name as a string, without any serialization specific prefixes, e.g. when parsing, if the data was sourced from Turtle, remove "_:", if it was sourced from RDF/XML, do not change the blank node name (example: "blank3")

The library "n3" complies with this spec, and consequently the "@id" of a BlankNode in JSON-LD takes a non-"_:xxx" form (in particular, a relative IRI in the example above).

davidlehn commented 2 years ago

Looks like rdfDoc got cut off in https://github.com/digitalbazaar/jsonld.js/issues/474#issuecomment-1060193784.

I'm not sure how interoperability is supposed to work between implementations as far as blank nodes go. They are kind of tricky. How can code know what is a blank node if it doesn't have a known special naming pattern?

In JSON-LD, the terminology section says blank nodes have the _: prefix. But it does link to the RDF concepts definition that has a note about how they are dependent on the concrete syntax or implementation. I'm not sure where that leaves issues like this.

Have you tried the setting blankNodePrefix: '_:' in the N3 parser?

Yocote0111 commented 2 years ago

Oops! I corrected rdfDoc in https://github.com/digitalbazaar/jsonld.js/issues/474#issuecomment-1060193784 .

Unfortunately, I tried the setting blankNodePrefix: '_:' in the N3 parser, but it still returns a value in non-"_:xxx" form (even though the id has the "_:xxx" form).

// Parse one N-Quads statement and write the subject's {termType, id, value} to stdout.
const subjectPrinter = new Transform({
    writableObjectMode: true,
    transform(quad, encoding, done) {
        // Read the three fields in the same order they are reported.
        const { termType, id, value } = quad.subject;
        this.push(`Subject ${JSON.stringify({ termType, id, value })}\n`);
        done();
    }
});

// 'Turtle' can be substituted for 'N-Quad' as the format here.
const quadParser = new N3.StreamParser({ format: 'N-Quad', blankNodePrefix: '_:' });

Readable.from(`_:b0 <http://purl.org/dc/terms/title> "section 1" .`)
    .pipe(quadParser)
    .pipe(subjectPrinter)
    .pipe(process.stdout);
// >> Subject {"termType":"BlankNode","id":"_:b0","value":"b0"}

According to the Term interface in RDF/JS, every term has a termType member, so it should be possible to tell which terms are blank nodes.

Anyway, for now it seems best to first convert the source format to N-Quads and then use jsonld.fromRDF(rdfDoc, {format: 'application/n-quads'}).

MarcusElevait commented 1 year ago

I have the same problem. The N3 parser is giving me quads for blank nodes where the _: prefix is cut off from the value of the blank node, like this: image

Tried using format: 'application/n-quads' as option for the fromRDF function, but this then gives me the error:

TypeError: input.split is not a function
    at parse (NQuads.js:90:25)
    at jsonld.js:638:31
    at Generator.next (<anonymous>)
    at asyncGeneratorStep (asyncToGenerator.js:3:1)
    at _next (asyncToGenerator.js:22:1)
    at asyncToGenerator.js:27:1
    at new ZoneAwarePromise (zone.js:1432:21)
    at asyncToGenerator.js:19:1
    at jsonld.js:640:2
    at shape-framing-demo.component.ts:74:34

For now my workaround is to add the prefix to the value of the blank nodes after parsing it with N3, but this seems to be too hacky as the value could be anything.

Is there a reason, why the value and not the id attribute of the blank node is used?