OO-LD / schema

The Object Oriented Linked Data Schema
Creative Commons Zero v1.0 Universal
6 stars 0 forks source link

Normalization #11

Open simontaurus opened 2 months ago

simontaurus commented 2 months ago

For the alias notation, see #12.

Normalization can be realized by chaining JSON-LD algorithms:

| Input | Expand Context | Flatten Context | Result |
| --- | --- | --- | --- |
| `"skos:label": "test"` | `"skos_label": "skos:prefLabel"` | `"skos_label": {"@id": "skos:prefLabel", "@language": "en", "@container": "@set"}` | `"skos:label": [{"@value": "test", "@lang": "en"}]` |
| `"rdfs:label": "test"` | `"rdfs_label": "rdfs:prefLabel"` | `"rdfs_label": skos:label` | `"skos:label": "test"` |
| `"@id": "ex:P", "schema:worksFor": "ex:C"` | `"schema_worksFor": "schema:worksFor"` | `"schema_worksFor": {"@reverse": "schema:employes"}` | `"@id": "ex:C", "schema:employes": "ex:P"` |
{
  "@context": {
    "schema": "http://schema.org/",
    "demo": "https://oo-ld.github.io/demo/",
    "works_for": {"@id": "schema:worksFor", "@type": "@id"},
    "works_for*": {"@id": "demo:worksFor", "@type": "@id"},
    "works_for**": {"@reverse": "schema:employes", "@type": "@id"},
    "employes":{"@id": "schema:employes", "@type": "@id"},
    "type": "@type",
    "id": "@id"

  },
  "@graph": [
    {
      "id": "demo:person1",
      "type": "schema:Person",
      "name": "Person1",
      "works_for": "demo:organizationA",
      "works_for*": "demo:organizationB"
    },
    {
      "id": "demo:organizationA",
      "type": "schema:Organization"
    },
    {
      "id": "demo:organizationB",
      "type": "schema:Organization"
    },
    {
      "id": "demo:organizationC",
      "type": "schema:Organization",
      "employes": "demo:person1"
    }
  ]
}

flattened

{
  "@graph": [
    {
      "@id": "https://oo-ld.github.io/demo/organizationA",
      "@type": "http://schema.org/Organization"
    },
    {
      "@id": "https://oo-ld.github.io/demo/organizationB",
      "@type": "http://schema.org/Organization"
    },
    {
      "@id": "https://oo-ld.github.io/demo/organizationC",
      "@type": "http://schema.org/Organization",
      "http://schema.org/employes": {
        "@id": "https://oo-ld.github.io/demo/person1"
      }
    },
    {
      "@id": "https://oo-ld.github.io/demo/person1",
      "@type": "http://schema.org/Person",
      "http://schema.org/worksFor": {
        "@id": "https://oo-ld.github.io/demo/organizationA"
      },
      "https://oo-ld.github.io/demo/worksFor": {
        "@id": "https://oo-ld.github.io/demo/organizationB"
      }
    }
  ]
}

playground

simontaurus commented 1 month ago

Implementation:

from pprint import pprint
from pyld import jsonld
from jsondiff import diff

# Term definitions of the input document. Besides the schema.org terms it
# declares three terms in the "demo" namespace: "full_name", "label" and
# "is_employed_by".
_input_context = {
    "schema": "http://schema.org/",
    "demo": "https://oo-ld.github.io/demo/",
    "name": "schema:name",
    "full_name": "demo:full_name",
    "label": "demo:label",
    "works_for": {"@id": "schema:worksFor", "@type": "@id"},
    "is_employed_by": {"@id": "demo:is_employed_by", "@type": "@id"},
    "employes": {"@id": "schema:employes", "@type": "@id"},
    "type": "@type",
    "id": "@id",
}

# Three persons and one organization. Each person is linked to
# demo:organizationA through a different property: person1 points to it via
# "works_for", person2 via "is_employed_by", and person3 is referenced from
# the organization's own "employes" property.
_input_nodes = [
    {
        "id": "demo:person1",
        "type": "schema:Person",
        "name": "Person1",
        "works_for": "demo:organizationA",
    },
    {
        "id": "demo:person2",
        "type": "schema:Person",
        "full_name": "Person2",
        "is_employed_by": "demo:organizationA",
    },
    {
        "id": "demo:person3",
        "type": "schema:Person",
        "name": "Person3",
    },
    {
        "id": "demo:organizationA",
        "type": "schema:Organization",
        "label": "organizationA",
        "employes": "demo:person3",
    },
]

# The JSON-LD input document that the rest of the script processes.
graph = {"@context": _input_context, "@graph": _input_nodes}
#graph["@graph"] = sorted(graph["@graph"], key=lambda x: x['@id'])

# Target context. A key ending in "*" declares an alias of the term with the
# same name minus the asterisks (e.g. "name*" and "name"): both terms are meant
# to end up under the base term's IRI after normalization.
context = {
    "schema": "http://schema.org/",
    "demo": "https://oo-ld.github.io/demo/",
    "skos": "http://www.w3.org/2004/02/skos/core#",
    "name": "schema:name",
    "name*": "demo:full_name",
    "text": "@value", "lang": "@language",
    "label": {"@id": "skos:prefLabel", "@container": "@set"},
    "label*": {"@id": "demo:label", "@container": "@set", "@language": "en"},
    "employes": {"@id": "schema:employes", "@type": "@id"},
    "employes*": {"@reverse": "schema:worksFor", "@type": "@id"},
    "employes**": {"@reverse": "demo:is_employed_by", "@type": "@id"},
    "type": "@type",
    "id": "@id",
}


def build_alias_contexts(context):
    """Derive the two helper term tables for every "*" alias in *context*.

    For each key ending in "*", a generated term named "_" + the alias IRI
    with ":" replaced by "_" (e.g. "_demo_full_name") is added to both tables:

    * first table: the generated term as a plain forward property for the
      alias IRI (keeping "@type" if the alias declares one);
    * second table: the alias definition with its IRI swapped for the IRI of
      the base term (the key without asterisks).

    If the base term is itself declared with "@reverse", the direction of the
    rewritten definition is flipped, because "alias of a reversed property" is
    the reversed property seen from the other side. NOTE(review): other keys
    of the alias definition (e.g. "@container") are carried over unchanged, so
    they must be ones JSON-LD allows on the resulting kind of term.

    Args:
        context: mapping of term name to a term definition (IRI string or
            dict with "@id" / "@reverse").

    Returns:
        A ``(first, second)`` tuple of dicts keyed by the generated term name.

    Raises:
        KeyError: if an alias has no matching base term, or an alias given as
            a dict has neither "@id" nor "@reverse".
    """
    alias_terms = {}
    rewritten_terms = {}
    for key, value in context.items():
        if not key.endswith("*"):
            continue

        if isinstance(value, dict):
            # "@reverse" takes precedence over "@id" as the alias IRI.
            alias_iri = value["@reverse"] if "@reverse" in value else value["@id"]
            alias_def = {"@id": alias_iri}
            if "@type" in value:
                alias_def["@type"] = value["@type"]
            rewritten_def = dict(value)
        else:
            alias_iri = value
            alias_def = {"@id": value}
            rewritten_def = {"@id": value}

        base_def = context[key.replace("*", "")]
        if not isinstance(base_def, dict) or "@id" in base_def:
            # Forward base term: keep the alias direction, swap in the base IRI.
            base_iri = base_def["@id"] if isinstance(base_def, dict) else base_def
            for keyword in ("@id", "@reverse"):
                if keyword in rewritten_def:
                    rewritten_def[keyword] = base_iri
        elif "@reverse" in base_def:
            # Reversed base term: swap in the base IRI and flip the direction.
            alias_is_reverse = "@reverse" in rewritten_def
            rewritten_def.pop("@id", None)
            rewritten_def.pop("@reverse", None)
            rewritten_def["@id" if alias_is_reverse else "@reverse"] = base_def["@reverse"]
        else:
            # Base term carries no IRI to rewrite to; the alias is registered
            # unchanged, as before.
            print("Error")

        generated_term = "_" + alias_iri.replace(":", "_")
        alias_terms[generated_term] = alias_def
        rewritten_terms[generated_term] = rewritten_def
    return alias_terms, rewritten_terms


# temp1: generated term -> alias IRI; temp2: generated term -> base IRI.
temp1, temp2 = build_alias_contexts(context)
# Show both generated term tables, then compare each against the entries that
# are expected for the context above.
pprint(temp1)
pprint(temp2)

# Generated term -> alias IRI (used for the first compaction).
expected_temp1 = {
    "_demo_full_name": {"@id": "demo:full_name"},
    "_demo_label": {"@id": "demo:label"},
    "_schema_worksFor": {"@id": "schema:worksFor", "@type": "@id"},
    "_demo_is_employed_by": {"@id": "demo:is_employed_by", "@type": "@id"},
}

# Generated term -> base IRI (swapped in before flattening).
expected_temp2 = {
    "_demo_full_name": {"@id": "schema:name"},
    "_demo_label": {"@id": "skos:prefLabel", "@container": "@set", "@language": "en"},
    "_schema_worksFor": {"@reverse": "schema:employes", "@type": "@id"},
    "_demo_is_employed_by": {"@reverse": "schema:employes", "@type": "@id"},
}

for actual_terms, expected_terms in (
    (temp1, expected_temp1),
    (temp2, expected_temp2),
):
    print(diff(actual_terms, expected_terms))

# Step 1: compact against the target context extended with temp1, in which
# each generated "_prefix_name" term stands for an alias IRI.
aliased = jsonld.compact(graph, {**context, **temp1})

# Step 2: replace the document's context with the temp2 variant, in which the
# same generated terms are bound to the base IRIs, and flatten the result.
aliased["@context"] = {**context, **temp2}
flattened = jsonld.flatten(aliased)

# Step 3: compact once more, this time against the plain target context.
graph = jsonld.compact(flattened, context)

pprint(graph)
# Expected normalized document: all three persons are listed under the
# organization's "employes", and every name / label uses the base term.
expected_graph = {
    '@context': {
        'demo': 'https://oo-ld.github.io/demo/',
        'employes': {'@id': 'schema:employes', '@type': '@id'},
        'employes*': {'@reverse': 'schema:worksFor', '@type': '@id'},
        'employes**': {'@reverse': 'demo:is_employed_by', '@type': '@id'},
        'id': '@id',
        'label': {'@container': '@set', '@id': 'skos:prefLabel'},
        'label*': {
            '@container': '@set',
            '@id': 'demo:label',
            '@language': 'en',
        },
        'lang': '@language',
        'name': 'schema:name',
        'name*': 'demo:full_name',
        'schema': 'http://schema.org/',
        'skos': 'http://www.w3.org/2004/02/skos/core#',
        'text': '@value',
        'type': '@type',
    },
    '@graph': [
        {
            'employes': ['demo:person1', 'demo:person2', 'demo:person3'],
            'id': 'demo:organizationA',
            'label': [{'lang': 'en', 'text': 'organizationA'}],
            'type': 'schema:Organization',
        },
        {'id': 'demo:person1', 'name': 'Person1', 'type': 'schema:Person'},
        {'id': 'demo:person2', 'name': 'Person2', 'type': 'schema:Person'},
        {'id': 'demo:person3', 'name': 'Person3', 'type': 'schema:Person'},
    ],
}

# Print the difference between the actual and the expected result.
print(diff(graph, expected_graph))

To nest the persons within the organization, framing can be applied:

# Frame selecting schema:Organization nodes with their schema:Person
# "employes" embedded. "custom_type" is a term added only for the frame
# (demo:customType) and is given the default value "demo:Org".
frame = {
    "type": "schema:Organization",
    "custom_type": {"@default": "demo:Org"},
    "employes": {"type": "schema:Person"},
    "@context": {**context, "custom_type": "demo:customType"},
}

# Apply the frame with the "requireAll" option enabled.
graph = jsonld.frame(graph, frame, options={"requireAll": True})

resulting in:

{
  "employes": [
    {
      "id": "demo:person1",
      "name": "Person1",
      "type": "schema:Person"
    },
    {
      "id": "demo:person2",
      "name": "Person2",
      "type": "schema:Person"
    },
    {
      "id": "demo:person3",
      "name": "Person3",
      "type": "schema:Person"
    }
  ],
  "id": "demo:organizationA",
  "label": [
    {
      "lang": "en",
      "text": "organizationA"
    }
  ],
  "type": "schema:Organization"
}