"""
Basic version of what is in the Shepherding BuildSys 2020 paper, but using
dedupe to drive the record linkage.
"""
import brickschema
from brickschema.merge import get_entity_feature_vectors, merge_type_cluster, cluster_by_type
from brickschema.namespaces import BRICK, A, RDFS
from rdflib import Namespace, Literal
# Namespace under which every example entity in this script is minted.
BLDG = Namespace("urn:bldg#")

# First model: five entities named VAV0..VAV4, typed as brick:RVAV and
# labelled "RVAV <i>".
# NOTE(review): load_brick=True presumably preloads the Brick ontology into
# the graph -- confirm against the brickschema documentation.
g1 = brickschema.Graph(load_brick=True)
g1.bind("bldg", BLDG)
for i in range(5):
    # Both triples belong to the loop body; without the indentation the
    # script does not parse.
    entity = BLDG[f"VAV{i}"]
    g1.add((entity, A, BRICK.RVAV))
    g1.add((entity, RDFS.label, Literal(f"RVAV {i}")))
# Second model: five entities named VAV_0..VAV_4 (note the underscore), typed
# as brick:VAV and labelled "VAV <i>". The URIs, types and labels all differ
# from the ones used for g1, so the two graphs do not share identifiers.
g2 = brickschema.Graph(load_brick=True)
g2.bind("bldg", BLDG)
for i in range(5):
    # Both triples belong to the loop body; without the indentation the
    # script does not parse.
    entity = BLDG[f"VAV_{i}"]
    g2.add((entity, A, BRICK.VAV))
    g2.add((entity, RDFS.label, Literal(f"VAV {i}")))
# Expand each input graph with the "brick" profile before merging.
# NOTE(review): presumably this materialises inferred Brick triples so the
# merge can compare entity types -- confirm against the brickschema docs.
for graph in (g1, g2):
    graph.expand("brick")

# Merge the two expanded graphs, passing the shared BLDG namespace, and write
# the result to output.ttl in Turtle format.
G = merge_type_cluster(g1, g2, BLDG)
G.serialize("output.ttl", format="ttl")
# Basic version of what is in the Shepherding BuildSys 2020 paper, but using
# dedupe to drive the record linkage.