Open marc-portier opened 7 months ago
Quick, separate exercise showing how the config entries in the YAML file can be normalised using the prefix declarations:
from typing import Dict, List
import validators
import re
from re import Match
from rdflib import Namespace, Graph, URIRef
from rdflib.namespace import NamespaceManager
# see https://rdflib.readthedocs.io/en/stable/namespaces_and_bindings.html
def makeNSM(pfx_declarations: Dict[str, str]) -> NamespaceManager:
    """Build a namespace manager from prefix declarations.

    :param pfx_declarations: mapping of prefix name to namespace IRI string,
        as read from the yml configuration.
    :return: a ``NamespaceManager`` (backed by a fresh, empty ``Graph``) in
        which every declared prefix is bound to its namespace.
    """
    pfxs = {k: Namespace(v) for k, v in pfx_declarations.items()}
    print(f"{pfxs=}")
    # bind_namespaces="none" is meant to start without rdflib's default
    # prefix bindings, so only the declared prefixes end up in the manager.
    nsm = NamespaceManager(Graph(), bind_namespaces="none")
    for pf, ns in pfxs.items():
        # override=True: a later declaration wins over an existing binding.
        nsm.bind(pf, ns, override=True)
    print(f"{list(nsm.namespaces())=}")
    return nsm
def resolve_uri(uri: str, nsm: NamespaceManager) -> URIRef:
    """Turn a config entry into a ``URIRef``.

    :param uri: either a full URL or a ``prefix:local`` CURIE.
    :param nsm: namespace manager used to expand CURIEs.
    :return: ``URIRef(uri)`` when ``validators.url`` accepts the string,
        otherwise the result of ``nsm.expand_curie(uri)``.
    """
    # TODO: reconsider the validators trick -- we might want to explicitly
    # demand <> surrounding the <uri>.
    if validators.url(uri):
        return URIRef(uri)
    return nsm.expand_curie(uri)
def resolve_literals(literal_uris: List[str], nsm: NamespaceManager) -> List[URIRef]:
    """Resolve each entry of ``literal_uris`` through ``resolve_uri``.

    :param literal_uris: full URLs and/or CURIEs, in config order.
    :param nsm: namespace manager used to expand CURIEs.
    :return: the resolved references, in the same order as the input.
    """
    resolved: List[URIRef] = []
    for entry in literal_uris:
        resolved.append(resolve_uri(entry, nsm))
    return resolved
def resolve_sparql(sparql, nsm):
    """Prepend a ``PREFIX`` line for every namespace bound in ``nsm``.

    :param sparql: the query text as written in the config.
    :param nsm: object whose ``namespaces()`` yields ``(prefix, uri)`` pairs,
        where each ``uri`` offers an ``n3()`` rendering.
    :return: the PREFIX lines, a newline, then the unchanged query text.
    """
    declarations = []
    for prefix, namespace in nsm.namespaces():
        declarations.append(f"PREFIX {prefix}: {namespace.n3()}")
    header = "\n".join(declarations)
    return f"{header}\n{sparql}"
# How to match one step of a property path: either a bare token (group 2,
# e.g. a CURIE) or an IRI wrapped in <> (group 3), followed by optional
# whitespace and the "/" separator.
PPATH_RE: str = r'(([^<>\/\s]+)|<([^>]+)>)\s*\/'  # how to match parts of property-paths


def ppath_split(ppath: str) -> List[str]:
    """Split a property path into its individual steps.

    :param ppath: steps separated by ``/``; full IRIs must be wrapped in
        ``<>`` because their own slashes would otherwise act as separators.
    :return: a list of the steps in order, with the surrounding ``<>`` removed
        from bracketed IRIs. An empty input yields an empty list.
    """
    # Append a trailing "/" so the final step is terminated like all others.
    # Return a real list (not a one-shot generator) to honour the annotation
    # and let callers index, measure or re-iterate the result.
    return [
        m.group(2) or m.group(3)
        for m in re.finditer(pattern=PPATH_RE, string=ppath + "/")
    ]
def resolve_ppaths(ppaths: List[str], nsm: NamespaceManager):
    """Rewrite property paths so that every step is a full N3 term.

    :param ppaths: property paths whose steps are CURIEs or ``<>``-wrapped IRIs.
    :param nsm: namespace manager used to expand CURIEs.
    :return: one string per input path, with the resolved steps rendered via
        ``n3()`` and joined by ``" / "``.
    """
    resolved = []
    for ppath in ppaths:
        steps = [resolve_uri(step, nsm).n3() for step in ppath_split(ppath)]
        resolved.append(" / ".join(steps))
    return resolved
def do():
    """Demo: normalise sample yml config entries using prefix declarations.

    Builds a namespace manager from sample prefix declarations, then resolves
    sample literals, a SPARQL query and property paths against it, printing
    each result.
    """
    yml_pfx_declarations = dict(
        # Namespace IRIs need their trailing separator: without the final "/"
        # "schema:DataSet" would resolve to "https://schema.orgDataSet".
        schema="https://schema.org/",
        ex="https://example.org/",
    )
    yml_literals = [
        "ex:test",
        "schema:DataSet",
        "https://demo.me/whatever",
    ]
    # SPARQL requires the WHERE pattern to be enclosed in braces.
    yml_sparql = """select * where { ?s schema:name ?n . }"""
    yml_ppaths = [
        "<https://demo.me/whatever> / ex:some",
        "ex:some",
        "<https://demo.me/whatever>",
        "schema:owner / schema:name",
    ]

    # make actual namespaces that can be used
    nsm: NamespaceManager = makeNSM(yml_pfx_declarations)

    literals = resolve_literals(yml_literals, nsm)
    print(f"{literals=}")

    sparql = resolve_sparql(yml_sparql, nsm)
    print(f"{sparql=}")

    ppaths = resolve_ppaths(yml_ppaths, nsm)
    print(f"{ppaths=}")
waiting for PR #51 to get merged with main branch
the prefix config in the yml should extend to
an updated test-yml should show this is all actually working (and if needed implementation fixes should make it work)