datova-kancelaria / nkod-pipeline

Obsahuje export LP-ETL pipeline NKOD pro deployment do k8s
0 stars 1 forks source link

Naharvestovanym datasetom sa menia URIcka #21

Closed miroslavliska closed 6 months ago

miroslavliska commented 7 months ago

Naharvestovanym datasetom sa menia URIcka typu https://data.gov.sk/.... Je to v tomto DPU image

Považujem to za chybu, hoc predpokladám, že v rámci zadania to možno malo byť upresnené, že URI pre dataset používa doménu data.gov.sk len pre datasety zaevidované priamo do portálu. Plán bol, že do NKODu sa má zapísať LKOD taký aký je.

Treba preveriť, či je to vôbec chyba, resp. či to nespôsobuje chybu.

miroslavliska commented 7 months ago
# Rewrites harvested DCAT catalog metadata onto IRIs minted under
# https://data.gov.sk/set/.
#
# For every dataset listed by a dcat:Catalog, the CONSTRUCT template copies the
# selected dataset, contact point, temporal coverage, distribution, terms-of-use
# and data-service properties onto newly minted IRIs. The publisher keeps its
# original IRI. The original dataset IRI is preserved twice: as an xsd:anyURI
# literal in dcterms:identifier and as dcterms:source of the catalog record.
#
# CONSTRUCT skips any template triple that contains an unbound variable, so the
# OPTIONAL parts of the WHERE clause only produce output when they matched --
# provided the minted subject/object IRIs are themselves unbound in that case
# (see the guarded BINDs at the end of the query).
PREFIX dcat: <http://www.w3.org/ns/dcat#>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX schema: <http://schema.org/>
PREFIX vcard: <http://www.w3.org/2006/vcard/ns#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>

PREFIX leg: <https://data.gov.sk/def/ontology/legislation/>

CONSTRUCT {
  ?catalog dcat:dataset ?dataset_new .

  ?dataset_new a dcat:Dataset ;
    dcterms:title ?title ;
    dcterms:description ?description ;
    dcterms:type ?type ;
    dcat:theme ?theme ;
    dcterms:accrualPeriodicity ?accrualPeriodicity ;
    dcat:keyword ?keyword ;
    dcterms:spatial ?spatial ;
    dcterms:temporal ?temporal_new ;
    dcat:contactPoint ?cp_new ;
    foaf:page ?page ;
    dcterms:issued ?issued ;
    dcterms:conformsTo ?conformsTo ;
    dcat:spatialResolutionInMeters ?spatialResolution ;
    dcat:temporalResolution ?temporalResolution ;
    dcterms:isPartOf ?topDatasetNew ;
    dcat:distribution ?distribution_new ;
    dcterms:publisher ?publisher ;
    dcterms:identifier ?dataset_old_str .

  ?cp_new a ?cptype;
    vcard:fn ?cpfn ;
    vcard:hasEmail ?cpemail .

  ?temporal_new a dcterms:PeriodOfTime;
    dcat:startDate ?finalStartDate ;
    dcat:endDate ?finalEndDate .

  ?publisher a foaf:Agent;
    foaf:name ?pname ;
    <https://data.gov.sk/def/ontology/legal-subject/legalFormType> ?ptype .

  ?distribution_new a dcat:Distribution ;
    dcat:downloadURL ?ddURL ;
    dcat:accessURL ?daURL ;
    dcterms:format ?dformat ;
    dcat:mediaType ?dmimeType ;
    dcterms:conformsTo ?dconformsTo ;
    dcat:compressFormat ?dcompressFormat ;
    dcat:packageFormat ?dpackageFormat ;
    dcterms:title ?dtitle ;
    dcterms:license ?license .

  ?distribution_new leg:termsOfUse ?pu_new .

  ?pu_new a leg:TermsOfUse ;
    leg:authorsWorkType ?touGeneral ;
    leg:originalDatabaseType ?touDatabase ;
    leg:databaseProtectedBySpecialRightsType ?touDatabaseExtra ;
    leg:personalDataContainmentType ?touPersonalData ;
    leg:authorName ?touGeneralAuthor ;
    leg:originalDatabaseAuthorName ?touDatabaselAuthor .

  ?distribution_new dcat:accessService ?dataservice_new .
  ?dataservice_new a dcat:DataService ;
    dcterms:title ?sTitle ;
    dcterms:conformsTo ?sConformsTo ;
    dcat:endpointURL ?sEndpointURL ;
    dcat:endpointDescription ?sEndpointDescription .

  ?record_new a dcat:CatalogRecord;
    dcterms:source ?dataset ;
    foaf:primaryTopic ?dataset_new ;
    dcterms:language <http://publications.europa.eu/resource/authority/language/SVK>, <http://publications.europa.eu/resource/authority/language/ENG> ;
    dcterms:conformsTo <https://joinup.ec.europa.eu/collection/semantic-interoperability-community-semic/solution/dcat-application-profile-data-portals-europe/release/211> .
}
WHERE {
  ?catalog a dcat:Catalog ;
             dcat:dataset ?dataset .

  # Required dataset properties: a dataset missing any of them is not matched
  # and therefore produces no output at all.
  ?dataset a dcat:Dataset ;
             dcterms:title ?title ;
             dcterms:description ?description ;
             dcat:theme ?theme ;
             dcterms:accrualPeriodicity ?accrualPeriodicity ;
             dcat:keyword ?keyword ;
             dcterms:spatial ?spatial ;
             dcterms:publisher ?publisher .

  # The publisher must have a name; its legal form type is optional.
  ?publisher foaf:name ?pname .
  OPTIONAL {?publisher <https://data.gov.sk/def/ontology/legal-subject/legalFormType> ?ptype . }

  # Spatial coverage, theme and periodicity are accepted only as IRIs.
  FILTER(isIRI(?spatial) && isIRI(?theme) && isIRI(?accrualPeriodicity))

  # Temporal coverage. A date is taken only when it is typed xsd:date and its
  # lexical form contains a YYYY-MM-DD pattern. dcat:startDate/endDate take
  # precedence over schema:startDate/endDate.
  OPTIONAL { ?dataset dcterms:temporal ?temporal .
    OPTIONAL {
      ?temporal dcat:startDate ?startDate .
      FILTER(REGEX(STR(?startDate), "[0-9]{4}-[0-9]{2}-[0-9]{2}") && DATATYPE(?startDate) = xsd:date)
    }
    OPTIONAL {
      ?temporal dcat:endDate ?endDate .
      FILTER(REGEX(STR(?endDate), "[0-9]{4}-[0-9]{2}-[0-9]{2}") && DATATYPE(?endDate) = xsd:date)
    }
    OPTIONAL {
      ?temporal schema:startDate ?schemaStartDate .
      FILTER(REGEX(STR(?schemaStartDate), "[0-9]{4}-[0-9]{2}-[0-9]{2}") && DATATYPE(?schemaStartDate) = xsd:date)
    }
    OPTIONAL {
      ?temporal schema:endDate ?schemaEndDate .
      FILTER(REGEX(STR(?schemaEndDate), "[0-9]{4}-[0-9]{2}-[0-9]{2}") && DATATYPE(?schemaEndDate) = xsd:date)
    }
    # When neither variant is bound the IF raises an error and the target
    # variable stays unbound, so the corresponding triple is not constructed.
    BIND(IF(BOUND(?startDate), ?startDate, ?schemaStartDate) AS ?finalStartDate)
    BIND(IF(BOUND(?endDate), ?endDate, ?schemaEndDate) AS ?finalEndDate)
  }

  # Contact point: must carry an rdf:type; name is optional and the e-mail is
  # taken only when it is an IRI.
  OPTIONAL { ?dataset dcat:contactPoint ?cp .
    ?cp a ?cptype.
    OPTIONAL { ?cp vcard:fn ?cpfn . }
    OPTIONAL {
      ?cp vcard:hasEmail ?cpemail .
      FILTER(isIRI(?cpemail))
    }
  }

  # Optional dataset-level properties; IRI-valued ones are dropped when the
  # source supplies a non-IRI value.
  OPTIONAL {
    ?dataset dcterms:type ?type .
    FILTER(isIRI(?type))
  }
  OPTIONAL {
    ?dataset foaf:page ?page .
    FILTER(isIRI(?page))
  }
  OPTIONAL {
    ?dataset dcterms:conformsTo ?conformsTo .
    FILTER(isIRI(?conformsTo))
  }
  OPTIONAL { ?dataset dcat:spatialResolutionInMeters ?spatialResolution . }
  OPTIONAL { ?dataset dcat:temporalResolution ?temporalResolution . }
  OPTIONAL {
    ?dataset dcterms:isPartOf ?topDataset .
    FILTER(isIRI(?topDataset))
  }

  # Distributions: only those typed dcat:Distribution that have an accessURL.
  OPTIONAL {
    ?dataset dcat:distribution ?distribution .
    ?distribution a dcat:Distribution ;
             dcat:accessURL ?daURL .

    # Terms of use are taken only when all four type properties are present;
    # the two author names are optional.
    OPTIONAL {
      ?distribution leg:termsOfUse ?pu .

      ?pu a leg:TermsOfUse ;
        leg:authorsWorkType ?touGeneral ;
        leg:originalDatabaseType ?touDatabase ;
        leg:databaseProtectedBySpecialRightsType ?touDatabaseExtra ;
        leg:personalDataContainmentType ?touPersonalData .

        OPTIONAL { ?pu leg:authorName ?touGeneralAuthor . }
        OPTIONAL { ?pu leg:originalDatabaseAuthorName ?touDatabaselAuthor .}
    }

    OPTIONAL { ?distribution dcterms:license ?license . }
    OPTIONAL { ?distribution dcat:downloadURL ?ddURL . }
    OPTIONAL {
      ?distribution dcterms:format ?dformat .
      FILTER(isIRI(?dformat))
    }
    OPTIONAL {
      ?distribution dcat:mediaType ?dmimeType .
      FILTER(isIRI(?dmimeType))
    }
    OPTIONAL {
      ?distribution dcterms:conformsTo ?dconformsTo .
      FILTER(isIRI(?dconformsTo))
    }
    OPTIONAL {
      ?distribution dcat:compressFormat ?dcompressFormat .
      FILTER(isIRI(?dcompressFormat))
    }
    OPTIONAL {
      ?distribution dcat:packageFormat ?dpackageFormat .
      FILTER(isIRI(?dpackageFormat))
    }
    OPTIONAL { ?distribution dcterms:title ?dtitle . }

    # Data service: needs a title and an endpoint URL that is an IRI.
    OPTIONAL {
      ?distribution dcat:accessService ?dataService .
      ?dataService a dcat:DataService ;
                    dcterms:title ?sTitle ;
                    dcat:endpointURL ?sEndpointURL .
      OPTIONAL {
        ?dataService dcat:endpointDescription ?sEndpointDescription .
        FILTER(isIRI(?sEndpointDescription))
      }
      OPTIONAL {
        ?dataService dcterms:conformsTo ?sConformsTo .
        FILTER(isIRI(?sConformsTo))
      }
      FILTER(isIRI(?sEndpointURL))
    }
  }

  # --- IRI minting -----------------------------------------------------------
  # The original dataset IRI is kept as an xsd:anyURI literal.
  BIND (STRDT(STR(?dataset), xsd:anyURI) AS ?dataset_old_str)
  BIND ("https://data.gov.sk/set/" AS ?new_IRI_base)
  # New dataset IRI = base + MD5(original dataset IRI + publisher IRI).
  BIND (MD5(CONCAT(STR(?dataset), STR(?publisher))) AS ?datasetID)
  BIND (IRI(CONCAT(?new_IRI_base, ?datasetID)) as ?dataset_new)
  # The three BINDs below call STR() on an optional variable; when that variable
  # is unbound the expression errors and the minted IRI stays unbound as well.
  BIND (IRI(CONCAT(?new_IRI_base, MD5(CONCAT(STR(?topDataset), STR(?publisher))))) as ?topDatasetNew)
  BIND (IRI(CONCAT(STR(?dataset_new), "/resource/", MD5(STR(?distribution)))) as ?distribution_new)
  BIND (IRI(CONCAT(STR(?distribution_new), "/data-service/", MD5(CONCAT(STR(?dataService), STR(?publisher))))) as ?dataservice_new)
  BIND (IRI(CONCAT(STR(?dataset_new),"/record")) as ?record_new)
  # The contact-point, terms-of-use and temporal IRIs are derived only from
  # ?dataset_new / ?distribution_new, so without a guard they would be bound for
  # every solution and the template would emit dcat:contactPoint,
  # leg:termsOfUse / leg:TermsOfUse and dcterms:temporal / dcterms:PeriodOfTime
  # triples even when the source has no such resource. Each is therefore minted
  # only when its source node matched. ?neverBound is intentionally never bound:
  # evaluating it raises an error, which leaves the BIND target unbound.
  BIND (IF(BOUND(?cp), IRI(CONCAT(STR(?dataset_new),"/contact-point")), ?neverBound) as ?cp_new)
  BIND (IF(BOUND(?pu), IRI(CONCAT(STR(?distribution_new),"/terms-of-use")), ?neverBound) as ?pu_new)
  BIND (IF(BOUND(?temporal), IRI(CONCAT(STR(?dataset_new),"/temporal")), ?neverBound) as ?temporal_new)
}
miroslavliska commented 6 months ago

OK, po diskusii s Jakubom Klimekom ponechávam prístup URIčok, ktoré referencujú jednotné URI https://data.gov.sk/, ktoré budú dereferencované na NKOD. Pôvodné URI bude tiež uchovávané.