RDFLib / prez-ui

BSD 3-Clause "New" or "Revised" License
10 stars 7 forks source link

Implement Catalogue Search #58

Closed nicholascar closed 1 year ago

nicholascar commented 1 year ago

CatPrez' top-level page should be a search page, like SpacePrez' is.

e.g. https://data.idnau.org/c should look like https://data.idnau.org/s with an altered emphasis, as below

Image

nicholascar commented 1 year ago

Query for "Catalogues to include":

# Lists every dcat:Catalog found in any named graph together with its title,
# sorted by title in ascending order.
PREFIX dcat: <http://www.w3.org/ns/dcat#>
PREFIX dcterms: <http://purl.org/dc/terms/>

SELECT ?c ?t
WHERE {
  GRAPH ?graph {
    ?c a dcat:Catalog .
    ?c dcterms:title ?t .
  }
}
ORDER BY ASC(?t)
nicholascar commented 1 year ago

Query for the "Search text" input, covering Resources in the DemoCat only (https://data.idnau.org/pid/democat), using cumulative match weightings:

# Weighted text search over the Resources that the DemoCat catalogue lists as
# parts. Each UNION branch contributes a weight when its FILTER matches, and
# the weights are summed per (?r, ?t, ?d), so a title that is exactly the
# search term scores 50 + 10 (it also satisfies the "contains" branch).
# Results are ordered by total weight (highest first), then by title.
PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX dcat: <http://www.w3.org/ns/dcat#>

SELECT ?r ?t ?d (SUM(?w) AS ?weight)
WHERE {
  # Candidate set: Resources that are parts of the DemoCat catalogue
  {
    SELECT ?r
    WHERE {
      ?r a dcat:Resource ;
        ^dcterms:hasPart <https://data.idnau.org/pid/democat> .
    }
  }
  # Title is exactly the search term (case-insensitive): weight 50
  {
    ?r
      dcterms:title ?t ;
      dcterms:description ?d .
    BIND (50 AS ?w)
    FILTER REGEX(?t, "^Indigenous$", "i")
  }
  UNION
  # Title contains the search term (case-insensitive): weight 10
  {
    ?r
      dcterms:title ?t ;
      dcterms:description ?d .
    BIND (10 AS ?w)
    FILTER REGEX(?t, "Indigenous", "i")
  }
  UNION
  # Description contains the search term (case-insensitive): weight 5
  {
    ?r
      dcterms:title ?t ;
      dcterms:description ?d .
    BIND (5 AS ?w)
    FILTER REGEX(?d, "Indigenous", "i")
  }
}
# One row per (?r, ?t, ?d); the grouping already makes rows unique, so no
# DISTINCT is needed, and only variables that are actually bound are grouped.
GROUP BY ?r ?t ?d
ORDER BY DESC(?weight) ?t
nicholascar commented 1 year ago

Query for "Theme filter" that retrieves the top 4 most commonly used themes for Resources in the https://data.idnau.org/pid/democat catalogue:

# Theme filter options: counts how often each theme is used by Resources that
# are parts of the DemoCat catalogue and keeps the four most used themes,
# breaking ties by preferred label.
PREFIX dcat: <http://www.w3.org/ns/dcat#>
PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>

SELECT ?th ?pl (COUNT(?th) AS ?count)
WHERE {
  # Restrict to Resources that the DemoCat catalogue lists as parts
  <https://data.idnau.org/pid/democat> dcterms:hasPart ?r .
  ?r a dcat:Resource .

  # Each theme assigned to the Resource, with the theme's preferred label
  ?r dcat:theme ?th .
  ?th skos:prefLabel ?pl .
}
GROUP BY ?th ?pl
ORDER BY DESC(?count) ASC(?pl)
LIMIT 4

There are only 7 distinct themes in the catalogue for now, but I may add more by Friday. The LIMIT of 4 above is just to show an artificial limit; the real limit should be at least 20, so it will show all 7 for now!

nicholascar commented 1 year ago

Query for "Spatial filter" with a given geometry:

# Spatial filter: returns the Resources that are parts of the DemoCat
# catalogue, have a bounding box with a WKT serialisation, and whose bounding
# box satisfies geof:sfOverlaps against the given polygon.
PREFIX dcat: <http://www.w3.org/ns/dcat#>
PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX geo: <http://www.opengis.net/ont/geosparql#>
PREFIX geof: <http://www.opengis.net/def/function/geosparql/>

SELECT *
WHERE {
  GRAPH ?g {
    ?r a dcat:Resource ;
      ^dcterms:hasPart <https://data.idnau.org/pid/democat> ;
      geo:hasBoundingBox/geo:asWKT ?wkt .

    # The search area is a single closed ring; the WKT parentheses must be
    # balanced ("POLYGON ((" ... "))") for strict WKT parsers to accept it.
    FILTER (geof:sfOverlaps("POLYGON ((159.86557837137977 -11.168170683361419, 159.86557837137977 -39.83427815914998, 111.08628149637977 -39.83427815914998, 111.08628149637977 -11.168170683361419, 159.86557837137977 -11.168170683361419))"^^geo:wktLiteral, ?wkt))
  }
}

Selects only things in the DemoCat, https://data.idnau.org/pid/democat, that have a bounding box with coordinates (geo:hasBoundingBox/geo:asWKT), and filters them using only sfOverlaps; don't worry about other spatial relations.

nicholascar commented 1 year ago

Combined query using all filters:

# Combined catalogue search: applies the weighted text search, the theme
# filter and the spatial filter together to Resources in the DemoCat
# catalogue. Results are ordered by text-match weight (highest first), then by
# title.
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX dcat: <http://www.w3.org/ns/dcat#>
PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX geo: <http://www.opengis.net/ont/geosparql#>
PREFIX geof: <http://www.opengis.net/def/function/geosparql/>

SELECT ?r ?t ?d ?th ?thpl ?weight
WHERE {
  GRAPH ?g {
    # Only look for Resources (not spatial Datasets)
    ?r a dcat:Resource .

    # Only look in DemoCat
    ?r ^dcterms:hasPart <https://data.idnau.org/pid/democat> .

    # Weighted text search. Each UNION branch adds its weight when its FILTER
    # matches; weights are summed per (?r, ?t, ?d). Grouping already yields
    # unique rows, so no DISTINCT is needed.
    {
      SELECT ?r ?t ?d (SUM(?w) AS ?weight)
      WHERE {
        ?r a dcat:Resource .
        # Title is exactly the search term (case-insensitive): weight 50
        {
          ?r
            dcterms:title ?t ;
            dcterms:description ?d .
          BIND (50 AS ?w)
          FILTER REGEX(?t, "^Police$", "i")
        }
        UNION
        # Title contains the search term (case-insensitive): weight 10
        {
          ?r
            dcterms:title ?t ;
            dcterms:description ?d .
          BIND (10 AS ?w)
          FILTER REGEX(?t, "Police", "i")
        }
        UNION
        # Description contains the search term (case-insensitive): weight 5
        {
          ?r
            dcterms:title ?t ;
            dcterms:description ?d .
          BIND (5 AS ?w)
          FILTER REGEX(?d, "Police", "i")
        }
      }
      GROUP BY ?r ?t ?d
    }

    # Theme filter. Each theme's IRI is a new line in the VALUES {}
    VALUES ?th {
      <https://vocabularyserver.com/apais/xml.php?skosTema=28>
    }
    ?r dcat:theme ?th .
    ?th skos:prefLabel ?thpl .

    # Spatial filter. The WKT parentheses must be balanced
    # ("POLYGON ((" ... "))") for strict WKT parsers to accept the literal.
    ?r geo:hasBoundingBox/geo:asWKT ?wkt .
    FILTER (geof:sfOverlaps("POLYGON ((159.865578 -11.168170, 159.865578 -39.834278, 111.086281 -39.834278, 111.086281 -11.168170, 159.865578 -11.168170))"^^geo:wktLiteral, ?wkt))
  }
}
# Ordering is applied to the final result: an ORDER BY inside the subquery is
# not guaranteed to survive the joins performed by the outer query.
ORDER BY DESC(?weight) ?t

This should return only 2 results - for the same dataset but with 2 themes, so 2 results.

jamiefeiss commented 1 year ago

Resolved in #62