Open domm opened 3 months ago
WIP patch. This successfully matches with both point-to-area queries and bounding-box-to-area queries, with ES doing the right thing as far as I can tell:
diff --git a/Koha/SearchEngine/Elasticsearch.pm b/Koha/SearchEngine/Elasticsearch.pm
index f11a833503..319651d19f 100644
--- a/Koha/SearchEngine/Elasticsearch.pm
+++ b/Koha/SearchEngine/Elasticsearch.pm
@@ -215,10 +215,10 @@ sub get_elasticsearch_mappings {
$es_type = 'cn_sort';
} elsif ($type eq 'geo_point') {
$es_type = 'geo_point';
- }
-
- if ($type eq 'geo_point') {
$name =~ s/_(lat|lon)$//;
+ } elsif ($type eq 'geo_shape') {
+ $es_type = 'geo_shape';
+ $name =~ s/_(north|south|east|west)ernmost$/_area/;
}
if ($search) {
@@ -768,6 +768,30 @@ sub marc_records_to_documents {
delete $record_document->{$field};
}
+ my %bounds;
+ foreach my $field ( @{ $rules->{geo_shape} } ) {
+ next unless $record_document->{$field};
+ $field =~ /geolocation_(north|south|east|west)ernmost$/;
+ my $direction = $1;
+ $bounds{$direction} = $record_document->{$field};
+ delete $record_document->{$field};
+ }
+ if (%bounds == 4) {
+ my @shapes;
+ for my $i (0..scalar($bounds{north}->@*) - 1) {
+ $shapes[$i] = [
+ [ 0+$bounds{west}[$i], 0+$bounds{north}[$i] ],
+ [ 0+$bounds{east}[$i], 0+$bounds{south}[$i] ],
+ ];
+ }
+ $record_document->{geolocation_area} = [map {
+ +{
+ type => 'envelope',
+ coordinates => $_,
+ }
+ } @shapes];
+ }
+
# Remove duplicate values and collapse sort fields
foreach my $field (keys %{$record_document}) {
if (ref($record_document->{$field}) eq 'ARRAY') {
@@ -1140,6 +1164,9 @@ sub _get_marc_mapping_rules {
elsif ($type eq 'geo_point') {
push @{$rules->{geo_point}}, $name;
}
+ elsif ($type eq 'geo_shape') {
+ push @{$rules->{geo_shape}}, $name;
+ }
elsif ($type eq 'boolean') {
# boolean gets special handling, if value doesn't exist for a field,
# it is set to false
diff --git a/Koha/SearchEngine/Elasticsearch/Indexer.pm b/Koha/SearchEngine/Elasticsearch/Indexer.pm
index 1f914b8a65..641dc1b57b 100644
--- a/Koha/SearchEngine/Elasticsearch/Indexer.pm
+++ b/Koha/SearchEngine/Elasticsearch/Indexer.pm
@@ -138,6 +138,9 @@ sub update_index {
body => \@body
);
if ($response->{errors}) {
+ my @errors = grep { $_->{index}{status} >= 400 } $response->{items}->@*;
+ use DDP;
+ p @errors;
carp "One or more ElasticSearch errors occurred when indexing documents";
}
} catch {
diff --git a/Koha/SearchEngine/Elasticsearch/QueryBuilder.pm b/Koha/SearchEngine/Elasticsearch/QueryBuilder.pm
index 9e1574ede9..fbd1a09b4a 100644
--- a/Koha/SearchEngine/Elasticsearch/QueryBuilder.pm
+++ b/Koha/SearchEngine/Elasticsearch/QueryBuilder.pm
@@ -1409,35 +1409,63 @@ sub _is_safe_to_auto_truncate {
sub _rebuild_to_es_advanced_query {
my ($res) = @_;
my $query_string = $res->{query}->{query_string};
- $query_string->{query} = '*' unless $query_string->{query};
delete $res->{query}->{query_string};
- my %filter;
+ my @should;
for my $advanced_query (@$es_advanced_searches) {
if ( $advanced_query->{field} eq 'geolocation' ) {
my %args = map { split ':' } map { s/\*$//r } split /\s+/, $advanced_query->{operand};
if ($args{lat} and $args{lng} and $args{distance}) {
- $filter{geo_distance} = {
- distance => $args{distance},
- geolocation => {
- lat => $args{lat},
- lon => $args{lng},
- }
- };
+ push @should,
+ {
+ geo_distance => {
+ distance => $args{distance},
+ geolocation => {
+ lat => $args{lat},
+ lon => $args{lng},
+ }
+ }
+ },
+ {
+ geo_distance => {
+ distance => $args{distance},
+ geolocation_area => {
+ lat => $args{lat},
+ lon => $args{lng},
+ }
+ }
+ };
} elsif ($args{boundingbox}) {
my ($top_left_lat, $top_left_lon, $bottom_right_lat, $bottom_right_lon) = split ',', $args{boundingbox};
- $filter{geo_bounding_box} = {
- 'geolocation' => {
- 'top_left' => {
- 'lat' => $top_left_lat,
- 'lon' => $top_left_lon,
- },
- 'bottom_right' => {
- 'lat' => $bottom_right_lat,
- 'lon' => $bottom_right_lon,
+ push @should,
+ {
+ geo_bounding_box => {
+ 'geolocation' => {
+ 'top_left' => {
+ 'lat' => $top_left_lat,
+ 'lon' => $top_left_lon,
+ },
+ 'bottom_right' => {
+ 'lat' => $bottom_right_lat,
+ 'lon' => $bottom_right_lon,
+ }
+ },
}
},
- };
+ {
+ geo_bounding_box => {
+ 'geolocation_area' => {
+ 'top_left' => {
+ 'lat' => $top_left_lat,
+ 'lon' => $top_left_lon,
+ },
+ 'bottom_right' => {
+ 'lat' => $bottom_right_lat,
+ 'lon' => $bottom_right_lon,
+ }
+ },
+ }
+ };
} else {
warn "Unrecognized parameter set for geolocation queries: " . join(', ', keys %args);
}
@@ -1448,10 +1476,12 @@ sub _rebuild_to_es_advanced_query {
$res->{query} = {
bool => {
- must => { query_string => $query_string },
- filter => \%filter,
+ should => \@should,
}
};
+ if ($query_string->{query}) {
+ $res->{query}{bool}{must} = { query_string => $query_string };
+ }
return $res;
}
diff --git a/admin/searchengine/elasticsearch/field_config.yaml b/admin/searchengine/elasticsearch/field_config.yaml
index 41adcef7ed..eb3d2ff81d 100644
--- a/admin/searchengine/elasticsearch/field_config.yaml
+++ b/admin/searchengine/elasticsearch/field_config.yaml
@@ -43,6 +43,8 @@ search:
normalizer: icu_folding_normalizer
geo_point:
type: geo_point
+ geo_shape:
+ type: geo_shape
default:
type: text
analyzer: analyzer_standard
diff --git a/admin/searchengine/elasticsearch/mappings.yaml b/admin/searchengine/elasticsearch/mappings.yaml
index 929f2f9410..e66df5e567 100644
--- a/admin/searchengine/elasticsearch/mappings.yaml
+++ b/admin/searchengine/elasticsearch/mappings.yaml
@@ -1892,6 +1892,42 @@ biblios:
opac: 1
staff_client: 1
type: ''
+ geolocation_westernmost:
+ label: geolocation_westernmost
+ mappings:
+ - facet: ''
+ marc_field: 034d
+ marc_type: marc21
+ sort: 0
+ suggestible: ''
+ type: geo_shape
+ geolocation_easternmost:
+ label: geolocation_easternmost
+ mappings:
+ - facet: ''
+ marc_field: 034e
+ marc_type: marc21
+ sort: 0
+ suggestible: ''
+ type: geo_shape
+ geolocation_northernmost:
+ label: geolocation_northernmost
+ mappings:
+ - facet: ''
+ marc_field: 034f
+ marc_type: marc21
+ sort: 0
+ suggestible: ''
+ type: geo_shape
+ geolocation_southernmost:
+ label: geolocation_southernmost
+ mappings:
+ - facet: ''
+ marc_field: 034g
+ marc_type: marc21
+ sort: 0
+ suggestible: ''
+ type: geo_shape
geolocation_lat:
label: geolocation_lat
mappings:
diff --git a/installer/data/mysql/kohastructure.sql b/installer/data/mysql/kohastructure.sql
index 91c1bec42f..1e1afef7e9 100644
--- a/installer/data/mysql/kohastructure.sql
+++ b/installer/data/mysql/kohastructure.sql
@@ -5656,7 +5656,7 @@ CREATE TABLE `search_field` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`name` varchar(255) NOT NULL COMMENT 'the name of the field as it will be stored in the search engine',
`label` varchar(255) NOT NULL COMMENT 'the human readable name of the field, for display',
- `type` enum('','string','date','number','boolean','sum','isbn','stdno','year','callnumber','geo_point') NOT NULL COMMENT 'what type of data this holds, relevant when storing it in the search engine',
+ `type` enum('','string','date','number','boolean','sum','isbn','stdno','year','callnumber','geo_point','geo_shape') NOT NULL COMMENT 'what type of data this holds, relevant when storing it in the search engine',
`weight` decimal(5,2) DEFAULT NULL,
`facet_order` tinyint(4) DEFAULT NULL COMMENT 'the order place of the field in facet list if faceted',
`staff_client` tinyint(1) NOT NULL DEFAULT 1,
diff --git a/t/Koha/SearchEngine/Elasticsearch/QueryBuilder.t b/t/Koha/SearchEngine/Elasticsearch/QueryBuilder.t
index eef358a0f7..536e390254 100755
--- a/t/Koha/SearchEngine/Elasticsearch/QueryBuilder.t
+++ b/t/Koha/SearchEngine/Elasticsearch/QueryBuilder.t
@@ -379,26 +379,36 @@ subtest 'geolocation queries' => sub {
'operand' => 'lat:48.25* lng:16.35* distance:100km*'
});
my $query = $qb->build_query();
- is_deeply $query->{query}{bool}{filter}, {
- geo_distance => {
+ is_deeply $query->{query}{bool}{should}, [
+ { geo_distance => {
distance => '100km',
geolocation => { lat => 48.25, lon => 16.35 },
- }
- }, 'should be able to create "distance from a point" queries';
+ } },
+ { geo_distance => {
+ distance => '100km',
+ geolocation_area => { lat => 48.25, lon => 16.35 },
+ } },
+ ], 'should be able to create "distance from a point" queries';
$qb->_create_query_string({
'field' => 'geolocation',
'operand' => 'boundingbox:1,2,3,4*'
});
$query = $qb->build_query();
- is_deeply $query->{query}{bool}{filter}, {
- geo_bounding_box => {
+ is_deeply $query->{query}{bool}{should}, [
+ { geo_bounding_box => {
geolocation => {
top_left => { lat => 1, lon => 2 },
bottom_right => { lat => 3, lon => 4 },
},
- }
- }, 'should be able to create "within a bounding box" queries';
+ } },
+ { geo_bounding_box => {
+ geolocation_area => {
+ top_left => { lat => 1, lon => 2 },
+ bottom_right => { lat => 3, lon => 4 },
+ },
+ } }
+ ], 'should be able to create "within a bounding box" queries';
};
# Add the bounding box of Austria to a biblio.
#
# Appends a MARC 034 field whose subfields follow the geolocation mappings:
#   d = westernmost, e = easternmost, f = northernmost, g = southernmost.
# The modified record is then saved with ModBiblio.
{
    my $biblionumber = 139;

    # Fail with a clear message instead of an opaque "Can't call method ...
    # on an undefined value" when the biblio does not exist.
    # NOTE(review): assumes find() returns a false value when nothing matches.
    my $biblio = Koha::Biblios->find($biblionumber)
        or die "Biblio $biblionumber not found; use an existing biblionumber\n";

    my $framework = $biblio->frameworkcode;
    my $record    = $biblio->metadata->record;

    my $bounding_box = MARC::Field->new(
        '034', '', '',
        'd' => 9.530833,     # westernmost
        'e' => 17.160556,    # easternmost
        'f' => 49.020556,    # northernmost
        'g' => 46.3725,      # southernmost
    );

    $record->append_fields($bounding_box);
    C4::Biblio::ModBiblio($record, $biblionumber, $framework);
}
Now Koha'd in https://bugs.koha-community.org/bugzilla3/show_bug.cgi?id=37985 and in koha-branches: https://github.com/HKS3/koha-branches/tree/geosphere-2405
Last but not least, in the plugin itself: https://github.com/HKS3/HKS3GeoSearch/commit/8e433d8fb3ae755bc1bec6a80b7c60068d9eef87
https://www.loc.gov/marc/bibliographic/bd034.html
We could also use
034d
to 034h
to store a rectangle / extent for "Katasterkarten" records (or other records that specify an area and not a point). But I'm currently not sure whether Elasticsearch provides an area-intersect search (PostGIS does...). Anyway, this is more of a note for the far future :-)