HKS3 / HKS3GeoSearch

koha plugin to display a map of the geographical location of a book
3 stars 0 forks source link

Idea: use 034d-h to index areas #5

Open domm opened 3 months ago

domm commented 3 months ago

https://www.loc.gov/marc/bibliographic/bd034.html

We could also use 034d to 034h to store a rectangle / extent for "Katasterkarten" records (or other records that specify an area and not a point). But I'm currently not sure if Elasticsearch provides an area intersect search (PostGIS does...)

Anyway, this is more of a note for the far future :-)

tadzik commented 1 month ago

WIP patch. This successfully matches with both point-to-area and boundingbox-to-area queries, with ES doing the right thing as far as I can tell:

diff --git a/Koha/SearchEngine/Elasticsearch.pm b/Koha/SearchEngine/Elasticsearch.pm
index f11a833503..319651d19f 100644
--- a/Koha/SearchEngine/Elasticsearch.pm
+++ b/Koha/SearchEngine/Elasticsearch.pm
@@ -215,10 +215,10 @@ sub get_elasticsearch_mappings {
                     $es_type = 'cn_sort';
                 } elsif ($type eq 'geo_point') {
                     $es_type = 'geo_point';
-                }
-
-                if ($type eq 'geo_point') {
                     $name =~ s/_(lat|lon)$//;
+                } elsif ($type eq 'geo_shape') {
+                    $es_type = 'geo_shape';
+                    $name =~ s/_(north|south|east|west)ernmost$/_area/;
                 }

                 if ($search) {
@@ -768,6 +768,30 @@ sub marc_records_to_documents {
             delete $record_document->{$field};
         }

+        my %bounds;
+        foreach my $field ( @{ $rules->{geo_shape} } ) {
+            next unless $record_document->{$field};
+            $field =~ /geolocation_(north|south|east|west)ernmost$/;
+            my $direction = $1;
+            $bounds{$direction} = $record_document->{$field};
+            delete $record_document->{$field};
+        }
+        if (%bounds == 4) {
+            my @shapes;
+            for my $i (0..scalar($bounds{north}->@*) - 1) {
+                $shapes[$i] = [
+                    [ 0+$bounds{west}[$i], 0+$bounds{north}[$i] ],
+                    [ 0+$bounds{east}[$i], 0+$bounds{south}[$i] ],
+                ];
+            }
+            $record_document->{geolocation_area} = [map {
+                +{
+                    type => 'envelope',
+                    coordinates => $_,
+                }
+            } @shapes];
+        }
+
         # Remove duplicate values and collapse sort fields
         foreach my $field (keys %{$record_document}) {
             if (ref($record_document->{$field}) eq 'ARRAY') {
@@ -1140,6 +1164,9 @@ sub _get_marc_mapping_rules {
         elsif ($type eq 'geo_point') {
             push @{$rules->{geo_point}}, $name;
         }
+        elsif ($type eq 'geo_shape') {
+            push @{$rules->{geo_shape}}, $name;
+        }
         elsif ($type eq 'boolean') {
             # boolean gets special handling, if value doesn't exist for a field,
             # it is set to false
diff --git a/Koha/SearchEngine/Elasticsearch/Indexer.pm b/Koha/SearchEngine/Elasticsearch/Indexer.pm
index 1f914b8a65..641dc1b57b 100644
--- a/Koha/SearchEngine/Elasticsearch/Indexer.pm
+++ b/Koha/SearchEngine/Elasticsearch/Indexer.pm
@@ -138,6 +138,9 @@ sub update_index {
                 body => \@body
             );
             if ($response->{errors}) {
+                my @errors = grep { $_->{index}{status} >= 400 } $response->{items}->@*;
+                use DDP;
+                p @errors;
                 carp "One or more ElasticSearch errors occurred when indexing documents";
             }
         } catch {
diff --git a/Koha/SearchEngine/Elasticsearch/QueryBuilder.pm b/Koha/SearchEngine/Elasticsearch/QueryBuilder.pm
index 9e1574ede9..fbd1a09b4a 100644
--- a/Koha/SearchEngine/Elasticsearch/QueryBuilder.pm
+++ b/Koha/SearchEngine/Elasticsearch/QueryBuilder.pm
@@ -1409,35 +1409,63 @@ sub _is_safe_to_auto_truncate {
 sub _rebuild_to_es_advanced_query {
     my ($res) = @_;
     my $query_string = $res->{query}->{query_string};
-    $query_string->{query} = '*' unless $query_string->{query};
     delete $res->{query}->{query_string};

-    my %filter;
+    my @should;
     for my $advanced_query (@$es_advanced_searches) {
         if ( $advanced_query->{field} eq 'geolocation' ) {
             my %args = map { split ':' } map { s/\*$//r } split /\s+/, $advanced_query->{operand};
             if ($args{lat} and $args{lng} and $args{distance}) {
-                $filter{geo_distance} = {
-                    distance    => $args{distance},
-                    geolocation => {
-                        lat => $args{lat},
-                        lon => $args{lng},
-                    }
-                };
+                push @should,
+                    {
+                        geo_distance => {
+                            distance    => $args{distance},
+                            geolocation => {
+                                lat => $args{lat},
+                                lon => $args{lng},
+                            }
+                        }
+                    },
+                    {
+                        geo_distance => {
+                            distance    => $args{distance},
+                            geolocation_area => {
+                                lat => $args{lat},
+                                lon => $args{lng},
+                            }
+                        }
+                    };
             } elsif ($args{boundingbox}) {
                 my ($top_left_lat, $top_left_lon, $bottom_right_lat, $bottom_right_lon) = split ',', $args{boundingbox};
-                $filter{geo_bounding_box} = {
-                    'geolocation' => {
-                        'top_left' => {
-                          'lat' => $top_left_lat,
-                          'lon' => $top_left_lon,
-                        },
-                        'bottom_right' => {
-                          'lat' => $bottom_right_lat,
-                          'lon' => $bottom_right_lon,
+                push @should,
+                    {
+                        geo_bounding_box => {
+                            'geolocation' => {
+                                'top_left' => {
+                                  'lat' => $top_left_lat,
+                                  'lon' => $top_left_lon,
+                                },
+                                'bottom_right' => {
+                                  'lat' => $bottom_right_lat,
+                                  'lon' => $bottom_right_lon,
+                                }
+                            },
                         }
                     },
-                };
+                    {
+                        geo_bounding_box => {
+                            'geolocation_area' => {
+                                'top_left' => {
+                                  'lat' => $top_left_lat,
+                                  'lon' => $top_left_lon,
+                                },
+                                'bottom_right' => {
+                                  'lat' => $bottom_right_lat,
+                                  'lon' => $bottom_right_lon,
+                                }
+                            },
+                        }
+                    };
             } else {
                 warn "Unrecognized parameter set for geolocation queries: " . join(', ', keys %args);
             }
@@ -1448,10 +1476,12 @@ sub _rebuild_to_es_advanced_query {

     $res->{query} = {
         bool => {
-            must   => { query_string => $query_string },
-            filter => \%filter,
+            should => \@should,
         }
     };
+    if ($query_string->{query}) {
+        $res->{query}{bool}{must} = { query_string => $query_string };
+    }

     return $res;
 }
diff --git a/admin/searchengine/elasticsearch/field_config.yaml b/admin/searchengine/elasticsearch/field_config.yaml
index 41adcef7ed..eb3d2ff81d 100644
--- a/admin/searchengine/elasticsearch/field_config.yaml
+++ b/admin/searchengine/elasticsearch/field_config.yaml
@@ -43,6 +43,8 @@ search:
         normalizer: icu_folding_normalizer
   geo_point:
      type: geo_point
+  geo_shape:
+     type: geo_shape
   default:
     type: text
     analyzer: analyzer_standard
diff --git a/admin/searchengine/elasticsearch/mappings.yaml b/admin/searchengine/elasticsearch/mappings.yaml
index 929f2f9410..e66df5e567 100644
--- a/admin/searchengine/elasticsearch/mappings.yaml
+++ b/admin/searchengine/elasticsearch/mappings.yaml
@@ -1892,6 +1892,42 @@ biblios:
     opac: 1
     staff_client: 1
     type: ''
+  geolocation_westernmost:
+    label: geolocation_westernmost
+    mappings:
+      - facet: ''
+        marc_field: 034d
+        marc_type: marc21
+        sort: 0
+        suggestible: ''
+    type: geo_shape
+  geolocation_easternmost:
+    label: geolocation_easternmost
+    mappings:
+      - facet: ''
+        marc_field: 034e
+        marc_type: marc21
+        sort: 0
+        suggestible: ''
+    type: geo_shape
+  geolocation_northernmost:
+    label: geolocation_northernmost
+    mappings:
+      - facet: ''
+        marc_field: 034f
+        marc_type: marc21
+        sort: 0
+        suggestible: ''
+    type: geo_shape
+  geolocation_southernmost:
+    label: geolocation_southernmost
+    mappings:
+      - facet: ''
+        marc_field: 034g
+        marc_type: marc21
+        sort: 0
+        suggestible: ''
+    type: geo_shape
   geolocation_lat:
     label: geolocation_lat
     mappings:
diff --git a/installer/data/mysql/kohastructure.sql b/installer/data/mysql/kohastructure.sql
index 91c1bec42f..1e1afef7e9 100644
--- a/installer/data/mysql/kohastructure.sql
+++ b/installer/data/mysql/kohastructure.sql
@@ -5656,7 +5656,7 @@ CREATE TABLE `search_field` (
   `id` int(11) NOT NULL AUTO_INCREMENT,
   `name` varchar(255) NOT NULL COMMENT 'the name of the field as it will be stored in the search engine',
   `label` varchar(255) NOT NULL COMMENT 'the human readable name of the field, for display',
-  `type` enum('','string','date','number','boolean','sum','isbn','stdno','year','callnumber','geo_point') NOT NULL COMMENT 'what type of data this holds, relevant when storing it in the search engine',
+  `type` enum('','string','date','number','boolean','sum','isbn','stdno','year','callnumber','geo_point','geo_shape') NOT NULL COMMENT 'what type of data this holds, relevant when storing it in the search engine',
   `weight` decimal(5,2) DEFAULT NULL,
   `facet_order` tinyint(4) DEFAULT NULL COMMENT 'the order place of the field in facet list if faceted',
   `staff_client` tinyint(1) NOT NULL DEFAULT 1,
diff --git a/t/Koha/SearchEngine/Elasticsearch/QueryBuilder.t b/t/Koha/SearchEngine/Elasticsearch/QueryBuilder.t
index eef358a0f7..536e390254 100755
--- a/t/Koha/SearchEngine/Elasticsearch/QueryBuilder.t
+++ b/t/Koha/SearchEngine/Elasticsearch/QueryBuilder.t
@@ -379,26 +379,36 @@ subtest 'geolocation queries' => sub {
             'operand'  => 'lat:48.25* lng:16.35* distance:100km*'
     });
     my $query = $qb->build_query();
-    is_deeply $query->{query}{bool}{filter}, {
-        geo_distance => {
+    is_deeply $query->{query}{bool}{should}, [
+        { geo_distance => {
             distance => '100km',
             geolocation => { lat => 48.25, lon => 16.35 },
-        }
-    }, 'should be able to create "distance from a point" queries';
+        } },
+        { geo_distance => {
+            distance => '100km',
+            geolocation_area => { lat => 48.25, lon => 16.35 },
+        } },
+    ], 'should be able to create "distance from a point" queries';

     $qb->_create_query_string({
         'field'    => 'geolocation',
         'operand'  => 'boundingbox:1,2,3,4*'
     });
     $query = $qb->build_query();
-    is_deeply $query->{query}{bool}{filter}, {
-        geo_bounding_box => {
+    is_deeply $query->{query}{bool}{should}, [
+        { geo_bounding_box => {
             geolocation => {
                 top_left => { lat => 1, lon => 2 },
                 bottom_right => { lat => 3, lon => 4 },
             },
-        }
-    }, 'should be able to create "within a bounding box" queries';
+        } },
+        { geo_bounding_box => {
+            geolocation_area => {
+                top_left => { lat => 1, lon => 2 },
+                bottom_right => { lat => 3, lon => 4 },
+            },
+        } }
+    ], 'should be able to create "within a bounding box" queries';
 };
 # Add the bounding box of Austria to a biblio
 {
   my $biblionumber = 139;
   my $biblio = Koha::Biblios->find($biblionumber);
   my $framework = $biblio->frameworkcode;
   my $record = $biblio->metadata->record;
   my @fields;
   $fields[0] = MARC::Field->new('034','','',
      'd' => 9.530833,
      'e' => 17.160556,
      'f' => 49.020556,
      'g' => 46.3725,
   );
   $record->append_fields(@fields);
   C4::Biblio::ModBiblio($record, $biblionumber, $framework);
}
tadzik commented 1 month ago

Now Koha'd in https://bugs.koha-community.org/bugzilla3/show_bug.cgi?id=37985 and in koha-branches: https://github.com/HKS3/koha-branches/tree/geosphere-2405

Last but not least, in the plugin itself: https://github.com/HKS3/HKS3GeoSearch/commit/8e433d8fb3ae755bc1bec6a80b7c60068d9eef87