pelias / api

HTTP API for Pelias Geocoder
http://pelias.io
MIT License
221 stars 162 forks source link

Pelias fails to find address if city is added to query #1594

Closed fl0cke closed 2 years ago

fl0cke commented 2 years ago

I have observed some strange behaviour with search queries on my new Pelias setup. I have imported the dataset for Germany and am trying to geocode the following address: Ackermannstr. 12a Frankfurt am Main

Executing the query GET /v1/search?text=Ackermannstr. 12a Frankfurt am Main&sources=osm&size=1 yields the following result:

{
  "geocoding": {
    "version": "0.2",
    "query": {
      "text": "Ackermannstr. 12a Frankfurt am Main",
      "size": 1,
      "sources": [
        "openstreetmap"
      ],
      "private": false,
      "lang": {
        "name": "German",
        "iso6391": "de",
        "iso6393": "deu",
        "via": "header",
        "defaulted": false
      },
      "querySize": 20,
      "parser": "libpostal",
      "parsed_text": {
        "street": "ackermannstr.",
        "housenumber": "12a",
        "city": "frankfurt am main"
      }
    },
    "engine": {
      "name": "Pelias",
      "author": "Mapzen",
      "version": "1.0"
    },
    "timestamp": 1641556303027
  },
  "type": "FeatureCollection",
  "features": [
    {
      "type": "Feature",
      "geometry": {
        "type": "Point",
        "coordinates": [
          8.584764,
          50.041821
        ]
      },
      "properties": {
        "id": "101913837",
        "gid": "whosonfirst:locality:101913837",
        "layer": "locality",
        "source": "whosonfirst",
        "source_id": "101913837",
        "country_code": "DE",
        "name": "Frankfurt am Main",
        "confidence": 0.6,
        "match_type": "fallback",
        "accuracy": "centroid",
        "country": "Deutschland",
        "country_gid": "whosonfirst:country:85633111",
        "country_a": "DEU",
        "region": "Hessen",
        "region_gid": "whosonfirst:region:85682531",
        "region_a": "HE",
        "macrocounty": "Darmstadt",
        "macrocounty_gid": "whosonfirst:macrocounty:404227581",
        "county": "Frankfurt",
        "county_gid": "whosonfirst:county:102063589",
        "county_a": "FA",
        "localadmin": "Frankfurt am Main",
        "localadmin_gid": "whosonfirst:localadmin:1377692799",
        "locality": "Frankfurt am Main",
        "locality_gid": "whosonfirst:locality:101913837",
        "continent": "Europa",
        "continent_gid": "whosonfirst:continent:102191581",
        "label": "Frankfurt am Main, HE, Deutschland"
      },
      "bbox": [
        8.472633,
        50.015574,
        8.800535,
        50.226257
      ]
    }
  ],
  "bbox": [
    8.472633,
    50.015574,
    8.800535,
    50.226257
  ]
}

As you can see, the address is not found and instead Pelias falls back to returning the locality. However, when I remove the city from the query (GET /v1/search?text=Ackermannstr. 12a&sources=osm&size=1), it returns the correct result:

{
  "geocoding": {
    "version": "0.2",
    "query": {
      "text": "Ackermannstr. 12a",
      "size": 1,
      "sources": [
        "openstreetmap"
      ],
      "private": false,
      "lang": {
        "name": "German",
        "iso6391": "de",
        "iso6393": "deu",
        "via": "header",
        "defaulted": false
      },
      "querySize": 20,
      "parser": "pelias",
      "parsed_text": {
        "subject": "12a Ackermannstr.",
        "street": "Ackermannstr.",
        "housenumber": "12a"
      }
    },
    "engine": {
      "name": "Pelias",
      "author": "Mapzen",
      "version": "1.0"
    },
    "timestamp": 1641556309332
  },
  "type": "FeatureCollection",
  "features": [
    {
      "type": "Feature",
      "geometry": {
        "type": "Point",
        "coordinates": [
          8.627754,
          50.097648
        ]
      },
      "properties": {
        "id": "way/154520993",
        "gid": "openstreetmap:address:way/154520993",
        "layer": "address",
        "source": "openstreetmap",
        "source_id": "way/154520993",
        "country_code": "DE",
        "name": "Ackermannstraße 12",
        "housenumber": "12",
        "street": "Ackermannstraße",
        "postalcode": "60326",
        "confidence": 1,
        "match_type": "exact",
        "accuracy": "point",
        "country": "Deutschland",
        "country_gid": "whosonfirst:country:85633111",
        "country_a": "DEU",
        "region": "Hessen",
        "region_gid": "whosonfirst:region:85682531",
        "region_a": "HE",
        "macrocounty": "Darmstadt",
        "macrocounty_gid": "whosonfirst:macrocounty:404227581",
        "county": "Frankfurt",
        "county_gid": "whosonfirst:county:102063589",
        "county_a": "FA",
        "localadmin": "Frankfurt am Main",
        "localadmin_gid": "whosonfirst:localadmin:1377692799",
        "locality": "Frankfurt am Main",
        "locality_gid": "whosonfirst:locality:101913837",
        "neighbourhood": "Goldstein",
        "neighbourhood_gid": "whosonfirst:neighbourhood:85795343",
        "label": "Ackermannstraße 12, Frankfurt am Main, HE, Deutschland"
      }
    }
  ],
  "bbox": [
    8.627754,
    50.097648,
    8.627754,
    50.097648
  ]
}

The only difference I can spot is the parser that is used for extracting the address components from the query. But both produce correct components, so I'm not sure what's going on here. Do you have any idea?

missinglink commented 2 years ago

agh, funny, this is actually the issue I was planning to tackle today and I spent an hour figuring that out 😆

for now the workaround is to use a different form of the street name, any of the following work:

you can follow this issue for updates on a proper fix: https://github.com/pelias/model/issues/144

fl0cke commented 2 years ago

Haha, I'm glad to hear that you are already working on it, since this is a pretty big issue for German street names, which are almost always written with the str. suffix. I can confirm that using a different form for the street name works, so I'll be using that as a workaround until the final fix lands. Thanks for your work!

missinglink commented 2 years ago

There is a PR open now, it still needs a bit more work before merging but seems to be much better at producing all permutations of German street names.

The list of terms is fairly small for now, basically just -str, -plz & -mkt at the moment.

Do you see a need to add any of these?

missinglink commented 2 years ago

This is a pretty good list IMO but lists the abbreviation for 'Platz' as '-pl' which seems wrong to me? Isn't it more commonly written -plz?

fl0cke commented 2 years ago

Not sure if I'm the right person to answer this question, but personally I haven't seen most of these abbreviations anywhere before. But that doesn't mean they aren't more commonly used in other parts of Germany or other German-speaking countries. So if it doesn't add to the complexity of the implementation or otherwise mess with the matching of non-abbreviated street names, I'd say just add them?!

The list from libpostal looks like the better one to me, and a quick Google search seems to confirm the abbreviation of "Platz" is indeed "pl" 👍.

missinglink commented 2 years ago

there is a build up on dev right now which looks much better @fl0cke

fl0cke commented 2 years ago

@missinglink excellent! Which package do I need to update for this to work on my machine? Edit: found it...

missinglink commented 2 years ago

I need to update three docker images (openstreetmap/openaddresses/polylines) & probably also csv-importer to make sure they are all bringing in the latest version of the pelias/model dependency, I'll do that tomorrow and let you know.

missinglink commented 2 years ago

okay the docker images have been updated, you should be able to run pelias compose pull (if using the pelias/docker method) or otherwise pull the latest docker images to get the new code.

fl0cke commented 2 years ago

I just updated our server and everything seems to work as expected. Thanks!

fcruzel commented 1 year ago

Hi! I have a CSV dataset with Spanish addresses that the API sometimes fails to provide when the city is included in the query.

For example, the CSV contains the following address:

"source","lat","lon","name","street","number","postalcode","layer","id","popularity","name_json"
[...]
"registro_portales","28.71157","-13.866","CALLE AVUTARDA 63 LA OLIVA","CALLE AVUTARDA","00063","35660","address","572d2954-1916-4b8b-a21d-00d3db3d9d60","1",""
[...]

I expect that address to be returned with the query CALLE AVUTARDA 63 LA OLIVA. However, this is the response I get from the API:

/api/v1/search?text=CALLE AVUTARDA 63 LA OLIVA&sources=registro_portales

```json
{ "geocoding": { "version": "0.2", "attribution": "http://estadisticas.arte-consultores.com/attribution", "query": { "text": "CALLE AVUTARDA 63 LA OLIVA", "size": 10, "sources": [ "registro_portales" ], "private": false, "focus.point.lat": 28, "focus.point.lon": -15.5, "lang": { "name": "English", "iso6391": "en", "iso6393": "eng", "via": "default", "defaulted": true }, "querySize": 20, "parser": "libpostal", "parsed_text": { "street": "calle avutarda", "housenumber": "63", "city": "la oliva" } }, "engine": { "name": "Pelias", "author": "Mapzen", "version": "1.0" }, "timestamp": 1681477078591 }, "type": "FeatureCollection", "features": [ { "type": "Feature", "geometry": { "type": "Point", "coordinates": [ -16.731082, 28.070588 ] }, "properties": { "id": "101912309", "gid": "whosonfirst:locality:101912309", "layer": "locality", "source": "whosonfirst", "source_id": "101912309", "country_code": "ES", "name": "La Oliva", "confidence": 0.6, "match_type": "fallback", "distance": 121.217, "accuracy": "centroid", "country": "España", "country_gid": "whosonfirst:country:85633129", "country_a": "ESP", "macroregion": "Canarias", "macroregion_gid": "whosonfirst:macroregion:404227369", "region": "Fuerteventura", "region_gid": "whosonfirst:region:85682653", "macrocounty": "Fuerteventura", "macrocounty_gid": "whosonfirst:macrocounty:404228145", "county": "Fuerteventura - Norte", "county_gid": "whosonfirst:county:102066754", "locality": "La Oliva", "locality_gid": "whosonfirst:locality:101912309", "continent": "Europa", "continent_gid": "whosonfirst:continent:102191581", "label": "La Oliva, España" }, "bbox": [ -14.0435852765063, 28.5637089952685, -13.8098522523282, 28.7669027115658 ] } ], "bbox": [ -14.0435852765063, 28.5637089952685, -13.8098522523282, 28.7669027115658 ] }
```

As you can see, the expected result is nowhere to be seen. I even got a result from a source not included in the sources query parameter.

However, when the city is omitted the expected result is actually included in the response.

(Other addresses from the CSV are returned as well, which is why I limit the size to 1.)

/api/v1/search?text=CALLE AVUTARDA 63&sources=registro_portales&size=1

```json
{ "geocoding": { "version": "0.2", "attribution": "http://estadisticas.arte-consultores.com/attribution", "query": { "text": "CALLE AVUTARDA 63", "size": 1, "sources": [ "registro_portales" ], "private": false, "focus.point.lat": 28, "focus.point.lon": -15.5, "lang": { "name": "English", "iso6391": "en", "iso6393": "eng", "via": "default", "defaulted": true }, "querySize": 20, "parser": "pelias", "parsed_text": { "subject": "63 CALLE AVUTARDA", "street": "CALLE AVUTARDA", "housenumber": "63" } }, "engine": { "name": "Pelias", "author": "Mapzen", "version": "1.0" }, "timestamp": 1681477374752 }, "type": "FeatureCollection", "features": [ { "type": "Feature", "geometry": { "type": "Point", "coordinates": [ -13.866, 28.71157 ] }, "properties": { "id": "572d2954-1916-4b8b-a21d-00d3db3d9d60", "gid": "registro_portales:address:572d2954-1916-4b8b-a21d-00d3db3d9d60", "layer": "address", "source": "registro_portales", "source_id": "572d2954-1916-4b8b-a21d-00d3db3d9d60", "country_code": "ES", "name": "CALLE AVUTARDA 63 LA OLIVA", "housenumber": "00063", "street": "CALLE AVUTARDA", "postalcode": "35660", "confidence": 1, "match_type": "exact", "distance": 178.595, "accuracy": "point", "country": "España", "country_gid": "whosonfirst:country:85633129", "country_a": "ESP", "macroregion": "Canarias", "macroregion_gid": "whosonfirst:macroregion:404227369", "region": "Fuerteventura", "region_gid": "whosonfirst:region:85682653", "macrocounty": "Fuerteventura", "macrocounty_gid": "whosonfirst:macrocounty:404228145", "county": "Fuerteventura - Norte", "county_gid": "whosonfirst:county:102066754", "locality": "La Oliva", "locality_gid": "whosonfirst:locality:101912309", "continent": "Europa", "continent_gid": "whosonfirst:continent:102191581", "label": "CALLE AVUTARDA 63 LA OLIVA, La Oliva, España" } } ], "bbox": [ -13.866, 28.71157, -13.866, 28.71157 ] }
```

Other queries that work are:

Other queries that don't work are:

I've also noticed the main difference between both responses is the parser: when libpostal is used the address is not found and another location from another source is returned instead. With the pelias parser it works as expected. Can you help me find how to make it work with the city included in the query?