Merge pull request #2250 from lonvia/save-transliterated-housenumbers

Switch to saving transliterated housenumbers in placex
This commit is contained in:
Sarah Hoffmann 2021-04-05 15:48:22 +02:00 committed by GitHub
commit 5d69c7ade1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 113 additions and 11 deletions

View File

@ -621,7 +621,7 @@ class SearchDescription
$aOrder[0] .= ' SELECT place_id';
$aOrder[0] .= ' FROM placex';
$aOrder[0] .= ' WHERE parent_place_id = search_name.place_id';
$aOrder[0] .= " AND transliteration(housenumber) ~* E'".$sHouseNumberRegex."'";
$aOrder[0] .= " AND housenumber ~* E'".$sHouseNumberRegex."'";
$aOrder[0] .= ' LIMIT 1';
$aOrder[0] .= ') ';
// also housenumbers from interpolation lines table are needed
@ -751,7 +751,7 @@ class SearchDescription
$sHouseNumberRegex = '\\\\m'.$this->sHouseNumber.'\\\\M';
$sSQL = 'SELECT place_id FROM placex ';
$sSQL .= 'WHERE parent_place_id in ('.$sPlaceIDs.')';
$sSQL .= " AND transliteration(housenumber) ~* E'".$sHouseNumberRegex."'";
$sSQL .= " AND housenumber ~* E'".$sHouseNumberRegex."'";
$sSQL .= $this->oContext->excludeSQL(' AND place_id');
Debug::printSQL($sSQL);

View File

@ -164,7 +164,10 @@ BEGIN
-- POI objects in the placex table
IF place IS NULL THEN
SELECT parent_place_id as place_id, country_code,
housenumber, postcode,
coalesce(address->'housenumber',
address->'streetnumber',
address->'conscriptionnumber')::text as housenumber,
postcode,
class, type,
name, address,
centroid
@ -178,7 +181,7 @@ BEGIN
-- place we should be using instead.
IF place IS NULL THEN
select coalesce(linked_place_id, place_id) as place_id, country_code,
housenumber, postcode,
null::text as housenumber, postcode,
class, type,
null as name, address,
null as centroid

View File

@ -47,6 +47,25 @@ END;
$$
LANGUAGE plpgsql;
-- Create housenumber tokens from an OSM addr:housenumber.
-- The housnumber is split at comma and semicolon as necessary.
-- The function returns the normalized form of the housenumber suitable
-- for comparison.
CREATE OR REPLACE FUNCTION create_housenumber_id(housenumber TEXT)
RETURNS TEXT
AS $$
DECLARE
normtext TEXT;
BEGIN
SELECT array_to_string(array_agg(trans), ';')
INTO normtext
FROM (SELECT lookup_word as trans, getorcreate_housenumber_id(lookup_word)
FROM (SELECT make_standard_name(h) as lookup_word
FROM regexp_split_to_table(housenumber, '[,;]') h) x) y;
return normtext;
END;
$$ LANGUAGE plpgsql STABLE STRICT;
CREATE OR REPLACE FUNCTION getorcreate_housenumber_id(lookup_word TEXT)
RETURNS INTEGER

View File

@ -666,20 +666,17 @@ BEGIN
NEW.housenumber := NULL;
IF NEW.address is not NULL THEN
IF NEW.address ? 'conscriptionnumber' THEN
i := getorcreate_housenumber_id(make_standard_name(NEW.address->'conscriptionnumber'));
IF NEW.address ? 'streetnumber' THEN
i := getorcreate_housenumber_id(make_standard_name(NEW.address->'streetnumber'));
NEW.housenumber := (NEW.address->'conscriptionnumber') || '/' || (NEW.address->'streetnumber');
ELSE
NEW.housenumber := NEW.address->'conscriptionnumber';
END IF;
ELSEIF NEW.address ? 'streetnumber' THEN
NEW.housenumber := NEW.address->'streetnumber';
i := getorcreate_housenumber_id(make_standard_name(NEW.address->'streetnumber'));
ELSEIF NEW.address ? 'housenumber' THEN
NEW.housenumber := NEW.address->'housenumber';
i := getorcreate_housenumber_id(make_standard_name(NEW.housenumber));
END IF;
NEW.housenumber := create_housenumber_id(NEW.housenumber);
addr_street := NEW.address->'street';
addr_place := NEW.address->'place';

View File

@ -5,7 +5,7 @@ CREATE TABLE word_frequencies AS
GROUP BY id);
select count(getorcreate_postcode_id(v)) from (select distinct address->'postcode' as v from place where address ? 'postcode') as w where v is not null;
select count(getorcreate_housenumber_id(make_standard_name(v))) from (select distinct address->'housenumber' as v from place where address ? 'housenumber') as w;
select count(create_housenumber_id(v)) from (select distinct address->'housenumber' as v from place where address ? 'housenumber') as w;
-- copy the word frequencies
update word set search_name_count = count from word_frequencies wf where wf.id = word.word_id;

View File

@ -130,3 +130,29 @@ def add_nominatim_property_table(conn, config, **_):
value TEXT);
GRANT SELECT ON TABLE nominatim_properties TO "{}";
""".format(config.DATABASE_WEBUSER))
@_migration(3, 6, 0, 0)
def change_housenumber_transliteration(conn, **_):
""" Transliterate housenumbers.
The database schema switched from saving raw housenumbers in
placex.housenumber to saving transliterated ones.
"""
with conn.cursor() as cur:
cur.execute("""CREATE OR REPLACE FUNCTION create_housenumber_id(housenumber TEXT)
RETURNS TEXT AS $$
DECLARE
normtext TEXT;
BEGIN
SELECT array_to_string(array_agg(trans), ';')
INTO normtext
FROM (SELECT lookup_word as trans, getorcreate_housenumber_id(lookup_word)
FROM (SELECT make_standard_name(h) as lookup_word
FROM regexp_split_to_table(housenumber, '[,;]') h) x) y;
return normtext;
END;
$$ LANGUAGE plpgsql STABLE STRICT;""")
cur.execute("DELETE FROM word WHERE class = 'place' and type = 'house'")
cur.execute("""UPDATE placex
SET housenumber = create_housenumber_id(housenumber)
WHERE housenumber is not null""")

View File

@ -10,7 +10,7 @@ Version information for Nominatim.
# and must always be increased when there is a change to the database or code
# that requires a migration.
# Released versions always have a database patch level of 0.
NOMINATIM_VERSION = (3, 6, 0, 0)
NOMINATIM_VERSION = (3, 6, 0, 1)
POSTGRESQL_REQUIRED_VERSION = (9, 3)
POSTGIS_REQUIRED_VERSION = (2, 2)

View File

@ -137,7 +137,7 @@ Feature: Import and search of names
| ID | osm_type | osm_id |
| 0 | R | 1 |
Scenario: Unprintable characters in postcodes are ignored
Scenario: Unprintable characters in postcodes are ignored
Given the named places
| osm | class | type | address |
| N234 | amenity | prison | 'postcode' : u'1234\u200e' |
@ -146,3 +146,60 @@ Feature: Import and search of names
Then results contain
| ID | osm_type |
| 0 | P |
Scenario Outline: Housenumbers with special characters are found
Given the grid
| 1 | | | | 2 |
| | | 9 | | |
And the places
| osm | class | type | name | geometry |
| W1 | highway | primary | Main St | 1,2 |
And the places
| osm | class | type | housenr | geometry |
| N1 | building | yes | <nr> | 9 |
When importing
And searching for "Main St <nr>"
Then results contain
| osm_type | osm_id | name |
| N | 1 | <nr>, Main St |
Examples:
| nr |
| 1 |
| 3456 |
| 1 a |
| 56b |
| 1 A |
| 2 |
| 1Б |
| 1 к1 |
| 23-123 |
Scenario Outline: Housenumbers in lists are found
Given the grid
| 1 | | | | 2 |
| | | 9 | | |
And the places
| osm | class | type | name | geometry |
| W1 | highway | primary | Main St | 1,2 |
And the places
| osm | class | type | housenr | geometry |
| N1 | building | yes | <nr-list> | 9 |
When importing
And searching for "Main St <nr>"
Then results contain
| osm_type | osm_id | name |
| N | 1 | <nr-list>, Main St |
Examples:
| nr-list | nr |
| 1,2,3 | 1 |
| 1,2,3 | 2 |
| 1, 2, 3 | 3 |
| 45 ;67;3 | 45 |
| 45 ;67;3 | 67 |
| 1a;1k | 1a |
| 1a;1k | 1k |
| 34/678 | 34 |
| 34/678 | 678 |
| 34/678 | 34/678 |