mirror of
https://github.com/osm-search/Nominatim.git
synced 2024-10-27 03:29:24 +03:00
Merge pull request #2250 from lonvia/save-transliterated-housenumbers
Switch to saving transliterated housenumbers in placex
This commit is contained in:
commit
5d69c7ade1
@ -621,7 +621,7 @@ class SearchDescription
|
||||
$aOrder[0] .= ' SELECT place_id';
|
||||
$aOrder[0] .= ' FROM placex';
|
||||
$aOrder[0] .= ' WHERE parent_place_id = search_name.place_id';
|
||||
$aOrder[0] .= " AND transliteration(housenumber) ~* E'".$sHouseNumberRegex."'";
|
||||
$aOrder[0] .= " AND housenumber ~* E'".$sHouseNumberRegex."'";
|
||||
$aOrder[0] .= ' LIMIT 1';
|
||||
$aOrder[0] .= ') ';
|
||||
// also housenumbers from interpolation lines table are needed
|
||||
@ -751,7 +751,7 @@ class SearchDescription
|
||||
$sHouseNumberRegex = '\\\\m'.$this->sHouseNumber.'\\\\M';
|
||||
$sSQL = 'SELECT place_id FROM placex ';
|
||||
$sSQL .= 'WHERE parent_place_id in ('.$sPlaceIDs.')';
|
||||
$sSQL .= " AND transliteration(housenumber) ~* E'".$sHouseNumberRegex."'";
|
||||
$sSQL .= " AND housenumber ~* E'".$sHouseNumberRegex."'";
|
||||
$sSQL .= $this->oContext->excludeSQL(' AND place_id');
|
||||
|
||||
Debug::printSQL($sSQL);
|
||||
|
@ -164,7 +164,10 @@ BEGIN
|
||||
-- POI objects in the placex table
|
||||
IF place IS NULL THEN
|
||||
SELECT parent_place_id as place_id, country_code,
|
||||
housenumber, postcode,
|
||||
coalesce(address->'housenumber',
|
||||
address->'streetnumber',
|
||||
address->'conscriptionnumber')::text as housenumber,
|
||||
postcode,
|
||||
class, type,
|
||||
name, address,
|
||||
centroid
|
||||
@ -178,7 +181,7 @@ BEGIN
|
||||
-- place we should be using instead.
|
||||
IF place IS NULL THEN
|
||||
select coalesce(linked_place_id, place_id) as place_id, country_code,
|
||||
housenumber, postcode,
|
||||
null::text as housenumber, postcode,
|
||||
class, type,
|
||||
null as name, address,
|
||||
null as centroid
|
||||
|
@ -47,6 +47,25 @@ END;
|
||||
$$
|
||||
LANGUAGE plpgsql;
|
||||
|
||||
-- Create housenumber tokens from an OSM addr:housenumber.
|
||||
-- The housnumber is split at comma and semicolon as necessary.
|
||||
-- The function returns the normalized form of the housenumber suitable
|
||||
-- for comparison.
|
||||
CREATE OR REPLACE FUNCTION create_housenumber_id(housenumber TEXT)
|
||||
RETURNS TEXT
|
||||
AS $$
|
||||
DECLARE
|
||||
normtext TEXT;
|
||||
BEGIN
|
||||
SELECT array_to_string(array_agg(trans), ';')
|
||||
INTO normtext
|
||||
FROM (SELECT lookup_word as trans, getorcreate_housenumber_id(lookup_word)
|
||||
FROM (SELECT make_standard_name(h) as lookup_word
|
||||
FROM regexp_split_to_table(housenumber, '[,;]') h) x) y;
|
||||
|
||||
return normtext;
|
||||
END;
|
||||
$$ LANGUAGE plpgsql STABLE STRICT;
|
||||
|
||||
CREATE OR REPLACE FUNCTION getorcreate_housenumber_id(lookup_word TEXT)
|
||||
RETURNS INTEGER
|
||||
|
@ -666,20 +666,17 @@ BEGIN
|
||||
NEW.housenumber := NULL;
|
||||
IF NEW.address is not NULL THEN
|
||||
IF NEW.address ? 'conscriptionnumber' THEN
|
||||
i := getorcreate_housenumber_id(make_standard_name(NEW.address->'conscriptionnumber'));
|
||||
IF NEW.address ? 'streetnumber' THEN
|
||||
i := getorcreate_housenumber_id(make_standard_name(NEW.address->'streetnumber'));
|
||||
NEW.housenumber := (NEW.address->'conscriptionnumber') || '/' || (NEW.address->'streetnumber');
|
||||
ELSE
|
||||
NEW.housenumber := NEW.address->'conscriptionnumber';
|
||||
END IF;
|
||||
ELSEIF NEW.address ? 'streetnumber' THEN
|
||||
NEW.housenumber := NEW.address->'streetnumber';
|
||||
i := getorcreate_housenumber_id(make_standard_name(NEW.address->'streetnumber'));
|
||||
ELSEIF NEW.address ? 'housenumber' THEN
|
||||
NEW.housenumber := NEW.address->'housenumber';
|
||||
i := getorcreate_housenumber_id(make_standard_name(NEW.housenumber));
|
||||
END IF;
|
||||
NEW.housenumber := create_housenumber_id(NEW.housenumber);
|
||||
|
||||
addr_street := NEW.address->'street';
|
||||
addr_place := NEW.address->'place';
|
||||
|
@ -5,7 +5,7 @@ CREATE TABLE word_frequencies AS
|
||||
GROUP BY id);
|
||||
|
||||
select count(getorcreate_postcode_id(v)) from (select distinct address->'postcode' as v from place where address ? 'postcode') as w where v is not null;
|
||||
select count(getorcreate_housenumber_id(make_standard_name(v))) from (select distinct address->'housenumber' as v from place where address ? 'housenumber') as w;
|
||||
select count(create_housenumber_id(v)) from (select distinct address->'housenumber' as v from place where address ? 'housenumber') as w;
|
||||
|
||||
-- copy the word frequencies
|
||||
update word set search_name_count = count from word_frequencies wf where wf.id = word.word_id;
|
||||
|
@ -130,3 +130,29 @@ def add_nominatim_property_table(conn, config, **_):
|
||||
value TEXT);
|
||||
GRANT SELECT ON TABLE nominatim_properties TO "{}";
|
||||
""".format(config.DATABASE_WEBUSER))
|
||||
|
||||
@_migration(3, 6, 0, 0)
|
||||
def change_housenumber_transliteration(conn, **_):
|
||||
""" Transliterate housenumbers.
|
||||
|
||||
The database schema switched from saving raw housenumbers in
|
||||
placex.housenumber to saving transliterated ones.
|
||||
"""
|
||||
with conn.cursor() as cur:
|
||||
cur.execute("""CREATE OR REPLACE FUNCTION create_housenumber_id(housenumber TEXT)
|
||||
RETURNS TEXT AS $$
|
||||
DECLARE
|
||||
normtext TEXT;
|
||||
BEGIN
|
||||
SELECT array_to_string(array_agg(trans), ';')
|
||||
INTO normtext
|
||||
FROM (SELECT lookup_word as trans, getorcreate_housenumber_id(lookup_word)
|
||||
FROM (SELECT make_standard_name(h) as lookup_word
|
||||
FROM regexp_split_to_table(housenumber, '[,;]') h) x) y;
|
||||
return normtext;
|
||||
END;
|
||||
$$ LANGUAGE plpgsql STABLE STRICT;""")
|
||||
cur.execute("DELETE FROM word WHERE class = 'place' and type = 'house'")
|
||||
cur.execute("""UPDATE placex
|
||||
SET housenumber = create_housenumber_id(housenumber)
|
||||
WHERE housenumber is not null""")
|
||||
|
@ -10,7 +10,7 @@ Version information for Nominatim.
|
||||
# and must always be increased when there is a change to the database or code
|
||||
# that requires a migration.
|
||||
# Released versions always have a database patch level of 0.
|
||||
NOMINATIM_VERSION = (3, 6, 0, 0)
|
||||
NOMINATIM_VERSION = (3, 6, 0, 1)
|
||||
|
||||
POSTGRESQL_REQUIRED_VERSION = (9, 3)
|
||||
POSTGIS_REQUIRED_VERSION = (2, 2)
|
||||
|
@ -137,7 +137,7 @@ Feature: Import and search of names
|
||||
| ID | osm_type | osm_id |
|
||||
| 0 | R | 1 |
|
||||
|
||||
Scenario: Unprintable characters in postcodes are ignored
|
||||
Scenario: Unprintable characters in postcodes are ignored
|
||||
Given the named places
|
||||
| osm | class | type | address |
|
||||
| N234 | amenity | prison | 'postcode' : u'1234\u200e' |
|
||||
@ -146,3 +146,60 @@ Feature: Import and search of names
|
||||
Then results contain
|
||||
| ID | osm_type |
|
||||
| 0 | P |
|
||||
|
||||
Scenario Outline: Housenumbers with special characters are found
|
||||
Given the grid
|
||||
| 1 | | | | 2 |
|
||||
| | | 9 | | |
|
||||
And the places
|
||||
| osm | class | type | name | geometry |
|
||||
| W1 | highway | primary | Main St | 1,2 |
|
||||
And the places
|
||||
| osm | class | type | housenr | geometry |
|
||||
| N1 | building | yes | <nr> | 9 |
|
||||
When importing
|
||||
And searching for "Main St <nr>"
|
||||
Then results contain
|
||||
| osm_type | osm_id | name |
|
||||
| N | 1 | <nr>, Main St |
|
||||
|
||||
Examples:
|
||||
| nr |
|
||||
| 1 |
|
||||
| 3456 |
|
||||
| 1 a |
|
||||
| 56b |
|
||||
| 1 A |
|
||||
| 2號 |
|
||||
| 1Б |
|
||||
| 1 к1 |
|
||||
| 23-123 |
|
||||
|
||||
Scenario Outline: Housenumbers in lists are found
|
||||
Given the grid
|
||||
| 1 | | | | 2 |
|
||||
| | | 9 | | |
|
||||
And the places
|
||||
| osm | class | type | name | geometry |
|
||||
| W1 | highway | primary | Main St | 1,2 |
|
||||
And the places
|
||||
| osm | class | type | housenr | geometry |
|
||||
| N1 | building | yes | <nr-list> | 9 |
|
||||
When importing
|
||||
And searching for "Main St <nr>"
|
||||
Then results contain
|
||||
| osm_type | osm_id | name |
|
||||
| N | 1 | <nr-list>, Main St |
|
||||
|
||||
Examples:
|
||||
| nr-list | nr |
|
||||
| 1,2,3 | 1 |
|
||||
| 1,2,3 | 2 |
|
||||
| 1, 2, 3 | 3 |
|
||||
| 45 ;67;3 | 45 |
|
||||
| 45 ;67;3 | 67 |
|
||||
| 1a;1k | 1a |
|
||||
| 1a;1k | 1k |
|
||||
| 34/678 | 34 |
|
||||
| 34/678 | 678 |
|
||||
| 34/678 | 34/678 |
|
||||
|
Loading…
Reference in New Issue
Block a user