Merge pull request #2027 from lonvia/remove-duplicate-admin-boundaries

Handle duplicated admin boundaries
This commit is contained in:
Sarah Hoffmann 2020-10-28 11:11:42 +01:00 committed by GitHub
commit a888f6ff93
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 89 additions and 38 deletions

View File

@ -934,6 +934,8 @@ class Geocode
} else {
$aResult['foundorder'] += 0.01;
}
// - rank
$aResult['foundorder'] -= 0.00001 * (30 - $aResult['rank_search']);
// Adjust importance for the number of exact string matches in the result
$iCountWords = 0;

View File

@ -144,6 +144,7 @@
"municipality" : 18
},
"boundary" : {
"administrative5" : [10, 0],
"administrative7" : [13, 0],
"administrative8" : 14
}

View File

@ -494,33 +494,6 @@ END;
$$
LANGUAGE plpgsql;
-- Look up the address rank of the closest enclosing administrative boundary.
--
-- Parameters:
--   geom      geometry that a candidate boundary must cover
--   in_level  admin_level of the boundary being ranked; only relations with
--             a strictly smaller admin_level are candidates
--
-- Returns the rank_address of the candidate with the largest admin_level,
-- but never less than 3. For in_level <= 3 or in_level > 15 no lookup is
-- done and 3 is returned directly.
CREATE OR REPLACE FUNCTION get_parent_address_level(geom GEOMETRY, in_level SMALLINT)
  RETURNS SMALLINT
  AS $$
DECLARE
  address_rank SMALLINT;
BEGIN
  IF in_level <= 3 or in_level > 15 THEN
    address_rank := 3;
  ELSE
    -- Boundaries with rank_address = 0 do not take part in addressing.
    -- They must be skipped here: otherwise such a boundary would be picked
    -- as the nearest parent, the function would fall back to 3 and the next
    -- enclosing boundary that really is part of the address would be ignored.
    SELECT rank_address INTO address_rank
      FROM placex
      WHERE osm_type = 'R' and class = 'boundary' and type = 'administrative'
            and admin_level < in_level
            and rank_address > 0
            and geometry ~ geom and _ST_Covers(geometry, geom)
      ORDER BY admin_level desc LIMIT 1;
  END IF;

  -- No usable parent found (or parent ranked too low): use the floor value.
  IF address_rank is NULL or address_rank <= 3 THEN
    RETURN 3;
  END IF;

  RETURN address_rank;
END;
$$
LANGUAGE plpgsql;
CREATE OR REPLACE FUNCTION placex_update()
RETURNS TRIGGER
AS $$
@ -610,14 +583,36 @@ BEGIN
and NEW.osm_type = 'R' and NEW.rank_address > 0
THEN
-- First, check that admin boundaries do not overtake each other rank-wise.
parent_address_level := get_parent_address_level(NEW.centroid, NEW.admin_level);
IF parent_address_level >= NEW.rank_address THEN
IF parent_address_level >= 24 THEN
NEW.rank_address := 25;
parent_address_level := 3;
FOR location IN
SELECT rank_address,
(CASE WHEN extratags ? 'wikidata' and NEW.extratags ? 'wikidata'
and extratags->'wikidata' = NEW.extratags->'wikidata'
THEN ST_Equals(geometry, NEW.geometry)
ELSE false END) as is_same
FROM placex
WHERE osm_type = 'R' and class = 'boundary' and type = 'administrative'
and admin_level < NEW.admin_level and admin_level > 3
and rank_address > 0
and geometry && NEW.centroid and _ST_Covers(geometry, NEW.centroid)
ORDER BY admin_level desc LIMIT 1
LOOP
IF location.is_same THEN
-- Looks like the same boundary is replicated on multiple admin_levels.
-- Usual tagging in Poland. Remove our boundary from addresses.
NEW.rank_address := 0;
ELSE
NEW.rank_address := parent_address_level + 2;
parent_address_level := location.rank_address;
IF location.rank_address >= NEW.rank_address THEN
IF location.rank_address >= 24 THEN
NEW.rank_address := 25;
ELSE
NEW.rank_address := location.rank_address + 2;
END IF;
END IF;
END IF;
END IF;
END LOOP;
IF NEW.rank_address > 9 THEN
-- Second check that the boundary is not completely contained in a
-- place area with a higher address rank
@ -630,7 +625,7 @@ BEGIN
and ST_Relate(geometry, NEW.geometry, 'T*T***FF*') -- contains but not equal
ORDER BY rank_address desc LIMIT 1
LOOP
NEW.rank_address := location.rank_address + 2;
NEW.rank_address := location.rank_address + 2;
END LOOP;
END IF;
ELSEIF NEW.class = 'place' and NEW.osm_type = 'N'

View File

@ -74,15 +74,15 @@ Feature: Rank assignment
| R21 | boundary | administrative | 9 | municipality | (0 0, 0 1, 1 1, 1 0, 0 0) |
| R22 | boundary | administrative | 9 | suburb | (0 0, 0 1, 1 1, 1 0, 0 0) |
When importing
Then place_addressline contains
| object | address | cached_rank_address |
| R21 | R20 | 16 |
| R22 | R20 | 16 |
Then placex contains
| object | rank_search | rank_address |
| R20 | 16 | 16 |
| R21 | 18 | 18 |
| R22 | 18 | 20 |
Then place_addressline contains
| object | address | cached_rank_address |
| R21 | R20 | 16 |
| R22 | R20 | 16 |
Scenario: Admin levels cannot overtake each other due to place address ranks
Given the named places
@ -101,6 +101,20 @@ Feature: Rank assignment
| R21 | R20 | 16 |
| R22 | R20 | 16 |
# R21 (admin level 8) has one vertex at x = -0.0001, which lies just outside
# the square of R20 (admin level 6, extra+place = town). The expectations
# below require that R21 is still ranked after R20 (rank_address 18 vs. 16)
# and that R20 shows up in the address lines of R21.
Scenario: Admin levels cannot overtake each other due to place address ranks even when slightly misaligned
Given the named places
| osm | class | type | admin | extra+place | geometry |
| R20 | boundary | administrative | 6 | town | (0 0, 0 2, 2 2, 2 0, 0 0) |
| R21 | boundary | administrative | 8 | | (0 0, -0.0001 1, 1 1, 1 0, 0 0) |
When importing
Then placex contains
| object | rank_search | rank_address |
| R20 | 12 | 16 |
| R21 | 16 | 18 |
Then place_addressline contains
| object | address | cached_rank_address |
| R21 | R20 | 16 |
Scenario: Admin levels must not be larger than 25
Given the named places
| osm | class | type | admin | extra+place | geometry |
@ -146,3 +160,42 @@ Feature: Rank assignment
| object | rank_search | rank_address |
| R10 | 16 | 16 |
| R20 | 12 | 12 |
# R22, R21 and R23 (admin levels 8, 9 and 10) share the same geometry and the
# same wikidata value Q444. Only R22, the one with the lowest admin level, is
# expected to keep an address rank (16); R21 and R23 are expected to end up
# with rank_address 0 while keeping their search ranks.
# N20 lies inside the shared square and must only get R22 as address line.
Scenario: adjacent admin_levels are considered the same object when they have the same wikidata
Given the named places
| osm | class | type | admin | extra+wikidata | geometry |
| N20 | place | square | 15 | Q123 | 0.1 0.1 |
| R23 | boundary | administrative | 10 | Q444 | (0 0, 0 1, 1 1, 1 0, 0 0) |
| R21 | boundary | administrative | 9 | Q444 | (0 0, 0 1, 1 1, 1 0, 0 0) |
| R22 | boundary | administrative | 8 | Q444 | (0 0, 0 1, 1 1, 1 0, 0 0) |
When importing
Then placex contains
| object | rank_search | rank_address |
| R23 | 20 | 0 |
| R21 | 18 | 0 |
| R22 | 16 | 16 |
Then place_addressline contains
| object | address | cached_rank_address |
| N20 | R22 | 16 |
Then place_addressline doesn't contain
| object | address |
| N20 | R21 |
| N20 | R23 |
# Counterpart to the same-wikidata case: R21 and R22 share the same geometry
# but carry different wikidata values (Q4441 vs. Q444). Both are expected to
# keep their address ranks (18 and 16) and both must appear in the address
# lines of N20, which lies inside the shared square.
Scenario: adjacent admin_levels are considered different objects when they have different wikidata
Given the named places
| osm | class | type | admin | extra+wikidata | geometry |
| N20 | place | square | 15 | Q123 | 0.1 0.1 |
| R21 | boundary | administrative | 9 | Q4441 | (0 0, 0 1, 1 1, 1 0, 0 0) |
| R22 | boundary | administrative | 8 | Q444 | (0 0, 0 1, 1 1, 1 0, 0 0) |
When importing
Then placex contains
| object | rank_search | rank_address |
| R21 | 18 | 18 |
| R22 | 16 | 16 |
Then place_addressline contains
| object | address | cached_rank_address |
| N20 | R22 | 16 |
| N20 | R21 | 18 |