Make rank assignments configurable

The initial search and address rank is saved in a table
that is set up from a json configuration file. Ranks may
be assigned on a country level according to class and
type of the object. Special handling that depends on the
geometry or OSM type is still hard-coded in placex insert.

The new default config file mimics the current assignment
as closely as possible. A couple of exceptions have been
removed, most notably the exception for Irish townlands.
This commit is contained in:
Sarah Hoffmann 2018-11-23 23:02:32 +01:00
parent f0088ca2be
commit e10d11c6c7
7 changed files with 259 additions and 149 deletions

86
data/address-levels.json Normal file
View File

@ -0,0 +1,86 @@
[
{ "tags" : {
"place" : {
"sea" : [2, 0],
"continent" : [2, 0],
"country" : [4, 4],
"state" : [8, 8],
"region" : [18, 0],
"county" : 12,
"city" : 16,
"island" : [17, 0],
"town" : [18, 16],
"village" : [19, 16],
"hamlet" : [19, 16],
"municipality" : [19, 16],
"district" : [19, 16],
"unincorporated_area" : [19, 16],
"borough" : [19, 16],
"suburb" : 20,
"croft" : 20,
"subdivision" : 20,
"isolated_dwelling" : 20,
"farm" : [20, 0],
"locality" : [20, 0],
"islet" : [20, 0],
"mountain_pass" : [20, 0],
"neighbourhood" : 22,
"houses" : [28, 0]
},
"boundary" : {
"administrative2" : 4,
"administrative3" : 6,
"administrative4" : 8,
"administrative5" : 10,
"administrative6" : 12,
"administrative7" : 14,
"administrative8" : 16,
"administrative9" : 18,
"administrative10" : 20,
"administrative11" : 22,
"administrative12" : 24
},
"landuse" : {
"residential" : 22,
"farm" : 22,
"farmyard" : 22,
"industrial" : 22,
"commercial" : 22,
"allotments" : 22,
"retail" : 22,
"" : [22, 0]
},
"leisure" : {
"park" : [24, 0]
},
"natural" : {
"peak" : [18, 0],
"volcano" : [18, 0],
"mountain_range" : [18, 0],
"sea" : [4, 0],
"" : [22, 0]
},
"waterway" : {
"" : [17, 0]
},
"highway" : {
"" : 26,
"service" : 27,
"cycleway" : 27,
"path" : 27,
"footway" : 27,
"steps" : 27,
"bridleway" : 27,
"motorway_link" : 27,
"primary_link" : 27,
"trunk_link" : 27,
"secondary_link" : 27,
"tertiary_link" : 27
},
"mountain_pass" : {
"" : [20, 0]
}
}
}
]

View File

@ -0,0 +1,98 @@
<?php
namespace Nominatim\Setup;
/**
* Parses an address level description.
*/
class AddressLevelParser
{
    /** Decoded address level description: a list of rule sets. */
    private $aLevels;

    /**
     * Read and decode the JSON address level description.
     *
     * Problems are reported through the project's fail() helper.
     * NOTE(review): fail() is presumably fatal; the fallbacks below only
     * matter if it ever returns - confirm against its definition.
     *
     * @param string $sDescriptionFile Path to the JSON description file.
     */
    public function __construct(string $sDescriptionFile)
    {
        $sJson = file_get_contents($sDescriptionFile);
        if ($sJson === false) {
            // Report an unreadable file as such instead of letting it
            // surface as a misleading JSON syntax error further down.
            fail('Cannot read address level description file: '.$sDescriptionFile);
        }

        $this->aLevels = json_decode((string) $sJson, true);

        if (!$this->aLevels) {
            switch (json_last_error()) {
                case JSON_ERROR_NONE:
                    // Valid but empty document; nothing to report.
                    break;
                case JSON_ERROR_DEPTH:
                    fail('JSON error - Maximum stack depth exceeded');
                    break;
                case JSON_ERROR_STATE_MISMATCH:
                    fail('JSON error - Underflow or the modes mismatch');
                    break;
                case JSON_ERROR_CTRL_CHAR:
                    fail('JSON error - Unexpected control character found');
                    break;
                case JSON_ERROR_SYNTAX:
                    fail('JSON error - Syntax error, malformed JSON');
                    break;
                case JSON_ERROR_UTF8:
                    fail('JSON error - Malformed UTF-8 characters, possibly incorrectly encoded');
                    break;
                default:
                    fail('JSON error - Unknown error');
                    break;
            }
        } elseif (!is_array($this->aLevels)) {
            // A bare scalar is valid JSON but not a usable description.
            fail('Address level description must be a JSON list of rule sets');
        }

        if (!is_array($this->aLevels)) {
            // Keep createTable() well-defined for empty or unusable input.
            $this->aLevels = array();
        }
    }

    /**
     * Dump the description into a database table.
     *
     * @param object $oDB    Database connection to use.
     * @param string $sTable Name of table to create.
     *
     * @return null
     *
     * A new table is created. Any previously existing table is dropped.
     * The table has the following columns:
     * country_code, class, type, rank_search, rank_address.
     *
     * Rule sets without a 'countries' list get a NULL country_code and an
     * empty type name is stored as a NULL type. A rank given as a single
     * number is used for both rank_search and rank_address; a two-element
     * list is read as [rank_search, rank_address].
     */
    public function createTable($oDB, $sTable)
    {
        chksql($oDB->query('DROP TABLE IF EXISTS '.$sTable));
        $sSql = 'CREATE TABLE '.$sTable;
        $sSql .= '(country_code varchar(2), class TEXT, type TEXT,';
        $sSql .= ' rank_search SMALLINT, rank_address SMALLINT)';
        chksql($oDB->query($sSql));

        $sSql = 'CREATE UNIQUE INDEX ON '.$sTable.'(country_code, class, type)';
        chksql($oDB->query($sSql));

        $aRows = $this->buildRows();
        // An INSERT without any value tuples is invalid SQL, so skip it
        // when the description defines no ranks at all.
        if (!empty($aRows)) {
            chksql($oDB->query('INSERT INTO '.$sTable.' VALUES '.join(',', $aRows)));
        }
    }

    /**
     * Convert the parsed description into SQL value tuples, one per
     * (country, class, type) combination.
     *
     * @return string[] Tuples of the form '(country,class,type,search,address)'.
     */
    private function buildRows()
    {
        $aRows = array();

        foreach ($this->aLevels as $aLevel) {
            if (isset($aLevel['countries'])) {
                // Keyed by country code so duplicates collapse to one row.
                $aCountries = array();
                foreach ($aLevel['countries'] as $sCountry) {
                    $aCountries[$sCountry] = getDBQuoted($sCountry);
                }
            } else {
                $aCountries = array('NULL' => 'NULL');
            }

            $aTags = isset($aLevel['tags']) ? $aLevel['tags'] : array();
            foreach ($aTags as $sKey => $aValues) {
                foreach ($aValues as $sValue => $mRanks) {
                    // PHP turns numeric JSON keys into integers, so compare
                    // as string: only the empty name means "any type".
                    $sType = (string) $sValue;
                    $sQuotedType = ($sType === '') ? 'NULL' : getDBQuoted($sType);

                    if (is_array($mRanks)) {
                        $iSearch = (int) $mRanks[0];
                        $iAddress = (int) $mRanks[1];
                    } else {
                        $iSearch = (int) $mRanks;
                        $iAddress = $iSearch;
                    }

                    // Ranks are forced to integers above so that nothing
                    // from the config file reaches the SQL unquoted.
                    $sTail = ','.getDBQuoted((string) $sKey).','.$sQuotedType
                             .','.$iSearch.','.$iAddress.')';
                    foreach ($aCountries as $sQuotedCountry) {
                        $aRows[] = '('.$sQuotedCountry.$sTail;
                    }
                }
            }
        }

        return $aRows;
    }
}

View File

@ -2,6 +2,8 @@
namespace Nominatim\Setup;
require_once(CONST_BasePath.'/lib/setup/AddressLevelParser.php');
class SetupFunctions
{
protected $iCacheMemory;
@ -272,6 +274,9 @@ class SetupFunctions
if ($bReverseOnly) {
$this->pgExec('DROP TABLE search_name');
}
$oAlParser = new AddressLevelParser(CONST_Address_Level_Config);
$oAlParser->createTable($this->oDB, 'address_levels');
}
public function createPartitionTables()

View File

@ -49,6 +49,7 @@ if (isset($_GET['debug']) && $_GET['debug']) @define('CONST_Debug', true);
@define('CONST_Pyosmium_Binary', '@PYOSMIUM_PATH@');
@define('CONST_Tiger_Data_Path', CONST_ExtraDataPath.'/tiger');
@define('CONST_Wikipedia_Data_Path', CONST_ExtraDataPath);
// JSON file with the rank assignments; handed to AddressLevelParser to fill the address_levels table.
@define('CONST_Address_Level_Config', CONST_ExtraDataPath.'/address-levels.json');
// osm2pgsql settings
@define('CONST_Osm2pgsql_Flatnode_File', null);

View File

@ -817,11 +817,12 @@ DECLARE
i INTEGER;
postcode TEXT;
result BOOLEAN;
is_area BOOLEAN;
country_code VARCHAR(2);
default_language VARCHAR(10);
diameter FLOAT;
classtable TEXT;
line RECORD;
classtype TEXT;
BEGIN
--DEBUG: RAISE WARNING '% % % %',NEW.osm_type,NEW.osm_id,NEW.class,NEW.type;
@ -848,11 +849,10 @@ BEGIN
IF NEW.osm_type = 'X' THEN
-- E'X'ternal records should already be in the right format so do nothing
ELSE
NEW.rank_search := 30;
NEW.rank_address := NEW.rank_search;
is_area := ST_GeometryType(NEW.geometry) IN ('ST_Polygon','ST_MultiPolygon');
-- By doing in postgres we have the country available to us - currently only used for postcode
IF NEW.class in ('place','boundary') AND NEW.type in ('postcode','postal_code') THEN
IF NEW.class in ('place','boundary')
AND NEW.type in ('postcode','postal_code') THEN
IF NEW.address IS NULL OR NOT NEW.address ? 'postcode' THEN
-- most likely just a part of a multipolygon postcode boundary, throw it away
@ -864,134 +864,58 @@ BEGIN
SELECT * FROM get_postcode_rank(NEW.country_code, NEW.address->'postcode')
INTO NEW.rank_search, NEW.rank_address;
IF NOT ST_GeometryType(NEW.geometry) IN ('ST_Polygon','ST_MultiPolygon') THEN
IF NOT is_area THEN
NEW.rank_address := 0;
END IF;
ELSEIF NEW.class = 'place' THEN
IF NEW.type in ('continent', 'sea') THEN
NEW.rank_search := 2;
NEW.rank_address := 0;
NEW.country_code := NULL;
ELSEIF NEW.type in ('country') THEN
NEW.rank_search := 4;
IF ST_GeometryType(NEW.geometry) IN ('ST_Polygon','ST_MultiPolygon') THEN
NEW.rank_address := NEW.rank_search;
ELSE
NEW.rank_address := 0;
END IF;
ELSEIF NEW.type in ('state') THEN
NEW.rank_search := 8;
IF ST_GeometryType(NEW.geometry) IN ('ST_Polygon','ST_MultiPolygon') THEN
NEW.rank_address := NEW.rank_search;
ELSE
NEW.rank_address := 0;
END IF;
ELSEIF NEW.type in ('region') THEN
NEW.rank_search := 18; -- dropped from previous value of 10
NEW.rank_address := 0; -- So badly miss-used that better to just drop it!
ELSEIF NEW.type in ('county') THEN
NEW.rank_search := 12;
NEW.rank_address := NEW.rank_search;
ELSEIF NEW.type in ('city') THEN
NEW.rank_search := 16;
NEW.rank_address := NEW.rank_search;
ELSEIF NEW.type in ('island') THEN
NEW.rank_search := 17;
NEW.rank_address := 0;
ELSEIF NEW.type in ('town') THEN
NEW.rank_search := 18;
NEW.rank_address := 16;
ELSEIF NEW.type in ('village','hamlet','municipality','district','unincorporated_area','borough') THEN
NEW.rank_search := 19;
NEW.rank_address := 16;
ELSEIF NEW.type in ('suburb','croft','subdivision','isolated_dwelling') THEN
NEW.rank_search := 20;
NEW.rank_address := NEW.rank_search;
ELSEIF NEW.type in ('farm','locality','islet','mountain_pass') THEN
NEW.rank_search := 20;
NEW.rank_address := 0;
-- Irish townlands, tagged as place=locality and locality=townland
IF (NEW.extratags -> 'locality') = 'townland' THEN
NEW.rank_address := 20;
END IF;
ELSEIF NEW.type in ('neighbourhood') THEN
NEW.rank_search := 22;
NEW.rank_address := 22;
ELSEIF NEW.type in ('house','building') THEN
NEW.rank_search := 30;
NEW.rank_address := NEW.rank_search;
ELSEIF NEW.type in ('houses') THEN
-- can't guarantee all required nodes loaded yet due to caching in osm2pgsql
NEW.rank_search := 28;
NEW.rank_address := 0;
END IF;
ELSEIF NEW.class = 'boundary' THEN
IF ST_GeometryType(NEW.geometry) NOT IN ('ST_Polygon','ST_MultiPolygon') THEN
-- RAISE WARNING 'invalid boundary %',NEW.osm_id;
ELSEIF NEW.class = 'boundary' AND NOT is_area THEN
return NULL;
END IF;
NEW.rank_search := NEW.admin_level * 2;
IF NEW.type = 'administrative' THEN
NEW.rank_address := NEW.rank_search;
ELSE
NEW.rank_address := 0;
END IF;
ELSEIF NEW.class = 'landuse' AND ST_GeometryType(NEW.geometry) in ('ST_Polygon','ST_MultiPolygon') THEN
NEW.rank_search := 22;
IF NEW.type in ('residential', 'farm', 'farmyard', 'industrial', 'commercial', 'allotments', 'retail') THEN
NEW.rank_address := NEW.rank_search;
ELSE
NEW.rank_address := 0;
END IF;
ELSEIF NEW.class = 'leisure' and NEW.type in ('park') THEN
NEW.rank_search := 24;
NEW.rank_address := 0;
ELSEIF NEW.class = 'natural' and NEW.type in ('peak','volcano','mountain_range') THEN
NEW.rank_search := 18;
NEW.rank_address := 0;
ELSEIF NEW.class = 'natural' and NEW.type = 'sea' THEN
NEW.rank_search := 4;
NEW.rank_address := NEW.rank_search;
-- any feature more than 5 square miles is probably worth indexing
ELSEIF ST_GeometryType(NEW.geometry) in ('ST_Polygon','ST_MultiPolygon') AND ST_Area(NEW.geometry) > 0.1 THEN
NEW.rank_search := 22;
NEW.rank_address := 0;
ELSEIF NEW.class = 'railway' AND NEW.type in ('rail') THEN
RETURN NULL;
ELSEIF NEW.class = 'waterway' THEN
IF NEW.osm_type = 'R' THEN
NEW.rank_search := 16;
return NULL;
ELSEIF NEW.osm_type = 'N' AND NEW.class = 'highway' THEN
NEW.rank_search = 30;
NEW.rank_address = 0;
ELSEIF NEW.class = 'landuse' AND NOT is_area THEN
NEW.rank_search = 30;
NEW.rank_address = 0;
ELSE
NEW.rank_search := 17;
END IF;
NEW.rank_address := 0;
ELSEIF NEW.class = 'highway' AND NEW.osm_type != 'N' AND NEW.type in ('service','cycleway','path','footway','steps','bridleway','motorway_link','primary_link','trunk_link','secondary_link','tertiary_link') THEN
NEW.rank_search := 27;
NEW.rank_address := NEW.rank_search;
ELSEIF NEW.class = 'highway' AND NEW.osm_type != 'N' THEN
NEW.rank_search := 26;
NEW.rank_address := NEW.rank_search;
ELSEIF NEW.class = 'mountain_pass' THEN
NEW.rank_search := 20;
NEW.rank_address := 0;
-- do table lookup stuff
IF NEW.class = 'boundary' and NEW.type = 'administrative' THEN
classtype = NEW.type || NEW.admin_level::TEXT;
ELSE
classtype = NEW.type;
END IF;
SELECT l.rank_search, l.rank_address FROM address_levels l
WHERE (l.country_code = NEW.country_code or l.country_code is NULL)
AND l.class = NEW.class AND (l.type = classtype or l.type is NULL)
ORDER BY l.country_code, l.class, l.type LIMIT 1
INTO NEW.rank_search, NEW.rank_address;
END IF;
IF NEW.rank_search > 30 THEN
IF NEW.rank_search is NULL THEN
NEW.rank_search := 30;
END IF;
IF NEW.rank_address > 30 THEN
IF NEW.rank_address is NULL THEN
NEW.rank_address := 30;
END IF;
END IF;
-- some postcorrections
IF NEW.class = 'place' THEN
IF NEW.type in ('continent', 'sea', 'country', 'state') AND NEW.osm_type = 'N' THEN
NEW.rank_address := 0;
END IF;
ELSEIF NEW.class = 'waterway' AND NEW.osm_type = 'R' THEN
-- Slightly promote waterway relations so that they are processed
-- before their members.
NEW.rank_search := NEW.rank_search - 1;
END IF;
IF (NEW.extratags -> 'capital') = 'yes' THEN
NEW.rank_search := NEW.rank_search - 1;
END IF;
END IF;
-- a country code makes no sense below rank 4 (country)
IF NEW.rank_search < 4 THEN
NEW.country_code := NULL;

View File

@ -26,8 +26,8 @@ Feature: Import into placex
| R1 | boundary | administrative | 2 | de | (-100 40, -101 40, -101 41, -100 41, -100 40) |
When importing
Then placex contains
| object | addr+country | country_code |
| R1 | de | de |
| object | rank_search| addr+country | country_code |
| R1 | 4 | de | de |
Scenario: Illegal country code tag for countries is ignored
Given the named places
@ -157,9 +157,6 @@ Feature: Import into placex
| N36 | place | house |
| N37 | place | building |
| N38 | place | houses |
And the named places
| osm | class | type | extra+locality |
| N100 | place | locality | townland |
And the named places
| osm | class | type | extra+capital |
| N101 | place | city | yes |
@ -191,7 +188,6 @@ Feature: Import into placex
| N32 | 20 | 0 |
| N33 | 20 | 0 |
| N34 | 20 | 0 |
| N100 | 20 | 20 |
| N101 | 15 | 16 |
| N35 | 22 | 22 |
| N36 | 30 | 30 |
@ -222,8 +218,8 @@ Feature: Import into placex
| object | rank_search | rank_address |
| R20 | 4 | 4 |
| R21 | 30 | 30 |
| R22 | 12 | 0 |
| R23 | 20 | 0 |
| R22 | 30 | 30 |
| R23 | 30 | 30 |
| R40 | 4 | 4 |
| R41 | 8 | 8 |
@ -243,7 +239,7 @@ Feature: Import into placex
When importing
Then placex contains
| object | rank_search | rank_address |
| N1 | 30 | 30 |
| N1 | 30 | 0 |
| W1 | 26 | 26 |
| W2 | 26 | 26 |
| W3 | 26 | 26 |
@ -264,8 +260,8 @@ Feature: Import into placex
When importing
Then placex contains
| object | rank_search | rank_address |
| N2 | 30 | 30 |
| W2 | 30 | 30 |
| N2 | 30 | 0 |
| W2 | 30 | 0 |
| W4 | 22 | 22 |
| R2 | 22 | 22 |
| R3 | 22 | 0 |
@ -289,11 +285,11 @@ Feature: Import into placex
| object | rank_search | rank_address |
| N2 | 18 | 0 |
| N4 | 18 | 0 |
| N5 | 30 | 30 |
| N5 | 22 | 0 |
| W2 | 18 | 0 |
| R3 | 18 | 0 |
| R4 | 22 | 0 |
| R5 | 4 | 4 |
| R6 | 4 | 4 |
| W3 | 30 | 30 |
| R5 | 4 | 0 |
| R6 | 4 | 0 |
| W3 | 22 | 0 |

View File

@ -34,7 +34,7 @@ Feature: Update of simple objects
When importing
Then placex contains
| object | rank_address |
| R1 | 0 |
| R1 | 30 |
| W1 | 30 |
When marking for delete R1,W1
Then placex has no entry for W1
@ -103,4 +103,4 @@ Feature: Update of simple objects
| W1 | boundary | historic | Haha | 5 | (1, 2, 4, 3, 1) |
Then placex contains
| object | rank_address |
| W1 | 0 |
| W1 | 30 |