From caa8210112d608609f6825dff6893b6e9c83b7c6 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Tue, 27 Nov 2018 00:17:00 +0100 Subject: [PATCH 01/11] Switch to configurable style for osm2pgsql Includes the full style, which is the same as now (minus sidwalk exclusion) and a minimal style for boundaries only. --- lib/setup/SetupClass.php | 22 ++- settings/defaults.php | 1 + settings/import-admin.style | 64 +++++++ settings/import-full.style | 233 +++++++++++++++++++++++++ test/bdd/osm2pgsql/import/tags.feature | 12 +- utils/update.php | 2 +- 6 files changed, 315 insertions(+), 19 deletions(-) create mode 100644 settings/import-admin.style create mode 100644 settings/import-full.style diff --git a/lib/setup/SetupClass.php b/lib/setup/SetupClass.php index d7de0145..e6b07998 100755 --- a/lib/setup/SetupClass.php +++ b/lib/setup/SetupClass.php @@ -10,7 +10,7 @@ class SetupFunctions protected $iInstances; protected $sModulePath; protected $aDSNInfo; - protected $sVerbose; + protected $bVerbose; protected $sIgnoreErrors; protected $bEnableDiffUpdates; protected $bEnableDebugStatements; @@ -46,7 +46,7 @@ class SetupFunctions } // setting member variables based on command line options stored in $aCMDResult - $this->sVerbose = $aCMDResult['verbose']; + $this->bVerbose = $aCMDResult['verbose']; //setting default values which are not set by the update.php array if (isset($aCMDResult['ignore-errors'])) { @@ -189,6 +189,8 @@ class SetupFunctions fail("osm2pgsql not found in '$osm2pgsql'"); } + $osm2pgsql .= ' -S '.CONST_Import_Style; + if (!is_null(CONST_Osm2pgsql_Flatnode_File) && CONST_Osm2pgsql_Flatnode_File) { $osm2pgsql .= ' --flat-nodes '.CONST_Osm2pgsql_Flatnode_File; } @@ -405,7 +407,7 @@ class SetupFunctions $sSQL .= " and not (class='place' and type='houses' and osm_type='W'"; $sSQL .= " and ST_GeometryType(geometry) = 'ST_LineString')"; $sSQL .= ' and ST_IsValid(geometry)'; - if ($this->sVerbose) echo "$sSQL\n"; + if ($this->bVerbose) echo "$sSQL\n"; if 
(!pg_send_query($aDBInstances[$i]->connection, $sSQL)) { fail(pg_last_error($aDBInstances[$i]->connection)); } @@ -417,7 +419,7 @@ class SetupFunctions $sSQL .= ' (osm_id, address, linegeo)'; $sSQL .= ' SELECT osm_id, address, geometry from place where '; $sSQL .= "class='place' and type='houses' and osm_type='W' and ST_GeometryType(geometry) = 'ST_LineString'"; - if ($this->sVerbose) echo "$sSQL\n"; + if ($this->bVerbose) echo "$sSQL\n"; if (!pg_send_query($aDBInstances[$iLoadThreads]->connection, $sSQL)) { fail(pg_last_error($aDBInstances[$iLoadThreads]->connection)); } @@ -708,7 +710,7 @@ class SetupFunctions if (!$bFound) array_push($aDropTables, $sTable); } foreach ($aDropTables as $sDrop) { - if ($this->sVerbose) echo "Dropping table $sDrop\n"; + if ($this->bVerbose) echo "Dropping table $sDrop\n"; @pg_query($this->oDB->connection, "DROP TABLE $sDrop CASCADE"); // ignore warnings/errors as they might be caused by a table having // been deleted already by CASCADE @@ -716,7 +718,7 @@ class SetupFunctions if (!is_null(CONST_Osm2pgsql_Flatnode_File) && CONST_Osm2pgsql_Flatnode_File) { if (file_exists(CONST_Osm2pgsql_Flatnode_File)) { - if ($this->sVerbose) echo 'Deleting '.CONST_Osm2pgsql_Flatnode_File."\n"; + if ($this->bVerbose) echo 'Deleting '.CONST_Osm2pgsql_Flatnode_File."\n"; unlink(CONST_Osm2pgsql_Flatnode_File); } } @@ -740,7 +742,7 @@ class SetupFunctions runSQLScript( $sScript, $bfatal, - $this->sVerbose, + $this->bVerbose, $this->sIgnoreErrors ); } @@ -794,7 +796,7 @@ class SetupFunctions if (!file_exists($sFilename)) fail('unable to find '.$sFilename); $sCMD = 'psql -p '.$this->aDSNInfo['port'].' 
-d '.$this->aDSNInfo['database']; - if (!$this->sVerbose) { + if (!$this->bVerbose) { $sCMD .= ' -q'; } if (isset($this->aDSNInfo['hostspec'])) { @@ -857,6 +859,10 @@ class SetupFunctions private function runWithPgEnv($sCmd) { + if ($this->bVerbose) { + echo "Execute: $sCmd\n"; + } + $aProcEnv = null; if (isset($this->aDSNInfo['password'])) { diff --git a/settings/defaults.php b/settings/defaults.php index 2b3ddeb2..2a2aea36 100644 --- a/settings/defaults.php +++ b/settings/defaults.php @@ -51,6 +51,7 @@ if (isset($_GET['debug']) && $_GET['debug']) @define('CONST_Debug', true); @define('CONST_Wikipedia_Data_Path', CONST_ExtraDataPath); @define('CONST_Phrase_Config', CONST_BasePath.'/settings/phrase_settings.php'); @define('CONST_Address_Level_Config', CONST_BasePath.'/settings/address-levels.json'); +@define('CONST_Import_Style', CONST_BasePath.'/settings/import-full.style'); // osm2pgsql settings @define('CONST_Osm2pgsql_Flatnode_File', null); diff --git a/settings/import-admin.style b/settings/import-admin.style new file mode 100644 index 00000000..89a1eb16 --- /dev/null +++ b/settings/import-admin.style @@ -0,0 +1,64 @@ +[ +{ + "keys" : ["name:prefix", "name:suffix", "name:botanical", "*wikidata"], + "values" : { + "" : "skip" + } +}, +{ + "keys" : ["ref", "int_ref", "nat_ref", "reg_ref", "loc_ref", "old_ref", + "iata", "icao", "pcode"], + "values" : { + "" : "ref" + } +}, +{ + "keys" : ["name", "name:*", "int_name", "int_name:*", "nat_name", "nat_name:*", + "reg_name", "reg_name:*", "loc_name", "loc_name:*", + "old_name", "old_name:*", "alt_name", "alt_name:*", "alt_name_*", + "official_name", "official_name:*", "place_name", "place_name:*", + "short_name", "short_name:*", "brand"], + "values" : { + "" : "name" + } +}, +{ + "keys" : ["landuse"], + "values" : { + "cemetry" : "skip", + "" : "fallback" + } +}, +{ + "keys" : ["boundary"], + "values" : { + "administrative" : "main" + } +}, +{ + "keys" : ["place"], + "values" : { + "" : "main" + } +}, +{ + "keys" : 
["postal_code", "postcode", "addr:postcode", + "tiger:zip_left", "tiger:zip_right"], + "values" : { + "" : "postcode" + } +}, +{ + "keys" : ["country_code", "ISO3166-1", "is_in:country_code", "is_in_country", + "addr:country", "addr:country", "addr:country_code"], + "values" : { + "" : "country" + } +}, +{ + "keys" : ["addr:*", "is_in:*", "tiger:county"], + "values" : { + "" : "address" + } +} +] diff --git a/settings/import-full.style b/settings/import-full.style new file mode 100644 index 00000000..f84b9305 --- /dev/null +++ b/settings/import-full.style @@ -0,0 +1,233 @@ +[ +{ + "keys" : ["*source"], + "values" : { + "" : "skip" + } +}, +{ + "keys" : ["name:prefix", "name:suffix", "name:botanical", "wikidata", + "*:wikidata"], + "values" : { + "" : "extra" + } +}, +{ + "keys" : ["ref", "int_ref", "nat_ref", "reg_ref", "loc_ref", "old_ref", + "iata", "icao", "pcode", "pcode:*"], + "values" : { + "" : "ref" + } +}, +{ + "keys" : ["name", "name:*", "int_name", "int_name:*", "nat_name", "nat_name:*", + "reg_name", "reg_name:*", "loc_name", "loc_name:*", + "old_name", "old_name:*", "alt_name", "alt_name:*", "alt_name_*", + "official_name", "official_name:*", "place_name", "place_name:*", + "short_name", "short_name:*", "brand"], + "values" : { + "" : "name" + } +}, +{ + "keys" : ["addr:housename"], + "values" : { + "" : "name,house" + } +}, +{ + "keys" : ["emergency"], + "values" : { + "fire_hydrant" : "skip", + "yes" : "skip", + "no" : "skip", + "" : "main" + } +}, +{ + "keys" : ["historic", "military"], + "values" : { + "no" : "skip", + "yes" : "skip", + "" : "main" + } +}, +{ + "keys" : ["natural"], + "values" : { + "yes" : "skip", + "no" : "skip", + "coastline" : "skip", + "" : "main,with_name" + } +}, +{ + "keys" : ["landuse"], + "values" : { + "cemetry" : "main,with_name", + "" : "main,fallback,with_name" + } +}, +{ + "keys" : ["highway"], + "values" : { + "no" : "skip", + "turning_circle" : "skip", + "mini_roundabout" : "skip", + "noexit" : "skip", + "crossing" 
: "skip", + "traffic_signals" : "main,with_name", + "service" : "main,with_name", + "cycleway" : "main,with_name", + "path" : "main,with_name", + "footway" : "main,with_name", + "steps" : "main,with_name", + "bridleway" : "main,with_name", + "track" : "main,with_name", + "byway": "main,with_name", + "motorway_link" : "main,with_name", + "trunk_link" : "main,with_name", + "primary_link" : "main,with_name", + "secondary_link" : "main,with_name", + "tertiary_link" : "main,with_name", + "" : "main" + } +}, +{ + "keys" : ["railway"], + "values" : { + "level_crossing" : "skip", + "no" : "skip", + "" : "main,with_name" + } +}, +{ + "keys" : ["man_made"], + "values" : { + "survey_point" : "skip", + "cutline" : "skip", + "" : "main" + } +}, +{ + "keys" : ["aerialway"], + "values" : { + "pylon" : "skip", + "no" : "skip", + "" : "main" + } +}, +{ + "keys" : ["boundary"], + "values" : { + "" : "main,with_name" + } +}, +{ + "keys" : ["amenity"], + "values" : { + "restaurant" : "main,operator", + "fuel" : "main,operator" + } +}, +{ + "keys" : ["aeroway", "amenity", "club", "craft", "leisure", + "office", "mountain_pass"], + "values" : { + "no" : "skip", + "" : "main" + } +}, +{ + "keys" : ["shop"], + "values" : { + "no" : "skip", + "" : "main,operator" + } +}, +{ + "keys" : ["tourism"], + "values" : { + "yes" : "skip", + "no" : "skip", + "" : "main,operator" + } +}, +{ + "keys" : ["bridge", "tunnel"], + "values" : { + "" : "main,with_name_key" + } +}, +{ + "keys" : ["waterway"], + "values" : { + "riverbank" : "skip", + "" : "main,with_name" + } +}, +{ + "keys" : ["place"], + "values" : { + "" : "main" + } +}, +{ + "keys" : ["junction"], + "values" : { + "" : "main,fallback,with_name" + } +}, +{ + "keys" : ["postal_code", "postcode", "addr:postcode", + "tiger:zip_left", "tiger:zip_right"], + "values" : { + "" : "postcode" + } +}, +{ + "keys" : ["country_code", "ISO3166-1", "is_in:country_code", "is_in_country", + "addr:country", "addr:country", "addr:country_code"], + "values" : 
{ + "" : "country" + } +}, +{ + "keys" : ["addr:housenumber", "addr:conscriptionnumber", "addr:streetnumber"], + "values" : { + "" : "address,house" + } +}, +{ + "keys" : ["addr:*", "is_in:*", "tiger:county", "is_in"], + "values" : { + "" : "address" + } +}, +{ + "keys" : ["building"], + "values" : { + "no" : "skip", + "" : "main,fallback,with_name" + } +}, +{ + "keys" : ["tracktype", "traffic_calming", "service", "cuisine", "capital", + "dispensing", "religion", "denomination", "sport", + "internet_access", "lanes", "surface", "smoothness", "width", + "est_width", "incline", "opening_hours", "collection_times", + "service_times", "disused", "wheelchair", "sac_scale", + "trail_visibility", "mtb:scale", "mtb:description", "wood", + "drive_through", "drive_in", "access", "vehicle", "bicyle", + "foot", "goods", "hgv", "motor_vehicle", "motor_car", "oneway", + "date_on", "date_off", "day_on", "day_off", "hour_on", "hour_off", + "maxweight", "maxheight", "maxspeed", "fee", "toll", "charge", + "population", "description", "image", "attribution", "fax", + "email", "url", "website", "phone", "real_ale", "smoking", + "food", "camera", "brewery", "locality", "wikipedia", + "wikipedia:*", "access:*", "contact:*", "drink:*", "toll:*"], + "values" : { + "" : "extra" + } +} +] diff --git a/test/bdd/osm2pgsql/import/tags.feature b/test/bdd/osm2pgsql/import/tags.feature index 2e44e145..94506fec 100644 --- a/test/bdd/osm2pgsql/import/tags.feature +++ b/test/bdd/osm2pgsql/import/tags.feature @@ -231,14 +231,6 @@ Feature: Tag evaluation | boundary | administrative | | waterway | stream | - Scenario: Footways are not included if they are sidewalks - When loading osm data - """ - n2 Thighway=footway,name=To%20%Hell,footway=sidewalk - n23 Thighway=footway,name=x - """ - Then place has no entry for N2 - Scenario: named junctions are included if there is no other tag When loading osm data """ @@ -538,9 +530,9 @@ Feature: Tag evaluation Then place contains | object | class | type | | N10 | 
tourism | hotel | - | N12 | building| yes | + | N12 | building| shed | | N13 | building| yes | - | N14 | building| yes | + | N14 | place | postcode | And place has no entry for N10:building And place has no entry for N11 diff --git a/utils/update.php b/utils/update.php index c933d26e..3086a70b 100644 --- a/utils/update.php +++ b/utils/update.php @@ -63,7 +63,7 @@ if ($iCacheMemory + 500 > getTotalMemoryMB()) { $iCacheMemory = getCacheMemoryMB(); echo "WARNING: resetting cache memory to $iCacheMemory\n"; } -$sOsm2pgsqlCmd = CONST_Osm2pgsql_Binary.' -klas --number-processes 1 -C '.$iCacheMemory.' -O gazetteer -d '.$aDSNInfo['database'].' -P '.$aDSNInfo['port']; +$sOsm2pgsqlCmd = CONST_Osm2pgsql_Binary.' -klas --number-processes 1 -C '.$iCacheMemory.' -O gazetteer -S '.CONST_Import_Style.' -d '.$aDSNInfo['database'].' -P '.$aDSNInfo['port']; if (isset($aDSNInfo['username']) && $aDSNInfo['username']) { $sOsm2pgsqlCmd .= ' -U ' . $aDSNInfo['username']; } From a90ace7fa111bcff511eafc7457ac3aca310f96d Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Sun, 2 Dec 2018 17:38:25 +0100 Subject: [PATCH 02/11] add documentation for new import style --- docs/develop/Import.md | 170 +++++++++++++++++++++++++++++++++++++++++ docs/mkdocs.yml | 1 + 2 files changed, 171 insertions(+) create mode 100644 docs/develop/Import.md diff --git a/docs/develop/Import.md b/docs/develop/Import.md new file mode 100644 index 00000000..ddddb1a8 --- /dev/null +++ b/docs/develop/Import.md @@ -0,0 +1,170 @@ +# OSM Data Import + +OSM data is initially imported using osm2pgsql. Nominatim uses its own data +output style 'gazetteer', which differs from the output style created for +map rendering. 
+ +## Database Layout + +The gazetteer style produces a single table `place` with the following columns: + + * `osm_type` - kind of OSM object (**N** - node, **W** - way, **R** - relation) + * `osm_id` - original OSM ID + * `class` - key of principal tag defining the object type + * `type` - value of principal tag defining the object type + * `name` - collection of tags that contain a name or reference + * `admin_level` - numerical value of the tagged administrative level + * `address` - collection of tags defining the address of an object + * `extratags` - collection of additional interesting tags that are not + directly relevant for searching + * `geometry` - geometry of the object (in WGS84) + +A single OSM object may appear multiple times in this table when it is tagged +with multiple tags that may constitute a principal tag. Take for example a +motorway bridge. In OSM, this would be a way which is tagged with +`highway=motorway` and `bridge=yes`. This way would appear in the `place` table +once with a `class` of `highway` and once with a `class` of `bridge`. Thus the +*unique key* for `place` is (`osm_type`, `osm_id`, `class`). + +## Configuring the Import + +How tags are interpreted and assigned to the different `place` columns can be +configured via the import style configuration file (`CONST_Import_Style`). This +is a JSON file which contains a list of rules which are matched against every +tag of every object and then assign the tag its specific role. + +### Configuration Rules + +A single rule looks like this: + +```json +{ + "keys" : ["key1", "key2", ...], + "values" : { + "value1" : "prop", + "value2" : "prop1,prop2" + } +} +``` + +A rule first defines a list of keys to apply the rule to. This is always a list +of strings. The string may have four forms. An empty string matches against +any key. A string that ends in an asterisk `*` is a prefix match and accordingly +matches against any key that starts with the given string (minus the `*`). 
A +suffix match can be defined similarly with a string that starts with a `*`. Any +other string constitutes an exact match. + +The second part of the rule defines a list of values and the properties that +apply to a successful match. Value strings may be either empty, which again +means that they match against any value, or describe an exact match. Prefix +or suffix matching of values is not possible. + +For a rule to match, it has to find a valid combination of keys and values. The +resulting property is that of the matched values. + +The rules in a configuration file are processed sequentially and the first +match for each tag wins. + +A rule where key and value are the empty string is special. This defines the +fallback when none of the rules matches. The fallback is always used as a last +resort when nothing else matches, no matter where the rule appears in the file. +Defining multiple fallback rules is not allowed. What happens in this case +is undefined. + +### Tag Properties + +One or more of the following properties may be given for each tag: + +* `main` + + A principal tag. A new row will be added for the object with key and value + as `class` and `type`. + +* `with_name` + + When the tag is a principal tag (`main` property set): only really add a new + row, if there is any name tag found (a reference tag is not sufficient, see + below). + +* `with_name_key` + + When the tag is a principal tag (`main` property set): only really add a new + row, if there is also a name tag that matches the key of the principal tag. + For example, if the main tag is `bridge=yes`, then it will only be added as + an extra row, if there is a tag `bridge:name[:XXX]` for the same object. + If this property is set, all other names that are not domain-specific are + ignored. + +* `fallback` + + When the tag is a principal tag (`main` property set): only really add a new + row, when no other principal tags for this object have been found. 
Only one + fallback tag can win for an object. + +* `operator` + + When the tag is a principal tag (`main` property set): also include the + `operator` tag in the list of names. This is a special construct for an + outdated tagging practice in OSM. Fuel stations and chain restaurants + in particular used to have the name of the chain tagged as `operator`. + These days the chain can be more commonly found in the `brand` tag but + there is still enough old data around to warrant this special case. + +* `name` + + Add tag to the list of names. + +* `ref` + + Add tag to the list of names as a reference. At the moment this only means + that the object is not considered to be named for `with_name`. + +* `address` + + Add tag to the list of address tags. If the tag starts with `addr:` or + `is_in:`, then this prefix is cut off before adding it to the list. + +* `postcode` + + Add the value as a postcode to the address tags. If multiple tags are + candidates for postcodes, one wins out and the others are dropped. + +* `country` + + Add the value as a country code to the address tags. The value must be a + two letter country code, otherwise it is ignored. If there are multiple + tags that match, then one wins out and the others are dropped. + +* `house` + + If no principal tags can be found for the object, still add the object with + `class`=`place` and `type`=`house`. Use this for address nodes that have no + other function. + +* `interpolation` + + Add this object as an address interpolation (appears as `class`=`place` and + `type`=`houses` in the database). + +* `extra` + + Add tag to the list of extra tags. + +* `skip` + + Skip the tag completely. Useful when a custom default fallback is defined + or to define exceptions to rules. + +A rule can define as many of these properties for one match as it likes. For +example, if the property is `"main,extra"` then the tag will open a new row +but also have the tag appear in the list of extra tags. 
+ +There are a number of pre-defined styles in the `settings/` directory. It is +advisable to start from one of these styles when defining your own. + +### Changing the Style of Existing Databases + +There is normally no issue changing the style of a database that is already +imported and now kept up-to-date with change files. Just be aware that any +change in the style applies to updates only. If you want to change the data +that is already in the database, then a reimport is necessary. diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml index e59ad569..ef39bcb7 100644 --- a/docs/mkdocs.yml +++ b/docs/mkdocs.yml @@ -20,6 +20,7 @@ pages: - 'Troubleshooting' : 'admin/Faq.md' - 'Developers Guide': - 'Overview' : 'develop/overview.md' + - 'OSM Data Import' : 'develop/Import.md' - 'Place Ranking' : 'develop/Ranking.md' - 'External Data Sources': - 'Overview' : 'data-sources/overview.md' From e6d18fc948121db50660751994bb8ba0ba651fa6 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Sun, 2 Dec 2018 23:05:45 +0100 Subject: [PATCH 03/11] fixup admin import style and add two new ones Remove unnamed landuses and postcode points from importing. The latter will cause all objects with address tags to be imported after all. Not expected in the admin import style. 
--- settings/import-address.style | 116 ++++++++++++++++++++++++++++++++++ settings/import-admin.style | 9 +-- settings/import-street.style | 85 +++++++++++++++++++++++++ 3 files changed, 202 insertions(+), 8 deletions(-) create mode 100644 settings/import-address.style create mode 100644 settings/import-street.style diff --git a/settings/import-address.style b/settings/import-address.style new file mode 100644 index 00000000..8390f8f0 --- /dev/null +++ b/settings/import-address.style @@ -0,0 +1,116 @@ +[ +{ + "keys" : [ "" ], + "values" : { + "no" : "skip" + } +}, +{ + "keys" : ["name:prefix", "name:suffix", "name:botanical", "*wikidata"], + "values" : { + "" : "skip" + } +}, +{ + "keys" : ["ref", "int_ref", "nat_ref", "reg_ref", "loc_ref", "old_ref", + "iata", "icao", "pcode"], + "values" : { + "" : "ref" + } +}, +{ + "keys" : ["name", "name:*", "int_name", "int_name:*", "nat_name", "nat_name:*", + "reg_name", "reg_name:*", "loc_name", "loc_name:*", + "old_name", "old_name:*", "alt_name", "alt_name:*", "alt_name_*", + "official_name", "official_name:*", "place_name", "place_name:*", + "short_name", "short_name:*", "brand"], + "values" : { + "" : "name" + } +}, +{ + "keys" : ["landuse"], + "values" : { + "cemetry" : "skip", + "" : "fallback,with-name" + } +}, +{ + "keys" : ["boundary"], + "values" : { + "administrative" : "main" + } +}, +{ + "keys" : ["place"], + "values" : { + "" : "main" + } +}, +{ + "keys" : ["addr:housename"], + "values" : { + "" : "name,house" + } +}, +{ + "keys" : ["addr:housenumber", "addr:conscriptionnumber", "addr:streetnumber"], + "values" : { + "" : "address,house" + } +}, +{ + "keys" : ["addr:*", "is_in:*", "tiger:county", "is_in"], + "values" : { + "" : "address" + } +}, +{ + "keys" : ["postal_code", "postcode", "addr:postcode", + "tiger:zip_left", "tiger:zip_right"], + "values" : { + "" : "postcode" + } +}, +{ + "keys" : ["country_code", "ISO3166-1", "is_in:country_code", "is_in_country", + "addr:country", "addr:country", 
"addr:country_code"], + "values" : { + "" : "country" + } +}, +{ + "keys" : ["addr:*", "is_in:*", "tiger:county"], + "values" : { + "" : "address" + } +}, +{ + "keys" : ["highway"], + "values" : { + "motorway" : "main", + "trunk" : "main", + "primary" : "main", + "secondary" : "main", + "tertiary" : "main", + "unclassified" : "main", + "residential" : "main", + "living_street" : "main", + "pedestrian" : "main", + "road" : "main", + "service" : "main,with_name", + "cycleway" : "main,with_name", + "path" : "main,with_name", + "footway" : "main,with_name", + "steps" : "main,with_name", + "bridleway" : "main,with_name", + "track" : "main,with_name", + "byway": "main,with_name", + "motorway_link" : "main,with_name", + "trunk_link" : "main,with_name", + "primary_link" : "main,with_name", + "secondary_link" : "main,with_name", + "tertiary_link" : "main,with_name" + } +} +] diff --git a/settings/import-admin.style b/settings/import-admin.style index 89a1eb16..f6251260 100644 --- a/settings/import-admin.style +++ b/settings/import-admin.style @@ -26,7 +26,7 @@ "keys" : ["landuse"], "values" : { "cemetry" : "skip", - "" : "fallback" + "" : "fallback,with-name" } }, { @@ -41,13 +41,6 @@ "" : "main" } }, -{ - "keys" : ["postal_code", "postcode", "addr:postcode", - "tiger:zip_left", "tiger:zip_right"], - "values" : { - "" : "postcode" - } -}, { "keys" : ["country_code", "ISO3166-1", "is_in:country_code", "is_in_country", "addr:country", "addr:country", "addr:country_code"], diff --git a/settings/import-street.style b/settings/import-street.style new file mode 100644 index 00000000..db2cd2a8 --- /dev/null +++ b/settings/import-street.style @@ -0,0 +1,85 @@ +[ +{ + "keys" : ["name:prefix", "name:suffix", "name:botanical", "*wikidata"], + "values" : { + "" : "skip" + } +}, +{ + "keys" : ["ref", "int_ref", "nat_ref", "reg_ref", "loc_ref", "old_ref", + "iata", "icao", "pcode"], + "values" : { + "" : "ref" + } +}, +{ + "keys" : ["name", "name:*", "int_name", "int_name:*", "nat_name", 
"nat_name:*", + "reg_name", "reg_name:*", "loc_name", "loc_name:*", + "old_name", "old_name:*", "alt_name", "alt_name:*", "alt_name_*", + "official_name", "official_name:*", "place_name", "place_name:*", + "short_name", "short_name:*", "brand"], + "values" : { + "" : "name" + } +}, +{ + "keys" : ["landuse"], + "values" : { + "cemetry" : "skip", + "" : "fallback,with-name" + } +}, +{ + "keys" : ["boundary"], + "values" : { + "administrative" : "main" + } +}, +{ + "keys" : ["place"], + "values" : { + "" : "main" + } +}, +{ + "keys" : ["country_code", "ISO3166-1", "is_in:country_code", "is_in_country", + "addr:country", "addr:country", "addr:country_code"], + "values" : { + "" : "country" + } +}, +{ + "keys" : ["addr:*", "is_in:*", "tiger:county"], + "values" : { + "" : "address" + } +}, +{ + "keys" : ["highway"], + "values" : { + "motorway" : "main", + "trunk" : "main", + "primary" : "main", + "secondary" : "main", + "tertiary" : "main", + "unclassified" : "main", + "residential" : "main", + "living_street" : "main", + "pedestrian" : "main", + "road" : "main", + "service" : "main,with_name", + "cycleway" : "main,with_name", + "path" : "main,with_name", + "footway" : "main,with_name", + "steps" : "main,with_name", + "bridleway" : "main,with_name", + "track" : "main,with_name", + "byway": "main,with_name", + "motorway_link" : "main,with_name", + "trunk_link" : "main,with_name", + "primary_link" : "main,with_name", + "secondary_link" : "main,with_name", + "tertiary_link" : "main,with_name" + } +} +] From 31bf7443a6bde7d0842ed381df615e6040965010 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Sun, 2 Dec 2018 23:09:11 +0100 Subject: [PATCH 04/11] fix typo --- settings/import-address.style | 2 +- settings/import-admin.style | 2 +- settings/import-street.style | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/settings/import-address.style b/settings/import-address.style index 8390f8f0..b79b5bc9 100644 --- a/settings/import-address.style +++ 
b/settings/import-address.style @@ -32,7 +32,7 @@ "keys" : ["landuse"], "values" : { "cemetry" : "skip", - "" : "fallback,with-name" + "" : "fallback,with_name" } }, { diff --git a/settings/import-admin.style b/settings/import-admin.style index f6251260..df7b31d2 100644 --- a/settings/import-admin.style +++ b/settings/import-admin.style @@ -26,7 +26,7 @@ "keys" : ["landuse"], "values" : { "cemetry" : "skip", - "" : "fallback,with-name" + "" : "fallback,with_name" } }, { diff --git a/settings/import-street.style b/settings/import-street.style index db2cd2a8..42419275 100644 --- a/settings/import-street.style +++ b/settings/import-street.style @@ -26,7 +26,7 @@ "keys" : ["landuse"], "values" : { "cemetry" : "skip", - "" : "fallback,with-name" + "" : "fallback,with_name" } }, { From 4c10294a29b4019588fbed4b0c8c595b2f909583 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Tue, 4 Dec 2018 22:22:19 +0100 Subject: [PATCH 05/11] document import style variants --- docs/admin/Import-and-Update.md | 57 +++++++++++++++++++++++++++------ 1 file changed, 47 insertions(+), 10 deletions(-) diff --git a/docs/admin/Import-and-Update.md b/docs/admin/Import-and-Update.md index 179b729e..bebc3645 100644 --- a/docs/admin/Import-and-Update.md +++ b/docs/admin/Import-and-Update.md @@ -58,10 +58,56 @@ Nominatim can use postcodes from an external source to improve searches that inv cd $NOMINATIM_SOURCE_DIR/data wget https://www.nominatim.org/data/gb_postcode_data.sql.gz +## Choosing the Data to Import + +In its default setup, Nominatim is configured to import the full OSM data +set for the entire planet. Such a setup requires a powerful machine with +at least 32GB of RAM and around 800GB of SSD hard disks. Depending on your +use case there are various ways to reduce the amount of data imported. This +section discusses these methods. They can also be combined. 
+ +### Using an extract + +If you only need geocoding for a smaller region, then precomputed extracts +are a good way to reduce the database size. +[Geofabrik](https://download.geofabrik.de) offers extracts for most countries +and also daily updates which can be used with the update process described +below. There are also +[other providers for extracts](https://wiki.openstreetmap.org/wiki/Planet.osm#Downloading). + +Please be aware that some extracts are not exactly cut along the country +boundaries. As a result some parts of the boundary may be missing and +Nominatim cannot compute the areas as a result. + +### Reverse-only Imports + +If you only want to use the Nominatim database for reverse lookups or +if you plan to use the installation only for exports to a +[photon](http://photon.komoot.de/) database, then you can set up a database +without search indexes. Add `--reverse-only` to your setup command above. + +This saves about 5% of disk space. + +### Filtering Imported Data + +Nominatim normally sets up a full search database containing administrative +boundaries, places, streets, addresses and POI data. There are also other +import styles available which only read selected data: + + + +style Import time DB size after drop +admin 5h 189 GB 20 GB +street +address +full + +You can also customize the styles further. For a description of the +style format see [the development section](../develop/Import.md). ## Initial import of the data -**Important:** first try the import with a small excerpt, for example from +**Important:** first try the import with a small extract, for example from [Geofabrik](https://download.geofabrik.de). Download the data to import and load the data with the following command: @@ -98,15 +144,6 @@ you also need to enable these key phrases like this: Note that this command downloads the phrases from the wiki link above. 
-### Reverse-only Imports - -If you only want to use the Nominatim database for reverse lookups or -if you plan to use the installation only for exports to a -[photon](http://photon.komoot.de/) database, then you can set up a database -without search indexes. Add `--reverse-only` to your setup command above. - -This saves about 5% of disk space. - ## Installing Tiger housenumber data for the US From 8e2e852b898b3ebba6e3c243b08ba02f7f44d4fc Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Wed, 2 Jan 2019 14:50:55 +0100 Subject: [PATCH 06/11] add postcodes and interpolations to osm2pgsql style --- settings/import-address.style | 4 ++-- settings/import-admin.style | 13 +++++++++++++ settings/import-full.style | 6 ++++++ 3 files changed, 21 insertions(+), 2 deletions(-) diff --git a/settings/import-address.style b/settings/import-address.style index b79b5bc9..1b5eda61 100644 --- a/settings/import-address.style +++ b/settings/import-address.style @@ -60,9 +60,9 @@ } }, { - "keys" : ["addr:*", "is_in:*", "tiger:county", "is_in"], + "keys" : ["addr:interpolation"], "values" : { - "" : "address" + "" : "interpolation,address" } }, { diff --git a/settings/import-admin.style b/settings/import-admin.style index df7b31d2..02a54f12 100644 --- a/settings/import-admin.style +++ b/settings/import-admin.style @@ -53,5 +53,18 @@ "values" : { "" : "address" } +}, +{ + "keys" : ["postal_code", "postcode", "addr:postcode", + "tiger:zip_left", "tiger:zip_right"], + "values" : { + "" : "postcode" + } +}, +{ + "keys" : ["capital"], + "values" : { + "" : "extra" + } } ] diff --git a/settings/import-full.style b/settings/import-full.style index f84b9305..3a743001 100644 --- a/settings/import-full.style +++ b/settings/import-full.style @@ -198,6 +198,12 @@ "" : "address,house" } }, +{ + "keys" : ["addr:interpolation"], + "values" : { + "" : "interpolation,address" + } +}, { "keys" : ["addr:*", "is_in:*", "tiger:county", "is_in"], "values" : { From e24ea7c1bb92c863055c0509c3ccf086aced483d 
Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Thu, 3 Jan 2019 21:08:16 +0100 Subject: [PATCH 07/11] add tests for import of interpolations --- test/bdd/osm2pgsql/import/tags.feature | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/test/bdd/osm2pgsql/import/tags.feature b/test/bdd/osm2pgsql/import/tags.feature index 94506fec..766bc7f1 100644 --- a/test/bdd/osm2pgsql/import/tags.feature +++ b/test/bdd/osm2pgsql/import/tags.feature @@ -544,3 +544,20 @@ Feature: Tag evaluation Then place contains | object | class | type | address | | N290393920 | place | house| 'city' : 'Perpignan', 'country' : 'FR', 'housenumber' : '43\\', 'postcode' : '66000', 'street' : 'Rue Pierre Constant d`Ivry' | + + Scenario: odd interpolation + When loading osm data + """ + n4 Taddr:housenumber=3 x0 y0 + n5 Taddr:housenumber=15 x0 y0.00001 + w12 Taddr:interpolation=odd Nn4,n5 + w13 Taddr:interpolation=even Nn4,n5 + w14 Taddr:interpolation=-3 Nn4,n5 + """ + Then place contains + | object | class | type | address | + | N4 | place | house | 'housenumber' : '3' | + | N5 | place | house | 'housenumber' : '15' | + | W12 | place | houses | 'interpolation' : 'odd' | + | W13 | place | houses | 'interpolation' : 'even' | + | W14 | place | houses | 'interpolation' : '-3' | From f1fe70656f88c558a6f4bf6a09adde4ef59487fb Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Thu, 3 Jan 2019 21:08:38 +0100 Subject: [PATCH 08/11] more style docs --- docs/admin/Import-and-Update.md | 57 +++++++++++++++++++++++++-------- docs/develop/Import.md | 2 +- 2 files changed, 45 insertions(+), 14 deletions(-) diff --git a/docs/admin/Import-and-Update.md b/docs/admin/Import-and-Update.md index bebc3645..4cd46ffb 100644 --- a/docs/admin/Import-and-Update.md +++ b/docs/admin/Import-and-Update.md @@ -60,7 +60,7 @@ Nominatim can use postcodes from an external source to improve searches that inv ## Choosing the Data to Import -In its default setup, Nominatim is configured to import the full OSM data +In 
its default setup Nominatim is configured to import the full OSM data set for the entire planet. Such a setup requires a powerful machine with at least 32GB of RAM and around 800GB of SSD hard disks. Depending on your use case there are various ways to reduce the amount of data imported. This @@ -69,15 +69,31 @@ section discusses these methods. They can also be combined. ### Using an extract If you only need geocoding for a smaller region, then precomputed extracts -are a good way to reduce the database size. -[Geofabrik](https://download.geofabrik.de) offers extracts for most countries -and also daily updates which can be used with the update process described +are a good way to reduce the database size and import time. +[Geofabrik](https://download.geofabrik.de) offers extracts for most countries. +They even have daily updates which can be used with the update process described below. There are also -[other providers for extracts](https://wiki.openstreetmap.org/wiki/Planet.osm#Downloading) +[other providers for extracts](https://wiki.openstreetmap.org/wiki/Planet.osm#Downloading). -Please be aware that some extracts are not exactly cut along the country -boundaries. As a result some parts of the boundary may be missing and -Nominatim cannot compute the areas as a result. +Please be aware that some extracts are not cut exactly along the country +boundaries. As a result some parts of the boundary may be missing which means +that Nominatim cannot compute the areas for some administrative areas. + +### Dropping Data Required for Dynamic Updates + +About half of the data in Nominatim's database is not really used for serving +the API. It is only there to allow the data to be updated from the latest +changes from OSM. For many uses these dynamic updates are not really required.
+If you don't plan to apply updates, the dynamic part of the database can be +safely dropped using the following command: + +``` +./utils/setup.php --drop +``` + +Note that you still need to provide for sufficient disk space for the initial +import. So this option is particularly interesting if you plan to transfer the +database or reuse the space later. ### Reverse-only Imports @@ -94,13 +110,28 @@ Nominatim normally sets up a full search database containing administrative boundaries, places, streets, addresses and POI data. There are also other import styles available which only read selected data: +* **settings/import-admin.style** + Only import administrative boundaries and places. +* **settings/import-street.style** + Like the admin style but also adds streets. +* **settings/import-address.style** + Import all data necessary to compute addresses down to house number level. +* **settings/import-full.style** + Default style that also includes points of interest. +The style can be changed with the configuration `CONST_Import_Style`. -style Import time DB size after drop -admin 5h 189 GB 20 GB -street -address -full +To give you an idea of the impact of using the different style, the table +below gives rough estimates of the final database size after import of a +2018 planet and after using the `--drop` option. It also shows the time +needed for the import on a machine with 32GB RAM, 4 CPUS and SSDs. + +style | Import time | DB size | after drop +----------|--------------|------------|------------ +admin | 5h | 189 GB | 20 GB +street | 42h | 396 GB | 174 GB +address | 59h | 497 GB | 251 GB +full | 80h | GB | GB You can also customize the styles further. For an description of the style format see [the developement section](../develop/Import.md). 
diff --git a/docs/develop/Import.md b/docs/develop/Import.md index ddddb1a8..e733c6fd 100644 --- a/docs/develop/Import.md +++ b/docs/develop/Import.md @@ -22,7 +22,7 @@ The gazetteer style produces a single table `place` with the following rows: A single OSM object may appear multiple times in this table when it is tagged with multiple tags that may constitute a principal tag. Take for example a motorway bridge. In OSM, this would be a way which is tagged with -`highway=motoway` and `bridge=yes`. This way would appear in the `place` table +`highway=motorway` and `bridge=yes`. This way would appear in the `place` table once with `class` of `highway` and once with a `class` of `bridge`. Thus the *uique key* for `place` is (`osm_type`, `osm_id`, `class`). From f9a098743b1481405c8b682c8d9c42e69681bb7e Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Sun, 6 Jan 2019 19:20:32 +0100 Subject: [PATCH 09/11] update osm2pgsql (custom style) --- osm2pgsql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/osm2pgsql b/osm2pgsql index 433ad388..56c8f56b 160000 --- a/osm2pgsql +++ b/osm2pgsql @@ -1 +1 @@ -Subproject commit 433ad388b88d74235e9fc4dd12fa608719b32904 +Subproject commit 56c8f56b43e1464361d535eeb61e2335d1d983e4 From 5dc10bd5a2bd65e1c305d6cf9d9f7295dc9e18ea Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Tue, 8 Jan 2019 22:20:42 +0100 Subject: [PATCH 10/11] add final missing import numbers --- docs/admin/Import-and-Update.md | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/docs/admin/Import-and-Update.md b/docs/admin/Import-and-Update.md index 4cd46ffb..1e55e856 100644 --- a/docs/admin/Import-and-Update.md +++ b/docs/admin/Import-and-Update.md @@ -124,14 +124,16 @@ The style can be changed with the configuration `CONST_Import_Style`. 
To give you an idea of the impact of using the different style, the table below gives rough estimates of the final database size after import of a 2018 planet and after using the `--drop` option. It also shows the time -needed for the import on a machine with 32GB RAM, 4 CPUS and SSDs. +needed for the import on a machine with 32GB RAM, 4 CPUS and SSDs. Note that +the given sizes are just an estimate meant for comparison of style requirements. +Your planet import is likely to be larger as the OSM data grows with time. style | Import time | DB size | after drop ----------|--------------|------------|------------ -admin | 5h | 189 GB | 20 GB -street | 42h | 396 GB | 174 GB -address | 59h | 497 GB | 251 GB -full | 80h | GB | GB +admin | 5h | 190 GB | 20 GB +street | 42h | 400 GB | 180 GB +address | 59h | 500 GB | 260 GB +full | 80h | 590 GB | 320 GB You can also customize the styles further. For an description of the style format see [the developement section](../develop/Import.md). From 58e461e4c76aa4aeb879ce87285dd2c2e50b3ffd Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Tue, 8 Jan 2019 23:46:18 +0100 Subject: [PATCH 11/11] postcodes also need fallback --- settings/import-address.style | 2 +- settings/import-full.style | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/settings/import-address.style b/settings/import-address.style index 1b5eda61..0a866c95 100644 --- a/settings/import-address.style +++ b/settings/import-address.style @@ -69,7 +69,7 @@ "keys" : ["postal_code", "postcode", "addr:postcode", "tiger:zip_left", "tiger:zip_right"], "values" : { - "" : "postcode" + "" : "postcode,fallback" } }, { diff --git a/settings/import-full.style b/settings/import-full.style index 3a743001..de91fd9c 100644 --- a/settings/import-full.style +++ b/settings/import-full.style @@ -182,7 +182,7 @@ "keys" : ["postal_code", "postcode", "addr:postcode", "tiger:zip_left", "tiger:zip_right"], "values" : { - "" : "postcode" + "" : "postcode,fallback" } }, {