initial flex import scripts

Only implements the extratags style for the moment. Tests pass
for the same behaviour as the gazetteer output. Updates still need
to be done.
This commit is contained in:
Sarah Hoffmann 2022-11-03 17:15:01 +01:00
parent de2a3bd5f8
commit 51ed55cc32
7 changed files with 537 additions and 14 deletions

View File

@ -63,7 +63,6 @@ if (BUILD_IMPORTER AND BUILD_OSM2PGSQL)
endif()
set(BUILD_TESTS_SAVED "${BUILD_TESTS}")
set(BUILD_TESTS off)
set(WITH_LUA off CACHE BOOL "")
add_subdirectory(osm2pgsql)
set(BUILD_TESTS ${BUILD_TESTS_SAVED})
endif()

View File

@ -184,6 +184,7 @@ class NominatimArgs:
return dict(osm2pgsql=self.config.OSM2PGSQL_BINARY or self.osm2pgsql_path,
osm2pgsql_cache=self.osm2pgsql_cache or default_cache,
osm2pgsql_style=self.config.get_import_style_file(),
osm2pgsql_style_path=self.config.config_dir,
threads=self.threads or default_threads,
dsn=self.config.get_libpq_dsn(),
flatnode_file=str(self.config.get_path('FLATNODE_FILE') or ''),

View File

@ -10,6 +10,7 @@ Helper functions for executing external programs.
from typing import Any, Union, Optional, Mapping, IO
from pathlib import Path
import logging
import os
import subprocess
import urllib.request as urlrequest
from urllib.parse import urlencode
@ -120,9 +121,16 @@ def run_osm2pgsql(options: Mapping[str, Any]) -> None:
'--log-progress', 'true',
'--number-processes', str(options['threads']),
'--cache', str(options['osm2pgsql_cache']),
'--output', 'gazetteer',
'--style', str(options['osm2pgsql_style'])
]
if str(options['osm2pgsql_style']).endswith('.lua'):
env['LUA_PATH'] = ';'.join((str(options['osm2pgsql_style_path'] / 'flex-base.lua'),
os.environ.get('LUAPATH', ';')))
cmd.extend(('--output', 'flex'))
else:
cmd.extend(('--output', 'gazetteer'))
if options['append']:
cmd.append('--append')
else:

382
settings/flex-base.lua Normal file
View File

@ -0,0 +1,382 @@
-- Core functions for Nominatim import flex style.
--
-- The single place table.
-- Every row written by this style goes into this table (see
-- Place:write_row, which is the only place that inserts into it).
place_table = osm2pgsql.define_table({
    name = 'place',
    ids = {
        type = 'any',
        id_column = 'osm_id',
        type_column = 'osm_type'
    },
    columns = {
        -- class/type receive the key and value of the main tag of the row
        { column = 'class', type = 'text', not_null = true },
        { column = 'type', type = 'text', not_null = true },
        { column = 'admin_level', type = 'smallint' },
        -- the three hstore columns are left NULL when nothing was collected
        { column = 'name', type = 'hstore' },
        { column = 'address', type = 'hstore' },
        { column = 'extratags', type = 'hstore' },
        { column = 'geometry', type = 'geometry', projection = 'WGS84', not_null = true },
    }
})
------------- Place class ------------------------------------------

local Place = {}
Place.__index = Place

-- Wrap an OSM object for tag processing.
--
-- object:    the OSM object; must provide grab_tag() and a tags table
-- geom_func: function that takes the object and returns its geometry;
--            it is only called once a row actually needs to be written
--
-- The 'admin_level' tag is consumed from the object right away. Anything
-- that is not a whole number in the range 1..15 is replaced by 15.
function Place.new(object, geom_func)
    local level = tonumber(object:grab_tag('admin_level'))
    local is_valid = level ~= nil
                     and level > 0 and level <= 15
                     and math.floor(level) == level
    if not is_valid then
        level = 15
    end

    return setmetatable({
        object = object,
        geom_func = geom_func,
        admin_level = level,
        num_entries = 0,    -- number of rows written so far
        has_name = false,   -- set once a tag counting as a name was grabbed
        names = {},
        address = {},
        extratags = {},
    }, Place)
end
-- Remove every tag from the object for which data.match(key, value)
-- returns true. Does nothing when data.match is nil.
function Place:delete(data)
    local matches = data.match
    if matches == nil then
        return
    end

    local tags = self.object.tags
    for key, value in pairs(tags) do
        if matches(key, value) then
            -- clearing an existing field during traversal is permitted
            tags[key] = nil
        end
    end
end
-- Move every tag for which data.match(key, value) returns true from the
-- object's tags into self.extratags.
--
-- Returns the number of tags moved (0 when data.match is nil).
function Place:grab_extratags(data)
    local matches = data.match
    if matches == nil then
        return 0
    end

    local tags = self.object.tags
    local moved = 0
    for key, value in pairs(tags) do
        if matches(key, value) then
            tags[key] = nil
            self.extratags[key] = value
            moved = moved + 1
        end
    end

    return moved
end
-- Move tags for which data.match(key, value) returns true from the
-- object's tags into self.address.
--
-- Recognised fields of data:
--   match           - predicate function(key, value); nil means no-op
--   include_on_name - when exactly true, a match also sets self.has_name
--   out_key         - when set, the value is stored under this key and the
--                     function returns 1 right after the FIRST match; any
--                     further matching tags stay on the object. Which tag
--                     is first depends on pairs() order.
--
-- Without out_key, the prefixes 'addr:' and 'is_in:' are stripped from the
-- key before storing. Returns the number of tags moved.
function Place:grab_address(data)
    local matches = data.match
    if matches == nil then
        return 0
    end

    local tags = self.object.tags
    local grabbed = 0
    for key, value in pairs(tags) do
        if matches(key, value) then
            tags[key] = nil

            if data.include_on_name == true then
                self.has_name = true
            end

            if data.out_key ~= nil then
                self.address[data.out_key] = value
                return 1
            end

            local target = key
            if key:sub(1, 5) == 'addr:' then
                target = key:sub(6)
            elseif key:sub(1, 6) == 'is_in:' then
                target = key:sub(7)
            end
            self.address[target] = value

            grabbed = grabbed + 1
        end
    end

    return grabbed
end
-- Store a single address part directly, overwriting any previous value
-- saved under the same key.
function Place:set_address(key, value)
    local address = self.address
    address[key] = value
end
-- Move every tag for which data.match(key, value) returns true from the
-- object's tags into self.names, keeping the original key.
--
-- A match marks the place as named (self.has_name) unless
-- data.include_on_name is explicitly false.
-- Returns the number of tags moved (0 when data.match is nil).
function Place:grab_name(data)
    local matches = data.match
    if matches == nil then
        return 0
    end

    local tags = self.object.tags
    local grabbed = 0
    for key, value in pairs(tags) do
        if matches(key, value) then
            tags[key] = nil
            self.names[key] = value

            if data.include_on_name ~= false then
                self.has_name = true
            end

            grabbed = grabbed + 1
        end
    end

    return grabbed
end
-- Forward to the grab_tag() method of the wrapped OSM object and return
-- whatever it returns.
function Place:grab_tag(key)
    local object = self.object
    return object:grab_tag(key)
end
-- Return the tag table of the wrapped OSM object. This is the live table,
-- not a copy: tags already grabbed or deleted are no longer in it.
function Place:tags()
    local object = self.object
    return object.tags
end
-- Decide whether the tag k=v produces a row and write it if so.
--
-- k, v:  main tag; when v is nil the current value of tag k is used
-- mtype: how the key is handled:
--          nil              - never written
--          'always'         - written unconditionally
--          'named'          - written only if the place has a name
--          'named_with_key' - written only if '<k>:name' or '<k>:name:*'
--                             tags exist; those are then used as names
--                             for this row (with the '<k>:' part removed)
--          table            - looked up by value v, with entry [1] as the
--                             default for values not listed
-- save_extra_mains: passed through to write_row
--
-- Returns the result of write_row when a row is attempted, 0 otherwise.
function Place:write_place(k, v, mtype, save_extra_mains)
    if mtype == nil then
        return 0
    end

    if not v then
        v = self.object.tags[k]
    end
    if v == nil then
        return 0
    end

    if type(mtype) == 'table' then
        mtype = mtype[v] or mtype[1]
    end

    if mtype == 'always' or (mtype == 'named' and self.has_name) then
        return self:write_row(k, v, save_extra_mains)
    end

    if mtype ~= 'named_with_key' then
        return 0
    end

    -- collect the key-specific names, e.g. bridge:name, bridge:name:en
    local prefix = k .. ':name'
    local key_names = {}
    for tag_key, tag_value in pairs(self.object.tags) do
        if tag_key:sub(1, #prefix) == prefix then
            local follower = tag_key:sub(#prefix + 1, #prefix + 1)
            if follower == '' or follower == ':' then
                key_names[tag_key:sub(#k + 2)] = tag_value
            end
        end
    end

    if next(key_names) == nil then
        return 0
    end

    -- swap in the key-specific names just for this one row
    local saved_names = self.names
    self.names = key_names
    local written = self:write_row(k, v, save_extra_mains)
    self.names = saved_names

    return written
end
-- Insert one row with class k and type v into the place table.
--
-- The geometry is created through geom_func on first use and cached on
-- the place. When save_extra_mains is truthy, all tags still left on the
-- object except k itself are added to extratags for this row only.
--
-- Returns 1 when a row was inserted, 0 when the geometry is null.
function Place:write_row(k, v, save_extra_mains)
    if self.geometry == nil then
        self.geometry = self.geom_func(self.object)
    end

    if self.geometry:is_null() then
        return 0
    end

    local remaining = self.object.tags

    if save_extra_mains then
        for other_key, other_value in pairs(remaining) do
            if other_key ~= k then
                self.extratags[other_key] = other_value
            end
        end
    end

    -- empty collections are not passed on as tables
    local row = {
        class = k,
        type = v,
        admin_level = self.admin_level,
        name = next(self.names) and self.names,
        address = next(self.address) and self.address,
        extratags = next(self.extratags) and self.extratags,
        geometry = self.geometry
    }
    place_table:insert(row)

    if save_extra_mains then
        -- take the temporary entries out again so that the next row
        -- starts from the grabbed extratags only
        for tag_key, _ in pairs(remaining) do
            self.extratags[tag_key] = nil
        end
    end

    self.num_entries = self.num_entries + 1

    return 1
end
-- Build a predicate function(key, value) from a declarative description.
--
-- data.keys: list of key patterns. '*suffix' matches keys ending in
--            'suffix', 'prefix*' matches keys starting with 'prefix',
--            anything else must equal the key exactly. A lone '*' is
--            ignored.
-- data.tags: table mapping a key to the list of values to match.
--
-- Returns nil when data is nil or empty. Otherwise returns a function that
-- is true when any of the key patterns or key/value pairs matches.
function tag_match(data)
    if data == nil or next(data) == nil then
        return nil
    end

    local matchers = {}

    if data.keys ~= nil then
        for _, pattern in pairs(data.keys) do
            local matcher

            if pattern:sub(1, 1) == '*' then
                if #pattern > 1 then
                    local tail = pattern:sub(2)
                    matcher = function (k, v)
                        return k:sub(-#tail) == tail
                    end
                end
            elseif pattern:sub(-1) == '*' then
                local head = pattern:sub(1, #pattern - 1)
                matcher = function (k, v)
                    return k:sub(1, #head) == head
                end
            else
                matcher = function (k, v)
                    return k == pattern
                end
            end

            if matcher ~= nil then
                matchers[#matchers + 1] = matcher
            end
        end
    end

    if data.tags ~= nil then
        -- turn the value lists into sets for direct lookup
        local values_for = {}
        for key, value_list in pairs(data.tags) do
            local value_set = {}
            for _, value in pairs(value_list) do
                value_set[value] = true
            end
            values_for[key] = value_set
        end

        matchers[#matchers + 1] = function (k, v)
            local value_set = values_for[k]
            return value_set ~= nil and value_set[v] ~= nil
        end
    end

    return function (k, v)
        for i = 1, #matchers do
            if matchers[i](k, v) then
                return true
            end
        end

        return false
    end
end
-- Process functions for all data types

-- Nodes: the geometry is the point of the node.
function osm2pgsql.process_node(object)
    local place = Place.new(object, function (o)
        return o:as_point()
    end)

    process_tags(place)
end
-- Ways: try a polygon first and use a linestring when the polygon
-- geometry comes back null.
function osm2pgsql.process_way(object)
    local place = Place.new(object, function (o)
        local area = o:as_polygon()
        if not area:is_null() then
            return area
        end

        local line = o:as_linestring()
        return line
    end)

    process_tags(place)
end
-- Geometry function for relations that are assembled into areas.
-- Meant to be used as a value in RELATION_TYPES.
function relation_as_multipolygon(o)
    return o:as_multipolygon()
end

-- Geometry function for relations that are assembled into lines: the
-- multilinestring of the relation with line_merge() applied to it.
-- Meant to be used as a value in RELATION_TYPES.
function relation_as_multiline(o)
    local lines = o:as_multilinestring()
    return lines:line_merge()
end
-- Relations: only relations whose 'type' tag has an entry in
-- RELATION_TYPES are processed; the entry supplies the geometry function.
function osm2pgsql.process_relation(object)
    local make_geometry = RELATION_TYPES[object.tags.type]
    if make_geometry == nil then
        return
    end

    process_tags(Place.new(object, make_geometry))
end
-- Sort the tags of a place into names, address parts and extratags and
-- write the resulting rows.
--
-- o: a Place object as created by Place.new
--
-- Behaviour is driven by globals that the style file has to define:
-- PRE_DELETE, PRE_EXTRAS, COUNTRY_TAGS, HOUSENAME_TAGS, HOUSENUMBER_TAGS,
-- POSTCODES, INTERPOLATION_TAGS, ADD_TIGER_COUNTY, ADDRESS_TAGS, NAMES,
-- REFS, POST_DELETE, POST_EXTRAS, MAIN_KEYS, MAIN_FALLBACK_KEYS and
-- SAVE_EXTRA_MAINS.
function process_tags(o)
    -- class/type pair to use when no main tag produces a row
    local fallback

    o:delete{match = PRE_DELETE}
    o:grab_extratags{match = PRE_EXTRAS}

    -- Exception for boundary/place double tagging
    if o.object.tags.boundary == 'administrative' then
        o:grab_extratags{match = function (k, v)
            return k == 'place' and v:sub(1,3) ~= 'isl'
        end}
    end

    -- address keys
    -- country tags are only accepted when the value has exactly 2 characters
    o:grab_address{match=function (k, v) return COUNTRY_TAGS(k, v) and #v == 2 end,
                   out_key='country'}

    if o:grab_name{match=HOUSENAME_TAGS} > 0 then
        fallback = {'place', 'house'}
    end

    if o:grab_address{match=HOUSENUMBER_TAGS, include_on_name = true} > 0 and fallback == nil then
        fallback = {'place', 'house'}
    end

    if o:grab_address{match=POSTCODES, out_key='postcode'} > 0 and fallback == nil then
        fallback = {'place', 'postcode'}
    end

    local is_interpolation = o:grab_address{match=INTERPOLATION_TAGS} > 0

    if ADD_TIGER_COUNTY then
        local county = o:grab_tag('tiger:county')
        if county ~= nil then
            -- Replace everything from the first comma on with ' county'.
            -- Both results are declared local: the replacement count used
            -- to be assigned to an undeclared name and leaked as a global.
            local shortened, num = county:gsub(',.*', ' county')
            if num == 0 then
                shortened = shortened .. ' county'
            end
            o:set_address('tiger:county', shortened)
        end
    end

    o:grab_address{match=ADDRESS_TAGS}

    -- interpolations produce exactly one place=houses row and nothing else
    if is_interpolation then
        o:write_place('place', 'houses', 'always', SAVE_EXTRA_MAINS)
        return
    end

    -- name keys
    o:grab_name{match = NAMES}
    o:grab_name{match = REFS, include_on_name = false}

    o:delete{match = POST_DELETE}
    o:grab_extratags{match = POST_EXTRAS}

    -- collect main keys
    local num_mains = 0
    for k, v in pairs(o:tags()) do
        num_mains = num_mains + o:write_place(k, v, MAIN_KEYS[k], SAVE_EXTRA_MAINS)
    end

    if num_mains == 0 then
        -- the first fallback key that yields a row wins
        for tag, mtype in pairs(MAIN_FALLBACK_KEYS) do
            if o:write_place(tag, nil, mtype, SAVE_EXTRA_MAINS) > 0 then
                return
            end
        end

        if fallback ~= nil then
            o:write_place(fallback[1], fallback[2], 'always', SAVE_EXTRA_MAINS)
        end
    end
end

View File

@ -0,0 +1,130 @@
require('flex-base')
-- Relation types to import, keyed by the value of the relation's 'type'
-- tag. The value is the function used to build the geometry. Relations of
-- any other type are ignored.
RELATION_TYPES = {
    multipolygon = relation_as_multipolygon,
    boundary     = relation_as_multipolygon,
    waterway     = relation_as_multiline
}
-- Keys that can create a row of their own, with the condition for it:
--   'always'         - any value creates a row
--   'named'          - a row is only created when the object has a name
--   'named_with_key' - a row is only created when there is a name tag
--                      specific to the key (e.g. bridge:name)
-- A table gives the default as first entry followed by exceptions for
-- individual values.
MAIN_KEYS = {
    emergency = 'always',
    historic = 'always',
    military = 'always',
    natural = 'named',
    landuse = 'named',
    highway = {
        'always',
        street_lamp = 'named',
        traffic_signals = 'named',
        service = 'named',
        cycleway = 'named',
        path = 'named',
        footway = 'named',
        steps = 'named',
        bridleway = 'named',
        track = 'named',
        motorway_link = 'named',
        trunk_link = 'named',
        primary_link = 'named',
        secondary_link = 'named',
        tertiary_link = 'named'
    },
    railway = 'named',
    man_made = 'always',
    aerialway = 'always',
    boundary = {
        'named',
        postal_code = 'named'
    },
    aeroway = 'always',
    amenity = 'always',
    club = 'always',
    craft = 'always',
    leisure = 'always',
    office = 'always',
    mountain_pass = 'always',
    shop = 'always',
    tourism = 'always',
    bridge = 'named_with_key',
    tunnel = 'named_with_key',
    waterway = 'named',
    place = 'always'
}
-- Keys that are only considered when none of the MAIN_KEYS has produced a
-- row. Processing stops at the first of these keys that yields a row.
-- The conditions have the same meaning as in MAIN_KEYS.
MAIN_FALLBACK_KEYS = {
    building   = 'named',
    landuse    = 'named',
    junction   = 'named',
    healthcare = 'named'
}
-- Tags that are dropped before any other processing takes place.
-- 'keys' removes tags by key pattern whatever their value is, 'tags'
-- removes specific key/value combinations only.
PRE_DELETE = tag_match{
    keys = {
        'note', 'note:*', 'source', 'source*', 'attribution',
        'comment', 'fixme', 'FIXME', 'created_by', 'NHD:*',
        'nhd:*', 'gnis:*', 'geobase:*', 'KSJ2:*', 'yh:*',
        'osak:*', 'naptan:*', 'CLC:*', 'import', 'it:fvg:*',
        'type', 'lacounty:*', 'ref:ruian:*', 'building:ruian:type',
        'ref:linz:*', 'is_in:postcode'
    },
    tags = {
        emergency = {'yes', 'no', 'fire_hydrant'},
        historic = {'yes', 'no'},
        military = {'yes', 'no'},
        natural = {'yes', 'no', 'coastline'},
        highway = {'no', 'turning_circle', 'mini_roundabout',
                   'noexit', 'crossing', 'give_way', 'stop'},
        railway = {'level_crossing', 'no', 'rail'},
        man_made = {'survey_point', 'cutline'},
        aerialway = {'pylon', 'no'},
        aeroway = {'no'},
        amenity = {'no'},
        club = {'no'},
        craft = {'no'},
        leisure = {'no'},
        office = {'no'},
        mountain_pass = {'no'},
        shop = {'no'},
        tourism = {'yes', 'no'},
        bridge = {'no'},
        tunnel = {'no'},
        waterway = {'riverbank'},
        building = {'no'},
        boundary = {'place'}
    }
}
-- Tags that are dropped after address and name tags have been collected.
POST_DELETE = tag_match{
    keys = {'tiger:*'}
}

-- Tags that go into extratags before address and name tags are collected,
-- so that they are not picked up by the name and address patterns.
PRE_EXTRAS = tag_match{
    keys = {
        '*:prefix', '*:suffix', 'name:prefix:*', 'name:suffix:*',
        'name:etymology', 'name:signed', 'name:botanical',
        'wikidata', '*:wikidata',
        'addr:street:name', 'addr:street:type'
    }
}
-- Tags saved as names. Having one of them makes the object count as named.
NAMES = tag_match{
    keys = {
        'name', 'name:*',
        'int_name', 'int_name:*',
        'nat_name', 'nat_name:*',
        'reg_name', 'reg_name:*',
        'loc_name', 'loc_name:*',
        'old_name', 'old_name:*',
        'alt_name', 'alt_name:*', 'alt_name_*',
        'official_name', 'official_name:*',
        'place_name', 'place_name:*',
        'short_name', 'short_name:*', 'brand'
    }
}

-- Tags saved together with the names which do not make the object count
-- as named on their own.
REFS = tag_match{
    keys = {
        'ref', 'int_ref', 'nat_ref', 'reg_ref', 'loc_ref', 'old_ref',
        'iata', 'icao', 'pcode', 'pcode:*', 'ISO3166-2'
    }
}
-- Tags whose value is saved as the 'postcode' address part.
POSTCODES = tag_match{
    keys = {
        'postal_code', 'postcode', 'addr:postcode',
        'tiger:zip_left', 'tiger:zip_right'
    }
}

-- Tags whose value is saved as the 'country' address part, provided the
-- value is exactly two characters long.
COUNTRY_TAGS = tag_match{
    keys = {
        'country_code', 'ISO3166-1',
        'addr:country_code', 'is_in:country_code',
        'addr:country', 'is_in:country'
    }
}

-- Saved as a name; objects without a main tag become place=house.
HOUSENAME_TAGS = tag_match{
    keys = {'addr:housename'}
}

-- Saved as address parts; objects without a main tag become place=house.
HOUSENUMBER_TAGS = tag_match{
    keys = {
        'addr:housenumber', 'addr:conscriptionnumber',
        'addr:streetnumber'
    }
}

-- Objects with one of these tags are imported as place=houses only.
INTERPOLATION_TAGS = tag_match{
    keys = {'addr:interpolation'}
}

-- All remaining address tags; the prefix is removed from the key.
ADDRESS_TAGS = tag_match{
    keys = {'addr:*', 'is_in:*'}
}

-- Save the shortened tiger:county tag as an address part.
ADD_TIGER_COUNTY = true

-- Add all remaining tags of an object to the extratags of each row.
SAVE_EXTRA_MAINS = true

View File

@ -106,10 +106,10 @@ Feature: Tag evaluation
n7002 Thighway=primary,bridge=yes,bridge:name=1
"""
Then place contains exactly
| object | class | type | name | extratags |
| N7001 | highway | primary | 'name': '1' | - |
| N7002:highway | highway | primary | - | 'bridge:name': '1'|
| N7002:bridge | bridge | yes | 'name': '1' | 'bridge:name': '1'|
| object | class | type | name | extratags+bridge:name |
| N7001 | highway | primary | 'name': '1' | - |
| N7002:highway | highway | primary | - | 1 |
| N7002:bridge | bridge | yes | 'name': '1' | 1 |
Scenario: Global fallback and skipping
@ -153,13 +153,15 @@ Feature: Tag evaluation
n10002 Tboundary=natural,place=city,name=B
n10003 Tboundary=administrative,place=island,name=C
"""
Then place contains exactly
Then place contains
| object | class | type | extratags |
| N10001 | boundary | administrative | 'place': 'city' |
| N10002:boundary | boundary | natural | - |
| N10002:place | place | city | - |
| N10003:boundary | boundary | administrative | - |
| N10003:place | place | island | - |
And place contains
| object | class | type |
| N10002:boundary | boundary | natural |
| N10002:place | place | city |
| N10003:boundary | boundary | administrative |
| N10003:place | place | island |
Scenario: Shorten tiger:county tags
@ -200,6 +202,6 @@ Feature: Tag evaluation
n13002 Taddr:interpolation=even,place=city
"""
Then place contains exactly
| object | class | type | extratags | address |
| N13001 | place | houses | - | 'interpolation': 'odd' |
| N13002 | place | houses | 'place': 'city' | 'interpolation': 'even' |
| object | class | type | address |
| N13001 | place | houses | 'interpolation': 'odd' |
| N13002 | place | houses | 'interpolation': 'even' |

View File

@ -18,6 +18,7 @@ def get_osm2pgsql_options(nominatim_env, fname, append):
osm2pgsql=str(nominatim_env.build_dir / 'osm2pgsql' / 'osm2pgsql'),
osm2pgsql_cache=50,
osm2pgsql_style=str(nominatim_env.get_test_config().get_import_style_file()),
osm2pgsql_style_path=nominatim_env.get_test_config().config_dir,
threads=1,
dsn=nominatim_env.get_libpq_dsn(),
flatnode_file='',