mirror of
https://github.com/osm-search/Nominatim.git
synced 2024-09-11 19:17:17 +03:00
Code cleaning, tests simplification and use of python3-icu package
This commit is contained in:
parent
919469c8fe
commit
ff34198569
3
.github/actions/build-nominatim/action.yml
vendored
3
.github/actions/build-nominatim/action.yml
vendored
@ -6,8 +6,7 @@ runs:
|
||||
steps:
|
||||
- name: Install prerequisites
|
||||
run: |
|
||||
sudo apt-get install -y -qq libboost-system-dev libboost-filesystem-dev libexpat1-dev zlib1g-dev libbz2-dev libpq-dev libproj-dev libicu-dev python3-psycopg2 python3-pyosmium python3-dotenv python3-psutil python3-jinja2
|
||||
sudo pip install PyICU
|
||||
sudo apt-get install -y -qq libboost-system-dev libboost-filesystem-dev libexpat1-dev zlib1g-dev libbz2-dev libpq-dev libproj-dev libicu-dev python3-psycopg2 python3-pyosmium python3-dotenv python3-psutil python3-jinja2 python3-icu
|
||||
shell: bash
|
||||
|
||||
- name: Download dependencies
|
||||
|
@ -1,6 +1,7 @@
|
||||
[MASTER]
|
||||
|
||||
extension-pkg-whitelist=osmium
|
||||
ignored-modules=icu
|
||||
|
||||
[MESSAGES CONTROL]
|
||||
|
||||
|
@ -2,9 +2,7 @@
|
||||
import sys
|
||||
import os
|
||||
|
||||
sys.path.insert(0, '@NOMINATIM_LIBDIR@/lib-python')
|
||||
#Add config directory to the python path for module importation
|
||||
sys.path.insert(1, '@NOMINATIM_CONFIGDIR@/..')
|
||||
sys.path.insert(1, '@NOMINATIM_LIBDIR@/lib-python')
|
||||
|
||||
os.environ['NOMINATIM_NOMINATIM_TOOL'] = os.path.abspath(__file__)
|
||||
|
||||
|
@ -16,4 +16,4 @@ if (file_exists($phpPhraseSettingsFile) && !file_exists($jsonPhraseSettingsFile)
|
||||
$jsonFile = fopen($jsonPhraseSettingsFile, 'w');
|
||||
fwrite($jsonFile, json_encode($data));
|
||||
fclose($jsonFile);
|
||||
}
|
||||
}
|
||||
|
@ -5,10 +5,9 @@ import logging
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
import json
|
||||
from os.path import isfile
|
||||
from icu import Transliterator # pylint: disable-msg=no-name-in-module
|
||||
from icu import Transliterator
|
||||
from psycopg2.sql import Identifier, Literal, SQL
|
||||
from nominatim.tools.exec_utils import get_url
|
||||
|
||||
@ -32,7 +31,7 @@ def import_from_wiki(args, db_connection, languages=None):
|
||||
languages = _get_languages(args.config) if not languages else languages
|
||||
|
||||
#array for pairs of class/type
|
||||
pairs = dict()
|
||||
class_type_pairs = set()
|
||||
|
||||
transliterator = Transliterator.createFromRules("special-phrases normalizer",
|
||||
args.config.TERM_NORMALIZATION)
|
||||
@ -63,14 +62,14 @@ def import_from_wiki(args, db_connection, languages=None):
|
||||
continue
|
||||
|
||||
#add class/type to the pairs dict
|
||||
pairs[f'{phrase_class}|{phrase_type}'] = (phrase_class, phrase_type)
|
||||
class_type_pairs.add((phrase_class, phrase_type))
|
||||
|
||||
_process_amenity(
|
||||
db_connection, phrase_label, normalized_label,
|
||||
phrase_class, phrase_type, phrase_operator
|
||||
)
|
||||
|
||||
_create_place_classtype_table_and_indexes(db_connection, args.config, pairs)
|
||||
_create_place_classtype_table_and_indexes(db_connection, args.config, class_type_pairs)
|
||||
db_connection.commit()
|
||||
LOG.warning('Import done.')
|
||||
|
||||
@ -118,12 +117,8 @@ def _check_sanity(lang, phrase_class, phrase_type, pattern):
|
||||
Check sanity of given inputs in case somebody added garbage in the wiki.
|
||||
If a bad class/type is detected the system will exit with an error.
|
||||
"""
|
||||
try:
|
||||
if len(pattern.findall(phrase_class)) < 1 or len(pattern.findall(phrase_type)) < 1:
|
||||
sys.exit()
|
||||
except SystemExit:
|
||||
if len(pattern.findall(phrase_class)) < 1 or len(pattern.findall(phrase_type)) < 1:
|
||||
LOG.error("Bad class/type for language %s: %s=%s", lang, phrase_class, phrase_type)
|
||||
raise
|
||||
|
||||
|
||||
def _process_amenity(db_connection, phrase_label, normalized_label,
|
||||
@ -147,7 +142,7 @@ def _process_amenity(db_connection, phrase_label, normalized_label,
|
||||
(phrase_label, normalized_label, phrase_class, phrase_type))
|
||||
|
||||
|
||||
def _create_place_classtype_table_and_indexes(db_connection, config, pairs):
|
||||
def _create_place_classtype_table_and_indexes(db_connection, config, class_type_pairs):
|
||||
"""
|
||||
Create table place_classtype for each given pair.
|
||||
Also create indexes on place_id and centroid.
|
||||
@ -161,7 +156,7 @@ def _create_place_classtype_table_and_indexes(db_connection, config, pairs):
|
||||
with db_connection.cursor() as db_cursor:
|
||||
db_cursor.execute("CREATE INDEX idx_placex_classtype ON placex (class, type)")
|
||||
|
||||
for _, pair in pairs.items():
|
||||
for pair in class_type_pairs.items():
|
||||
phrase_class = pair[0]
|
||||
phrase_type = pair[1]
|
||||
|
||||
@ -188,53 +183,54 @@ def _create_place_classtype_table(db_connection, sql_tablespace, phrase_class, p
|
||||
"""
|
||||
Create table place_classtype of the given phrase_class/phrase_type if doesn't exit.
|
||||
"""
|
||||
table_name = 'place_classtype_{}_{}'.format(phrase_class, phrase_type)
|
||||
with db_connection.cursor() as db_cursor:
|
||||
db_cursor.execute(SQL(f"""
|
||||
CREATE TABLE IF NOT EXISTS {{}} {sql_tablespace}
|
||||
db_cursor.execute(SQL("""
|
||||
CREATE TABLE IF NOT EXISTS {{}} {}
|
||||
AS SELECT place_id AS place_id,st_centroid(geometry) AS centroid FROM placex
|
||||
WHERE class = {{}} AND type = {{}}""")
|
||||
.format(Identifier(f'place_classtype_{phrase_class}_{phrase_type}'),
|
||||
Literal(phrase_class), Literal(phrase_type)))
|
||||
WHERE class = {{}} AND type = {{}}""".format(sql_tablespace))
|
||||
.format(Identifier(table_name), Literal(phrase_class),
|
||||
Literal(phrase_type)))
|
||||
|
||||
|
||||
def _create_place_classtype_indexes(db_connection, sql_tablespace, phrase_class, phrase_type):
|
||||
"""
|
||||
Create indexes on centroid and place_id for the place_classtype table.
|
||||
"""
|
||||
index_prefix = 'idx_place_classtype_{}_{}_'.format(phrase_class, phrase_type)
|
||||
base_table = 'place_classtype_{}_{}'.format(phrase_class, phrase_type)
|
||||
#Index on centroid
|
||||
if not db_connection.index_exists(f'idx_place_classtype_{phrase_class}_{phrase_type}_centroid'):
|
||||
if not db_connection.index_exists(index_prefix + 'centroid'):
|
||||
with db_connection.cursor() as db_cursor:
|
||||
db_cursor.execute(SQL(f"""
|
||||
CREATE INDEX {{}} ON {{}} USING GIST (centroid) {sql_tablespace}""")
|
||||
.format(Identifier(
|
||||
f"""idx_place_classtype_{phrase_class}_{phrase_type}_centroid"""),
|
||||
Identifier(f'place_classtype_{phrase_class}_{phrase_type}')))
|
||||
db_cursor.execute(SQL("""
|
||||
CREATE INDEX {{}} ON {{}} USING GIST (centroid) {}""".format(sql_tablespace))
|
||||
.format(Identifier(index_prefix + 'centroid'),
|
||||
Identifier(base_table)), sql_tablespace)
|
||||
|
||||
#Index on place_id
|
||||
if not db_connection.index_exists(f'idx_place_classtype_{phrase_class}_{phrase_type}_place_id'):
|
||||
if not db_connection.index_exists(index_prefix + 'place_id'):
|
||||
with db_connection.cursor() as db_cursor:
|
||||
db_cursor.execute(SQL(f"""
|
||||
CREATE INDEX {{}} ON {{}} USING btree(place_id) {sql_tablespace}""")
|
||||
.format(Identifier(
|
||||
f"""idx_place_classtype_{phrase_class}_{phrase_type}_place_id"""),
|
||||
Identifier(f'place_classtype_{phrase_class}_{phrase_type}')))
|
||||
db_cursor.execute(SQL(
|
||||
"""CREATE INDEX {{}} ON {{}} USING btree(place_id) {}""".format(sql_tablespace))
|
||||
.format(Identifier(index_prefix + 'place_id'),
|
||||
Identifier(base_table)))
|
||||
|
||||
|
||||
def _grant_access_to_webuser(db_connection, config, phrase_class, phrase_type):
|
||||
"""
|
||||
Grant access on read to the table place_classtype for the webuser.
|
||||
"""
|
||||
table_name = 'place_classtype_{}_{}'.format(phrase_class, phrase_type)
|
||||
with db_connection.cursor() as db_cursor:
|
||||
db_cursor.execute(SQL("""GRANT SELECT ON {} TO {}""")
|
||||
.format(Identifier(f'place_classtype_{phrase_class}_{phrase_type}'),
|
||||
Identifier(config.DATABASE_WEBUSER)))
|
||||
.format(Identifier(table_name), Identifier(config.DATABASE_WEBUSER)))
|
||||
|
||||
def _convert_php_settings_if_needed(args, file_path):
|
||||
"""
|
||||
Convert php settings file of special phrases to json file if it is still in php format.
|
||||
"""
|
||||
file, extension = os.path.splitext(file_path)
|
||||
json_file_path = f'{file}.json'
|
||||
json_file_path = file + '.json'
|
||||
if extension == '.php' and not isfile(json_file_path):
|
||||
try:
|
||||
subprocess.run(['/usr/bin/env', 'php', '-Cq',
|
||||
|
@ -77,7 +77,7 @@ NOMINATIM_TIGER_DATA_PATH=
|
||||
NOMINATIM_WIKIPEDIA_DATA_PATH=
|
||||
|
||||
# Configuration file for special phrase import.
|
||||
# When unset, the internal default settings from 'settings/phrase_settings.py'
|
||||
# When unset, the internal default settings from 'settings/phrase-settings.json'
|
||||
# are used.
|
||||
NOMINATIM_PHRASE_CONFIG=
|
||||
|
||||
|
@ -4,75 +4,27 @@
|
||||
import pytest
|
||||
from nominatim.tools.special_phrases import _create_place_classtype_indexes, _create_place_classtype_table, _get_wiki_content, _grant_access_to_webuser, _process_amenity
|
||||
|
||||
def test_get_wiki_content():
|
||||
assert _get_wiki_content('fr')
|
||||
def test_process_amenity_with_operator(temp_db_conn, getorcreate_amenityoperator_funcs):
|
||||
_process_amenity(temp_db_conn, '', '', '', '', 'near')
|
||||
_process_amenity(temp_db_conn, '', '', '', '', 'in')
|
||||
|
||||
def execute_and_verify_add_word(temp_db_conn, phrase_label, normalized_label,
|
||||
phrase_class, phrase_type):
|
||||
_process_amenity(temp_db_conn, phrase_label, normalized_label,
|
||||
phrase_class, phrase_type, '')
|
||||
|
||||
with temp_db_conn.cursor() as temp_db_cursor:
|
||||
temp_db_cursor.execute(f"""
|
||||
SELECT * FROM word
|
||||
WHERE word_token=' {normalized_label}'
|
||||
AND word='{normalized_label}'
|
||||
AND class='{phrase_class}'
|
||||
AND type='{phrase_type}'
|
||||
AND type='{phrase_type}'""")
|
||||
return temp_db_cursor.fetchone()
|
||||
|
||||
def execute_and_verify_add_word_with_operator(temp_db_conn, phrase_label, normalized_label,
|
||||
phrase_class, phrase_type, phrase_operator):
|
||||
_process_amenity(temp_db_conn, phrase_label, normalized_label,
|
||||
phrase_class, phrase_type, phrase_operator)
|
||||
|
||||
with temp_db_conn.cursor() as temp_db_cursor:
|
||||
temp_db_cursor.execute(f"""
|
||||
SELECT * FROM word
|
||||
WHERE word_token=' {normalized_label}'
|
||||
AND word='{normalized_label}'
|
||||
AND class='{phrase_class}'
|
||||
AND type='{phrase_type}'
|
||||
AND operator='{phrase_operator}'""")
|
||||
return temp_db_cursor.fetchone()
|
||||
|
||||
def test_process_amenity_with_near_operator(temp_db_conn, word_table, amenity_operator_funcs):
|
||||
phrase_label = ' label '
|
||||
normalized_label = 'label'
|
||||
phrase_class = 'class'
|
||||
phrase_type = 'type'
|
||||
|
||||
assert execute_and_verify_add_word(temp_db_conn, phrase_label, normalized_label,
|
||||
phrase_class, phrase_type)
|
||||
assert execute_and_verify_add_word_with_operator(temp_db_conn, phrase_label, normalized_label,
|
||||
phrase_class, phrase_type, 'near')
|
||||
assert execute_and_verify_add_word_with_operator(temp_db_conn, phrase_label, normalized_label,
|
||||
phrase_class, phrase_type, 'in')
|
||||
|
||||
def index_exists(db_connect, index):
|
||||
""" Check that an index with the given name exists in the database.
|
||||
"""
|
||||
with db_connect.cursor() as cur:
|
||||
cur.execute("""SELECT tablename FROM pg_indexes
|
||||
WHERE indexname = %s and schemaname = 'public'""", (index, ))
|
||||
if cur.rowcount == 0:
|
||||
return False
|
||||
return True
|
||||
def test_process_amenity_without_operator(temp_db_conn, getorcreate_amenity_funcs):
|
||||
_process_amenity(temp_db_conn, '', '', '', '', '')
|
||||
|
||||
def test_create_place_classtype_indexes(temp_db_conn):
|
||||
phrase_class = 'class'
|
||||
phrase_type = 'type'
|
||||
table_name = f'place_classtype_{phrase_class}_{phrase_type}'
|
||||
table_name = 'place_classtype_{}_{}'.format(phrase_class, phrase_type)
|
||||
index_prefix = 'idx_place_classtype_{}_{}_'.format(phrase_class, phrase_type)
|
||||
|
||||
with temp_db_conn.cursor() as temp_db_cursor:
|
||||
temp_db_cursor.execute("CREATE EXTENSION postgis;")
|
||||
temp_db_cursor.execute(f'CREATE TABLE {table_name}(place_id BIGINT, centroid GEOMETRY)')
|
||||
temp_db_cursor.execute('CREATE TABLE {}(place_id BIGINT, centroid GEOMETRY)'.format(table_name))
|
||||
|
||||
_create_place_classtype_indexes(temp_db_conn, '', phrase_class, phrase_type)
|
||||
|
||||
centroid_index_exists = index_exists(temp_db_conn, f'idx_place_classtype_{phrase_class}_{phrase_type}_centroid')
|
||||
place_id_index_exists = index_exists(temp_db_conn, f'idx_place_classtype_{phrase_class}_{phrase_type}_place_id')
|
||||
centroid_index_exists = temp_db_conn.index_exists(index_prefix + 'centroid')
|
||||
place_id_index_exists = temp_db_conn.index_exists(index_prefix + 'place_id')
|
||||
|
||||
assert centroid_index_exists and place_id_index_exists
|
||||
|
||||
@ -93,10 +45,10 @@ def test_create_place_classtype_table(temp_db_conn, placex_table):
|
||||
def test_grant_access_to_web_user(temp_db_conn, def_config):
|
||||
phrase_class = 'class'
|
||||
phrase_type = 'type'
|
||||
table_name = f'place_classtype_{phrase_class}_{phrase_type}'
|
||||
table_name = 'place_classtype_{}_{}'.format(phrase_class, phrase_type)
|
||||
|
||||
with temp_db_conn.cursor() as temp_db_cursor:
|
||||
temp_db_cursor.execute(f'CREATE TABLE {table_name}()')
|
||||
temp_db_cursor.execute('CREATE TABLE {}()'.format(table_name))
|
||||
|
||||
_grant_access_to_webuser(temp_db_conn, def_config, phrase_class, phrase_type)
|
||||
|
||||
@ -110,65 +62,28 @@ def test_grant_access_to_web_user(temp_db_conn, def_config):
|
||||
assert result
|
||||
|
||||
@pytest.fixture
|
||||
def amenity_operator_funcs(temp_db_cursor):
|
||||
def make_strandard_name_func(temp_db_cursor):
|
||||
temp_db_cursor.execute(f"""
|
||||
CREATE OR REPLACE FUNCTION make_standard_name(name TEXT) RETURNS TEXT
|
||||
AS $$
|
||||
DECLARE
|
||||
o TEXT;
|
||||
CREATE OR REPLACE FUNCTION make_standard_name(name TEXT) RETURNS TEXT AS $$
|
||||
BEGIN
|
||||
RETURN trim(name); --Basically return only the trimed name for the tests
|
||||
END;
|
||||
$$
|
||||
LANGUAGE plpgsql IMMUTABLE;
|
||||
|
||||
CREATE SEQUENCE seq_word start 1;
|
||||
|
||||
$$ LANGUAGE plpgsql IMMUTABLE;""")
|
||||
|
||||
@pytest.fixture
|
||||
def getorcreate_amenity_funcs(temp_db_cursor, make_strandard_name_func):
|
||||
temp_db_cursor.execute(f"""
|
||||
CREATE OR REPLACE FUNCTION getorcreate_amenity(lookup_word TEXT, normalized_word TEXT,
|
||||
lookup_class text, lookup_type text)
|
||||
RETURNS INTEGER
|
||||
AS $$
|
||||
DECLARE
|
||||
lookup_token TEXT;
|
||||
return_word_id INTEGER;
|
||||
BEGIN
|
||||
lookup_token := ' '||trim(lookup_word);
|
||||
SELECT min(word_id) FROM word
|
||||
WHERE word_token = lookup_token and word = normalized_word
|
||||
and class = lookup_class and type = lookup_type
|
||||
INTO return_word_id;
|
||||
IF return_word_id IS NULL THEN
|
||||
return_word_id := nextval('seq_word');
|
||||
INSERT INTO word VALUES (return_word_id, lookup_token, normalized_word,
|
||||
lookup_class, lookup_type, null, 0);
|
||||
END IF;
|
||||
RETURN return_word_id;
|
||||
END;
|
||||
$$
|
||||
LANGUAGE plpgsql;
|
||||
RETURNS void as $$
|
||||
BEGIN END;
|
||||
$$ LANGUAGE plpgsql""")
|
||||
|
||||
CREATE OR REPLACE FUNCTION getorcreate_amenityoperator(lookup_word TEXT,
|
||||
normalized_word TEXT,
|
||||
lookup_class text,
|
||||
lookup_type text,
|
||||
op text)
|
||||
RETURNS INTEGER
|
||||
AS $$
|
||||
DECLARE
|
||||
lookup_token TEXT;
|
||||
return_word_id INTEGER;
|
||||
BEGIN
|
||||
lookup_token := ' '||trim(lookup_word);
|
||||
SELECT min(word_id) FROM word
|
||||
WHERE word_token = lookup_token and word = normalized_word
|
||||
and class = lookup_class and type = lookup_type and operator = op
|
||||
INTO return_word_id;
|
||||
IF return_word_id IS NULL THEN
|
||||
return_word_id := nextval('seq_word');
|
||||
INSERT INTO word VALUES (return_word_id, lookup_token, normalized_word,
|
||||
lookup_class, lookup_type, null, 0, op);
|
||||
END IF;
|
||||
RETURN return_word_id;
|
||||
END;
|
||||
$$
|
||||
LANGUAGE plpgsql;""")
|
||||
@pytest.fixture
|
||||
def getorcreate_amenityoperator_funcs(temp_db_cursor, make_strandard_name_func):
|
||||
temp_db_cursor.execute(f"""
|
||||
CREATE OR REPLACE FUNCTION getorcreate_amenityoperator(lookup_word TEXT, normalized_word TEXT,
|
||||
lookup_class text, lookup_type text, op text)
|
||||
RETURNS void as $$
|
||||
BEGIN END;
|
||||
$$ LANGUAGE plpgsql""")
|
@ -29,13 +29,13 @@ export DEBIAN_FRONTEND=noninteractive #DOCS:
|
||||
libbz2-dev libpq-dev libproj-dev \
|
||||
postgresql-server-dev-10 postgresql-10-postgis-2.4 \
|
||||
postgresql-contrib-10 postgresql-10-postgis-scripts \
|
||||
php php-pgsql php-intl python3-pip \
|
||||
python3-psycopg2 python3-psutil python3-jinja2 git libicu-dev
|
||||
php php-pgsql php-intl libicu-dev python3-pip \
|
||||
python3-psycopg2 python3-psutil python3-jinja2 python3-icu git
|
||||
|
||||
# The python-dotenv package that comes with Ubuntu 18.04 is too old, so
|
||||
# install the latest version from pip:
|
||||
|
||||
pip3 install python-dotenv PyICU
|
||||
pip3 install python-dotenv
|
||||
|
||||
#
|
||||
# System Configuration
|
||||
|
@ -32,11 +32,8 @@ export DEBIAN_FRONTEND=noninteractive #DOCS:
|
||||
libbz2-dev libpq-dev libproj-dev \
|
||||
postgresql-server-dev-12 postgresql-12-postgis-3 \
|
||||
postgresql-contrib-12 postgresql-12-postgis-3-scripts \
|
||||
php php-pgsql php-intl python3-dotenv \
|
||||
python3-psycopg2 python3-psutil python3-jinja2 git libicu-dev
|
||||
#Python packages:
|
||||
|
||||
pip3 install PyICU
|
||||
php php-pgsql php-intl libicu-dev python3-dotenv \
|
||||
python3-psycopg2 python3-psutil python3-jinja2 python3-icu git
|
||||
|
||||
#
|
||||
# System Configuration
|
||||
|
Loading…
Reference in New Issue
Block a user