move update code for postcode and word count to Python

Also adds tests for the new function that executes an SQL script.
This commit is contained in:
Sarah Hoffmann 2021-01-22 23:25:37 +01:00
parent e6d9485c4a
commit e6c2842b66
6 changed files with 114 additions and 44 deletions

View File

@ -104,14 +104,12 @@ if ($fPostgresVersion >= 11.0) {
);
}
$oIndexCmd = (new \Nominatim\Shell(getSetting('NOMINATIM_TOOL')))
->addParams('index');
$oNominatimCmd = new \Nominatim\Shell(getSetting('NOMINATIM_TOOL'));
if ($aResult['quiet']) {
$oIndexCmd->addParams('--quiet');
$oNominatimCmd->addParams('--quiet');
}
if ($aResult['verbose']) {
$oIndexCmd->addParams('--verbose');
$oNominatimCmd->addParams('--verbose');
}
$sPyosmiumBin = getSetting('PYOSMIUM_BINARY');
@ -220,9 +218,7 @@ if (isset($aResult['import-diff']) || isset($aResult['import-file'])) {
}
if ($aResult['calculate-postcodes']) {
info('Update postcodes centroids');
$sTemplate = file_get_contents(CONST_DataDir.'/sql/update-postcodes.sql');
runSQLScript($sTemplate, true, true);
(clone($oNominatimCmd))->addParams('refresh', '--postcodes')->run();
}
$sTemporaryFile = CONST_InstallDir.'/osmosischange.osc';
@ -271,15 +267,11 @@ if ($bHaveDiff) {
}
if ($aResult['recompute-word-counts']) {
info('Recompute frequency of full-word search terms');
$sTemplate = file_get_contents(CONST_DataDir.'/sql/words_from_search_name.sql');
runSQLScript($sTemplate, true, true);
(clone($oNominatimCmd))->addParams('refresh', '--word-counts')->run();
}
if ($aResult['index']) {
$oCmd = (clone $oIndexCmd)
->addParams('--minrank', $aResult['index-rank']);
$oCmd->run();
(clone $oNominatimCmd)->addParams('index', '--minrank', $aResult['index-rank'])->run();
}
if ($aResult['update-address-levels']) {
@ -421,7 +413,8 @@ if ($aResult['import-osmosis'] || $aResult['import-osmosis-all']) {
if (!$aResult['no-index']) {
$fCMDStartTime = time();
$oThisIndexCmd = clone($oIndexCmd);
$oThisIndexCmd = clone($oNominatimCmd);
$oThisIndexCmd->addParams('index');
echo $oThisIndexCmd->escapedCmd()."\n";
$iErrorLevel = $oThisIndexCmd->run();
if ($iErrorLevel) {

View File

@ -8,9 +8,13 @@ import argparse
import logging
from pathlib import Path
import psycopg2
from .config import Configuration
from .tools.exec_utils import run_legacy_script, run_api_script
LOG = logging.getLogger()
def _num_system_cpus():
try:
cpus = len(os.sched_getaffinity(0))
@ -366,32 +370,35 @@ class UpdateRefresh:
@staticmethod
def run(args):
if args.postcodes:
run_legacy_script('update.php', '--calculate-postcodes',
nominatim_env=args, throw_on_fail=True)
if args.word_counts:
run_legacy_script('update.php', '--recompute-word-counts',
nominatim_env=args, throw_on_fail=True)
if args.address_levels:
run_legacy_script('update.php', '--update-address-levels',
nominatim_env=args, throw_on_fail=True)
if args.functions:
params = ['setup.php', '--create-functions', '--create-partition-functions']
if args.diffs:
params.append('--enable-diff-updates')
if args.enable_debug_statements:
params.append('--enable-debug-statements')
run_legacy_script(*params, nominatim_env=args, throw_on_fail=True)
if args.wiki_data:
run_legacy_script('setup.php', '--import-wikipedia-articles',
nominatim_env=args, throw_on_fail=True)
# Attention: importance MUST come after wiki data import.
if args.importance:
run_legacy_script('update.php', '--recompute-importance',
nominatim_env=args, throw_on_fail=True)
if args.website:
run_legacy_script('setup.php', '--setup-website',
nominatim_env=args, throw_on_fail=True)
import nominatim.tools.refresh
with psycopg2.connect(args.config.get_libpq_dsn()) as conn:
if args.postcodes:
LOG.warning("Update postcodes centroid")
nominatim.tools.refresh.update_postcodes(conn, args.data_dir)
if args.word_counts:
LOG.warning('Recompute frequency of full-word search terms')
nominatim.tools.refresh.recompute_word_counts(conn, args.data_dir)
if args.address_levels:
run_legacy_script('update.php', '--update-address-levels',
nominatim_env=args, throw_on_fail=True)
if args.functions:
params = ['setup.php', '--create-functions', '--create-partition-functions']
if args.diffs:
params.append('--enable-diff-updates')
if args.enable_debug_statements:
params.append('--enable-debug-statements')
run_legacy_script(*params, nominatim_env=args, throw_on_fail=True)
if args.wiki_data:
run_legacy_script('setup.php', '--import-wikipedia-articles',
nominatim_env=args, throw_on_fail=True)
# Attention: importance MUST come after wiki data import.
if args.importance:
run_legacy_script('update.php', '--recompute-importance',
nominatim_env=args, throw_on_fail=True)
if args.website:
run_legacy_script('setup.php', '--setup-website',
nominatim_env=args, throw_on_fail=True)
return 0

11
nominatim/db/utils.py Normal file
View File

@ -0,0 +1,11 @@
"""
Helper functions for handling DB accesses.
"""
def execute_file(conn, fname):
    """ Run the complete content of an SQL file on the given connection.

        `conn` is an open database connection that provides `cursor()`,
        `fname` is a path object pointing to the SQL file. The file is
        read in one go and handed to the database in a single
        `execute()` call. No commit is issued here.
    """
    sql = fname.read_text()

    with conn.cursor() as cur:
        cur.execute(sql)

View File

@ -0,0 +1,16 @@
"""
Functions for bringing auxiliary data in the database up-to-date.
"""
from ..db.utils import execute_file
def update_postcodes(conn, datadir):
    """ Run the postcode update script `sql/update-postcodes.sql` from
        the data directory `datadir` against the database.

        `conn` is an open connection to the database and `datadir` the
        path object of the directory that contains the `sql` folder.
    """
    sql_file = datadir / 'sql' / 'update-postcodes.sql'
    execute_file(conn, sql_file)
def recompute_word_counts(conn, datadir):
    """ Run the word count script `sql/words_from_search_name.sql` from
        the data directory `datadir` against the database.

        `conn` is an open connection to the database and `datadir` the
        path object of the directory that contains the `sql` folder.
    """
    sql_file = datadir / 'sql' / 'words_from_search_name.sql'
    execute_file(conn, sql_file)

View File

@ -6,6 +6,7 @@ import pytest
import nominatim.cli
import nominatim.indexer.indexer
import nominatim.tools.refresh
def call_nominatim(*args):
return nominatim.cli.nominatim(module_dir='build/module',
@ -99,21 +100,30 @@ def test_index_command(monkeypatch, temp_db, params, do_bnds, do_ranks):
@pytest.mark.parametrize("command,params", [
('postcodes', ('update.php', '--calculate-postcodes')),
('word-counts', ('update.php', '--recompute-word-counts')),
('address-levels', ('update.php', '--update-address-levels')),
('functions', ('setup.php',)),
('wiki-data', ('setup.php', '--import-wikipedia-articles')),
('importance', ('update.php', '--recompute-importance')),
('website', ('setup.php', '--setup-website')),
])
def test_refresh_command(mock_run_legacy, command, params):
def test_refresh_legacy_command(mock_run_legacy, command, params):
assert 0 == call_nominatim('refresh', '--' + command)
assert mock_run_legacy.called == 1
assert len(mock_run_legacy.last_args) >= len(params)
assert mock_run_legacy.last_args[:len(params)] == params
@pytest.mark.parametrize("command,func", [
                         ('postcodes', 'update_postcodes'),
                         ('word-counts', 'recompute_word_counts'),
                         ])
def test_refresh_command(monkeypatch, command, func):
    """ Check that `refresh --<command>` returns 0 and calls the named
        function of nominatim.tools.refresh exactly once.
    """
    # Swap the real refresh function for a capture object.
    captured = MockParamCapture()
    monkeypatch.setattr(nominatim.tools.refresh, func, captured)

    exit_code = call_nominatim('refresh', '--' + command)

    assert exit_code == 0
    assert captured.called == 1
def test_refresh_importance_computed_after_wiki_import(mock_run_legacy):
assert 0 == call_nominatim('refresh', '--importance', '--wiki-data')

View File

@ -0,0 +1,33 @@
"""
Tests for DB utility functions in db.utils
"""
import psycopg2
import pytest
import nominatim.db.utils as db_utils
def test_execute_file_success(temp_db, tmp_path):
    """ A valid SQL file is executed completely: the table it creates
        exists afterwards and holds the inserted row.
    """
    sql_file = tmp_path / 'test.sql'
    sql_file.write_text('CREATE TABLE test (id INT);\nINSERT INTO test VALUES(56);')

    with psycopg2.connect('dbname=' + temp_db) as conn:
        db_utils.execute_file(conn, sql_file)

        # Query on the same connection to see the result of the script.
        with conn.cursor() as cur:
            cur.execute('SELECT * FROM test')

            assert cur.rowcount == 1
            assert cur.fetchone()[0] == 56
def test_execute_file_bad_file(temp_db, tmp_path):
    """ A path to a file that does not exist raises FileNotFoundError.
    """
    missing_file = tmp_path / 'test2.sql'

    with psycopg2.connect('dbname=' + temp_db) as conn:
        with pytest.raises(FileNotFoundError):
            db_utils.execute_file(conn, missing_file)
def test_execute_file_bad_sql(temp_db, tmp_path):
    """ A file with an invalid SQL statement raises
        psycopg2.ProgrammingError.
    """
    sql_file = tmp_path / 'test.sql'
    sql_file.write_text('CREATE STABLE test (id INT)')

    with psycopg2.connect('dbname=' + temp_db) as conn:
        with pytest.raises(psycopg2.ProgrammingError):
            db_utils.execute_file(conn, sql_file)