port check-database function to python

This change also adapts the hints to use the nominatim tool.
Slightly changed checks, so that they are just as effective on
a frozen database.
This commit is contained in:
Sarah Hoffmann 2021-02-18 17:32:30 +01:00
parent a60c34bded
commit b169e4c88c
7 changed files with 322 additions and 278 deletions

View File

@ -252,76 +252,6 @@ class DB
return $this->exec('DROP TABLE IF EXISTS '.$sTableName.' CASCADE') == 0;
}
/**
* Check if an index exists in the database. Optional filtered by tablename
*
* @param string $sTableName
*
* @return boolean
*/
public function indexExists($sIndexName, $sTableName = null)
{
return in_array($sIndexName, $this->getListOfIndices($sTableName));
}
/**
* Returns a list of index names in the database, optional filtered by tablename
*
* @param string $sTableName
*
* @return array
*/
public function getListOfIndices($sTableName = null)
{
// table_name | index_name | column_name
// -----------------------+---------------------------------+--------------
// country_name | idx_country_name_country_code | country_code
// country_osm_grid | idx_country_osm_grid_geometry | geometry
// import_polygon_delete | idx_import_polygon_delete_osmid | osm_id
// import_polygon_delete | idx_import_polygon_delete_osmid | osm_type
// import_polygon_error | idx_import_polygon_error_osmid | osm_id
// import_polygon_error | idx_import_polygon_error_osmid | osm_type
$sSql = <<< END
SELECT
t.relname as table_name,
i.relname as index_name,
a.attname as column_name
FROM
pg_class t,
pg_class i,
pg_index ix,
pg_attribute a
WHERE
t.oid = ix.indrelid
and i.oid = ix.indexrelid
and a.attrelid = t.oid
and a.attnum = ANY(ix.indkey)
and t.relkind = 'r'
and i.relname NOT LIKE 'pg_%'
FILTERS
ORDER BY
t.relname,
i.relname,
a.attname
END;
$aRows = null;
if ($sTableName) {
$sSql = str_replace('FILTERS', 'and t.relname = :tablename', $sSql);
$aRows = $this->getAll($sSql, array(':tablename' => $sTableName));
} else {
$sSql = str_replace('FILTERS', '', $sSql);
$aRows = $this->getAll($sSql);
}
$aIndexNames = array_unique(array_map(function ($aRow) {
return $aRow['index_name'];
}, $aRows));
sort($aIndexNames);
return $aIndexNames;
}
/**
* Tries to connect to the database but on failure doesn't throw an exception.
*

View File

@ -5,197 +5,7 @@ require_once(CONST_LibDir.'/init-cmd.php');
loadSettings(getcwd());
$term_colors = array(
'green' => "\033[92m",
'red' => "\x1B[31m",
'normal' => "\033[0m"
);
(new \Nominatim\Shell(getSetting('NOMINATIM_TOOL')))
->addParams('admin', '--check-database')
->run();
$print_success = function ($message = 'OK') use ($term_colors) {
echo $term_colors['green'].$message.$term_colors['normal']."\n";
};
$print_fail = function ($message = 'Failed') use ($term_colors) {
echo $term_colors['red'].$message.$term_colors['normal']."\n";
};
$oDB = new Nominatim\DB;
function isReverseOnlyInstallation()
{
global $oDB;
return !$oDB->tableExists('search_name');
}
// Check (guess) if the setup.php included --drop
function isNoUpdateInstallation()
{
global $oDB;
return $oDB->tableExists('placex') && !$oDB->tableExists('planet_osm_rels') ;
}
echo 'Checking database got created ... ';
if ($oDB->checkConnection()) {
$print_success();
} else {
$print_fail();
echo <<< END
Hints:
* Is the database server started?
* Check the NOMINATIM_DATABASE_DSN variable in your local .env
* Try connecting to the database with the same settings
END;
exit(1);
}
echo 'Checking nominatim.so module installed ... ';
$sStandardWord = $oDB->getOne("SELECT make_standard_name('a')");
if ($sStandardWord === 'a') {
$print_success();
} else {
$print_fail();
echo <<< END
The Postgresql extension nominatim.so was not found in the database.
Hints:
* Check the output of the CMmake/make installation step
* Does nominatim.so exist?
* Does nominatim.so exist on the database server?
* Can nominatim.so be accessed by the database user?
END;
exit(1);
}
if (!isNoUpdateInstallation()) {
echo 'Checking place table ... ';
if ($oDB->tableExists('place')) {
$print_success();
} else {
$print_fail();
echo <<< END
* The import didn't finish.
Hints:
* Check the output of the utils/setup.php you ran.
Usually the osm2pgsql step failed. Check for errors related to
* the file you imported not containing any places
* harddrive full
* out of memory (RAM)
* osm2pgsql killed by other scripts, for consuming to much memory
END;
exit(1);
}
}
echo 'Checking indexing status ... ';
$iUnindexed = $oDB->getOne('SELECT count(*) FROM placex WHERE indexed_status > 0');
if ($iUnindexed == 0) {
$print_success();
} else {
$print_fail();
echo <<< END
The indexing didn't finish. There is still $iUnindexed places. See the
question 'Can a stopped/killed import process be resumed?' in the
troubleshooting guide.
END;
exit(1);
}
echo "Search index creation\n";
$aExpectedIndices = array(
// sql/indices.src.sql
'idx_word_word_id',
'idx_place_addressline_address_place_id',
'idx_placex_rank_search',
'idx_placex_rank_address',
'idx_placex_parent_place_id',
'idx_placex_geometry_reverse_lookuppolygon',
'idx_placex_geometry_reverse_placenode',
'idx_osmline_parent_place_id',
'idx_osmline_parent_osm_id',
'idx_postcode_id',
'idx_postcode_postcode'
);
if (!isReverseOnlyInstallation()) {
$aExpectedIndices = array_merge($aExpectedIndices, array(
// sql/indices_search.src.sql
'idx_search_name_nameaddress_vector',
'idx_search_name_name_vector',
'idx_search_name_centroid'
));
}
if (!isNoUpdateInstallation()) {
$aExpectedIndices = array_merge($aExpectedIndices, array(
'idx_placex_pendingsector',
'idx_location_area_country_place_id',
'idx_place_osm_unique',
));
}
foreach ($aExpectedIndices as $sExpectedIndex) {
echo "Checking index $sExpectedIndex ... ";
if ($oDB->indexExists($sExpectedIndex)) {
$print_success();
} else {
$print_fail();
echo <<< END
Hints:
* Run './utils/setup.php --create-search-indices --ignore-errors' to
create missing indices.
END;
exit(1);
}
}
echo 'Checking search indices are valid ... ';
$sSQL = <<< END
SELECT relname
FROM pg_class, pg_index
WHERE pg_index.indisvalid = false
AND pg_index.indexrelid = pg_class.oid;
END;
$aInvalid = $oDB->getCol($sSQL);
if (empty($aInvalid)) {
$print_success();
} else {
$print_fail();
echo <<< END
At least one index is invalid. That can happen, e.g. when index creation was
disrupted and later restarted. You should delete the affected indices and
run the index stage of setup again.
See the question 'Can a stopped/killed import process be resumed?' in the
troubleshooting guide.
Affected indices:
END;
echo join(', ', $aInvalid) . "\n";
exit(1);
}
if (getSettingBool('USE_US_TIGER_DATA')) {
echo 'Checking TIGER table exists ... ';
if ($oDB->tableExists('location_property_tiger')) {
$print_success();
} else {
$print_fail();
echo <<< END
Table 'location_property_tiger' does not exist. Run the TIGER data
import again.
END;
exit(1);
}
}
exit(0);

View File

@ -1,6 +1,8 @@
"""
Implementation of the 'admin' subcommand.
"""
import logging
from ..tools.exec_utils import run_legacy_script
from ..db.connection import connect
@ -9,6 +11,8 @@ from ..db.connection import connect
# Using non-top-level imports to avoid eventually unused imports.
# pylint: disable=E0012,C0415
LOG = logging.getLogger()
class AdminFuncs:
"""\
Analyse and maintain the database.
@ -39,14 +43,17 @@ class AdminFuncs:
@staticmethod
def run(args):
from ..tools import admin
if args.warm:
AdminFuncs._warm(args)
if args.check_database:
run_legacy_script('check_import_finished.php', nominatim_env=args)
LOG.warning('Checking database')
from ..tools import check_database
return check_database.check_database(args.config)
if args.analyse_indexing:
LOG.warning('Analysing performance of indexing function')
from ..tools import admin
conn = connect(args.config.get_libpq_dsn())
admin.analyse_indexing(conn, osm_id=args.osm_id, place_id=args.place_id)
conn.close()
@ -56,6 +63,7 @@ class AdminFuncs:
@staticmethod
def _warm(args):
LOG.warning('Warming database caches')
params = ['warm.php']
if args.target == 'reverse':
params.append('--reverse-only')

View File

@ -7,6 +7,8 @@ import psycopg2
import psycopg2.extensions
import psycopg2.extras
from ..errors import UsageError
class _Cursor(psycopg2.extras.DictCursor):
""" A cursor returning dict-like objects and providing specialised
execution functions.
@ -42,14 +44,34 @@ class _Connection(psycopg2.extensions.connection):
"""
return super().cursor(cursor_factory=cursor_factory, **kwargs)
def table_exists(self, table):
""" Check that a table with the given name exists in the database.
"""
with self.cursor() as cur:
num = cur.scalar("""SELECT count(*) FROM pg_tables
WHERE tablename = %s""", (table, ))
WHERE tablename = %s and schemaname = 'public'""", (table, ))
return num == 1
def index_exists(self, index, table=None):
""" Check that an index with the given name exists in the database.
If table is not None then the index must relate to the given
table.
"""
with self.cursor() as cur:
cur.execute("""SELECT tablename FROM pg_indexes
WHERE indexname = %s and schemaname = 'public'""", (index, ))
if cur.rowcount == 0:
return False
if table is not None:
row = cur.fetchone()
return row[0] == table
return True
def server_version_tuple(self):
""" Return the server version as a tuple of (major, minor).
Converts correctly for pre-10 and post-10 PostgreSQL versions.
@ -64,4 +86,7 @@ def connect(dsn):
""" Open a connection to the database using the specialised connection
factory.
"""
return psycopg2.connect(dsn, connection_factory=_Connection)
try:
return psycopg2.connect(dsn, connection_factory=_Connection)
except psycopg2.OperationalError as err:
raise UsageError("Cannot connect to database: {}".format(err)) from err

View File

@ -0,0 +1,269 @@
"""
Collection of functions that check if the database is complete and functional.
"""
from enum import Enum
from textwrap import dedent
import psycopg2
from ..db.connection import connect
from ..errors import UsageError
CHECKLIST = []
class CheckState(Enum):
""" Possible states of a check. FATAL stops check execution entirely.
"""
OK = 0
FAIL = 1
FATAL = 2
NOT_APPLICABLE = 3
def _check(hint=None):
""" Decorator for checks. It adds the function to the list of
checks to execute and adds the code for printing progress messages.
"""
def decorator(func):
title = func.__doc__.split('\n', 1)[0].strip()
def run_check(conn, config):
print(title, end=' ... ')
ret = func(conn, config)
if isinstance(ret, tuple):
ret, params = ret
else:
params = {}
if ret == CheckState.OK:
print('\033[92mOK\033[0m')
elif ret == CheckState.NOT_APPLICABLE:
print('not applicable')
else:
print('\x1B[31mFailed\033[0m')
if hint:
print(dedent(hint.format(**params)))
return ret
CHECKLIST.append(run_check)
return run_check
return decorator
class _BadConnection: # pylint: disable=R0903
def __init__(self, msg):
self.msg = msg
def close(self):
""" Dummy function to provide the implementation.
"""
def check_database(config):
""" Run a number of checks on the database and return the status.
"""
try:
conn = connect(config.get_libpq_dsn())
except UsageError as err:
conn = _BadConnection(str(err))
overall_result = 0
for check in CHECKLIST:
ret = check(conn, config)
if ret == CheckState.FATAL:
conn.close()
return 1
if ret != CheckState.OK:
overall_result = 1
conn.close()
return overall_result
def _get_indexes(conn):
indexes = ['idx_word_word_id',
'idx_place_addressline_address_place_id',
'idx_placex_rank_search',
'idx_placex_rank_address',
'idx_placex_parent_place_id',
'idx_placex_geometry_reverse_lookuppolygon',
'idx_placex_geometry_reverse_placenode',
'idx_osmline_parent_place_id',
'idx_osmline_parent_osm_id',
'idx_postcode_id',
'idx_postcode_postcode'
]
if conn.table_exists('search_name'):
indexes.extend(('idx_search_name_nameaddress_vector',
'idx_search_name_name_vector',
'idx_search_name_centroid'))
if conn.table_exists('place'):
indexes.extend(('idx_placex_pendingsector',
'idx_location_area_country_place_id',
'idx_place_osm_unique'
))
return indexes
### CHECK FUNCTIONS
#
# Functions are exectured in the order they appear here.
@_check(hint="""\
{error}
Hints:
* Is the database server started?
* Check the NOMINATIM_DATABASE_DSN variable in your local .env
* Try connecting to the database with the same settings
Project directory: {config.project_dir}
Current setting of NOMINATIM_DATABASE_DSN: {config.DATABASE_DSN}
""")
def check_connection(conn, config):
""" Checking database connection
"""
if isinstance(conn, _BadConnection):
return CheckState.FATAL, dict(error=conn.msg, config=config)
return CheckState.OK
@_check(hint="""\
placex table not found
Hints:
* Are you connecting to the right database?
* Did the import process finish without errors?
Project directory: {config.project_dir}
Current setting of NOMINATIM_DATABASE_DSN: {config.DATABASE_DSN}
""")
def check_placex_table(conn, config):
""" Checking for placex table
"""
if conn.table_exists('placex'):
return CheckState.OK
return CheckState.FATAL, dict(config=config)
@_check(hint="""placex table has no data. Did the import finish sucessfully?""")
def check_placex_size(conn, config): # pylint: disable=W0613
""" Checking for placex content
"""
with conn.cursor() as cur:
cnt = cur.scalar('SELECT count(*) FROM (SELECT * FROM placex LIMIT 100) x')
return CheckState.OK if cnt > 0 else CheckState.FATAL
@_check(hint="""\
The Postgresql extension nominatim.so was not correctly loaded.
Error: {error}
Hints:
* Check the output of the CMmake/make installation step
* Does nominatim.so exist?
* Does nominatim.so exist on the database server?
* Can nominatim.so be accessed by the database user?
""")
def check_module(conn, config): # pylint: disable=W0613
""" Checking that nominatim.so module is installed
"""
with conn.cursor() as cur:
try:
out = cur.scalar("SELECT make_standard_name('a')")
except psycopg2.ProgrammingError as err:
return CheckState.FAIL, dict(error=str(err))
if out != 'a':
return CheckState.FAIL, dict(error='Unexpected result for make_standard_name()')
return CheckState.OK
@_check(hint="""\
The indexing didn't finish. {count} entries are not yet indexed.
To index the remaining entries, run: {index_cmd}
""")
def check_indexing(conn, config): # pylint: disable=W0613
""" Checking indexing status
"""
with conn.cursor() as cur:
cnt = cur.scalar('SELECT count(*) FROM placex WHERE indexed_status > 0')
if cnt == 0:
return CheckState.OK
if conn.index_exists('idx_word_word_id'):
# Likely just an interrupted update.
index_cmd = 'nominatim index'
else:
# Looks like the import process got interupted.
index_cmd = 'nominatim import --continue indexing'
return CheckState.FAIL, dict(count=cnt, index_cmd=index_cmd)
@_check(hint="""\
The following indexes are missing:
{indexes}
Rerun the index creation with: nominatim import --continue db-postprocess
""")
def check_database_indexes(conn, config): # pylint: disable=W0613
""" Checking that database indexes are complete
"""
missing = []
for index in _get_indexes(conn):
if not conn.index_exists(index):
missing.append(index)
if missing:
return CheckState.FAIL, dict(indexes='\n '.join(missing))
return CheckState.OK
@_check(hint="""\
At least one index is invalid. That can happen, e.g. when index creation was
disrupted and later restarted. You should delete the affected indices
and recreate them.
Invalid indexes:
{indexes}
""")
def check_database_index_valid(conn, config): # pylint: disable=W0613
""" Checking that all database indexes are valid
"""
with conn.cursor() as cur:
cur.execute(""" SELECT relname FROM pg_class, pg_index
WHERE pg_index.indisvalid = false
AND pg_index.indexrelid = pg_class.oid""")
broken = list(cur)
if broken:
return CheckState.FAIL, dict(indexes='\n '.join(broken))
return CheckState.OK
@_check(hint="""\
{error}
Run TIGER import again: nominatim add-data --tiger-data <DIR>
""")
def check_tiger_table(conn, config):
""" Checking TIGER external data table.
"""
if not config.get_bool('USE_US_TIGER_DATA'):
return CheckState.NOT_APPLICABLE
if not conn.table_exists('location_property_tiger'):
return CheckState.FAIL, dict(error='TIGER data table not found.')
with conn.cursor() as cur:
if cur.scalar('SELECT count(*) FROM location_property_tiger') == 0:
return CheckState.FAIL, dict(error='TIGER data table is empty.')
return CheckState.OK

View File

@ -164,14 +164,6 @@ class DBTest extends \PHPUnit\Framework\TestCase
$this->assertTrue($oDB->tableExists('table1'));
$this->assertFalse($oDB->tableExists('table99'));
$this->assertFalse($oDB->tableExists(null));
$this->assertEmpty($oDB->getListOfIndices());
$oDB->exec('CREATE UNIQUE INDEX table1_index ON table1 (id)');
$this->assertEquals(
array('table1_index'),
$oDB->getListOfIndices()
);
$this->assertEmpty($oDB->getListOfIndices('table2'));
}
# select queries

View File

@ -15,9 +15,11 @@ import nominatim.clicmd.api
import nominatim.clicmd.refresh
import nominatim.clicmd.admin
import nominatim.indexer.indexer
import nominatim.tools.admin
import nominatim.tools.check_database
import nominatim.tools.freeze
import nominatim.tools.refresh
import nominatim.tools.replication
import nominatim.tools.freeze
from nominatim.errors import UsageError
from nominatim.db import status
@ -95,8 +97,7 @@ def test_freeze_command(mock_func_factory, temp_db):
@pytest.mark.parametrize("params", [('--warm', ),
('--warm', '--reverse-only'),
('--warm', '--search-only'),
('--check-database', )])
('--warm', '--search-only')])
def test_admin_command_legacy(mock_func_factory, params):
mock_run_legacy = mock_func_factory(nominatim.clicmd.admin, 'run_legacy_script')
@ -104,6 +105,7 @@ def test_admin_command_legacy(mock_func_factory, params):
assert mock_run_legacy.called == 1
@pytest.mark.parametrize("func, params", [('analyse_indexing', ('--analyse-indexing', ))])
def test_admin_command_tool(temp_db, mock_func_factory, func, params):
mock = mock_func_factory(nominatim.tools.admin, func)
@ -111,6 +113,14 @@ def test_admin_command_tool(temp_db, mock_func_factory, func, params):
assert 0 == call_nominatim('admin', *params)
assert mock.called == 1
def test_admin_command_check_database(mock_func_factory):
mock = mock_func_factory(nominatim.tools.check_database, 'check_database')
assert 0 == call_nominatim('admin', '--check-database')
assert mock.called == 1
@pytest.mark.parametrize("name,oid", [('file', 'foo.osm'), ('diff', 'foo.osc'),
('node', 12), ('way', 8), ('relation', 32)])
def test_add_data_command(mock_run_legacy, name, oid):