mirror of
https://github.com/osm-search/Nominatim.git
synced 2024-11-29 08:36:24 +03:00
188 lines
8.6 KiB
Python
188 lines
8.6 KiB
Python
# SPDX-License-Identifier: GPL-2.0-only
|
|
#
|
|
# This file is part of Nominatim. (https://nominatim.org)
|
|
#
|
|
# Copyright (C) 2022 by the Nominatim developer community.
|
|
# For a full list of authors see the git log.
|
|
"""
|
|
Implementation of 'refresh' subcommand.
|
|
"""
|
|
from typing import Tuple, Optional
|
|
import argparse
|
|
import logging
|
|
from pathlib import Path
|
|
|
|
from nominatim.config import Configuration
|
|
from nominatim.db.connection import connect
|
|
from nominatim.tokenizer.base import AbstractTokenizer
|
|
from nominatim.clicmd.args import NominatimArgs
|
|
|
|
# Do not repeat documentation of subcommand classes.
|
|
# pylint: disable=C0111
|
|
# Using non-top-level imports to avoid eventually unused imports.
|
|
# pylint: disable=E0012,C0415
|
|
|
|
LOG = logging.getLogger()
|
|
|
|
def _parse_osm_object(obj: str) -> Tuple[str, int]:
|
|
""" Parse the given argument into a tuple of OSM type and ID.
|
|
Raises an ArgumentError if the format is not recognized.
|
|
"""
|
|
if len(obj) < 2 or obj[0].lower() not in 'nrw' or not obj[1:].isdigit():
|
|
raise argparse.ArgumentTypeError("Cannot parse OSM ID. Expect format: [N|W|R]<id>.")
|
|
|
|
return (obj[0].upper(), int(obj[1:]))
|
|
|
|
|
|
class UpdateRefresh:
|
|
"""\
|
|
Recompute auxiliary data used by the indexing process.
|
|
|
|
This sub-commands updates various static data and functions in the database.
|
|
It usually needs to be run after changing various aspects of the
|
|
configuration. The configuration documentation will mention the exact
|
|
command to use in such case.
|
|
|
|
Warning: the 'update' command must not be run in parallel with other update
|
|
commands like 'replication' or 'add-data'.
|
|
"""
|
|
def __init__(self) -> None:
|
|
self.tokenizer: Optional[AbstractTokenizer] = None
|
|
|
|
def add_args(self, parser: argparse.ArgumentParser) -> None:
|
|
group = parser.add_argument_group('Data arguments')
|
|
group.add_argument('--postcodes', action='store_true',
|
|
help='Update postcode centroid table')
|
|
group.add_argument('--word-tokens', action='store_true',
|
|
help='Clean up search terms')
|
|
group.add_argument('--word-counts', action='store_true',
|
|
help='Compute frequency of full-word search terms')
|
|
group.add_argument('--address-levels', action='store_true',
|
|
help='Reimport address level configuration')
|
|
group.add_argument('--functions', action='store_true',
|
|
help='Update the PL/pgSQL functions in the database')
|
|
group.add_argument('--wiki-data', action='store_true',
|
|
help='Update Wikipedia/data importance numbers')
|
|
group.add_argument('--secondary-importance', action='store_true',
|
|
help='Update secondary importance raster data')
|
|
group.add_argument('--importance', action='store_true',
|
|
help='Recompute place importances (expensive!)')
|
|
group.add_argument('--website', action='store_true',
|
|
help='Refresh the directory that serves the scripts for the web API')
|
|
group.add_argument('--data-object', action='append',
|
|
type=_parse_osm_object, metavar='OBJECT',
|
|
help='Mark the given OSM object as requiring an update'
|
|
' (format: [NWR]<id>)')
|
|
group.add_argument('--data-area', action='append',
|
|
type=_parse_osm_object, metavar='OBJECT',
|
|
help='Mark the area around the given OSM object as requiring an update'
|
|
' (format: [NWR]<id>)')
|
|
|
|
group = parser.add_argument_group('Arguments for function refresh')
|
|
group.add_argument('--no-diff-updates', action='store_false', dest='diffs',
|
|
help='Do not enable code for propagating updates')
|
|
group.add_argument('--enable-debug-statements', action='store_true',
|
|
help='Enable debug warning statements in functions')
|
|
|
|
|
|
def run(self, args: NominatimArgs) -> int: #pylint: disable=too-many-branches, too-many-statements
|
|
from ..tools import refresh, postcodes
|
|
from ..indexer.indexer import Indexer
|
|
|
|
need_function_refresh = args.functions
|
|
|
|
if args.postcodes:
|
|
if postcodes.can_compute(args.config.get_libpq_dsn()):
|
|
LOG.warning("Update postcodes centroid")
|
|
tokenizer = self._get_tokenizer(args.config)
|
|
postcodes.update_postcodes(args.config.get_libpq_dsn(),
|
|
args.project_dir, tokenizer)
|
|
indexer = Indexer(args.config.get_libpq_dsn(), tokenizer,
|
|
args.threads or 1)
|
|
indexer.index_postcodes()
|
|
else:
|
|
LOG.error("The place table doesn't exist. "
|
|
"Postcode updates on a frozen database is not possible.")
|
|
|
|
if args.word_tokens:
|
|
LOG.warning('Updating word tokens')
|
|
tokenizer = self._get_tokenizer(args.config)
|
|
tokenizer.update_word_tokens()
|
|
|
|
if args.word_counts:
|
|
LOG.warning('Recompute word statistics')
|
|
self._get_tokenizer(args.config).update_statistics(args.config,
|
|
threads=args.threads or 1)
|
|
|
|
if args.address_levels:
|
|
LOG.warning('Updating address levels')
|
|
with connect(args.config.get_libpq_dsn()) as conn:
|
|
refresh.load_address_levels_from_config(conn, args.config)
|
|
|
|
# Attention: must come BEFORE functions
|
|
if args.secondary_importance:
|
|
with connect(args.config.get_libpq_dsn()) as conn:
|
|
# If the table did not exist before, then the importance code
|
|
# needs to be enabled.
|
|
if not conn.table_exists('secondary_importance'):
|
|
args.functions = True
|
|
|
|
LOG.warning('Import secondary importance raster data from %s', args.project_dir)
|
|
if refresh.import_secondary_importance(args.config.get_libpq_dsn(),
|
|
args.project_dir) > 0:
|
|
LOG.fatal('FATAL: Cannot update secondary importance raster data')
|
|
return 1
|
|
need_function_refresh = True
|
|
|
|
if args.wiki_data:
|
|
data_path = Path(args.config.WIKIPEDIA_DATA_PATH
|
|
or args.project_dir)
|
|
LOG.warning('Import wikipedia article importance from %s', data_path)
|
|
if refresh.import_wikipedia_articles(args.config.get_libpq_dsn(),
|
|
data_path) > 0:
|
|
LOG.fatal('FATAL: Wikipedia importance file not found in %s', data_path)
|
|
return 1
|
|
need_function_refresh = True
|
|
|
|
if need_function_refresh:
|
|
LOG.warning('Create functions')
|
|
with connect(args.config.get_libpq_dsn()) as conn:
|
|
refresh.create_functions(conn, args.config,
|
|
args.diffs, args.enable_debug_statements)
|
|
self._get_tokenizer(args.config).update_sql_functions(args.config)
|
|
|
|
# Attention: importance MUST come after wiki data import and after functions.
|
|
if args.importance:
|
|
LOG.warning('Update importance values for database')
|
|
with connect(args.config.get_libpq_dsn()) as conn:
|
|
refresh.recompute_importance(conn)
|
|
|
|
if args.website:
|
|
webdir = args.project_dir / 'website'
|
|
LOG.warning('Setting up website directory at %s', webdir)
|
|
# This is a little bit hacky: call the tokenizer setup, so that
|
|
# the tokenizer directory gets repopulated as well, in case it
|
|
# wasn't there yet.
|
|
self._get_tokenizer(args.config)
|
|
with connect(args.config.get_libpq_dsn()) as conn:
|
|
refresh.setup_website(webdir, args.config, conn)
|
|
|
|
if args.data_object or args.data_area:
|
|
with connect(args.config.get_libpq_dsn()) as conn:
|
|
for obj in args.data_object or []:
|
|
refresh.invalidate_osm_object(*obj, conn, recursive=False)
|
|
for obj in args.data_area or []:
|
|
refresh.invalidate_osm_object(*obj, conn, recursive=True)
|
|
conn.commit()
|
|
|
|
return 0
|
|
|
|
|
|
def _get_tokenizer(self, config: Configuration) -> AbstractTokenizer:
|
|
if self.tokenizer is None:
|
|
from ..tokenizer import factory as tokenizer_factory
|
|
|
|
self.tokenizer = tokenizer_factory.get_tokenizer_for_db(config)
|
|
|
|
return self.tokenizer
|