mirror of
https://github.com/osm-search/Nominatim.git
synced 2024-12-26 06:22:13 +03:00
Merge pull request #2291 from AntoJvlt/special-phrases-statistics
Special phrases statistics
This commit is contained in:
commit
46e8c6b112
@ -2,7 +2,7 @@
|
|||||||
Implementation of the 'special-phrases' command.
|
Implementation of the 'special-phrases' command.
|
||||||
"""
|
"""
|
||||||
import logging
|
import logging
|
||||||
from nominatim.tools.special_phrases import SpecialPhrasesImporter
|
from nominatim.tools import SpecialPhrasesImporter
|
||||||
from nominatim.db.connection import connect
|
from nominatim.db.connection import connect
|
||||||
|
|
||||||
LOG = logging.getLogger()
|
LOG = logging.getLogger()
|
||||||
|
@ -2,3 +2,5 @@
|
|||||||
Module with functions for importing, updating Nominatim databases
|
Module with functions for importing, updating Nominatim databases
|
||||||
as well as general maintenance helpers.
|
as well as general maintenance helpers.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
from nominatim.tools.special_phrases.special_phrases_importer import SpecialPhrasesImporter
|
||||||
|
0
nominatim/tools/special_phrases/__init__.py
Normal file
0
nominatim/tools/special_phrases/__init__.py
Normal file
138
nominatim/tools/special_phrases/importer_statistics.py
Normal file
138
nominatim/tools/special_phrases/importer_statistics.py
Normal file
@ -0,0 +1,138 @@
|
|||||||
|
"""
|
||||||
|
Contains the class which handles statistics for the
|
||||||
|
import of special phrases.
|
||||||
|
"""
|
||||||
|
import logging
|
||||||
|
LOG = logging.getLogger()
|
||||||
|
|
||||||
|
class SpecialPhrasesImporterStatistics:
    # pylint: disable-msg=too-many-instance-attributes
    """
        Bookkeeping for the special phrases import.

        Two families of counters are maintained: per-language ones
        (`lang_*`), which are cleared after each language has been
        reported, and import-wide ones (`global_*` and `tables_*`),
        which are cleared after the final report.
    """

    def __init__(self):
        self._set_lang_values_to_0()
        self._set_global_values_to_0()

    def _set_global_values_to_0(self):
        """
            Reset every import-wide counter to 0.
        """
        self.tables_created = self.tables_deleted = self.tables_ignored = 0
        self.global_phrases_invalid = self.global_phrases_added = 0
        self.global_phrases_ignored = self.global_phrases_deleted = 0

    def _set_lang_values_to_0(self):
        """
            Reset every counter of the language currently
            being processed to 0.
        """
        self.lang_phrases_invalid = self.lang_phrases_added = 0
        self.lang_phrases_ignored = 0

    def notify_one_phrase_invalid(self):
        """
            Record one invalid phrase, both for the current
            language and for the whole import.
        """
        self.global_phrases_invalid += 1
        self.lang_phrases_invalid += 1

    def notify_one_phrase_added(self):
        """
            Record one added phrase, both for the current
            language and for the whole import.
        """
        self.global_phrases_added += 1
        self.lang_phrases_added += 1

    def notify_one_phrase_ignored(self):
        """
            Record one ignored phrase, both for the current
            language and for the whole import.
        """
        self.global_phrases_ignored += 1
        self.lang_phrases_ignored += 1

    def notify_one_phrase_deleted(self):
        """
            Record one deleted phrase. Deletions are only
            tracked for the whole import, not per language.
        """
        self.global_phrases_deleted += 1

    def notify_one_table_created(self):
        """
            Record one created table.
        """
        self.tables_created += 1

    def notify_one_table_deleted(self):
        """
            Record one deleted table.
        """
        self.tables_deleted += 1

    def notify_one_table_ignored(self):
        """
            Record one ignored table.
        """
        self.tables_ignored += 1

    def notify_import_done(self):
        """
            Log the import-wide statistics, then reset the
            import-wide counters. The per-language counters
            are not touched here.
        """
        # Snapshot the counters that are both reported and tested below.
        invalid_count = self.global_phrases_invalid
        deleted_phrases = self.global_phrases_deleted
        deleted_tables = self.tables_deleted

        LOG.info('====================================================================')
        LOG.info('Final statistics of the import:')

        LOG.info('- %s phrases were invalid.', invalid_count)
        if invalid_count > 0:
            LOG.info(' Those invalid phrases have been skipped.')

        LOG.info('- %s phrases were ignored as they are already in the database',
                 self.global_phrases_ignored)
        LOG.info('- %s phrases were added to the database', self.global_phrases_added)

        LOG.info('- %s phrases were deleted from the database', deleted_phrases)
        if deleted_phrases > 0:
            LOG.info(' They were deleted as they are not valid anymore.')

        LOG.info('- %s tables were ignored as they already exist on the database',
                 self.tables_ignored)
        LOG.info('- %s tables were created', self.tables_created)

        LOG.info('- %s tables were deleted from the database', deleted_tables)
        if deleted_tables > 0:
            LOG.info(' They were deleted as they are not valid anymore.')

        # The invalid count is logged again at warning level, after the
        # info-level summary.
        if invalid_count > 0:
            LOG.warning('%s phrases were invalid and have been skipped during the whole process.',
                        invalid_count)

        self._set_global_values_to_0()

    def notify_current_lang_done(self, lang):
        """
            Log the statistics of the language `lang`, then
            reset the per-language counters.
        """
        invalid_count = self.lang_phrases_invalid

        LOG.info('====================================================================')
        LOG.info('Statistics for the import of %s:', lang)

        LOG.info('- %s phrases were invalid.', invalid_count)
        if invalid_count > 0:
            LOG.info(' Those invalid phrases have been skipped.')

        LOG.info('- %s phrases were ignored as they are already in the database',
                 self.lang_phrases_ignored)
        LOG.info('- %s phrases were added to the database', self.lang_phrases_added)
        LOG.info('====================================================================')

        # The invalid count is logged again at warning level, after the
        # info-level summary.
        if invalid_count > 0:
            LOG.warning('%s phrases were invalid and have been skipped for the import of lang %s.',
                        invalid_count, lang)

        self._set_lang_values_to_0()
|
@ -14,6 +14,7 @@ from psycopg2.sql import Identifier, Literal, SQL
|
|||||||
|
|
||||||
from nominatim.tools.exec_utils import get_url
|
from nominatim.tools.exec_utils import get_url
|
||||||
from nominatim.errors import UsageError
|
from nominatim.errors import UsageError
|
||||||
|
from nominatim.tools.special_phrases.importer_statistics import SpecialPhrasesImporterStatistics
|
||||||
|
|
||||||
LOG = logging.getLogger()
|
LOG = logging.getLogger()
|
||||||
class SpecialPhrasesImporter():
|
class SpecialPhrasesImporter():
|
||||||
@ -22,6 +23,7 @@ class SpecialPhrasesImporter():
|
|||||||
Class handling the process of special phrases importations.
|
Class handling the process of special phrases importations.
|
||||||
"""
|
"""
|
||||||
def __init__(self, config, phplib_dir, db_connection) -> None:
|
def __init__(self, config, phplib_dir, db_connection) -> None:
|
||||||
|
self.statistics_handler = SpecialPhrasesImporterStatistics()
|
||||||
self.db_connection = db_connection
|
self.db_connection = db_connection
|
||||||
self.config = config
|
self.config = config
|
||||||
self.phplib_dir = phplib_dir
|
self.phplib_dir = phplib_dir
|
||||||
@ -63,14 +65,16 @@ class SpecialPhrasesImporter():
|
|||||||
class_type_pairs = set()
|
class_type_pairs = set()
|
||||||
|
|
||||||
for lang in languages:
|
for lang in languages:
|
||||||
LOG.warning('Import phrases for lang: %s', lang)
|
LOG.warning('Importing phrases for lang: %s...', lang)
|
||||||
wiki_page_xml_content = SpecialPhrasesImporter._get_wiki_content(lang)
|
wiki_page_xml_content = SpecialPhrasesImporter._get_wiki_content(lang)
|
||||||
class_type_pairs.update(self._process_xml_content(wiki_page_xml_content, lang))
|
class_type_pairs.update(self._process_xml_content(wiki_page_xml_content, lang))
|
||||||
|
self.statistics_handler.notify_current_lang_done(lang)
|
||||||
|
|
||||||
self._create_place_classtype_table_and_indexes(class_type_pairs)
|
self._create_place_classtype_table_and_indexes(class_type_pairs)
|
||||||
self._remove_non_existent_phrases_from_db()
|
self._remove_non_existent_phrases_from_db()
|
||||||
self.db_connection.commit()
|
self.db_connection.commit()
|
||||||
LOG.warning('Import done.')
|
LOG.warning('Import done.')
|
||||||
|
self.statistics_handler.notify_import_done()
|
||||||
|
|
||||||
def _fetch_existing_words_phrases(self):
|
def _fetch_existing_words_phrases(self):
|
||||||
"""
|
"""
|
||||||
@ -204,11 +208,13 @@ class SpecialPhrasesImporter():
|
|||||||
(normalized_label, phrase_class, phrase_type, phrase_operator)
|
(normalized_label, phrase_class, phrase_type, phrase_operator)
|
||||||
)
|
)
|
||||||
class_type_pairs.add((phrase_class, phrase_type))
|
class_type_pairs.add((phrase_class, phrase_type))
|
||||||
|
self.statistics_handler.notify_one_phrase_ignored()
|
||||||
#Dont need to add this phrase as it already exists in the word table.
|
#Dont need to add this phrase as it already exists in the word table.
|
||||||
continue
|
continue
|
||||||
|
|
||||||
#sanity check, in case somebody added garbage in the wiki
|
#sanity check, in case somebody added garbage in the wiki
|
||||||
if not self._check_sanity(lang, phrase_class, phrase_type):
|
if not self._check_sanity(lang, phrase_class, phrase_type):
|
||||||
|
self.statistics_handler.notify_one_phrase_invalid()
|
||||||
continue
|
continue
|
||||||
|
|
||||||
class_type_pairs.add((phrase_class, phrase_type))
|
class_type_pairs.add((phrase_class, phrase_type))
|
||||||
@ -217,6 +223,7 @@ class SpecialPhrasesImporter():
|
|||||||
phrase_label, normalized_label, phrase_class,
|
phrase_label, normalized_label, phrase_class,
|
||||||
phrase_type, phrase_operator
|
phrase_type, phrase_operator
|
||||||
)
|
)
|
||||||
|
self.statistics_handler.notify_one_phrase_added()
|
||||||
|
|
||||||
return class_type_pairs
|
return class_type_pairs
|
||||||
|
|
||||||
@ -263,6 +270,7 @@ class SpecialPhrasesImporter():
|
|||||||
table_name = 'place_classtype_{}_{}'.format(phrase_class, phrase_type)
|
table_name = 'place_classtype_{}_{}'.format(phrase_class, phrase_type)
|
||||||
|
|
||||||
if table_name in self.table_phrases_to_delete:
|
if table_name in self.table_phrases_to_delete:
|
||||||
|
self.statistics_handler.notify_one_table_ignored()
|
||||||
#Remove this table from the ones to delete as it match a class/type
|
#Remove this table from the ones to delete as it match a class/type
|
||||||
#still existing on the special phrases of the wiki.
|
#still existing on the special phrases of the wiki.
|
||||||
self.table_phrases_to_delete.remove(table_name)
|
self.table_phrases_to_delete.remove(table_name)
|
||||||
@ -278,6 +286,8 @@ class SpecialPhrasesImporter():
|
|||||||
#Grant access on read to the web user.
|
#Grant access on read to the web user.
|
||||||
self._grant_access_to_webuser(phrase_class, phrase_type)
|
self._grant_access_to_webuser(phrase_class, phrase_type)
|
||||||
|
|
||||||
|
self.statistics_handler.notify_one_table_created()
|
||||||
|
|
||||||
with self.db_connection.cursor() as db_cursor:
|
with self.db_connection.cursor() as db_cursor:
|
||||||
db_cursor.execute("DROP INDEX idx_placex_classtype")
|
db_cursor.execute("DROP INDEX idx_placex_classtype")
|
||||||
|
|
||||||
@ -341,6 +351,7 @@ class SpecialPhrasesImporter():
|
|||||||
|
|
||||||
#Delete phrases from the word table which are not on the wiki anymore.
|
#Delete phrases from the word table which are not on the wiki anymore.
|
||||||
for phrase_to_delete in self.words_phrases_to_delete:
|
for phrase_to_delete in self.words_phrases_to_delete:
|
||||||
|
self.statistics_handler.notify_one_phrase_deleted()
|
||||||
if phrase_to_delete[3] == '-':
|
if phrase_to_delete[3] == '-':
|
||||||
query = """
|
query = """
|
||||||
DELETE FROM word WHERE word = %s AND class = %s AND type = %s AND operator IS null
|
DELETE FROM word WHERE word = %s AND class = %s AND type = %s AND operator IS null
|
||||||
@ -357,6 +368,7 @@ class SpecialPhrasesImporter():
|
|||||||
|
|
||||||
#Delete place_classtype tables corresponding to class/type which are not on the wiki anymore
|
#Delete place_classtype tables corresponding to class/type which are not on the wiki anymore
|
||||||
for table in self.table_phrases_to_delete:
|
for table in self.table_phrases_to_delete:
|
||||||
|
self.statistics_handler.notify_one_table_deleted()
|
||||||
query = SQL('DROP TABLE IF EXISTS {}').format(Identifier(table))
|
query = SQL('DROP TABLE IF EXISTS {}').format(Identifier(table))
|
||||||
queries_parameters.append((query, ()))
|
queries_parameters.append((query, ()))
|
||||||
|
|
@ -2,13 +2,12 @@
|
|||||||
Tests for import special phrases methods
|
Tests for import special phrases methods
|
||||||
of the class SpecialPhrasesImporter.
|
of the class SpecialPhrasesImporter.
|
||||||
"""
|
"""
|
||||||
from mocks import MockParamCapture
|
|
||||||
from nominatim.errors import UsageError
|
from nominatim.errors import UsageError
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
import tempfile
|
import tempfile
|
||||||
from shutil import copyfile
|
from shutil import copyfile
|
||||||
import pytest
|
import pytest
|
||||||
from nominatim.tools.special_phrases import SpecialPhrasesImporter
|
from nominatim.tools import SpecialPhrasesImporter
|
||||||
|
|
||||||
TEST_BASE_DIR = Path(__file__) / '..' / '..'
|
TEST_BASE_DIR = Path(__file__) / '..' / '..'
|
||||||
|
|
||||||
@ -304,7 +303,7 @@ def test_import_from_wiki(monkeypatch, temp_db_conn, def_config, special_phrases
|
|||||||
CREATE TABLE place_classtype_amenity_animal_shelter();
|
CREATE TABLE place_classtype_amenity_animal_shelter();
|
||||||
CREATE TABLE place_classtype_wrongclass_wrongtype();""")
|
CREATE TABLE place_classtype_wrongclass_wrongtype();""")
|
||||||
|
|
||||||
monkeypatch.setattr('nominatim.tools.special_phrases.SpecialPhrasesImporter._get_wiki_content', mock_get_wiki_content)
|
monkeypatch.setattr('nominatim.tools.SpecialPhrasesImporter._get_wiki_content', mock_get_wiki_content)
|
||||||
special_phrases_importer.import_from_wiki(['en'])
|
special_phrases_importer.import_from_wiki(['en'])
|
||||||
|
|
||||||
class_test = 'aerialway'
|
class_test = 'aerialway'
|
||||||
|
Loading…
Reference in New Issue
Block a user