Merge pull request #2291 from AntoJvlt/special-phrases-statistics

Special phrases statistics
This commit is contained in:
Sarah Hoffmann 2021-04-27 11:57:05 +02:00 committed by GitHub
commit 46e8c6b112
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 156 additions and 5 deletions

View File

@ -2,7 +2,7 @@
Implementation of the 'special-phrases' command.
"""
import logging
from nominatim.tools.special_phrases import SpecialPhrasesImporter
from nominatim.tools import SpecialPhrasesImporter
from nominatim.db.connection import connect
LOG = logging.getLogger()

View File

@ -2,3 +2,5 @@
Module with functions for importing, updating Nominatim databases
as well as general maintenance helpers.
"""
from nominatim.tools.special_phrases.special_phrases_importer import SpecialPhrasesImporter

View File

@ -0,0 +1,138 @@
"""
Contains the class which handles statistics for the
import of special phrases.
"""
import logging
LOG = logging.getLogger()


class SpecialPhrasesImporterStatistics:
    # pylint: disable=too-many-instance-attributes
    """
    Collect and report counters for the import of special phrases.

    Two groups of counters are maintained:

    * ``lang_phrases_*`` count what happened for the language that is
      currently being imported; they are reset by
      ``notify_current_lang_done()``.
    * ``global_phrases_*`` and ``tables_*`` count what happened during the
      whole import; they are reset by ``notify_import_done()``.

    The ``notify_one_*`` methods each increment the matching counter(s)
    by one. The two ``notify_*_done`` methods write a report to the module
    logger and then reset counters.
    """

    # Visual delimiter written around the reports.
    _SEPARATOR = '===================================================================='

    def __init__(self):
        self._set_lang_values_to_0()
        self._set_global_values_to_0()

    def _set_global_values_to_0(self):
        """
        Set all counts for the global import to 0.
        """
        self.tables_created = 0
        self.tables_deleted = 0
        self.tables_ignored = 0
        self.global_phrases_invalid = 0
        self.global_phrases_added = 0
        self.global_phrases_ignored = 0
        self.global_phrases_deleted = 0

    def _set_lang_values_to_0(self):
        """
        Set all counts for the current lang to 0.
        """
        self.lang_phrases_invalid = 0
        self.lang_phrases_added = 0
        self.lang_phrases_ignored = 0

    @staticmethod
    def _log_phrase_counts(invalid, ignored, added):
        """
        Log the invalid/ignored/added phrase counts.

        Both reports start with these lines; only the numbers
        passed in differ (per-language or global counters).
        """
        LOG.info('- %s phrases were invalid.', invalid)
        if invalid > 0:
            LOG.info(' Those invalid phrases have been skipped.')
        LOG.info('- %s phrases were ignored as they are already in the database',
                 ignored)
        LOG.info('- %s phrases were added to the database', added)

    def notify_one_phrase_invalid(self):
        """
        Add +1 to the count of invalid entries fetched from the wiki
        (for the current lang and for the whole import).
        """
        self.lang_phrases_invalid += 1
        self.global_phrases_invalid += 1

    def notify_one_phrase_added(self):
        """
        Add +1 to the count of entries added to the db
        (for the current lang and for the whole import).
        """
        self.lang_phrases_added += 1
        self.global_phrases_added += 1

    def notify_one_phrase_ignored(self):
        """
        Add +1 to the count of entries ignored because they were
        already in the db (for the current lang and for the whole import).
        """
        self.lang_phrases_ignored += 1
        self.global_phrases_ignored += 1

    def notify_one_phrase_deleted(self):
        """
        Add +1 to the count of phrases deleted from the database.
        Only a global count is kept for deletions.
        """
        self.global_phrases_deleted += 1

    def notify_one_table_created(self):
        """
        Add +1 to the count of created tables.
        """
        self.tables_created += 1

    def notify_one_table_deleted(self):
        """
        Add +1 to the count of deleted tables.
        """
        self.tables_deleted += 1

    def notify_one_table_ignored(self):
        """
        Add +1 to the count of ignored tables.
        """
        self.tables_ignored += 1

    def notify_import_done(self):
        """
        Print stats for the whole import process and reset all values.

        The report is written at INFO level. If at least one phrase was
        invalid, an additional WARNING summarises how many were skipped.
        """
        LOG.info(self._SEPARATOR)
        LOG.info('Final statistics of the import:')
        self._log_phrase_counts(self.global_phrases_invalid,
                                self.global_phrases_ignored,
                                self.global_phrases_added)
        LOG.info('- %s phrases were deleted from the database', self.global_phrases_deleted)
        if self.global_phrases_deleted > 0:
            LOG.info(' They were deleted as they are not valid anymore.')
        LOG.info('- %s tables were ignored as they already exist on the database',
                 self.tables_ignored)
        LOG.info('- %s tables were created', self.tables_created)
        LOG.info('- %s tables were deleted from the database', self.tables_deleted)
        if self.tables_deleted > 0:
            LOG.info(' They were deleted as they are not valid anymore.')

        if self.global_phrases_invalid > 0:
            LOG.warning('%s phrases were invalid and have been skipped during the whole process.',
                        self.global_phrases_invalid)

        # Reset both groups of counters: events notified after the last
        # notify_current_lang_done() would otherwise leak into the
        # per-language numbers of a later import run.
        self._set_lang_values_to_0()
        self._set_global_values_to_0()

    def notify_current_lang_done(self, lang):
        """
        Print stats for the current lang and then reset lang values.

        `lang` is only used to label the report. The global counters are
        left untouched so they keep accumulating over all languages.
        """
        LOG.info(self._SEPARATOR)
        LOG.info('Statistics for the import of %s:', lang)
        self._log_phrase_counts(self.lang_phrases_invalid,
                                self.lang_phrases_ignored,
                                self.lang_phrases_added)
        LOG.info(self._SEPARATOR)

        if self.lang_phrases_invalid > 0:
            LOG.warning('%s phrases were invalid and have been skipped for the import of lang %s.',
                        self.lang_phrases_invalid, lang)

        self._set_lang_values_to_0()

View File

@ -14,6 +14,7 @@ from psycopg2.sql import Identifier, Literal, SQL
from nominatim.tools.exec_utils import get_url
from nominatim.errors import UsageError
from nominatim.tools.special_phrases.importer_statistics import SpecialPhrasesImporterStatistics
LOG = logging.getLogger()
class SpecialPhrasesImporter():
@ -22,6 +23,7 @@ class SpecialPhrasesImporter():
Class handling the process of special phrases importations.
"""
def __init__(self, config, phplib_dir, db_connection) -> None:
self.statistics_handler = SpecialPhrasesImporterStatistics()
self.db_connection = db_connection
self.config = config
self.phplib_dir = phplib_dir
@ -63,14 +65,16 @@ class SpecialPhrasesImporter():
class_type_pairs = set()
for lang in languages:
LOG.warning('Import phrases for lang: %s', lang)
LOG.warning('Importing phrases for lang: %s...', lang)
wiki_page_xml_content = SpecialPhrasesImporter._get_wiki_content(lang)
class_type_pairs.update(self._process_xml_content(wiki_page_xml_content, lang))
self.statistics_handler.notify_current_lang_done(lang)
self._create_place_classtype_table_and_indexes(class_type_pairs)
self._remove_non_existent_phrases_from_db()
self.db_connection.commit()
LOG.warning('Import done.')
self.statistics_handler.notify_import_done()
def _fetch_existing_words_phrases(self):
"""
@ -204,11 +208,13 @@ class SpecialPhrasesImporter():
(normalized_label, phrase_class, phrase_type, phrase_operator)
)
class_type_pairs.add((phrase_class, phrase_type))
self.statistics_handler.notify_one_phrase_ignored()
#Don't need to add this phrase as it already exists in the word table.
continue
#sanity check, in case somebody added garbage in the wiki
if not self._check_sanity(lang, phrase_class, phrase_type):
self.statistics_handler.notify_one_phrase_invalid()
continue
class_type_pairs.add((phrase_class, phrase_type))
@ -217,6 +223,7 @@ class SpecialPhrasesImporter():
phrase_label, normalized_label, phrase_class,
phrase_type, phrase_operator
)
self.statistics_handler.notify_one_phrase_added()
return class_type_pairs
@ -263,6 +270,7 @@ class SpecialPhrasesImporter():
table_name = 'place_classtype_{}_{}'.format(phrase_class, phrase_type)
if table_name in self.table_phrases_to_delete:
self.statistics_handler.notify_one_table_ignored()
#Remove this table from the ones to delete as it matches a class/type
#still existing in the special phrases of the wiki.
self.table_phrases_to_delete.remove(table_name)
@ -278,6 +286,8 @@ class SpecialPhrasesImporter():
#Grant access on read to the web user.
self._grant_access_to_webuser(phrase_class, phrase_type)
self.statistics_handler.notify_one_table_created()
with self.db_connection.cursor() as db_cursor:
db_cursor.execute("DROP INDEX idx_placex_classtype")
@ -341,6 +351,7 @@ class SpecialPhrasesImporter():
#Delete phrases from the word table which are not on the wiki anymore.
for phrase_to_delete in self.words_phrases_to_delete:
self.statistics_handler.notify_one_phrase_deleted()
if phrase_to_delete[3] == '-':
query = """
DELETE FROM word WHERE word = %s AND class = %s AND type = %s AND operator IS null
@ -357,6 +368,7 @@ class SpecialPhrasesImporter():
#Delete place_classtype tables corresponding to class/type which are not on the wiki anymore
for table in self.table_phrases_to_delete:
self.statistics_handler.notify_one_table_deleted()
query = SQL('DROP TABLE IF EXISTS {}').format(Identifier(table))
queries_parameters.append((query, ()))

View File

@ -2,13 +2,12 @@
Tests for import special phrases methods
of the class SpecialPhrasesImporter.
"""
from mocks import MockParamCapture
from nominatim.errors import UsageError
from pathlib import Path
import tempfile
from shutil import copyfile
import pytest
from nominatim.tools.special_phrases import SpecialPhrasesImporter
from nominatim.tools import SpecialPhrasesImporter
TEST_BASE_DIR = Path(__file__) / '..' / '..'
@ -304,7 +303,7 @@ def test_import_from_wiki(monkeypatch, temp_db_conn, def_config, special_phrases
CREATE TABLE place_classtype_amenity_animal_shelter();
CREATE TABLE place_classtype_wrongclass_wrongtype();""")
monkeypatch.setattr('nominatim.tools.special_phrases.SpecialPhrasesImporter._get_wiki_content', mock_get_wiki_content)
monkeypatch.setattr('nominatim.tools.SpecialPhrasesImporter._get_wiki_content', mock_get_wiki_content)
special_phrases_importer.import_from_wiki(['en'])
class_test = 'aerialway'