From ed3dd81d049bc3e0833bfd0a81155d0a52987221 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Wed, 14 Sep 2022 15:37:39 +0200 Subject: [PATCH] run final index creation in parallel --- lib-sql/indices.sql | 26 +++++++++++++++----------- nominatim/clicmd/setup.py | 11 ++++++----- nominatim/tools/database_import.py | 6 ++++-- 3 files changed, 25 insertions(+), 18 deletions(-) diff --git a/lib-sql/indices.sql b/lib-sql/indices.sql index 9bbc7527..4de0137f 100644 --- a/lib-sql/indices.sql +++ b/lib-sql/indices.sql @@ -10,62 +10,66 @@ CREATE INDEX IF NOT EXISTS idx_place_addressline_address_place_id ON place_addressline USING BTREE (address_place_id) {{db.tablespace.search_index}}; - +--- CREATE INDEX IF NOT EXISTS idx_placex_rank_search ON placex USING BTREE (rank_search) {{db.tablespace.search_index}}; - +--- CREATE INDEX IF NOT EXISTS idx_placex_rank_address ON placex USING BTREE (rank_address) {{db.tablespace.search_index}}; - +--- CREATE INDEX IF NOT EXISTS idx_placex_parent_place_id ON placex USING BTREE (parent_place_id) {{db.tablespace.search_index}} WHERE parent_place_id IS NOT NULL; - +--- CREATE INDEX IF NOT EXISTS idx_placex_geometry_reverse_lookupPolygon ON placex USING gist (geometry) {{db.tablespace.search_index}} WHERE St_GeometryType(geometry) in ('ST_Polygon', 'ST_MultiPolygon') AND rank_address between 4 and 25 AND type != 'postcode' AND name is not null AND indexed_status = 0 AND linked_place_id is null; - +--- CREATE INDEX IF NOT EXISTS idx_osmline_parent_place_id ON location_property_osmline USING BTREE (parent_place_id) {{db.tablespace.search_index}} WHERE parent_place_id is not null; - +--- CREATE INDEX IF NOT EXISTS idx_osmline_parent_osm_id ON location_property_osmline USING BTREE (osm_id) {{db.tablespace.search_index}}; - +--- CREATE INDEX IF NOT EXISTS idx_postcode_postcode ON location_postcode USING BTREE (postcode) {{db.tablespace.search_index}}; - -- Indices only needed for updating. {% if not drop %} +--- CREATE INDEX IF NOT EXISTS idx_placex_pendingsector ON placex USING BTREE (rank_address,geometry_sector) {{db.tablespace.address_index}} WHERE indexed_status > 0; - +--- CREATE INDEX IF NOT EXISTS idx_location_area_country_place_id ON location_area_country USING BTREE (place_id) {{db.tablespace.address_index}}; - +--- CREATE UNIQUE INDEX IF NOT EXISTS idx_place_osm_unique ON place USING btree(osm_id, osm_type, class, type) {{db.tablespace.address_index}}; {% endif %} -- Indices only needed for search. - {% if 'search_name' in db.tables %} +--- CREATE INDEX IF NOT EXISTS idx_search_name_nameaddress_vector ON search_name USING GIN (nameaddress_vector) WITH (fastupdate = off) {{db.tablespace.search_index}}; +--- CREATE INDEX IF NOT EXISTS idx_search_name_name_vector ON search_name USING GIN (name_vector) WITH (fastupdate = off) {{db.tablespace.search_index}}; +--- CREATE INDEX IF NOT EXISTS idx_search_name_centroid ON search_name USING GIST (centroid) {{db.tablespace.search_index}}; {% if postgres.has_index_non_key_column %} +--- CREATE INDEX IF NOT EXISTS idx_placex_housenumber ON placex USING btree (parent_place_id) INCLUDE (housenumber) {{db.tablespace.search_index}} WHERE housenumber is not null; +--- CREATE INDEX IF NOT EXISTS idx_osmline_parent_osm_id_with_hnr ON location_property_osmline USING btree(parent_place_id) INCLUDE (startnumber, endnumber) {{db.tablespace.search_index}} diff --git a/nominatim/clicmd/setup.py b/nominatim/clicmd/setup.py index 6ffa7afb..b4dde6fe 100644 --- a/nominatim/clicmd/setup.py +++ b/nominatim/clicmd/setup.py @@ -72,6 +72,8 @@ class SetupAll: from ..tools import database_import, refresh, postcodes, freeze from ..indexer.indexer import Indexer + num_threads = args.threads or psutil.cpu_count() or 1 + country_info.setup_country_config(args.config) if args.continue_at is None: @@ -109,8 +111,7 @@ class SetupAll: database_import.truncate_data_tables(conn) LOG.warning('Load data into placex table') - database_import.load_data(args.config.get_libpq_dsn(), - args.threads or psutil.cpu_count() or 1) + database_import.load_data(args.config.get_libpq_dsn(), num_threads) LOG.warning("Setting up tokenizer") tokenizer = self._get_tokenizer(args.continue_at, args.config) @@ -125,14 +126,14 @@ class SetupAll: with connect(args.config.get_libpq_dsn()) as conn: self._create_pending_index(conn, args.config.TABLESPACE_ADDRESS_INDEX) LOG.warning('Indexing places') - indexer = Indexer(args.config.get_libpq_dsn(), tokenizer, - args.threads or psutil.cpu_count() or 1) + indexer = Indexer(args.config.get_libpq_dsn(), tokenizer, num_threads) indexer.index_full(analyse=not args.index_noanalyse) LOG.warning('Post-process tables') with connect(args.config.get_libpq_dsn()) as conn: database_import.create_search_indices(conn, args.config, - drop=args.no_updates) + drop=args.no_updates, + threads=num_threads) LOG.warning('Create search index for default country names.') country_info.create_country_names(conn, tokenizer, args.config.get_str_list('LANGUAGES')) diff --git a/nominatim/tools/database_import.py b/nominatim/tools/database_import.py index 447e90f1..f6ebe90d 100644 --- a/nominatim/tools/database_import.py +++ b/nominatim/tools/database_import.py @@ -225,7 +225,8 @@ def load_data(dsn: str, threads: int) -> None: cur.execute('ANALYSE') -def create_search_indices(conn: Connection, config: Configuration, drop: bool = False) -> None: +def create_search_indices(conn: Connection, config: Configuration, + drop: bool = False, threads: int = 1) -> None: """ Create tables that have explicit partitioning. """ @@ -243,4 +244,5 @@ def create_search_indices(conn: Connection, config: Configuration, drop: bool = sql = SQLPreprocessor(conn, config) - sql.run_sql_file(conn, 'indices.sql', drop=drop) + sql.run_parallel_sql_file(config.get_libpq_dsn(), + 'indices.sql', min(8, threads), drop=drop)