run final index creation in parallel

This commit is contained in:
Sarah Hoffmann 2022-09-14 15:37:39 +02:00
parent bef1aebf1c
commit ed3dd81d04
3 changed files with 25 additions and 18 deletions

View File

@@ -10,62 +10,66 @@
CREATE INDEX IF NOT EXISTS idx_place_addressline_address_place_id
ON place_addressline USING BTREE (address_place_id) {{db.tablespace.search_index}};
---
CREATE INDEX IF NOT EXISTS idx_placex_rank_search
ON placex USING BTREE (rank_search) {{db.tablespace.search_index}};
---
CREATE INDEX IF NOT EXISTS idx_placex_rank_address
ON placex USING BTREE (rank_address) {{db.tablespace.search_index}};
---
CREATE INDEX IF NOT EXISTS idx_placex_parent_place_id
ON placex USING BTREE (parent_place_id) {{db.tablespace.search_index}}
WHERE parent_place_id IS NOT NULL;
---
CREATE INDEX IF NOT EXISTS idx_placex_geometry_reverse_lookupPolygon
ON placex USING gist (geometry) {{db.tablespace.search_index}}
WHERE St_GeometryType(geometry) in ('ST_Polygon', 'ST_MultiPolygon')
AND rank_address between 4 and 25 AND type != 'postcode'
AND name is not null AND indexed_status = 0 AND linked_place_id is null;
---
CREATE INDEX IF NOT EXISTS idx_osmline_parent_place_id
ON location_property_osmline USING BTREE (parent_place_id) {{db.tablespace.search_index}}
WHERE parent_place_id is not null;
---
CREATE INDEX IF NOT EXISTS idx_osmline_parent_osm_id
ON location_property_osmline USING BTREE (osm_id) {{db.tablespace.search_index}};
---
CREATE INDEX IF NOT EXISTS idx_postcode_postcode
ON location_postcode USING BTREE (postcode) {{db.tablespace.search_index}};
-- Indices only needed for updating.
{% if not drop %}
---
CREATE INDEX IF NOT EXISTS idx_placex_pendingsector
ON placex USING BTREE (rank_address,geometry_sector) {{db.tablespace.address_index}}
WHERE indexed_status > 0;
---
CREATE INDEX IF NOT EXISTS idx_location_area_country_place_id
ON location_area_country USING BTREE (place_id) {{db.tablespace.address_index}};
---
CREATE UNIQUE INDEX IF NOT EXISTS idx_place_osm_unique
ON place USING btree(osm_id, osm_type, class, type) {{db.tablespace.address_index}};
{% endif %}
-- Indices only needed for search.
{% if 'search_name' in db.tables %}
---
CREATE INDEX IF NOT EXISTS idx_search_name_nameaddress_vector
ON search_name USING GIN (nameaddress_vector) WITH (fastupdate = off) {{db.tablespace.search_index}};
---
CREATE INDEX IF NOT EXISTS idx_search_name_name_vector
ON search_name USING GIN (name_vector) WITH (fastupdate = off) {{db.tablespace.search_index}};
---
CREATE INDEX IF NOT EXISTS idx_search_name_centroid
ON search_name USING GIST (centroid) {{db.tablespace.search_index}};
{% if postgres.has_index_non_key_column %}
---
CREATE INDEX IF NOT EXISTS idx_placex_housenumber
ON placex USING btree (parent_place_id)
INCLUDE (housenumber) {{db.tablespace.search_index}}
WHERE housenumber is not null;
---
CREATE INDEX IF NOT EXISTS idx_osmline_parent_osm_id_with_hnr
ON location_property_osmline USING btree(parent_place_id)
INCLUDE (startnumber, endnumber) {{db.tablespace.search_index}}

View File

@@ -72,6 +72,8 @@ class SetupAll:
from ..tools import database_import, refresh, postcodes, freeze
from ..indexer.indexer import Indexer
num_threads = args.threads or psutil.cpu_count() or 1
country_info.setup_country_config(args.config)
if args.continue_at is None:
@@ -109,8 +111,7 @@ class SetupAll:
database_import.truncate_data_tables(conn)
LOG.warning('Load data into placex table')
database_import.load_data(args.config.get_libpq_dsn(),
args.threads or psutil.cpu_count() or 1)
database_import.load_data(args.config.get_libpq_dsn(), num_threads)
LOG.warning("Setting up tokenizer")
tokenizer = self._get_tokenizer(args.continue_at, args.config)
@@ -125,14 +126,14 @@ class SetupAll:
with connect(args.config.get_libpq_dsn()) as conn:
self._create_pending_index(conn, args.config.TABLESPACE_ADDRESS_INDEX)
LOG.warning('Indexing places')
indexer = Indexer(args.config.get_libpq_dsn(), tokenizer,
args.threads or psutil.cpu_count() or 1)
indexer = Indexer(args.config.get_libpq_dsn(), tokenizer, num_threads)
indexer.index_full(analyse=not args.index_noanalyse)
LOG.warning('Post-process tables')
with connect(args.config.get_libpq_dsn()) as conn:
database_import.create_search_indices(conn, args.config,
drop=args.no_updates)
drop=args.no_updates,
threads=num_threads)
LOG.warning('Create search index for default country names.')
country_info.create_country_names(conn, tokenizer,
args.config.get_str_list('LANGUAGES'))

View File

@@ -225,7 +225,8 @@ def load_data(dsn: str, threads: int) -> None:
cur.execute('ANALYSE')
def create_search_indices(conn: Connection, config: Configuration, drop: bool = False) -> None:
def create_search_indices(conn: Connection, config: Configuration,
drop: bool = False, threads: int = 1) -> None:
""" Create tables that have explicit partitioning.
"""
@@ -243,4 +244,5 @@ def create_search_indices(conn: Connection, config: Configuration, drop: bool =
sql = SQLPreprocessor(conn, config)
sql.run_sql_file(conn, 'indices.sql', drop=drop)
sql.run_parallel_sql_file(config.get_libpq_dsn(),
'indices.sql', min(8, threads), drop=drop)