simplify token precomputation

Rename make_keywords() to precompute_words() to reflect that it is only
used for precomputation. The token IDs are not really needed, so don't
bother to compute the array of tokens; the function now returns a constant.
This commit is contained in:
Sarah Hoffmann 2021-04-19 16:54:22 +02:00
parent d68b02d36a
commit b88b952f56
3 changed files with 6 additions and 26 deletions

View File

@ -29787,7 +29787,7 @@ st 5557484
-- prefill word table
select count(make_keywords(v)) from (select distinct svals(name) as v from place) as w where v is not null;
select count(precompute_words(v)) from (select distinct svals(name) as v from place) as w where v is not null;
select count(getorcreate_housenumber_id(make_standard_name(v))) from (select distinct address->'housenumber' as v from place where address ? 'housenumber') as w;
-- copy the word frequencies

View File

@ -377,40 +377,26 @@ $$
LANGUAGE plpgsql;
CREATE OR REPLACE FUNCTION make_keywords(src TEXT)
RETURNS INTEGER[]
CREATE OR REPLACE FUNCTION precompute_words(src TEXT)
RETURNS INTEGER
AS $$
DECLARE
result INTEGER[];
s TEXT;
w INTEGER;
words TEXT[];
i INTEGER;
j INTEGER;
BEGIN
result := '{}'::INTEGER[];
s := make_standard_name(src);
w := getorcreate_name_id(s, src);
IF NOT (ARRAY[w] <@ result) THEN
result := result || w;
END IF;
w := getorcreate_word_id(s);
IF w IS NOT NULL AND NOT (ARRAY[w] <@ result) THEN
result := result || w;
END IF;
words := string_to_array(s, ' ');
IF array_upper(words, 1) IS NOT NULL THEN
FOR j IN 1..array_upper(words, 1) LOOP
IF (words[j] != '') THEN
w = getorcreate_word_id(words[j]);
IF w IS NOT NULL AND NOT (ARRAY[w] <@ result) THEN
result := result || w;
END IF;
w := getorcreate_word_id(words[j]);
END IF;
END LOOP;
END IF;
@ -421,9 +407,6 @@ BEGIN
s := make_standard_name(words[j]);
IF s != '' THEN
w := getorcreate_word_id(s);
IF w IS NOT NULL AND NOT (ARRAY[w] <@ result) THEN
result := result || w;
END IF;
END IF;
END LOOP;
END IF;
@ -433,13 +416,10 @@ BEGIN
s := make_standard_name(s);
IF s != '' THEN
w := getorcreate_name_id(s, src);
IF NOT (ARRAY[w] <@ result) THEN
result := result || w;
END IF;
END IF;
END IF;
RETURN result;
RETURN 1;
END;
$$
LANGUAGE plpgsql;

View File

@ -187,7 +187,7 @@ def test_truncate_database_tables(temp_db_conn, temp_db_cursor, table_factory):
@pytest.mark.parametrize("threads", (1, 5))
def test_load_data(dsn, src_dir, place_row, placex_table, osmline_table, word_table,
temp_db_cursor, threads):
for func in ('make_keywords', 'getorcreate_housenumber_id', 'make_standard_name'):
for func in ('precompute_words', 'getorcreate_housenumber_id', 'make_standard_name'):
temp_db_cursor.execute("""CREATE FUNCTION {} (src TEXT)
RETURNS TEXT AS $$ SELECT 'a'::TEXT $$ LANGUAGE SQL
""".format(func))