Merge pull request #3542 from lonvia/remove-legacy-tokenizer

Remove legacy tokenizer
This commit is contained in:
Sarah Hoffmann 2024-09-24 15:42:40 +02:00 committed by GitHub
commit d856788bf5
53 changed files with 59 additions and 4339 deletions

View File

@ -13,10 +13,10 @@ ignored-classes=NominatimArgs,closing
# 'too-many-ancestors' is triggered already by deriving from UserDict
# 'not-context-manager' disabled because it causes false positives once
# typed Python is enabled. See also https://github.com/PyCQA/pylint/issues/5273
disable=too-few-public-methods,duplicate-code,too-many-ancestors,bad-option-value,no-self-use,not-context-manager,use-dict-literal,chained-comparison,attribute-defined-outside-init,too-many-boolean-expressions,contextmanager-generator-missing-cleanup
disable=too-few-public-methods,duplicate-code,too-many-ancestors,bad-option-value,no-self-use,not-context-manager,use-dict-literal,chained-comparison,attribute-defined-outside-init,too-many-boolean-expressions,contextmanager-generator-missing-cleanup,too-many-positional-arguments
good-names=i,j,x,y,m,t,fd,db,cc,x1,x2,y1,y2,pt,k,v,nr
[DESIGN]
max-returns=7

View File

@ -44,7 +44,6 @@ endif()
set(BUILD_IMPORTER on CACHE BOOL "Build everything for importing/updating the database")
set(BUILD_API on CACHE BOOL "Build everything for the API server")
set(BUILD_MODULE off CACHE BOOL "Build PostgreSQL module for legacy tokenizer")
set(BUILD_TESTS on CACHE BOOL "Build test suite")
set(BUILD_OSM2PGSQL on CACHE BOOL "Build osm2pgsql (expert only)")
set(INSTALL_MUNIN_PLUGINS on CACHE BOOL "Install Munin plugins for supervising Nominatim")
@ -139,14 +138,6 @@ if (BUILD_TESTS)
endif()
endif()
#-----------------------------------------------------------------------------
# Postgres module
#-----------------------------------------------------------------------------
if (BUILD_MODULE)
add_subdirectory(module)
endif()
#-----------------------------------------------------------------------------
# Installation
#-----------------------------------------------------------------------------
@ -195,11 +186,6 @@ if (BUILD_OSM2PGSQL)
endif()
endif()
if (BUILD_MODULE)
install(PROGRAMS ${PROJECT_BINARY_DIR}/module/nominatim.so
DESTINATION ${NOMINATIM_LIBDIR}/module)
endif()
install(FILES settings/env.defaults
settings/address-levels.json
settings/phrase-settings.json

View File

@ -61,8 +61,7 @@ pylint3 --extension-pkg-whitelist=osmium nominatim
Before submitting a pull request make sure that the tests pass:
```
cd build
make test
make tests
```
## Releases

View File

@ -131,76 +131,13 @@ script ([Geofabrik](https://download.geofabrik.de)) provides daily updates.
## Using an external PostgreSQL database
You can install Nominatim using a database that runs on a different server when
you have physical access to the file system on the other server. Nominatim
uses a custom normalization library that needs to be made accessible to the
PostgreSQL server. This section explains how to set up the normalization
library.
!!! note
The external module is only needed when using the legacy tokenizer.
If you have chosen the ICU tokenizer, then you can ignore this section
and follow the standard import documentation.
### Option 1: Compiling the library on the database server
The surest way to get a working library is to compile it on the database
server. From the prerequisites you need at least cmake, gcc and the
PostgreSQL server package.
Clone or unpack the Nominatim source code, enter the source directory and
create and enter a build directory.
```sh
cd Nominatim
mkdir build
cd build
```
Now configure cmake to only build the PostgreSQL module and build it:
```
cmake -DBUILD_IMPORTER=off -DBUILD_API=off -DBUILD_TESTS=off -DBUILD_DOCS=off -DBUILD_OSM2PGSQL=off ..
make
```
When done, you find the normalization library in `build/module/nominatim.so`.
Copy it to a place where it is readable and executable by the PostgreSQL server
process.
### Option 2: Compiling the library on the import machine
You can also compile the normalization library on the machine from where you
run the import.
!!! important
You can only do this when the database server and the import machine have
the same architecture and run the same version of Linux. Otherwise there is
no guarantee that the compiled library is compatible with the PostgreSQL
server running on the database server.
Make sure that the PostgreSQL server package is installed on the machine
**with the same version as on the database server**. You do not need to install
the PostgreSQL server itself.
Download and compile Nominatim as per standard instructions. Once done, you find
the normalization library in `build/module/nominatim.so`. Copy the file to
the database server at a location where it is readable and executable by the
PostgreSQL server process.
### Running the import
On the client side you now need to configure the import to point to the
correct location of the library **on the database server**. Add the following
line to your `.env` file:
```
NOMINATIM_DATABASE_MODULE_PATH="<directory on the database server where nominatim.so resides>"
```
Now change the `NOMINATIM_DATABASE_DSN` to point to your remote server and continue
to follow the [standard instructions for importing](Import.md).
You can install Nominatim using a database that runs on a different server.
Simply point the configuration variable `NOMINATIM_DATABASE_DSN` to the
server and follow the standard import documentation.
The import will be faster if it is run directly on the database machine. You
can easily switch to a different machine for the query frontend after the import.
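For example, a minimal `.env` entry for a remote server could look like this
(host and credentials are placeholders, not values from this commit):
```
NOMINATIM_DATABASE_DSN="pgsql:host=db.example.com;port=5432;user=nominatim;password=secret;dbname=nominatim"
```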
## Moving the database to another machine
@ -225,20 +162,9 @@ target machine.
data updates but the resulting database is only about a third of the size
of a full database.
Next install Nominatim on the target machine by following the standard installation
instructions. Again, make sure to use the same version as the source machine.
Next install nominatim-api on the target machine by following the standard
installation instructions. Again, make sure to use the same version as the
source machine.
Create a project directory on your destination machine and set up the `.env`
file to match the configuration on the source machine. Finally run
nominatim refresh --website
to make sure that the local installation of Nominatim will be used.
If you are using the legacy tokenizer you might also have to switch to the
PostgreSQL module that was compiled on your target machine. If you get errors
that PostgreSQL cannot find or access `nominatim.so` then rerun
nominatim refresh --functions
on the target machine to update the location of the module.
file to match the configuration on the source machine. That's all.
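On the destination machine this boils down to a few commands (the paths here
are illustrative, not taken from this commit):
```
mkdir -p ~/nominatim-project
cd ~/nominatim-project
# copy the .env from the source machine, e.g. via scp
scp source-machine:nominatim-project/.env .
```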

View File

@ -178,18 +178,6 @@ make
sudo make install
```
!!! warning
The default installation no longer compiles the PostgreSQL module that
is needed for the legacy tokenizer from older Nominatim versions. If you
are upgrading an older database or want to run the
[legacy tokenizer](../customize/Tokenizers.md#legacy-tokenizer) for
some other reason, you need to enable the PostgreSQL module via
cmake: `cmake -DBUILD_MODULE=on ../Nominatim`. To compile the module
you need to have the server development headers for PostgreSQL installed.
On Ubuntu/Debian run: `sudo apt install postgresql-server-dev-<postgresql version>`
The legacy tokenizer is deprecated and will be removed in Nominatim 5.0.
Nominatim installs itself into `/usr/local` per default. To choose a different
installation directory add `-DCMAKE_INSTALL_PREFIX=<install root>` to the
cmake command. Make sure that the `bin` directory is available in your path

View File

@ -64,26 +64,6 @@ Nominatim grants minimal rights to this user to all tables that are needed
for running geocoding queries.
#### NOMINATIM_DATABASE_MODULE_PATH
| Summary | |
| -------------- | --------------------------------------------------- |
| **Description:** | Directory where to find the PostgreSQL server module |
| **Format:** | path |
| **Default:** | _empty_ (use `<project_directory>/module`) |
| **After Changes:** | run `nominatim refresh --functions` |
| **Comment:** | Legacy tokenizer only |
Defines the directory in which the PostgreSQL server module `nominatim.so`
is stored. The directory and module must be accessible by the PostgreSQL
server.
For information on how to use this setting when working with external databases,
see [Advanced Installations](../admin/Advanced-Installations.md).
The option is only used by the Legacy tokenizer and ignored otherwise.
#### NOMINATIM_TOKENIZER
| Summary | |
@ -114,20 +94,6 @@ on the file format.
If a relative path is given, then the file is searched first relative to the
project directory and then in the global settings directory.
#### NOMINATIM_MAX_WORD_FREQUENCY
| Summary | |
| -------------- | --------------------------------------------------- |
| **Description:** | Number of occurrences before a word is considered frequent |
| **Format:** | int |
| **Default:** | 50000 |
| **After Changes:** | cannot be changed after import |
| **Comment:** | Legacy tokenizer only |
The word frequency count is used by the Legacy tokenizer to automatically
identify _stop words_. Any partial term that occurs more often than what
is defined in this setting is effectively ignored during search.
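If you want a stricter stop-word cut-off than the default, the value can be
lowered in the project's `.env` before the initial import (an illustrative
value; legacy tokenizer only):
```
NOMINATIM_MAX_WORD_FREQUENCY=10000
```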
#### NOMINATIM_LIMIT_REINDEXING
@ -162,25 +128,6 @@ codes, to restrict import to a subset of languages.
Currently only affects the initial import of country names and special phrases.
#### NOMINATIM_TERM_NORMALIZATION
| Summary | |
| -------------- | --------------------------------------------------- |
| **Description:** | Rules for normalizing terms for comparisons |
| **Format:** | string: semicolon-separated list of ICU rules |
| **Default:** | :: NFD (); [[:Nonspacing Mark:] [:Cf:]] >; :: lower (); [[:Punctuation:][:Space:]]+ > ' '; :: NFC (); |
| **Comment:** | Legacy tokenizer only |
[Special phrases](Special-Phrases.md) have stricter matching requirements than
normal search terms. They must appear exactly in the query after this term
normalization has been applied.
Only has an effect on the Legacy tokenizer. For the ICU tokenizer the rules
defined in the
[normalization section](Tokenizers.md#normalization-and-transliteration)
will be used.
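For reference, setting the default rules explicitly in the project's `.env`
would look like this:
```
NOMINATIM_TERM_NORMALIZATION=":: NFD (); [[:Nonspacing Mark:] [:Cf:]] >; :: lower (); [[:Punctuation:][:Space:]]+ > ' '; :: NFC ();"
```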
#### NOMINATIM_USE_US_TIGER_DATA
| Summary | |

View File

@ -15,53 +15,6 @@ they can be configured.
chosen tokenizer is very limited as well. See the comments in each tokenizer
section.
## Legacy tokenizer
!!! danger
The Legacy tokenizer is deprecated and will be removed in Nominatim 5.0.
If you still use a database with the legacy tokenizer, you must reimport
it using the ICU tokenizer below.
The legacy tokenizer implements the analysis algorithms of older Nominatim
versions. It uses a special PostgreSQL module to normalize names and queries.
This tokenizer is automatically installed and used when upgrading an older
database. It should not be used for new installations anymore.
### Compiling the PostgreSQL module
The tokenizer needs a special C module for PostgreSQL which is not compiled
by default. If you need the legacy tokenizer, compile Nominatim as follows:
```
mkdir build
cd build
cmake -DBUILD_MODULE=on ..
make
```
### Enabling the tokenizer
To enable the tokenizer add the following line to your project configuration:
```
NOMINATIM_TOKENIZER=legacy
```
The PostgreSQL module for the tokenizer is available in the `module` directory
and also installed with the remainder of the software under
`lib/nominatim/module/nominatim.so`. You can specify a custom location for
the module with
```
NOMINATIM_DATABASE_MODULE_PATH=<path to directory where nominatim.so resides>
```
This is particularly useful when the database runs on a different server.
See [Advanced installations](../admin/Advanced-Installations.md#using-an-external-postgresql-database) for details.
There are no other configuration options for the legacy tokenizer. All
normalization functions are hard-coded.
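As an illustration, the hard-coded normalization chain can be exercised
directly in the database once the module and SQL functions are installed
(a sketch; the exact output depends on the compiled replacement tables):
```
SELECT make_standard_name('Hauptstraße 5');
-- returns the transliterated, abbreviated form stored in the word table
```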
## ICU tokenizer
The ICU tokenizer uses the [ICU library](http://site.icu-project.org/) to

View File

@ -72,8 +72,6 @@ The tests can be configured with a set of environment variables (`behave -D key=
* `DB_PORT` - (optional) port of database on host
* `DB_USER` - (optional) username of database login
* `DB_PASS` - (optional) password for database login
* `SERVER_MODULE_PATH` - (optional) path on the Postgres server to Nominatim
module shared library file (only needed for legacy tokenizer)
* `REMOVE_TEMPLATE` - if true, the template and API database will not be reused
during the next run. Reusing the base templates speeds
up tests considerably but might lead to outdated errors
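For example, a test run against a non-default database could be configured
like this (all values illustrative):
```
behave -D DB_PORT=5433 -D DB_USER=nominatim -D REMOVE_TEMPLATE=true
```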

View File

@ -1,426 +0,0 @@
-- SPDX-License-Identifier: GPL-2.0-only
--
-- This file is part of Nominatim. (https://nominatim.org)
--
-- Copyright (C) 2022 by the Nominatim developer community.
-- For a full list of authors see the git log.
-- Get tokens used for searching the given place.
--
-- These are the tokens that will be saved in the search_name table.
CREATE OR REPLACE FUNCTION token_get_name_search_tokens(info JSONB)
RETURNS INTEGER[]
AS $$
SELECT (info->>'names')::INTEGER[]
$$ LANGUAGE SQL IMMUTABLE STRICT;
-- Get tokens for matching the place name against others.
--
-- This should usually be restricted to full name tokens.
CREATE OR REPLACE FUNCTION token_get_name_match_tokens(info JSONB)
RETURNS INTEGER[]
AS $$
SELECT (info->>'names')::INTEGER[]
$$ LANGUAGE SQL IMMUTABLE STRICT;
-- Return the housenumber tokens applicable for the place.
CREATE OR REPLACE FUNCTION token_get_housenumber_search_tokens(info JSONB)
RETURNS INTEGER[]
AS $$
SELECT (info->>'hnr_tokens')::INTEGER[]
$$ LANGUAGE SQL IMMUTABLE STRICT;
-- Return the housenumber in the form that it can be matched during search.
CREATE OR REPLACE FUNCTION token_normalized_housenumber(info JSONB)
RETURNS TEXT
AS $$
SELECT info->>'hnr';
$$ LANGUAGE SQL IMMUTABLE STRICT;
CREATE OR REPLACE FUNCTION token_is_street_address(info JSONB)
RETURNS BOOLEAN
AS $$
SELECT info->>'street' is not null or info->>'place_search' is null;
$$ LANGUAGE SQL IMMUTABLE;
CREATE OR REPLACE FUNCTION token_has_addr_street(info JSONB)
RETURNS BOOLEAN
AS $$
SELECT info->>'street' is not null and info->>'street' != '{}';
$$ LANGUAGE SQL IMMUTABLE;
CREATE OR REPLACE FUNCTION token_has_addr_place(info JSONB)
RETURNS BOOLEAN
AS $$
SELECT info->>'place_match' is not null;
$$ LANGUAGE SQL IMMUTABLE;
CREATE OR REPLACE FUNCTION token_matches_street(info JSONB, street_tokens INTEGER[])
RETURNS BOOLEAN
AS $$
SELECT (info->>'street')::INTEGER[] && street_tokens
$$ LANGUAGE SQL IMMUTABLE STRICT;
CREATE OR REPLACE FUNCTION token_matches_place(info JSONB, place_tokens INTEGER[])
RETURNS BOOLEAN
AS $$
SELECT (info->>'place_match')::INTEGER[] && place_tokens
$$ LANGUAGE SQL IMMUTABLE STRICT;
CREATE OR REPLACE FUNCTION token_addr_place_search_tokens(info JSONB)
RETURNS INTEGER[]
AS $$
SELECT (info->>'place_search')::INTEGER[]
$$ LANGUAGE SQL IMMUTABLE STRICT;
CREATE OR REPLACE FUNCTION token_get_address_keys(info JSONB)
RETURNS SETOF TEXT
AS $$
SELECT * FROM jsonb_object_keys(info->'addr');
$$ LANGUAGE SQL IMMUTABLE STRICT;
CREATE OR REPLACE FUNCTION token_get_address_search_tokens(info JSONB, key TEXT)
RETURNS INTEGER[]
AS $$
SELECT (info->'addr'->key->>0)::INTEGER[];
$$ LANGUAGE SQL IMMUTABLE STRICT;
CREATE OR REPLACE FUNCTION token_matches_address(info JSONB, key TEXT, tokens INTEGER[])
RETURNS BOOLEAN
AS $$
SELECT (info->'addr'->key->>1)::INTEGER[] && tokens;
$$ LANGUAGE SQL IMMUTABLE STRICT;
CREATE OR REPLACE FUNCTION token_get_postcode(info JSONB)
RETURNS TEXT
AS $$
SELECT info->>'postcode';
$$ LANGUAGE SQL IMMUTABLE STRICT;
-- Return token info that should be saved permanently in the database.
CREATE OR REPLACE FUNCTION token_strip_info(info JSONB)
RETURNS JSONB
AS $$
SELECT NULL::JSONB;
$$ LANGUAGE SQL IMMUTABLE STRICT;
--------------- private functions ----------------------------------------------
-- Functions for term normalisation and access to the 'word' table.
CREATE OR REPLACE FUNCTION transliteration(text) RETURNS text
AS '{{ modulepath }}/nominatim.so', 'transliteration'
LANGUAGE c IMMUTABLE STRICT;
CREATE OR REPLACE FUNCTION gettokenstring(text) RETURNS text
AS '{{ modulepath }}/nominatim.so', 'gettokenstring'
LANGUAGE c IMMUTABLE STRICT;
CREATE OR REPLACE FUNCTION make_standard_name(name TEXT) RETURNS TEXT
AS $$
DECLARE
o TEXT;
BEGIN
o := public.gettokenstring(public.transliteration(name));
RETURN trim(substr(o,1,length(o)));
END;
$$
LANGUAGE plpgsql IMMUTABLE;
-- returns NULL if the word is too common
CREATE OR REPLACE FUNCTION getorcreate_word_id(lookup_word TEXT)
RETURNS INTEGER
AS $$
DECLARE
lookup_token TEXT;
return_word_id INTEGER;
count INTEGER;
BEGIN
lookup_token := trim(lookup_word);
SELECT min(word_id), max(search_name_count) FROM word
WHERE word_token = lookup_token and class is null and type is null
INTO return_word_id, count;
IF return_word_id IS NULL THEN
return_word_id := nextval('seq_word');
INSERT INTO word VALUES (return_word_id, lookup_token, null, null, null, null, 0);
ELSE
IF count > {{ max_word_freq }} THEN
return_word_id := NULL;
END IF;
END IF;
RETURN return_word_id;
END;
$$
LANGUAGE plpgsql;
-- Create housenumber tokens from an OSM addr:housenumber.
-- The housenumber is split at comma and semicolon as necessary.
-- The function returns the normalized form of the housenumber suitable
-- for comparison.
CREATE OR REPLACE FUNCTION create_housenumbers(housenumbers TEXT[],
OUT tokens TEXT,
OUT normtext TEXT)
AS $$
BEGIN
SELECT array_to_string(array_agg(trans), ';'), array_agg(tid)::TEXT
INTO normtext, tokens
FROM (SELECT lookup_word as trans, getorcreate_housenumber_id(lookup_word) as tid
FROM (SELECT make_standard_name(h) as lookup_word
FROM unnest(housenumbers) h) x) y;
END;
$$ LANGUAGE plpgsql STABLE STRICT;
CREATE OR REPLACE FUNCTION getorcreate_housenumber_id(lookup_word TEXT)
RETURNS INTEGER
AS $$
DECLARE
lookup_token TEXT;
return_word_id INTEGER;
BEGIN
lookup_token := ' ' || trim(lookup_word);
SELECT min(word_id) FROM word
WHERE word_token = lookup_token and class='place' and type='house'
INTO return_word_id;
IF return_word_id IS NULL THEN
return_word_id := nextval('seq_word');
INSERT INTO word VALUES (return_word_id, lookup_token, null,
'place', 'house', null, 0);
END IF;
RETURN return_word_id;
END;
$$
LANGUAGE plpgsql;
CREATE OR REPLACE FUNCTION create_postcode_id(postcode TEXT)
RETURNS BOOLEAN
AS $$
DECLARE
r RECORD;
lookup_token TEXT;
return_word_id INTEGER;
BEGIN
lookup_token := ' ' || make_standard_name(postcode);
FOR r IN
SELECT word_id FROM word
WHERE word_token = lookup_token and word = postcode
and class='place' and type='postcode'
LOOP
RETURN false;
END LOOP;
INSERT INTO word VALUES (nextval('seq_word'), lookup_token, postcode,
'place', 'postcode', null, 0);
RETURN true;
END;
$$
LANGUAGE plpgsql;
CREATE OR REPLACE FUNCTION getorcreate_name_id(lookup_word TEXT, src_word TEXT)
RETURNS INTEGER
AS $$
DECLARE
lookup_token TEXT;
nospace_lookup_token TEXT;
return_word_id INTEGER;
BEGIN
lookup_token := ' '||trim(lookup_word);
SELECT min(word_id) FROM word
WHERE word_token = lookup_token and class is null and type is null
INTO return_word_id;
IF return_word_id IS NULL THEN
return_word_id := nextval('seq_word');
INSERT INTO word VALUES (return_word_id, lookup_token, src_word,
null, null, null, 0);
END IF;
RETURN return_word_id;
END;
$$
LANGUAGE plpgsql;
-- Normalize a string and lookup its word ids (partial words).
CREATE OR REPLACE FUNCTION addr_ids_from_name(lookup_word TEXT)
RETURNS INTEGER[]
AS $$
DECLARE
words TEXT[];
id INTEGER;
return_word_id INTEGER[];
word_ids INTEGER[];
j INTEGER;
BEGIN
words := string_to_array(make_standard_name(lookup_word), ' ');
IF array_upper(words, 1) IS NOT NULL THEN
FOR j IN 1..array_upper(words, 1) LOOP
IF (words[j] != '') THEN
SELECT array_agg(word_id) INTO word_ids
FROM word
WHERE word_token = words[j] and class is null and type is null;
IF word_ids IS NULL THEN
id := nextval('seq_word');
INSERT INTO word VALUES (id, words[j], null, null, null, null, 0);
return_word_id := return_word_id || id;
ELSE
return_word_id := array_merge(return_word_id, word_ids);
END IF;
END IF;
END LOOP;
END IF;
RETURN return_word_id;
END;
$$
LANGUAGE plpgsql;
-- Normalize a string and look up its name ids (full words).
CREATE OR REPLACE FUNCTION word_ids_from_name(lookup_word TEXT)
RETURNS INTEGER[]
AS $$
DECLARE
lookup_token TEXT;
return_word_ids INTEGER[];
BEGIN
lookup_token := ' '|| make_standard_name(lookup_word);
SELECT array_agg(word_id) FROM word
WHERE word_token = lookup_token and class is null and type is null
INTO return_word_ids;
RETURN return_word_ids;
END;
$$
LANGUAGE plpgsql STABLE STRICT;
CREATE OR REPLACE FUNCTION make_keywords(src HSTORE)
RETURNS INTEGER[]
AS $$
DECLARE
result INTEGER[];
s TEXT;
w INTEGER;
words TEXT[];
value TEXT;
j INTEGER;
BEGIN
result := '{}'::INTEGER[];
FOR value IN SELECT unnest(regexp_split_to_array(svals(src), E'[,;]')) LOOP
-- full name
s := make_standard_name(value);
w := getorcreate_name_id(s, value);
IF not(ARRAY[w] <@ result) THEN
result := result || w;
END IF;
-- partial single-word terms
words := string_to_array(s, ' ');
IF array_upper(words, 1) IS NOT NULL THEN
FOR j IN 1..array_upper(words, 1) LOOP
IF (words[j] != '') THEN
w = getorcreate_word_id(words[j]);
IF w IS NOT NULL AND NOT (ARRAY[w] <@ result) THEN
result := result || w;
END IF;
END IF;
END LOOP;
END IF;
-- consider parts before an opening bracket a full word as well
words := regexp_split_to_array(value, E'[(]');
IF array_upper(words, 1) > 1 THEN
s := make_standard_name(words[1]);
IF s != '' THEN
w := getorcreate_name_id(s, words[1]);
IF w IS NOT NULL AND NOT (ARRAY[w] <@ result) THEN
result := result || w;
END IF;
END IF;
END IF;
s := regexp_replace(value, '市$', '');
IF s != value THEN
s := make_standard_name(s);
IF s != '' THEN
w := getorcreate_name_id(s, value);
IF NOT (ARRAY[w] <@ result) THEN
result := result || w;
END IF;
END IF;
END IF;
END LOOP;
RETURN result;
END;
$$
LANGUAGE plpgsql;
CREATE OR REPLACE FUNCTION precompute_words(src TEXT)
RETURNS INTEGER
AS $$
DECLARE
s TEXT;
w INTEGER;
words TEXT[];
i INTEGER;
j INTEGER;
BEGIN
s := make_standard_name(src);
w := getorcreate_name_id(s, src);
w := getorcreate_word_id(s);
words := string_to_array(s, ' ');
IF array_upper(words, 1) IS NOT NULL THEN
FOR j IN 1..array_upper(words, 1) LOOP
IF (words[j] != '') THEN
w := getorcreate_word_id(words[j]);
END IF;
END LOOP;
END IF;
words := regexp_split_to_array(src, E'[,;()]');
IF array_upper(words, 1) != 1 THEN
FOR j IN 1..array_upper(words, 1) LOOP
s := make_standard_name(words[j]);
IF s != '' THEN
w := getorcreate_word_id(s);
END IF;
END LOOP;
END IF;
s := regexp_replace(src, '市$', '');
IF s != src THEN
s := make_standard_name(s);
IF s != '' THEN
w := getorcreate_name_id(s, src);
END IF;
END IF;
RETURN 1;
END;
$$
LANGUAGE plpgsql;

View File

@ -1,10 +0,0 @@
-- SPDX-License-Identifier: GPL-2.0-only
--
-- This file is part of Nominatim. (https://nominatim.org)
--
-- Copyright (C) 2022 by the Nominatim developer community.
-- For a full list of authors see the git log.
-- Required for details lookup.
CREATE INDEX IF NOT EXISTS idx_word_word_id
ON word USING BTREE (word_id) {{db.tablespace.search_index}};

View File

@ -1,28 +0,0 @@
-- SPDX-License-Identifier: GPL-2.0-only
--
-- This file is part of Nominatim. (https://nominatim.org)
--
-- Copyright (C) 2022 by the Nominatim developer community.
-- For a full list of authors see the git log.
DROP TABLE IF EXISTS word;
CREATE TABLE word (
word_id INTEGER,
word_token text NOT NULL,
word text,
class text,
type text,
country_code varchar(2),
search_name_count INTEGER,
operator TEXT
) {{db.tablespace.search_data}};
CREATE INDEX idx_word_word_token ON word
USING BTREE (word_token) {{db.tablespace.search_index}};
CREATE INDEX idx_word_word ON word
USING BTREE (word) {{db.tablespace.search_index}} WHERE word is not null;
GRANT SELECT ON word TO "{{config.DATABASE_WEBUSER}}";
DROP SEQUENCE IF EXISTS seq_word;
CREATE SEQUENCE seq_word start 1;
GRANT SELECT ON seq_word to "{{config.DATABASE_WEBUSER}}";

View File

@ -1,28 +0,0 @@
# just use the pgxs makefile
foreach(suffix ${PostgreSQL_ADDITIONAL_VERSIONS} "16" "15" "14" "13" "12" "11" "10" "9.6")
list(APPEND PG_CONFIG_HINTS
"/usr/pgsql-${suffix}/bin")
endforeach()
find_program(PG_CONFIG pg_config HINTS ${PG_CONFIG_HINTS})
execute_process(COMMAND ${PG_CONFIG} --pgxs
OUTPUT_VARIABLE PGXS
OUTPUT_STRIP_TRAILING_WHITESPACE)
if (NOT EXISTS "${PGXS}")
message(FATAL_ERROR "Postgresql server package not found.")
endif()
ADD_CUSTOM_COMMAND( OUTPUT ${CMAKE_CURRENT_SOURCE_DIR}/dummy
COMMAND PGXS=${PGXS} PG_CONFIG=${PG_CONFIG} MODSRCDIR=${CMAKE_CURRENT_SOURCE_DIR} $(MAKE) -f ${CMAKE_CURRENT_SOURCE_DIR}/Makefile
COMMENT "Running external makefile ${PGXS}"
)
ADD_CUSTOM_TARGET( nominatim_lib ALL
DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/dummy
)

View File

@ -1,11 +0,0 @@
MODULES = nominatim
PG_CPPFLAGS = -I$(MODSRCDIR)
include $(PGXS)
VPATH = $(MODSRCDIR)
all:
chmod 755 nominatim.so
install:
@echo Library does not need to be installed.

View File

@ -1,301 +0,0 @@
/**
* SPDX-License-Identifier: GPL-2.0-only
*
* This file is part of Nominatim. (https://nominatim.org)
*
* Copyright (C) 2022 by the Nominatim developer community.
* For a full list of authors see the git log.
*/
#include "postgres.h"
#include "fmgr.h"
#include "mb/pg_wchar.h"
#include <utfasciitable.h>
#if PG_MAJORVERSION_NUM > 15
#include "varatt.h"
#endif
PG_MODULE_MAGIC;
Datum transliteration( PG_FUNCTION_ARGS );
Datum gettokenstring( PG_FUNCTION_ARGS );
void str_replace(char* buffer, int* len, int* changes, char* from, int fromlen, char* to, int tolen, int);
void str_dupspaces(char* buffer);
PG_FUNCTION_INFO_V1( transliteration );
Datum
transliteration( PG_FUNCTION_ARGS )
{
static char * ascii = UTFASCII;
static uint16 asciilookup[65536] = UTFASCIILOOKUP;
char * asciipos;
text *source;
unsigned char *sourcedata;
int sourcedatalength;
unsigned int c1,c2,c3,c4;
unsigned int * wchardata;
unsigned int * wchardatastart;
text *result;
unsigned char *resultdata;
int resultdatalength;
int iLen;
if (GetDatabaseEncoding() != PG_UTF8)
{
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("requires UTF8 database encoding")));
}
if (PG_ARGISNULL(0))
{
PG_RETURN_NULL();
}
// The original string
source = PG_GETARG_TEXT_P(0);
sourcedata = (unsigned char *)VARDATA(source);
sourcedatalength = VARSIZE(source) - VARHDRSZ;
// Intermediate wchar version of string
wchardatastart = wchardata = (unsigned int *)palloc((sourcedatalength+1)*sizeof(int));
// Based on pg_utf2wchar_with_len from wchar.c
// PostgreSQL strings are not zero terminated
while (sourcedatalength > 0)
{
if ((*sourcedata & 0x80) == 0)
{
*wchardata = *sourcedata++;
wchardata++;
sourcedatalength--;
}
else if ((*sourcedata & 0xe0) == 0xc0)
{
if (sourcedatalength < 2) break;
c1 = *sourcedata++ & 0x1f;
c2 = *sourcedata++ & 0x3f;
*wchardata = (c1 << 6) | c2;
if (*wchardata < 65536) wchardata++;
sourcedatalength -= 2;
}
else if ((*sourcedata & 0xf0) == 0xe0)
{
if (sourcedatalength < 3) break;
c1 = *sourcedata++ & 0x0f;
c2 = *sourcedata++ & 0x3f;
c3 = *sourcedata++ & 0x3f;
*wchardata = (c1 << 12) | (c2 << 6) | c3;
if (*wchardata < 65536) wchardata++;
sourcedatalength -= 3;
}
else if ((*sourcedata & 0xf8) == 0xf0)
{
if (sourcedatalength < 4) break;
c1 = *sourcedata++ & 0x07;
c2 = *sourcedata++ & 0x3f;
c3 = *sourcedata++ & 0x3f;
c4 = *sourcedata++ & 0x3f;
*wchardata = (c1 << 18) | (c2 << 12) | (c3 << 6) | c4;
if (*wchardata < 65536) wchardata++;
sourcedatalength -= 4;
}
else if ((*sourcedata & 0xfc) == 0xf8)
{
// table does not extend beyond 4 char long, just skip
if (sourcedatalength < 5) break;
sourcedatalength -= 5;
sourcedata += 5;
}
else if ((*sourcedata & 0xfe) == 0xfc)
{
// table does not extend beyond 4 char long, just skip
if (sourcedatalength < 6) break;
sourcedatalength -= 6;
sourcedata += 6;
}
else
{
// assume length 1, silently drop bogus characters
sourcedatalength--;
sourcedata += 1;
}
}
*wchardata = 0;
// calculate the length of the transliterated string
resultdatalength = 0;
wchardata = wchardatastart;
while(*wchardata)
{
if (*(asciilookup + *wchardata) > 0) resultdatalength += *(ascii + *(asciilookup + *wchardata));
wchardata++;
}
// allocate & create the result
result = (text *)palloc(resultdatalength + VARHDRSZ);
SET_VARSIZE(result, resultdatalength + VARHDRSZ);
resultdata = (unsigned char *)VARDATA(result);
wchardata = wchardatastart;
while(*wchardata)
{
if (*(asciilookup + *wchardata) > 0)
{
asciipos = ascii + *(asciilookup + *wchardata);
for(iLen = *asciipos; iLen > 0; iLen--)
{
asciipos++;
*resultdata = *asciipos;
resultdata++;
}
}
/*else
{
ereport( WARNING, ( errcode( ERRCODE_SUCCESSFUL_COMPLETION ),
errmsg( "missing char: %i\n", *wchardata )));
}*/
wchardata++;
}
pfree(wchardatastart);
PG_RETURN_TEXT_P(result);
}
// Set isspace=1 if the replacement _only_ adds a space before the search string. I.e. to == " " + from
void str_replace(char* buffer, int* len, int* changes, char* from, int fromlen, char* to, int tolen, int isspace)
{
char *p;
// Search string is too long to be present
if (fromlen > *len) return;
p = strstr(buffer, from);
while(p)
{
if (!isspace || (p > buffer && *(p-1) != ' '))
{
(*changes)++;
if (tolen != fromlen) memmove(p+tolen, p+fromlen, *len-(p-buffer)+1);
memcpy(p, to, tolen);
*len += tolen - fromlen;
}
p = strstr(p+1, from);
}
}
void str_dupspaces(char* buffer)
{
char *out;
int wasspace;
out = buffer;
wasspace = 0;
while(*buffer)
{
if (wasspace && *buffer != ' ') wasspace = 0;
if (!wasspace)
{
*out = *buffer;
out++;
wasspace = (*buffer == ' ');
}
buffer++;
}
*out = 0;
}
PG_FUNCTION_INFO_V1( gettokenstring );
Datum
gettokenstring( PG_FUNCTION_ARGS )
{
text *source;
unsigned char *sourcedata;
int sourcedatalength;
char * buffer;
int len;
int changes;
text *result;
if (GetDatabaseEncoding() != PG_UTF8)
{
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("requires UTF8 database encoding")));
}
if (PG_ARGISNULL(0))
{
PG_RETURN_NULL();
}
// The original string
source = PG_GETARG_TEXT_P(0);
sourcedata = (unsigned char *)VARDATA(source);
sourcedatalength = VARSIZE(source) - VARHDRSZ;
// Buffer for doing the replace in - string could get slightly longer (double is massive overkill)
buffer = (char *)palloc((sourcedatalength*2)*sizeof(char));
memcpy(buffer+1, sourcedata, sourcedatalength);
buffer[0] = 32;
buffer[sourcedatalength+1] = 32;
buffer[sourcedatalength+2] = 0;
len = sourcedatalength+3;
changes = 1;
str_dupspaces(buffer);
while(changes)
{
changes = 0;
#include <tokenstringreplacements.inc>
str_dupspaces(buffer);
}
// 'and' in various languages
str_replace(buffer, &len, &changes, " and ", 5, " ", 1, 0);
str_replace(buffer, &len, &changes, " und ", 5, " ", 1, 0);
str_replace(buffer, &len, &changes, " en ", 4, " ", 1, 0);
str_replace(buffer, &len, &changes, " et ", 4, " ", 1, 0);
str_replace(buffer, &len, &changes, " y ", 3, " ", 1, 0);
// 'the' (and similar)
str_replace(buffer, &len, &changes, " the ", 5, " ", 1, 0);
str_replace(buffer, &len, &changes, " der ", 5, " ", 1, 0);
str_replace(buffer, &len, &changes, " den ", 5, " ", 1, 0);
str_replace(buffer, &len, &changes, " die ", 5, " ", 1, 0);
str_replace(buffer, &len, &changes, " das ", 5, " ", 1, 0);
str_replace(buffer, &len, &changes, " la ", 4, " ", 1, 0);
str_replace(buffer, &len, &changes, " le ", 4, " ", 1, 0);
str_replace(buffer, &len, &changes, " el ", 4, " ", 1, 0);
str_replace(buffer, &len, &changes, " il ", 4, " ", 1, 0);
// german
str_replace(buffer, &len, &changes, "ae", 2, "a", 1, 0);
str_replace(buffer, &len, &changes, "oe", 2, "o", 1, 0);
str_replace(buffer, &len, &changes, "ue", 2, "u", 1, 0);
str_replace(buffer, &len, &changes, "sss", 3, "ss", 2, 0);
str_replace(buffer, &len, &changes, "ih", 2, "i", 1, 0);
str_replace(buffer, &len, &changes, "eh", 2, "e", 1, 0);
// russian
str_replace(buffer, &len, &changes, "ie", 2, "i", 1, 0);
str_replace(buffer, &len, &changes, "yi", 2, "i", 1, 0);
// allocate & create the result
len--;// Drop the terminating zero
result = (text *)palloc(len + VARHDRSZ);
SET_VARSIZE(result, len + VARHDRSZ);
memcpy(VARDATA(result), buffer, len);
pfree(buffer);
PG_RETURN_TEXT_P(result);
}

View File

@ -1,884 +0,0 @@
/**
* SPDX-License-Identifier: GPL-2.0-only
*
* This file is part of Nominatim. (https://nominatim.org)
*
* Copyright (C) 2022 by the Nominatim developer community.
* For a full list of authors see the git log.
*/
str_replace(buffer, &len, &changes, " national wildlife refuge area ", 31, " nwra ", 6, 0);
str_replace(buffer, &len, &changes, " national recreation area ", 26, " nra ", 5, 0);
str_replace(buffer, &len, &changes, " air national guard base ", 25, " angb ", 6, 0);
str_replace(buffer, &len, &changes, " zhilishchien komplieks ", 24, " zh k ", 6, 0);
str_replace(buffer, &len, &changes, " trung tam thuong mdhi ", 23, " tttm ", 6, 0);
str_replace(buffer, &len, &changes, " poligono industrial ", 21, " pgind ", 7, 0);
str_replace(buffer, &len, &changes, " trung hoc pho thong ", 21, " thpt ", 6, 0);
str_replace(buffer, &len, &changes, " onze lieve vrouw e ", 20, " olv ", 5, 0);
str_replace(buffer, &len, &changes, " strada provinciale ", 20, " sp ", 4, 0);
str_replace(buffer, &len, &changes, "onze lieve vrouw e ", 19, " olv ", 5, 0);
str_replace(buffer, &len, &changes, " punto kilometrico ", 19, " pk ", 4, 0);
str_replace(buffer, &len, &changes, " cong vien van hoa ", 19, " cvvh ", 6, 0);
str_replace(buffer, &len, &changes, " can cu khong quan ", 19, " cckq ", 6, 0);
str_replace(buffer, &len, &changes, "strada provinciale ", 19, " sp ", 4, 0);
str_replace(buffer, &len, &changes, " strada regionale ", 18, " sr ", 4, 0);
str_replace(buffer, &len, &changes, " strada comunale ", 17, " sc ", 4, 0);
str_replace(buffer, &len, &changes, "strada regionale ", 17, " sr ", 4, 0);
str_replace(buffer, &len, &changes, " trung hoc co so ", 17, " thcs ", 6, 0);
str_replace(buffer, &len, &changes, " san bay quoc te ", 17, " sbqt ", 6, 0);
str_replace(buffer, &len, &changes, " cong ty co phyn ", 17, " ctcp ", 6, 0);
str_replace(buffer, &len, &changes, " khu cong nghiep ", 17, " kcn ", 5, 0);
str_replace(buffer, &len, &changes, " air force base ", 16, " afb ", 5, 0);
str_replace(buffer, &len, &changes, " strada statale ", 16, " ss ", 4, 0);
str_replace(buffer, &len, &changes, " vien bcyo tang ", 16, " vbt ", 5, 0);
str_replace(buffer, &len, &changes, "strada comunale ", 16, " sc ", 4, 0);
str_replace(buffer, &len, &changes, " circunvalacion ", 16, " ccvcn ", 7, 0);
str_replace(buffer, &len, &changes, " paseo maritimo ", 16, " psmar ", 7, 0);
str_replace(buffer, &len, &changes, " wielkopolskie ", 15, " wlkp ", 6, 0);
str_replace(buffer, &len, &changes, " national park ", 15, " np ", 4, 0);
str_replace(buffer, &len, &changes, " middle school ", 15, " ms ", 4, 0);
str_replace(buffer, &len, &changes, " international ", 15, " intl ", 6, 0);
str_replace(buffer, &len, &changes, " burgermeister ", 15, " bgm ", 5, 0);
str_replace(buffer, &len, &changes, " vuon quoc gia ", 15, " vqg ", 5, 0);
str_replace(buffer, &len, &changes, " qucyng truong ", 15, " qt ", 4, 0);
str_replace(buffer, &len, &changes, "strada statale ", 15, " ss ", 4, 0);
str_replace(buffer, &len, &changes, " state highway ", 15, " sh ", 4, 0);
str_replace(buffer, &len, &changes, "burgermeister ", 14, " bgm ", 5, 0);
str_replace(buffer, &len, &changes, " right of way ", 14, " rowy ", 6, 0);
str_replace(buffer, &len, &changes, " hauptbahnhof ", 14, " hbf ", 5, 0);
str_replace(buffer, &len, &changes, " apartamentos ", 14, " aptos ", 7, 0);
str_replace(buffer, &len, &changes, " wielkopolski ", 14, " wlkp ", 6, 0);
str_replace(buffer, &len, &changes, " burgemeester ", 14, " bg ", 4, 0);
str_replace(buffer, &len, &changes, " camino nuevo ", 14, " c n ", 5, 0);
str_replace(buffer, &len, &changes, " camino hondo ", 14, " c h ", 5, 0);
str_replace(buffer, &len, &changes, " urbanizacion ", 14, " urb ", 5, 0);
str_replace(buffer, &len, &changes, " camino viejo ", 14, " c v ", 5, 0);
str_replace(buffer, &len, &changes, " wielkopolska ", 14, " wlkp ", 6, 0);
str_replace(buffer, &len, &changes, " wojewodztwie ", 14, " woj ", 5, 0);
str_replace(buffer, &len, &changes, " county route ", 14, " cr ", 4, 0);
str_replace(buffer, &len, &changes, " prolongacion ", 14, " prol ", 6, 0);
str_replace(buffer, &len, &changes, " thoroughfare ", 14, " thor ", 6, 0);
str_replace(buffer, &len, &changes, " san van dong ", 14, " svd ", 5, 0);
str_replace(buffer, &len, &changes, " tong cong ty ", 14, " tct ", 5, 0);
str_replace(buffer, &len, &changes, " khu nghi mat ", 14, " knm ", 5, 0);
str_replace(buffer, &len, &changes, " nha thi dzu ", 13, " ntd ", 5, 0);
str_replace(buffer, &len, &changes, " khu du lich ", 13, " kdl ", 5, 0);
str_replace(buffer, &len, &changes, " demarcacion ", 13, " demar ", 7, 0);
str_replace(buffer, &len, &changes, " cau ldhc bo ", 13, " clb ", 5, 0);
str_replace(buffer, &len, &changes, " interchange ", 13, " intg ", 6, 0);
str_replace(buffer, &len, &changes, " distributor ", 13, " dstr ", 6, 0);
str_replace(buffer, &len, &changes, " state route ", 13, " sr ", 4, 0);
str_replace(buffer, &len, &changes, " wojewodztwo ", 13, " woj ", 5, 0);
str_replace(buffer, &len, &changes, " reservation ", 13, " res ", 5, 0);
str_replace(buffer, &len, &changes, " monseigneur ", 13, " mgr ", 5, 0);
str_replace(buffer, &len, &changes, " transversal ", 13, " trval ", 7, 0);
str_replace(buffer, &len, &changes, " extrarradio ", 13, " extrr ", 7, 0);
str_replace(buffer, &len, &changes, " high school ", 13, " hs ", 4, 0);
str_replace(buffer, &len, &changes, " mazowieckie ", 13, " maz ", 5, 0);
str_replace(buffer, &len, &changes, " residencial ", 13, " resid ", 7, 0);
str_replace(buffer, &len, &changes, " cong truong ", 13, " ct ", 4, 0);
str_replace(buffer, &len, &changes, " cooperativa ", 13, " coop ", 6, 0);
str_replace(buffer, &len, &changes, " diseminado ", 12, " disem ", 7, 0);
str_replace(buffer, &len, &changes, " barranquil ", 12, " bqllo ", 7, 0);
str_replace(buffer, &len, &changes, " fire track ", 12, " ftrk ", 6, 0);
str_replace(buffer, &len, &changes, " south east ", 12, " se ", 4, 0);
str_replace(buffer, &len, &changes, " north east ", 12, " ne ", 4, 0);
str_replace(buffer, &len, &changes, " university ", 12, " univ ", 6, 0);
str_replace(buffer, &len, &changes, " south west ", 12, " sw ", 4, 0);
str_replace(buffer, &len, &changes, " monasterio ", 12, " mtrio ", 7, 0);
str_replace(buffer, &len, &changes, " vecindario ", 12, " vecin ", 7, 0);
str_replace(buffer, &len, &changes, " carreterin ", 12, " ctrin ", 7, 0);
str_replace(buffer, &len, &changes, " callejuela ", 12, " cjla ", 6, 0);
str_replace(buffer, &len, &changes, " north-east ", 12, " ne ", 4, 0);
str_replace(buffer, &len, &changes, " south-west ", 12, " sw ", 4, 0);
str_replace(buffer, &len, &changes, " gebroeders ", 12, " gebr ", 6, 0);
str_replace(buffer, &len, &changes, " serviceway ", 12, " swy ", 5, 0);
str_replace(buffer, &len, &changes, " quadrangle ", 12, " qdgl ", 6, 0);
str_replace(buffer, &len, &changes, " commandant ", 12, " cmdt ", 6, 0);
str_replace(buffer, &len, &changes, " extramuros ", 12, " extrm ", 7, 0);
str_replace(buffer, &len, &changes, " escalinata ", 12, " escal ", 7, 0);
str_replace(buffer, &len, &changes, " north-west ", 12, " n ", 3, 0);
str_replace(buffer, &len, &changes, " bulevardul ", 12, " bd ", 4, 0);
str_replace(buffer, &len, &changes, " particular ", 12, " parti ", 7, 0);
str_replace(buffer, &len, &changes, " mazowiecka ", 12, " maz ", 5, 0);
str_replace(buffer, &len, &changes, " mazowiecki ", 12, " maz ", 5, 0);
str_replace(buffer, &len, &changes, " north west ", 12, " n ", 3, 0);
str_replace(buffer, &len, &changes, " industrial ", 12, " ind ", 5, 0);
str_replace(buffer, &len, &changes, " costanilla ", 12, " cstan ", 7, 0);
str_replace(buffer, &len, &changes, " khach sdhn ", 12, " ks ", 4, 0);
str_replace(buffer, &len, &changes, " south-east ", 12, " se ", 4, 0);
str_replace(buffer, &len, &changes, " phi truong ", 12, " pt ", 4, 0);
str_replace(buffer, &len, &changes, " expressway ", 12, " exp ", 5, 0);
str_replace(buffer, &len, &changes, " fondamenta ", 12, " f ta ", 6, 0);
str_replace(buffer, &len, &changes, " apartments ", 12, " apts ", 6, 0);
str_replace(buffer, &len, &changes, " cul de sac ", 12, " cds ", 5, 0);
str_replace(buffer, &len, &changes, " corralillo ", 12, " crrlo ", 7, 0);
str_replace(buffer, &len, &changes, " mitropolit ", 12, " mit ", 5, 0);
str_replace(buffer, &len, &changes, " etorbidea ", 11, " etorb ", 7, 0);
str_replace(buffer, &len, &changes, " ploshchad ", 11, " pl ", 4, 0);
str_replace(buffer, &len, &changes, " cobertizo ", 11, " cbtiz ", 7, 0);
str_replace(buffer, &len, &changes, " underpass ", 11, " upas ", 6, 0);
str_replace(buffer, &len, &changes, " crossroad ", 11, " crd ", 5, 0);
str_replace(buffer, &len, &changes, " fundatura ", 11, " fnd ", 5, 0);
str_replace(buffer, &len, &changes, " foreshore ", 11, " fshr ", 6, 0);
str_replace(buffer, &len, &changes, " parklands ", 11, " pkld ", 6, 0);
str_replace(buffer, &len, &changes, " esplanade ", 11, " esp ", 5, 0);
str_replace(buffer, &len, &changes, " centreway ", 11, " cnwy ", 6, 0);
str_replace(buffer, &len, &changes, " formation ", 11, " form ", 6, 0);
str_replace(buffer, &len, &changes, " explanada ", 11, " expla ", 7, 0);
str_replace(buffer, &len, &changes, " viviendas ", 11, " vvdas ", 7, 0);
str_replace(buffer, &len, &changes, " northeast ", 11, " ne ", 4, 0);
str_replace(buffer, &len, &changes, " cong vien ", 11, " cv ", 4, 0);
str_replace(buffer, &len, &changes, " northwest ", 11, " n ", 3, 0);
str_replace(buffer, &len, &changes, " buildings ", 11, " bldgs ", 7, 0);
str_replace(buffer, &len, &changes, " errepidea ", 11, " err ", 5, 0);
str_replace(buffer, &len, &changes, " extension ", 11, " ex ", 4, 0);
str_replace(buffer, &len, &changes, " municipal ", 11, " mun ", 5, 0);
str_replace(buffer, &len, &changes, " southeast ", 11, " se ", 4, 0);
str_replace(buffer, &len, &changes, " sanatorio ", 11, " sanat ", 7, 0);
str_replace(buffer, &len, &changes, " thanh pho ", 11, " tp ", 4, 0);
str_replace(buffer, &len, &changes, " firetrail ", 11, " fit ", 5, 0);
str_replace(buffer, &len, &changes, " santuario ", 11, " santu ", 7, 0);
str_replace(buffer, &len, &changes, " southwest ", 11, " sw ", 4, 0);
str_replace(buffer, &len, &changes, " autopista ", 11, " auto ", 6, 0);
str_replace(buffer, &len, &changes, " president ", 11, " pres ", 6, 0);
str_replace(buffer, &len, &changes, " rinconada ", 11, " rcda ", 6, 0);
str_replace(buffer, &len, &changes, " kardinaal ", 11, " kard ", 6, 0);
str_replace(buffer, &len, &changes, " plazoleta ", 11, " pzta ", 6, 0);
str_replace(buffer, &len, &changes, " duong sat ", 11, " ds ", 4, 0);
str_replace(buffer, &len, &changes, " trung tam ", 11, " tt ", 4, 0);
str_replace(buffer, &len, &changes, " piazzetta ", 11, " pta ", 5, 0);
str_replace(buffer, &len, &changes, " boardwalk ", 11, " bwlk ", 6, 0);
str_replace(buffer, &len, &changes, " bulievard ", 11, " bd ", 4, 0);
str_replace(buffer, &len, &changes, " luitenant ", 11, " luit ", 6, 0);
str_replace(buffer, &len, &changes, " courtyard ", 11, " ctyd ", 6, 0);
str_replace(buffer, &len, &changes, " reservoir ", 11, " res ", 5, 0);
str_replace(buffer, &len, &changes, " bulevardu ", 11, " bd ", 4, 0);
str_replace(buffer, &len, &changes, " community ", 11, " comm ", 6, 0);
str_replace(buffer, &len, &changes, " concourse ", 11, " con ", 5, 0);
str_replace(buffer, &len, &changes, " profiesor ", 11, " prof ", 6, 0);
str_replace(buffer, &len, &changes, " promenade ", 11, " prom ", 6, 0);
str_replace(buffer, &len, &changes, " gienieral ", 11, " ghien ", 7, 0);
str_replace(buffer, &len, &changes, " puistikko ", 11, " pko ", 5, 0);
str_replace(buffer, &len, &changes, " balneario ", 11, " balnr ", 7, 0);
str_replace(buffer, &len, &changes, " carretera ", 11, " ctra ", 6, 0);
str_replace(buffer, &len, &changes, " ingenieur ", 11, " ir ", 4, 0);
str_replace(buffer, &len, &changes, " boulevard ", 11, " bd ", 4, 0);
str_replace(buffer, &len, &changes, " deviation ", 11, " devn ", 6, 0);
str_replace(buffer, &len, &changes, " hipodromo ", 11, " hipod ", 7, 0);
str_replace(buffer, &len, &changes, " professor ", 11, " prof ", 6, 0);
str_replace(buffer, &len, &changes, " triangle ", 10, " tri ", 5, 0);
str_replace(buffer, &len, &changes, " dotsient ", 10, " dots ", 6, 0);
str_replace(buffer, &len, &changes, " boundary ", 10, " bdy ", 5, 0);
str_replace(buffer, &len, &changes, " salizada ", 10, " s da ", 6, 0);
str_replace(buffer, &len, &changes, " trunkway ", 10, " tkwy ", 6, 0);
str_replace(buffer, &len, &changes, " cinturon ", 10, " cint ", 6, 0);
str_replace(buffer, &len, &changes, "president ", 10, " pres ", 6, 0);
str_replace(buffer, &len, &changes, " military ", 10, " mil ", 5, 0);
str_replace(buffer, &len, &changes, " jonkheer ", 10, " jhr ", 5, 0);
str_replace(buffer, &len, &changes, " motorway ", 10, " mwy ", 5, 0);
str_replace(buffer, &len, &changes, " steenweg ", 10, " stwg ", 6, 0);
str_replace(buffer, &len, &changes, " crescent ", 10, " cr ", 4, 0);
str_replace(buffer, &len, &changes, " kanunnik ", 10, " kan ", 5, 0);
str_replace(buffer, &len, &changes, " koningin ", 10, " kon ", 5, 0);
str_replace(buffer, &len, &changes, " crossing ", 10, " xing ", 6, 0);
str_replace(buffer, &len, &changes, " callejon ", 10, " cjon ", 6, 0);
str_replace(buffer, &len, &changes, " pasadizo ", 10, " pzo ", 5, 0);
str_replace(buffer, &len, &changes, " crossway ", 10, " cowy ", 6, 0);
str_replace(buffer, &len, &changes, " cottages ", 10, " cotts ", 7, 0);
str_replace(buffer, &len, &changes, " mountain ", 10, " mtn ", 5, 0);
str_replace(buffer, &len, &changes, " business ", 10, " bus ", 5, 0);
str_replace(buffer, &len, &changes, " pierwszy ", 10, " 1 ", 3, 0);
str_replace(buffer, &len, &changes, " pierwsza ", 10, " 1 ", 3, 0);
str_replace(buffer, &len, &changes, " pierwsze ", 10, " 1 ", 3, 0);
str_replace(buffer, &len, &changes, " barriada ", 10, " barda ", 7, 0);
str_replace(buffer, &len, &changes, " entrance ", 10, " ent ", 5, 0);
str_replace(buffer, &len, &changes, " causeway ", 10, " cway ", 6, 0);
str_replace(buffer, &len, &changes, " generaal ", 10, " gen ", 5, 0);
str_replace(buffer, &len, &changes, " driveway ", 10, " dvwy ", 6, 0);
str_replace(buffer, &len, &changes, " township ", 10, " twp ", 5, 0);
str_replace(buffer, &len, &changes, " stazione ", 10, " staz ", 6, 0);
str_replace(buffer, &len, &changes, " broadway ", 10, " bway ", 6, 0);
str_replace(buffer, &len, &changes, " alleyway ", 10, " alwy ", 6, 0);
str_replace(buffer, &len, &changes, " quadrant ", 10, " qdrt ", 6, 0);
str_replace(buffer, &len, &changes, " apeadero ", 10, " apdro ", 7, 0);
str_replace(buffer, &len, &changes, " arboleda ", 10, " arb ", 5, 0);
str_replace(buffer, &len, &changes, " escalera ", 10, " esca ", 6, 0);
str_replace(buffer, &len, &changes, " rdhp hat ", 10, " rh ", 4, 0);
str_replace(buffer, &len, &changes, " transito ", 10, " trans ", 7, 0);
str_replace(buffer, &len, &changes, " ddhi hoc ", 10, " dh ", 4, 0);
str_replace(buffer, &len, &changes, " travesia ", 10, " trva ", 6, 0);
str_replace(buffer, &len, &changes, " barranco ", 10, " branc ", 7, 0);
str_replace(buffer, &len, &changes, " namestie ", 10, " nam ", 5, 0);
str_replace(buffer, &len, &changes, " viaducto ", 10, " vcto ", 6, 0);
str_replace(buffer, &len, &changes, " convento ", 10, " cnvto ", 7, 0);
str_replace(buffer, &len, &changes, " estacion ", 10, " estcn ", 7, 0);
str_replace(buffer, &len, &changes, "puistikko ", 10, " pko ", 5, 0);
str_replace(buffer, &len, &changes, " precinct ", 10, " pct ", 5, 0);
str_replace(buffer, &len, &changes, " heiligen ", 10, " hl ", 4, 0);
str_replace(buffer, &len, &changes, " edificio ", 10, " edifc ", 7, 0);
str_replace(buffer, &len, &changes, " prazuela ", 10, " przla ", 7, 0);
str_replace(buffer, &len, &changes, " thi trzn ", 10, " tt ", 4, 0);
str_replace(buffer, &len, &changes, " ridgeway ", 10, " rgwy ", 6, 0);
str_replace(buffer, &len, &changes, " riverway ", 10, " rvwy ", 6, 0);
str_replace(buffer, &len, &changes, " corredor ", 10, " crrdo ", 7, 0);
str_replace(buffer, &len, &changes, " passatge ", 10, " ptge ", 6, 0);
str_replace(buffer, &len, &changes, " junction ", 10, " jnc ", 5, 0);
str_replace(buffer, &len, &changes, " hospital ", 10, " hosp ", 6, 0);
str_replace(buffer, &len, &changes, " highroad ", 10, " hrd ", 5, 0);
str_replace(buffer, &len, &changes, " torrente ", 10, " trrnt ", 7, 0);
str_replace(buffer, &len, &changes, " avinguda ", 10, " av ", 4, 0);
str_replace(buffer, &len, &changes, " portillo ", 10, " ptilo ", 7, 0);
str_replace(buffer, &len, &changes, " diagonal ", 10, " diag ", 6, 0);
str_replace(buffer, &len, &changes, " buu dien ", 10, " bd ", 4, 0);
str_replace(buffer, &len, &changes, " alqueria ", 10, " alque ", 7, 0);
str_replace(buffer, &len, &changes, " poligono ", 10, " polig ", 7, 0);
str_replace(buffer, &len, &changes, " roadside ", 10, " rdsd ", 6, 0);
str_replace(buffer, &len, &changes, " glorieta ", 10, " gta ", 5, 0);
str_replace(buffer, &len, &changes, " fundacul ", 10, " fdc ", 5, 0);
str_replace(buffer, &len, &changes, " cao dang ", 10, " cd ", 4, 0);
str_replace(buffer, &len, &changes, " rosebowl ", 10, " rsbl ", 6, 0);
str_replace(buffer, &len, &changes, " complejo ", 10, " compj ", 7, 0);
str_replace(buffer, &len, &changes, " carretil ", 10, " crtil ", 7, 0);
str_replace(buffer, &len, &changes, " intrarea ", 10, " int ", 5, 0);
str_replace(buffer, &len, &changes, " gran via ", 10, " g v ", 5, 0);
str_replace(buffer, &len, &changes, " approach ", 10, " app ", 5, 0);
str_replace(buffer, &len, &changes, " stradela ", 10, " sdla ", 6, 0);
str_replace(buffer, &len, &changes, " conjunto ", 10, " cjto ", 6, 0);
str_replace(buffer, &len, &changes, " arterial ", 10, " artl ", 6, 0);
str_replace(buffer, &len, &changes, " plazuela ", 10, " plzla ", 7, 0);
str_replace(buffer, &len, &changes, " frontage ", 10, " frtg ", 6, 0);
str_replace(buffer, &len, &changes, " faubourg ", 10, " fg ", 4, 0);
str_replace(buffer, &len, &changes, " mansions ", 10, " mans ", 6, 0);
str_replace(buffer, &len, &changes, " turnpike ", 10, " tpk ", 5, 0);
str_replace(buffer, &len, &changes, " piazzale ", 10, " p le ", 6, 0);
str_replace(buffer, &len, &changes, " tieu hoc ", 10, " th ", 4, 0);
str_replace(buffer, &len, &changes, " bulevard ", 10, " bd ", 4, 0);
str_replace(buffer, &len, &changes, " sendera ", 9, " sedra ", 7, 0);
str_replace(buffer, &len, &changes, " cutting ", 9, " cutt ", 6, 0);
str_replace(buffer, &len, &changes, " cantina ", 9, " canti ", 7, 0);
str_replace(buffer, &len, &changes, " cantera ", 9, " cantr ", 7, 0);
str_replace(buffer, &len, &changes, " rotonda ", 9, " rtda ", 6, 0);
str_replace(buffer, &len, &changes, " pasillo ", 9, " psllo ", 7, 0);
str_replace(buffer, &len, &changes, " landing ", 9, " ldg ", 5, 0);
str_replace(buffer, &len, &changes, " kolonel ", 9, " kol ", 5, 0);
str_replace(buffer, &len, &changes, " cong ty ", 9, " cty ", 5, 0);
str_replace(buffer, &len, &changes, " fairway ", 9, " fawy ", 6, 0);
str_replace(buffer, &len, &changes, " highway ", 9, " hwy ", 5, 0);
str_replace(buffer, &len, &changes, " lookout ", 9, " lkt ", 5, 0);
str_replace(buffer, &len, &changes, " meander ", 9, " mr ", 4, 0);
str_replace(buffer, &len, &changes, " carrera ", 9, " cra ", 5, 0);
str_replace(buffer, &len, &changes, " station ", 9, " stn ", 5, 0);
str_replace(buffer, &len, &changes, " kapitan ", 9, " kap ", 5, 0);
str_replace(buffer, &len, &changes, " medical ", 9, " med ", 5, 0);
str_replace(buffer, &len, &changes, " broeder ", 9, " br ", 4, 0);
str_replace(buffer, &len, &changes, " poblado ", 9, " pbdo ", 6, 0);
str_replace(buffer, &len, &changes, " impasse ", 9, " imp ", 5, 0);
str_replace(buffer, &len, &changes, " gardens ", 9, " gdn ", 5, 0);
str_replace(buffer, &len, &changes, " nha tho ", 9, " nt ", 4, 0);
str_replace(buffer, &len, &changes, " nha hat ", 9, " nh ", 4, 0);
str_replace(buffer, &len, &changes, " freeway ", 9, " fwy ", 5, 0);
str_replace(buffer, &len, &changes, " trasera ", 9, " tras ", 6, 0);
str_replace(buffer, &len, &changes, " portico ", 9, " prtco ", 7, 0);
str_replace(buffer, &len, &changes, " terrace ", 9, " ter ", 5, 0);
str_replace(buffer, &len, &changes, " heights ", 9, " hts ", 5, 0);
str_replace(buffer, &len, &changes, " camping ", 9, " campg ", 7, 0);
str_replace(buffer, &len, &changes, " callizo ", 9, " cllzo ", 7, 0);
str_replace(buffer, &len, &changes, " footway ", 9, " ftwy ", 6, 0);
str_replace(buffer, &len, &changes, " calzada ", 9, " czada ", 7, 0);
str_replace(buffer, &len, &changes, " dominee ", 9, " ds ", 4, 0);
str_replace(buffer, &len, &changes, " meadows ", 9, " mdws ", 6, 0);
str_replace(buffer, &len, &changes, " sendero ", 9, " send ", 6, 0);
str_replace(buffer, &len, &changes, " osiedle ", 9, " os ", 4, 0);
str_replace(buffer, &len, &changes, " estrada ", 9, " estda ", 7, 0);
str_replace(buffer, &len, &changes, " avenida ", 9, " av ", 4, 0);
str_replace(buffer, &len, &changes, " zgornji ", 9, " zg ", 4, 0);
str_replace(buffer, &len, &changes, " zgornje ", 9, " zg ", 4, 0);
str_replace(buffer, &len, &changes, " zgornja ", 9, " zg ", 4, 0);
str_replace(buffer, &len, &changes, " arrabal ", 9, " arral ", 7, 0);
str_replace(buffer, &len, &changes, " espalda ", 9, " eslda ", 7, 0);
str_replace(buffer, &len, &changes, " entrada ", 9, " entd ", 6, 0);
str_replace(buffer, &len, &changes, " kleiner ", 9, " kl ", 4, 0);
str_replace(buffer, &len, &changes, " kleines ", 9, " kl ", 4, 0);
str_replace(buffer, &len, &changes, " viaduct ", 9, " via ", 5, 0);
str_replace(buffer, &len, &changes, " roadway ", 9, " rdwy ", 6, 0);
str_replace(buffer, &len, &changes, " strasse ", 9, " st ", 4, 0);
str_replace(buffer, &len, &changes, " spodnje ", 9, " sp ", 4, 0);
str_replace(buffer, &len, &changes, " spodnji ", 9, " sp ", 4, 0);
str_replace(buffer, &len, &changes, " spodnja ", 9, " sp ", 4, 0);
str_replace(buffer, &len, &changes, " fabrica ", 9, " fca ", 5, 0);
str_replace(buffer, &len, &changes, " muntele ", 9, " mt ", 4, 0);
str_replace(buffer, &len, &changes, " maantee ", 9, " mt ", 4, 0);
str_replace(buffer, &len, &changes, " srednje ", 9, " sr ", 4, 0);
str_replace(buffer, &len, &changes, " unterer ", 9, " u ", 3, 0);
str_replace(buffer, &len, &changes, " unteres ", 9, " u ", 3, 0);
str_replace(buffer, &len, &changes, " plateau ", 9, " plat ", 6, 0);
str_replace(buffer, &len, &changes, " srednji ", 9, " sr ", 4, 0);
str_replace(buffer, &len, &changes, " empresa ", 9, " empr ", 6, 0);
str_replace(buffer, &len, &changes, " angosta ", 9, " angta ", 7, 0);
str_replace(buffer, &len, &changes, " costera ", 9, " coste ", 7, 0);
str_replace(buffer, &len, &changes, " tinh lo ", 9, " tl ", 4, 0);
str_replace(buffer, &len, &changes, " quoc lo ", 9, " ql ", 4, 0);
str_replace(buffer, &len, &changes, " auf der ", 9, " a d ", 5, 0);
str_replace(buffer, &len, &changes, " bulvari ", 9, " bl ", 4, 0);
str_replace(buffer, &len, &changes, " ddhi lo ", 9, " dl ", 4, 0);
str_replace(buffer, &len, &changes, " namesti ", 9, " nam ", 5, 0);
str_replace(buffer, &len, &changes, " passeig ", 9, " pg ", 4, 0);
str_replace(buffer, &len, &changes, " carrero ", 9, " cro ", 5, 0);
str_replace(buffer, &len, &changes, " cortijo ", 9, " crtjo ", 7, 0);
str_replace(buffer, &len, &changes, " san bay ", 9, " sb ", 4, 0);
str_replace(buffer, &len, &changes, " riviera ", 9, " rvra ", 6, 0);
str_replace(buffer, &len, &changes, " caddesi ", 9, " cd ", 4, 0);
str_replace(buffer, &len, &changes, " andador ", 9, " andad ", 7, 0);
str_replace(buffer, &len, &changes, " walkway ", 9, " wkwy ", 6, 0);
str_replace(buffer, &len, &changes, " granden ", 9, " gr ", 4, 0);
str_replace(buffer, &len, &changes, " grosser ", 9, " gr ", 4, 0);
str_replace(buffer, &len, &changes, " grosses ", 9, " gr ", 4, 0);
str_replace(buffer, &len, &changes, " reserve ", 9, " res ", 5, 0);
str_replace(buffer, &len, &changes, " alameda ", 9, " alam ", 6, 0);
str_replace(buffer, &len, &changes, " retreat ", 9, " rtt ", 5, 0);
str_replace(buffer, &len, &changes, " acequia ", 9, " aceq ", 6, 0);
str_replace(buffer, &len, &changes, " platsen ", 9, " pl ", 4, 0);
str_replace(buffer, &len, &changes, " bahnhof ", 9, " bf ", 4, 0);
str_replace(buffer, &len, &changes, " autovia ", 9, " autov ", 7, 0);
str_replace(buffer, &len, &changes, " srednja ", 9, " sr ", 4, 0);
str_replace(buffer, &len, &changes, " galeria ", 9, " gale ", 6, 0);
str_replace(buffer, &len, &changes, " circuit ", 9, " cct ", 5, 0);
str_replace(buffer, &len, &changes, " svingen ", 9, " sv ", 4, 0);
str_replace(buffer, &len, &changes, " plassen ", 9, " pl ", 4, 0);
str_replace(buffer, &len, &changes, " mirador ", 9, " mrdor ", 7, 0);
str_replace(buffer, &len, &changes, " laneway ", 9, " lnwy ", 6, 0);
str_replace(buffer, &len, &changes, " kolonia ", 9, " kol ", 5, 0);
str_replace(buffer, &len, &changes, " outlook ", 9, " otlk ", 6, 0);
str_replace(buffer, &len, &changes, " caravan ", 9, " cvn ", 5, 0);
str_replace(buffer, &len, &changes, " osiedlu ", 9, " os ", 4, 0);
str_replace(buffer, &len, &changes, " palacio ", 9, " palac ", 7, 0);
str_replace(buffer, &len, &changes, " pantano ", 9, " pant ", 6, 0);
str_replace(buffer, &len, &changes, " partida ", 9, " ptda ", 6, 0);
str_replace(buffer, &len, &changes, " calleja ", 9, " cllja ", 7, 0);
str_replace(buffer, &len, &changes, " mevrouw ", 9, " mevr ", 6, 0);
str_replace(buffer, &len, &changes, " meester ", 9, " mr ", 4, 0);
str_replace(buffer, &len, &changes, " pastoor ", 9, " past ", 6, 0);
str_replace(buffer, &len, &changes, " prinses ", 9, " pr ", 4, 0);
str_replace(buffer, &len, &changes, " bulevar ", 9, " bd ", 4, 0);
str_replace(buffer, &len, &changes, " tollway ", 9, " tlwy ", 6, 0);
str_replace(buffer, &len, &changes, "steenweg ", 9, " stwg ", 6, 0);
str_replace(buffer, &len, &changes, " caserio ", 9, " csrio ", 7, 0);
str_replace(buffer, &len, &changes, " mercado ", 9, " merc ", 6, 0);
str_replace(buffer, &len, &changes, " alejach ", 9, " al ", 4, 0);
str_replace(buffer, &len, &changes, " kvartal ", 9, " kv ", 4, 0);
str_replace(buffer, &len, &changes, " parkway ", 9, " pwy ", 5, 0);
str_replace(buffer, &len, &changes, " passage ", 9, " ps ", 4, 0);
str_replace(buffer, &len, &changes, " pathway ", 9, " pway ", 6, 0);
str_replace(buffer, &len, &changes, " splaiul ", 9, " sp ", 4, 0);
str_replace(buffer, &len, &changes, " soseaua ", 9, " sos ", 5, 0);
str_replace(buffer, &len, &changes, " colonia ", 9, " col ", 5, 0);
str_replace(buffer, &len, &changes, " wielkie ", 9, " wlk ", 5, 0);
str_replace(buffer, &len, &changes, " trzecie ", 9, " 3 ", 3, 0);
str_replace(buffer, &len, &changes, " llanura ", 9, " llnra ", 7, 0);
str_replace(buffer, &len, &changes, " malecon ", 9, " malec ", 7, 0);
str_replace(buffer, &len, &changes, " trzecia ", 9, " 3 ", 3, 0);
str_replace(buffer, &len, &changes, " trailer ", 9, " trlr ", 6, 0);
str_replace(buffer, &len, &changes, " cuadra ", 8, " cuadr ", 7, 0);
str_replace(buffer, &len, &changes, " cty cp ", 8, " ctcp ", 6, 0);
str_replace(buffer, &len, &changes, " paraje ", 8, " praje ", 7, 0);
str_replace(buffer, &len, &changes, " parque ", 8, " pque ", 6, 0);
str_replace(buffer, &len, &changes, " piazza ", 8, " p za ", 6, 0);
str_replace(buffer, &len, &changes, " puerta ", 8, " pta ", 5, 0);
str_replace(buffer, &len, &changes, " little ", 8, " lt ", 4, 0);
str_replace(buffer, &len, &changes, " pueblo ", 8, " pblo ", 6, 0);
str_replace(buffer, &len, &changes, " puente ", 8, " pnte ", 6, 0);
str_replace(buffer, &len, &changes, " jardin ", 8, " jdin ", 6, 0);
str_replace(buffer, &len, &changes, " granja ", 8, " granj ", 7, 0);
str_replace(buffer, &len, &changes, " market ", 8, " mkt ", 5, 0);
str_replace(buffer, &len, &changes, " pasaje ", 8, " psaje ", 7, 0);
str_replace(buffer, &len, &changes, " rotary ", 8, " rty ", 5, 0);
str_replace(buffer, &len, &changes, " corral ", 8, " crral ", 7, 0);
str_replace(buffer, &len, &changes, " siding ", 8, " sdng ", 6, 0);
str_replace(buffer, &len, &changes, " nucleo ", 8, " ncleo ", 7, 0);
str_replace(buffer, &len, &changes, " muelle ", 8, " muell ", 7, 0);
str_replace(buffer, &len, &changes, " carril ", 8, " crril ", 7, 0);
str_replace(buffer, &len, &changes, " portal ", 8, " prtal ", 7, 0);
str_replace(buffer, &len, &changes, " ramble ", 8, " rmbl ", 6, 0);
str_replace(buffer, &len, &changes, " pocket ", 8, " pkt ", 5, 0);
str_replace(buffer, &len, &changes, " chalet ", 8, " chlet ", 7, 0);
str_replace(buffer, &len, &changes, " canton ", 8, " cant ", 6, 0);
str_replace(buffer, &len, &changes, " ladera ", 8, " ldera ", 7, 0);
str_replace(buffer, &len, &changes, " parade ", 8, " pde ", 5, 0);
str_replace(buffer, &len, &changes, " dehesa ", 8, " dhsa ", 6, 0);
str_replace(buffer, &len, &changes, " museum ", 8, " mus ", 5, 0);
str_replace(buffer, &len, &changes, " middle ", 8, " mid ", 5, 0);
str_replace(buffer, &len, &changes, " cuesta ", 8, " custa ", 7, 0);
str_replace(buffer, &len, &changes, " gracht ", 8, " gr ", 4, 0);
str_replace(buffer, &len, &changes, " virful ", 8, " vf ", 4, 0);
str_replace(buffer, &len, &changes, " m tele ", 8, " mt ", 4, 0);
str_replace(buffer, &len, &changes, " varful ", 8, " vf ", 4, 0);
str_replace(buffer, &len, &changes, " str la ", 8, " sdla ", 6, 0);
str_replace(buffer, &len, &changes, " arcade ", 8, " arc ", 5, 0);
str_replace(buffer, &len, &changes, " strada ", 8, " st ", 4, 0);
str_replace(buffer, &len, &changes, " access ", 8, " accs ", 6, 0);
str_replace(buffer, &len, &changes, " bajada ", 8, " bjada ", 7, 0);
str_replace(buffer, &len, &changes, " veliki ", 8, " v ", 3, 0);
str_replace(buffer, &len, &changes, "strasse ", 8, " st ", 4, 0);
str_replace(buffer, &len, &changes, " velike ", 8, " v ", 3, 0);
str_replace(buffer, &len, &changes, " untere ", 8, " u ", 3, 0);
str_replace(buffer, &len, &changes, " velika ", 8, " v ", 3, 0);
str_replace(buffer, &len, &changes, " artery ", 8, " arty ", 6, 0);
str_replace(buffer, &len, &changes, " avenue ", 8, " av ", 4, 0);
str_replace(buffer, &len, &changes, " miasto ", 8, " m ", 3, 0);
str_replace(buffer, &len, &changes, " bypass ", 8, " byp ", 5, 0);
str_replace(buffer, &len, &changes, " placem ", 8, " pl ", 4, 0);
str_replace(buffer, &len, &changes, " barrio ", 8, " bo ", 4, 0);
str_replace(buffer, &len, &changes, " center ", 8, " ctr ", 5, 0);
str_replace(buffer, &len, &changes, " bldngs ", 8, " bldgs ", 7, 0);
str_replace(buffer, &len, &changes, " puerto ", 8, " pto ", 5, 0);
str_replace(buffer, &len, &changes, " wielka ", 8, " wlk ", 5, 0);
str_replace(buffer, &len, &changes, " tunnel ", 8, " tun ", 5, 0);
str_replace(buffer, &len, &changes, " wielki ", 8, " wlk ", 5, 0);
str_replace(buffer, &len, &changes, " bridge ", 8, " bri ", 5, 0);
str_replace(buffer, &len, &changes, " trzeci ", 8, " 3 ", 3, 0);
str_replace(buffer, &len, &changes, " veliko ", 8, " v ", 3, 0);
str_replace(buffer, &len, &changes, " quelle ", 8, " qu ", 4, 0);
str_replace(buffer, &len, &changes, " acceso ", 8, " acces ", 7, 0);
str_replace(buffer, &len, &changes, " bulvar ", 8, " bl ", 4, 0);
str_replace(buffer, &len, &changes, " sokagi ", 8, " sk ", 4, 0);
str_replace(buffer, &len, &changes, "platsen ", 8, " pl ", 4, 0);
str_replace(buffer, &len, &changes, " stigen ", 8, " st ", 4, 0);
str_replace(buffer, &len, &changes, " brucke ", 8, " br ", 4, 0);
str_replace(buffer, &len, &changes, " an der ", 8, " a d ", 5, 0);
str_replace(buffer, &len, &changes, " thi xa ", 8, " tx ", 4, 0);
str_replace(buffer, &len, &changes, " nordre ", 8, " ndr ", 5, 0);
str_replace(buffer, &len, &changes, " rambla ", 8, " rbla ", 6, 0);
str_replace(buffer, &len, &changes, " sondre ", 8, " sdr ", 5, 0);
str_replace(buffer, &len, &changes, "quoc lo ", 8, " ql ", 4, 0);
str_replace(buffer, &len, &changes, " phuong ", 8, " p ", 3, 0);
str_replace(buffer, &len, &changes, " vastra ", 8, " v ", 3, 0);
str_replace(buffer, &len, &changes, " carrer ", 8, " c ", 3, 0);
str_replace(buffer, &len, &changes, " oberes ", 8, " o ", 3, 0);
str_replace(buffer, &len, &changes, " raitti ", 8, " r ", 3, 0);
str_replace(buffer, &len, &changes, " puisto ", 8, " ps ", 4, 0);
str_replace(buffer, &len, &changes, " arroyo ", 8, " arry ", 6, 0);
str_replace(buffer, &len, &changes, " penger ", 8, " pgr ", 5, 0);
str_replace(buffer, &len, &changes, " oberer ", 8, " o ", 3, 0);
str_replace(buffer, &len, &changes, " kleine ", 8, " kl ", 4, 0);
str_replace(buffer, &len, &changes, " grosse ", 8, " gr ", 4, 0);
str_replace(buffer, &len, &changes, "granden ", 8, " gr ", 4, 0);
str_replace(buffer, &len, &changes, " villas ", 8, " vlls ", 6, 0);
str_replace(buffer, &len, &changes, " taival ", 8, " tvl ", 5, 0);
str_replace(buffer, &len, &changes, " in der ", 8, " i d ", 5, 0);
str_replace(buffer, &len, &changes, " centre ", 8, " ctr ", 5, 0);
str_replace(buffer, &len, &changes, " drugie ", 8, " 2 ", 3, 0);
str_replace(buffer, &len, &changes, " dokter ", 8, " dr ", 4, 0);
str_replace(buffer, &len, &changes, " grange ", 8, " gra ", 5, 0);
str_replace(buffer, &len, &changes, " doctor ", 8, " dr ", 4, 0);
str_replace(buffer, &len, &changes, " vicolo ", 8, " v lo ", 6, 0);
str_replace(buffer, &len, &changes, " kort e ", 8, " k ", 3, 0);
str_replace(buffer, &len, &changes, " koning ", 8, " kon ", 5, 0);
str_replace(buffer, &len, &changes, " straat ", 8, " st ", 4, 0);
str_replace(buffer, &len, &changes, " svieti ", 8, " sv ", 4, 0);
str_replace(buffer, &len, &changes, " callej ", 8, " cjon ", 6, 0);
str_replace(buffer, &len, &changes, " ground ", 8, " grnd ", 6, 0);
str_replace(buffer, &len, &changes, " vereda ", 8, " vreda ", 7, 0);
str_replace(buffer, &len, &changes, " chemin ", 8, " ch ", 4, 0);
str_replace(buffer, &len, &changes, " street ", 8, " st ", 4, 0);
str_replace(buffer, &len, &changes, " strand ", 8, " st ", 4, 0);
str_replace(buffer, &len, &changes, " sainte ", 8, " ste ", 5, 0);
str_replace(buffer, &len, &changes, " camino ", 8, " cno ", 5, 0);
str_replace(buffer, &len, &changes, " garden ", 8, " gdn ", 5, 0);
str_replace(buffer, &len, &changes, " follow ", 8, " folw ", 6, 0);
str_replace(buffer, &len, &changes, " estate ", 8, " est ", 5, 0);
str_replace(buffer, &len, &changes, " doktor ", 8, " d r ", 5, 0);
str_replace(buffer, &len, &changes, " subway ", 8, " sbwy ", 6, 0);
str_replace(buffer, &len, &changes, " ulitsa ", 8, " ul ", 4, 0);
str_replace(buffer, &len, &changes, " square ", 8, " sq ", 4, 0);
str_replace(buffer, &len, &changes, " towers ", 8, " twrs ", 6, 0);
str_replace(buffer, &len, &changes, "plassen ", 8, " pl ", 4, 0);
str_replace(buffer, &len, &changes, " county ", 8, " co ", 4, 0);
str_replace(buffer, &len, &changes, " brazal ", 8, " brzal ", 7, 0);
str_replace(buffer, &len, &changes, " circus ", 8, " crcs ", 6, 0);
str_replace(buffer, &len, &changes, "svingen ", 8, " sv ", 4, 0);
str_replace(buffer, &len, &changes, " rampla ", 8, " rampa ", 7, 0);
str_replace(buffer, &len, &changes, " bloque ", 8, " blque ", 7, 0);
str_replace(buffer, &len, &changes, " circle ", 8, " cir ", 5, 0);
str_replace(buffer, &len, &changes, " island ", 8, " is ", 4, 0);
str_replace(buffer, &len, &changes, " common ", 8, " comm ", 6, 0);
str_replace(buffer, &len, &changes, " ribera ", 8, " rbra ", 6, 0);
str_replace(buffer, &len, &changes, " sector ", 8, " sect ", 6, 0);
str_replace(buffer, &len, &changes, " rincon ", 8, " rcon ", 6, 0);
str_replace(buffer, &len, &changes, " van de ", 8, " vd ", 4, 0);
str_replace(buffer, &len, &changes, " corner ", 8, " cnr ", 5, 0);
str_replace(buffer, &len, &changes, " subida ", 8, " sbida ", 7, 0);
str_replace(buffer, &len, &changes, " banda ", 7, " b ", 3, 0);
str_replace(buffer, &len, &changes, " bulev ", 7, " bd ", 4, 0);
str_replace(buffer, &len, &changes, " barro ", 7, " bo ", 4, 0);
str_replace(buffer, &len, &changes, " cllon ", 7, " cjon ", 6, 0);
str_replace(buffer, &len, &changes, " p zza ", 7, " p za ", 6, 0);
str_replace(buffer, &len, &changes, " drugi ", 7, " 2 ", 3, 0);
str_replace(buffer, &len, &changes, " druga ", 7, " 2 ", 3, 0);
str_replace(buffer, &len, &changes, " placu ", 7, " pl ", 4, 0);
str_replace(buffer, &len, &changes, " aleji ", 7, " al ", 4, 0);
str_replace(buffer, &len, &changes, " aleja ", 7, " al ", 4, 0);
str_replace(buffer, &len, &changes, " aleje ", 7, " al ", 4, 0);
str_replace(buffer, &len, &changes, " stary ", 7, " st ", 4, 0);
str_replace(buffer, &len, &changes, " stara ", 7, " st ", 4, 0);
str_replace(buffer, &len, &changes, " dolny ", 7, " dln ", 5, 0);
str_replace(buffer, &len, &changes, " dolna ", 7, " dln ", 5, 0);
str_replace(buffer, &len, &changes, " gorne ", 7, " gn ", 4, 0);
str_replace(buffer, &len, &changes, " gorna ", 7, " gn ", 4, 0);
str_replace(buffer, &len, &changes, " stare ", 7, " st ", 4, 0);
str_replace(buffer, &len, &changes, " gorny ", 7, " gn ", 4, 0);
str_replace(buffer, &len, &changes, " ulicy ", 7, " ul ", 4, 0);
str_replace(buffer, &len, &changes, " ulica ", 7, " ul ", 4, 0);
str_replace(buffer, &len, &changes, " o l v ", 7, " olv ", 5, 0);
str_replace(buffer, &len, &changes, " plein ", 7, " pln ", 5, 0);
str_replace(buffer, &len, &changes, " markt ", 7, " mkt ", 5, 0);
str_replace(buffer, &len, &changes, " lange ", 7, " l ", 3, 0);
str_replace(buffer, &len, &changes, " viale ", 7, " v le ", 6, 0);
str_replace(buffer, &len, &changes, "gracht ", 7, " gr ", 4, 0);
str_replace(buffer, &len, &changes, " prins ", 7, " pr ", 4, 0);
str_replace(buffer, &len, &changes, "straat ", 7, " st ", 4, 0);
str_replace(buffer, &len, &changes, " plass ", 7, " pl ", 4, 0);
str_replace(buffer, &len, &changes, " sving ", 7, " sv ", 4, 0);
str_replace(buffer, &len, &changes, " gaten ", 7, " g ", 3, 0);
str_replace(buffer, &len, &changes, " veien ", 7, " v ", 3, 0);
str_replace(buffer, &len, &changes, " vliet ", 7, " vlt ", 5, 0);
str_replace(buffer, &len, &changes, " dolne ", 7, " dln ", 5, 0);
str_replace(buffer, &len, &changes, " b dul ", 7, " bd ", 4, 0);
str_replace(buffer, &len, &changes, " sodra ", 7, " s ", 3, 0);
str_replace(buffer, &len, &changes, " norra ", 7, " n ", 3, 0);
str_replace(buffer, &len, &changes, " gamla ", 7, " gla ", 5, 0);
str_replace(buffer, &len, &changes, " grand ", 7, " gr ", 4, 0);
str_replace(buffer, &len, &changes, " vagen ", 7, " v ", 3, 0);
str_replace(buffer, &len, &changes, " gatan ", 7, " g ", 3, 0);
str_replace(buffer, &len, &changes, " ostra ", 7, " o ", 3, 0);
str_replace(buffer, &len, &changes, "vastra ", 7, " v ", 3, 0);
str_replace(buffer, &len, &changes, " cadde ", 7, " cd ", 4, 0);
str_replace(buffer, &len, &changes, " duong ", 7, " d ", 3, 0);
str_replace(buffer, &len, &changes, " sokak ", 7, " sk ", 4, 0);
str_replace(buffer, &len, &changes, " plats ", 7, " pl ", 4, 0);
str_replace(buffer, &len, &changes, "stigen ", 7, " st ", 4, 0);
str_replace(buffer, &len, &changes, " vayla ", 7, " vla ", 5, 0);
str_replace(buffer, &len, &changes, "taival ", 7, " tvl ", 5, 0);
str_replace(buffer, &len, &changes, " sveti ", 7, " sv ", 4, 0);
str_replace(buffer, &len, &changes, " aukio ", 7, " auk ", 5, 0);
str_replace(buffer, &len, &changes, " sveta ", 7, " sv ", 4, 0);
str_replace(buffer, &len, &changes, " cesta ", 7, " c ", 3, 0);
str_replace(buffer, &len, &changes, " piata ", 7, " pta ", 5, 0);
str_replace(buffer, &len, &changes, " aleea ", 7, " al ", 4, 0);
str_replace(buffer, &len, &changes, " kaari ", 7, " kri ", 5, 0);
str_replace(buffer, &len, &changes, "penger ", 7, " pgr ", 5, 0);
str_replace(buffer, &len, &changes, " ranta ", 7, " rt ", 4, 0);
str_replace(buffer, &len, &changes, " rinne ", 7, " rn ", 4, 0);
str_replace(buffer, &len, &changes, "raitti ", 7, " r ", 3, 0);
str_replace(buffer, &len, &changes, "puisto ", 7, " ps ", 4, 0);
str_replace(buffer, &len, &changes, " polku ", 7, " p ", 3, 0);
str_replace(buffer, &len, &changes, " porta ", 7, " pta ", 5, 0);
str_replace(buffer, &len, &changes, " ponte ", 7, " p te ", 6, 0);
str_replace(buffer, &len, &changes, " paseo ", 7, " po ", 4, 0);
str_replace(buffer, &len, &changes, " fbrca ", 7, " fca ", 5, 0);
str_replace(buffer, &len, &changes, " allee ", 7, " al ", 4, 0);
str_replace(buffer, &len, &changes, " cours ", 7, " crs ", 5, 0);
str_replace(buffer, &len, &changes, "sainte ", 7, " ste ", 5, 0);
str_replace(buffer, &len, &changes, "square ", 7, " sq ", 4, 0);
str_replace(buffer, &len, &changes, " largo ", 7, " l go ", 6, 0);
str_replace(buffer, &len, &changes, " wharf ", 7, " whrf ", 6, 0);
str_replace(buffer, &len, &changes, " corte ", 7, " c te ", 6, 0);
str_replace(buffer, &len, &changes, " corso ", 7, " c so ", 6, 0);
str_replace(buffer, &len, &changes, " campo ", 7, " c po ", 6, 0);
str_replace(buffer, &len, &changes, " santa ", 7, " sta ", 5, 0);
str_replace(buffer, &len, &changes, " calle ", 7, " c ", 3, 0);
str_replace(buffer, &len, &changes, " strip ", 7, " strp ", 6, 0);
str_replace(buffer, &len, &changes, " alley ", 7, " al ", 4, 0);
str_replace(buffer, &len, &changes, " north ", 7, " n ", 3, 0);
str_replace(buffer, &len, &changes, " block ", 7, " blk ", 5, 0);
str_replace(buffer, &len, &changes, " gully ", 7, " gly ", 5, 0);
str_replace(buffer, &len, &changes, " sielo ", 7, " s ", 3, 0);
str_replace(buffer, &len, &changes, " brace ", 7, " br ", 4, 0);
str_replace(buffer, &len, &changes, " ronde ", 7, " rnde ", 6, 0);
str_replace(buffer, &len, &changes, " grove ", 7, " gr ", 4, 0);
str_replace(buffer, &len, &changes, " break ", 7, " brk ", 5, 0);
str_replace(buffer, &len, &changes, " roads ", 7, " rds ", 5, 0);
str_replace(buffer, &len, &changes, " track ", 7, " trk ", 5, 0);
str_replace(buffer, &len, &changes, " house ", 7, " ho ", 4, 0);
str_replace(buffer, &len, &changes, " trail ", 7, " trl ", 5, 0);
str_replace(buffer, &len, &changes, " mount ", 7, " mt ", 4, 0);
str_replace(buffer, &len, &changes, " cross ", 7, " crss ", 6, 0);
str_replace(buffer, &len, &changes, " beach ", 7, " bch ", 5, 0);
str_replace(buffer, &len, &changes, " point ", 7, " pt ", 4, 0);
str_replace(buffer, &len, &changes, " basin ", 7, " basn ", 6, 0);
str_replace(buffer, &len, &changes, " green ", 7, " gn ", 4, 0);
str_replace(buffer, &len, &changes, " plaza ", 7, " pl ", 4, 0);
str_replace(buffer, &len, &changes, " lille ", 7, " ll ", 4, 0);
str_replace(buffer, &len, &changes, " slope ", 7, " slpe ", 6, 0);
str_replace(buffer, &len, &changes, " placa ", 7, " pl ", 4, 0);
str_replace(buffer, &len, &changes, " place ", 7, " pl ", 4, 0);
str_replace(buffer, &len, &changes, " shunt ", 7, " shun ", 6, 0);
str_replace(buffer, &len, &changes, " saint ", 7, " st ", 4, 0);
str_replace(buffer, &len, &changes, " ulice ", 7, " ul ", 4, 0);
str_replace(buffer, &len, &changes, " amble ", 7, " ambl ", 6, 0);
str_replace(buffer, &len, &changes, " route ", 7, " rt ", 4, 0);
str_replace(buffer, &len, &changes, " sound ", 7, " snd ", 5, 0);
str_replace(buffer, &len, &changes, " store ", 7, " st ", 4, 0);
str_replace(buffer, &len, &changes, " front ", 7, " frnt ", 6, 0);
str_replace(buffer, &len, &changes, " elbow ", 7, " elb ", 5, 0);
str_replace(buffer, &len, &changes, " glade ", 7, " gl ", 4, 0);
str_replace(buffer, &len, &changes, " south ", 7, " s ", 3, 0);
str_replace(buffer, &len, &changes, " round ", 7, " rnd ", 5, 0);
str_replace(buffer, &len, &changes, " drive ", 7, " dr ", 4, 0);
str_replace(buffer, &len, &changes, " croft ", 7, " cft ", 5, 0);
str_replace(buffer, &len, &changes, " platz ", 7, " pl ", 4, 0);
str_replace(buffer, &len, &changes, " ferry ", 7, " fy ", 4, 0);
str_replace(buffer, &len, &changes, " ridge ", 7, " rdge ", 6, 0);
str_replace(buffer, &len, &changes, " tanav ", 7, " tn ", 4, 0);
str_replace(buffer, &len, &changes, " banan ", 7, " ba ", 4, 0);
str_replace(buffer, &len, &changes, " quays ", 7, " qys ", 5, 0);
str_replace(buffer, &len, &changes, " sankt ", 7, " st ", 4, 0);
str_replace(buffer, &len, &changes, " vkhod ", 7, " vkh ", 5, 0);
str_replace(buffer, &len, &changes, " chase ", 7, " ch ", 4, 0);
str_replace(buffer, &len, &changes, " vista ", 7, " vsta ", 6, 0);
str_replace(buffer, &len, &changes, " rhein ", 7, " rh ", 4, 0);
str_replace(buffer, &len, &changes, " court ", 7, " ct ", 4, 0);
str_replace(buffer, &len, &changes, "brucke ", 7, " br ", 4, 0);
str_replace(buffer, &len, &changes, " upper ", 7, " up ", 4, 0);
str_replace(buffer, &len, &changes, " river ", 7, " r ", 3, 0);
str_replace(buffer, &len, &changes, " range ", 7, " rnge ", 6, 0);
str_replace(buffer, &len, &changes, " lower ", 7, " lr ", 4, 0);
str_replace(buffer, &len, &changes, " kalea ", 7, " k ", 3, 0);
str_replace(buffer, &len, &changes, " crest ", 7, " crst ", 6, 0);
str_replace(buffer, &len, &changes, " obere ", 7, " o ", 3, 0);
str_replace(buffer, &len, &changes, " manor ", 7, " mnr ", 5, 0);
str_replace(buffer, &len, &changes, " byway ", 7, " bywy ", 6, 0);
str_replace(buffer, &len, &changes, " reach ", 7, " rch ", 5, 0);
str_replace(buffer, &len, &changes, " copse ", 7, " cps ", 5, 0);
str_replace(buffer, &len, &changes, "quelle ", 7, " qu ", 4, 0);
str_replace(buffer, &len, &changes, " creek ", 7, " cr ", 4, 0);
str_replace(buffer, &len, &changes, " close ", 7, " c ", 3, 0);
str_replace(buffer, &len, &changes, " fort ", 6, " ft ", 4, 0);
str_replace(buffer, &len, &changes, " apch ", 6, " app ", 5, 0);
str_replace(buffer, &len, &changes, " mont ", 6, " mt ", 4, 0);
str_replace(buffer, &len, &changes, " bdul ", 6, " bd ", 4, 0);
str_replace(buffer, &len, &changes, "saint ", 6, " st ", 4, 0);
str_replace(buffer, &len, &changes, " back ", 6, " bk ", 4, 0);
str_replace(buffer, &len, &changes, " c le ", 6, " c ", 3, 0);
str_replace(buffer, &len, &changes, "place ", 6, " pl ", 4, 0);
str_replace(buffer, &len, &changes, " frwy ", 6, " fwy ", 5, 0);
str_replace(buffer, &len, &changes, " quai ", 6, " qu ", 4, 0);
str_replace(buffer, &len, &changes, " ally ", 6, " al ", 4, 0);
str_replace(buffer, &len, &changes, " m te ", 6, " mt ", 4, 0);
str_replace(buffer, &len, &changes, " lane ", 6, " ln ", 4, 0);
str_replace(buffer, &len, &changes, "aukio ", 6, " auk ", 5, 0);
str_replace(buffer, &len, &changes, " loop ", 6, " lp ", 4, 0);
str_replace(buffer, &len, &changes, " line ", 6, " ln ", 4, 0);
str_replace(buffer, &len, &changes, " alue ", 6, " al ", 4, 0);
str_replace(buffer, &len, &changes, " link ", 6, " lk ", 4, 0);
str_replace(buffer, &len, &changes, " glde ", 6, " gl ", 4, 0);
str_replace(buffer, &len, &changes, " alea ", 6, " al ", 4, 0);
str_replace(buffer, &len, &changes, " gate ", 6, " g ", 3, 0);
str_replace(buffer, &len, &changes, " intr ", 6, " int ", 5, 0);
str_replace(buffer, &len, &changes, " gdns ", 6, " gdn ", 5, 0);
str_replace(buffer, &len, &changes, " hird ", 6, " hrd ", 5, 0);
str_replace(buffer, &len, &changes, " varf ", 6, " vf ", 4, 0);
str_replace(buffer, &len, &changes, " virf ", 6, " vf ", 4, 0);
str_replace(buffer, &len, &changes, " hgts ", 6, " hts ", 5, 0);
str_replace(buffer, &len, &changes, " expy ", 6, " exp ", 5, 0);
str_replace(buffer, &len, &changes, "markt ", 6, " mkt ", 5, 0);
str_replace(buffer, &len, &changes, " bypa ", 6, " byp ", 5, 0);
str_replace(buffer, &len, &changes, "o l v ", 6, " olv ", 5, 0);
str_replace(buffer, &len, &changes, " cres ", 6, " cr ", 4, 0);
str_replace(buffer, &len, &changes, " bdwy ", 6, " bway ", 6, 0);
str_replace(buffer, &len, &changes, " csac ", 6, " cds ", 5, 0);
str_replace(buffer, &len, &changes, " nowy ", 6, " n ", 3, 0);
str_replace(buffer, &len, &changes, " laan ", 6, " ln ", 4, 0);
str_replace(buffer, &len, &changes, " crsg ", 6, " xing ", 6, 0);
str_replace(buffer, &len, &changes, "vliet ", 6, " vlt ", 5, 0);
str_replace(buffer, &len, &changes, " city ", 6, " cty ", 5, 0);
str_replace(buffer, &len, &changes, "sving ", 6, " sv ", 4, 0);
str_replace(buffer, &len, &changes, "plass ", 6, " pl ", 4, 0);
str_replace(buffer, &len, &changes, "gaten ", 6, " g ", 3, 0);
str_replace(buffer, &len, &changes, "veien ", 6, " v ", 3, 0);
str_replace(buffer, &len, &changes, " gata ", 6, " g ", 3, 0);
str_replace(buffer, &len, &changes, " sint ", 6, " st ", 4, 0);
str_replace(buffer, &len, &changes, " caus ", 6, " cway ", 6, 0);
str_replace(buffer, &len, &changes, " cove ", 6, " cv ", 4, 0);
str_replace(buffer, &len, &changes, "plein ", 6, " pln ", 5, 0);
str_replace(buffer, &len, &changes, " cswy ", 6, " cway ", 6, 0);
str_replace(buffer, &len, &changes, " plac ", 6, " pl ", 4, 0);
str_replace(buffer, &len, &changes, " nowa ", 6, " n ", 3, 0);
str_replace(buffer, &len, &changes, " kolo ", 6, " k ", 3, 0);
str_replace(buffer, &len, &changes, " katu ", 6, " k ", 3, 0);
str_replace(buffer, &len, &changes, " duze ", 6, " dz ", 4, 0);
str_replace(buffer, &len, &changes, " blvd ", 6, " bd ", 4, 0);
str_replace(buffer, &len, &changes, " p ta ", 6, " pta ", 5, 0);
str_replace(buffer, &len, &changes, " maly ", 6, " ml ", 4, 0);
str_replace(buffer, &len, &changes, " mala ", 6, " ml ", 4, 0);
str_replace(buffer, &len, &changes, " bdge ", 6, " bri ", 5, 0);
str_replace(buffer, &len, &changes, " nowe ", 6, " n ", 3, 0);
str_replace(buffer, &len, &changes, " brdg ", 6, " bri ", 5, 0);
str_replace(buffer, &len, &changes, " male ", 6, " ml ", 4, 0);
str_replace(buffer, &len, &changes, " drwy ", 6, " dvwy ", 6, 0);
str_replace(buffer, &len, &changes, " duza ", 6, " dz ", 4, 0);
str_replace(buffer, &len, &changes, " utca ", 6, " u ", 3, 0);
str_replace(buffer, &len, &changes, " east ", 6, " e ", 3, 0);
str_replace(buffer, &len, &changes, " duzy ", 6, " dz ", 4, 0);
str_replace(buffer, &len, &changes, "kaari ", 6, " kri ", 5, 0);
str_replace(buffer, &len, &changes, " quan ", 6, " q ", 3, 0);
str_replace(buffer, &len, &changes, " svwy ", 6, " swy ", 5, 0);
str_replace(buffer, &len, &changes, " shwy ", 6, " sh ", 4, 0);
str_replace(buffer, &len, &changes, " road ", 6, " rd ", 4, 0);
str_replace(buffer, &len, &changes, "sankt ", 6, " st ", 4, 0);
str_replace(buffer, &len, &changes, " quay ", 6, " qy ", 4, 0);
str_replace(buffer, &len, &changes, "plats ", 6, " pl ", 4, 0);
str_replace(buffer, &len, &changes, " rise ", 6, " ri ", 4, 0);
str_replace(buffer, &len, &changes, " berg ", 6, " bg ", 4, 0);
str_replace(buffer, &len, &changes, " tcty ", 6, " tct ", 5, 0);
str_replace(buffer, &len, &changes, " viad ", 6, " via ", 5, 0);
str_replace(buffer, &len, &changes, " view ", 6, " vw ", 4, 0);
str_replace(buffer, &len, &changes, " vdct ", 6, " via ", 5, 0);
str_replace(buffer, &len, &changes, " vale ", 6, " v ", 3, 0);
str_replace(buffer, &len, &changes, " avda ", 6, " av ", 4, 0);
str_replace(buffer, &len, &changes, " grad ", 6, " ghr ", 5, 0);
str_replace(buffer, &len, &changes, " walk ", 6, " wlk ", 5, 0);
str_replace(buffer, &len, &changes, " west ", 6, " w ", 3, 0);
str_replace(buffer, &len, &changes, " yard ", 6, " yd ", 4, 0);
str_replace(buffer, &len, &changes, " blok ", 6, " bl ", 4, 0);
str_replace(buffer, &len, &changes, " terr ", 6, " ter ", 5, 0);
str_replace(buffer, &len, &changes, " cmno ", 6, " cno ", 5, 0);
str_replace(buffer, &len, &changes, " stra ", 6, " st ", 4, 0);
str_replace(buffer, &len, &changes, " thfr ", 6, " thor ", 6, 0);
str_replace(buffer, &len, &changes, " turn ", 6, " tn ", 4, 0);
str_replace(buffer, &len, &changes, " tpke ", 6, " tpk ", 5, 0);
str_replace(buffer, &len, &changes, " burg ", 6, " bg ", 4, 0);
str_replace(buffer, &len, &changes, "vayla ", 6, " vla ", 5, 0);
str_replace(buffer, &len, &changes, "vagen ", 6, " v ", 3, 0);
str_replace(buffer, &len, &changes, " tori ", 6, " tr ", 4, 0);
str_replace(buffer, &len, &changes, "gatan ", 6, " g ", 3, 0);
str_replace(buffer, &len, &changes, "grand ", 6, " gr ", 4, 0);
str_replace(buffer, &len, &changes, " pass ", 6, " ps ", 4, 0);
str_replace(buffer, &len, &changes, " pkwy ", 6, " pwy ", 5, 0);
str_replace(buffer, &len, &changes, " park ", 6, " pk ", 4, 0);
str_replace(buffer, &len, &changes, "rinne ", 6, " rn ", 4, 0);
str_replace(buffer, &len, &changes, " mtwy ", 6, " mwy ", 5, 0);
str_replace(buffer, &len, &changes, " mndr ", 6, " mr ", 4, 0);
str_replace(buffer, &len, &changes, " kyla ", 6, " kl ", 4, 0);
str_replace(buffer, &len, &changes, " kuja ", 6, " kj ", 4, 0);
str_replace(buffer, &len, &changes, "platz ", 6, " pl ", 4, 0);
str_replace(buffer, &len, &changes, "ranta ", 6, " rt ", 4, 0);
str_replace(buffer, &len, &changes, " mile ", 6, " mi ", 4, 0);
str_replace(buffer, &len, &changes, " pfad ", 6, " p ", 3, 0);
str_replace(buffer, &len, &changes, " mews ", 6, " m ", 3, 0);
str_replace(buffer, &len, &changes, "polku ", 6, " p ", 3, 0);
str_replace(buffer, &len, &changes, " psge ", 6, " ps ", 4, 0);
str_replace(buffer, &len, &changes, " plza ", 6, " pl ", 4, 0);
str_replace(buffer, &len, &changes, "ostra ", 6, " o ", 3, 0);
str_replace(buffer, &len, &changes, "gamla ", 6, " gla ", 5, 0);
str_replace(buffer, &len, &changes, " stig ", 6, " st ", 4, 0);
str_replace(buffer, &len, &changes, "norra ", 6, " n ", 3, 0);
str_replace(buffer, &len, &changes, "sodra ", 6, " s ", 3, 0);
str_replace(buffer, &len, &changes, " pike ", 6, " pk ", 4, 0);
str_replace(buffer, &len, &changes, " dorf ", 6, " df ", 4, 0);
str_replace(buffer, &len, &changes, " piaz ", 6, " p za ", 6, 0);
str_replace(buffer, &len, &changes, " phwy ", 6, " pway ", 6, 0);
str_replace(buffer, &len, &changes, "pfad ", 5, " p ", 3, 0);
str_replace(buffer, &len, &changes, " mnt ", 5, " mt ", 4, 0);
str_replace(buffer, &len, &changes, "gata ", 5, " g ", 3, 0);
str_replace(buffer, &len, &changes, " bhf ", 5, " bf ", 4, 0);
str_replace(buffer, &len, &changes, " bad ", 5, " b ", 3, 0);
str_replace(buffer, &len, &changes, "gate ", 5, " g ", 3, 0);
str_replace(buffer, &len, &changes, " zum ", 5, " z ", 3, 0);
str_replace(buffer, &len, &changes, "stig ", 5, " st ", 4, 0);
str_replace(buffer, &len, &changes, " blv ", 5, " bd ", 4, 0);
str_replace(buffer, &len, &changes, "kuja ", 5, " kj ", 4, 0);
str_replace(buffer, &len, &changes, " bul ", 5, " bd ", 4, 0);
str_replace(buffer, &len, &changes, " str ", 5, " st ", 4, 0);
str_replace(buffer, &len, &changes, "alue ", 5, " al ", 4, 0);
str_replace(buffer, &len, &changes, " cen ", 5, " ctr ", 5, 0);
str_replace(buffer, &len, &changes, " ave ", 5, " av ", 4, 0);
str_replace(buffer, &len, &changes, "kyla ", 5, " kl ", 4, 0);
str_replace(buffer, &len, &changes, " ale ", 5, " al ", 4, 0);
str_replace(buffer, &len, &changes, " spl ", 5, " sp ", 4, 0);
str_replace(buffer, &len, &changes, " all ", 5, " al ", 4, 0);
str_replace(buffer, &len, &changes, " k s ", 5, " ks ", 4, 0);
str_replace(buffer, &len, &changes, " aly ", 5, " al ", 4, 0);
str_replace(buffer, &len, &changes, "dorf ", 5, " df ", 4, 0);
str_replace(buffer, &len, &changes, " bvd ", 5, " bd ", 4, 0);
str_replace(buffer, &len, &changes, " vag ", 5, " v ", 3, 0);
str_replace(buffer, &len, &changes, " iii ", 5, " 3 ", 3, 0);
str_replace(buffer, &len, &changes, " tie ", 5, " t ", 3, 0);
str_replace(buffer, &len, &changes, " sok ", 5, " sk ", 4, 0);
str_replace(buffer, &len, &changes, "burg ", 5, " bg ", 4, 0);
str_replace(buffer, &len, &changes, "katu ", 5, " k ", 3, 0);
str_replace(buffer, &len, &changes, "berg ", 5, " bg ", 4, 0);
str_replace(buffer, &len, &changes, "tori ", 5, " tr ", 4, 0);
str_replace(buffer, &len, &changes, " kte ", 5, " k ", 3, 0);
str_replace(buffer, &len, &changes, " gro ", 5, " gr ", 4, 0);
str_replace(buffer, &len, &changes, " grn ", 5, " gn ", 4, 0);
str_replace(buffer, &len, &changes, " gld ", 5, " gl ", 4, 0);
str_replace(buffer, &len, &changes, " san ", 5, " s ", 3, 0);
str_replace(buffer, &len, &changes, " hse ", 5, " ho ", 4, 0);
str_replace(buffer, &len, &changes, " gte ", 5, " g ", 3, 0);
str_replace(buffer, &len, &changes, " rte ", 5, " rt ", 4, 0);
str_replace(buffer, &len, &changes, " rue ", 5, " r ", 3, 0);
str_replace(buffer, &len, &changes, " che ", 5, " ch ", 4, 0);
str_replace(buffer, &len, &changes, " pas ", 5, " ps ", 4, 0);
str_replace(buffer, &len, &changes, " plz ", 5, " pl ", 4, 0);
str_replace(buffer, &len, &changes, " pnt ", 5, " pt ", 4, 0);
str_replace(buffer, &len, &changes, " pky ", 5, " pwy ", 5, 0);
str_replace(buffer, &len, &changes, " pza ", 5, " pl ", 4, 0);
str_replace(buffer, &len, &changes, " rvr ", 5, " r ", 3, 0);
str_replace(buffer, &len, &changes, " riv ", 5, " r ", 3, 0);
str_replace(buffer, &len, &changes, " lit ", 5, " lt ", 4, 0);
str_replace(buffer, &len, &changes, " p k ", 5, " pk ", 4, 0);
str_replace(buffer, &len, &changes, " lwr ", 5, " lr ", 4, 0);
str_replace(buffer, &len, &changes, " low ", 5, " lr ", 4, 0);
str_replace(buffer, &len, &changes, " sth ", 5, " s ", 3, 0);
str_replace(buffer, &len, &changes, " crk ", 5, " cr ", 4, 0);
str_replace(buffer, &len, &changes, "pres ", 5, " pres ", 6, 1);
str_replace(buffer, &len, &changes, "laan ", 5, " ln ", 4, 0);
str_replace(buffer, &len, &changes, " bda ", 5, " b ", 3, 0);
str_replace(buffer, &len, &changes, " vei ", 5, " v ", 3, 0);
str_replace(buffer, &len, &changes, " via ", 5, " v ", 3, 0);
str_replace(buffer, &len, &changes, " way ", 5, " wy ", 4, 0);
str_replace(buffer, &len, &changes, " upr ", 5, " up ", 4, 0);
str_replace(buffer, &len, &changes, " avd ", 5, " av ", 4, 0);
str_replace(buffer, &len, &changes, " crt ", 5, " ct ", 4, 0);
str_replace(buffer, &len, &changes, "stwg ", 5, " stwg ", 6, 1);
str_replace(buffer, &len, &changes, "sint ", 5, " st ", 4, 0);
str_replace(buffer, &len, &changes, " v d ", 5, " vd ", 4, 0);
str_replace(buffer, &len, &changes, " van ", 5, " v ", 3, 0);
str_replace(buffer, &len, &changes, " drv ", 5, " dr ", 4, 0);
str_replace(buffer, &len, &changes, " tce ", 5, " ter ", 5, 0);
str_replace(buffer, &len, &changes, " va ", 4, " v ", 3, 0);
str_replace(buffer, &len, &changes, " oa ", 4, " o ", 3, 0);
str_replace(buffer, &len, &changes, " sa ", 4, " s ", 3, 0);
str_replace(buffer, &len, &changes, " na ", 4, " n ", 3, 0);
str_replace(buffer, &len, &changes, "bgm ", 4, " bgm ", 5, 1);
str_replace(buffer, &len, &changes, " nw ", 4, " n ", 3, 0);
str_replace(buffer, &len, &changes, "vag ", 4, " v ", 3, 0);
str_replace(buffer, &len, &changes, " im ", 4, " 1 ", 3, 0);
str_replace(buffer, &len, &changes, "vla ", 4, " vla ", 5, 1);
str_replace(buffer, &len, &changes, "gla ", 4, " gla ", 5, 1);
str_replace(buffer, &len, &changes, " am ", 4, " a ", 3, 0);
str_replace(buffer, &len, &changes, " ph ", 4, " p ", 3, 0);
str_replace(buffer, &len, &changes, "rue ", 4, " r ", 3, 0);
str_replace(buffer, &len, &changes, " ga ", 4, " g ", 3, 0);
str_replace(buffer, &len, &changes, "ste ", 4, " ste ", 5, 1);
str_replace(buffer, &len, &changes, "str ", 4, " st ", 4, 0);
str_replace(buffer, &len, &changes, " cl ", 4, " c ", 3, 0);
str_replace(buffer, &len, &changes, " vn ", 4, " v ", 3, 0);
str_replace(buffer, &len, &changes, " gt ", 4, " g ", 3, 0);
str_replace(buffer, &len, &changes, "vei ", 4, " v ", 3, 0);
str_replace(buffer, &len, &changes, "vlt ", 4, " vlt ", 5, 1);
str_replace(buffer, &len, &changes, " ce ", 4, " cv ", 4, 0);
str_replace(buffer, &len, &changes, " ii ", 4, " 2 ", 3, 0);
str_replace(buffer, &len, &changes, "pln ", 4, " pln ", 5, 1);
str_replace(buffer, &len, &changes, "olv ", 4, " olv ", 5, 1);
str_replace(buffer, &len, &changes, "mkt ", 4, " mkt ", 5, 1);
str_replace(buffer, &len, &changes, "tvl ", 4, " tvl ", 5, 1);
str_replace(buffer, &len, &changes, " ob ", 4, " o ", 3, 0);
str_replace(buffer, &len, &changes, "pgr ", 4, " pgr ", 5, 1);
str_replace(buffer, &len, &changes, " in ", 4, " 1 ", 3, 0);
str_replace(buffer, &len, &changes, " mw ", 4, " m ", 3, 0);
str_replace(buffer, &len, &changes, "kri ", 4, " kri ", 5, 1);
str_replace(buffer, &len, &changes, "pko ", 4, " pko ", 5, 1);
str_replace(buffer, &len, &changes, "auk ", 4, " auk ", 5, 1);
str_replace(buffer, &len, &changes, "tie ", 4, " t ", 3, 0);
str_replace(buffer, &len, &changes, " i ", 3, " 1 ", 3, 0);

File diff suppressed because one or more lines are too long

View File

@ -2,4 +2,4 @@
from nominatim_db import cli
exit(cli.nominatim(module_dir=None, osm2pgsql_path=None))
exit(cli.nominatim(osm2pgsql_path=None))
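
The launcher change above removes the module_dir argument, which only existed to locate the PostgreSQL module of the legacy tokenizer. A sketch of a custom launcher against the new signature:

```python
#!/usr/bin/env python3
from nominatim_db import cli

# module_dir is gone; only the osm2pgsql location is still passed
# through (None here, as in the shipped script above).
exit(cli.nominatim(osm2pgsql_path=None))
```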

phpcs.xml
View File

@ -1,172 +0,0 @@
<?xml version="1.0"?>
<ruleset name="Nominatim Standard">
<description>Nominatim coding standard</description>
<!-- based on the PSR2 standard, which you can find here: -->
<!-- /usr/share/php/PHP/CodeSniffer/Standards/PSR2/ruleset.xml -->
<!-- https://github.com/squizlabs/PHP_CodeSniffer/blob/master/CodeSniffer/Standards/PSR2/ruleset.xml -->
<rule ref="PSR2"/>
<exclude-pattern>./lib/template/*html*</exclude-pattern>
<exclude-pattern>./lib/template/includes/</exclude-pattern>
<exclude-pattern>./module/</exclude-pattern>
<exclude-pattern>./website/css</exclude-pattern>
<exclude-pattern>./website/js</exclude-pattern>
<rule ref="Generic.Files.LineLength">
<properties>
<property name="lineLimit" value="194"/>
<property name="absoluteLineLimit" value="194"/>
</properties>
</rule>
<!-- "A file should declare new symbols (classes, functions, constants, etc.) and cause no
other side effects, or it should execute logic with side effects, but should not do both."
... we have too many scripts and includes to be able to enforce that.
-->
<rule ref="PSR1.Files.SideEffects.FoundWithSymbols">
<severity>0</severity>
</rule>
<!-- eval, system, etc -->
<rule ref="Generic.PHP.ForbiddenFunctions">
<properties>
<property name="forbiddenFunctions" type="array" value="sizeof=>count,delete=>unset,print=>echo,create_function=>null,eval=>null"/>
</properties>
</rule>
<!-- **************************************************************
DOCUMENTATION
************************************************************** -->
<rule ref="PEAR.Commenting.FunctionComment.Missing">
<severity>0</severity>
</rule>
<!-- **************************************************************
COMMENTS
************************************************************** -->
<!-- any comments in the lines before function() are better than forcing
a PHPdoc style right now -->
<rule ref="PEAR.Commenting.FunctionComment.WrongStyle">
<severity>0</severity>
</rule>
<!-- We allow comments after statements -->
<rule ref="Squiz.Commenting.PostStatementComment.Found">
<severity>0</severity>
</rule>
<!-- ... even without a space, e.g. //some words -->
<rule ref="Squiz.Commenting.InlineComment.NoSpaceBefore">
<severity>0</severity>
</rule>
<!-- blank lines after inline comments are fine -->
<rule ref="Squiz.Commenting.InlineComment.SpacingAfter">
<severity>0</severity>
</rule>
<!-- Comments don't have to start uppercase -->
<rule ref="Squiz.Commenting.InlineComment.NotCapital">
<severity>0</severity>
</rule>
<!-- Comments don't have to end with one of .!? -->
<rule ref="Squiz.Commenting.InlineComment.InvalidEndChar">
<severity>0</severity>
</rule>
<!-- Empty comments are fine -->
<rule ref="Squiz.Commenting.InlineComment.Empty">
<severity>0</severity>
</rule>
<!-- **************************************************************
INDENTATION, SPACING
************************************************************** -->
<rule ref="Squiz.Arrays.ArrayDeclaration.KeyNotAligned" />
<!-- Aligned looks nicer, but causes too many warnings currently -->
<rule ref="Squiz.Arrays.ArrayDeclaration.DoubleArrowNotAligned">
<severity>0</severity>
</rule>
<!-- **************************************************************
VARIABLES
************************************************************** -->
<!-- CONST_this_var is fine, we don't need ConstThisVar -->
<rule ref="Generic.NamingConventions.UpperCaseConstantName.ConstantNotUpperCase">
<severity>0</severity>
</rule>
<!-- simply disagree with "Each line in an array declaration must end in a comma" -->
<rule ref="Squiz.Arrays.ArrayDeclaration.NoCommaAfterLast">
<severity>0</severity>
</rule>
<rule ref="Squiz.Arrays.ArrayDeclaration.NoComma">
<severity>0</severity>
</rule>
<!-- We allow "$abc = array($aPoint[1], $aPoint[2])" -->
<rule ref="Squiz.Arrays.ArrayDeclaration.SingleLineNotAllowed">
<severity>0</severity>
</rule>
<!-- array() instead of [] for initialisation -->
<rule ref="Generic.Arrays.DisallowShortArraySyntax.Found" />
<!-- **************************************************************
STRING QUOTING
************************************************************** -->
<!-- Prefer single quoted strings -->
<rule ref="Squiz.Strings.DoubleQuoteUsage" />
<!-- We allow variables inside double-quoted strings "abc $somevar" -->
<rule ref="Squiz.Strings.DoubleQuoteUsage.ContainsVar">
<severity>0</severity>
</rule>
<!-- **************************************************************
CONTROL STRUCTURES
************************************************************** -->
<!-- we allow "if (a) echo 'b'" without brackets -->
<rule ref="Generic.ControlStructures.InlineControlStructure.NotAllowed">
<severity>0</severity>
</rule>
<!-- We allow "if (a)". No need for "if (a === TRUE)" -->
<rule ref="Squiz.Operators.ComparisonOperatorUsage.ImplicitTrue">
<severity>0</severity>
</rule>
<!-- ... same for "if (!a)" -->
<rule ref="Squiz.Operators.ComparisonOperatorUsage.NotAllowed">
<severity>0</severity>
</rule>
</ruleset>

View File

@ -12,24 +12,12 @@ NOMINATIM_DATABASE_DSN="pgsql:dbname=nominatim"
# Nominatim sets up read-only access for this user during installation.
NOMINATIM_DATABASE_WEBUSER="www-data"
# Directory where to find the PostgreSQL server module.
# When empty the module is expected to be located in the 'module' subdirectory
# in the project directory.
# Changing this value requires running 'nominatim refresh --functions'.
NOMINATIM_DATABASE_MODULE_PATH=
# Tokenizer used for normalizing and parsing queries and names.
# The tokenizer is set up during import and cannot be changed afterwards
# without a reimport.
# Currently available tokenizers: icu, legacy
NOMINATIM_TOKENIZER="icu"
# Number of occurrences of a word before it is considered frequent.
# Similar to the concept of stop words. Frequent partial words get ignored
# or handled differently during search.
# Changing this value requires a reimport.
NOMINATIM_MAX_WORD_FREQUENCY=50000
# If true, admin level changes on places with many contained children are blocked.
NOMINATIM_LIMIT_REINDEXING=yes
@ -40,12 +28,6 @@ NOMINATIM_LIMIT_REINDEXING=yes
# Currently only affects the initial import of country names and special phrases.
NOMINATIM_LANGUAGES=
# Rules for normalizing terms for comparisons.
# The default is to remove accents and punctuation and to lower-case the
# term. Spaces are kept but collapsed to one standard space.
# Changing this value requires a reimport.
NOMINATIM_TERM_NORMALIZATION=":: NFD (); [[:Nonspacing Mark:] [:Cf:]] >; :: lower (); [[:Punctuation:][:Space:]]+ > ' '; :: NFC ();"
# Configuration file for the tokenizer.
# The content depends on the tokenizer used. If left empty the default settings
# for the chosen tokenizer will be used. The configuration can only be set
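
With the module path, word frequency and term normalization settings gone, a minimal project configuration reduces to the values already quoted in this file; a sketch using only the defaults shown above:

```
NOMINATIM_DATABASE_DSN="pgsql:dbname=nominatim"
NOMINATIM_DATABASE_WEBUSER="www-data"
# 'icu' is now the only available tokenizer.
NOMINATIM_TOKENIZER="icu"
```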

View File

@ -167,8 +167,7 @@ class SearchBuilder:
expected_count = sum(t.count for t in hnrs)
partials = {t.token: t.addr_count for trange in address
for t in self.query.get_partials_list(trange)
if t.is_indexed}
for t in self.query.get_partials_list(trange)}
if not partials:
# can happen when none of the partials is indexed
@ -219,11 +218,9 @@ class SearchBuilder:
addr_partials = [t for r in address for t in self.query.get_partials_list(r)]
addr_tokens = list({t.token for t in addr_partials})
partials_indexed = all(t.is_indexed for t in name_partials.values()) \
and all(t.is_indexed for t in addr_partials)
exp_count = min(t.count for t in name_partials.values()) / (2**(len(name_partials) - 1))
if (len(name_partials) > 3 or exp_count < 8000) and partials_indexed:
if (len(name_partials) > 3 or exp_count < 8000):
yield penalty, exp_count, dbf.lookup_by_names(list(name_partials.keys()), addr_tokens)
return
@ -232,8 +229,6 @@ class SearchBuilder:
name_fulls = self.query.get_tokens(name, TokenType.WORD)
if name_fulls:
fulls_count = sum(t.count for t in name_fulls)
if partials_indexed:
penalty += 1.2 * sum(t.penalty for t in addr_partials if not t.is_indexed)
if fulls_count < 50000 or addr_count < 30000:
yield penalty, fulls_count / (2**len(addr_tokens)), \
@ -243,8 +238,7 @@ class SearchBuilder:
# To catch remaining results, lookup by name and address
# We only do this if there is a reasonable number of results expected.
exp_count = exp_count / (2**len(addr_tokens)) if addr_tokens else exp_count
if exp_count < 10000 and addr_count < 20000\
and all(t.is_indexed for t in name_partials.values()):
if exp_count < 10000 and addr_count < 20000:
penalty += 0.35 * max(1 if name_fulls else 0.1,
5 - len(name_partials) - len(addr_tokens))
yield penalty, exp_count, \
@ -260,11 +254,10 @@ class SearchBuilder:
addr_restrict_tokens = []
addr_lookup_tokens = []
for t in addr_partials:
if t.is_indexed:
if t.addr_count > 20000:
addr_restrict_tokens.append(t.token)
else:
addr_lookup_tokens.append(t.token)
if t.addr_count > 20000:
addr_restrict_tokens.append(t.token)
else:
addr_lookup_tokens.append(t.token)
if addr_restrict_tokens:
lookup.append(dbf.FieldLookup('nameaddress_vector',
@ -289,13 +282,12 @@ class SearchBuilder:
addr_restrict_tokens = []
addr_lookup_tokens = []
for t in addr_partials:
if t.is_indexed:
if t.addr_count > 20000:
addr_restrict_tokens.append(t.token)
else:
addr_lookup_tokens.append(t.token)
if t.addr_count > 20000:
addr_restrict_tokens.append(t.token)
else:
addr_lookup_tokens.append(t.token)
else:
addr_restrict_tokens = [t.token for t in addr_partials if t.is_indexed]
addr_restrict_tokens = [t.token for t in addr_partials]
addr_lookup_tokens = []
return dbf.lookup_by_any_name([t.token for t in name_fulls],
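
The hunks above repeat one simplification: without is_indexed, address tokens are partitioned purely by how frequent they are in addresses. A self-contained sketch of that partitioning; the 20000 threshold comes from the code above, and the token attributes are assumed to match the Token class from this diff:

```python
from typing import Iterable, List, Tuple

def split_addr_tokens(addr_partials: Iterable) -> Tuple[List[int], List[int]]:
    """Split address tokens into restrict/lookup sets by frequency."""
    addr_restrict_tokens: List[int] = []
    addr_lookup_tokens: List[int] = []
    for t in addr_partials:
        if t.addr_count > 20000:
            # very frequent terms are only used to restrict results
            addr_restrict_tokens.append(t.token)
        else:
            # rare terms are cheap enough for an index lookup
            addr_lookup_tokens.append(t.token)
    return addr_restrict_tokens, addr_lookup_tokens
```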

View File

@ -123,7 +123,7 @@ class ICUToken(qmod.Token):
lookup_word = row.word_token
return ICUToken(penalty=penalty, token=row.word_id, count=max(1, count),
lookup_word=lookup_word, is_indexed=True,
lookup_word=lookup_word,
word_token=row.word_token, info=row.info,
addr_count=max(1, addr_count))
@ -259,7 +259,9 @@ class ICUQueryAnalyzer(AbstractQueryAnalyzer):
if len(part.token) <= 4 and part[0].isdigit()\
and not node.has_tokens(i+1, qmod.TokenType.HOUSENUMBER):
query.add_token(qmod.TokenRange(i, i+1), qmod.TokenType.HOUSENUMBER,
ICUToken(0.5, 0, 1, 1, part.token, True, part.token, None))
ICUToken(penalty=0.5, token=0,
count=1, addr_count=1, lookup_word=part.token,
word_token=part.token, info=None))
def rerank_tokens(self, query: qmod.QueryStruct, parts: QueryParts) -> None:

View File

@ -1,273 +0,0 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Implementation of query analysis for the legacy tokenizer.
"""
from typing import Tuple, Dict, List, Optional, Iterator, Any, cast
from copy import copy
from collections import defaultdict
import dataclasses
import sqlalchemy as sa
from ..typing import SaRow
from ..connection import SearchConnection
from ..logging import log
from . import query as qmod
from .query_analyzer_factory import AbstractQueryAnalyzer
def yield_words(terms: List[str], start: int) -> Iterator[Tuple[str, qmod.TokenRange]]:
""" Return all combinations of words in the terms list after the
given position.
"""
total = len(terms)
for first in range(start, total):
word = terms[first]
yield word, qmod.TokenRange(first, first + 1)
for last in range(first + 1, min(first + 20, total)):
word = ' '.join((word, terms[last]))
yield word, qmod.TokenRange(first, last + 1)
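# For illustration, yield_words(['new', 'york', 'city'], 0) produces, in order:
#   ('new', TokenRange(0, 1)), ('new york', TokenRange(0, 2)),
#   ('new york city', TokenRange(0, 3)), ('york', TokenRange(1, 2)),
#   ('york city', TokenRange(1, 3)), ('city', TokenRange(2, 3))
# i.e. every combination of up to 20 consecutive terms, with end-exclusive ranges.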
@dataclasses.dataclass
class LegacyToken(qmod.Token):
""" Specialised token for legacy tokenizer.
"""
word_token: str
category: Optional[Tuple[str, str]]
country: Optional[str]
operator: Optional[str]
@property
def info(self) -> Dict[str, Any]:
""" Dictionary of additional properties of the token.
Should only be used for debugging purposes.
"""
return {'category': self.category,
'country': self.country,
'operator': self.operator}
def get_category(self) -> Tuple[str, str]:
assert self.category
return self.category
class LegacyQueryAnalyzer(AbstractQueryAnalyzer):
""" Converter for query strings into a tokenized query
using the tokens created by a legacy tokenizer.
"""
def __init__(self, conn: SearchConnection) -> None:
self.conn = conn
async def setup(self) -> None:
""" Set up static data structures needed for the analysis.
"""
self.max_word_freq = int(await self.conn.get_property('tokenizer_maxwordfreq'))
if 'word' not in self.conn.t.meta.tables:
sa.Table('word', self.conn.t.meta,
sa.Column('word_id', sa.Integer),
sa.Column('word_token', sa.Text, nullable=False),
sa.Column('word', sa.Text),
sa.Column('class', sa.Text),
sa.Column('type', sa.Text),
sa.Column('country_code', sa.Text),
sa.Column('search_name_count', sa.Integer),
sa.Column('operator', sa.Text))
async def analyze_query(self, phrases: List[qmod.Phrase]) -> qmod.QueryStruct:
""" Analyze the given list of phrases and return the
tokenized query.
"""
log().section('Analyze query (using Legacy tokenizer)')
normalized = []
if phrases:
for row in await self.conn.execute(sa.select(*(sa.func.make_standard_name(p.text)
for p in phrases))):
normalized = [qmod.Phrase(p.ptype, r) for r, p in zip(row, phrases) if r]
break
query = qmod.QueryStruct(normalized)
log().var_dump('Normalized query', query.source)
if not query.source:
return query
parts, words = self.split_query(query)
lookup_words = list(words.keys())
log().var_dump('Split query', parts)
log().var_dump('Extracted words', lookup_words)
for row in await self.lookup_in_db(lookup_words):
for trange in words[row.word_token.strip()]:
token, ttype = self.make_token(row)
if ttype == qmod.TokenType.NEAR_ITEM:
if trange.start == 0:
query.add_token(trange, qmod.TokenType.NEAR_ITEM, token)
elif ttype == qmod.TokenType.QUALIFIER:
query.add_token(trange, qmod.TokenType.QUALIFIER, token)
if trange.start == 0 or trange.end == query.num_token_slots():
token = copy(token)
token.penalty += 0.1 * (query.num_token_slots())
query.add_token(trange, qmod.TokenType.NEAR_ITEM, token)
elif ttype != qmod.TokenType.PARTIAL or trange.start + 1 == trange.end:
query.add_token(trange, ttype, token)
self.add_extra_tokens(query, parts)
self.rerank_tokens(query)
log().table_dump('Word tokens', _dump_word_tokens(query))
return query
def normalize_text(self, text: str) -> str:
""" Bring the given text into a normalized form.
This only removes case, so some differences from the normalization
applied to the phrases remain.
"""
return text.lower()
def split_query(self, query: qmod.QueryStruct) -> Tuple[List[str],
Dict[str, List[qmod.TokenRange]]]:
""" Transliterate the phrases and split them into tokens.
Returns a list of transliterated tokens and a dictionary
of words for lookup together with their position.
"""
parts: List[str] = []
phrase_start = 0
words = defaultdict(list)
for phrase in query.source:
query.nodes[-1].ptype = phrase.ptype
for trans in phrase.text.split(' '):
if trans:
for term in trans.split(' '):
if term:
parts.append(trans)
query.add_node(qmod.BreakType.TOKEN, phrase.ptype)
query.nodes[-1].btype = qmod.BreakType.WORD
query.nodes[-1].btype = qmod.BreakType.PHRASE
for word, wrange in yield_words(parts, phrase_start):
words[word].append(wrange)
phrase_start = len(parts)
query.nodes[-1].btype = qmod.BreakType.END
return parts, words
async def lookup_in_db(self, words: List[str]) -> 'sa.Result[Any]':
""" Return the token information from the database for the
given word tokens.
"""
t = self.conn.t.meta.tables['word']
sql = t.select().where(t.c.word_token.in_(words + [' ' + w for w in words]))
return await self.conn.execute(sql)
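# Example: lookup_in_db(['foo', 'bar']) matches word_token against
# ('foo', 'bar', ' foo', ' bar') -- the space-prefixed variants are the
# full-word entries of the legacy word table, the plain ones the partials.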
def make_token(self, row: SaRow) -> Tuple[LegacyToken, qmod.TokenType]:
""" Create a LegacyToken from the row of the word table.
Also determines the type of token.
"""
penalty = 0.0
is_indexed = True
rowclass = getattr(row, 'class')
if row.country_code is not None:
ttype = qmod.TokenType.COUNTRY
lookup_word = row.country_code
elif rowclass is not None:
if rowclass == 'place' and row.type == 'house':
ttype = qmod.TokenType.HOUSENUMBER
lookup_word = row.word_token[1:]
elif rowclass == 'place' and row.type == 'postcode':
ttype = qmod.TokenType.POSTCODE
lookup_word = row.word
else:
ttype = qmod.TokenType.NEAR_ITEM if row.operator in ('in', 'near')\
else qmod.TokenType.QUALIFIER
lookup_word = row.word
elif row.word_token.startswith(' '):
ttype = qmod.TokenType.WORD
lookup_word = row.word or row.word_token[1:]
else:
ttype = qmod.TokenType.PARTIAL
lookup_word = row.word_token
penalty = 0.21
if row.search_name_count > self.max_word_freq:
is_indexed = False
return LegacyToken(penalty=penalty, token=row.word_id,
count=max(1, row.search_name_count or 1),
addr_count=1, # not supported
lookup_word=lookup_word,
word_token=row.word_token.strip(),
category=(rowclass, row.type) if rowclass is not None else None,
country=row.country_code,
operator=row.operator,
is_indexed=is_indexed),\
ttype
def add_extra_tokens(self, query: qmod.QueryStruct, parts: List[str]) -> None:
""" Add tokens to query that are not saved in the database.
"""
for part, node, i in zip(parts, query.nodes, range(1000)):
if len(part) <= 4 and part.isdigit()\
and not node.has_tokens(i+1, qmod.TokenType.HOUSENUMBER):
query.add_token(qmod.TokenRange(i, i+1), qmod.TokenType.HOUSENUMBER,
LegacyToken(penalty=0.5, token=0, count=1, addr_count=1,
lookup_word=part, word_token=part,
category=None, country=None,
operator=None, is_indexed=True))
def rerank_tokens(self, query: qmod.QueryStruct) -> None:
""" Add penalties to tokens that depend on presence of other token.
"""
for _, node, tlist in query.iter_token_lists():
if tlist.ttype == qmod.TokenType.POSTCODE:
for repl in node.starting:
if repl.end == tlist.end and repl.ttype != qmod.TokenType.POSTCODE \
and (repl.ttype != qmod.TokenType.HOUSENUMBER
or len(tlist.tokens[0].lookup_word) > 4):
repl.add_penalty(0.39)
elif tlist.ttype == qmod.TokenType.HOUSENUMBER \
and len(tlist.tokens[0].lookup_word) <= 3:
if any(c.isdigit() for c in tlist.tokens[0].lookup_word):
for repl in node.starting:
if repl.end == tlist.end and repl.ttype != qmod.TokenType.HOUSENUMBER:
repl.add_penalty(0.5 - tlist.tokens[0].penalty)
def _dump_word_tokens(query: qmod.QueryStruct) -> Iterator[List[Any]]:
yield ['type', 'token', 'word_token', 'lookup_word', 'penalty', 'count', 'info', 'indexed']
for node in query.nodes:
for tlist in node.starting:
for token in tlist.tokens:
t = cast(LegacyToken, token)
yield [tlist.ttype.name, t.token, t.word_token or '',
t.lookup_word or '', t.penalty, t.count, t.info,
'Y' if t.is_indexed else 'N']
async def create_query_analyzer(conn: SearchConnection) -> AbstractQueryAnalyzer:
""" Create and set up a new query analyzer for a database based
on the legacy tokenizer.
"""
out = LegacyQueryAnalyzer(conn)
await out.setup()
return out
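# Usage sketch (context assumed: an open SearchConnection from the search
# frontend; both this factory and the analyzer class are deleted here):
#
#     analyzer = await create_query_analyzer(conn)
#     query = await analyzer.analyze_query(phrases)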

View File

@ -101,7 +101,6 @@ class Token(ABC):
count: int
addr_count: int
lookup_word: str
is_indexed: bool
@abstractmethod
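
After this hunk the shared token base class keeps only the scoring fields. A sketch of the slimmed-down class; penalty, token and the abstract get_category are inferred from the subclass code elsewhere in this diff, not from this hunk:

```python
from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import Tuple

@dataclass
class Token(ABC):
    penalty: float       # inferred from the ICUToken/LegacyToken constructors
    token: int
    count: int
    addr_count: int
    lookup_word: str

    @abstractmethod
    def get_category(self) -> Tuple[str, str]:
        ...
```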

View File

@ -111,8 +111,7 @@ class CommandlineParser:
args.config = Configuration(args.project_dir,
environ=kwargs.get('environ', os.environ))
args.config.set_libdirs(module=kwargs['module_dir'],
osm2pgsql=kwargs['osm2pgsql_path'])
args.config.set_libdirs(osm2pgsql=kwargs['osm2pgsql_path'])
log = logging.getLogger()
log.warning('Using project directory: %s', str(args.project_dir))
@ -120,10 +119,6 @@ class CommandlineParser:
try:
ret = args.command.run(args)
if args.config.TOKENIZER == 'legacy':
log.warning('WARNING: the "legacy" tokenizer is deprecated '
'and will be removed in Nominatim 5.0.')
return ret
except UsageError as exception:
if log.isEnabledFor(logging.DEBUG):

View File

@ -72,7 +72,6 @@ class Configuration:
self.project_dir = None
class _LibDirs:
module: Path
osm2pgsql: Path
sql = paths.SQLLIB_DIR
data = paths.DATA_DIR

View File

@ -9,7 +9,6 @@ Path settings for extra data used by Nominatim.
"""
from pathlib import Path
PHPLIB_DIR = (Path(__file__) / '..' / '..' / '..' / 'lib-php').resolve()
SQLLIB_DIR = (Path(__file__) / '..' / '..' / '..' / 'lib-sql').resolve()
DATA_DIR = (Path(__file__) / '..' / '..' / '..' / 'data').resolve()
CONFIG_DIR = (Path(__file__) / '..' / '..' / '..' / 'settings').resolve()

View File

@ -1,666 +0,0 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Tokenizer implementing normalisation as used before Nominatim 4.
"""
from typing import Optional, Sequence, List, Tuple, Mapping, Any, Callable, \
cast, Dict, Set, Iterable
from collections import OrderedDict
import logging
from pathlib import Path
import re
import shutil
from icu import Transliterator
import psycopg
from psycopg import sql as pysql
from ..errors import UsageError
from ..db.connection import connect, Connection, drop_tables, table_exists,\
execute_scalar, register_hstore
from ..config import Configuration
from ..db import properties
from ..db import utils as db_utils
from ..db.sql_preprocessor import SQLPreprocessor
from ..data.place_info import PlaceInfo
from .base import AbstractAnalyzer, AbstractTokenizer
DBCFG_NORMALIZATION = "tokenizer_normalization"
DBCFG_MAXWORDFREQ = "tokenizer_maxwordfreq"
LOG = logging.getLogger()
def create(dsn: str, data_dir: Path) -> 'LegacyTokenizer':
""" Create a new instance of the tokenizer provided by this module.
"""
LOG.warning('WARNING: the legacy tokenizer is deprecated '
'and will be removed in Nominatim 5.0.')
return LegacyTokenizer(dsn, data_dir)
def _install_module(config_module_path: str, src_dir: Optional[Path], module_dir: Path) -> str:
""" Copies the PostgreSQL normalisation module into the project
directory if necessary. For historical reasons the module is
saved in the '/module' subdirectory and not with the other tokenizer
data.
The function detects when the installation is run from the
build directory. It doesn't touch the module in that case.
"""
# Custom module locations are simply used as is.
if config_module_path:
LOG.info("Using custom path for database module at '%s'", config_module_path)
return config_module_path
# Otherwise a source dir must be given.
if src_dir is None:
raise UsageError("The legacy tokenizer cannot be used with the Nominatim pip module.")
# Compatibility mode for builddir installations.
if module_dir.exists() and src_dir.samefile(module_dir):
LOG.info('Running from build directory. Leaving database module as is.')
return str(module_dir)
# In any other case install the module in the project directory.
if not module_dir.exists():
module_dir.mkdir()
destfile = module_dir / 'nominatim.so'
shutil.copy(str(src_dir / 'nominatim.so'), str(destfile))
destfile.chmod(0o755)
LOG.info('Database module installed at %s', str(destfile))
return str(module_dir)
def _check_module(module_dir: str, conn: Connection) -> None:
""" Try to use the PostgreSQL module to confirm that it is correctly
installed and accessible from PostgreSQL.
"""
with conn.cursor() as cur:
try:
cur.execute(pysql.SQL("""CREATE FUNCTION nominatim_test_import_func(text)
RETURNS text AS {}, 'transliteration'
LANGUAGE c IMMUTABLE STRICT;
DROP FUNCTION nominatim_test_import_func(text)
""").format(pysql.Literal(f'{module_dir}/nominatim.so')))
except psycopg.DatabaseError as err:
LOG.fatal("Error accessing database module: %s", err)
raise UsageError("Database module cannot be accessed.") from err
class LegacyTokenizer(AbstractTokenizer):
""" The legacy tokenizer uses a special PostgreSQL module to normalize
names and queries. The tokenizer thus implements normalization through
calls to the database.
"""
def __init__(self, dsn: str, data_dir: Path) -> None:
self.dsn = dsn
self.data_dir = data_dir
self.normalization: Optional[str] = None
def init_new_db(self, config: Configuration, init_db: bool = True) -> None:
""" Set up a new tokenizer for the database.
This copies all necessary data into the project directory to make
sure the tokenizer remains stable even over updates.
"""
assert config.project_dir is not None
module_dir = _install_module(config.DATABASE_MODULE_PATH,
config.lib_dir.module,
config.project_dir / 'module')
self.normalization = config.TERM_NORMALIZATION
with connect(self.dsn) as conn:
_check_module(module_dir, conn)
self._save_config(conn, config)
conn.commit()
if init_db:
self.update_sql_functions(config)
self._init_db_tables(config)
def init_from_project(self, config: Configuration) -> None:
""" Initialise the tokenizer from the project directory.
"""
assert config.project_dir is not None
with connect(self.dsn) as conn:
self.normalization = properties.get_property(conn, DBCFG_NORMALIZATION)
if not (config.project_dir / 'module' / 'nominatim.so').exists():
_install_module(config.DATABASE_MODULE_PATH,
config.lib_dir.module,
config.project_dir / 'module')
def finalize_import(self, config: Configuration) -> None:
""" Do any required postprocessing to make the tokenizer data ready
for use.
"""
with connect(self.dsn) as conn:
sqlp = SQLPreprocessor(conn, config)
sqlp.run_sql_file(conn, 'tokenizer/legacy_tokenizer_indices.sql')
def update_sql_functions(self, config: Configuration) -> None:
""" Reimport the SQL functions for this tokenizer.
"""
assert config.project_dir is not None
with connect(self.dsn) as conn:
max_word_freq = properties.get_property(conn, DBCFG_MAXWORDFREQ)
modulepath = config.DATABASE_MODULE_PATH or \
str((config.project_dir / 'module').resolve())
sqlp = SQLPreprocessor(conn, config)
sqlp.run_sql_file(conn, 'tokenizer/legacy_tokenizer.sql',
max_word_freq=max_word_freq,
modulepath=modulepath)
def check_database(self, _: Configuration) -> Optional[str]:
""" Check that the tokenizer is set up correctly.
"""
hint = """\
The PostgreSQL extension nominatim.so was not correctly loaded.
Error: {error}
Hints:
* Check the output of the CMake/make installation step
* Does nominatim.so exist?
* Does nominatim.so exist on the database server?
* Can nominatim.so be accessed by the database user?
"""
with connect(self.dsn) as conn:
try:
out = execute_scalar(conn, "SELECT make_standard_name('a')")
except psycopg.Error as err:
return hint.format(error=str(err))
if out != 'a':
return hint.format(error='Unexpected result for make_standard_name()')
return None
def migrate_database(self, config: Configuration) -> None:
""" Initialise the project directory of an existing database for
use with this tokenizer.
This is a special migration function for updating existing databases
to new software versions.
"""
assert config.project_dir is not None
self.normalization = config.TERM_NORMALIZATION
module_dir = _install_module(config.DATABASE_MODULE_PATH,
config.lib_dir.module,
config.project_dir / 'module')
with connect(self.dsn) as conn:
_check_module(module_dir, conn)
self._save_config(conn, config)
def update_statistics(self, config: Configuration, threads: int = 1) -> None:
""" Recompute the frequency of full words.
"""
with connect(self.dsn) as conn:
if table_exists(conn, 'search_name'):
drop_tables(conn, "word_frequencies")
with conn.cursor() as cur:
LOG.info("Computing word frequencies")
cur.execute("""CREATE TEMP TABLE word_frequencies AS
SELECT unnest(name_vector) as id, count(*)
FROM search_name GROUP BY id""")
cur.execute("CREATE INDEX ON word_frequencies(id)")
LOG.info("Update word table with recomputed frequencies")
cur.execute("""UPDATE word SET search_name_count = count
FROM word_frequencies
WHERE word_token like ' %' and word_id = id""")
drop_tables(conn, "word_frequencies")
conn.commit()
def update_word_tokens(self) -> None:
""" No house-keeping implemented for the legacy tokenizer.
"""
LOG.info("No tokenizer clean-up available.")
def name_analyzer(self) -> 'LegacyNameAnalyzer':
""" Create a new analyzer for tokenizing names and queries
using this tokenizer. Analyzers are context managers and should
be used accordingly:
```
with tokenizer.name_analyzer() as analyzer:
analyzer.tokenize()
```
When used outside the with construct, the caller must make sure
to call close() before discarding the analyzer.
Analyzers are not thread-safe. You need to instantiate one per thread.
"""
normalizer = Transliterator.createFromRules("phrase normalizer",
self.normalization)
return LegacyNameAnalyzer(self.dsn, normalizer)
def most_frequent_words(self, conn: Connection, num: int) -> List[str]:
""" Return a list of the `num` most frequent full words
in the database.
"""
with conn.cursor() as cur:
cur.execute(""" SELECT word FROM word WHERE word is not null
ORDER BY search_name_count DESC LIMIT %s""", (num,))
return list(s[0] for s in cur)
def _init_db_tables(self, config: Configuration) -> None:
""" Set up the word table and fill it with pre-computed word
frequencies.
"""
with connect(self.dsn) as conn:
sqlp = SQLPreprocessor(conn, config)
sqlp.run_sql_file(conn, 'tokenizer/legacy_tokenizer_tables.sql')
conn.commit()
LOG.warning("Precomputing word tokens")
db_utils.execute_file(self.dsn, config.lib_dir.data / 'words.sql')
def _save_config(self, conn: Connection, config: Configuration) -> None:
""" Save the configuration that needs to remain stable for the given
database as database properties.
"""
assert self.normalization is not None
properties.set_property(conn, DBCFG_NORMALIZATION, self.normalization)
properties.set_property(conn, DBCFG_MAXWORDFREQ, config.MAX_WORD_FREQUENCY)
class LegacyNameAnalyzer(AbstractAnalyzer):
""" The legacy analyzer uses the special Postgresql module for
splitting names.
Each instance opens a connection to the database to request the
normalization.
"""
def __init__(self, dsn: str, normalizer: Any):
self.conn: Optional[Connection] = connect(dsn)
self.conn.autocommit = True
self.normalizer = normalizer
register_hstore(self.conn)
self._cache = _TokenCache(self.conn)
def close(self) -> None:
""" Free all resources used by the analyzer.
"""
if self.conn:
self.conn.close()
self.conn = None
def get_word_token_info(self, words: Sequence[str]) -> List[Tuple[str, str, int]]:
""" Return token information for the given list of words.
If a word starts with # it is assumed to be a full name,
otherwise it is treated as a partial name.
The function returns a list of tuples with
(original word, word token, word id).
The function is used for testing and debugging only
and is not necessarily efficient.
"""
assert self.conn is not None
with self.conn.cursor() as cur:
cur.execute("""SELECT t.term, word_token, word_id
FROM word, (SELECT unnest(%s::TEXT[]) as term) t
WHERE word_token = (CASE
WHEN left(t.term, 1) = '#' THEN
' ' || make_standard_name(substring(t.term from 2))
ELSE
make_standard_name(t.term)
END)
and class is null and country_code is null""",
(words, ))
return [(r[0], r[1], r[2]) for r in cur]
def normalize(self, phrase: str) -> str:
""" Normalize the given phrase, i.e. remove all properties that
are irrelevant for search.
"""
return cast(str, self.normalizer.transliterate(phrase))
def normalize_postcode(self, postcode: str) -> str:
""" Convert the postcode to a standardized form.
This function must yield exactly the same result as the SQL function
'token_normalized_postcode()'.
"""
return postcode.strip().upper()
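
A minimal illustration of the contract stated above (not part of the module): the transformation is kept deliberately trivial so that the Python and SQL implementations cannot diverge.

```python
# Both sides must produce identical results for the same input.
assert '  se1 9gf '.strip().upper() == 'SE1 9GF'
assert '1234'.strip().upper() == '1234'  # digits pass through unchanged
```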
def update_postcodes_from_db(self) -> None:
""" Update postcode tokens in the word table from the location_postcode
table.
"""
assert self.conn is not None
with self.conn.cursor() as cur:
# Find the rows in location_postcode and word that are
# missing from the other table.
cur.execute("""SELECT * FROM
(SELECT pc, word FROM
(SELECT distinct(postcode) as pc FROM location_postcode) p
FULL JOIN
(SELECT word FROM word
WHERE class ='place' and type = 'postcode') w
ON pc = word) x
WHERE pc is null or word is null""")
to_delete = []
to_add = []
for postcode, word in cur:
if postcode is None:
to_delete.append(word)
else:
to_add.append(postcode)
if to_delete:
cur.execute("""DELETE FROM WORD
WHERE class ='place' and type = 'postcode'
and word = any(%s)
""", (to_delete, ))
if to_add:
cur.execute("""SELECT count(create_postcode_id(pc))
FROM unnest(%s::text[]) as pc
""", (to_add, ))
def update_special_phrases(self, phrases: Iterable[Tuple[str, str, str, str]],
should_replace: bool) -> None:
""" Replace the search index for special phrases with the new phrases.
"""
assert self.conn is not None
norm_phrases = set(((self.normalize(p[0]), p[1], p[2], p[3])
for p in phrases))
with self.conn.cursor() as cur:
# Get the old phrases.
existing_phrases = set()
cur.execute("""SELECT word, class as cls, type, operator FROM word
WHERE class != 'place'
OR (type != 'house' AND type != 'postcode')""")
for label, cls, typ, oper in cur:
existing_phrases.add((label, cls, typ, oper or '-'))
to_add = norm_phrases - existing_phrases
to_delete = existing_phrases - norm_phrases
if to_add:
cur.executemany(
""" INSERT INTO word (word_id, word_token, word, class, type,
search_name_count, operator)
(SELECT nextval('seq_word'), ' ' || make_standard_name(name), name,
class, type, 0,
CASE WHEN op in ('in', 'near') THEN op ELSE null END
FROM (VALUES (%s, %s, %s, %s)) as v(name, class, type, op))""",
to_add)
if to_delete and should_replace:
cur.executemany(
""" DELETE FROM word
USING (VALUES (%s, %s, %s, %s)) as v(name, in_class, in_type, op)
WHERE word = name and class = in_class and type = in_type
and ((op = '-' and operator is null) or op = operator)""",
to_delete)
LOG.info("Total phrases: %s. Added: %s. Deleted: %s",
len(norm_phrases), len(to_add), len(to_delete))
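
For reference, the phrase tuples consumed here have the shape `(label, class, type, operator)`; a hedged one-liner modelled on the tests for this module further down:

```python
# Sketch: add one special phrase, replacing any existing entries.
analyzer.update_special_phrases(
    [('Restaurant', 'amenity', 'restaurant', 'in')], True)
```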
def add_country_names(self, country_code: str, names: Mapping[str, str]) -> None:
""" Add names for the given country to the search index.
"""
assert self.conn is not None
with self.conn.cursor() as cur:
cur.execute(
"""INSERT INTO word (word_id, word_token, country_code)
(SELECT nextval('seq_word'), lookup_token, %s
FROM (SELECT DISTINCT ' ' || make_standard_name(n) as lookup_token
FROM unnest(%s::TEXT[])n) y
WHERE NOT EXISTS(SELECT * FROM word
WHERE word_token = lookup_token and country_code = %s))
""", (country_code, list(names.values()), country_code))
def process_place(self, place: PlaceInfo) -> Mapping[str, Any]:
""" Determine tokenizer information about the given place.
Returns a JSON-serialisable structure that will be handed into
the database via the token_info field.
"""
assert self.conn is not None
token_info = _TokenInfo(self._cache)
names = place.name
if names:
token_info.add_names(self.conn, names)
if place.is_country():
assert place.country_code is not None
self.add_country_names(place.country_code, names)
address = place.address
if address:
self._process_place_address(token_info, address)
return token_info.data
def _process_place_address(self, token_info: '_TokenInfo', address: Mapping[str, str]) -> None:
assert self.conn is not None
hnrs = []
addr_terms = []
for key, value in address.items():
if key == 'postcode':
# Make sure the normalized postcode is present in the word table.
if re.search(r'[:,;]', value) is None:
norm_pc = self.normalize_postcode(value)
token_info.set_postcode(norm_pc)
self._cache.add_postcode(self.conn, norm_pc)
elif key in ('housenumber', 'streetnumber', 'conscriptionnumber'):
hnrs.append(value)
elif key == 'street':
token_info.add_street(self.conn, value)
elif key == 'place':
token_info.add_place(self.conn, value)
elif not key.startswith('_') \
and key not in ('country', 'full', 'inclusion'):
addr_terms.append((key, value))
if hnrs:
token_info.add_housenumbers(self.conn, hnrs)
if addr_terms:
token_info.add_address_terms(self.conn, addr_terms)
class _TokenInfo:
""" Collect token information to be sent back to the database.
"""
def __init__(self, cache: '_TokenCache') -> None:
self.cache = cache
self.data: Dict[str, Any] = {}
def add_names(self, conn: Connection, names: Mapping[str, str]) -> None:
""" Add token information for the names of the place.
"""
# Create the token IDs for all names.
self.data['names'] = execute_scalar(conn, "SELECT make_keywords(%s)::text",
(names, ))
def add_housenumbers(self, conn: Connection, hnrs: Sequence[str]) -> None:
""" Extract housenumber information from the address.
"""
if len(hnrs) == 1:
token = self.cache.get_housenumber(hnrs[0])
if token is not None:
self.data['hnr_tokens'] = token
self.data['hnr'] = hnrs[0]
return
# split numbers if necessary
simple_list: List[str] = []
for hnr in hnrs:
simple_list.extend((x.strip() for x in re.split(r'[;,]', hnr)))
if len(simple_list) > 1:
simple_list = list(set(simple_list))
with conn.cursor() as cur:
cur.execute("SELECT * FROM create_housenumbers(%s)", (simple_list, ))
result = cur.fetchone()
assert result is not None
self.data['hnr_tokens'], self.data['hnr'] = result
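
The splitting step above can be reproduced in isolation as follows (a sketch; the token lookup via `create_housenumbers()` is omitted):

```python
import re

hnrs = ['1; 2', '3,4', '3']
simple_list = []
for hnr in hnrs:
    simple_list.extend(x.strip() for x in re.split(r'[;,]', hnr))
# Duplicates are dropped only when more than one number remains.
print(sorted(set(simple_list)))  # ['1', '2', '3', '4']
```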
def set_postcode(self, postcode: str) -> None:
""" Set or replace the postcode token with the given value.
"""
self.data['postcode'] = postcode
def add_street(self, conn: Connection, street: str) -> None:
""" Add addr:street match terms.
"""
def _get_street(name: str) -> Optional[str]:
return cast(Optional[str],
execute_scalar(conn, "SELECT word_ids_from_name(%s)::text", (name, )))
tokens = self.cache.streets.get(street, _get_street)
self.data['street'] = tokens or '{}'
def add_place(self, conn: Connection, place: str) -> None:
""" Add addr:place search and match terms.
"""
def _get_place(name: str) -> Tuple[List[int], List[int]]:
with conn.cursor() as cur:
cur.execute("""SELECT make_keywords(hstore('name' , %s))::text,
word_ids_from_name(%s)::text""",
(name, name))
return cast(Tuple[List[int], List[int]], cur.fetchone())
self.data['place_search'], self.data['place_match'] = \
self.cache.places.get(place, _get_place)
def add_address_terms(self, conn: Connection, terms: Sequence[Tuple[str, str]]) -> None:
""" Add additional address terms.
"""
def _get_address_term(name: str) -> Tuple[List[int], List[int]]:
with conn.cursor() as cur:
cur.execute("""SELECT addr_ids_from_name(%s)::text,
word_ids_from_name(%s)::text""",
(name, name))
return cast(Tuple[List[int], List[int]], cur.fetchone())
tokens = {}
for key, value in terms:
items = self.cache.address_terms.get(value, _get_address_term)
if items[0] or items[1]:
tokens[key] = items
if tokens:
self.data['addr'] = tokens
class _LRU:
""" Least recently used cache that accepts a generator function to
produce the item when there is a cache miss.
"""
def __init__(self, maxsize: int = 128):
self.data: 'OrderedDict[str, Any]' = OrderedDict()
self.maxsize = maxsize
def get(self, key: str, generator: Callable[[str], Any]) -> Any:
""" Get the item with the given key from the cache. If nothing
is found in the cache, generate the value through the
generator function and store it in the cache.
"""
value = self.data.get(key)
if value is not None:
self.data.move_to_end(key)
else:
value = generator(key)
if len(self.data) >= self.maxsize:
self.data.popitem(last=False)
self.data[key] = value
return value
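
A short usage sketch for the cache, assuming nothing beyond the class above: the generator runs only on a miss, and the oldest entry is evicted once `maxsize` is reached.

```python
cache = _LRU(maxsize=2)
assert cache.get('a', str.upper) == 'A'       # miss: generator runs
assert cache.get('a', lambda k: '?') == 'A'   # hit: generator is skipped
cache.get('b', str.upper)
cache.get('c', str.upper)                     # evicts 'a', the least recently used
```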
class _TokenCache:
""" Cache for token information to avoid repeated database queries.
This cache is not thread-safe and needs to be instantiated per
analyzer.
"""
def __init__(self, conn: Connection):
# various LRU caches
self.streets = _LRU(maxsize=256)
self.places = _LRU(maxsize=128)
self.address_terms = _LRU(maxsize=1024)
# Look up housenumbers up to 100 and cache them
with conn.cursor() as cur:
cur.execute("""SELECT i, ARRAY[getorcreate_housenumber_id(i::text)]::text
FROM generate_series(1, 100) as i""")
self._cached_housenumbers: Dict[str, str] = {str(r[0]): r[1] for r in cur}
# For postcodes, remember the ones that have already been added
self.postcodes: Set[str] = set()
def get_housenumber(self, number: str) -> Optional[str]:
""" Get a housenumber token from the cache.
"""
return self._cached_housenumbers.get(number)
def add_postcode(self, conn: Connection, postcode: str) -> None:
""" Make sure the given postcode is in the database.
"""
if postcode not in self.postcodes:
with conn.cursor() as cur:
cur.execute('SELECT create_postcode_id(%s)', (postcode, ))
self.postcodes.add(postcode)
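
Putting the pieces together, a hedged end-to-end sketch of the module being removed; `config` stands for a loaded `Configuration` and `place` for a `PlaceInfo`, both assumed here:

```python
from pathlib import Path

tokenizer = create('dbname=nominatim', Path('project/tokenizer'))
tokenizer.init_new_db(config)

with tokenizer.name_analyzer() as analyzer:
    info = analyzer.process_place(place)
# 'info' is JSON-serialisable and is handed to the database
# via the token_info field.
```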

View File

@ -125,7 +125,8 @@ def update(dsn: str, options: MutableMapping[str, Any],
# Read updates into file.
with _make_replication_server(options['base_url'], socket_timeout) as repl:
outhandler = WriteHandler(str(options['import_file']))
endseq = repl.apply_diffs(outhandler, startseq + 1,
# typing: work around a typing bug in pyosmium 4.0
endseq = repl.apply_diffs(outhandler, startseq + 1, # type: ignore[arg-type]
max_size=options['max_diff_size'] * 1024)
outhandler.close()

View File

@ -69,7 +69,6 @@ Feature: Search queries
| 0 |
Then there are duplicates
@fail-legacy
Scenario: Search with bounded viewbox in right area
When sending json search query "post" with address
| bounded | viewbox |

View File

@ -104,7 +104,6 @@ Feature: Parenting of objects
| N3 | W2 |
| N4 | W1 |
@fail-legacy
Scenario: addr:street tag parents to appropriately named street, locale names
Given the grid
| 10 | | | | | 11 |

View File

@ -195,7 +195,6 @@ Feature: Import of postcodes
| E45 2 | gb | 23 | 5 |
| Y45 | gb | 21 | 5 |
@fail-legacy
Scenario: Postcodes outside all countries are not added to the postcode and word table
Given the places
| osm | class | type | addr+postcode | addr+housenumber | addr+place | geometry |

View File

@ -11,7 +11,6 @@ Feature: Creation of search terms
| object | name_vector |
| N1 | #New York, #Big Apple |
@fail-legacy
Scenario: Comma-separated names appear as a single full name
Given the places
| osm | class | type | name+alt_name |

View File

@ -27,7 +27,6 @@ Feature: Searching of house numbers
| N1 |
@fail-legacy
Scenario Outline: Numeral housenumbers in any script are found
Given the places
| osm | class | type | housenr | geometry |
@ -84,7 +83,6 @@ Feature: Searching of house numbers
| 2, 4, 12 |
@fail-legacy
Scenario Outline: Housenumber - letter combinations are found
Given the places
| osm | class | type | housenr | geometry |
@ -150,7 +148,6 @@ Feature: Searching of house numbers
| 34/10 |
@fail-legacy
Scenario Outline: a bis housenumber is found
Given the places
| osm | class | type | housenr | geometry |
@ -184,7 +181,6 @@ Feature: Searching of house numbers
| 45 bis |
@fail-legacy
Scenario Outline: a ter housenumber is found
Given the places
| osm | class | type | housenr | geometry |
@ -218,7 +214,6 @@ Feature: Searching of house numbers
| 45 TER |
@fail-legacy
Scenario Outline: a number - letter - number combination housenumber is found
Given the places
| osm | class | type | housenr | geometry |
@ -252,7 +247,6 @@ Feature: Searching of house numbers
| 501h1 |
@fail-legacy
Scenario Outline: Russian housenumbers are found
Given the places
| osm | class | type | housenr | geometry |

View File

@ -1,7 +1,6 @@
@DB
Feature: Searches in Japan
Tests specifically for searches of Japanese addresses and for queries in the Japanese language.
@fail-legacy
Scenario: A block house-number is parented to the neighbourhood
Given the grid with origin JP
| 1 | | | | 2 |

View File

@ -14,7 +14,6 @@ Feature: Querying for postcode variants
| 0 | postcode | 399174, Singapore |
@fail-legacy
Scenario Outline: Postcodes in the Netherlands (mixed postcode with spaces)
Given the grid with origin NL
| 10 | | | | 11 |
@ -38,7 +37,6 @@ Feature: Querying for postcode variants
| 3993 dx |
@fail-legacy
Scenario: Postcodes in Singapore (6-digit postcode)
Given the grid with origin SG
| 10 | | | | 11 |
@ -52,7 +50,6 @@ Feature: Querying for postcode variants
| 0 | postcode | 399174, Singapore |
@fail-legacy
Scenario Outline: Postcodes in Andorra (with country code)
Given the grid with origin AD
| 10 | | | | 11 |
@ -76,7 +73,6 @@ Feature: Querying for postcode variants
| AD675 |
@fail-legacy
Scenario: Different postcodes with the same normalization can both be found
Given the places
| osm | class | type | addr+postcode | addr+housenumber | geometry |
@ -97,7 +93,6 @@ Feature: Querying for postcode variants
| postcode | E4 7EA, United Kingdom |
@fail-legacy
Scenario: Postcode areas are preferred over postcode points
Given the grid with origin DE
| 1 | 2 |

View File

@ -77,7 +77,6 @@ Feature: Searching of simple objects
| W1 |
@fail-legacy
Scenario Outline: Special cased american states will be found
Given the grid
| 1 | | 2 |

View File

@ -8,7 +8,6 @@ Feature: Country handling
| | 10 | |
| 4 | | 3 |
@fail-legacy
Scenario: When country names are changed old ones are no longer searchable
Given the places
| osm | class | type | admin | name+name:xy | country | geometry |
@ -27,7 +26,6 @@ Feature: Country handling
When sending search query "Wenig, Loudou"
Then exactly 0 results are returned
@fail-legacy
Scenario: When country names are deleted they are no longer searchable
Given the places
| osm | class | type | admin | name+name:xy | country | geometry |
@ -83,7 +81,6 @@ Feature: Country handling
| N10 | Wenig, Lilly |
@fail-legacy
Scenario: When a localised name is deleted, the standard name takes over
Given the places
| osm | class | type | admin | name+name:de | country | geometry |

View File

@ -27,7 +27,6 @@ userconfig = {
'TEST_DB' : 'test_nominatim',
'API_TEST_DB' : 'test_api_nominatim',
'API_TEST_FILE' : TEST_BASE_DIR / 'testdb' / 'apidb-test-data.pbf',
'SERVER_MODULE_PATH' : None,
'TOKENIZER' : None, # Test with a custom tokenizer
'STYLE' : 'extratags',
'API_ENGINE': 'falcon'
@ -60,9 +59,3 @@ def before_scenario(context, scenario):
def after_scenario(context, scenario):
if 'DB' in context.tags:
context.nominatim.teardown_db(context)
def before_tag(context, tag):
if tag == 'fail-legacy':
if context.config.userdata['TOKENIZER'] == 'legacy':
context.scenario.skip("Not implemented in legacy tokenizer")

View File

@ -34,7 +34,6 @@ class NominatimEnvironment:
self.api_test_file = config['API_TEST_FILE']
self.tokenizer = config['TOKENIZER']
self.import_style = config['STYLE']
self.server_module_path = config['SERVER_MODULE_PATH']
self.reuse_template = not config['REMOVE_TEMPLATE']
self.keep_scenario_db = config['KEEP_TEST_DB']
@ -48,9 +47,6 @@ class NominatimEnvironment:
raise RuntimeError(f"Unknown API engine '{config['API_ENGINE']}'")
self.api_engine = getattr(self, f"create_api_request_func_{config['API_ENGINE']}")()
if self.tokenizer == 'legacy' and self.server_module_path is None:
raise RuntimeError("You must set -DSERVER_MODULE_PATH when testing the legacy tokenizer.")
def connect_database(self, dbname):
""" Return a connection to the database with the given name.
Uses configured host, user and port.
@ -100,9 +96,6 @@ class NominatimEnvironment:
if self.import_style is not None:
self.test_env['NOMINATIM_IMPORT_STYLE'] = self.import_style
if self.server_module_path:
self.test_env['NOMINATIM_DATABASE_MODULE_PATH'] = self.server_module_path
if self.website_dir is not None:
self.website_dir.cleanup()
@ -111,7 +104,6 @@ class NominatimEnvironment:
def get_test_config(self):
cfg = Configuration(Path(self.website_dir.name), environ=self.test_env)
cfg.set_libdirs(module=self.server_module_path)
return cfg
def get_libpq_dsn(self):
@ -190,12 +182,8 @@ class NominatimEnvironment:
self.run_nominatim('add-data', '--tiger-data', str(testdata / 'tiger'))
self.run_nominatim('freeze')
if self.tokenizer == 'legacy':
phrase_file = str(testdata / 'specialphrases_testdb.sql')
run_script(['psql', '-d', self.api_test_db, '-f', phrase_file])
else:
csv_path = str(testdata / 'full_en_phrases_test.csv')
self.run_nominatim('special-phrases', '--import-from-csv', csv_path)
csv_path = str(testdata / 'full_en_phrases_test.csv')
self.run_nominatim('special-phrases', '--import-from-csv', csv_path)
except:
self.db_drop_database(self.api_test_db)
raise
@ -278,8 +266,7 @@ class NominatimEnvironment:
if self.website_dir is not None:
cmdline = list(cmdline) + ['--project-dir', self.website_dir.name]
cli.nominatim(module_dir=self.server_module_path,
osm2pgsql_path=None,
cli.nominatim(osm2pgsql_path=None,
cli_args=cmdline,
environ=self.test_env)

View File

@ -28,9 +28,8 @@ def check_database_integrity(context):
assert cur.fetchone()[0] == 0, "Duplicates found in place_addressline"
# word table must not have empty word_tokens
if context.nominatim.tokenizer != 'legacy':
cur.execute("SELECT count(*) FROM word WHERE word_token = ''")
assert cur.fetchone()[0] == 0, "Empty word tokens found in word table"
cur.execute("SELECT count(*) FROM word WHERE word_token = ''")
assert cur.fetchone()[0] == 0, "Empty word tokens found in word table"
@ -324,13 +323,8 @@ def check_word_table_for_postcodes(context, exclude, postcodes):
plist.sort()
with context.db.cursor() as cur:
if nctx.tokenizer != 'legacy':
cur.execute("SELECT word FROM word WHERE type = 'P' and word = any(%s)",
(plist,))
else:
cur.execute("""SELECT word FROM word WHERE word = any(%s)
and class = 'place' and type = 'postcode'""",
(plist,))
cur.execute("SELECT word FROM word WHERE type = 'P' and word = any(%s)",
(plist,))
found = [row['word'] for row in cur]
assert len(found) == len(set(found)), f"Duplicate rows for postcodes: {found}"

View File

@ -19,7 +19,7 @@ class MyToken(query.Token):
def mktoken(tid: int):
return MyToken(penalty=3.0, token=tid, count=1, addr_count=1,
lookup_word='foo', is_indexed=True)
lookup_word='foo')
@pytest.mark.parametrize('ptype,ttype', [('NONE', 'WORD'),

View File

@ -33,7 +33,7 @@ def make_query(*args):
q.add_token(TokenRange(start, end), ttype,
MyToken(penalty=0.5 if ttype == TokenType.PARTIAL else 0.0,
token=tid, count=1, addr_count=1,
lookup_word=word, is_indexed=True))
lookup_word=word))
return q
@ -397,14 +397,14 @@ def make_counted_searches(name_part, name_full, address_part, address_full,
q.add_node(BreakType.END, PhraseType.NONE)
q.add_token(TokenRange(0, 1), TokenType.PARTIAL,
MyToken(0.5, 1, name_part, 1, 'name_part', True))
MyToken(0.5, 1, name_part, 1, 'name_part'))
q.add_token(TokenRange(0, 1), TokenType.WORD,
MyToken(0, 101, name_full, 1, 'name_full', True))
MyToken(0, 101, name_full, 1, 'name_full'))
for i in range(num_address_parts):
q.add_token(TokenRange(i + 1, i + 2), TokenType.PARTIAL,
MyToken(0.5, 2, address_part, 1, 'address_part', True))
MyToken(0.5, 2, address_part, 1, 'address_part'))
q.add_token(TokenRange(i + 1, i + 2), TokenType.WORD,
MyToken(0, 102, address_full, 1, 'address_full', True))
MyToken(0, 102, address_full, 1, 'address_full'))
builder = SearchBuilder(q, SearchDetails())

View File

@ -1,241 +0,0 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Tests for query analyzer for legacy tokenizer.
"""
import pytest
import pytest_asyncio
from nominatim_api import NominatimAPIAsync
from nominatim_api.search.query import Phrase, PhraseType, TokenType, BreakType
import nominatim_api.search.legacy_tokenizer as tok
from nominatim_api.logging import set_log_output, get_and_disable
async def add_word(conn, word_id, word_token, word, count):
t = conn.t.meta.tables['word']
await conn.execute(t.insert(), {'word_id': word_id,
'word_token': word_token,
'search_name_count': count,
'word': word})
async def add_housenumber(conn, word_id, hnr):
t = conn.t.meta.tables['word']
await conn.execute(t.insert(), {'word_id': word_id,
'word_token': ' ' + hnr,
'word': hnr,
'class': 'place',
'type': 'house'})
async def add_postcode(conn, word_id, postcode):
t = conn.t.meta.tables['word']
await conn.execute(t.insert(), {'word_id': word_id,
'word_token': ' ' + postcode,
'word': postcode,
'class': 'place',
'type': 'postcode'})
async def add_special_term(conn, word_id, word_token, cls, typ, op):
t = conn.t.meta.tables['word']
await conn.execute(t.insert(), {'word_id': word_id,
'word_token': word_token,
'word': word_token,
'class': cls,
'type': typ,
'operator': op})
def make_phrase(query):
return [Phrase(PhraseType.NONE, s) for s in query.split(',')]
@pytest_asyncio.fixture
async def conn(table_factory, temp_db_cursor):
""" Create an asynchronous SQLAlchemy engine for the test DB.
"""
table_factory('nominatim_properties',
definition='property TEXT, value TEXT',
content=(('tokenizer_maxwordfreq', '10000'), ))
table_factory('word',
definition="""word_id INT, word_token TEXT, word TEXT,
class TEXT, type TEXT, country_code TEXT,
search_name_count INT, operator TEXT
""")
temp_db_cursor.execute("""CREATE OR REPLACE FUNCTION make_standard_name(name TEXT)
RETURNS TEXT AS $$ SELECT lower(name); $$ LANGUAGE SQL;""")
async with NominatimAPIAsync() as api:
async with api.begin() as conn:
yield conn
@pytest.mark.asyncio
async def test_empty_phrase(conn):
ana = await tok.create_query_analyzer(conn)
query = await ana.analyze_query([])
assert len(query.source) == 0
assert query.num_token_slots() == 0
@pytest.mark.asyncio
async def test_single_phrase_with_unknown_terms(conn):
ana = await tok.create_query_analyzer(conn)
await add_word(conn, 1, 'foo', 'FOO', 3)
query = await ana.analyze_query(make_phrase('foo BAR'))
assert len(query.source) == 1
assert query.source[0].ptype == PhraseType.NONE
assert query.source[0].text == 'foo bar'
assert query.num_token_slots() == 2
assert len(query.nodes[0].starting) == 1
assert not query.nodes[1].starting
@pytest.mark.asyncio
async def test_multiple_phrases(conn):
ana = await tok.create_query_analyzer(conn)
await add_word(conn, 1, 'one', 'one', 13)
await add_word(conn, 2, 'two', 'two', 45)
await add_word(conn, 100, 'one two', 'one two', 3)
await add_word(conn, 3, 'three', 'three', 4584)
query = await ana.analyze_query(make_phrase('one two,three'))
assert len(query.source) == 2
@pytest.mark.asyncio
async def test_housenumber_token(conn):
ana = await tok.create_query_analyzer(conn)
await add_housenumber(conn, 556, '45 a')
query = await ana.analyze_query(make_phrase('45 A'))
assert query.num_token_slots() == 2
assert len(query.nodes[0].starting) == 2
query.nodes[0].starting.sort(key=lambda tl: tl.end)
hn1 = query.nodes[0].starting[0]
assert hn1.ttype == TokenType.HOUSENUMBER
assert hn1.end == 1
assert hn1.tokens[0].token == 0
hn2 = query.nodes[0].starting[1]
assert hn2.ttype == TokenType.HOUSENUMBER
assert hn2.end == 2
assert hn2.tokens[0].token == 556
@pytest.mark.asyncio
async def test_postcode_token(conn):
ana = await tok.create_query_analyzer(conn)
await add_postcode(conn, 34, '45ax')
query = await ana.analyze_query(make_phrase('45AX'))
assert query.num_token_slots() == 1
assert [tl.ttype for tl in query.nodes[0].starting] == [TokenType.POSTCODE]
@pytest.mark.asyncio
async def test_partial_tokens(conn):
ana = await tok.create_query_analyzer(conn)
await add_word(conn, 1, ' foo', 'foo', 99)
await add_word(conn, 1, 'foo', 'FOO', 99)
await add_word(conn, 1, 'bar', 'FOO', 990000)
query = await ana.analyze_query(make_phrase('foo bar'))
assert query.num_token_slots() == 2
first = query.nodes[0].starting
first.sort(key=lambda tl: tl.tokens[0].penalty)
assert [tl.ttype for tl in first] == [TokenType.WORD, TokenType.PARTIAL]
assert all(tl.tokens[0].lookup_word == 'foo' for tl in first)
second = query.nodes[1].starting
assert [tl.ttype for tl in second] == [TokenType.PARTIAL]
assert not second[0].tokens[0].is_indexed
@pytest.mark.asyncio
@pytest.mark.parametrize('term,order', [('23456', ['POSTCODE', 'HOUSENUMBER', 'WORD', 'PARTIAL']),
('3', ['HOUSENUMBER', 'POSTCODE', 'WORD', 'PARTIAL'])
])
async def test_penalty_postcodes_and_housenumbers(conn, term, order):
ana = await tok.create_query_analyzer(conn)
await add_postcode(conn, 1, term)
await add_housenumber(conn, 2, term)
await add_word(conn, 3, term, term, 5)
await add_word(conn, 4, ' ' + term, term, 1)
query = await ana.analyze_query(make_phrase(term))
assert query.num_token_slots() == 1
torder = [(tl.tokens[0].penalty, tl.ttype.name) for tl in query.nodes[0].starting]
torder.sort()
assert [t[1] for t in torder] == order
@pytest.mark.asyncio
async def test_category_words_only_at_beginning(conn):
ana = await tok.create_query_analyzer(conn)
await add_special_term(conn, 1, 'foo', 'amenity', 'restaurant', 'in')
await add_word(conn, 2, ' bar', 'BAR', 1)
query = await ana.analyze_query(make_phrase('foo BAR foo'))
assert query.num_token_slots() == 3
assert len(query.nodes[0].starting) == 1
assert query.nodes[0].starting[0].ttype == TokenType.NEAR_ITEM
assert not query.nodes[2].starting
@pytest.mark.asyncio
async def test_qualifier_words(conn):
ana = await tok.create_query_analyzer(conn)
await add_special_term(conn, 1, 'foo', 'amenity', 'restaurant', '-')
await add_word(conn, 2, ' bar', 'w', None)
query = await ana.analyze_query(make_phrase('foo BAR foo BAR foo'))
assert query.num_token_slots() == 5
assert set(t.ttype for t in query.nodes[0].starting) == {TokenType.NEAR_ITEM, TokenType.QUALIFIER}
assert set(t.ttype for t in query.nodes[2].starting) == {TokenType.QUALIFIER}
assert set(t.ttype for t in query.nodes[4].starting) == {TokenType.NEAR_ITEM, TokenType.QUALIFIER}
@pytest.mark.asyncio
@pytest.mark.parametrize('logtype', ['text', 'html'])
async def test_log_output(conn, logtype):
ana = await tok.create_query_analyzer(conn)
await add_word(conn, 1, 'foo', 'FOO', 99)
set_log_output(logtype)
await ana.analyze_query(make_phrase('foo'))
assert get_and_disable()

View File

@ -20,7 +20,7 @@ class MyToken(Token):
def make_query(*args):
q = QueryStruct([Phrase(args[0][1], '')])
dummy = MyToken(penalty=3.0, token=45, count=1, addr_count=1,
lookup_word='foo', is_indexed=True)
lookup_word='foo')
for btype, ptype, _ in args[1:]:
q.add_node(btype, ptype)

View File

@ -14,8 +14,7 @@ import nominatim_db.cli
@pytest.fixture
def run_export(tmp_path, capsys):
def _exec(args):
assert 0 == nominatim_db.cli.nominatim(module_dir='MODULE NOT AVAILABLE',
osm2pgsql_path='OSM2PGSQL NOT AVAILABLE',
assert 0 == nominatim_db.cli.nominatim(osm2pgsql_path='OSM2PGSQL NOT AVAILABLE',
cli_args=['export', '--project-dir', str(tmp_path)]
+ args)
return capsys.readouterr().out.split('\r\n')

View File

@ -27,7 +27,6 @@ def setup_database_with_context(apiobj, table_factory):
@pytest.mark.parametrize('args', [['--search-only'], ['--reverse-only']])
def test_warm_all(tmp_path, args):
assert 0 == nominatim_db.cli.nominatim(module_dir='MODULE NOT AVAILABLE',
osm2pgsql_path='OSM2PGSQL NOT AVAILABLE',
assert 0 == nominatim_db.cli.nominatim(osm2pgsql_path='OSM2PGSQL NOT AVAILABLE',
cli_args=['admin', '--project-dir', str(tmp_path),
'--warm'] + args)

View File

@ -68,8 +68,7 @@ def cli_call():
Returns a function that can be called with the desired CLI arguments.
"""
def _call_nominatim(*args):
return nominatim_db.cli.nominatim(module_dir='MODULE NOT AVAILABLE',
osm2pgsql_path='OSM2PGSQL NOT AVAILABLE',
return nominatim_db.cli.nominatim(osm2pgsql_path='OSM2PGSQL NOT AVAILABLE',
cli_args=args)
return _call_nominatim

View File

@ -140,8 +140,8 @@ def test_get_bool(make_config, monkeypatch, value, result):
def test_get_bool_empty(make_config):
config = make_config()
assert config.DATABASE_MODULE_PATH == ''
assert not config.get_bool('DATABASE_MODULE_PATH')
assert config.TOKENIZER_CONFIG == ''
assert not config.get_bool('TOKENIZER_CONFIG')
@pytest.mark.parametrize("value,result", [('0', 0), ('1', 1),
@ -167,10 +167,10 @@ def test_get_int_bad_values(make_config, monkeypatch, value):
def test_get_int_empty(make_config):
config = make_config()
assert config.DATABASE_MODULE_PATH == ''
assert config.TOKENIZER_CONFIG == ''
with pytest.raises(UsageError):
config.get_int('DATABASE_MODULE_PATH')
config.get_int('TOKENIZER_CONFIG')
@pytest.mark.parametrize("value,outlist", [('sd', ['sd']),
@ -193,8 +193,8 @@ def test_get_str_list_empty(make_config):
def test_get_path_empty(make_config):
config = make_config()
assert config.DATABASE_MODULE_PATH == ''
assert not config.get_path('DATABASE_MODULE_PATH')
assert config.TOKENIZER_CONFIG == ''
assert not config.get_path('TOKENIZER_CONFIG')
def test_get_path_absolute(make_config, monkeypatch):

View File

@ -109,7 +109,7 @@ def table_factory(temp_db_conn):
@pytest.fixture
def def_config():
cfg = Configuration(None)
cfg.set_libdirs(module='.', osm2pgsql='.')
cfg.set_libdirs(osm2pgsql=None)
return cfg
@ -118,7 +118,7 @@ def project_env(tmp_path):
projdir = tmp_path / 'project'
projdir.mkdir()
cfg = Configuration(projdir)
cfg.set_libdirs(module='.', osm2pgsql='.')
cfg.set_libdirs(osm2pgsql=None)
return cfg
@ -208,7 +208,7 @@ def osmline_table(temp_db_with_extensions, table_factory):
def sql_preprocessor_cfg(tmp_path, table_factory, temp_db_with_extensions):
table_factory('country_name', 'partition INT', ((0, ), (1, ), (2, )))
cfg = Configuration(None)
cfg.set_libdirs(module='.', osm2pgsql='.', sql=tmp_path)
cfg.set_libdirs(osm2pgsql=None, sql=tmp_path)
return cfg

View File

@ -1,99 +0,0 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Legacy word table for testing with functions to prefill and test contents
of the table.
"""
from nominatim_db.db.connection import execute_scalar
class MockLegacyWordTable:
""" A word table for testing using legacy word table structure.
"""
def __init__(self, conn):
self.conn = conn
with conn.cursor() as cur:
cur.execute("""CREATE TABLE word (word_id INTEGER,
word_token text,
word text,
class text,
type text,
country_code varchar(2),
search_name_count INTEGER,
operator TEXT)""")
conn.commit()
def add_full_word(self, word_id, word, word_token=None):
with self.conn.cursor() as cur:
cur.execute("""INSERT INTO word (word_id, word_token, word)
VALUES (%s, %s, %s)
""", (word_id, ' ' + (word_token or word), word))
self.conn.commit()
def add_special(self, word_token, word, cls, typ, oper):
with self.conn.cursor() as cur:
cur.execute("""INSERT INTO word (word_token, word, class, type, operator)
VALUES (%s, %s, %s, %s, %s)
""", (word_token, word, cls, typ, oper))
self.conn.commit()
def add_country(self, country_code, word_token):
with self.conn.cursor() as cur:
cur.execute("INSERT INTO word (word_token, country_code) VALUES(%s, %s)",
(word_token, country_code))
self.conn.commit()
def add_postcode(self, word_token, postcode):
with self.conn.cursor() as cur:
cur.execute("""INSERT INTO word (word_token, word, class, type)
VALUES (%s, %s, 'place', 'postcode')
""", (word_token, postcode))
self.conn.commit()
def count(self):
return execute_scalar(self.conn, "SELECT count(*) FROM word")
def count_special(self):
return execute_scalar(self.conn, "SELECT count(*) FROM word WHERE class != 'place'")
def get_special(self):
with self.conn.cursor() as cur:
cur.execute("""SELECT word_token, word, class as cls, type, operator
FROM word WHERE class != 'place'""")
result = set((tuple(row) for row in cur))
assert len(result) == cur.rowcount, "Word table has duplicates."
return result
def get_country(self):
with self.conn.cursor() as cur:
cur.execute("""SELECT country_code, word_token
FROM word WHERE country_code is not null""")
result = set((tuple(row) for row in cur))
assert len(result) == cur.rowcount, "Word table has duplicates."
return result
def get_postcodes(self):
with self.conn.cursor() as cur:
cur.execute("""SELECT word FROM word
WHERE class = 'place' and type = 'postcode'""")
return set((row[0] for row in cur))
def get_partial_words(self):
with self.conn.cursor() as cur:
cur.execute("""SELECT word_token, search_name_count FROM word
WHERE class is null and country_code is null
and not word_token like ' %'""")
return set((tuple(row) for row in cur))
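
A hypothetical pytest usage of this mock (a sketch; `temp_db_conn` is the fixture used throughout these tests):

```python
def test_postcode_roundtrip(temp_db_conn):
    word_table = MockLegacyWordTable(temp_db_conn)
    word_table.add_postcode(' 1234', '1234')
    assert word_table.get_postcodes() == {'1234'}
    assert word_table.count() == 1
```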

View File

@ -1,591 +0,0 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Test for legacy tokenizer.
"""
import shutil
import re
import pytest
from nominatim_db.data.place_info import PlaceInfo
from nominatim_db.tokenizer import legacy_tokenizer
from nominatim_db.db import properties
from nominatim_db.errors import UsageError
from mock_legacy_word_table import MockLegacyWordTable
# Force use of legacy word table
@pytest.fixture
def word_table(temp_db_conn):
return MockLegacyWordTable(temp_db_conn)
@pytest.fixture
def test_config(project_env, tmp_path):
module_dir = tmp_path / 'module_src'
module_dir.mkdir()
(module_dir / 'nominatim.so').write_text('TEST nominatim.so')
project_env.lib_dir.module = module_dir
sqldir = tmp_path / 'sql'
sqldir.mkdir()
(sqldir / 'tokenizer').mkdir()
# Get the original SQL but replace make_standard_name to avoid module use.
init_sql = (project_env.lib_dir.sql / 'tokenizer' / 'legacy_tokenizer.sql').read_text()
for fn in ('transliteration', 'gettokenstring'):
init_sql = re.sub(f'CREATE OR REPLACE FUNCTION {fn}[^;]*;',
'', init_sql, flags=re.DOTALL)
init_sql += """
CREATE OR REPLACE FUNCTION make_standard_name(name TEXT)
RETURNS TEXT AS $$ SELECT lower(name); $$ LANGUAGE SQL;
"""
# Also load util functions. Some are needed by the tokenizer.
init_sql += (project_env.lib_dir.sql / 'functions' / 'utils.sql').read_text()
(sqldir / 'tokenizer' / 'legacy_tokenizer.sql').write_text(init_sql)
(sqldir / 'words.sql').write_text("SELECT 'a'")
shutil.copy(str(project_env.lib_dir.sql / 'tokenizer' / 'legacy_tokenizer_tables.sql'),
str(sqldir / 'tokenizer' / 'legacy_tokenizer_tables.sql'))
project_env.lib_dir.sql = sqldir
project_env.lib_dir.data = sqldir
return project_env
@pytest.fixture
def tokenizer_factory(dsn, tmp_path, property_table):
(tmp_path / 'tokenizer').mkdir()
def _maker():
return legacy_tokenizer.create(dsn, tmp_path / 'tokenizer')
return _maker
@pytest.fixture
def tokenizer_setup(tokenizer_factory, test_config, monkeypatch, sql_preprocessor):
monkeypatch.setattr(legacy_tokenizer, '_check_module', lambda m, c: None)
tok = tokenizer_factory()
tok.init_new_db(test_config)
@pytest.fixture
def analyzer(tokenizer_factory, test_config, monkeypatch, sql_preprocessor,
word_table, temp_db_with_extensions, tmp_path):
monkeypatch.setattr(legacy_tokenizer, '_check_module', lambda m, c: None)
monkeypatch.setenv('NOMINATIM_TERM_NORMALIZATION', ':: lower();')
tok = tokenizer_factory()
tok.init_new_db(test_config)
monkeypatch.undo()
with tok.name_analyzer() as analyzer:
yield analyzer
@pytest.fixture
def make_standard_name(temp_db_cursor):
temp_db_cursor.execute("""CREATE OR REPLACE FUNCTION make_standard_name(name TEXT)
RETURNS TEXT AS $$ SELECT '#' || lower(name) || '#'; $$ LANGUAGE SQL""")
@pytest.fixture
def create_postcode_id(temp_db_cursor):
temp_db_cursor.execute("""CREATE OR REPLACE FUNCTION create_postcode_id(postcode TEXT)
RETURNS BOOLEAN AS $$
INSERT INTO word (word_token, word, class, type)
VALUES (' ' || postcode, postcode, 'place', 'postcode')
RETURNING True;
$$ LANGUAGE SQL""")
def test_init_new(tokenizer_factory, test_config, monkeypatch,
temp_db_conn, sql_preprocessor):
monkeypatch.setenv('NOMINATIM_TERM_NORMALIZATION', 'xxvv')
monkeypatch.setattr(legacy_tokenizer, '_check_module', lambda m, c: None)
tok = tokenizer_factory()
tok.init_new_db(test_config)
assert properties.get_property(temp_db_conn, legacy_tokenizer.DBCFG_NORMALIZATION) == 'xxvv'
outfile = test_config.project_dir / 'module' / 'nominatim.so'
assert outfile.exists()
assert outfile.read_text() == 'TEST nominatim.so'
assert outfile.stat().st_mode == 33261
def test_init_module_load_failed(tokenizer_factory, test_config):
tok = tokenizer_factory()
with pytest.raises(UsageError):
tok.init_new_db(test_config)
def test_init_module_custom(tokenizer_factory, test_config,
monkeypatch, tmp_path, sql_preprocessor):
module_dir = (tmp_path / 'custom').resolve()
module_dir.mkdir()
(module_dir / 'nominatim.so').write_text('CUSTOM nominatim.so')
monkeypatch.setenv('NOMINATIM_DATABASE_MODULE_PATH', str(module_dir))
monkeypatch.setattr(legacy_tokenizer, '_check_module', lambda m, c: None)
tok = tokenizer_factory()
tok.init_new_db(test_config)
assert not (test_config.project_dir / 'module').exists()
def test_init_from_project(tokenizer_setup, tokenizer_factory, test_config):
tok = tokenizer_factory()
tok.init_from_project(test_config)
assert tok.normalization is not None
def test_update_sql_functions(sql_preprocessor, temp_db_conn,
tokenizer_factory, test_config, table_factory,
monkeypatch, temp_db_cursor):
monkeypatch.setenv('NOMINATIM_MAX_WORD_FREQUENCY', '1133')
monkeypatch.setattr(legacy_tokenizer, '_check_module', lambda m, c: None)
tok = tokenizer_factory()
tok.init_new_db(test_config)
monkeypatch.undo()
assert properties.get_property(temp_db_conn, legacy_tokenizer.DBCFG_MAXWORDFREQ) == '1133'
table_factory('test', 'txt TEXT')
func_file = test_config.lib_dir.sql / 'tokenizer' / 'legacy_tokenizer.sql'
func_file.write_text("""INSERT INTO test VALUES ('{{max_word_freq}}'),
('{{modulepath}}')""")
tok.update_sql_functions(test_config)
test_content = temp_db_cursor.row_set('SELECT * FROM test')
assert test_content == set((('1133', ), (str(test_config.project_dir / 'module'), )))
def test_finalize_import(tokenizer_factory, temp_db_conn,
temp_db_cursor, test_config, monkeypatch,
sql_preprocessor_cfg):
monkeypatch.setattr(legacy_tokenizer, '_check_module', lambda m, c: None)
func_file = test_config.lib_dir.sql / 'tokenizer' / 'legacy_tokenizer_indices.sql'
func_file.write_text("""CREATE FUNCTION test() RETURNS TEXT
AS $$ SELECT 'b'::text $$ LANGUAGE SQL""")
tok = tokenizer_factory()
tok.init_new_db(test_config)
tok.finalize_import(test_config)
assert temp_db_cursor.scalar('SELECT test()') == 'b'
def test_migrate_database(tokenizer_factory, test_config, temp_db_conn, monkeypatch):
monkeypatch.setattr(legacy_tokenizer, '_check_module', lambda m, c: None)
tok = tokenizer_factory()
tok.migrate_database(test_config)
assert properties.get_property(temp_db_conn, legacy_tokenizer.DBCFG_MAXWORDFREQ) is not None
assert properties.get_property(temp_db_conn, legacy_tokenizer.DBCFG_NORMALIZATION) is not None
outfile = test_config.project_dir / 'module' / 'nominatim.so'
assert outfile.exists()
assert outfile.read_text() == 'TEST nominatim.so'
assert outfile.stat().st_mode == 33261
def test_check_database(test_config, tokenizer_factory, monkeypatch,
temp_db_cursor, sql_preprocessor_cfg):
monkeypatch.setattr(legacy_tokenizer, '_check_module', lambda m, c: None)
tok = tokenizer_factory()
tok.init_new_db(test_config)
assert tok.check_database(False) is None
def test_check_database_no_tokenizer(test_config, tokenizer_factory):
tok = tokenizer_factory()
assert tok.check_database(False) is not None
def test_check_database_bad_setup(test_config, tokenizer_factory, monkeypatch,
temp_db_cursor, sql_preprocessor_cfg):
monkeypatch.setattr(legacy_tokenizer, '_check_module', lambda m, c: None)
tok = tokenizer_factory()
tok.init_new_db(test_config)
# Inject a bad transliteration.
temp_db_cursor.execute("""CREATE OR REPLACE FUNCTION make_standard_name(name TEXT)
RETURNS TEXT AS $$ SELECT 'garbage'::text; $$ LANGUAGE SQL""")
assert tok.check_database(False) is not None
def test_update_statistics_reverse_only(word_table, tokenizer_factory, test_config):
tok = tokenizer_factory()
tok.update_statistics(test_config)
def test_update_statistics(word_table, table_factory, temp_db_cursor, tokenizer_factory, test_config):
word_table.add_full_word(1000, 'hello')
table_factory('search_name',
'place_id BIGINT, name_vector INT[]',
[(12, [1000])])
tok = tokenizer_factory()
tok.update_statistics(test_config)
assert temp_db_cursor.scalar("""SELECT count(*) FROM word
WHERE word_token like ' %' and
search_name_count > 0""") > 0
def test_update_word_tokens(tokenizer_factory):
tok = tokenizer_factory()
# This is a no-op and should just pass.
tok.update_word_tokens()
def test_normalize(analyzer):
assert analyzer.normalize('TEsT') == 'test'
def test_update_postcodes_from_db_empty(analyzer, table_factory, word_table,
create_postcode_id):
table_factory('location_postcode', 'postcode TEXT',
content=(('1234',), ('12 34',), ('AB23',), ('1234',)))
analyzer.update_postcodes_from_db()
assert word_table.get_postcodes() == {'1234', '12 34', 'AB23'}
def test_update_postcodes_from_db_add_and_remove(analyzer, table_factory, word_table,
create_postcode_id):
table_factory('location_postcode', 'postcode TEXT',
content=(('1234',), ('45BC', ), ('XX45', )))
word_table.add_postcode(' 1234', '1234')
word_table.add_postcode(' 5678', '5678')
analyzer.update_postcodes_from_db()
assert word_table.get_postcodes() == {'1234', '45BC', 'XX45'}
def test_update_special_phrase_empty_table(analyzer, word_table, make_standard_name):
analyzer.update_special_phrases([
("König bei", "amenity", "royal", "near"),
("Könige", "amenity", "royal", "-"),
("könige", "amenity", "royal", "-"),
("strasse", "highway", "primary", "in")
], True)
assert word_table.get_special() \
== set(((' #könig bei#', 'könig bei', 'amenity', 'royal', 'near'),
(' #könige#', 'könige', 'amenity', 'royal', None),
(' #strasse#', 'strasse', 'highway', 'primary', 'in')))
def test_update_special_phrase_delete_all(analyzer, word_table, make_standard_name):
word_table.add_special(' #foo#', 'foo', 'amenity', 'prison', 'in')
word_table.add_special(' #bar#', 'bar', 'highway', 'road', None)
assert word_table.count_special() == 2
analyzer.update_special_phrases([], True)
assert word_table.count_special() == 0
def test_update_special_phrases_no_replace(analyzer, word_table, make_standard_name):
word_table.add_special(' #foo#', 'foo', 'amenity', 'prison', 'in')
word_table.add_special(' #bar#', 'bar', 'highway', 'road', None)
assert word_table.count_special() == 2
analyzer.update_special_phrases([], False)
assert word_table.count_special() == 2
def test_update_special_phrase_modify(analyzer, word_table, make_standard_name):
word_table.add_special(' #foo#', 'foo', 'amenity', 'prison', 'in')
word_table.add_special(' #bar#', 'bar', 'highway', 'road', None)
assert word_table.count_special() == 2
analyzer.update_special_phrases([
('prison', 'amenity', 'prison', 'in'),
('bar', 'highway', 'road', '-'),
('garden', 'leisure', 'garden', 'near')
], True)
assert word_table.get_special() \
== set(((' #prison#', 'prison', 'amenity', 'prison', 'in'),
(' #bar#', 'bar', 'highway', 'road', None),
(' #garden#', 'garden', 'leisure', 'garden', 'near')))
def test_add_country_names(analyzer, word_table, make_standard_name):
analyzer.add_country_names('de', {'name': 'Germany',
'name:de': 'Deutschland',
'short_name': 'germany'})
assert word_table.get_country() \
== {('de', ' #germany#'),
('de', ' #deutschland#')}
def test_add_more_country_names(analyzer, word_table, make_standard_name):
word_table.add_country('fr', ' #france#')
word_table.add_country('it', ' #italy#')
word_table.add_country('it', ' #itala#')
analyzer.add_country_names('it', {'name': 'Italy', 'ref': 'IT'})
assert word_table.get_country() \
== {('fr', ' #france#'),
('it', ' #italy#'),
('it', ' #itala#'),
('it', ' #it#')}
@pytest.mark.parametrize('pcode', ['12345', 'AB 123', '34-345'])
def test_process_place_postcode(analyzer, create_postcode_id, word_table, pcode):
analyzer.process_place(PlaceInfo({'address': {'postcode' : pcode}}))
assert word_table.get_postcodes() == {pcode, }
@pytest.mark.parametrize('pcode', ['12:23', 'ab;cd;f', '123;836'])
def test_process_place_bad_postcode(analyzer, create_postcode_id, word_table, pcode):
analyzer.process_place(PlaceInfo({'address': {'postcode' : pcode}}))
assert not word_table.get_postcodes()
class TestHousenumberName:
@staticmethod
@pytest.fixture(autouse=True)
def setup_create_housenumbers(temp_db_cursor):
temp_db_cursor.execute("""CREATE OR REPLACE FUNCTION create_housenumbers(
housenumbers TEXT[],
OUT tokens TEXT, OUT normtext TEXT)
AS $$
SELECT housenumbers::TEXT, array_to_string(housenumbers, ';')
$$ LANGUAGE SQL""")
@staticmethod
@pytest.mark.parametrize('hnr', ['123a', '1', '101'])
def test_process_place_housenumbers_simple(analyzer, hnr):
info = analyzer.process_place(PlaceInfo({'address': {'housenumber' : hnr}}))
assert info['hnr'] == hnr
assert info['hnr_tokens'].startswith("{")
@staticmethod
def test_process_place_housenumbers_lists(analyzer):
info = analyzer.process_place(PlaceInfo({'address': {'conscriptionnumber' : '1; 2;3'}}))
assert set(info['hnr'].split(';')) == set(('1', '2', '3'))
@staticmethod
def test_process_place_housenumbers_duplicates(analyzer):
info = analyzer.process_place(PlaceInfo({'address': {'housenumber' : '134',
'conscriptionnumber' : '134',
'streetnumber' : '99a'}}))
assert set(info['hnr'].split(';')) == set(('134', '99a'))
class TestPlaceNames:
@pytest.fixture(autouse=True)
def setup(self, analyzer):
self.analyzer = analyzer
def expect_name_terms(self, info, *expected_terms):
tokens = self.analyzer.get_word_token_info(list(expected_terms))
for token in tokens:
assert token[2] is not None, "No token for {0}".format(token)
assert eval(info['names']) == set((t[2] for t in tokens)),\
f"Expected: {tokens}\nGot: {info['names']}"
def process_named_place(self, names):
return self.analyzer.process_place(PlaceInfo({'name': names}))
def test_simple_names(self):
info = self.process_named_place({'name': 'Soft bAr', 'ref': '34'})
self.expect_name_terms(info, '#Soft bAr', '#34', 'Soft', 'bAr', '34')
@pytest.mark.parametrize('sep', [',' , ';'])
def test_names_with_separator(self, sep):
info = self.process_named_place({'name': sep.join(('New York', 'Big Apple'))})
self.expect_name_terms(info, '#New York', '#Big Apple',
'new', 'york', 'big', 'apple')
def test_full_names_with_bracket(self):
info = self.process_named_place({'name': 'Houseboat (left)'})
self.expect_name_terms(info, '#Houseboat (left)', '#Houseboat',
'houseboat', '(left)')
def test_country_name(self, word_table):
place = PlaceInfo({'name' : {'name': 'Norge'},
'country_code': 'no',
'rank_address': 4,
'class': 'boundary',
'type': 'administrative'})
info = self.analyzer.process_place(place)
self.expect_name_terms(info, '#norge', 'norge')
assert word_table.get_country() == {('no', ' norge')}
class TestPlaceAddress:
@pytest.fixture(autouse=True)
def setup(self, analyzer):
self.analyzer = analyzer
@pytest.fixture
def getorcreate_hnr_id(self, temp_db_cursor):
temp_db_cursor.execute("""CREATE SEQUENCE seq_hnr start 1;
CREATE OR REPLACE FUNCTION getorcreate_housenumber_id(lookup_word TEXT)
RETURNS INTEGER AS $$
SELECT -nextval('seq_hnr')::INTEGER; $$ LANGUAGE SQL""")
def process_address(self, **kwargs):
return self.analyzer.process_place(PlaceInfo({'address': kwargs}))
def name_token_set(self, *expected_terms):
tokens = self.analyzer.get_word_token_info(list(expected_terms))
for token in tokens:
assert token[2] is not None, "No token for {0}".format(token)
return set((t[2] for t in tokens))
@pytest.mark.parametrize('pcode', ['12345', 'AB 123', '34-345'])
def test_process_place_postcode(self, word_table, pcode):
self.process_address(postcode=pcode)
assert word_table.get_postcodes() == {pcode, }
@pytest.mark.parametrize('pcode', ['12:23', 'ab;cd;f', '123;836'])
def test_process_place_bad_postcode(self, word_table, pcode):
self.process_address(postcode=pcode)
assert not word_table.get_postcodes()
@pytest.mark.parametrize('hnr', ['123a', '0', '101'])
def test_process_place_housenumbers_simple(self, hnr, getorcreate_hnr_id):
info = self.process_address(housenumber=hnr)
assert info['hnr'] == hnr.lower()
assert info['hnr_tokens'] == "{-1}"
def test_process_place_housenumbers_lists(self, getorcreate_hnr_id):
info = self.process_address(conscriptionnumber='1; 2;3')
assert set(info['hnr'].split(';')) == set(('1', '2', '3'))
assert info['hnr_tokens'] == "{-1,-2,-3}"
def test_process_place_housenumbers_duplicates(self, getorcreate_hnr_id):
info = self.process_address(housenumber='134',
conscriptionnumber='134',
streetnumber='99A')
assert set(info['hnr'].split(';')) == set(('134', '99a'))
assert info['hnr_tokens'] == "{-1,-2}"
def test_process_place_street(self):
# legacy tokenizer only indexes known names
self.analyzer.process_place(PlaceInfo({'name': {'name' : 'Grand Road'}}))
info = self.process_address(street='Grand Road')
assert eval(info['street']) == self.name_token_set('#Grand Road')
def test_process_place_street_empty(self):
info = self.process_address(street='🜵')
assert info['street'] == '{}'
def test_process_place_place(self):
self.analyzer.process_place(PlaceInfo({'name': {'name' : 'Honu Lulu'}}))
info = self.process_address(place='Honu Lulu')
assert eval(info['place_search']) == self.name_token_set('#Honu Lulu',
'Honu', 'Lulu')
assert eval(info['place_match']) == self.name_token_set('#Honu Lulu')
def test_process_place_place_empty(self):
info = self.process_address(place='🜵')
assert 'place' not in info
def test_process_place_address_terms(self):
for name in ('Zwickau', 'Hauptstraße', 'Sachsen'):
self.analyzer.process_place(PlaceInfo({'name': {'name' : name}}))
info = self.process_address(country='de', city='Zwickau', state='Sachsen',
suburb='Zwickau', street='Hauptstr',
full='right behind the church')
city = self.name_token_set('ZWICKAU')
state = self.name_token_set('SACHSEN')
print(info)
result = {k: eval(v[0]) for k,v in info['addr'].items()}
assert result == {'city': city, 'suburb': city, 'state': state}
def test_process_place_address_terms_empty(self):
info = self.process_address(country='de', city=' ', street='Hauptstr',
full='right behind the church')
assert 'addr' not in info
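Taken together, the last two tests pin down three behaviours: address-term lookup is case-insensitive (`ZWICKAU` finds `Zwickau`), only names previously indexed via `process_place()` produce entries in `info['addr']`, and the `country` and `full` parts never do. A toy model of that filtering, reconstructed from the assertions:

```
known = {'zwickau', 'hauptstraße', 'sachsen'}   # names indexed via process_place()
queried = {'city': 'ZWICKAU', 'suburb': 'Zwickau', 'state': 'Sachsen',
           'street': 'Hauptstr'}
# A part only makes it into info['addr'] if its normalized term is already
# known; 'Hauptstr' does not match the indexed street name.
addr = {part: term for part, term in queried.items() if term.lower() in known}
assert set(addr) == {'city', 'suburb', 'state'}
```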

View File

@@ -14,8 +2,6 @@ from nominatim_db.errors import UsageError
from nominatim_db.db.connection import server_version_tuple
import nominatim_db.version
from mock_legacy_word_table import MockLegacyWordTable
class DummyTokenizer:
def update_sql_functions(self, config):
@@ -28,10 +26,6 @@ def postprocess_mock(monkeypatch):
monkeypatch.setattr(migration.tokenizer_factory, 'get_tokenizer_for_db',
lambda *args: DummyTokenizer())
@pytest.fixture
def legacy_word_table(temp_db_conn):
return MockLegacyWordTable(temp_db_conn)
def test_no_migration_old_versions(temp_db_with_extensions, table_factory, def_config):
table_factory('country_name', 'name HSTORE, country_code TEXT')
@@ -145,26 +139,6 @@ def test_add_nominatim_property_table_repeat(temp_db_conn, temp_db_cursor,
assert temp_db_cursor.table_exists('nominatim_properties')
def test_change_housenumber_transliteration(temp_db_conn, temp_db_cursor,
legacy_word_table, placex_table):
placex_table.add(housenumber='3A')
temp_db_cursor.execute("""CREATE OR REPLACE FUNCTION make_standard_name(name TEXT)
RETURNS TEXT AS $$ SELECT lower(name) $$ LANGUAGE SQL """)
temp_db_cursor.execute("""CREATE OR REPLACE FUNCTION getorcreate_housenumber_id(lookup_word TEXT)
RETURNS INTEGER AS $$ SELECT 4325 $$ LANGUAGE SQL """)
migration.change_housenumber_transliteration(temp_db_conn)
temp_db_conn.commit()
assert temp_db_cursor.scalar('SELECT housenumber from placex') == '3a'
migration.change_housenumber_transliteration(temp_db_conn)
temp_db_conn.commit()
assert temp_db_cursor.scalar('SELECT housenumber from placex') == '3a'
def test_switch_placenode_geometry_index(temp_db_conn, temp_db_cursor, placex_table):
temp_db_cursor.execute("""CREATE INDEX idx_placex_adminname
ON placex (place_id)""")
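For reference, the `change_housenumber_transliteration` test removed above checked two things: the migration rewrites `placex.housenumber` through `make_standard_name()` (mocked as `lower()` in the test), and running the migration a second time is a no-op. A condensed model of that contract, with the migration reduced to a pure function for illustration:

```
def change_housenumber_transliteration(housenumbers):
    # Stand-in for the migration: re-normalize every stored housenumber
    # through make_standard_name(), mocked here as lower().
    return [hnr.lower() for hnr in housenumbers]

rows = ['3A']
rows = change_housenumber_transliteration(rows)
assert rows == ['3a']
rows = change_housenumber_transliteration(rows)
assert rows == ['3a']   # second run must not change anything
```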

View File

@@ -1,231 +0,0 @@
CREATE OR REPLACE FUNCTION test_getorcreate_amenity(lookup_word TEXT, normalized_word TEXT,
lookup_class text, lookup_type text)
RETURNS INTEGER
AS $$
DECLARE
lookup_token TEXT;
return_word_id INTEGER;
BEGIN
lookup_token := ' '||trim(lookup_word);
SELECT min(word_id) FROM word
WHERE word_token = lookup_token and word = normalized_word
and class = lookup_class and type = lookup_type
INTO return_word_id;
IF return_word_id IS NULL THEN
return_word_id := nextval('seq_word');
INSERT INTO word VALUES (return_word_id, lookup_token, normalized_word,
lookup_class, lookup_type, null, 0);
END IF;
RETURN return_word_id;
END;
$$
LANGUAGE plpgsql;
CREATE OR REPLACE FUNCTION test_getorcreate_amenityoperator(lookup_word TEXT,
normalized_word TEXT,
lookup_class text,
lookup_type text,
op text)
RETURNS INTEGER
AS $$
DECLARE
lookup_token TEXT;
return_word_id INTEGER;
BEGIN
lookup_token := ' '||trim(lookup_word);
SELECT min(word_id) FROM word
WHERE word_token = lookup_token and word = normalized_word
and class = lookup_class and type = lookup_type and operator = op
INTO return_word_id;
IF return_word_id IS NULL THEN
return_word_id := nextval('seq_word');
INSERT INTO word VALUES (return_word_id, lookup_token, normalized_word,
lookup_class, lookup_type, null, 0, op);
END IF;
RETURN return_word_id;
END;
$$
LANGUAGE plpgsql;
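Both helpers implement the same get-or-create pattern over the legacy `word` table: look for an existing `word_id` matching the token, normalized word, class and type (plus, in the second variant, the operator); if none exists, draw the next value from `seq_word` and insert a new row. The same logic in Python, as a sketch with an in-memory dict standing in for the table:

```
from itertools import count

seq_word = count(1)
word_table = {}   # (token, word, class, type, operator) -> word_id

def getorcreate(lookup_word, normalized_word, cls, typ, op=None):
    # Tokens are stored with a leading space, as in the SQL above.
    key = (' ' + lookup_word.strip(), normalized_word, cls, typ, op)
    if key not in word_table:
        word_table[key] = next(seq_word)   # nextval('seq_word')
    return word_table[key]

assert getorcreate('bar', 'bar', 'amenity', 'bar') == 1
assert getorcreate('bar', 'bar', 'amenity', 'bar') == 1   # reused, not re-inserted
assert getorcreate('pub in', 'pub in', 'amenity', 'pub', 'in') == 2
```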
SELECT test_getorcreate_amenity(make_standard_name('Aerodrome'), 'aerodrome', 'aeroway', 'aerodrome');
SELECT test_getorcreate_amenity(make_standard_name('Aerodromes'), 'aerodromes', 'aeroway', 'aerodrome');
SELECT test_getorcreate_amenityoperator(make_standard_name('Aerodrome in'), 'aerodrome in', 'aeroway', 'aerodrome', 'in');
SELECT test_getorcreate_amenityoperator(make_standard_name('Aerodromes in'), 'aerodromes in', 'aeroway', 'aerodrome', 'in');
SELECT test_getorcreate_amenityoperator(make_standard_name('Aerodrome near'), 'aerodrome near', 'aeroway', 'aerodrome', 'near');
SELECT test_getorcreate_amenityoperator(make_standard_name('Aerodromes near'), 'aerodromes near', 'aeroway', 'aerodrome', 'near');
SELECT test_getorcreate_amenity(make_standard_name('Airport'), 'airport', 'aeroway', 'aerodrome');
SELECT test_getorcreate_amenity(make_standard_name('Airports'), 'airports', 'aeroway', 'aerodrome');
SELECT test_getorcreate_amenityoperator(make_standard_name('Airport in'), 'airport in', 'aeroway', 'aerodrome', 'in');
SELECT test_getorcreate_amenityoperator(make_standard_name('Airports in'), 'airports in', 'aeroway', 'aerodrome', 'in');
SELECT test_getorcreate_amenityoperator(make_standard_name('Airport near'), 'airport near', 'aeroway', 'aerodrome', 'near');
SELECT test_getorcreate_amenityoperator(make_standard_name('Airports near'), 'airports near', 'aeroway', 'aerodrome', 'near');
SELECT test_getorcreate_amenity(make_standard_name('Bar'), 'bar', 'amenity', 'bar');
SELECT test_getorcreate_amenity(make_standard_name('Bars'), 'bars', 'amenity', 'bar');
SELECT test_getorcreate_amenityoperator(make_standard_name('Bar in'), 'bar in', 'amenity', 'bar', 'in');
SELECT test_getorcreate_amenityoperator(make_standard_name('Bars in'), 'bars in', 'amenity', 'bar', 'in');
SELECT test_getorcreate_amenityoperator(make_standard_name('Bar near'), 'bar near', 'amenity', 'bar', 'near');
SELECT test_getorcreate_amenityoperator(make_standard_name('Bars near'), 'bars near', 'amenity', 'bar', 'near');
SELECT test_getorcreate_amenity(make_standard_name('Bar'), 'bar', 'amenity', 'pub');
SELECT test_getorcreate_amenity(make_standard_name('Bars'), 'bars', 'amenity', 'pub');
SELECT test_getorcreate_amenityoperator(make_standard_name('Bar in'), 'bar in', 'amenity', 'pub', 'in');
SELECT test_getorcreate_amenityoperator(make_standard_name('Bars in'), 'bars in', 'amenity', 'pub', 'in');
SELECT test_getorcreate_amenityoperator(make_standard_name('Bar near'), 'bar near', 'amenity', 'pub', 'near');
SELECT test_getorcreate_amenityoperator(make_standard_name('Bars near'), 'bars near', 'amenity', 'pub', 'near');
SELECT test_getorcreate_amenity(make_standard_name('Food'), 'food', 'amenity', 'restaurant');
SELECT test_getorcreate_amenity(make_standard_name('Food'), 'food', 'amenity', 'restaurant');
SELECT test_getorcreate_amenityoperator(make_standard_name('Food in'), 'food in', 'amenity', 'restaurant', 'in');
SELECT test_getorcreate_amenityoperator(make_standard_name('Food in'), 'food in', 'amenity', 'restaurant', 'in');
SELECT test_getorcreate_amenityoperator(make_standard_name('Food near'), 'food near', 'amenity', 'restaurant', 'near');
SELECT test_getorcreate_amenityoperator(make_standard_name('Food near'), 'food near', 'amenity', 'restaurant', 'near');
SELECT test_getorcreate_amenity(make_standard_name('Pub'), 'pub', 'amenity', 'bar');
SELECT test_getorcreate_amenity(make_standard_name('Pubs'), 'pubs', 'amenity', 'bar');
SELECT test_getorcreate_amenityoperator(make_standard_name('Pub in'), 'pub in', 'amenity', 'bar', 'in');
SELECT test_getorcreate_amenityoperator(make_standard_name('Pubs in'), 'pubs in', 'amenity', 'bar', 'in');
SELECT test_getorcreate_amenityoperator(make_standard_name('Pub near'), 'pub near', 'amenity', 'bar', 'near');
SELECT test_getorcreate_amenityoperator(make_standard_name('Pubs near'), 'pubs near', 'amenity', 'bar', 'near');
SELECT test_getorcreate_amenity(make_standard_name('Pub'), 'pub', 'amenity', 'pub');
SELECT test_getorcreate_amenity(make_standard_name('Pubs'), 'pubs', 'amenity', 'pub');
SELECT test_getorcreate_amenityoperator(make_standard_name('Pub in'), 'pub in', 'amenity', 'pub', 'in');
SELECT test_getorcreate_amenityoperator(make_standard_name('Pubs in'), 'pubs in', 'amenity', 'pub', 'in');
SELECT test_getorcreate_amenityoperator(make_standard_name('Pub near'), 'pub near', 'amenity', 'pub', 'near');
SELECT test_getorcreate_amenityoperator(make_standard_name('Pubs near'), 'pubs near', 'amenity', 'pub', 'near');
SELECT test_getorcreate_amenity(make_standard_name('Restaurant'), 'restaurant', 'amenity', 'restaurant');
SELECT test_getorcreate_amenity(make_standard_name('Restaurants'), 'restaurants', 'amenity', 'restaurant');
SELECT test_getorcreate_amenityoperator(make_standard_name('Restaurant in'), 'restaurant in', 'amenity', 'restaurant', 'in');
SELECT test_getorcreate_amenityoperator(make_standard_name('Restaurants in'), 'restaurants in', 'amenity', 'restaurant', 'in');
SELECT test_getorcreate_amenityoperator(make_standard_name('Restaurant near'), 'restaurant near', 'amenity', 'restaurant', 'near');
SELECT test_getorcreate_amenityoperator(make_standard_name('Restaurants near'), 'restaurants near', 'amenity', 'restaurant', 'near');
SELECT test_getorcreate_amenity(make_standard_name('Mural'), 'mural', 'artwork_type', 'mural');
SELECT test_getorcreate_amenity(make_standard_name('Murals'), 'murals', 'artwork_type', 'mural');
SELECT test_getorcreate_amenityoperator(make_standard_name('Mural in'), 'mural in', 'artwork_type', 'mural', 'in');
SELECT test_getorcreate_amenityoperator(make_standard_name('Murals in'), 'murals in', 'artwork_type', 'mural', 'in');
SELECT test_getorcreate_amenityoperator(make_standard_name('Mural near'), 'mural near', 'artwork_type', 'mural', 'near');
SELECT test_getorcreate_amenityoperator(make_standard_name('Murals near'), 'murals near', 'artwork_type', 'mural', 'near');
SELECT test_getorcreate_amenity(make_standard_name('Sculpture'), 'sculpture', 'artwork_type', 'sculpture');
SELECT test_getorcreate_amenity(make_standard_name('Sculptures'), 'sculptures', 'artwork_type', 'sculpture');
SELECT test_getorcreate_amenityoperator(make_standard_name('Sculpture in'), 'sculpture in', 'artwork_type', 'sculpture', 'in');
SELECT test_getorcreate_amenityoperator(make_standard_name('Sculptures in'), 'sculptures in', 'artwork_type', 'sculpture', 'in');
SELECT test_getorcreate_amenityoperator(make_standard_name('Sculpture near'), 'sculpture near', 'artwork_type', 'sculpture', 'near');
SELECT test_getorcreate_amenityoperator(make_standard_name('Sculptures near'), 'sculptures near', 'artwork_type', 'sculpture', 'near');
SELECT test_getorcreate_amenity(make_standard_name('Statue'), 'statue', 'artwork_type', 'statue');
SELECT test_getorcreate_amenity(make_standard_name('Statues'), 'statues', 'artwork_type', 'statue');
SELECT test_getorcreate_amenityoperator(make_standard_name('Statue in'), 'statue in', 'artwork_type', 'statue', 'in');
SELECT test_getorcreate_amenityoperator(make_standard_name('Statues in'), 'statues in', 'artwork_type', 'statue', 'in');
SELECT test_getorcreate_amenityoperator(make_standard_name('Statue near'), 'statue near', 'artwork_type', 'statue', 'near');
SELECT test_getorcreate_amenityoperator(make_standard_name('Statues near'), 'statues near', 'artwork_type', 'statue', 'near');
SELECT test_getorcreate_amenity(make_standard_name('ATM'), 'atm', 'atm', 'yes');
SELECT test_getorcreate_amenity(make_standard_name('ATMs'), 'atms', 'atm', 'yes');
SELECT test_getorcreate_amenityoperator(make_standard_name('ATM in'), 'atm in', 'atm', 'yes', 'in');
SELECT test_getorcreate_amenityoperator(make_standard_name('ATMs in'), 'atms in', 'atm', 'yes', 'in');
SELECT test_getorcreate_amenityoperator(make_standard_name('ATM near'), 'atm near', 'atm', 'yes', 'near');
SELECT test_getorcreate_amenityoperator(make_standard_name('ATMs near'), 'atms near', 'atm', 'yes', 'near');
SELECT test_getorcreate_amenity(make_standard_name('National Park'), 'national park', 'boundary', 'national_park');
SELECT test_getorcreate_amenity(make_standard_name('National Parks'), 'national parks', 'boundary', 'national_park');
SELECT test_getorcreate_amenityoperator(make_standard_name('National Park in'), 'national park in', 'boundary', 'national_park', 'in');
SELECT test_getorcreate_amenityoperator(make_standard_name('National Parks in'), 'national parks in', 'boundary', 'national_park', 'in');
SELECT test_getorcreate_amenityoperator(make_standard_name('National Park near'), 'national park near', 'boundary', 'national_park', 'near');
SELECT test_getorcreate_amenityoperator(make_standard_name('National Parks near'), 'national parks near', 'boundary', 'national_park', 'near');
SELECT test_getorcreate_amenity(make_standard_name('Changing table'), 'changing table', 'changing_table', 'yes');
SELECT test_getorcreate_amenity(make_standard_name('Changing tables'), 'changing tables', 'changing_table', 'yes');
SELECT test_getorcreate_amenityoperator(make_standard_name('Changing table in'), 'changing table in', 'changing_table', 'yes', 'in');
SELECT test_getorcreate_amenityoperator(make_standard_name('Changing tables in'), 'changing tables in', 'changing_table', 'yes', 'in');
SELECT test_getorcreate_amenityoperator(make_standard_name('Changing table near'), 'changing table near', 'changing_table', 'yes', 'near');
SELECT test_getorcreate_amenityoperator(make_standard_name('Changing tables near'), 'changing tables near', 'changing_table', 'yes', 'near');
SELECT test_getorcreate_amenity(make_standard_name('Roundabout'), 'roundabout', 'junction', 'roundabout');
SELECT test_getorcreate_amenity(make_standard_name('Roundabouts'), 'roundabouts', 'junction', 'roundabout');
SELECT test_getorcreate_amenityoperator(make_standard_name('Roundabout in'), 'roundabout in', 'junction', 'roundabout', 'in');
SELECT test_getorcreate_amenityoperator(make_standard_name('Roundabouts in'), 'roundabouts in', 'junction', 'roundabout', 'in');
SELECT test_getorcreate_amenityoperator(make_standard_name('Roundabout near'), 'roundabout near', 'junction', 'roundabout', 'near');
SELECT test_getorcreate_amenityoperator(make_standard_name('Roundabouts near'), 'roundabouts near', 'junction', 'roundabout', 'near');
SELECT test_getorcreate_amenity(make_standard_name('Plaque'), 'plaque', 'memorial', 'plaque');
SELECT test_getorcreate_amenity(make_standard_name('Plaques'), 'plaques', 'memorial', 'plaque');
SELECT test_getorcreate_amenityoperator(make_standard_name('Plaque in'), 'plaque in', 'memorial', 'plaque', 'in');
SELECT test_getorcreate_amenityoperator(make_standard_name('Plaques in'), 'plaques in', 'memorial', 'plaque', 'in');
SELECT test_getorcreate_amenityoperator(make_standard_name('Plaque near'), 'plaque near', 'memorial', 'plaque', 'near');
SELECT test_getorcreate_amenityoperator(make_standard_name('Plaques near'), 'plaques near', 'memorial', 'plaque', 'near');
SELECT test_getorcreate_amenity(make_standard_name('Statue'), 'statue', 'memorial', 'statue');
SELECT test_getorcreate_amenity(make_standard_name('Statues'), 'statues', 'memorial', 'statue');
SELECT test_getorcreate_amenityoperator(make_standard_name('Statue in'), 'statue in', 'memorial', 'statue', 'in');
SELECT test_getorcreate_amenityoperator(make_standard_name('Statues in'), 'statues in', 'memorial', 'statue', 'in');
SELECT test_getorcreate_amenityoperator(make_standard_name('Statue near'), 'statue near', 'memorial', 'statue', 'near');
SELECT test_getorcreate_amenityoperator(make_standard_name('Statues near'), 'statues near', 'memorial', 'statue', 'near');
SELECT test_getorcreate_amenity(make_standard_name('Stolperstein'), 'stolperstein', 'memorial', 'stolperstein');
SELECT test_getorcreate_amenity(make_standard_name('Stolpersteins'), 'stolpersteins', 'memorial', 'stolperstein');
SELECT test_getorcreate_amenity(make_standard_name('Stolpersteine'), 'stolpersteine', 'memorial', 'stolperstein');
SELECT test_getorcreate_amenityoperator(make_standard_name('Stolperstein in'), 'stolperstein in', 'memorial', 'stolperstein', 'in');
SELECT test_getorcreate_amenityoperator(make_standard_name('Stolpersteins in'), 'stolpersteins in', 'memorial', 'stolperstein', 'in');
SELECT test_getorcreate_amenityoperator(make_standard_name('Stolpersteine in'), 'stolpersteine in', 'memorial', 'stolperstein', 'in');
SELECT test_getorcreate_amenityoperator(make_standard_name('Stolperstein near'), 'stolperstein near', 'memorial', 'stolperstein', 'near');
SELECT test_getorcreate_amenityoperator(make_standard_name('Stolpersteins near'), 'stolpersteins near', 'memorial', 'stolperstein', 'near');
SELECT test_getorcreate_amenityoperator(make_standard_name('Stolpersteine near'), 'stolpersteine near', 'memorial', 'stolperstein', 'near');
SELECT test_getorcreate_amenity(make_standard_name('War Memorial'), 'war memorial', 'memorial', 'war_memorial');
SELECT test_getorcreate_amenity(make_standard_name('War Memorials'), 'war memorials', 'memorial', 'war_memorial');
SELECT test_getorcreate_amenityoperator(make_standard_name('War Memorial in'), 'war memorial in', 'memorial', 'war_memorial', 'in');
SELECT test_getorcreate_amenityoperator(make_standard_name('War Memorials in'), 'war memorials in', 'memorial', 'war_memorial', 'in');
SELECT test_getorcreate_amenityoperator(make_standard_name('War Memorial near'), 'war memorial near', 'memorial', 'war_memorial', 'near');
SELECT test_getorcreate_amenityoperator(make_standard_name('War Memorials near'), 'war memorials near', 'memorial', 'war_memorial', 'near');
CREATE INDEX idx_placex_classtype ON placex (class, type);
CREATE TABLE place_classtype_aeroway_aerodrome AS SELECT place_id AS place_id,st_centroid(geometry) AS centroid FROM placex WHERE class = 'aeroway' AND type = 'aerodrome';
CREATE INDEX idx_place_classtype_aeroway_aerodrome_centroid ON place_classtype_aeroway_aerodrome USING GIST (centroid);
CREATE INDEX idx_place_classtype_aeroway_aerodrome_place_id ON place_classtype_aeroway_aerodrome USING btree(place_id);
GRANT SELECT ON place_classtype_aeroway_aerodrome TO "www-data";
CREATE TABLE place_classtype_amenity_bar AS SELECT place_id AS place_id,st_centroid(geometry) AS centroid FROM placex WHERE class = 'amenity' AND type = 'bar';
CREATE INDEX idx_place_classtype_amenity_bar_centroid ON place_classtype_amenity_bar USING GIST (centroid);
CREATE INDEX idx_place_classtype_amenity_bar_place_id ON place_classtype_amenity_bar USING btree(place_id);
GRANT SELECT ON place_classtype_amenity_bar TO "www-data";
CREATE TABLE place_classtype_amenity_pub AS SELECT place_id AS place_id,st_centroid(geometry) AS centroid FROM placex WHERE class = 'amenity' AND type = 'pub';
CREATE INDEX idx_place_classtype_amenity_pub_centroid ON place_classtype_amenity_pub USING GIST (centroid);
CREATE INDEX idx_place_classtype_amenity_pub_place_id ON place_classtype_amenity_pub USING btree(place_id);
GRANT SELECT ON place_classtype_amenity_pub TO "www-data";
CREATE TABLE place_classtype_amenity_restaurant AS SELECT place_id AS place_id,st_centroid(geometry) AS centroid FROM placex WHERE class = 'amenity' AND type = 'restaurant';
CREATE INDEX idx_place_classtype_amenity_restaurant_centroid ON place_classtype_amenity_restaurant USING GIST (centroid);
CREATE INDEX idx_place_classtype_amenity_restaurant_place_id ON place_classtype_amenity_restaurant USING btree(place_id);
GRANT SELECT ON place_classtype_amenity_restaurant TO "www-data";
CREATE TABLE place_classtype_artwork_type_mural AS SELECT place_id AS place_id,st_centroid(geometry) AS centroid FROM placex WHERE class = 'artwork_type' AND type = 'mural';
CREATE INDEX idx_place_classtype_artwork_type_mural_centroid ON place_classtype_artwork_type_mural USING GIST (centroid);
CREATE INDEX idx_place_classtype_artwork_type_mural_place_id ON place_classtype_artwork_type_mural USING btree(place_id);
GRANT SELECT ON place_classtype_artwork_type_mural TO "www-data";
CREATE TABLE place_classtype_artwork_type_sculpture AS SELECT place_id AS place_id,st_centroid(geometry) AS centroid FROM placex WHERE class = 'artwork_type' AND type = 'sculpture';
CREATE INDEX idx_place_classtype_artwork_type_sculpture_centroid ON place_classtype_artwork_type_sculpture USING GIST (centroid);
CREATE INDEX idx_place_classtype_artwork_type_sculpture_place_id ON place_classtype_artwork_type_sculpture USING btree(place_id);
GRANT SELECT ON place_classtype_artwork_type_sculpture TO "www-data";
CREATE TABLE place_classtype_artwork_type_statue AS SELECT place_id AS place_id,st_centroid(geometry) AS centroid FROM placex WHERE class = 'artwork_type' AND type = 'statue';
CREATE INDEX idx_place_classtype_artwork_type_statue_centroid ON place_classtype_artwork_type_statue USING GIST (centroid);
CREATE INDEX idx_place_classtype_artwork_type_statue_place_id ON place_classtype_artwork_type_statue USING btree(place_id);
GRANT SELECT ON place_classtype_artwork_type_statue TO "www-data";
CREATE TABLE place_classtype_atm_yes AS SELECT place_id AS place_id,st_centroid(geometry) AS centroid FROM placex WHERE class = 'atm' AND type = 'yes';
CREATE INDEX idx_place_classtype_atm_yes_centroid ON place_classtype_atm_yes USING GIST (centroid);
CREATE INDEX idx_place_classtype_atm_yes_place_id ON place_classtype_atm_yes USING btree(place_id);
GRANT SELECT ON place_classtype_atm_yes TO "www-data";
CREATE TABLE place_classtype_boundary_national_park AS SELECT place_id AS place_id,st_centroid(geometry) AS centroid FROM placex WHERE class = 'boundary' AND type = 'national_park';
CREATE INDEX idx_place_classtype_boundary_national_park_centroid ON place_classtype_boundary_national_park USING GIST (centroid);
CREATE INDEX idx_place_classtype_boundary_national_park_place_id ON place_classtype_boundary_national_park USING btree(place_id);
GRANT SELECT ON place_classtype_boundary_national_park TO "www-data";
CREATE TABLE place_classtype_changing_table_yes AS SELECT place_id AS place_id,st_centroid(geometry) AS centroid FROM placex WHERE class = 'changing_table' AND type = 'yes';
CREATE INDEX idx_place_classtype_changing_table_yes_centroid ON place_classtype_changing_table_yes USING GIST (centroid);
CREATE INDEX idx_place_classtype_changing_table_yes_place_id ON place_classtype_changing_table_yes USING btree(place_id);
GRANT SELECT ON place_classtype_changing_table_yes TO "www-data";
CREATE TABLE place_classtype_junction_roundabout AS SELECT place_id AS place_id,st_centroid(geometry) AS centroid FROM placex WHERE class = 'junction' AND type = 'roundabout';
CREATE INDEX idx_place_classtype_junction_roundabout_centroid ON place_classtype_junction_roundabout USING GIST (centroid);
CREATE INDEX idx_place_classtype_junction_roundabout_place_id ON place_classtype_junction_roundabout USING btree(place_id);
GRANT SELECT ON place_classtype_junction_roundabout TO "www-data";
CREATE TABLE place_classtype_memorial_plaque AS SELECT place_id AS place_id,st_centroid(geometry) AS centroid FROM placex WHERE class = 'memorial' AND type = 'plaque';
CREATE INDEX idx_place_classtype_memorial_plaque_centroid ON place_classtype_memorial_plaque USING GIST (centroid);
CREATE INDEX idx_place_classtype_memorial_plaque_place_id ON place_classtype_memorial_plaque USING btree(place_id);
GRANT SELECT ON place_classtype_memorial_plaque TO "www-data";
CREATE TABLE place_classtype_memorial_statue AS SELECT place_id AS place_id,st_centroid(geometry) AS centroid FROM placex WHERE class = 'memorial' AND type = 'statue';
CREATE INDEX idx_place_classtype_memorial_statue_centroid ON place_classtype_memorial_statue USING GIST (centroid);
CREATE INDEX idx_place_classtype_memorial_statue_place_id ON place_classtype_memorial_statue USING btree(place_id);
GRANT SELECT ON place_classtype_memorial_statue TO "www-data";
CREATE TABLE place_classtype_memorial_stolperstein AS SELECT place_id AS place_id,st_centroid(geometry) AS centroid FROM placex WHERE class = 'memorial' AND type = 'stolperstein';
CREATE INDEX idx_place_classtype_memorial_stolperstein_centroid ON place_classtype_memorial_stolperstein USING GIST (centroid);
CREATE INDEX idx_place_classtype_memorial_stolperstein_place_id ON place_classtype_memorial_stolperstein USING btree(place_id);
GRANT SELECT ON place_classtype_memorial_stolperstein TO "www-data";
CREATE TABLE place_classtype_memorial_war_memorial AS SELECT place_id AS place_id,st_centroid(geometry) AS centroid FROM placex WHERE class = 'memorial' AND type = 'war_memorial';
CREATE INDEX idx_place_classtype_memorial_war_memorial_centroid ON place_classtype_memorial_war_memorial USING GIST (centroid);
CREATE INDEX idx_place_classtype_memorial_war_memorial_place_id ON place_classtype_memorial_war_memorial USING btree(place_id);
GRANT SELECT ON place_classtype_memorial_war_memorial TO "www-data";
DROP INDEX idx_placex_classtype;
DROP FUNCTION test_getorcreate_amenity;
DROP FUNCTION test_getorcreate_amenityoperator;
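The block above follows one rigid pattern per class/type pair: a `place_classtype_<class>_<type>` table of centroids, a GIST index on the centroid, a btree index on `place_id`, and a `SELECT` grant for the web user. Should such a fixture ever need regenerating, a small generator along these lines would do; this is a sketch only, the real fixture came out of Nominatim's special-phrases machinery:

```
TEMPLATE = """\
CREATE TABLE place_classtype_{c}_{t} AS SELECT place_id AS place_id,st_centroid(geometry) AS centroid FROM placex WHERE class = '{c}' AND type = '{t}';
CREATE INDEX idx_place_classtype_{c}_{t}_centroid ON place_classtype_{c}_{t} USING GIST (centroid);
CREATE INDEX idx_place_classtype_{c}_{t}_place_id ON place_classtype_{c}_{t} USING btree(place_id);
GRANT SELECT ON place_classtype_{c}_{t} TO "www-data";"""

# Emit one block per special-phrase class/type pair.
for c, t in [('aeroway', 'aerodrome'), ('amenity', 'bar')]:
    print(TEMPLATE.format(c=c, t=t))
```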