diff --git a/settings/icu_tokenizer.yaml b/settings/icu_tokenizer.yaml index f30578a2..1fa467be 100644 --- a/settings/icu_tokenizer.yaml +++ b/settings/icu_tokenizer.yaml @@ -38,6 +38,7 @@ sanitizers: default-pattern: "[A-Z0-9- ]{3,12}" - step: clean-tiger-tags - step: split-name-list + delimiters: ; - step: strip-brace-terms - step: tag-analyzer-by-language filter-kind: [".*name.*"] diff --git a/test/bdd/db/import/search_name.feature b/test/bdd/db/import/search_name.feature index 7c75ca49..538bcbb3 100644 --- a/test/bdd/db/import/search_name.feature +++ b/test/bdd/db/import/search_name.feature @@ -2,21 +2,26 @@ Feature: Creation of search terms Tests that search_name table is filled correctly - Scenario Outline: Comma- and semicolon separated names appear as full names + Scenario: Semicolon-separated names appear as separate full names Given the places | osm | class | type | name+alt_name | - | N1 | place | city | New York<sep>Big Apple | + | N1 | place | city | New York; Big Apple | When importing Then search_name contains | object | name_vector | | N1 | #New York, #Big Apple | - Examples: - | sep | - | , | - | ; | + @fail-legacy + Scenario: Comma-separated names appear as a single full name + Given the places + | osm | class | type | name+alt_name | + | N1 | place | city | New York, Big Apple | + When importing + Then search_name contains + | object | name_vector | + | N1 | #New York Big Apple | - Scenario Outline: Name parts before brackets appear as full names + Scenario: Name parts before brackets appear as full names Given the places | osm | class | type | name+name | | N1 | place | city | Halle (Saale) |