mirror of
https://github.com/osm-search/Nominatim.git
synced 2024-11-22 03:42:11 +03:00
Moved KANJI_MAP to icu-rules
This commit is contained in:
parent
4d61cc87cf
commit
67e1c7dc72
@ -18,25 +18,6 @@ from nominatim.tokenizer.sanitizers.base import ProcessInfo
|
||||
from nominatim.tokenizer.sanitizers.config import SanitizerConfig
|
||||
from nominatim.data.place_name import PlaceName
|
||||
|
||||
KANJI_MAP = {
|
||||
ord('零'): '0',
|
||||
ord('一'): '1',
|
||||
ord('二'): '2',
|
||||
ord('三'): '3',
|
||||
ord('四'): '4',
|
||||
ord('五'): '5',
|
||||
ord('六'): '6',
|
||||
ord('七'): '7',
|
||||
ord('八'): '8',
|
||||
ord('九'): '9'
|
||||
}
|
||||
|
||||
def convert_kanji_sequence_to_number(sequence: str) -> str:
|
||||
"""Converts Kanji numbers to Arabic numbers
|
||||
"""
|
||||
converted = sequence.translate(KANJI_MAP)
|
||||
return converted
|
||||
|
||||
def create(_: SanitizerConfig) -> Callable[[ProcessInfo], None]:
|
||||
"""Set up the sanitizer
|
||||
"""
|
||||
@ -49,11 +30,6 @@ def reconbine_housenumber(
|
||||
) -> List[PlaceName]:
|
||||
""" Recombine the tag of housenumber by using housenumber and blocknumber
|
||||
"""
|
||||
if tmp_blocknumber:
|
||||
tmp_blocknumber = convert_kanji_sequence_to_number(tmp_blocknumber)
|
||||
if tmp_housenumber:
|
||||
tmp_housenumber = convert_kanji_sequence_to_number(tmp_housenumber)
|
||||
|
||||
if tmp_blocknumber and tmp_housenumber:
|
||||
new_address.append(
|
||||
PlaceName(
|
||||
@ -87,11 +63,6 @@ def reconbine_place(
|
||||
) -> List[PlaceName]:
|
||||
""" Recombine the tag of place by using neighbourhood and quarter
|
||||
"""
|
||||
if tmp_neighbourhood:
|
||||
tmp_neighbourhood = convert_kanji_sequence_to_number(tmp_neighbourhood)
|
||||
if tmp_quarter:
|
||||
tmp_quarter = convert_kanji_sequence_to_number(tmp_quarter)
|
||||
|
||||
if tmp_neighbourhood and tmp_quarter:
|
||||
new_address.append(
|
||||
PlaceName(
|
||||
|
@ -1,14 +1,14 @@
|
||||
- "[𞥐𐒠߀𖭐꤀𖩠𑓐𑑐𑋰𑄶꩐꘠᱀᭐᮰᠐០᥆༠໐꧰႐᪐᪀᧐𑵐꯰᱐𑱐𑜰𑛀𑙐𑇐꧐꣐෦𑁦0𝟶𝟘𝟬𝟎𝟢₀⓿⓪⁰] > 0"
|
||||
- "[𞥑𐒡߁𖭑꤁𖩡𑓑𑑑𑋱𑄷꩑꘡᱁᭑᮱᠑១᥇༡໑꧱႑᪑᪁᧑𑵑꯱᱑𑱑𑜱𑛁𑙑𑇑꧑꣑෧𑁧1𝟷𝟙𝟭𝟏𝟣₁¹①⑴⒈❶➀➊⓵] > 1"
|
||||
- "[𞥒𐒢߂𖭒꤂𖩢𑓒𑑒𑋲𑄸꩒꘢᱂᭒᮲᠒២᥈༢໒꧲႒᪒᪂᧒𑵒꯲᱒𑱒𑜲𑛂𑙒𑇒꧒꣒෨𑁨2𝟸𝟚𝟮𝟐𝟤₂²②⑵⒉❷➁➋⓶] > 2"
|
||||
- "[𞥓𐒣߃𖭓꤃𖩣𑓓𑑓𑋳𑄹꩓꘣᱃᭓᮳᠓៣᥉༣໓꧳႓᪓᪃᧓𑵓꯳᱓𑱓𑜳𑛃𑙓𑇓꧓꣓෩𑁩3𝟹𝟛𝟯𝟑𝟥₃³③⑶⒊❸➂➌⓷] > 3"
|
||||
- "[𞥔𐒤߄𖭔꤄𖩤𑓔𑑔𑋴𑄺꩔꘤᱄᭔᮴᠔៤᥊༤໔꧴႔᪔᪄᧔𑵔꯴᱔𑱔𑜴𑛄𑙔𑇔꧔꣔෪𑁪4𝟺𝟜𝟰𝟒𝟦₄⁴④⑷⒋❹➃➍⓸] > 4"
|
||||
- "[𞥕𐒥߅𖭕꤅𖩥𑓕𑑕𑋵𑄻꩕꘥᱅᭕᮵᠕៥᥋༥໕꧵႕᪕᪅᧕𑵕꯵᱕𑱕𑜵𑛅𑙕𑇕꧕꣕෫𑁫5𝟻𝟝𝟱𝟓𝟧₅⁵⑤⑸⒌❺➄➎⓹] > 5"
|
||||
- "[𞥖𐒦߆𖭖꤆𖩦𑓖𑑖𑋶𑄼꩖꘦᱆᭖᮶᠖៦᥌༦໖꧶႖᪖᪆᧖𑵖꯶᱖𑱖𑜶𑛆𑙖𑇖꧖꣖෬𑁬6𝟼𝟞𝟲𝟔𝟨₆⁶⑥⑹⒍❻➅➏⓺] > 6"
|
||||
- "[𞥗𐒧߇𖭗꤇𖩧𑓗𑑗𑋷𑄽꩗꘧᱇᭗᮷᠗៧᥍༧໗꧷႗᪗᪇᧗𑵗꯷᱗𑱗𑜷𑛇𑙗𑇗꧗꣗෭𑁭7𝟽𝟟𝟳𝟕𝟩₇⁷⑦⑺⒎❼➆➐⓻] > 7"
|
||||
- "[𞥘𐒨߈𖭘꤈𖩨𑓘𑑘𑋸𑄾꩘꘨᱈᭘᮸᠘៨᥎༨໘꧸႘᪘᪈᧘𑵘꯸᱘𑱘𑜸𑛈𑙘𑇘꧘꣘෮𑁮8𝟾𝟠𝟴𝟖𝟪₈⁸⑧⑻⒏❽➇➑⓼] > 8"
|
||||
- "[𞥙𐒩߉𖭙꤉𖩩𑓙𑑙𑋹𑄿꩙꘩᱉᭙᮹᠙៩᥏༩໙꧹႙᪙᪉᧙𑵙꯹᱙𑱙𑜹𑛉𑙙𑇙꧙꣙෯𑁯9𝟿𝟡𝟵𝟗𝟫₉⁹⑨⑼⒐❾➈➒⓽] > 9"
|
||||
- "[𑜺⑩⑽⒑❿➉➓⓾] > '10'"
|
||||
- "[𞥐𐒠߀𖭐꤀𖩠𑓐𑑐𑋰𑄶꩐꘠᱀᭐᮰᠐០᥆༠໐꧰႐᪐᪀᧐𑵐꯰᱐𑱐𑜰𑛀𑙐𑇐꧐꣐෦𑁦0𝟶𝟘𝟬𝟎𝟢₀⓿⓪⁰零] > 0"
|
||||
- "[𞥑𐒡߁𖭑꤁𖩡𑓑𑑑𑋱𑄷꩑꘡᱁᭑᮱᠑១᥇༡໑꧱႑᪑᪁᧑𑵑꯱᱑𑱑𑜱𑛁𑙑𑇑꧑꣑෧𑁧1𝟷𝟙𝟭𝟏𝟣₁¹①⑴⒈❶➀➊⓵一] > 1"
|
||||
- "[𞥒𐒢߂𖭒꤂𖩢𑓒𑑒𑋲𑄸꩒꘢᱂᭒᮲᠒២᥈༢໒꧲႒᪒᪂᧒𑵒꯲᱒𑱒𑜲𑛂𑙒𑇒꧒꣒෨𑁨2𝟸𝟚𝟮𝟐𝟤₂²②⑵⒉❷➁➋⓶二] > 2"
|
||||
- "[𞥓𐒣߃𖭓꤃𖩣𑓓𑑓𑋳𑄹꩓꘣᱃᭓᮳᠓៣᥉༣໓꧳႓᪓᪃᧓𑵓꯳᱓𑱓𑜳𑛃𑙓𑇓꧓꣓෩𑁩3𝟹𝟛𝟯𝟑𝟥₃³③⑶⒊❸➂➌⓷三] > 3"
|
||||
- "[𞥔𐒤߄𖭔꤄𖩤𑓔𑑔𑋴𑄺꩔꘤᱄᭔᮴᠔៤᥊༤໔꧴႔᪔᪄᧔𑵔꯴᱔𑱔𑜴𑛄𑙔𑇔꧔꣔෪𑁪4𝟺𝟜𝟰𝟒𝟦₄⁴④⑷⒋❹➃➍⓸四] > 4"
|
||||
- "[𞥕𐒥߅𖭕꤅𖩥𑓕𑑕𑋵𑄻꩕꘥᱅᭕᮵᠕៥᥋༥໕꧵႕᪕᪅᧕𑵕꯵᱕𑱕𑜵𑛅𑙕𑇕꧕꣕෫𑁫5𝟻𝟝𝟱𝟓𝟧₅⁵⑤⑸⒌❺➄➎⓹五] > 5"
|
||||
- "[𞥖𐒦߆𖭖꤆𖩦𑓖𑑖𑋶𑄼꩖꘦᱆᭖᮶᠖៦᥌༦໖꧶႖᪖᪆᧖𑵖꯶᱖𑱖𑜶𑛆𑙖𑇖꧖꣖෬𑁬6𝟼𝟞𝟲𝟔𝟨₆⁶⑥⑹⒍❻➅➏⓺六] > 6"
|
||||
- "[𞥗𐒧߇𖭗꤇𖩧𑓗𑑗𑋷𑄽꩗꘧᱇᭗᮷᠗៧᥍༧໗꧷႗᪗᪇᧗𑵗꯷᱗𑱗𑜷𑛇𑙗𑇗꧗꣗෭𑁭7𝟽𝟟𝟳𝟕𝟩₇⁷⑦⑺⒎❼➆➐⓻七] > 7"
|
||||
- "[𞥘𐒨߈𖭘꤈𖩨𑓘𑑘𑋸𑄾꩘꘨᱈᭘᮸᠘៨᥎༨໘꧸႘᪘᪈᧘𑵘꯸᱘𑱘𑜸𑛈𑙘𑇘꧘꣘෮𑁮8𝟾𝟠𝟴𝟖𝟪₈⁸⑧⑻⒏❽➇➑⓼八] > 8"
|
||||
- "[𞥙𐒩߉𖭙꤉𖩩𑓙𑑙𑋹𑄿꩙꘩᱉᭙᮹᠙៩᥏༩໙꧹႙᪙᪉᧙𑵙꯹᱙𑱙𑜹𑛉𑙙𑇙꧙꣙෯𑁯9𝟿𝟡𝟵𝟗𝟫₉⁹⑨⑼⒐❾➈➒⓽九] > 9"
|
||||
- "[𑜺⑩⑽⒑❿➉➓⓾十] > '10'"
|
||||
- "[⑪⑾⒒⓫] > '11'"
|
||||
- "[⑫⑿⒓⓬] > '12'"
|
||||
- "[⑬⒀⒔⓭] > '13'"
|
||||
|
@ -78,7 +78,3 @@ class TestTagJapanese:
|
||||
def test_housenumber_blocknumber_neighbourhood_quarter(self):
|
||||
res = self.run_sanitizer_on('address', block_number='6', housenumber='2', quarter='kase', neighbourhood='8')
|
||||
assert res == [('6-2','housenumber'),('kase8','place')]
|
||||
|
||||
def test_KANJI_MAP(self):
|
||||
res = self.run_sanitizer_on('address', block_number='六', housenumber='二', quarter='kase', neighbourhood='八')
|
||||
assert res == [('6-2','housenumber'),('kase8','place')]
|
||||
|
Loading…
Reference in New Issue
Block a user