fix Python linting errors

This commit is contained in:
Sarah Hoffmann 2021-07-25 15:30:47 +02:00
parent 001b2aa9f9
commit d48793c22c

View File

@ -79,7 +79,6 @@ class LegacyICUTokenizer:
""" Do any required postprocessing to make the tokenizer data ready
for use.
"""
pass
def update_sql_functions(self, config):
@ -156,8 +155,27 @@ class LegacyICUTokenizer:
LOG.warning("Precomputing word tokens")
# get partial words and their frequencies
words = self._count_partial_terms(conn)
# copy them back into the word table
with CopyBuffer() as copystr:
for term, cnt in words.items():
copystr.add('w', term, json.dumps({'count': cnt}))
with conn.cursor() as cur:
copystr.copy_out(cur, 'word',
columns=['type', 'word_token', 'info'])
cur.execute("""UPDATE word SET word_id = nextval('seq_word')
WHERE word_id is null and type = 'w'""")
conn.commit()
def _count_partial_terms(self, conn):
""" Count the partial terms from the names in the place table.
"""
words = Counter()
name_proc = ICUNameProcessor(self.naming_rules)
with conn.cursor(name="words") as cur:
cur.execute(""" SELECT v, count(*) FROM
(SELECT svals(name) as v FROM place)x
@ -171,18 +189,7 @@ class LegacyICUTokenizer:
for term in terms:
words[term] += cnt
# copy them back into the word table
with CopyBuffer() as copystr:
for k, v in words.items():
copystr.add('w', k, json.dumps({'count': v}))
with conn.cursor() as cur:
copystr.copy_out(cur, 'word',
columns=['type', 'word_token', 'info'])
cur.execute("""UPDATE word SET word_id = nextval('seq_word')
WHERE word_id is null and type = 'w'""")
conn.commit()
return words
class LegacyICUNameAnalyzer: