mirror of
https://github.com/leon-ai/leon.git
synced 2024-12-25 09:44:22 +03:00
feat(server): differentiate cities from countries for location entities
This commit is contained in:
parent
1e6835a9e5
commit
bf9bf231f7
@ -13,5 +13,6 @@ setuptools = "==60.9.3"
|
||||
wheel = "==0.37.1"
|
||||
torch = "==1.9.0"
|
||||
python-dotenv = "==0.19.2"
|
||||
geonamescache = "==1.3.0"
|
||||
|
||||
[dev-packages]
|
||||
|
55
bridges/python/Pipfile.lock
generated
55
bridges/python/Pipfile.lock
generated
@ -1,7 +1,7 @@
|
||||
{
|
||||
"_meta": {
|
||||
"hash": {
|
||||
"sha256": "59161bbf238c95ee5a94821b21c201075600d2b4f47845b0a6a31ad1c45c3344"
|
||||
"sha256": "48beede01d5281ab2105bb353ecc23485cf31205d0eef790a652b3b50aa09659"
|
||||
},
|
||||
"pipfile-spec": 6,
|
||||
"requires": {},
|
||||
@ -95,6 +95,14 @@
|
||||
],
|
||||
"version": "==2.0.6"
|
||||
},
|
||||
"geonamescache": {
|
||||
"hashes": [
|
||||
"sha256:5a1fc774a3282d324952b87eb5c2c13684055e68e829526909c76c2fffb84f8f",
|
||||
"sha256:efa1dd28a87632303c5d882cc52718f04cf28e85f1cc7d6afdf8a6ba7c5ab1dc"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==1.3.0"
|
||||
},
|
||||
"idna": {
|
||||
"hashes": [
|
||||
"sha256:c357b3f628cf53ae2c4c05627ecc484553142ca23264e593d327bcde5e9c3407",
|
||||
@ -187,28 +195,29 @@
|
||||
},
|
||||
"numpy": {
|
||||
"hashes": [
|
||||
"sha256:03ae5850619abb34a879d5f2d4bb4dcd025d6d8fb72f5e461dae84edccfe129f",
|
||||
"sha256:076aee5a3763d41da6bef9565fdf3cb987606f567cd8b104aded2b38b7b47abf",
|
||||
"sha256:0b536b6840e84c1c6a410f3a5aa727821e6108f3454d81a5cd5900999ef04f89",
|
||||
"sha256:15efb7b93806d438e3bc590ca8ef2f953b0ce4f86f337ef4559d31ec6cf9d7dd",
|
||||
"sha256:168259b1b184aa83a514f307352c25c56af111c269ffc109d9704e81f72e764b",
|
||||
"sha256:2638389562bda1635b564490d76713695ff497242a83d9b684d27bb4a6cc9d7a",
|
||||
"sha256:3556c5550de40027d3121ebbb170f61bbe19eb639c7ad0c7b482cd9b560cd23b",
|
||||
"sha256:4a176959b6e7e00b5a0d6f549a479f869829bfd8150282c590deee6d099bbb6e",
|
||||
"sha256:515a8b6edbb904594685da6e176ac9fbea8f73a5ebae947281de6613e27f1956",
|
||||
"sha256:55535c7c2f61e2b2fc817c5cbe1af7cb907c7f011e46ae0a52caa4be1f19afe2",
|
||||
"sha256:59153979d60f5bfe9e4c00e401e24dfe0469ef8da6d68247439d3278f30a180f",
|
||||
"sha256:60cb8e5933193a3cc2912ee29ca331e9c15b2da034f76159b7abc520b3d1233a",
|
||||
"sha256:6767ad399e9327bfdbaa40871be4254d1995f4a3ca3806127f10cec778bd9896",
|
||||
"sha256:76a4f9bce0278becc2da7da3b8ef854bed41a991f4226911a24a9711baad672c",
|
||||
"sha256:8cf33634b60c9cef346663a222d9841d3bbbc0a2f00221d6bcfd0d993d5543f6",
|
||||
"sha256:94dd11d9f13ea1be17bac39c1942f527cbf7065f94953cf62dfe805653da2f8f",
|
||||
"sha256:aafa46b5a39a27aca566198d3312fb3bde95ce9677085efd02c86f7ef6be4ec7",
|
||||
"sha256:badca914580eb46385e7f7e4e426fea6de0a37b9e06bec252e481ae7ec287082",
|
||||
"sha256:d76a26c5118c4d96e264acc9e3242d72e1a2b92e739807b3b69d8d47684b6677"
|
||||
"sha256:07a8c89a04997625236c5ecb7afe35a02af3896c8aa01890a849913a2309c676",
|
||||
"sha256:08d9b008d0156c70dc392bb3ab3abb6e7a711383c3247b410b39962263576cd4",
|
||||
"sha256:201b4d0552831f7250a08d3b38de0d989d6f6e4658b709a02a73c524ccc6ffce",
|
||||
"sha256:2c10a93606e0b4b95c9b04b77dc349b398fdfbda382d2a39ba5a822f669a0123",
|
||||
"sha256:3ca688e1b9b95d80250bca34b11a05e389b1420d00e87a0d12dc45f131f704a1",
|
||||
"sha256:48a3aecd3b997bf452a2dedb11f4e79bc5bfd21a1d4cc760e703c31d57c84b3e",
|
||||
"sha256:568dfd16224abddafb1cbcce2ff14f522abe037268514dd7e42c6776a1c3f8e5",
|
||||
"sha256:5bfb1bb598e8229c2d5d48db1860bcf4311337864ea3efdbe1171fb0c5da515d",
|
||||
"sha256:639b54cdf6aa4f82fe37ebf70401bbb74b8508fddcf4797f9fe59615b8c5813a",
|
||||
"sha256:8251ed96f38b47b4295b1ae51631de7ffa8260b5b087808ef09a39a9d66c97ab",
|
||||
"sha256:92bfa69cfbdf7dfc3040978ad09a48091143cffb778ec3b03fa170c494118d75",
|
||||
"sha256:97098b95aa4e418529099c26558eeb8486e66bd1e53a6b606d684d0c3616b168",
|
||||
"sha256:a3bae1a2ed00e90b3ba5f7bd0a7c7999b55d609e0c54ceb2b076a25e345fa9f4",
|
||||
"sha256:c34ea7e9d13a70bf2ab64a2532fe149a9aced424cd05a2c4ba662fd989e3e45f",
|
||||
"sha256:dbc7601a3b7472d559dc7b933b18b4b66f9aa7452c120e87dfb33d02008c8a18",
|
||||
"sha256:e7927a589df200c5e23c57970bafbd0cd322459aa7b1ff73b7c2e84d6e3eae62",
|
||||
"sha256:f8c1f39caad2c896bc0018f699882b345b2a63708008be29b1f355ebf6f933fe",
|
||||
"sha256:f950f8845b480cffe522913d35567e29dd381b0dc7e4ce6a4a9f9156417d2430",
|
||||
"sha256:fade0d4f4d292b6f39951b6836d7a3c7ef5b2347f3c420cd9820a1d90d794802",
|
||||
"sha256:fdf3c08bce27132395d3c3ba1503cac12e17282358cb4bddc25cc46b0aca07aa"
|
||||
],
|
||||
"markers": "python_version >= '3.8'",
|
||||
"version": "==1.22.2"
|
||||
"version": "==1.22.3"
|
||||
},
|
||||
"packaging": {
|
||||
"hashes": [
|
||||
@ -320,7 +329,7 @@
|
||||
"sha256:71d14489da58b60ce12fc3ecb823facc59a8b23cd1b58edb97175640350d3a62",
|
||||
"sha256:75abf758717a92a8f53aa96953f0c245c8cedf8e1e4184903db3659b419d4c17"
|
||||
],
|
||||
"markers": "python_version >= '3.6' and python_version < '4.0'",
|
||||
"markers": "python_version >= '3.6' and python_version < '4'",
|
||||
"version": "==5.2.1"
|
||||
},
|
||||
"soupsieve": {
|
||||
@ -474,7 +483,7 @@
|
||||
"sha256:2393a695cd12afedd0dcb26fe5d50d0cf248e5a66f75dbd89a3d4eb333a61af4",
|
||||
"sha256:a637e5fae88995b256e3409dc4d52c2e2e0ba32c42a6365fee8bbd2238de3cfb"
|
||||
],
|
||||
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3' and python_version < '4.0'",
|
||||
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3' and python_version < '4'",
|
||||
"version": "==1.24.3"
|
||||
},
|
||||
"wasabi": {
|
||||
|
@ -3,6 +3,7 @@
|
||||
|
||||
from sys import argv
|
||||
import spacy
|
||||
import geonamescache
|
||||
|
||||
# Language code is taken from the first command-line argument. Fall back to
# English when the argument is missing or empty: indexing argv[1] directly
# raises IndexError when the script is started without any argument, so the
# `or 'en'` fallback alone never covered that case.
lang = argv[1] if len(argv) > 1 and argv[1] else 'en'
# Placeholder for the loaded spaCy pipeline; presumably assigned by
# load_spacy_model(), which declares it global.
spacy_nlp = None
|
||||
@ -27,6 +28,28 @@ spacy_model_mapping = {
|
||||
}
|
||||
}
|
||||
|
||||
# Create the geonamescache handle once at import time and fetch its raw
# country and city records; both are reduced to plain name lists further down.
gc = geonamescache.GeonamesCache()
countries, cities = gc.get_countries(), gc.get_cities()
|
||||
|
||||
def gen_dict_extract(var, key):
    """Lazily yield every value stored under ``key`` anywhere inside ``var``.

    ``var`` is walked depth-first: lists are searched element by element and
    dicts entry by entry. A value found under ``key`` is yielded first and,
    when it is itself a dict or a list, is then searched as well. Any other
    kind of object yields nothing.
    """
    if isinstance(var, list):
        for element in var:
            yield from gen_dict_extract(element, key)
        return

    if not isinstance(var, dict):
        return

    for name, value in var.items():
        if name == key:
            yield value
        # Keep descending even when the value itself matched, so nested
        # occurrences of the key are reported too.
        if isinstance(value, (dict, list)):
            yield from gen_dict_extract(value, key)
|
||||
|
||||
# Flatten the raw geonamescache records into plain lists holding every value
# found under a 'name' key.
countries = list(gen_dict_extract(countries, 'name'))
cities = list(gen_dict_extract(cities, 'name'))
|
||||
|
||||
"""
|
||||
Functions called from TCPServer class
|
||||
"""
|
||||
|
||||
def load_spacy_model():
|
||||
global spacy_nlp
|
||||
|
||||
@ -44,6 +67,12 @@ def extract_spacy_entities(utterance):
|
||||
for ent in doc.ents:
|
||||
if ent.label_ in spacy_model_mapping[lang]['entity_mapping']:
|
||||
entity = spacy_model_mapping[lang]['entity_mapping'][ent.label_]
|
||||
if entity == 'location':
|
||||
if ent.text.casefold() in (country.casefold() for country in countries):
|
||||
entity += ':country'
|
||||
elif ent.text.casefold() in (city.casefold() for city in cities):
|
||||
entity += ':city'
|
||||
|
||||
entities.append({
|
||||
'start': ent.start_char,
|
||||
'end': ent.end_char,
|
||||
|
@ -42,7 +42,7 @@
|
||||
"slots": [
|
||||
{
|
||||
"name": "birth_date",
|
||||
"type": "date"
|
||||
"entity": "date"
|
||||
}
|
||||
],
|
||||
"answers": {
|
||||
|
@ -34,7 +34,7 @@
|
||||
"slots": [
|
||||
{
|
||||
"name": "color",
|
||||
"type": "color",
|
||||
"entity": "color",
|
||||
"answers": [
|
||||
"What color do you want to know about?"
|
||||
]
|
||||
|
Loading…
Reference in New Issue
Block a user