1
1
mirror of https://github.com/leon-ai/leon.git synced 2024-12-25 01:31:47 +03:00

refactor(server): TCP server for better structure

This commit is contained in:
louistiti 2022-03-06 14:26:36 +08:00
parent 1e988350b4
commit 0f7a6f6bed
No known key found for this signature in database
GPG Key ID: 0A1C3B043E70C77D
5 changed files with 140 additions and 101 deletions

View File

@ -1,100 +0,0 @@
""" TCP server to allow IPC with third-party such as spaCy """
import socket
import select
import os
import json
from sys import argv
from os.path import join, dirname
from dotenv import load_dotenv
import spacy
dotenv_path = join(dirname(__file__), '../../.env')
load_dotenv(dotenv_path)
lang = argv[1] or 'en'
model_mapping = {
'en': {
'model': 'en_core_web_trf',
'disable': ['tagger', 'parser', 'attribute_ruler', 'lemmatizer'],
'entity_mapping': {
'PERSON': 'person',
'GPE': 'location',
'ORG': 'organization'
}
},
'fr': {
'model': 'fr_core_news_md',
'disable': ['tok2vec', 'morphologizer', 'parser', 'senter', 'attribute_ruler', 'lemmatizer'],
'entity_mapping': {
'PER': 'person',
'LOC': 'location',
'ORG': 'organization'
}
}
}
nlp = spacy.load(model_mapping[lang]['model'], disable=model_mapping[lang]['disable'])
ws_server_host = os.environ.get('LEON_PY_WS_SERVER_HOST', '0.0.0.0')
ws_server_port = os.environ.get('LEON_PY_WS_SERVER_PORT', 1342)
tcp_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
# Make sure to establish TCP connection by reusing the address so it does not conflict with port already in use
tcp_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
tcp_socket.bind((ws_server_host, int(ws_server_port)))
tcp_socket.listen()
def extract_spacy_entities(utterance):
doc = nlp(utterance)
entities = []
for ent in doc.ents:
if ent.label_ in model_mapping[lang]['entity_mapping']:
entity = model_mapping[lang]['entity_mapping'][ent.label_]
entities.append({
'start': ent.start_char,
'end': ent.end_char,
'len': len(ent.text),
'sourceText': ent.text,
'utteranceText': ent.text,
'entity': entity,
'resolution': {
'value': ent.text
}
})
return entities
while True:
print('Waiting for connection...')
conn, addr = tcp_socket.accept()
try:
print(f'Client connected: {addr}')
while True:
data = conn.recv(1024)
if not data:
break
data_dict = json.loads(data)
if data_dict['topic'] == 'get-spacy-entities':
utterance = data_dict['data']
entities = extract_spacy_entities(utterance)
res = {
'topic': 'spacy-entities-received',
'data': {
'spacyEntities': entities,
'lang': lang
}
}
conn.sendall(json.dumps(res).encode('utf-8'))
finally:
print(f'Client disconnected: {addr}')
conn.close()

View File

@ -0,0 +1,60 @@
#!/usr/bin/env python
# -*- coding:utf-8 -*-
import socket
import json
import lib.nlp as nlp
class TCPServer:
def __init__(self, host, port):
self.host = host
self.port = port
self.tcp_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
self.conn = None
self.addr = None
def init(self):
# Make sure to establish TCP connection by reusing the address so it does not conflict with port already in use
self.tcp_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
self.tcp_socket.bind((self.host, int(self.port)))
self.tcp_socket.listen()
while True:
print('Waiting for connection...')
# Our TCP server only needs to support one connection
self.conn, self.addr = self.tcp_socket.accept()
try:
print(f'Client connected: {self.addr}')
while True:
socket_data = self.conn.recv(1024)
if not socket_data:
break
data_dict = json.loads(socket_data)
# Verify the received topic can execute the method
method = data_dict['topic'].lower().replace('-', '_')
if hasattr(self.__class__, method) and callable(getattr(self.__class__, method)):
data = data_dict['data']
method = getattr(self, method)
res = method(data)
self.conn.sendall(json.dumps(res).encode('utf-8'))
finally:
print(f'Client disconnected: {self.addr}')
self.conn.close()
def get_spacy_entities(self, utterance):
entities = nlp.extract_spacy_entities(utterance)
return {
'topic': 'spacy-entities-received',
'data': {
'spacyEntities': entities
}
}

View File

@ -0,0 +1,59 @@
#!/usr/bin/env python
# -*- coding:utf-8 -*-
from sys import argv
import spacy
lang = argv[1] or 'en'
spacy_nlp = None
spacy_model_mapping = {
'en': {
'model': 'en_core_web_trf',
'disable': ['tagger', 'parser', 'attribute_ruler', 'lemmatizer'],
'entity_mapping': {
'PERSON': 'person',
'GPE': 'location',
'ORG': 'organization'
}
},
'fr': {
'model': 'fr_core_news_md',
'disable': ['tok2vec', 'morphologizer', 'parser', 'senter', 'attribute_ruler', 'lemmatizer'],
'entity_mapping': {
'PER': 'person',
'LOC': 'location',
'ORG': 'organization'
}
}
}
def load_spacy_model():
global spacy_nlp
model = spacy_model_mapping[lang]['model']
disable = spacy_model_mapping[lang]['disable']
print(f'Loading {model} spaCy model...')
spacy_nlp = spacy.load(model, disable=disable)
print('spaCy model loaded')
def extract_spacy_entities(utterance):
doc = spacy_nlp(utterance)
entities = []
for ent in doc.ents:
if ent.label_ in spacy_model_mapping[lang]['entity_mapping']:
entity = spacy_model_mapping[lang]['entity_mapping'][ent.label_]
entities.append({
'start': ent.start_char,
'end': ent.end_char,
'len': len(ent.text),
'sourceText': ent.text,
'utteranceText': ent.text,
'entity': entity,
'resolution': {
'value': ent.text
}
})
return entities

View File

@ -0,0 +1,20 @@
#!/usr/bin/env python
# -*- coding:utf-8 -*-
import os
from os.path import join, dirname
from dotenv import load_dotenv
import lib.nlp as nlp
from lib.TCPServer import TCPServer
dotenv_path = join(dirname(__file__), '../../../.env')
load_dotenv(dotenv_path)
nlp.load_spacy_model()
tcp_server_host = os.environ.get('LEON_PY_WS_SERVER_HOST', '0.0.0.0')
tcp_server_port = os.environ.get('LEON_PY_WS_SERVER_PORT', 1342)
tcp_server = TCPServer(tcp_server_host, tcp_server_port)
tcp_server.init()

View File

@ -8,7 +8,7 @@ import server from '@/core/http-server/server'
(async () => {
dotenv.config()
spawn(`pipenv run python bridges/python/tcp-server.py ${lang.getShortCode(process.env.LEON_LANG)}`, { shell: true, detached: true })
spawn(`pipenv run python bridges/python/tcp_server/main.py ${lang.getShortCode(process.env.LEON_LANG)}`, { shell: true, detached: true })
global.tcpClient = new TcpClient(
process.env.LEON_PY_WS_SERVER_HOST,