1
1
mirror of https://github.com/leon-ai/leon.git synced 2024-12-26 02:04:08 +03:00

refactor(server): adapt to new NER

This commit is contained in:
louistiti 2021-03-24 20:26:34 +08:00
parent 37165ef658
commit da592f4413
No known key found for this signature in database
GPG Key ID: 7ECA3DD523793FE6
3 changed files with 28 additions and 19 deletions

View File

@ -52,6 +52,8 @@
"@ffprobe-installer/ffprobe": "^1.1.0", "@ffprobe-installer/ffprobe": "^1.1.0",
"@google-cloud/speech": "^4.2.0", "@google-cloud/speech": "^4.2.0",
"@google-cloud/text-to-speech": "^3.1.3", "@google-cloud/text-to-speech": "^3.1.3",
"@nlpjs/core": "^4.21.1",
"@nlpjs/ner": "^4.21.1",
"archiver": "^5.2.0", "archiver": "^5.2.0",
"async": "^3.2.0", "async": "^3.2.0",
"body-parser": "^1.19.0", "body-parser": "^1.19.0",

View File

@ -1,4 +1,9 @@
import { NerManager } from 'node-nlp' import { containerBootstrap } from '@nlpjs/core'
import {
ExtractorRegex,
ExtractorTrim,
Ner as NerManager
} from '@nlpjs/ner'
import fs from 'fs' import fs from 'fs'
import log from '@/helpers/log' import log from '@/helpers/log'
@ -6,12 +11,16 @@ import string from '@/helpers/string'
class Ner { class Ner {
constructor () { constructor () {
this.nerManager = { } this.ner = { }
this.container = containerBootstrap()
this.supportedEntityTypes = [ this.supportedEntityTypes = [
'regex', 'regex',
'trim' 'trim'
] ]
this.container.use(ExtractorRegex)
this.container.use(ExtractorTrim)
log.title('NER') log.title('NER')
log.success('New instance') log.success('New instance')
} }
@ -29,9 +38,9 @@ class Ner {
log.info('Searching for entities...') log.info('Searching for entities...')
// Need to instantiate on the fly to flush entities // Need to instantiate on the fly to flush entities
this.nerManager = new NerManager() this.ner = new NerManager({ container: this.container })
const { entities, classification } = obj const { entities: builtInEntities, classification } = obj
// Remove end-punctuation and add an end-whitespace // Remove end-punctuation and add an end-whitespace
const query = `${string.removeEndPunctuation(obj.query)} ` const query = `${string.removeEndPunctuation(obj.query)} `
const expressionsObj = JSON.parse(fs.readFileSync(expressionsFilePath, 'utf8')) const expressionsObj = JSON.parse(fs.readFileSync(expressionsFilePath, 'utf8'))
@ -64,8 +73,8 @@ class Ner {
// Merge built-in and named entities // Merge built-in and named entities
const nerEntities = ( const nerEntities = (
await this.nerManager.findBuiltinEntities(query, lang) await this.ner.process({ locale: lang, text: query })
).concat(await this.nerManager.findNamedEntities(query, lang)) ).entities.concat(builtInEntities)
// Trim whitespace at the beginning and the end of the entity value // Trim whitespace at the beginning and the end of the entity value
nerEntities.map((e) => { nerEntities.map((e) => {
@ -79,13 +88,13 @@ class Ner {
resolve(nerEntities) resolve(nerEntities)
} else { } else {
if (entities.length > 0) { if (builtInEntities.length > 0) {
Ner.logExtraction(entities) Ner.logExtraction(builtInEntities)
} else { } else {
log.info('No entity found') log.info('No entity found')
} }
resolve(entities) resolve(builtInEntities)
} }
}) })
} }
@ -95,19 +104,17 @@ class Ner {
*/ */
injectTrimEntity (lang, entity) { injectTrimEntity (lang, entity) {
return new Promise((resolve) => { return new Promise((resolve) => {
const e = this.nerManager.addNamedEntity(entity.name, entity.type)
for (let j = 0; j < entity.conditions.length; j += 1) { for (let j = 0; j < entity.conditions.length; j += 1) {
const condition = entity.conditions[j] const condition = entity.conditions[j]
const conditionMethod = `add${string.snakeToPascalCase(condition.type)}Condition` const conditionMethod = `add${string.snakeToPascalCase(condition.type)}Condition`
if (condition.type === 'between') { if (condition.type === 'between') {
// e.g. list.addBetweenCondition('en', 'create a', 'list') // e.g. this.ner.addBetweenCondition('en', 'list', 'create a', 'list')
e[conditionMethod](lang, condition.from, condition.to) this.ner[conditionMethod](lang, entity.name, condition.from, condition.to)
} else if (condition.type.indexOf('after') !== -1) { } else if (condition.type.indexOf('after') !== -1) {
e[conditionMethod](lang, condition.from) this.ner[conditionMethod](lang, entity.name, condition.from)
} else if (condition.type.indexOf('before') !== -1) { } else if (condition.type.indexOf('before') !== -1) {
e[conditionMethod](lang, condition.to) this.ner[conditionMethod](lang, entity.name, condition.to)
} }
} }
@ -120,9 +127,7 @@ class Ner {
*/ */
injectRegexEntity (lang, entity) { injectRegexEntity (lang, entity) {
return new Promise((resolve) => { return new Promise((resolve) => {
const e = this.nerManager.addNamedEntity(entity.name, entity.type) this.ner.addRegexRule(lang, entity.name, new RegExp(entity.regex, 'g'))
e.addRegex(lang, new RegExp(entity.regex, 'g'))
resolve() resolve()
}) })

View File

@ -33,7 +33,9 @@ class Nlu {
try { try {
const data = fs.readFileSync(classifierFile, 'utf8') const data = fs.readFileSync(classifierFile, 'utf8')
const nlpManager = new NlpManager() const nlpManager = new NlpManager({
forceNER: true
})
nlpManager.import(data) nlpManager.import(data)
this.classifier = nlpManager this.classifier = nlpManager