1
1
mirror of https://github.com/leon-ai/leon.git synced 2024-12-25 09:44:22 +03:00

refactor(server): adapt to new NER

This commit is contained in:
louistiti 2021-03-24 20:26:34 +08:00
parent 37165ef658
commit da592f4413
No known key found for this signature in database
GPG Key ID: 7ECA3DD523793FE6
3 changed files with 28 additions and 19 deletions

View File

@ -52,6 +52,8 @@
"@ffprobe-installer/ffprobe": "^1.1.0",
"@google-cloud/speech": "^4.2.0",
"@google-cloud/text-to-speech": "^3.1.3",
"@nlpjs/core": "^4.21.1",
"@nlpjs/ner": "^4.21.1",
"archiver": "^5.2.0",
"async": "^3.2.0",
"body-parser": "^1.19.0",

View File

@ -1,4 +1,9 @@
import { NerManager } from 'node-nlp'
import { containerBootstrap } from '@nlpjs/core'
import {
ExtractorRegex,
ExtractorTrim,
Ner as NerManager
} from '@nlpjs/ner'
import fs from 'fs'
import log from '@/helpers/log'
@ -6,12 +11,16 @@ import string from '@/helpers/string'
class Ner {
constructor () {
this.nerManager = { }
this.ner = { }
this.container = containerBootstrap()
this.supportedEntityTypes = [
'regex',
'trim'
]
this.container.use(ExtractorRegex)
this.container.use(ExtractorTrim)
log.title('NER')
log.success('New instance')
}
@ -29,9 +38,9 @@ class Ner {
log.info('Searching for entities...')
// Need to instantiate on the fly to flush entities
this.nerManager = new NerManager()
this.ner = new NerManager({ container: this.container })
const { entities, classification } = obj
const { entities: builtInEntities, classification } = obj
// Remove end-punctuation and add an end-whitespace
const query = `${string.removeEndPunctuation(obj.query)} `
const expressionsObj = JSON.parse(fs.readFileSync(expressionsFilePath, 'utf8'))
@ -64,8 +73,8 @@ class Ner {
// Merge built-in and named entities
const nerEntities = (
await this.nerManager.findBuiltinEntities(query, lang)
).concat(await this.nerManager.findNamedEntities(query, lang))
await this.ner.process({ locale: lang, text: query })
).entities.concat(builtInEntities)
// Trim whitespace at the beginning and the end of the entity value
nerEntities.map((e) => {
@ -79,13 +88,13 @@ class Ner {
resolve(nerEntities)
} else {
if (entities.length > 0) {
Ner.logExtraction(entities)
if (builtInEntities.length > 0) {
Ner.logExtraction(builtInEntities)
} else {
log.info('No entity found')
}
resolve(entities)
resolve(builtInEntities)
}
})
}
@ -95,19 +104,17 @@ class Ner {
*/
injectTrimEntity (lang, entity) {
return new Promise((resolve) => {
const e = this.nerManager.addNamedEntity(entity.name, entity.type)
for (let j = 0; j < entity.conditions.length; j += 1) {
const condition = entity.conditions[j]
const conditionMethod = `add${string.snakeToPascalCase(condition.type)}Condition`
if (condition.type === 'between') {
// e.g. list.addBetweenCondition('en', 'create a', 'list')
e[conditionMethod](lang, condition.from, condition.to)
// e.g. this.ner.addBetweenCondition('en', 'list', 'create a', 'list')
this.ner[conditionMethod](lang, entity.name, condition.from, condition.to)
} else if (condition.type.indexOf('after') !== -1) {
e[conditionMethod](lang, condition.from)
this.ner[conditionMethod](lang, entity.name, condition.from)
} else if (condition.type.indexOf('before') !== -1) {
e[conditionMethod](lang, condition.to)
this.ner[conditionMethod](lang, entity.name, condition.to)
}
}
@ -120,9 +127,7 @@ class Ner {
*/
injectRegexEntity (lang, entity) {
return new Promise((resolve) => {
const e = this.nerManager.addNamedEntity(entity.name, entity.type)
e.addRegex(lang, new RegExp(entity.regex, 'g'))
this.ner.addRegexRule(lang, entity.name, new RegExp(entity.regex, 'g'))
resolve()
})

View File

@ -33,7 +33,9 @@ class Nlu {
try {
const data = fs.readFileSync(classifierFile, 'utf8')
const nlpManager = new NlpManager()
const nlpManager = new NlpManager({
forceNER: true
})
nlpManager.import(data)
this.classifier = nlpManager