1
1
mirror of https://github.com/leon-ai/leon.git synced 2024-12-25 09:44:22 +03:00

Merge branch 'nlp-ner' into develop

This commit is contained in:
Louistiti 2019-05-18 21:28:50 +08:00
commit 82a910b59b
12 changed files with 473 additions and 21 deletions

View File

@ -4,6 +4,34 @@
"expressions": [ "expressions": [
"Create the list", "Create the list",
"Create a list" "Create a list"
],
"entities": [
{
"type": "trim",
"name": "list",
"conditions": [
{
"type": "between",
"from": "the",
"to": "list"
},
{
"type": "between",
"from": "a",
"to": "list"
},
{
"type": "between",
"from": "an",
"to": "list"
},
{
"type": "between",
"from": "my",
"to": "list"
}
]
}
] ]
}, },
"rename_list": { "rename_list": {

View File

@ -2,8 +2,8 @@
"todolist": { "todolist": {
"create_list": { "create_list": {
"expressions": [ "expressions": [
"Créé la liste", "Crée la liste",
"Créé une liste" "Crée une liste"
] ]
}, },
"rename_list": { "rename_list": {

129
server/src/core/ner.js Normal file
View File

@ -0,0 +1,129 @@
'use strict'
import { NerManager } from 'node-nlp'
import fs from 'fs'
import log from '@/helpers/log'
import string from '@/helpers/string'
/**
 * Named-entity recognition helper built on top of the node-nlp NerManager.
 *
 * An action declared in an expressions file may carry an "entities" array;
 * each item describes a custom entity ("regex" or "trim" type) that is
 * injected into a fresh NerManager before the query is scanned.
 */
class Ner {
  constructor () {
    this.nerManager = { }
    // Entity types that extractActionEntities() knows how to inject
    this.supportedEntityTypes = [
      'regex',
      'trim'
    ]

    log.title('NER')
    log.success('New instance')
  }

  /**
   * Log each extracted entity as a "{ value, entity }" pair
   *
   * @param {Object[]} entities - Items exposing "sourceText" and "entity"
   */
  static logExtraction (entities) {
    entities.forEach(ent => log.success(`{ value: ${ent.sourceText}, entity: ${ent.entity} }`))
  }

  /**
   * Grab action entities and match them with the query
   *
   * @param {string} lang - Short language code forwarded to the NerManager
   * @param {string} expressionsFilePath - Path of the expressions JSON file
   * @param {Object} obj - Must expose "query", "entities" and
   *   "classification" ({ module, action })
   * @returns {Promise<Object[]>} The entities found by the NerManager when
   *   the action declares custom entities, otherwise obj.entities untouched
   *
   * Rejects with { type, obj, code, data } when an action entity type is not
   * supported (callers read these fields, hence the plain object).
   * Any other failure (unreadable file, invalid JSON, unknown module or
   * action) rejects with the original error instead of leaving the promise
   * pending forever.
   */
  async extractActionEntities (lang, expressionsFilePath, obj) {
    log.title('NER')
    log.info('Searching for entities...')

    // Need to instantiate on the fly to flush entities
    this.nerManager = new NerManager()

    const { entities, classification } = obj
    // Remove end-punctuation and add an end-whitespace
    const query = `${string.removeEndPunctuation(obj.query)} `
    const expressionsObj = JSON.parse(fs.readFileSync(expressionsFilePath, 'utf8'))
    const { module, action } = classification
    const actionEntities = expressionsObj[module][action].entities

    // The action has no custom entities: hand back the ones already given
    if (typeof actionEntities === 'undefined') {
      if (entities.length > 0) {
        Ner.logExtraction(entities)
      } else {
        log.info('No entity found')
      }

      return entities
    }

    // Validate every type first so nothing else runs once one is unsupported
    const unsupported = actionEntities.find(e => !this.supportedEntityTypes.includes(e.type))
    if (typeof unsupported !== 'undefined') {
      return Promise.reject({
        type: 'warning',
        obj: new Error(`"${unsupported.type}" action entity type not supported`),
        code: 'random_ner_type_not_supported',
        data: { '%entity_type%': unsupported.type }
      })
    }

    /**
     * Browse action entities
     * Dynamic injection of the action entities depending on the entity type
     */
    const promises = []
    for (const entity of actionEntities) {
      if (entity.type === 'regex') {
        promises.push(this.injectRegexEntity(lang, entity))
      } else if (entity.type === 'trim') {
        promises.push(this.injectTrimEntity(lang, entity))
      }
    }
    await Promise.all(promises)

    const nerEntities = await this.nerManager.findEntities(query, lang)

    // Trim whitespace at the beginning and the end of the entity value
    nerEntities.forEach((e) => {
      e.sourceText = e.sourceText.trim()
      e.utteranceText = e.utteranceText.trim()
    })

    Ner.logExtraction(nerEntities)

    return nerEntities
  }

  /**
   * Inject trim type entities
   *
   * The condition method name is derived from the condition type, e.g.
   * "after_last" gives "addAfterLastCondition".
   * NOTE(review): the derived method is assumed to exist on the named
   * entity returned by the NerManager; confirm for new condition types.
   * Condition types that are neither "between" nor contain "after" or
   * "before" are ignored.
   *
   * @param {string} lang - Short language code
   * @param {Object} entity - Exposes "name", "type" and "conditions"
   * @returns {Promise<undefined>}
   */
  async injectTrimEntity (lang, entity) {
    const e = this.nerManager.addNamedEntity(entity.name, entity.type)

    for (const condition of entity.conditions) {
      const conditionMethod = `add${string.snakeToPascalCase(condition.type)}Condition`

      if (condition.type === 'between') {
        // e.g. list.addBetweenCondition('en', 'create a', 'list')
        e[conditionMethod](lang, condition.from, condition.to)
      } else if (condition.type.includes('after')) {
        e[conditionMethod](lang, condition.from)
      } else if (condition.type.includes('before')) {
        e[conditionMethod](lang, condition.to)
      }
    }
  }

  /**
   * Inject regex type entities
   *
   * @param {string} lang - Short language code
   * @param {Object} entity - Exposes "name", "type" and "regex" (the
   *   pattern source, compiled here with the global flag)
   * @returns {Promise<undefined>}
   */
  async injectRegexEntity (lang, entity) {
    const e = this.nerManager.addNamedEntity(entity.name, entity.type)

    e.addRegex(lang, new RegExp(entity.regex, 'g'))
  }
}
export default Ner

View File

@ -3,9 +3,11 @@
import { NlpManager } from 'node-nlp' import { NlpManager } from 'node-nlp'
import request from 'superagent' import request from 'superagent'
import fs from 'fs' import fs from 'fs'
import path from 'path'
import { langs } from '@@/core/langs.json' import { langs } from '@@/core/langs.json'
import { version } from '@@/package.json' import { version } from '@@/package.json'
import Ner from '@/core/ner'
import log from '@/helpers/log' import log from '@/helpers/log'
import string from '@/helpers/string' import string from '@/helpers/string'
@ -14,6 +16,7 @@ class Nlu {
this.brain = brain this.brain = brain
this.request = request this.request = request
this.classifier = { } this.classifier = { }
this.ner = new Ner()
log.title('NLU') log.title('NLU')
log.success('New instance') log.success('New instance')
@ -32,10 +35,10 @@ class Nlu {
try { try {
const data = fs.readFileSync(classifierFile, 'utf8') const data = fs.readFileSync(classifierFile, 'utf8')
const manager = new NlpManager() const nlpManager = new NlpManager()
manager.import(data) nlpManager.import(data)
this.classifier = manager this.classifier = nlpManager
log.success('Classifier loaded') log.success('Classifier loaded')
resolve() resolve()
@ -58,7 +61,7 @@ class Nlu {
log.title('NLU') log.title('NLU')
log.info('Processing...') log.info('Processing...')
query = string.removeAccents(string.ucfirst(query)) query = string.ucfirst(query)
if (Object.keys(this.classifier).length === 0) { if (Object.keys(this.classifier).length === 0) {
this.brain.talk(`${this.brain.wernicke('random_errors')}!`) this.brain.talk(`${this.brain.wernicke('random_errors')}!`)
@ -69,7 +72,8 @@ class Nlu {
return false return false
} }
const result = await this.classifier.process(langs[process.env.LEON_LANG].short, query) const lang = langs[process.env.LEON_LANG].short
const result = await this.classifier.process(lang, query)
const { domain, intent, score, entities } = result const { domain, intent, score, entities } = result
const [moduleName, actionName] = intent.split('.') const [moduleName, actionName] = intent.split('.')
let obj = { let obj = {
@ -91,7 +95,7 @@ class Nlu {
.send({ .send({
version, version,
query, query,
lang: langs[process.env.LEON_LANG].short, lang,
classification: obj.classification classification: obj.classification
}) })
.then(() => { /* */ }) .then(() => { /* */ })
@ -118,10 +122,22 @@ class Nlu {
log.success('Query found') log.success('Query found')
try { try {
await this.brain.execute(obj) obj.entities = await this.ner.extractActionEntities(
} catch (e) { lang,
/* istanbul ignore next */ path.join(__dirname, '../../../packages', obj.classification.package, `data/expressions/${lang}.json`),
obj
)
} catch (e) /* istanbul ignore next */ {
log[e.type](e.obj.message) log[e.type](e.obj.message)
this.brain.talk(`${this.brain.wernicke(e.code, '', e.data)}!`)
}
try {
// Inject action entities with the others if there is
await this.brain.execute(obj)
} catch (e) /* istanbul ignore next */ {
log[e.type](e.obj.message)
this.brain.socket.emit('is-typing', false)
} }
return true return true
@ -146,6 +162,7 @@ class Nlu {
} }
if (JSON.stringify(tmpWords) === JSON.stringify(fallbacks[i].words)) { if (JSON.stringify(tmpWords) === JSON.stringify(fallbacks[i].words)) {
obj.entities = []
obj.classification.package = fallbacks[i].package obj.classification.package = fallbacks[i].package
obj.classification.module = fallbacks[i].module obj.classification.module = fallbacks[i].module
obj.classification.action = fallbacks[i].action obj.classification.action = fallbacks[i].action

View File

@ -20,10 +20,10 @@
"Sorry, I cannot do that because I'm broken" "Sorry, I cannot do that because I'm broken"
], ],
"random_package_module_errors": [ "random_package_module_errors": [
"Sorry, it seems I have a problem with the %module_name% module of my %package_name% package", "Sorry, it seems I have a problem with the \"%module_name%\" module of my \"%package_name%\" package",
"Sorry, I have an issue with the %module_name% module of my %package_name% package", "Sorry, I have an issue with the \"%module_name%\" module of my \"%package_name%\" package",
"Sorry, I've got an error with the %module_name% module of my %package_name% package", "Sorry, I've got an error with the \"%module_name%\" module of my \"%package_name%\" package",
"Sorry, the %module_name% module of my %package_name% package is broken" "Sorry, the \"%module_name%\" module of my \"%package_name%\" package is broken"
], ],
"random_unknown_queries": [ "random_unknown_queries": [
"Sorry, I still don't know this, but you can help me to understand by <a href=\"https://github.com/leon-ai/leon/blob/develop/.github/CONTRIBUTING.md\" target=\"_blank\">creating a pull request</a>", "Sorry, I still don't know this, but you can help me to understand by <a href=\"https://github.com/leon-ai/leon/blob/develop/.github/CONTRIBUTING.md\" target=\"_blank\">creating a pull request</a>",
@ -43,6 +43,10 @@
"random_not_able": [ "random_not_able": [
"Sorry, I'm not able to answer. I understand what you said, but please repeat in another way", "Sorry, I'm not able to answer. I understand what you said, but please repeat in another way",
"Sorry, I have a blackout, I cannot answer that. I understand what you said, but try to repeat in another way" "Sorry, I have a blackout, I cannot answer that. I understand what you said, but try to repeat in another way"
],
"random_ner_type_not_supported": [
"Sorry, the \"%entity_type%\" action entity type is not supported by my NER. Please <a href=\"https://github.com/leon-ai/leon/issues\" target=\"_blank\">open an issue</a> to report, that will be much appreciated",
"Sorry, my NER does not recognize the \"%entity_type%\" action entity type. Please <a href=\"https://github.com/leon-ai/leon/issues\" target=\"_blank\">open an issue</a> to report, it'll be highly appreciated"
] ]
} }
} }

View File

@ -20,10 +20,10 @@
"Désolé, je ne peux aboutir à votre demande parce que je suis cassé" "Désolé, je ne peux aboutir à votre demande parce que je suis cassé"
], ],
"random_package_module_errors": [ "random_package_module_errors": [
"Désolé, il semblerait y avoir un problème avec le module %module_name% de mon paquet %package_name%", "Désolé, il semblerait y avoir un problème avec le module \"%module_name%\" de mon paquet \"%package_name%\"",
"Désolé, j'ai un problème avec le module %module_name% de mon paquet %package_name%", "Désolé, j'ai un problème avec le module \"%module_name%\" de mon paquet \"%package_name%\"",
"Désolé, j'ai une erreur avec le module %module_name% de mon paquet %package_name%", "Désolé, j'ai une erreur avec le module \"%module_name%\" de mon paquet \"%package_name%\"",
"Désolé, le module %module_name% de mon paquet %package_name% est cassé" "Désolé, le module \"%module_name%\" de mon paquet \"%package_name%\" est cassé"
], ],
"random_unknown_queries": [ "random_unknown_queries": [
"Désolé, je ne connais pas encore ça, mais vous pouvez m'aider à comprendre en <a href=\"https://github.com/leon-ai/leon/blob/develop/.github/CONTRIBUTING.md\" target=\"_blank\">créant une pull request</a>", "Désolé, je ne connais pas encore ça, mais vous pouvez m'aider à comprendre en <a href=\"https://github.com/leon-ai/leon/blob/develop/.github/CONTRIBUTING.md\" target=\"_blank\">créant une pull request</a>",
@ -43,6 +43,10 @@
"random_not_able": [ "random_not_able": [
"Désolé, je ne suis pas capable de répondre. J'ai compris ce que vous avez dit, mais je vous prie de répéter d'une autre façon", "Désolé, je ne suis pas capable de répondre. J'ai compris ce que vous avez dit, mais je vous prie de répéter d'une autre façon",
"Désolé, j'ai un trou de mémoire, je ne peux pas répondre à ça. J'ai compris ce que vous disiez, mais essayez voir d'une autre façon s'il vous plaît" "Désolé, j'ai un trou de mémoire, je ne peux pas répondre à ça. J'ai compris ce que vous disiez, mais essayez voir d'une autre façon s'il vous plaît"
],
"random_ner_type_not_supported": [
"Désolé, le type \"%entity_type%\" d'entité d'action n'est pas supporté par ma reconnaissance d'entité. Merci d'<a href=\"https://github.com/leon-ai/leon/issues\" target=\"_blank\">ouvrir une issue</a> afin de reporter ce cas, ce sera très apprécié",
"Désolé, ma reconnaissance d'entité ne reconnaît pas le type \"%entity_type%\" d'entité d'action. Merci d'<a href=\"https://github.com/leon-ai/leon/issues\" target=\"_blank\">ouvrir une issue</a> afin de reporter ce cas, ce serait très appréciable"
] ]
} }
} }

View File

@ -12,6 +12,11 @@ string.pnr = (s, obj) => s.replace(new RegExp(Object.keys(obj).join('|'), 'gi'),
*/ */
string.ucfirst = s => s.charAt(0).toUpperCase() + s.substr(1) string.ucfirst = s => s.charAt(0).toUpperCase() + s.substr(1)
/**
 * Convert a snake_case string into its PascalCase form
 * e.g. "after_last" becomes "AfterLast"
 */
string.snakeToPascalCase = (s) => {
  let pascal = ''

  // Capitalize each underscore-separated word and glue them back together
  for (const word of s.split('_')) {
    pascal += string.ucfirst(word)
  }

  return pascal
}
/** /**
* Random string * Random string
*/ */
@ -22,4 +27,17 @@ string.random = n => Math.random().toString(36).slice(-n)
*/ */
string.removeAccents = s => s.normalize('NFD').replace(/[\u0300-\u036f]/g, '') string.removeAccents = s => s.normalize('NFD').replace(/[\u0300-\u036f]/g, '')
/**
 * Remove end-punctuation
 *
 * Drops a single trailing ".", ";", ":", "?" or "!" from the given string;
 * any other string (including an empty one) is returned unchanged
 */
string.removeEndPunctuation = (s) => {
  const punctuations = ['.', ';', ':', '?', '!']

  if (punctuations.includes(s[s.length - 1])) {
    // Cut from index 0: the string itself must not be used as the start
    // index, otherwise a numeric string such as "42." is coerced to 42
    return s.slice(0, -1)
  }

  return s
}
export default string export default string

View File

@ -0,0 +1,57 @@
{
"color": {
"run": {
"expressions": [
"I like that color"
],
"entities": [
{
"type": "regex",
"name": "color",
"regex": "blue|white|red"
}
]
}
},
"unittest": {
"do_not_support_entity": {
"expressions": [
"Just an expression"
],
"entities": [
{
"type": "not_supported_entity"
}
]
}
},
"mockingbird": {
"test": {
"expressions": [
"Whistle as a bird"
],
"entities": [
{
"type": "trim",
"name": "start",
"conditions": [
{
"type": "before",
"to": "bird"
}
]
},
{
"type": "trim",
"name": "animal",
"conditions": [
{
"type": "after_last",
"from": "a"
}
]
}
]
}
}
}

View File

@ -4,6 +4,7 @@ global.paths = {
server: `${__dirname}/../server/src`, server: `${__dirname}/../server/src`,
classifier: `${__dirname}/../server/src/data/expressions/classifier.json`, classifier: `${__dirname}/../server/src/data/expressions/classifier.json`,
broken_classifier: `${__dirname}/assets/broken-classifier.json`, broken_classifier: `${__dirname}/assets/broken-classifier.json`,
expressions: `${__dirname}/assets/expressions.json`,
wave_speech: `${__dirname}/assets/speech-test.wav`, wave_speech: `${__dirname}/assets/speech-test.wav`,
wave_speech_8: `${__dirname}/assets/speech-8kHz-test.wav` wave_speech_8: `${__dirname}/assets/speech-8kHz-test.wav`
} }

View File

@ -0,0 +1,174 @@
'use strict'
import path from 'path'
import Ner from '@/core/ner'
/**
 * Unit tests of the NER core class: instantiation, extraction logging and
 * entity extraction (built-in, unsupported, trim and regex entities)
 */
describe('NER', () => {
  // Give back the real console.log / Ner.logExtraction after every test so a
  // mock set in one test cannot leak into the next ones
  afterEach(() => {
    jest.restoreAllMocks()
  })

  describe('constructor()', () => {
    test('creates a new instance of Ner', () => {
      const ner = new Ner()

      expect(ner).toBeInstanceOf(Ner)
    })
  })

  describe('logExtraction()', () => {
    test('logs entities extractions', () => {
      const consoleLog = jest.spyOn(console, 'log').mockImplementation(() => { })

      Ner.logExtraction([
        { sourceText: 'shopping', entity: 'list' },
        { sourceText: 'red', entity: 'color' }
      ])

      // The second console.log argument is the logged message
      expect(consoleLog.mock.calls[0][1]).toBe('{ value: shopping, entity: list }')
      expect(consoleLog.mock.calls[1][1]).toBe('{ value: red, entity: color }')
    })
  })

  describe('extractActionEntities()', () => {
    test('finds no entity', async () => {
      const ner = new Ner()

      const entities = await ner.extractActionEntities(
        'en',
        path.join(__dirname, '../../../../packages/leon/data/expressions/en.json'),
        {
          query: 'Give me a random number',
          entities: [],
          classification: {
            package: 'leon',
            module: 'randomnumber',
            action: 'run',
            confidence: 1
          }
        }
      )

      expect(entities).toEqual([])
    })

    test('extracts built-in entities', async () => {
      const ner = new Ner()
      jest.spyOn(Ner, 'logExtraction').mockImplementation(() => { })

      const entities = await ner.extractActionEntities(
        'en',
        path.join(__dirname, '../../../../packages/trend/data/expressions/en.json'),
        {
          query: 'Give me the 2 latest GitHub trends',
          entities: [{ sourceText: 2, entity: 'number' }],
          classification: {
            package: 'trend',
            module: 'github',
            action: 'run',
            confidence: 1
          }
        }
      )

      expect(Ner.logExtraction).toHaveBeenCalledTimes(1)
      expect(entities.length).toBe(1)
    })

    test('does not support entity type', async () => {
      const ner = new Ner()

      // Asserting on the rejection itself makes the test fail when the
      // extraction unexpectedly succeeds
      await expect(ner.extractActionEntities(
        'en',
        global.paths.expressions,
        {
          query: 'Just a query',
          entities: [],
          classification: {
            package: 'doesnotmatter',
            module: 'unittest',
            action: 'do_not_support_entity',
            confidence: 1
          }
        }
      )).rejects.toMatchObject({ code: 'random_ner_type_not_supported' })
    })

    test('extracts trim custom entities with between conditions', async () => {
      const ner = new Ner()
      jest.spyOn(Ner, 'logExtraction').mockImplementation(() => { })

      const entities = await ner.extractActionEntities(
        'en',
        path.join(__dirname, '../../../../packages/calendar/data/expressions/en.json'),
        {
          query: 'Create a shopping list',
          entities: [],
          classification: {
            package: 'calendar',
            module: 'todolist',
            action: 'create_list',
            confidence: 1
          }
        }
      )

      expect(Ner.logExtraction).toHaveBeenCalledTimes(1)
      expect(entities.length).toBe(1)
      expect(entities[0].entity).toBe('list')
      expect(entities[0].sourceText).toBe('shopping')
    })

    test('extracts trim custom entities with before and after conditions', async () => {
      const ner = new Ner()
      jest.spyOn(Ner, 'logExtraction').mockImplementation(() => { })

      const entities = await ner.extractActionEntities(
        'en',
        global.paths.expressions,
        {
          query: 'Please whistle as a bird',
          entities: [],
          classification: {
            package: 'doesnotmatter',
            module: 'mockingbird',
            action: 'test',
            confidence: 1
          }
        }
      )

      expect(Ner.logExtraction).toHaveBeenCalledTimes(1)
      expect(entities.length).toBe(2)
      expect(entities.map(e => e.entity)).toEqual(['start', 'animal'])
      expect(entities.map(e => e.sourceText)).toEqual(['Please whistle as a', 'bird'])
    })

    test('extracts regex custom entities', async () => {
      const ner = new Ner()
      jest.spyOn(Ner, 'logExtraction').mockImplementation(() => { })

      const entities = await ner.extractActionEntities(
        'en',
        global.paths.expressions,
        {
          query: 'I love the color blue, white and red',
          entities: [],
          classification: {
            package: 'preference',
            module: 'color',
            action: 'run',
            confidence: 1
          }
        }
      )

      expect(Ner.logExtraction).toHaveBeenCalledTimes(1)
      expect(entities.length).toBe(3)
      expect(entities.map(e => e.entity)).toEqual(['color', 'color', 'color'])
      expect(entities.map(e => e.sourceText)).toEqual(['blue', 'white', 'red'])
    })
  })
})

View File

@ -61,13 +61,18 @@ describe('NLU', () => {
}) })
test('executes brain with the fallback value (object)', async () => { test('executes brain with the fallback value (object)', async () => {
const fallbackObj = { foo: 'bar' } const query = 'Thisisaqueryexampletotestfallbacks'
const fallbackObj = {
query,
entities: [],
classification: { package: 'leon', module: 'randomnumber', action: 'run' }
}
const nlu = new Nlu() const nlu = new Nlu()
nlu.brain = { execute: jest.fn() } nlu.brain = { execute: jest.fn() }
Nlu.fallback = jest.fn(() => fallbackObj) Nlu.fallback = jest.fn(() => fallbackObj)
await nlu.loadModel(global.paths.classifier) await nlu.loadModel(global.paths.classifier)
expect(await nlu.process('Thisisaqueryexampletotestfallbacks')).toBeTruthy() expect(await nlu.process(query)).toBeTruthy()
expect(nlu.brain.execute.mock.calls[0][0]).toBe(fallbackObj) expect(nlu.brain.execute.mock.calls[0][0]).toBe(fallbackObj)
Nlu.fallback = nluFallbackTmp // Need to give back the real fallback method Nlu.fallback = nluFallbackTmp // Need to give back the real fallback method
}) })

View File

@ -15,6 +15,13 @@ describe('string helper', () => {
}) })
}) })
describe('snakeToPascalCase()', () => {
test('transforms snake_case string to PascalCase', () => {
expect(string.snakeToPascalCase('leon')).toBe('Leon')
expect(string.snakeToPascalCase('this_is_leon')).toBe('ThisIsLeon')
})
})
describe('random()', () => { describe('random()', () => {
test('generates a random string with a length defined by a given number', () => { test('generates a random string with a length defined by a given number', () => {
const s = string.random(6) const s = string.random(6)
@ -28,4 +35,12 @@ describe('string helper', () => {
expect(string.removeAccents('àâèéêëîïôöûüùÛÜç')).toBe('aaeeeeiioouuuUUc') expect(string.removeAccents('àâèéêëîïôöûüùÛÜç')).toBe('aaeeeeiioouuuUUc')
}) })
}) })
describe('removeEndPunctuation()', () => {
test('removes end-punctuation', () => {
expect(string.removeEndPunctuation('Who are you?')).toBe('Who are you')
expect(string.removeEndPunctuation('This is great.')).toBe('This is great')
expect(string.removeEndPunctuation('This string has no punctuation')).toBe('This string has no punctuation')
})
})
}) })