mirror of https://github.com/leon-ai/leon.git synced 2024-09-11 10:25:40 +03:00

refactor(server): switch from classifier to NLP model

louistiti 2021-04-01 17:24:59 +08:00
parent ed7f0bf455
commit 99dba81122
No known key found for this signature in database
GPG Key ID: 7ECA3DD523793FE6
14 changed files with 31 additions and 32 deletions

.gitignore
View File

@@ -23,6 +23,6 @@ debug.log
 !**/*.sample*
 packages/**/config/config.json
 packages/**/data/db/*.json
-server/src/data/expressions/leon-model.nlp
+server/src/data/leon-model.nlp
 package.json.backup
 .python-version

View File

@@ -22,7 +22,7 @@ export default () => new Promise(async (resolve, reject) => {
   const googleCloudPath = 'server/src/config/voice/google-cloud.json'
   const watsonSttPath = 'server/src/config/voice/watson-stt.json'
   const watsonTtsPath = 'server/src/config/voice/watson-tts.json'
-  const classifierPath = 'server/src/data/expressions/classifier.json'
+  const nlpModelPath = 'server/src/data/leon-model.nlp'
   const report = {
     can_run: { title: 'Run', type: 'error', v: true },
     can_run_module: { title: 'Run modules', type: 'error', v: true },
@@ -87,13 +87,13 @@ export default () => new Promise(async (resolve, reject) => {
     log.error(`${e}\n`)
   }
-  // Classifier checking
+  // NLP model checking
-  log.info('Classifier state')
-  if (!fs.existsSync(classifierPath) || !Object.keys(fs.readFileSync(classifierPath)).length) {
+  log.info('NLP model state')
+  if (!fs.existsSync(nlpModelPath) || !Object.keys(fs.readFileSync(nlpModelPath)).length) {
     report.can_text.v = false
     Object.keys(report).forEach((item) => { if (item.indexOf('stt') !== -1 || item.indexOf('tts') !== -1) report[item].v = false })
-    log.error('Classifier not found or broken. Try to generate a new one: "npm run train expressions"\n')
+    log.error('NLP model not found or broken. Try to generate a new one: "npm run train expressions"\n')
   } else {
     log.success('Found and valid\n')
   }
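
The condition above only asserts that leon-model.nlp exists and is non-empty: fs.readFileSync() returns a Buffer, so Object.keys(...).length is its byte count. A minimal standalone sketch of the same health check (the helper name isNlpModelUsable is illustrative, not part of the codebase):

import fs from 'fs'

// Sketch: same intent as the check above, written as an explicit helper.
// The model is considered usable when the file exists and has content.
const isNlpModelUsable = (modelPath) => fs.existsSync(modelPath) && fs.statSync(modelPath).size > 0

if (!isNlpModelUsable('server/src/data/leon-model.nlp')) {
  console.error('NLP model not found or broken. Try to generate a new one: "npm run train expressions"')
}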

View File

@@ -20,7 +20,7 @@ dotenv.config()
 export default () => new Promise(async (resolve, reject) => {
   const { argv } = process
   const packagesDir = 'packages'
-  const modelFileName = 'server/src/data/expressions/leon-model.nlp'
+  const modelFileName = 'server/src/data/leon-model.nlp'
   let type = (argv[2]) ? argv[2].toLowerCase() : 'expressions'
   let lang = ''
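
For context, a minimal sketch of how a model could be produced at the path modelFileName now points to, assuming the @nlpjs container packages that the NLU loader below uses (containerBootstrap, Nlp, a language pack); the sample utterance, intent and save() call are assumptions for illustration, not Leon's actual training code:

import { containerBootstrap } from '@nlpjs/core'
import { Nlp } from '@nlpjs/nlp'
import { LangEn } from '@nlpjs/lang-en-min'

const modelFileName = 'server/src/data/leon-model.nlp'

async function trainSketch () {
  const container = await containerBootstrap()
  container.use(Nlp)
  container.use(LangEn)

  const nlp = container.get('nlp')
  nlp.settings.autoSave = false
  nlp.addLanguage('en')

  // Placeholder expression/intent pair for illustration only
  nlp.addDocument('en', 'how are you', 'greetings.howareyou')
  nlp.addAnswer('en', 'greetings.howareyou', 'Doing great, thanks!')

  await nlp.train()
  // Persist the trained model where the NLU expects to find it;
  // save() is assumed here as the counterpart of the nlp.load() call shown further down
  await nlp.save(modelFileName)
}

trainSketch()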

View File

@@ -11,13 +11,13 @@ class Brain {
   constructor (socket, lang) {
     this.socket = socket
     this.lang = lang
-    this.broca = JSON.parse(fs.readFileSync(`${__dirname}/../data/expressions/en.json`, 'utf8'))
+    this.broca = JSON.parse(fs.readFileSync(`${__dirname}/../data/en.json`, 'utf8'))
     this.process = { }
     this.interOutput = { }
     this.finalOutput = { }
     // Read into the language file
-    const file = `${__dirname}/../data/expressions/${this.lang}.json`
+    const file = `${__dirname}/../data/${this.lang}.json`
     if (fs.existsSync(file)) {
       this.broca = JSON.parse(fs.readFileSync(file, 'utf8'))
     }
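
The answer files now live directly under server/src/data, with en.json as the default and a per-language override when a matching file exists. A tiny sketch of that fallback pattern (resolveBrocaFile is an illustrative helper, not part of the codebase):

import fs from 'fs'
import { join } from 'path'

// Default to the English answers, then override if the requested language file exists
const resolveBrocaFile = (dataDir, lang) => {
  const localized = join(dataDir, `${lang}.json`)
  return fs.existsSync(localized) ? localized : join(dataDir, 'en.json')
}

// broca now holds the answers for the requested language (or English as a fallback)
const broca = JSON.parse(fs.readFileSync(resolveBrocaFile('server/src/data', 'fr'), 'utf8'))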

View File

@@ -26,7 +26,7 @@ class Nlu {
    */
   loadModel () {
     return new Promise(async (resolve, reject) => {
-      if (!fs.existsSync(join(__dirname, '../data/expressions/leon-model.nlp'))) {
+      if (!fs.existsSync(join(__dirname, '../data/leon-model.nlp'))) {
         log.title('NLU')
         reject({ type: 'warning', obj: new Error('The NLP model does not exist, please run: npm run train expressions') })
       } else {
@@ -46,7 +46,7 @@ class Nlu {
        this.nlp = container.get('nlp')
        this.nlp.settings.forceNER = true
-       await this.nlp.load(join(__dirname, '../data/expressions/leon-model.nlp'))
+       await this.nlp.load(join(__dirname, '../data/leon-model.nlp'))
        log.success('NLP model loaded')
        resolve()
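
Loading therefore goes through the nlp.js container and a single .nlp file. A standalone sketch of that path, under the assumption that @nlpjs/core, @nlpjs/nlp and a language pack are available (the real loadModel() also wires Leon's logging and error handling):

import fs from 'fs'
import { containerBootstrap } from '@nlpjs/core'
import { Nlp } from '@nlpjs/nlp'
import { LangEn } from '@nlpjs/lang-en-min'

const modelPath = 'server/src/data/leon-model.nlp'

async function loadModelSketch () {
  if (!fs.existsSync(modelPath)) {
    throw new Error('The NLP model does not exist, please run: npm run train expressions')
  }

  const container = await containerBootstrap()
  container.use(Nlp)
  container.use(LangEn)

  const nlp = container.get('nlp')
  nlp.settings.forceNER = true // same flag set in the diff above
  await nlp.load(modelPath)    // reads the trained leon-model.nlp
  return nlp
}

// Usage sketch: classify an utterance once the model is loaded
loadModelSketch().then((nlp) => nlp.process('en', 'hello'))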

View File

@@ -24,5 +24,5 @@ process.env.LEON_NODE_ENV = 'testing'
 process.env.LEON_TIME_ZONE = global.date.time_zone
 beforeAll(async () => {
-  await global.nlu.loadModel(global.paths.classifier)
+  await global.nlu.loadModel(global.paths.nlp_model)
 })

View File

@@ -35,10 +35,10 @@ describe('NLU modules', () => {
     beforeAll(async () => {
       process.env.LEON_LANG = langKeys[i]
-      // Generate new classifier for the tested language
+      // Generate new NLP model for the tested language
       await command(`npm run train expressions:${lang.short}`, { shell: true })
-      // Load the new classifier
-      await nlu.loadModel(global.paths.classifier)
+      // Load the new NLP model
+      await nlu.loadModel(global.paths.nlp_model)
     })
     for (let j = 0; j < packages.length; j += 1) {

View File

@@ -3,7 +3,7 @@ import path from 'path'
 describe('no punctuation', () => {
   const rootFolders = [
-    'server/src/data/expressions'
+    'server/src/data'
   ]
   const punctuations = ['.', ';', ':', '?', '!']
   const findPunctuation = (s) => punctuations.includes(s[s.length - 1])
@@ -33,8 +33,7 @@ describe('no punctuation', () => {
       const way = path.join(dir, entities[i])
       if (fs.statSync(way).isDirectory()) {
         list(way)
-      } else if (entities[i].indexOf('.json') !== -1
-        && entities[i].indexOf('classifier.json') === -1) {
+      } else if (entities[i].indexOf('.json') !== -1) {
         const jsonFile = path.join(global.paths.root, dir, entities[i])
         const json = JSON.parse(fs.readFileSync(jsonFile, 'utf8'))

View File

@@ -2,8 +2,8 @@ global.paths = {
   root: `${__dirname}/..`,
   packages: `${__dirname}/../packages`,
   server: `${__dirname}/../server/src`,
-  classifier: `${__dirname}/../server/src/data/expressions/classifier.json`,
-  broken_classifier: `${__dirname}/assets/broken-classifier.json`,
+  nlp_model: `${__dirname}/../server/src/data/leon-model.nlp`,
+  broken_nlp_model: `${__dirname}/assets/broken-leon-model.nlp`,
   expressions: `${__dirname}/assets/expressions.json`,
   wave_speech: `${__dirname}/assets/speech-test.wav`,
   wave_speech_8: `${__dirname}/assets/speech-8kHz-test.wav`

View File

@@ -10,39 +10,39 @@ describe('NLU', () => {
   })
   describe('loadModel()', () => {
-    test('returns warning classifier does not exist', async () => {
+    test('returns warning NLP model does not exist', async () => {
       const nlu = new Nlu()
       try {
-        await nlu.loadModel('ghost-classifier.json')
+        await nlu.loadModel('ghost-model.nlp')
       } catch (e) {
         expect(e.type).toBe('warning')
       }
     })
-    test('rejects because of a broken classifier', async () => {
+    test('rejects because of a broken NLP model', async () => {
       const nlu = new Nlu()
       nlu.brain = { talk: jest.fn(), wernicke: jest.fn(), socket: { emit: jest.fn() } }
       try {
-        await nlu.loadModel(global.paths.broken_classifier)
+        await nlu.loadModel(global.paths.broken_nlp_model)
       } catch (e) {
         expect(e.type).toBe('error')
       }
     })
-    test('loads the classifier', async () => {
+    test('loads the NLP model', async () => {
       const nlu = new Nlu()
-      await nlu.loadModel(global.paths.classifier)
-      expect(nlu.classifier).not.toBeEmpty()
+      await nlu.loadModel(global.paths.nlp_model)
+      expect(nlu.nlp_model).not.toBeEmpty()
     })
   })
   describe('process()', () => {
     const nluFallbackTmp = Nlu.fallback
-    test('returns false because the classifier is empty', async () => {
+    test('returns false because the NLP model is empty', async () => {
       const nlu = new Nlu()
       nlu.brain = { talk: jest.fn(), wernicke: jest.fn(), socket: { emit: jest.fn() } }
@@ -53,7 +53,7 @@ describe('NLU', () => {
       const nlu = new Nlu()
       nlu.brain = { talk: jest.fn(), wernicke: jest.fn(), socket: { emit: jest.fn() } }
-      await nlu.loadModel(global.paths.classifier)
+      await nlu.loadModel(global.paths.nlp_model)
       expect(await nlu.process('Unknown query')).toBeFalsy()
       expect(nlu.brain.talk).toHaveBeenCalledTimes(1)
     })
@@ -69,7 +69,7 @@ describe('NLU', () => {
       nlu.brain = { execute: jest.fn() }
       Nlu.fallback = jest.fn(() => fallbackObj)
-      await nlu.loadModel(global.paths.classifier)
+      await nlu.loadModel(global.paths.nlp_model)
       expect(await nlu.process(query)).toBeTruthy()
       expect(nlu.brain.execute.mock.calls[0][0]).toBe(fallbackObj)
       Nlu.fallback = nluFallbackTmp // Need to give back the real fallback method
@@ -79,7 +79,7 @@ describe('NLU', () => {
       const nlu = new Nlu()
       nlu.brain = { execute: jest.fn() }
-      await nlu.loadModel(global.paths.classifier)
+      await nlu.loadModel(global.paths.nlp_model)
       expect(await nlu.process('Hello')).toBeTruthy()
       expect(nlu.brain.execute).toHaveBeenCalledTimes(1)
     })

View File

@@ -1,7 +1,7 @@
 import 'jest-extended'
 import moment from 'moment-timezone'
-import expressions from '@/data/expressions/en.json'
+import expressions from '@/data/en.json'
 jest.setTimeout(60000)