1
1
mirror of https://github.com/leon-ai/leon.git synced 2024-12-11 09:12:40 +03:00
leon/scripts/train.js

167 lines
5.6 KiB
JavaScript

import { containerBootstrap } from '@nlpjs/core-loader'
import { Nlp } from '@nlpjs/nlp'
import { LangAll } from '@nlpjs/lang-all'
import dotenv from 'dotenv'
import fs from 'fs'
import path from 'path'
import log from '@/helpers/log'
import lang from '@/helpers/lang'
import domain from '@/helpers/domain'
import string from '@/helpers/string'
import json from '@/helpers/json'
// Initialise dotenv as soon as this module is loaded (per the dotenv docs,
// this reads a .env file and exposes its entries through process.env)
dotenv.config()
/**
 * Training utterance samples script
 *
 * Usage (from the original header): npm run train [en or fr]
 * NOTE(review): the function below takes no language argument; it trains
 * every language returned by lang.getShortLangs() — confirm the usage line.
 */

/**
 * Build the lookup used to resolve variables inside answers.
 *
 * @param {Object} [variables] - Variables declared in a skill NLU file (name -> value)
 * @returns {Object} Map whose keys are the variable names wrapped in `%`
 */
const buildAnswerVariables = (variables) => {
  const variablesObj = { }

  if (variables) {
    for (const key of Object.keys(variables)) {
      variablesObj[`%${key}%`] = variables[key]
    }
  }

  return variablesObj
}

/**
 * Convert the entities of a skill NLU file into the object handed to
 * nlp.addEntities(): for every entity, each option name is mapped to its
 * list of synonyms.
 *
 * @param {Object} entities - Entities declared in a skill NLU file
 * @returns {Object} `{ [entityName]: { options: { [optionName]: synonyms } } }`
 */
const buildEntities = (entities) => {
  const newEntitiesObj = { }

  for (const entityName of Object.keys(entities)) {
    const { options: entityOptions } = entities[entityName]
    const options = { }

    for (const optionName of Object.keys(entityOptions)) {
      options[optionName] = entityOptions[optionName].synonyms
    }

    newEntitiesObj[entityName] = { options }
  }

  return newEntitiesObj
}

/**
 * Feed the NLU data of one skill, for one language, into the NLP instance.
 * Nothing is added when the skill has no `nlu/<lang>.json` file.
 *
 * @param {Object} nlp - NLP instance obtained from the container
 * @param {string} shortLang - Short language code currently being trained
 * @param {Object} currentDomain - Domain object owning the skill
 * @param {string} skillKey - Key of the skill inside `currentDomain.skills`
 * @returns {Promise<void>}
 */
const trainSkill = async (nlp, shortLang, currentDomain, skillKey) => {
  const currentSkill = currentDomain.skills[skillKey]
  const { name: skillName } = currentSkill

  log.info(`[${shortLang}] Using "${skillKey}" skill NLU data`)

  const nluFilePath = path.join(currentSkill.path, 'nlu', `${shortLang}.json`)

  if (!fs.existsSync(nluFilePath)) {
    return
  }

  const { actions, entities, variables } = await json.loadNluData(nluFilePath, shortLang)

  // Variables and entities are declared once per file, so prepare them once
  // instead of rebuilding them for every action
  const isDialog = currentSkill.type === 'dialog'
  const variablesObj = isDialog ? buildAnswerVariables(variables) : { }
  const hasVariables = Object.keys(variablesObj).length > 0
  const newEntitiesObj = entities ? buildEntities(entities) : null

  for (const actionName of Object.keys(actions)) {
    const intent = `${skillName}.${actionName}`
    const { utterance_samples: utteranceSamples, answers } = actions[actionName]

    nlp.assignDomain(shortLang, intent, currentDomain.name)

    /**
     * TODO:
     * 1. Merge person, location and organization to the
     * NER before processing NLU (cf. line 210 in nlu.js): OK
     * 2. Grab intents with slots
     * 3. .addSlot() as per the slots config
     * 4. Train resolvers (affirm_deny)
     */
    // Work in progress: a single slot is hard-coded for this intent until
    // slots are read from the skills configuration (see TODO above)
    if (intent === 'guess_the_number.start') {
      nlp.slotManager.addSlot(intent, 'boolean', true, { [shortLang]: 'How many players?' })
    }

    for (const utteranceSample of utteranceSamples) {
      nlp.addDocument(shortLang, utteranceSample, intent)
    }

    // Train NLG if the skill has a dialog type
    if (isDialog) {
      // An action may have no answers at all
      for (const answer of answers ?? []) {
        // Dynamic variables binding if any variable is declared
        const resolvedAnswer = hasVariables
          ? string.pnr(answer, variablesObj)
          : answer

        nlp.addAnswer(shortLang, intent, resolvedAnswer)
      }
    }

    // Add entities annotations (@...); registered alongside every action
    if (newEntitiesObj) {
      nlp.addEntities(newEntitiesObj, shortLang)
    }
  }
}

/**
 * Train the NLP model with the utterance samples, answers and entities of
 * every skill of every domain, for each language from lang.getShortLangs().
 *
 * @returns {Promise<void>} Resolves once nlp.train() has completed; rejects
 * with the underlying error when preparing the data or training fails
 */
const train = async () => {
  const modelFileName = 'core/data/leon-model.nlp'
  let nlp

  try {
    const container = await containerBootstrap()

    container.use(Nlp)
    container.use(LangAll)

    nlp = container.get('nlp')
    const nluManager = container.get('nlu-manager')

    nluManager.settings.log = false
    nluManager.settings.trainByDomain = true
    nlp.settings.forceNER = true // https://github.com/axa-group/nlp.js/blob/master/examples/17-ner-nlg/index.js
    nlp.settings.calculateSentiment = true
    nlp.settings.modelFileName = modelFileName
    nlp.settings.threshold = 0.8

    const [domainKeys, domains] = await Promise.all([domain.list(), domain.getDomainsObj()])
    const shortLangs = lang.getShortLangs()

    for (const shortLang of shortLangs) {
      nlp.addLanguage(shortLang)

      for (const domainKey of domainKeys) {
        const currentDomain = domains[domainKey]
        const skillKeys = Object.keys(currentDomain.skills)

        log.info(`[${shortLang}] Training "${domainKey}" domain model...`)

        for (const skillKey of skillKeys) {
          // Skills are processed one after the other to keep the log output ordered
          await trainSkill(nlp, shortLang, currentDomain, skillKey) // eslint-disable-line no-await-in-loop
        }

        log.success(`[${shortLang}] "${domainKey}" domain trained`)
      }
    }
  } catch (e) {
    log.error(e.message)
    throw e
  }

  try {
    await nlp.train()

    log.success(`NLP model saved in ${modelFileName}`)
  } catch (e) {
    log.error(`Failed to save NLP model: ${e}`)
    // Propagate the real failure so callers do not receive an empty rejection
    throw e
  }
}

export default train