1
1
mirror of https://github.com/leon-ai/leon.git synced 2024-10-26 18:18:46 +03:00

feat: new NLP training

This commit is contained in:
louistiti 2022-02-13 12:20:27 +08:00
parent db1246735c
commit d8023308d0
No known key found for this signature in database
GPG Key ID: 7ECA3DD523793FE6
3 changed files with 98 additions and 20 deletions

View File

@ -6,8 +6,8 @@ import fs from 'fs'
import path from 'path'
import log from '@/helpers/log'
import string from '@/helpers/string'
import lang from '@/helpers/lang'
import domain from '@/helpers/domain'
dotenv.config()
@ -17,7 +17,6 @@ dotenv.config()
* npm run train [en or fr]
*/
export default () => new Promise(async (resolve, reject) => {
const packagesDir = 'packages'
const modelFileName = 'server/src/data/leon-model.nlp'
try {
@ -35,6 +34,7 @@ export default () => new Promise(async (resolve, reject) => {
nlp.settings.modelFileName = modelFileName
nlp.settings.threshold = 0.8
const [domainKeys, domains] = await Promise.all([domain.list(), domain.getDomainsObj()])
const shortLangs = lang.getShortLangs()
for (let h = 0; h < shortLangs.length; h += 1) {
@ -42,33 +42,39 @@ export default () => new Promise(async (resolve, reject) => {
nlp.addLanguage(lang)
const packages = fs.readdirSync(packagesDir)
.filter((entity) => fs.statSync(path.join(packagesDir, entity)).isDirectory())
let utteranceSamplesObj = { }
for (let i = 0; i < domainKeys.length; i += 1) {
const currentDomain = domains[domainKeys[i]]
const skillKeys = Object.keys(currentDomain.skills)
for (let i = 0; i < packages.length; i += 1) {
log.info(`[${lang}] Training "${string.ucfirst(packages[i])}" package modules utterance samples...`)
log.info(`[${lang}] Training "${domainKeys[i]}" domain model...`)
utteranceSamplesObj = JSON.parse(fs.readFileSync(`${packagesDir}/${packages[i]}/data/expressions/${lang}.json`, 'utf8'))
for (let j = 0; j < skillKeys.length; j += 1) {
const { name: skillName } = currentDomain.skills[skillKeys[j]]
const currentSkill = currentDomain.skills[skillKeys[j]]
const modules = Object.keys(utteranceSamplesObj)
for (let j = 0; j < modules.length; j += 1) {
const module = modules[j]
const actions = Object.keys(utteranceSamplesObj[module])
log.info(`[${lang}] Using "${skillKeys[j]}" skill utterance samples`)
for (let k = 0; k < actions.length; k += 1) {
const action = actions[k]
const exprs = utteranceSamplesObj[module][action].utterance_samples
const nluFilePath = path.join(currentSkill.path, 'nlu', `${lang}.json`)
nlp.assignDomain(lang, `${module}.${action}`, packages[i])
if (fs.existsSync(nluFilePath)) {
const { actions } = JSON.parse(fs.readFileSync(nluFilePath, 'utf8'))
const actionsKeys = Object.keys(actions)
for (let l = 0; l < exprs.length; l += 1) {
nlp.addDocument(lang, exprs[l], `${module}.${action}`)
for (let k = 0; k < actionsKeys.length; k += 1) {
const actionName = actionsKeys[k]
const actionObj = actions[actionName]
const { utterance_samples: utteranceSamples } = actionObj
nlp.assignDomain(lang, `${skillName}.${actionName}`, currentDomain.name)
for (let l = 0; l < utteranceSamples.length; l += 1) {
nlp.addDocument(lang, utteranceSamples[l], `${skillName}.${actionName}`)
}
}
}
log.success(`[${lang}] "${string.ucfirst(module)}" module utterance samples trained`)
}
log.success(`[${lang}] "${domainKeys[i]}" domain trained`)
}
}

View File

@ -0,0 +1,46 @@
import fs from 'fs'
import path from 'path'
/**
 * Helpers to discover the domains and skills stored in the "skills"
 * folder of the current working directory.
 */
const domain = { }

/**
 * Read a file synchronously as UTF-8 and parse it as JSON.
 *
 * @param {string} filePath - Path of the JSON file
 * @returns {*} The parsed content
 * @throws If the file cannot be read or does not contain valid JSON
 */
const readJsonFile = (filePath) => JSON.parse(fs.readFileSync(filePath, 'utf8'))

/**
 * List the direct children of a folder that are themselves directories.
 *
 * @param {string} parentDir - Folder to scan
 * @returns {{ name: string, path: string }[]} Folder name and joined path of each sub-directory
 */
const listDirectories = (parentDir) => fs.readdirSync(parentDir)
  .map((entry) => ({ name: entry, path: path.join(parentDir, entry) }))
  .filter((entry) => fs.statSync(entry.path).isDirectory())

/**
 * Build an object describing every domain and the skills it contains.
 *
 * Each directory located in "<cwd>/skills" is treated as a domain and must
 * contain a "domain.json" file; each directory inside a domain is treated
 * as a skill and must contain a "skill.json" file.
 *
 * The result is keyed by the "name" property of "domain.json". Each value
 * holds the domain folder name, its path and a "skills" object keyed by the
 * "name" property of "skill.json" (each entry holding the skill folder name
 * and its path).
 *
 * @returns {Promise<Object>} The domains object
 * @throws Rejects if a folder cannot be read, or if a "domain.json" /
 * "skill.json" file is missing or is not valid JSON
 */
domain.getDomainsObj = async () => {
  const domainsObj = { }
  const domainsDir = path.join(process.cwd(), 'skills')

  for (const domainDir of listDirectories(domainsDir)) {
    /**
     * Parse the file from disk rather than using a dynamic import():
     * native ES modules do not expose JSON properties as named exports,
     * and this keeps the loading consistent with "skill.json"
     */
    const { name: domainName } = readJsonFile(path.join(domainDir.path, 'domain.json'))
    const skills = { }

    for (const skillDir of listDirectories(domainDir.path)) {
      const { name: skillName } = readJsonFile(path.join(skillDir.path, 'skill.json'))

      skills[skillName] = {
        name: skillDir.name,
        path: skillDir.path
      }
    }

    // Register the domain once, after all of its skills have been collected
    domainsObj[domainName] = {
      name: domainDir.name,
      path: domainDir.path,
      skills
    }
  }

  return domainsObj
}

/**
 * List the names of the available domains.
 *
 * @returns {Promise<string[]>} The keys of the object built by getDomainsObj()
 */
domain.list = async () => Object.keys(await domain.getDomainsObj())

export default domain

View File

@ -0,0 +1,26 @@
{
"actions": {
"create_list": {
"utterance_samples": [
"Crée la liste x",
"Crée une liste x"
],
"entities": [
{
"type": "trim",
"name": "list",
"conditions": [
{
"type": "after_last",
"from": "liste"
}
]
}
],
"answers": [
"Entendu, j'ai créé la liste \"%list%\".",
"C'est fait, j'ai créé votre liste \"%list%\"."
]
}
}
}