1
1
mirror of https://github.com/leon-ai/leon.git synced 2024-12-20 07:11:40 +03:00
leon/scripts/train.js

98 lines
2.8 KiB
JavaScript
Raw Normal View History

import { containerBootstrap } from '@nlpjs/core-loader'
import { Nlp } from '@nlpjs/nlp'
2019-02-10 15:26:50 +03:00
import dotenv from 'dotenv'
import fs from 'fs'
import path from 'path'
import log from '@/helpers/log'
import string from '@/helpers/string'
import { langs } from '../core/langs.json'
dotenv.config()
/**
* Training script
*
* npm run train expressions
* npm run train expressions:en
*/
2019-02-28 17:08:37 +03:00
/**
 * Train the NLP model with the expressions of every package
 *
 * The training target is read from the first CLI argument (process.argv[2]):
 *   - "expressions": use the language referenced by the LEON_LANG env variable
 *   - "expressions:<lang>": use the given language (e.g. "expressions:fr")
 * When no argument is given, "expressions" is assumed.
 *
 * Every failure is logged once, then the returned promise is rejected with
 * the corresponding Error (never with undefined).
 *
 * @returns {Promise<void>} Resolved once the model has been trained
 */
export default async () => {
  const packagesDir = 'packages'
  const modelFileName = 'server/src/data/expressions/leon-model.nlp'
  const target = (process.argv[2]) ? process.argv[2].toLowerCase() : 'expressions'
  // "expressions:fr" -> ['expressions', 'fr']; "expressions" -> ['expressions']
  const [type, requestedLang = ''] = target.split(':')
  let nlp

  try {
    if (type !== 'expressions') {
      throw new Error(`"${type}" training type is unknown. Try "npm run train expressions"`)
    }

    let lang = requestedLang
    if (lang === '') {
      // No language requested: fall back to the one configured via LEON_LANG
      const defaultLang = langs[process.env.LEON_LANG]
      if (!defaultLang) {
        throw new Error(`"${process.env.LEON_LANG}" is not a known LEON_LANG value. Try "npm run train expressions:en"`)
      }
      lang = defaultLang.short.toLowerCase().slice(0, 2)
    }

    const container = await containerBootstrap()
    container.use(Nlp)

    // French has its own language plugin; any other language uses the English one
    if (lang === 'fr') {
      const { LangFr } = require('@nlpjs/lang-fr') // eslint-disable-line global-require
      container.use(LangFr)
    } else {
      const { LangEn } = require('@nlpjs/lang-en') // eslint-disable-line global-require
      container.use(LangEn)
    }

    nlp = container.get('nlp')
    nlp.settings.modelFileName = modelFileName
    nlp.addLanguage(lang)

    // Each sub-directory of the packages directory is considered as a package
    const packages = fs.readdirSync(packagesDir)
      .filter((entity) => fs.statSync(path.join(packagesDir, entity)).isDirectory())

    packages.forEach((packageName) => {
      log.info(`Training "${string.ucfirst(packageName)}" package modules expressions...`)

      const expressionsFile = path.join(packagesDir, packageName, 'data', 'expressions', `${lang}.json`)
      // Expected layout: { <module>: { <action>: { expressions: [...] } } }
      const expressionsObj = JSON.parse(fs.readFileSync(expressionsFile, 'utf8'))

      Object.entries(expressionsObj).forEach(([moduleName, actions]) => {
        Object.entries(actions).forEach(([actionName, action]) => {
          const intent = `${moduleName}.${actionName}`

          // The package name is used as the domain of the "<module>.<action>" intent
          nlp.assignDomain(lang, intent, packageName)
          action.expressions.forEach((expression) => {
            nlp.addDocument(lang, expression, intent)
          })
        })

        log.success(`"${string.ucfirst(moduleName)}" module expressions trained`)
      })
    })
  } catch (e) {
    log.error(e.message)
    throw e
  }

  // Kept out of the block above so that a training failure is logged only once
  try {
    await nlp.train()

    log.success(`NLP model saved in ${modelFileName}`)
  } catch (e) {
    log.error(`Failed to save NLP model: ${e}`)
    throw e
  }
}