2021-04-03 05:59:07 +03:00
|
|
|
import { dockStart } from '@nlpjs/basic'
|
2019-02-10 15:26:50 +03:00
|
|
|
import dotenv from 'dotenv'
|
|
|
|
import fs from 'fs'
|
|
|
|
import path from 'path'
|
|
|
|
|
|
|
|
import log from '@/helpers/log'
|
|
|
|
import string from '@/helpers/string'
|
|
|
|
|
|
|
|
import { langs } from '../core/langs.json'
|
|
|
|
|
|
|
|
// Run dotenv before anything reads process.env: LEON_LANG is looked up below
// when no language is given on the command line
dotenv.config()
|
|
|
|
|
|
|
|
/**
 * Training script
 *
 * Feeds the expressions of every package found in the "packages" directory
 * to an NLP.js instance and trains it. The model file name is handed to
 * NLP.js through its settings.
 *
 * npm run train expressions
 * npm run train expressions:en
 *
 * The language is taken from the part after ":" when present; otherwise it is
 * derived from the LEON_LANG environment variable via core/langs.json.
 *
 * @returns {Promise<void>} Resolves once the training is done. Rejects with an
 * Error when the training type is unknown, when the language cannot be
 * resolved, when an expressions file cannot be read/parsed or when the
 * training itself fails. Every failure is logged before the rejection.
 */
export default async () => {
  const packagesDir = 'packages'
  const modelFileName = 'server/src/data/leon-model.nlp'
  const requested = process.argv[2] ? process.argv[2].toLowerCase() : 'expressions'
  // "expressions:en" -> type "expressions", requestedLang "en"
  const [type, requestedLang] = requested.split(':')

  try {
    if (type !== 'expressions') {
      throw new Error(`"${type}" training type is unknown. Try "npm run train expressions"`)
    }

    let lang = requestedLang

    if (!requested.includes(':')) {
      // No explicit language: fall back to the one configured for Leon
      const defaultLang = langs[process.env.LEON_LANG]

      if (!defaultLang) {
        // Fail loudly instead of crashing on "undefined.short"
        throw new Error(`Cannot resolve the training language: LEON_LANG "${process.env.LEON_LANG}" is not defined in core/langs.json`)
      }

      lang = defaultLang.short.toLowerCase().slice(0, 2)
    }

    const dock = await dockStart({ use: ['Basic'] })
    const nlp = dock.get('nlp')

    nlp.settings.modelFileName = modelFileName
    // NOTE(review): presumably the minimum score for an intent to match; confirm against the NLP.js docs
    nlp.settings.threshold = 0.8

    nlp.addLanguage(lang)

    // Only directories are packages; plain files living in the folder are skipped
    const packages = fs.readdirSync(packagesDir)
      .filter((entity) => fs.statSync(path.join(packagesDir, entity)).isDirectory())

    for (const packageName of packages) {
      log.info(`Training "${string.ucfirst(packageName)}" package modules expressions...`)

      const expressionsObj = JSON.parse(fs.readFileSync(`${packagesDir}/${packageName}/data/expressions/${lang}.json`, 'utf8'))

      for (const [moduleName, actions] of Object.entries(expressionsObj)) {
        for (const [action, { expressions }] of Object.entries(actions)) {
          // Intents are named "<module>.<action>" and grouped by package (domain)
          const intent = `${moduleName}.${action}`

          nlp.assignDomain(lang, intent, packageName)

          for (const expression of expressions) {
            nlp.addDocument(lang, expression, intent)
          }
        }

        log.success(`"${string.ucfirst(moduleName)}" module expressions trained`)
      }
    }

    try {
      await nlp.train()
    } catch (e) {
      // Keep the historical message; it is logged by the outer catch
      throw new Error(`Failed to save NLP model: ${e}`)
    }

    log.success(`NLP model saved in ${modelFileName}`)
  } catch (e) {
    log.error(e.message)

    throw e
  }
}
|