feat(server): on-the-fly language switching

2024-11-23 20:12:08 +03:00 · 2022-02-13 01:42:29 +08:00 · 2022-02-13 01:42:29 +08:00 · f24513a223
commit f24513a223
parent 5edcc679f8
16 changed files with 90 additions and 66 deletions
--- a/.env.sample
+++ b/.env.sample
@ -28,6 +28,8 @@ LEON_TTS_PROVIDER=flite
 LEON_OVER_HTTP=true
 # HTTP API key (use "npm run generate:http-api-key" to regenerate one)
 LEON_HTTP_API_KEY=
+# Language used for the HTTP API
+LEON_HTTP_API_LANG=en-US

 # Enable/disable collaborative logger
 LEON_LOGGER=true
--- a/hotword/index.js
+++ b/hotword/index.js
@ -14,9 +14,12 @@ process.env.LEON_HOST = process.env.LEON_HOST || 'http://localhost'
 process.env.LEON_PORT = process.env.LEON_PORT || 1337
 const url = `${process.env.LEON_HOST}:${process.env.LEON_PORT}`
 const socket = io(url)
+const { argv } = process
+const lang = argv[2] || 'en'

 socket.on('connect', () => {
  socket.emit('init', 'hotword-node')
+  console.log('Language:', lang)
  console.log('Connected to the server')
  console.log('Waiting for hotword...')
 })
@ -33,9 +36,9 @@ request.get(`${url}/api/v1/info`)
      const models = new Models()

      models.add({
-        file: `${__dirname}/models/leon-${res.body.lang.short}.pmdl`,
+        file: `${__dirname}/models/leon-${lang}.pmdl`,
        sensitivity: '0.5',
-        hotwords: `leon-${res.body.lang.short}`
+        hotwords: `leon-${lang}`
      })

      const detector = new Detector({
--- a/scripts/generate/generate-pkgs-endpoints.js
+++ b/scripts/generate/generate-pkgs-endpoints.js
@ -18,7 +18,7 @@ export default () => new Promise(async (resolve, reject) => {
  const packagesDir = 'packages'
  const outputFile = '/core/pkgs-endpoints.json'
  const outputFilePath = path.join(__dirname, `../..${outputFile}`)
-  const lang = langs[process.env.LEON_LANG].short.toLowerCase().substr(0, 2)
+  const lang = langs[process.env.LEON_HTTP_API_LANG].short

  try {
    const packages = fs.readdirSync(packagesDir)
@ -49,6 +49,7 @@ export default () => new Promise(async (resolve, reject) => {
      }
    }

+    // Force if a language is given
    if (isFileNeedToBeGenerated) {
      log.info('Parsing packages configuration...')

--- a/scripts/setup/setup.js
+++ b/scripts/setup/setup.js
@ -16,7 +16,6 @@ import setupPythonPackages from './setup-python-packages'
 (async () => {
  try {
    // Required env vars to setup
-    process.env.LEON_LANG = 'en-US'
    process.env.PIPENV_PIPFILE = 'bridges/python/Pipfile'
    process.env.PIPENV_VENV_IN_PROJECT = 'true'

--- a/scripts/train.js
+++ b/scripts/train.js
@ -7,8 +7,7 @@ import path from 'path'

 import log from '@/helpers/log'
 import string from '@/helpers/string'
-
-// import { langs } from '@@/core/langs.json'
+import lang from '@/helpers/lang'

 dotenv.config()

@ -18,12 +17,8 @@ dotenv.config()
 * npm run train [en or fr]
 */
 export default () => new Promise(async (resolve, reject) => {
-  // const { argv } = process
  const packagesDir = 'packages'
  const modelFileName = 'server/src/data/leon-model.nlp'
-  /* const lang = argv[2]
-    ? argv[2].toLowerCase()
-    : langs[process.env.LEON_LANG].short.toLowerCase().substr(0, 2) */

  try {
    const container = await containerBootstrap()
@ -33,10 +28,6 @@ export default () => new Promise(async (resolve, reject) => {

    const nlp = container.get('nlp')
    const nluManager = container.get('nlu-manager')
-    // const dock = await dockStart({ use: ['Basic', 'LangEn', 'LangFr'] })
-
-    // const nlp = dock.get('nlp')
-    // const nluManager = dock.get('nlu-manager')

    nluManager.settings.log = false
    nluManager.settings.trainByDomain = true
@ -44,10 +35,11 @@ export default () => new Promise(async (resolve, reject) => {
    nlp.settings.modelFileName = modelFileName
    nlp.settings.threshold = 0.8

-    // TODO: grab from core/langs.json
-    const langs = ['en', 'fr']
+    const shortLangs = lang.getShortLangs()
+
+    for (let h = 0; h < shortLangs.length; h += 1) {
+      const lang = shortLangs[h]

-    langs.forEach(async (lang) => {
      nlp.addLanguage(lang)

      const packages = fs.readdirSync(packagesDir)
@ -78,7 +70,7 @@ export default () => new Promise(async (resolve, reject) => {
          log.success(`[${lang}] "${string.ucfirst(module)}" module utterance samples trained`)
        }
      }
-    })
+    }

    try {
      await nlp.train()
--- a/server/src/core/brain.js
+++ b/server/src/core/brain.js
@ -5,23 +5,18 @@ import { langs } from '@@/core/langs.json'
 import log from '@/helpers/log'
 import string from '@/helpers/string'
 import Synchronizer from '@/core/synchronizer'
+import lang from '@/helpers/lang'

 class Brain {
-  constructor (lang) {
-    this.lang = lang
-    this.broca = JSON.parse(fs.readFileSync(`${__dirname}/../data/en.json`, 'utf8'))
+  constructor () {
+    this._lang = 'en'
+    this.broca = JSON.parse(fs.readFileSync(`${__dirname}/../data/${this._lang}.json`, 'utf8'))
    this.process = { }
    this.interOutput = { }
    this.finalOutput = { }
    this._socket = { }
    this._tts = { }

-    // Read into the language file
-    const file = `${__dirname}/../data/${this.lang}.json`
-    if (fs.existsSync(file)) {
-      this.broca = JSON.parse(fs.readFileSync(file, 'utf8'))
-    }
-
    log.title('Brain')
    log.success('New instance')
  }
@ -42,6 +37,23 @@ class Brain {
    this._tts = newTts
  }

+  get lang () {
+    return this._lang // Short language code in use ("en" until the setter changes it)
+  }
+
+  set lang (newLang) {
+    this._lang = newLang
+    // Update broca
+    this.broca = JSON.parse(fs.readFileSync(`${__dirname}/../data/${this._lang}.json`, 'utf8'))
+
+    if (process.env.LEON_TTS === 'true') {
+      this._tts.init(this._lang, () => {
+        log.title('Brain')
+        log.info('Language has changed')
+      })
+    }
+  }
+
  /**
   * Delete intent object file
   */
@ -114,7 +126,7 @@ class Brain {
      const speeches = []

      // Ask to repeat if Leon is not sure about the request
-      if (obj.classification.confidence < langs[process.env.LEON_LANG].min_confidence) {
+      if (obj.classification.confidence < langs[lang.getLongCode(this._lang)].min_confidence) {
        if (!opts.mute) {
          const speech = `${this.wernicke('random_not_sure')}.`

@ -143,7 +155,7 @@ class Brain {
           */
          const intentObj = {
            id: utteranceId,
-            lang: langs[process.env.LEON_LANG].short,
+            lang: this._lang,
            package: obj.classification.package,
            module: obj.classification.module,
            action: obj.classification.action,
@ -198,7 +210,6 @@ class Brain {

        // Handle error
        this.process.stderr.on('data', (data) => {
-          console.log('data', data.toString())
          const speech = `${this.wernicke('random_package_module_errors', '',
            { '%module_name%': moduleName, '%package_name%': packageName })}!`
          if (!opts.mute) {
@ -269,7 +280,7 @@ class Brain {

          resolve({
            utteranceId,
-            lang: langs[process.env.LEON_LANG].short,
+            lang: this._lang,
            ...obj,
            speeches,
            executionTime // In ms, module execution time only
--- a/server/src/core/http-server/api/info/get.js
+++ b/server/src/core/http-server/api/info/get.js
@ -1,4 +1,3 @@
-import { langs } from '@@/core/langs.json'
 import { version } from '@@/package.json'
 import log from '@/helpers/log'

@ -25,7 +24,6 @@ const getInfo = async (fastify, options) => {
        enabled: process.env.LEON_TTS === 'true',
        provider: process.env.LEON_TTS_PROVIDER
      },
-      lang: langs[process.env.LEON_LANG],
      version
    })
  })
--- a/server/src/core/http-server/server.js
+++ b/server/src/core/http-server/server.js
@ -4,7 +4,6 @@ import socketio from 'socket.io'
 import { join } from 'path'

 import { version } from '@@/package.json'
-import { langs } from '@@/core/langs.json'
 import { endpoints } from '@@/core/pkgs-endpoints.json'
 import Nlu from '@/core/nlu'
 import Brain from '@/core/brain'
@ -166,7 +165,7 @@ server.handleOnConnection = (socket) => {
        ttsState = 'enabled'

        tts = new Tts(socket, process.env.LEON_TTS_PROVIDER)
-        tts.init((ttsInstance) => {
+        tts.init('en', (ttsInstance) => {
          brain.tts = ttsInstance
        })
      }
@ -278,18 +277,12 @@ server.init = async () => {
  log.success(`The current env is ${process.env.LEON_NODE_ENV}`)
  log.success(`The current version is ${version}`)

-  if (!Object.keys(langs).includes(process.env.LEON_LANG) === true) {
-    process.env.LEON_LANG = 'en-US'
-    log.warning('The language you chose is not supported, then the default language has been applied')
-  }
-
-  log.success(`The current language is ${process.env.LEON_LANG}`)
  log.success(`The current time zone is ${date.timeZone()}`)

  const sLogger = (process.env.LEON_LOGGER !== 'true') ? 'disabled' : 'enabled'
  log.success(`Collaborative logger ${sLogger}`)

-  brain = new Brain(langs[process.env.LEON_LANG].short)
+  brain = new Brain()
  nlu = new Nlu(brain)

  // Train modules utterance samples
--- a/server/src/core/nlu.js
+++ b/server/src/core/nlu.js
@ -10,6 +10,7 @@ import { version } from '@@/package.json'
 import Ner from '@/core/ner'
 import log from '@/helpers/log'
 import string from '@/helpers/string'
+import lang from '@/helpers/lang'

 class Nlu {
  constructor (brain) {
@ -72,6 +73,7 @@ class Nlu {
      opts = opts || {
        mute: false // Close Leon mouth e.g. over HTTP
      }
+
      utterance = string.ucfirst(utterance)

      if (Object.keys(this.nlp).length === 0) {
@ -85,17 +87,9 @@ class Nlu {
        return reject(msg)
      }

-      const lang = langs[process.env.LEON_LANG].short
-      const guessedLang = await this.nlp.guessLanguage(utterance)
-
-      console.log('guessedLang', guessedLang)
-
      const result = await this.nlp.process(utterance)
-
-      console.log('result', result)
-
      const {
-        domain, intent, score
+        locale, domain, intent, score
      } = result
      const [moduleName, actionName] = intent.split('.')
      let obj = {
@ -109,6 +103,10 @@ class Nlu {
        }
      }

+      if (this.brain.lang !== locale) {
+        this.brain.lang = locale
+      }
+
      /* istanbul ignore next */
      if (process.env.LEON_LOGGER === 'true' && process.env.LEON_NODE_ENV !== 'testing') {
        this.request
@ -117,7 +115,7 @@ class Nlu {
          .send({
            version,
            utterance,
-            lang,
+            lang: this.brain.lang,
            classification: obj.classification
          })
          .then(() => { /* */ })
@ -125,7 +123,7 @@ class Nlu {
      }

      if (intent === 'None') {
-        const fallback = Nlu.fallback(obj, langs[process.env.LEON_LANG].fallbacks)
+        const fallback = Nlu.fallback(obj, langs[lang.getLongCode(locale)].fallbacks)

        if (fallback === false) {
          if (!opts.mute) {
@ -154,8 +152,8 @@ class Nlu {

      try {
        obj.entities = await this.ner.extractEntities(
-          lang,
-          join(__dirname, '../../../packages', obj.classification.package, `data/expressions/${lang}.json`),
+          this.brain.lang,
+          join(__dirname, '../../../packages', obj.classification.package, `data/expressions/${this.brain.lang}.json`),
          obj
        )
      } catch (e) /* istanbul ignore next */ {
--- a/server/src/helpers/lang.js
+++ b/server/src/helpers/lang.js
@ -0,0 +1,21 @@
+import { langs } from '@@/core/langs.json'
+
+const lang = { }
+
+lang.getShortLangs = () => Object.keys(langs).map((lang) => langs[lang].short)
+
+lang.getLongCode = (shortLang) => {
+  const langsArr = Object.keys(langs)
+
+  for (let i = 0; i < langsArr.length; i += 1) {
+    const { short } = langs[langsArr[i]]
+
+    if (short === shortLang) {
+      return langsArr[i]
+    }
+  }
+
+  return null
+}
+
+export default lang
--- a/server/src/tts/amazon-polly/synthesizer.js
+++ b/server/src/tts/amazon-polly/synthesizer.js
@ -22,14 +22,15 @@ let client = { }

 synthesizer.conf = {
  OutputFormat: 'mp3',
-  VoiceId: voices[process.env.LEON_LANG].VoiceId
+  VoiceId: ''
 }

 /**
 * Initialize Amazon Polly based on credentials in the JSON file
 */
-synthesizer.init = () => {
+synthesizer.init = (lang) => {
  const config = JSON.parse(fs.readFileSync(`${__dirname}/../../config/voice/amazon.json`, 'utf8'))
+  synthesizer.conf.VoiceId = voices[lang].VoiceId

  try {
    client = new Polly(config)
--- a/server/src/tts/flite/synthesizer.js
+++ b/server/src/tts/flite/synthesizer.js
@ -21,11 +21,11 @@ synthesizer.conf = {
 /**
 * There is nothing to initialize for this synthesizer
 */
-synthesizer.init = () => {
+synthesizer.init = (lang) => {
  const flitePath = 'bin/flite/flite'

  /* istanbul ignore if */
-  if (process.env.LEON_LANG !== 'en-US') {
+  if (lang !== 'en-US') {
    log.warning('The Flite synthesizer only accepts the "en-US" language for the moment')
  }

--- a/server/src/tts/google-cloud-tts/synthesizer.js
+++ b/server/src/tts/google-cloud-tts/synthesizer.js
@ -26,7 +26,7 @@ const voices = {
 let client = { }

 synthesizer.conf = {
-  voice: voices[process.env.LEON_LANG],
+  voice: '',
  audioConfig: {
    audioEncoding: 'MP3'
  }
@ -36,8 +36,9 @@ synthesizer.conf = {
 * Initialize Google Cloud Text-to-Speech based on credentials in the JSON file
 * The env variable "GOOGLE_APPLICATION_CREDENTIALS" provides the JSON file path
 */
-synthesizer.init = () => {
+synthesizer.init = (lang) => {
  process.env.GOOGLE_APPLICATION_CREDENTIALS = `${__dirname}/../../config/voice/google-cloud.json`
+  synthesizer.conf.voice = voices[lang]

  try {
    client = new tts.TextToSpeechClient()
--- a/server/src/tts/tts.js
+++ b/server/src/tts/tts.js
@ -2,6 +2,7 @@ import events from 'events'
 import fs from 'fs'

 import log from '@/helpers/log'
+import lang from '@/helpers/lang'

 class Tts {
  constructor (socket, provider) {
@ -16,6 +17,7 @@ class Tts {
    this.synthesizer = { }
    this.em = new events.EventEmitter()
    this.speeches = []
+    this.lang = 'en'

    log.title('TTS')
    log.success('New instance')
@ -24,9 +26,11 @@ class Tts {
  /**
   * Initialize the TTS provider
   */
-  init (cb) {
+  init (newLang, cb) {
    log.info('Initializing TTS...')

+    this.lang = newLang || this.lang
+
    if (!this.providers.includes(this.provider)) {
      log.error(`The TTS provider "${this.provider}" does not exist or is not yet supported`)

@ -43,7 +47,7 @@ class Tts {

    // Dynamically attribute the synthesizer
    this.synthesizer = require(`${__dirname}/${this.provider}/synthesizer`) // eslint-disable-line global-require
-    this.synthesizer.default.init(this.synthesizer.default.conf)
+    this.synthesizer.default.init(lang.getLongCode(this.lang))

    this.onSaved()

--- a/server/src/tts/watson-tts/synthesizer.js
+++ b/server/src/tts/watson-tts/synthesizer.js
@ -22,15 +22,16 @@ const voices = {
 let client = { }

 synthesizer.conf = {
-  voice: voices[process.env.LEON_LANG].voice,
+  voice: '',
  accept: 'audio/wav'
 }

 /**
 * Initialize Watson Text-to-Speech based on credentials in the JSON file
 */
-synthesizer.init = () => {
+synthesizer.init = (lang) => {
  const config = JSON.parse(fs.readFileSync(`${__dirname}/../../config/voice/watson-tts.json`, 'utf8'))
+  synthesizer.conf.voice = voices[lang].voice

  try {
    client = new Tts({
--- a/test/unit/server/core/ner.spec.js
+++ b/test/unit/server/core/ner.spec.js
@ -138,7 +138,6 @@ describe('NER', () => {
      )

      expect(Ner.logExtraction).toHaveBeenCalledTimes(1)
-      console.log('entities', entities)
      expect(entities.length).toBe(2)
      expect(entities.map((e) => e.entity)).toEqual(['start', 'animal'])
      expect(entities.map((e) => e.sourceText)).toEqual(['Please whistle as a', 'bird'])