refactor: major improvement of the After Speech feature

2024-09-20 14:27:40 +03:00 · 2019-06-08 20:11:02 +08:00 · 2019-06-08 20:11:02 +08:00 · 77669cf3aa
commit 77669cf3aa
parent 3e133ea88c
15 changed files with 101 additions and 77 deletions
--- a/app/js/chatbot.es6.js
+++ b/app/js/chatbot.es6.js
@ -39,7 +39,7 @@ export default class Chatbot {

  isTyping (who, value) {
    if (who === 'leon') {
-      if (value === true) {
+      if (value) {
        this.enableTyping()
      } else if (value === false) {
        this.disableTyping()
@ -96,7 +96,7 @@ export default class Chatbot {

    this.feed.appendChild(container).appendChild(bubble)

-    if (save === true) {
+    if (save) {
      this.saveBubble(who, string)
    }
  }
--- a/app/js/client.es6.js
+++ b/app/js/client.es6.js
@ -12,6 +12,7 @@ export default class Client {
    this.parsedHistory = []
    this.info = res
    this.chatbot = new Chatbot()
+    this._recorder = { }
  }

  set input (newInput) {
@ -20,6 +21,14 @@ export default class Client {
    }
  }

+  set recorder (recorder) {
+    this._recorder = recorder
+  }
+
+  get recorder () {
+    return this._recorder
+  }
+
  init () {
    this.chatbot.init()

@ -46,11 +55,42 @@ export default class Client {
      const ctx = new AudioContext()
      const source = ctx.createBufferSource()

-      ctx.decodeAudioData(data, (buffer) => {
+      ctx.decodeAudioData(data.buffer, (buffer) => {
        source.buffer = buffer

        source.connect(ctx.destination)
        source.start(0)
+
+        /**
+         * When the after speech option is enabled and
+         * the answer is a final one
+         */
+        if (this.info.after_speech && data.is_final_answer) {
+          // Enable recording after the speech + 500ms
+          setTimeout(() => {
+            this._recorder.start(false)
+            this._recorder.enabled = true
+
+            // Check every second if the recorder is enabled to stop it
+            const id = setInterval(() => {
+              if (this._recorder.enabled) {
+                if (this._recorder.countSilenceAfterTalk <= 8) {
+                  // Stop recording if there was no noise for 8 seconds
+                  if (this._recorder.countSilenceAfterTalk === 8) {
+                    this._recorder.stop(false)
+                    this._recorder.enabled = false
+                    this._recorder.countSilenceAfterTalk = 0
+                    clearInterval(id)
+                  } else if (!this._recorder.noiseDetected) {
+                    this._recorder.countSilenceAfterTalk += 1
+                  } else {
+                    clearInterval(id)
+                  }
+                }
+              }
+            }, 1000)
+          }, data.duration + 500)
+        }
      })

      cb('audio-received')
--- a/app/js/loader.es6.js
+++ b/app/js/loader.es6.js
@ -8,7 +8,7 @@ export default class Loader {
    this.body = document.querySelector('body')

    this.load.on('settingup', (state) => {
-      if (state === true) {
+      if (state) {
        this.body.classList.add('settingup')
      } else {
        this.body.classList.remove('settingup')
--- a/app/js/main.es6.js
+++ b/app/js/main.es6.js
@ -34,11 +34,6 @@ document.addEventListener('DOMContentLoaded', () => {
        const client = new Client(config.app, serverUrl, input, res.body)
        let rec = { }
        let chunks = []
-        let enabled = false
-        let hotwordTriggered = false
-        let autoStartedAfterTalk = false
-        let noiseDetected = false
-        let countSilenceAfterTalk = 0
        let sLogger = ' enabled, thank you.'

        v.innerHTML += client.info.version
@ -53,6 +48,7 @@ document.addEventListener('DOMContentLoaded', () => {
          navigator.mediaDevices.getUserMedia({ audio: true }).then((stream) => {
            if (MediaRecorder) {
              rec = new Recorder(stream, mic, client.info)
+              client.recorder = rec

              rec.ondataavailable((e) => {
                chunks.push(e.data)
@ -63,7 +59,7 @@ document.addEventListener('DOMContentLoaded', () => {
              rec.onstop(() => {
                const blob = new Blob(chunks)
                chunks = []
-                enabled = false
+                rec.enabled = false

                // Ensure there are some data
                if (blob.size >= 1000) {
@ -73,50 +69,24 @@ document.addEventListener('DOMContentLoaded', () => {

              listener.listening(stream, config.min_decibels, config.max_blank_time, () => {
                // Noise detected
-                noiseDetected = true
+                rec.noiseDetected = true
              }, () => {
                // Noise ended

-                noiseDetected = false
-                if (enabled === true && hotwordTriggered === false) {
+                rec.noiseDetected = false
+                if (rec.enabled && !rec.hotwordTriggered) {
                  rec.stop()
-                  enabled = false
-                  hotwordTriggered = false
-                  countSilenceAfterTalk = 0
-
-                  if (client.info.after_speech === true) {
-                    // Auto enable recording after talk
-                    setTimeout(() => {
-                      rec.start(false)
-                      enabled = true
-                      autoStartedAfterTalk = true
-                    }, 500)
-                  }
+                  rec.enabled = false
+                  rec.hotwordTriggered = false
+                  rec.countSilenceAfterTalk = 0
                }
              })

-              if (client.info.after_speech === true) {
-                setInterval(() => {
-                  // If record after talk has started
-                  if (autoStartedAfterTalk === true && countSilenceAfterTalk <= 3) {
-                    // Stop recording if there was no noise for 3 seconds
-                    if (countSilenceAfterTalk === 3) {
-                      rec.stop(false)
-                      enabled = false
-                      autoStartedAfterTalk = false
-                      countSilenceAfterTalk = 0
-                    } else if (noiseDetected === false) {
-                      countSilenceAfterTalk += 1
-                    }
-                  }
-                }, 1000)
-              }
-
              client.socket.on('enable-record', () => {
-                hotwordTriggered = true
+                rec.hotwordTriggered = true
                rec.start()
-                setTimeout(() => { hotwordTriggered = false }, config.max_blank_time)
-                enabled = true
+                setTimeout(() => { rec.hotwordTriggered = false }, config.max_blank_time)
+                rec.enabled = true
              })
            } else {
              console.error('MediaRecorder is not supported on your browser.')
@ -132,13 +102,13 @@ document.addEventListener('DOMContentLoaded', () => {

        document.addEventListener('keydown', (e) => {
          onkeydowndocument(e, () => {
-            if (enabled === false) {
+            if (rec.enabled === false) {
              input.value = ''
              rec.start()
-              enabled = true
+              rec.enabled = true
            } else {
              rec.stop()
-              enabled = false
+              rec.enabled = false
            }
          })
        })
@ -150,12 +120,12 @@ document.addEventListener('DOMContentLoaded', () => {
        mic.addEventListener('click', (e) => {
          e.preventDefault()

-          if (enabled === false) {
+          if (rec.enabled === false) {
            rec.start()
-            enabled = true
+            rec.enabled = true
          } else {
            rec.stop()
-            enabled = false
+            rec.enabled = false
          }
        })
      }
--- a/app/js/onkeydown.es6.js
+++ b/app/js/onkeydown.es6.js
@ -30,7 +30,7 @@ const onkeydowninput = (e, client) => {
 }

 const onkeydowndocument = (e, cb) => {
-  if (e.altKey === true && e.key === 't') {
+  if (e.altKey && e.key === 't') {
    cb()
  }
 }
--- a/app/js/recorder.es6.js
+++ b/app/js/recorder.es6.js
@ -8,6 +8,10 @@ export default class Recorder {
    this.audioOff = new Audio('../sounds/off.mp3')
    this.playSound = true
    this.info = info
+    this.enabled = false
+    this.hotwordTriggered = false
+    this.noiseDetected = false
+    this.countSilenceAfterTalk = 0
  }

  start (playSound = true) {
@ -30,7 +34,7 @@ export default class Recorder {

  onstart (cb) {
    this.recorder.onstart = (e) => {
-      if (this.playSound === true) {
+      if (this.playSound) {
        this.audioOn.play()
      }
      this.el.classList.add('enabled')
@ -41,7 +45,7 @@ export default class Recorder {

  onstop (cb) {
    this.recorder.onstop = (e) => {
-      if (this.playSound === true) {
+      if (this.playSound) {
        this.audioOff.play()
      }
      this.el.classList.remove('enabled')
--- a/packages/calendar/data/answers/en.json
+++ b/packages/calendar/data/answers/en.json
@ -38,7 +38,7 @@
      "I deleted the \"%list%\" list and all the todos it was containing."
    ],
    "lists_listed": [
-      "You have %lists_nb% lists. Please let me list them below for you:<br><br><ul>%result%</ul>"
+      "You have %lists_nb% lists. Please let me list them for you:<br><br><ul>%result%</ul>"
    ],
    "list_list_element": [
      "<li>\"%list%\", with %todos_nb% elements in it.</li>",
@ -54,7 +54,7 @@
    ],
    "unchecked_todos_listed": [
      "Here are the in progress elements of your \"%list%\" list:<br><br><ul>%result%</ul><br>Stay motivated!",
-      "Please find the in progress elements of your \"%list%\" list below:<br><br><ul>%result%</ul><br>Keep going!"
+      "Please find the in progress elements of your \"%list%\" list:<br><br><ul>%result%</ul><br>Keep going!"
    ],
    "completed_todos_listed": [
      "And here are the completed elements of your \"%list%\" list:<br><br><ul>%result%</ul>"
--- a/server/src/core/brain.js
+++ b/server/src/core/brain.js
@ -48,7 +48,7 @@ class Brain {
  /**
   * Make Leon talk
   */
-  talk (rawSpeech) {
+  talk (rawSpeech, end = false) {
    log.title('Leon')
    log.info('Talking...')

@ -57,7 +57,7 @@ class Brain {
        // Stripe HTML to a whitespace. Whitespace to let the TTS respects punctuation
        const speech = rawSpeech.replace(/<(?:.|\n)*?>/gm, ' ')

-        this.tts.add(speech)
+        this.tts.add(speech, end)
      }

      this.socket.emit('answer', rawSpeech)
@ -101,7 +101,7 @@ class Brain {

      // Ask to repeat if Leon is not sure about the request
      if (obj.classification.confidence < langs[process.env.LEON_LANG].min_confidence) {
-        this.talk(`${this.wernicke('random_not_sure')}.`)
+        this.talk(`${this.wernicke('random_not_sure')}.`, true)
        this.socket.emit('is-typing', false)

        resolve()
@ -179,7 +179,7 @@ class Brain {
          // Check if there is an output (no module error)
          if (this.finalOutput !== '') {
            this.finalOutput = JSON.parse(this.finalOutput).output
-            this.talk(this.finalOutput.speech.toString())
+            this.talk(this.finalOutput.speech.toString(), true)

            /* istanbul ignore next */
            // Synchronize the downloaded content if enabled
--- a/server/src/core/nlu.js
+++ b/server/src/core/nlu.js
@ -106,7 +106,7 @@ class Nlu {
      const fallback = Nlu.fallback(obj, langs[process.env.LEON_LANG].fallbacks)

      if (fallback === false) {
-        this.brain.talk(`${this.brain.wernicke('random_unknown_queries')}.`)
+        this.brain.talk(`${this.brain.wernicke('random_unknown_queries')}.`, true)
        this.brain.socket.emit('is-typing', false)

        log.title('NLU')
--- a/server/src/tts/amazon-polly/synthesizer.js
+++ b/server/src/tts/amazon-polly/synthesizer.js
@ -72,8 +72,9 @@ synthesizer.save = (speech, em, cb) => {
        ffmpeg.input(file).ffprobe((err, data) => {
          if (err) log.error(err)
          else {
-            em.emit('saved', data.streams[0].duration * 1000)
-            cb(file)
+            const duration = data.streams[0].duration * 1000
+            em.emit('saved', duration)
+            cb(file, duration)
          }
        })
      })
--- a/server/src/tts/flite/synthesizer.js
+++ b/server/src/tts/flite/synthesizer.js
@ -72,8 +72,9 @@ synthesizer.save = (speech, em, cb) => {
      /* istanbul ignore if */
      if (err) log.error(err)
      else {
-        em.emit('saved', data.streams[0].duration * 1000)
-        cb(file)
+        const duration = data.streams[0].duration * 1000
+        em.emit('saved', duration)
+        cb(file, duration)
      }
    })
  })
--- a/server/src/tts/google-cloud-tts/synthesizer.js
+++ b/server/src/tts/google-cloud-tts/synthesizer.js
@ -78,8 +78,9 @@ synthesizer.save = (speech, em, cb) => {
      ffmpeg.input(file).ffprobe((err, data) => {
        if (err) log.error(err)
        else {
-          em.emit('saved', data.streams[0].duration * 1000)
-          cb(file)
+          const duration = data.streams[0].duration * 1000
+          em.emit('saved', duration)
+          cb(file, duration)
        }
      })
    })
--- a/server/src/tts/tts.js
+++ b/server/src/tts/tts.js
@ -59,15 +59,19 @@ class Tts {
  }

  /**
-   * Forward buffer audio file to the client
+   * Forward buffer audio file and duration to the client
   * and delete audio file once it has been forwarded
   */
  forward (speech) {
-    this.synthesizer.default.save(speech, this.em, (file) => {
+    this.synthesizer.default.save(speech.text, this.em, (file, duration) => {
      /* istanbul ignore next */
      const bitmap = fs.readFileSync(file)
      /* istanbul ignore next */
-      this.socket.emit('audio-forwarded', Buffer.from(bitmap), (confirmation) => {
+      this.socket.emit('audio-forwarded', {
+        buffer: Buffer.from(bitmap),
+        is_final_answer: speech.isFinalAnswer,
+        duration
+      }, (confirmation) => {
        if (confirmation === 'audio-received') {
          fs.unlinkSync(file)
        }
@ -98,15 +102,17 @@ class Tts {
  /**
   * Add speeches to the queue
   */
-  add (speech) {
+  add (text, isFinalAnswer) {
    /**
     * Flite fix. When the string is only one word,
     * Flite cannot save to a file. So we add a space at the end of the string
     */
-    if (this.provider === 'flite' && speech.indexOf(' ') === -1) {
-      speech += ' '
+    if (this.provider === 'flite' && text.indexOf(' ') === -1) {
+      text += ' '
    }

+    const speech = { text, isFinalAnswer }
+
    if (this.speeches.length > 0) {
      this.speeches.push(speech)
    } else {
--- a/server/src/tts/watson-tts/synthesizer.js
+++ b/server/src/tts/watson-tts/synthesizer.js
@ -74,8 +74,9 @@ synthesizer.save = (speech, em, cb) => {
      ffmpeg.input(file).ffprobe((err, data) => {
        if (err) log.error(err)
        else {
-          em.emit('saved', data.streams[0].duration * 1000)
-          cb(file)
+          const duration = data.streams[0].duration * 1000
+          em.emit('saved', duration)
+          cb(file, duration)
        }
      })
    })
--- a/test/unit/server/tts/tts.spec.js
+++ b/test/unit/server/tts/tts.spec.js
@ -31,7 +31,7 @@ describe('TTS', () => {
      tts.synthesizer = { default: { save: jest.fn() } }
      tts.socket = { emit: jest.fn() }

-      tts.forward('Hello')
+      tts.forward({ text: 'Hello', isFinalAnswer: true })
      expect(tts.synthesizer.default.save.mock.calls[0][0]).toBe('Hello')
    })
  })
@ -58,7 +58,7 @@ describe('TTS', () => {
      const tts = new Tts({ }, 'flite')
      tts.forward = jest.fn()

-      expect(tts.add('Hello')[0].substr('Hello'.length)).toBe(' ')
+      expect(tts.add('Hello', true)[0].text.substr('Hello'.length)).toBe(' ')
    })

    test('adds speech to the queue ', async () => {