feat(server): differentiate skill text answer from speech answer (WIP)

2024-11-28 04:04:58 +03:00 · 2023-05-18 00:00:40 +08:00 · 2023-05-18 00:00:40 +08:00 · 7f9b60a6cd
commit 7f9b60a6cd
parent 9e77953976
7 changed files with 169 additions and 115 deletions
--- a/bridges/nodejs/src/sdk/leon.ts
+++ b/bridges/nodejs/src/sdk/leon.ts
@ -59,7 +59,7 @@ class Leon {
        }

        const answers = SKILL_CONFIG.answers[answerKey] ?? ''
-        let answer: string
+        let answer: AnswerConfig

        if (Array.isArray(answers)) {
          answer = answers[Math.floor(Math.random() * answers.length)] ?? ''
@ -69,7 +69,22 @@ class Leon {

        if (data) {
          for (const key in data) {
-            answer = answer.replaceAll(`%${key}%`, String(data[key]))
+            // In case the answer needs speech and text differentiation
+            if (typeof answer !== 'string' && answer.text) {
+              answer.text = answer.text.replaceAll(
+                `%${key}%`,
+                String(data[key])
+              )
+              answer.speech = answer.speech.replaceAll(
+                `%${key}%`,
+                String(data[key])
+              )
+            } else {
+              answer = (answer as string).replaceAll(
+                `%${key}%`,
+                String(data[key])
+              )
+            }
          }
        }

@ -77,7 +92,22 @@ class Leon {
          const { variables } = SKILL_CONFIG

          for (const key in variables) {
-            answer = answer.replaceAll(`%${key}%`, String(variables[key]))
+            // In case the answer needs speech and text differentiation
+            if (typeof answer !== 'string' && answer.text) {
+              answer.text = answer.text.replaceAll(
+                `%${key}%`,
+                String(variables[key])
+              )
+              answer.speech = answer.speech.replaceAll(
+                `%${key}%`,
+                String(variables[key])
+              )
+            } else {
+              answer = (answer as string).replaceAll(
+                `%${key}%`,
+                String(variables[key])
+              )
+            }
          }
        }

@ -108,7 +138,7 @@ class Leon {
            answerInput.widget && !answerInput.key
              ? 'widget'
              : (answerInput.key as string),
-          speech: this.setAnswerData(answerInput.key, answerInput.data) ?? '',
+          answer: this.setAnswerData(answerInput.key, answerInput.data) ?? '',
          core: answerInput.core,
          options: this.getSRCConfig('options')
        }
--- a/bridges/nodejs/src/sdk/types.ts
+++ b/bridges/nodejs/src/sdk/types.ts
@ -1,8 +1,13 @@
 /**
 * Action types
 */
-import type { ActionParams, IntentObject } from '@/core/brain/types'
-import type { AnswerConfigSchema } from '@/schemas/skill-schemas'
+import type {
+  ActionParams,
+  IntentObject,
+  SkillAnswerCoreData,
+  SkillAnswerOutput
+} from '@/core/brain/types'
+import type { SkillAnswerConfigSchema } from '@/schemas/skill-schemas'

 export type { ActionParams, IntentObject }

@ -11,26 +16,11 @@ export type ActionFunction = (params: ActionParams) => Promise<void>
 /**
 * Answer types
 */
-export interface AnswerOutput extends IntentObject {
-  output: {
-    codes: string
-    speech: string
-    core?: AnswerCoreData
-    widget?: unknown // TODO
-    options: Record<string, string>
-  }
-}
-export interface AnswerCoreData {
-  restart?: boolean
-  isInActionLoop?: boolean
-  showNextActionSuggestions?: boolean
-  showSuggestions?: boolean
-}
 export interface Answer {
  key?: string
  widget?: unknown // TODO
  data?: AnswerData
-  core?: AnswerCoreData
+  core?: SkillAnswerCoreData
 }
 export interface TextAnswer extends Answer {
  key: string
@ -41,4 +31,5 @@ export interface WidgetAnswer extends Answer {
 }
 export type AnswerData = Record<string, string | number> | null
 export type AnswerInput = TextAnswer | WidgetAnswer
-export type AnswerConfig = AnswerConfigSchema
+export type AnswerOutput = SkillAnswerOutput
+export type AnswerConfig = SkillAnswerConfigSchema
--- a/core/skills-endpoints.json
+++ b/core/skills-endpoints.json
@ -1,5 +1,75 @@
 {
  "endpoints": [
+    {
+      "method": "GET",
+      "route": "/api/action/leon/age/run",
+      "params": []
+    },
+    {
+      "method": "GET",
+      "route": "/api/action/leon/color/favorite_color",
+      "params": []
+    },
+    {
+      "method": "GET",
+      "route": "/api/action/leon/color/why",
+      "params": []
+    },
+    {
+      "method": "GET",
+      "route": "/api/action/leon/color/color_hexadecimal",
+      "params": []
+    },
+    {
+      "method": "GET",
+      "route": "/api/action/leon/good_bye/run",
+      "params": []
+    },
+    {
+      "method": "GET",
+      "route": "/api/action/leon/greeting/run",
+      "params": []
+    },
+    {
+      "method": "GET",
+      "route": "/api/action/leon/introduction/introduce_leon",
+      "params": []
+    },
+    {
+      "method": "GET",
+      "route": "/api/action/leon/introduction/gather_basic_info",
+      "params": []
+    },
+    {
+      "method": "GET",
+      "route": "/api/action/leon/introduction/remember",
+      "params": []
+    },
+    {
+      "method": "GET",
+      "route": "/api/action/leon/joke/run",
+      "params": []
+    },
+    {
+      "method": "GET",
+      "route": "/api/action/leon/meaning_of_life/run",
+      "params": []
+    },
+    {
+      "method": "GET",
+      "route": "/api/action/leon/partner_assistant/run",
+      "params": []
+    },
+    {
+      "method": "GET",
+      "route": "/api/action/leon/random_number/run",
+      "params": []
+    },
+    {
+      "method": "GET",
+      "route": "/api/action/leon/welcome/run",
+      "params": []
+    },
    {
      "method": "POST",
      "route": "/api/action/news/github_trends/run",
@ -140,76 +210,6 @@
      "method": "GET",
      "route": "/api/action/games/rochambeau/rematch",
      "params": []
-    },
-    {
-      "method": "GET",
-      "route": "/api/action/leon/age/run",
-      "params": []
-    },
-    {
-      "method": "GET",
-      "route": "/api/action/leon/color/favorite_color",
-      "params": []
-    },
-    {
-      "method": "GET",
-      "route": "/api/action/leon/color/why",
-      "params": []
-    },
-    {
-      "method": "GET",
-      "route": "/api/action/leon/color/color_hexadecimal",
-      "params": []
-    },
-    {
-      "method": "GET",
-      "route": "/api/action/leon/good_bye/run",
-      "params": []
-    },
-    {
-      "method": "GET",
-      "route": "/api/action/leon/greeting/run",
-      "params": []
-    },
-    {
-      "method": "GET",
-      "route": "/api/action/leon/introduction/introduce_leon",
-      "params": []
-    },
-    {
-      "method": "GET",
-      "route": "/api/action/leon/introduction/gather_basic_info",
-      "params": []
-    },
-    {
-      "method": "GET",
-      "route": "/api/action/leon/introduction/remember",
-      "params": []
-    },
-    {
-      "method": "GET",
-      "route": "/api/action/leon/joke/run",
-      "params": []
-    },
-    {
-      "method": "GET",
-      "route": "/api/action/leon/meaning_of_life/run",
-      "params": []
-    },
-    {
-      "method": "GET",
-      "route": "/api/action/leon/partner_assistant/run",
-      "params": []
-    },
-    {
-      "method": "GET",
-      "route": "/api/action/leon/random_number/run",
-      "params": []
-    },
-    {
-      "method": "GET",
-      "route": "/api/action/leon/welcome/run",
-      "params": []
    }
  ]
 }
--- a/server/src/core/brain/brain.ts
+++ b/server/src/core/brain/brain.ts
@ -9,7 +9,11 @@ import type {
  NERCustomEntity,
  NLUResult
 } from '@/core/nlp/types'
-import type { SkillConfigSchema, SkillSchema } from '@/schemas/skill-schemas'
+import type {
+  SkillAnswerConfigSchema,
+  SkillConfigSchema,
+  SkillSchema
+} from '@/schemas/skill-schemas'
 import type {
  BrainProcessResult,
  IntentObject,
@ -29,6 +33,7 @@ import { LogHelper } from '@/helpers/log-helper'
 import { SkillDomainHelper } from '@/helpers/skill-domain-helper'
 import { StringHelper } from '@/helpers/string-helper'
 import Synchronizer from '@/core/synchronizer'
+import type { AnswerOutput } from '@sdk/types'

 export default class Brain {
  private static instance: Brain
@ -43,7 +48,7 @@ export default class Brain {
  private domainFriendlyName = ''
  private skillFriendlyName = ''
  private skillOutput = ''
-  private speeches: string[] = []
+  private answers: SkillAnswerConfigSchema[] = []
  public isMuted = false // Close Leon mouth if true; e.g. over HTTP

  constructor() {
@ -97,19 +102,22 @@ export default class Brain {
  /**
   * Make Leon talk
   */
-  public talk(rawSpeech: string, end = false): void {
+  public talk(answer: SkillAnswerConfigSchema, end = false): void {
    LogHelper.title('Brain')
    LogHelper.info('Talking...')

-    if (rawSpeech !== '') {
+    if (answer !== '') {
+      const textAnswer = typeof answer === 'string' ? answer : answer.text
+      const speechAnswer = typeof answer === 'string' ? answer : answer.speech
+
      if (HAS_TTS) {
        // Strip HTML tags to a whitespace. The whitespace lets the TTS respect punctuation
-        const speech = rawSpeech.replace(/<(?:.|\n)*?>/gm, ' ')
+        const speech = speechAnswer.replace(/<(?:.|\n)*?>/gm, ' ')

        TTS.add(speech, end)
      }

-      SOCKET_SERVER.socket?.emit('answer', rawSpeech)
+      SOCKET_SERVER.socket?.emit('answer', textAnswer)
    }
  }

@ -192,7 +200,7 @@ export default class Brain {
    data: Buffer
  ): Promise<Error | null> | void {
    try {
-      const obj = JSON.parse(data.toString())
+      const obj = JSON.parse(data.toString()) as AnswerOutput

      if (typeof obj === 'object') {
        LogHelper.title(`${this.skillFriendlyName} skill (on data)`)
@ -202,11 +210,14 @@ export default class Brain {
          SOCKET_SERVER.socket?.emit('widget', obj.output.widget)
        }

-        const speech = obj.output.speech.toString()
+        // TODO: remove this condition once Python skill outputs are updated (replace "speech" with "answer")
+        // eslint-disable-next-line @typescript-eslint/ban-ts-comment
+        // @ts-ignore
+        const { answer, speech } = obj.output
        if (!this.isMuted) {
-          this.talk(speech)
+          this.talk(answer || speech)
        }
-        this.speeches.push(speech)
+        this.answers.push(answer)
        this.skillOutput = data.toString()

        return Promise.resolve(null)
@ -231,11 +242,13 @@ export default class Brain {
      '%skill_name%': this.skillFriendlyName,
      '%domain_name%': this.domainFriendlyName
    })}!`
+
    if (!this.isMuted) {
      this.talk(speech)
      SOCKET_SERVER.socket?.emit('is-typing', false)
    }
-    this.speeches.push(speech)
+
+    this.answers.push(speech)
  }

  /**
@ -480,6 +493,8 @@ export default class Brain {
            await SkillDomainHelper.getSkillConfig(configFilePath, this._lang)
          const utteranceHasEntities = nluResult.entities.length > 0
          const { answers: rawAnswers } = nluResult
+          // TODO: handle dialog action skill speech vs text
+          // let answers = rawAnswers as [{ answer: SkillAnswerConfigSchema }]
          let answers = rawAnswers
          let answer: string | undefined = ''

@ -505,6 +520,8 @@ export default class Brain {
                actions[nluResult.classification.action]?.unknown_answers

              if (unknownAnswers) {
+                // eslint-disable-next-line @typescript-eslint/ban-ts-comment
+                // @ts-ignore
                answer =
                  unknownAnswers[
                    Math.floor(Math.random() * unknownAnswers.length)
--- a/server/src/core/brain/types.ts
+++ b/server/src/core/brain/types.ts
@ -9,7 +9,10 @@ import type {
  NLUSlot,
  NLUSlots
 } from '@/core/nlp/types'
-import type { SkillConfigSchema } from '@/schemas/skill-schemas'
+import type {
+  SkillConfigSchema,
+  SkillAnswerConfigSchema
+} from '@/schemas/skill-schemas'
 import type { ShortLanguageCode } from '@/types'

 interface SkillCoreData {
@ -29,7 +32,7 @@ export interface SkillResult {
  slots: NLUSlots
  output: {
    codes: string[]
-    speech: string
+    answer: string
    core: SkillCoreData | undefined
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    options: Record<string, any>
@ -62,6 +65,22 @@ export interface IntentObject extends ActionParams {
  action: NLPAction
 }

+export interface SkillAnswerCoreData {
+  restart?: boolean
+  isInActionLoop?: boolean
+  showNextActionSuggestions?: boolean
+  showSuggestions?: boolean
+}
+export interface SkillAnswerOutput extends IntentObject {
+  output: {
+    codes: string
+    answer: SkillAnswerConfigSchema
+    core?: SkillAnswerCoreData
+    widget?: unknown // TODO
+    options: Record<string, string>
+  }
+}
+
 export interface BrainProcessResult extends NLUResult {
  speeches: string[]
  executionTime: number
--- a/server/src/schemas/skill-schemas.ts
+++ b/server/src/schemas/skill-schemas.ts
@ -275,4 +275,4 @@ export type SkillCustomRegexEntityTypeSchema = Static<
 export type SkillCustomEnumEntityTypeSchema = Static<
  typeof skillCustomEnumEntityType
 >
-export type AnswerConfigSchema = Static<typeof answerTypes>
+export type SkillAnswerConfigSchema = Static<typeof answerTypes>
--- a/skills/leon/good_bye/config/en.json
+++ b/skills/leon/good_bye/config/en.json
@ -12,14 +12,11 @@
        "I have to go"
      ],
      "answers": [
-        "Bye!",
-        "Bye bye!",
-        "Good bye.",
-        "Bye! Take care.",
-        "Good bye, please, take care of yourself.",
-        "Bye! Enjoy your time!",
-        "See you!",
-        "See ya!"
+        {
+          "text": "text test",
+          "speech": "speech test"
+        },
+        "Bye!"
      ]
    }
  }