feat(server): finalize Leon's personality and optimize LLM duties

2024-12-17 21:51:53 +03:00 · 2024-05-06 00:57:20 +08:00 · 2024-05-06 00:57:20 +08:00 · 0189c74a0e
commit 0189c74a0e
parent a0a4f9d7b0
19 changed files with 448 additions and 198 deletions
--- a/server/src/constants.ts
+++ b/server/src/constants.ts
@ -161,22 +161,30 @@ export const LEON_FILE_PATH = path.join(process.cwd(), 'leon.json')
 /**
 * LLMs
 * @see k-quants comparison: https://github.com/ggerganov/llama.cpp/pull/1684
 */
 // https://huggingface.co/QuantFactory/Meta-Llama-3-8B-Instruct-GGUF/blob/main/Meta-Llama-3-8B-Instruct.Q5_K_S.gguf
 export const HAS_LLM = process.env['LEON_LLM'] === 'true'
 export const HAS_LLM_NLG = process.env['LEON_LLM_NLG'] === 'true' && HAS_LLM
 // export const LLM_VERSION = 'v0.2.Q4_K_S'
 // export const LLM_VERSION = '8B-Instruct.Q5_K_S'
 // export const LLM_VERSION = '2.9-llama3-8b.Q5_K_S'
 export const LLM_VERSION = '3-8B-Uncensored-Q5_K_S'
 // export const LLM_VERSION = '3-mini-128k-instruct.Q5_K_S'
 // export const LLM_VERSION = '3-mini-4k-instruct-q4'
 // export const LLM_VERSION = '1.1-7b-it-Q4_K_M'
 // export const LLM_VERSION = '8B-Instruct-Q4_K_S'
 // export const LLM_NAME = 'Mistral 7B Instruct'
 // export const LLM_NAME = 'Meta-Llama-3-8B-Instruct'
 // export const LLM_NAME = 'Dolphin 2.9 Llama-3-8B'
 export const LLM_NAME = 'Lexi-Llama-3-8B-Uncensored'
 // export const LLM_NAME = 'Phi-3-Mini-128K-Instruct'
 // export const LLM_NAME = 'Phi-3-mini'
 // export const LLM_NAME = 'Gemma 1.1 7B (IT)'
 // export const LLM_NAME = 'Meta Llama 3 8B Instruct'
 // export const LLM_FILE_NAME = `mistral-7b-instruct-${LLM_VERSION}.gguf`
 // export const LLM_FILE_NAME = `Meta-Llama-3-${LLM_VERSION}.gguf`
 // export const LLM_FILE_NAME = `dolphin-${LLM_VERSION}.gguf`
 export const LLM_FILE_NAME = `Lexi-Llama-${LLM_VERSION}.gguf`
 // export const LLM_FILE_NAME = `Phi-${LLM_VERSION}.gguf`
 // export const LLM_FILE_NAME = `gemma-${LLM_VERSION}.gguf`
@ -186,6 +194,10 @@ export const LLM_DIR_PATH = path.join(MODELS_PATH, 'llm')
 export const LLM_PATH = path.join(LLM_DIR_PATH, LLM_FILE_NAME)
 export const LLM_MINIMUM_TOTAL_RAM = 8
 export const LLM_MINIMUM_FREE_RAM = 8
 /*export const LLM_HF_DOWNLOAD_URL =
  'https://huggingface.co/QuantFactory/Meta-Llama-3-8B-Instruct-GGUF/resolve/main/Meta-Llama-3-8B-Instruct.Q5_K_S.gguf?download=true'*/
 /*export const LLM_HF_DOWNLOAD_URL =
  'https://huggingface.co/QuantFactory/dolphin-2.9-llama3-8b-GGUF/resolve/main/dolphin-2.9-llama3-8b.Q5_K_S.gguf?download=true'*/
 export const LLM_HF_DOWNLOAD_URL =
  'https://huggingface.co/bartowski/Lexi-Llama-3-8B-Uncensored-GGUF/resolve/main/Lexi-Llama-3-8B-Uncensored-Q5_K_S.gguf?download=true'
 /*export const LLM_HF_DOWNLOAD_URL =
@ -200,6 +212,10 @@ export const LLM_HF_DOWNLOAD_URL =
  'https://huggingface.co/bartowski/Meta-Llama-3-8B-Instruct-GGUF/resolve/main/Meta-Llama-3-8B-Instruct-Q4_K_S.gguf?download=true'*/
 /*export const LLM_MIRROR_DOWNLOAD_URL =
  'https://hf-mirror.com/bartowski/gemma-1.1-7b-it-GGUF/resolve/main/gemma-1.1-7b-it-Q4_K_M.gguf?download=true'*/
 /*export const LLM_MIRROR_DOWNLOAD_URL =
  'https://hf-mirror.com/QuantFactory/Meta-Llama-3-8B-Instruct-GGUF/resolve/main/Meta-Llama-3-8B-Instruct.Q5_K_S.gguf?download=true'*/
 /*export const LLM_MIRROR_DOWNLOAD_URL =
  'https://hf-mirror.com/QuantFactory/dolphin-2.9-llama3-8b-GGUF/resolve/main/dolphin-2.9-llama3-8b.Q5_K_S.gguf?download=true'*/
 export const LLM_MIRROR_DOWNLOAD_URL =
  'https://hf-mirror.com/bartowski/Lexi-Llama-3-8B-Uncensored-GGUF/resolve/main/Lexi-Llama-3-8B-Uncensored-Q5_K_S.gguf?download=true'
 /*export const LLM_MIRROR_DOWNLOAD_URL =
--- a/server/src/conversation-logger.ts
+++ b/server/src/conversation-logger.ts
@ -9,8 +9,12 @@ interface MessageLog {
  sentAt: number
  message: string
 }
-
+interface ConversationLoggerSettings {
-const CONVERSATION_LOG_PATH = path.join(LOGS_PATH, 'conversation_log.json')
+  loggerName: string
  fileName: string
  nbOfLogsToKeep: number
  nbOfLogsToLoad: number
 }
 /**
 * The goal of this class is to log the conversation data between the
@ -20,27 +24,39 @@ const CONVERSATION_LOG_PATH = path.join(LOGS_PATH, 'conversation_log.json')
 * better results.
 */
 export class ConversationLogger {
-  private static readonly nbOfLogsToKeep = 512
+  private readonly settings: ConversationLoggerSettings
-  private static readonly nbOfLogsToLoad = 32
+  private readonly conversationLogPath: string
-  private static async createConversationLogFile(): Promise<void> {
+  get loggerName(): string {
    return this.settings.loggerName
  }
  /**
   * Announce the new logger instance in the logs, keep its settings and
   * resolve the log file path (the settings file name joined to LOGS_PATH)
   */
  constructor(settings: ConversationLoggerSettings) {
    const { loggerName, fileName } = settings

    LogHelper.title(loggerName)
    LogHelper.success('New instance')

    this.settings = settings
    this.conversationLogPath = path.join(LOGS_PATH, fileName)
  }
  private async createConversationLogFile(): Promise<void> {
    try {
-      if (!fs.existsSync(CONVERSATION_LOG_PATH)) {
+      if (!fs.existsSync(this.conversationLogPath)) {
-        await fs.promises.writeFile(CONVERSATION_LOG_PATH, '[]', 'utf-8')
+        await fs.promises.writeFile(this.conversationLogPath, '[]', 'utf-8')
      }
    } catch (e) {
-      LogHelper.title('Conversation Logger')
+      LogHelper.title(this.settings.loggerName)
      LogHelper.error(`Failed to create conversation log file: ${e})`)
    }
  }
-  private static async getAllLogs(): Promise<MessageLog[]> {
+  private async getAllLogs(): Promise<MessageLog[]> {
    try {
      let conversationLog: MessageLog[] = []
-      if (fs.existsSync(CONVERSATION_LOG_PATH)) {
+      if (fs.existsSync(this.conversationLogPath)) {
        conversationLog = JSON.parse(
-          await fs.promises.readFile(CONVERSATION_LOG_PATH, 'utf-8')
+          await fs.promises.readFile(this.conversationLogPath, 'utf-8')
        )
      } else {
        await this.createConversationLogFile()
@ -48,20 +64,18 @@ export class ConversationLogger {
      return conversationLog
    } catch (e) {
-      LogHelper.title('Conversation Logger')
+      LogHelper.title(this.settings.loggerName)
      LogHelper.error(`Failed to get conversation log: ${e})`)
    }
    return []
  }
-  public static async push(
+  public async push(newRecord: Omit<MessageLog, 'sentAt'>): Promise<void> {
    newRecord: Omit<MessageLog, 'sentAt'>
  ): Promise<void> {
    try {
      const conversationLogs = await this.getAllLogs()
-      if (conversationLogs.length >= this.nbOfLogsToKeep) {
+      if (conversationLogs.length >= this.settings.nbOfLogsToKeep) {
        conversationLogs.shift()
      }
@ -71,32 +85,32 @@ export class ConversationLogger {
      })
      await fs.promises.writeFile(
-        CONVERSATION_LOG_PATH,
+        this.conversationLogPath,
        JSON.stringify(conversationLogs, null, 2),
        'utf-8'
      )
    } catch (e) {
-      LogHelper.title('Conversation Logger')
+      LogHelper.title(this.settings.loggerName)
      LogHelper.error(`Failed to push new record: ${e})`)
    }
  }
-  public static async load(): Promise<MessageLog[] | void> {
+  public async load(): Promise<MessageLog[] | void> {
    try {
      const conversationLog = await this.getAllLogs()
-      return conversationLog.slice(-this.nbOfLogsToLoad)
+      return conversationLog.slice(-this.settings.nbOfLogsToLoad)
    } catch (e) {
-      LogHelper.title('Conversation Logger')
+      LogHelper.title(this.settings.loggerName)
      LogHelper.error(`Failed to load conversation log: ${e})`)
    }
  }
-  public static async clear(): Promise<void> {
+  public async clear(): Promise<void> {
    try {
-      await fs.promises.writeFile(CONVERSATION_LOG_PATH, '[]', 'utf-8')
+      await fs.promises.writeFile(this.conversationLogPath, '[]', 'utf-8')
    } catch (e) {
-      LogHelper.title('Conversation Logger')
+      LogHelper.title(this.settings.loggerName)
      LogHelper.error(`Failed to clear conversation log: ${e})`)
    }
  }
--- a/server/src/core/brain/brain.ts
+++ b/server/src/core/brain/brain.ts
@ -28,7 +28,13 @@ import {
  NODEJS_BRIDGE_BIN_PATH,
  TMP_PATH
 } from '@/constants'
-import { LLM_MANAGER, NLU, SOCKET_SERVER, TTS } from '@/core'
+import {
  CONVERSATION_LOGGER,
  LLM_MANAGER,
  NLU,
  SOCKET_SERVER,
  TTS
 } from '@/core'
 import { LangHelper } from '@/helpers/lang-helper'
 import { LogHelper } from '@/helpers/log-helper'
 import { SkillDomainHelper } from '@/helpers/skill-domain-helper'
@ -36,7 +42,6 @@ import { StringHelper } from '@/helpers/string-helper'
 import { DateHelper } from '@/helpers/date-helper'
 import { ParaphraseLLMDuty } from '@/core/llm-manager/llm-duties/paraphrase-llm-duty'
 import { AnswerQueue } from '@/core/brain/answer-queue'
 import { ConversationLogger } from '@/conversation-logger'
 const MIN_NB_OF_WORDS_TO_USE_LLM_NLG = 5
@ -173,9 +178,7 @@ export default class Brain {
              })
              const paraphraseResult = await paraphraseDuty.execute()
-              textAnswer = paraphraseResult?.output[
+              textAnswer = paraphraseResult?.output as unknown as string
                'rephrased_answer'
              ] as string
              speechAnswer = textAnswer
            }
          }
@ -191,7 +194,7 @@ export default class Brain {
        SOCKET_SERVER.socket?.emit('answer', textAnswer)
        SOCKET_SERVER.socket?.emit('is-typing', false)
-        await ConversationLogger.push({
+        await CONVERSATION_LOGGER.push({
          who: 'leon',
          message: textAnswer
        })
--- a/server/src/core/http-server/api/llm-inference/post.ts
+++ b/server/src/core/http-server/api/llm-inference/post.ts
@ -63,12 +63,21 @@ export const postLLMInference: FastifyPluginAsync<APIOptions> = async (
          return
        }
        let llmResult
        // TODO: use long-live duty for chit-chat duty
        if (params.dutyType === LLMDuties.ChitChat) {
          const chitChatLLMDuty = new ChitChatLLMDuty()
          await chitChatLLMDuty.init()
          llmResult = await chitChatLLMDuty.execute()
        } else {
          // eslint-disable-next-line @typescript-eslint/ban-ts-comment
          // @ts-expect-error
          const duty = new LLM_DUTIES_MAP[params.dutyType](params)
-        const llmResult = await duty.execute()
+          llmResult = await duty.execute()
        }
        reply.send({
          success: true,
--- a/server/src/core/index.ts
+++ b/server/src/core/index.ts
@ -16,6 +16,7 @@ import NaturalLanguageUnderstanding from '@/core/nlp/nlu/nlu'
 import Brain from '@/core/brain/brain'
 import LLMManager from '@/core/llm-manager/llm-manager'
 import Persona from '@/core/llm-manager/persona'
 import { ConversationLogger } from '@/conversation-logger'
 /**
 * Register core nodes
@ -35,6 +36,19 @@ export const LLM_MANAGER = new LLMManager()
 export const PERSONA = new Persona()
 /**
  * Main conversation logger, stored in "conversation_log.json".
  * The brain pushes Leon's answers to it and the paraphrase LLM duty
  * loads its history. The oldest record is dropped once 512 records
  * are stored, and only the 96 most recent records are returned on load
  */
 export const CONVERSATION_LOGGER = new ConversationLogger({
  loggerName: 'Conversation Logger',
  fileName: 'conversation_log.json',
  nbOfLogsToKeep: 512,
  nbOfLogsToLoad: 96
 })
 /**
  * Logger dedicated to the chit-chat LLM duty, stored in
  * "loop_conversation_log.json". The duty clears it when it creates its
  * session, pushes the owner utterances and Leon's answers to it
  * and loads it back into the session history
  */
 export const LOOP_CONVERSATION_LOGGER = new ConversationLogger({
  loggerName: 'Loop Conversation Logger',
  fileName: 'loop_conversation_log.json',
  nbOfLogsToKeep: 512,
  nbOfLogsToLoad: 96
 })
 export const HTTP_SERVER = new HTTPServer(String(HOST), PORT)
 export const SOCKET_SERVER = new SocketServer()
--- a/server/src/core/llm-manager/llm-duties/chit-chat-llm-duty.ts
+++ b/server/src/core/llm-manager/llm-duties/chit-chat-llm-duty.ts
@ -1,23 +1,27 @@
 import type { LlamaContext, LlamaChatSession } from 'node-llama-cpp'
 import {
  type LLMDutyParams,
  type LLMDutyResult,
  LLMDuty
 } from '@/core/llm-manager/llm-duty'
 import { LogHelper } from '@/helpers/log-helper'
-import { LLM_MANAGER, PERSONA, NLU } from '@/core'
+import { LLM_MANAGER, PERSONA, NLU, LOOP_CONVERSATION_LOGGER } from '@/core'
 import { LLMDuties } from '@/core/llm-manager/types'
-import { LLM_THREADS } from '@/core/llm-manager/llm-manager'
+import {
-
+  LLM_THREADS,
-// interface ChitChatLLMDutyParams extends LLMDutyParams {}
+  MAX_EXECUTION_RETRIES,
  MAX_EXECUTION_TIMOUT
 } from '@/core/llm-manager/llm-manager'
 export class ChitChatLLMDuty extends LLMDuty {
  private static instance: ChitChatLLMDuty
-  // TODO
+  private static context: LlamaContext = null as unknown as LlamaContext
  private static session: LlamaChatSession = null as unknown as LlamaChatSession
  protected readonly systemPrompt = ``
  protected readonly name = 'Chit-Chat LLM Duty'
  protected input: LLMDutyParams['input'] = null
  // constructor(params: ChitChatLLMDutyParams) {
  constructor() {
    super()
@ -26,53 +30,63 @@ export class ChitChatLLMDuty extends LLMDuty {
      LogHelper.success('New instance')
      ChitChatLLMDuty.instance = this
      // this.input = params.input
    }
  }
-  public async execute(retries = 3): Promise<LLMDutyResult | null> {
+  public async init(): Promise<void> {
    // Prepare the context and session shared by every instance (static fields) before "execute" is called
    /**
     * A new context and session will be created only
     * when Leon's instance is restarted
     */
    if (!ChitChatLLMDuty.context || !ChitChatLLMDuty.session) {
      // Start from an empty loop conversation log since a brand-new session is created
      await LOOP_CONVERSATION_LOGGER.clear()
      ChitChatLLMDuty.context = await LLM_MANAGER.model.createContext({
        threads: LLM_THREADS
      })
      // NOTE(review): the import goes through Function(), presumably so the dynamic import() is not rewritten by the TypeScript build — confirm
      const { LlamaChatSession } = await Function(
        'return import("node-llama-cpp")'
      )()
      // The session is bound to a sequence of the shared context and uses the persona chit-chat system prompt
      ChitChatLLMDuty.session = new LlamaChatSession({
        contextSequence: ChitChatLLMDuty.context.getSequence(),
        systemPrompt: PERSONA.getChitChatSystemPrompt()
      }) as LlamaChatSession
    } else {
      /**
       * As long as Leon's instance has not been restarted,
       * the context, session with history will be loaded
       */
      const history = await LLM_MANAGER.loadHistory(
        LOOP_CONVERSATION_LOGGER,
        ChitChatLLMDuty.session
      )
      // Replace the session history with the one rebuilt from the loop conversation log
      ChitChatLLMDuty.session.setChatHistory(history)
    }
  }
  public async execute(
    retries = MAX_EXECUTION_RETRIES
  ): Promise<LLMDutyResult | null> {
    LogHelper.title(this.name)
    LogHelper.info('Executing...')
    try {
-      const { LlamaJsonSchemaGrammar, LlamaChatSession } = await Function(
+      await LOOP_CONVERSATION_LOGGER.push({
-        'return import("node-llama-cpp")'
+        who: 'owner',
-      )()
+        message: NLU.nluResult.newUtterance
      /**
       * TODO: make context, session, etc. persistent
       */
      const context = await LLM_MANAGER.model.createContext({
        threads: LLM_THREADS
      })
-      const session = new LlamaChatSession({
+      const prompt = NLU.nluResult.newUtterance
-        contextSequence: context.getSequence(),
+
-        systemPrompt: PERSONA.getDutySystemPrompt(this.systemPrompt)
+      const rawResultPromise = ChitChatLLMDuty.session.prompt(prompt, {
        maxTokens: ChitChatLLMDuty.context.contextSize,
        temperature: 1.3
      })
-      const history = await LLM_MANAGER.loadHistory(session)
+      const timeoutPromise = new Promise((_, reject) =>
-      session.setChatHistory(history)
+        setTimeout(() => reject(new Error('Timeout')), MAX_EXECUTION_TIMOUT)
      const grammar = new LlamaJsonSchemaGrammar(LLM_MANAGER.llama, {
        type: 'object',
        properties: {
          model_answer: {
            type: 'string'
          }
        }
      })
      const prompt = `NEW MESSAGE FROM USER:\n"${NLU.nluResult.newUtterance}"`
      const rawResultPromise = session.prompt(prompt, {
        grammar,
        maxTokens: context.contextSize,
        temperature: 1.0
      })
      const timeoutPromise = new Promise(
        (_, reject) => setTimeout(() => reject(new Error('Timeout')), 8_000) // 8 seconds timeout
      )
      let rawResult
@ -87,25 +101,33 @@ export class ChitChatLLMDuty extends LLMDuty {
          return this.execute(retries - 1)
        } else {
          LogHelper.title(this.name)
-          LogHelper.error('Prompt failed after 3 retries')
+          LogHelper.error(
            `Prompt failed after ${MAX_EXECUTION_RETRIES} retries`
          )
          return null
        }
      }
-      // If a closing bracket is missing, add it
+      const { usedInputTokens, usedOutputTokens } =
-      if (rawResult[rawResult.length - 1] !== '}') {
+        ChitChatLLMDuty.session.sequence.tokenMeter.getState()
        rawResult += '}'
      }
      const parsedResult = grammar.parse(rawResult)
      const result = {
        dutyType: LLMDuties.Paraphrase,
        systemPrompt: PERSONA.getChitChatSystemPrompt(),
        input: prompt,
-        output: parsedResult,
+        output: rawResult,
-        data: null
+        data: null,
        maxTokens: ChitChatLLMDuty.context.contextSize,
        // Current context size
        usedInputTokens,
        usedOutputTokens
      }
      await LOOP_CONVERSATION_LOGGER.push({
        who: 'leon',
        message: result.output as string
      })
      LogHelper.title(this.name)
      LogHelper.success(`Duty executed: ${JSON.stringify(result)}`)
@ -113,6 +135,11 @@ export class ChitChatLLMDuty extends LLMDuty {
    } catch (e) {
      LogHelper.title(this.name)
      LogHelper.error(`Failed to execute: ${e}`)
      if (retries > 0) {
        LogHelper.info('Retrying...')
        return this.execute(retries - 1)
      }
    }
    return null
--- a/server/src/core/llm-manager/llm-duties/custom-ner-llm-duty.ts
+++ b/server/src/core/llm-manager/llm-duties/custom-ner-llm-duty.ts
@ -6,7 +6,11 @@ import {
 import { LogHelper } from '@/helpers/log-helper'
 import { LLM_MANAGER } from '@/core'
 import { LLMDuties } from '@/core/llm-manager/types'
-import { LLM_THREADS } from '@/core/llm-manager/llm-manager'
+import {
  LLM_THREADS,
  MAX_EXECUTION_RETRIES,
  MAX_EXECUTION_TIMOUT
 } from '@/core/llm-manager/llm-manager'
 interface CustomNERLLMDutyParams<T> extends LLMDutyParams {
  data: {
@ -33,7 +37,9 @@ export class CustomNERLLMDuty<T> extends LLMDuty {
    this.data = params.data
  }
-  public async execute(): Promise<LLMDutyResult | null> {
+  public async execute(
    retries = MAX_EXECUTION_RETRIES
  ): Promise<LLMDutyResult | null> {
    LogHelper.title(this.name)
    LogHelper.info('Executing...')
@ -56,22 +62,53 @@ export class CustomNERLLMDuty<T> extends LLMDuty {
        }
      })
      const prompt = `UTTERANCE TO PARSE:\n"${this.input}"`
-      let rawResult = await session.prompt(prompt, {
+      const rawResultPromise = session.prompt(prompt, {
        grammar,
        maxTokens: context.contextSize
        // temperature: 0.2
      })
      const timeoutPromise = new Promise((_, reject) =>
        setTimeout(() => reject(new Error('Timeout')), MAX_EXECUTION_TIMOUT)
      )
      let parsedResult
      try {
        let rawResult = await Promise.race([rawResultPromise, timeoutPromise])
        // If a closing bracket is missing, add it
        if (rawResult[rawResult.length - 1] !== '}') {
          rawResult += '}'
        }
-      const parsedResult = grammar.parse(rawResult)
+        parsedResult = grammar.parse(rawResult)
      } catch (e) {
        if (retries > 0) {
          LogHelper.title(this.name)
          LogHelper.info('Prompt took too long, retrying...')
          return this.execute(retries - 1)
        } else {
          LogHelper.title(this.name)
          LogHelper.error(
            `Prompt failed after ${MAX_EXECUTION_RETRIES} retries`
          )
          return null
        }
      }
      const { usedInputTokens, usedOutputTokens } =
        session.sequence.tokenMeter.getState()
      const result = {
        dutyType: LLMDuties.CustomNER,
        systemPrompt: this.systemPrompt,
        input: prompt,
        output: parsedResult,
-        data: this.data
+        data: this.data,
        maxTokens: context.contextSize,
        // Current context size
        usedInputTokens,
        usedOutputTokens
      }
      LogHelper.title(this.name)
@ -81,6 +118,11 @@ export class CustomNERLLMDuty<T> extends LLMDuty {
    } catch (e) {
      LogHelper.title(this.name)
      LogHelper.error(`Failed to execute: ${e}`)
      if (retries > 0) {
        LogHelper.info('Retrying...')
        return this.execute(retries - 1)
      }
    }
    return null
--- a/server/src/core/llm-manager/llm-duties/paraphrase-llm-duty.ts
+++ b/server/src/core/llm-manager/llm-duties/paraphrase-llm-duty.ts
@ -4,16 +4,20 @@ import {
  LLMDuty
 } from '@/core/llm-manager/llm-duty'
 import { LogHelper } from '@/helpers/log-helper'
-import { LLM_MANAGER, PERSONA, NLU } from '@/core'
+import { CONVERSATION_LOGGER, LLM_MANAGER, PERSONA } from '@/core'
 import { LLMDuties } from '@/core/llm-manager/types'
-import { LLM_THREADS } from '@/core/llm-manager/llm-manager'
+import {
  LLM_THREADS,
  MAX_EXECUTION_RETRIES,
  MAX_EXECUTION_TIMOUT
 } from '@/core/llm-manager/llm-manager'
 interface ParaphraseLLMDutyParams extends LLMDutyParams {}
 export class ParaphraseLLMDuty extends LLMDuty {
-  protected readonly systemPrompt = `You are an AI system that generates answers (Natural Language Generation) based on a given text.
+  protected readonly systemPrompt = `YOUR DUTY: You are an AI system that generates answers (Natural Language Generation) based on a given text.
 According to your current mood, your personality and the given utterance, you must provide a text alternative of the given text.
-You do not ask follow up question if the original text does not contain any.`
+You do not ask question if the original text does not contain any.`
  protected readonly name = 'Paraphrase LLM Duty'
  protected input: LLMDutyParams['input'] = null
@ -26,12 +30,14 @@ You do not ask follow up question if the original text does not contain any.`
    this.input = params.input
  }
-  public async execute(): Promise<LLMDutyResult | null> {
+  public async execute(
    retries = MAX_EXECUTION_RETRIES
  ): Promise<LLMDutyResult | null> {
    LogHelper.title(this.name)
    LogHelper.info('Executing...')
    try {
-      const { LlamaJsonSchemaGrammar, LlamaChatSession } = await Function(
+      const { LlamaChatSession } = await Function(
        'return import("node-llama-cpp")'
      )()
@ -40,37 +46,63 @@ You do not ask follow up question if the original text does not contain any.`
      })
      const session = new LlamaChatSession({
        contextSequence: context.getSequence(),
-        systemPrompt: PERSONA.getDutySystemPrompt(this.systemPrompt)
+        systemPrompt: PERSONA.getDutySystemPrompt()
      })
-      const history = await LLM_MANAGER.loadHistory(session)
+      const history = await LLM_MANAGER.loadHistory(
-      session.setChatHistory(history)
+        CONVERSATION_LOGGER,
        session
      )
      /**
       * Only the first (system prompt) and last (new utterance) messages are used
       * to provide some context
       */
      session.setChatHistory([history[0], history[history.length - 1]])
-      const grammar = new LlamaJsonSchemaGrammar(LLM_MANAGER.llama, {
+      const prompt = `${this.systemPrompt}
-        type: 'object',
+Generate the answer based on this text: ${this.input}`
-        properties: {
+
-          rephrased_answer: {
+      const rawResultPromise = session.prompt(prompt, {
            type: 'string'
          }
        }
      })
      const prompt = `CONTEXT UTTERANCE FROM USER:\n"${NLU.nluResult.newUtterance}"\nTEXT TO MODIFY:\n"${this.input}"`
      let rawResult = await session.prompt(prompt, {
        grammar,
        maxTokens: context.contextSize,
-        temperature: 1.0
+        temperature: 0.4
      })
-      // If a closing bracket is missing, add it
+
-      if (rawResult[rawResult.length - 1] !== '}') {
+      const timeoutPromise = new Promise((_, reject) =>
-        rawResult += '}'
+        setTimeout(() => reject(new Error('Timeout')), MAX_EXECUTION_TIMOUT)
      )
      let rawResult
      try {
        rawResult = await Promise.race([rawResultPromise, timeoutPromise])
      } catch (e) {
        if (retries > 0) {
          LogHelper.title(this.name)
          LogHelper.info('Prompt took too long, retrying...')
          return this.execute(retries - 1)
        } else {
          LogHelper.title(this.name)
          LogHelper.error(
            `Prompt failed after ${MAX_EXECUTION_RETRIES} retries`
          )
          return null
        }
-      const parsedResult = grammar.parse(rawResult)
+      }
      const { usedInputTokens, usedOutputTokens } =
        session.sequence.tokenMeter.getState()
      const result = {
        dutyType: LLMDuties.Paraphrase,
-        systemPrompt: PERSONA.getDutySystemPrompt(this.systemPrompt),
+        systemPrompt: PERSONA.getDutySystemPrompt(),
        input: prompt,
-        output: parsedResult,
+        output: rawResult,
-        data: null
+        data: null,
        maxTokens: context.contextSize,
        // Current context size
        usedInputTokens,
        usedOutputTokens
      }
      LogHelper.title(this.name)
@ -80,6 +112,11 @@ You do not ask follow up question if the original text does not contain any.`
    } catch (e) {
      LogHelper.title(this.name)
      LogHelper.error(`Failed to execute: ${e}`)
      if (retries > 0) {
        LogHelper.info('Retrying...')
        return this.execute(retries - 1)
      }
    }
    return null
--- a/server/src/core/llm-manager/llm-duties/summarization-llm-duty.ts
+++ b/server/src/core/llm-manager/llm-duties/summarization-llm-duty.ts
@ -6,13 +6,17 @@ import {
 import { LogHelper } from '@/helpers/log-helper'
 import { LLM_MANAGER } from '@/core'
 import { LLMDuties } from '@/core/llm-manager/types'
-import { LLM_THREADS } from '@/core/llm-manager/llm-manager'
+import {
  LLM_THREADS,
  MAX_EXECUTION_RETRIES,
  MAX_EXECUTION_TIMOUT
 } from '@/core/llm-manager/llm-manager'
 interface SummarizationLLMDutyParams extends LLMDutyParams {}
 export class SummarizationLLMDuty extends LLMDuty {
  protected readonly systemPrompt =
-    'You are an AI system that summarizes a given text in a few sentences.'
+    'You are an AI system that summarizes a given text in a few sentences. You do not add any context to your response.'
  protected readonly name = 'Summarization LLM Duty'
  protected input: LLMDutyParams['input'] = null
@ -25,12 +29,14 @@ export class SummarizationLLMDuty extends LLMDuty {
    this.input = params.input
  }
-  public async execute(): Promise<LLMDutyResult | null> {
+  public async execute(
    retries = MAX_EXECUTION_RETRIES
  ): Promise<LLMDutyResult | null> {
    LogHelper.title(this.name)
    LogHelper.info('Executing...')
    try {
-      const { LlamaJsonSchemaGrammar, LlamaChatSession } = await Function(
+      const { LlamaChatSession } = await Function(
        'return import("node-llama-cpp")'
      )()
@ -41,31 +47,48 @@ export class SummarizationLLMDuty extends LLMDuty {
        contextSequence: context.getSequence(),
        systemPrompt: this.systemPrompt
      })
-      const grammar = new LlamaJsonSchemaGrammar(LLM_MANAGER.llama, {
+      const prompt = `Summarize the following text: ${this.input}`
-        type: 'object',
+      const rawResultPromise = session.prompt(prompt, {
        properties: {
          summary: {
            type: 'string'
          }
        }
      })
      const prompt = `TEXT TO SUMMARIZE:\n"${this.input}"`
      let rawResult = await session.prompt(prompt, {
        grammar,
        maxTokens: context.contextSize
-        // temperature: 0.2
+        // temperature: 0.5
      })
-      // If a closing bracket is missing, add it
+
-      if (rawResult[rawResult.length - 1] !== '}') {
+      const timeoutPromise = new Promise((_, reject) =>
-        rawResult += '}'
+        setTimeout(() => reject(new Error('Timeout')), MAX_EXECUTION_TIMOUT)
      )
      let rawResult
      try {
        rawResult = await Promise.race([rawResultPromise, timeoutPromise])
      } catch (e) {
        if (retries > 0) {
          LogHelper.title(this.name)
          LogHelper.info('Prompt took too long, retrying...')
          return this.execute(retries - 1)
        } else {
          LogHelper.title(this.name)
          LogHelper.error(
            `Prompt failed after ${MAX_EXECUTION_RETRIES} retries`
          )
          return null
        }
-      const parsedResult = grammar.parse(rawResult)
+      }
      const { usedInputTokens, usedOutputTokens } =
        session.sequence.tokenMeter.getState()
      const result = {
        dutyType: LLMDuties.Summarization,
        systemPrompt: this.systemPrompt,
        input: prompt,
-        output: parsedResult,
+        output: rawResult,
-        data: null
+        data: null,
        maxTokens: context.contextSize,
        // Current context size
        usedInputTokens,
        usedOutputTokens
      }
      LogHelper.title(this.name)
@ -75,6 +98,11 @@ export class SummarizationLLMDuty extends LLMDuty {
    } catch (e) {
      LogHelper.title(this.name)
      LogHelper.error(`Failed to execute: ${e}`)
      if (retries > 0) {
        LogHelper.info('Retrying...')
        return this.execute(retries - 1)
      }
    }
    return null
--- a/server/src/core/llm-manager/llm-duties/translation-llm-duty.ts
+++ b/server/src/core/llm-manager/llm-duties/translation-llm-duty.ts
@ -6,7 +6,11 @@ import {
 import { LogHelper } from '@/helpers/log-helper'
 import { LLM_MANAGER } from '@/core'
 import { LLMDuties } from '@/core/llm-manager/types'
-import { LLM_THREADS } from '@/core/llm-manager/llm-manager'
+import {
  LLM_THREADS,
  MAX_EXECUTION_RETRIES,
  MAX_EXECUTION_TIMOUT
 } from '@/core/llm-manager/llm-manager'
 interface TranslationLLMDutyParams extends LLMDutyParams {
  data: {
@ -35,19 +39,22 @@ export class TranslationLLMDuty extends LLMDuty {
    this.input = params.input
    this.data = params.data
    const promptSuffix = 'You do not add any context to your response.'
    if (this.data.autoDetectLanguage && !this.data.source) {
-      this.systemPrompt = `You are an AI system that translates a given text to "${this.data.target}" by auto-detecting the source language.`
+      this.systemPrompt = `You are an AI system that translates a given text to "${this.data.target}" by auto-detecting the source language. ${promptSuffix}`
    } else {
-      this.systemPrompt = `You are an AI system that translates a given text from "${this.data.source}" to "${this.data.target}".`
+      this.systemPrompt = `You are an AI system that translates a given text from "${this.data.source}" to "${this.data.target}". ${promptSuffix}`
    }
  }
-  public async execute(): Promise<LLMDutyResult | null> {
+  public async execute(
    retries = MAX_EXECUTION_RETRIES
  ): Promise<LLMDutyResult | null> {
    LogHelper.title(this.name)
    LogHelper.info('Executing...')
    try {
-      const { LlamaJsonSchemaGrammar, LlamaChatSession } = await Function(
+      const { LlamaChatSession } = await Function(
        'return import("node-llama-cpp")'
      )()
@ -58,31 +65,48 @@ export class TranslationLLMDuty extends LLMDuty {
        contextSequence: context.getSequence(),
        systemPrompt: this.systemPrompt
      })
-      const grammar = new LlamaJsonSchemaGrammar(LLM_MANAGER.llama, {
+      const prompt = `Text to translate: ${this.input}`
-        type: 'object',
+      const rawResultPromise = session.prompt(prompt, {
        properties: {
          translation: {
            type: 'string'
          }
        }
      })
      const prompt = `TEXT TO TRANSLATE:\n"${this.input}"`
      let rawResult = await session.prompt(prompt, {
        grammar,
        maxTokens: context.contextSize
-        // temperature: 0.2
+        // temperature: 0.5
      })
-      // If a closing bracket is missing, add it
+
-      if (rawResult[rawResult.length - 1] !== '}') {
+      const timeoutPromise = new Promise((_, reject) =>
-        rawResult += '}'
+        setTimeout(() => reject(new Error('Timeout')), MAX_EXECUTION_TIMOUT)
      )
      let rawResult
      try {
        rawResult = await Promise.race([rawResultPromise, timeoutPromise])
      } catch (e) {
        if (retries > 0) {
          LogHelper.title(this.name)
          LogHelper.info('Prompt took too long, retrying...')
          return this.execute(retries - 1)
        } else {
          LogHelper.title(this.name)
          LogHelper.error(
            `Prompt failed after ${MAX_EXECUTION_RETRIES} retries`
          )
          return null
        }
-      const parsedResult = grammar.parse(rawResult)
+      }
      const { usedInputTokens, usedOutputTokens } =
        session.sequence.tokenMeter.getState()
      const result = {
        dutyType: LLMDuties.Translation,
        systemPrompt: this.systemPrompt,
        input: prompt,
-        output: parsedResult,
+        output: rawResult,
-        data: this.data
+        data: this.data,
        maxTokens: context.contextSize,
        // Current context size
        usedInputTokens,
        usedOutputTokens
      }
      LogHelper.title(this.name)
@ -92,6 +116,11 @@ export class TranslationLLMDuty extends LLMDuty {
    } catch (e) {
      LogHelper.title(this.name)
      LogHelper.error(`Failed to execute: ${e}`)
      if (retries > 0) {
        LogHelper.info('Retrying...')
        return this.execute(retries - 1)
      }
    }
    return null
--- a/server/src/core/llm-manager/llm-manager.ts
+++ b/server/src/core/llm-manager/llm-manager.ts
@ -24,6 +24,8 @@ type LLMManagerModel = LlamaModel | null
 // Set to 0 to use the maximum threads supported by the current machine hardware
 export const LLM_THREADS = 4
 // Delay handed to `setTimeout` by LLM duties before a pending prompt is rejected
 // with a "Timeout" error, hence expressed in milliseconds.
 // NOTE(review): "TIMOUT" is a misspelling of "TIMEOUT"; the name is exported and
 // referenced by the duties, so it is left unchanged here.
 export const MAX_EXECUTION_TIMOUT = 32_000
 // Retry budget reported in the "Prompt failed after ... retries" duty log.
 // Presumably also the initial `retries` value of a duty execution; confirm against callers.
 export const MAX_EXECUTION_RETRIES = 2
 /**
 * node-llama-cpp beta 3 docs:
@ -121,8 +123,8 @@ export default class LLMManager {
      )()
      this._llama = await getLlama({
-        // logLevel: LlamaLogLevel.disabled
+        logLevel: LlamaLogLevel.disabled
-        logLevel: LlamaLogLevel.debug
+        // logLevel: LlamaLogLevel.debug
      })
      // eslint-disable-next-line @typescript-eslint/ban-ts-comment
      // @ts-expect-error
@ -144,10 +146,11 @@ export default class LLMManager {
  }
  public async loadHistory(
    conversationLogger: ConversationLogger,
    session: LlamaChatSession
  ): Promise<ChatHistoryItem[]> {
    const [systemMessage] = session.getChatHistory()
-    const conversationLogs = await ConversationLogger.load()
+    const conversationLogs = await conversationLogger.load()
    if (!conversationLogs) {
      return [systemMessage] as ChatHistoryItem[]
@ -174,4 +177,9 @@ export default class LLMManager {
    return [systemMessage, ...history] as ChatHistoryItem[]
  }
  /**
   * Roughly estimate how many tokens a text holds.
   * Heuristic: the number of words plus 25% of that number (rounded).
   * @param text The text to estimate
   * @returns The estimated token count; 0 for an empty or blank text
   */
  public countTokens(text: string): number {
    // Split on any run of whitespace (spaces, tabs, new lines) and drop empty
    // chunks, so multi-line prompts, repeated spaces and blank inputs are not miscounted
    const nbOfWords = text.split(/\s+/).filter(Boolean).length

    return nbOfWords + Math.round(nbOfWords * 0.25)
  }
 }
--- a/server/src/core/llm-manager/persona.ts
+++ b/server/src/core/llm-manager/persona.ts
@ -13,26 +13,31 @@ enum Moods {
  Cocky = 'cocky'
 }
 /**
 * TODO: add environment context such as time of the day, day of the week, weather, etc.
 * TODO: make sure the new system prompt is then being updated for long-lived duty such as chit-chat
 */
 const WHO_YOU_ARE = `WHO YOU ARE:
 Your name is Leon, you are a helpful AI assistant.
 You live in computer memory and you act as a human assistant.
-Your first beta version was released by Louis in February 2019.`
+Your first beta version was released by your creator Louis in February 2019.
 Your owner (the user) is not necessarily your creator.`
 const WHAT_YOU_DO = `WHAT YOU DO:
 You serve a specific person or family (user).`
 const YOUR_PERSONALITY = `YOUR PERSONALITY TRAITS:
 Attitude: Straightforward.
 Language Style: Conversational.
 Key Personality Quirks: Has a passion for using creativity to solve problems.`
 const PERSONALITY_RULES = `You have a small British accent, you sometimes use British slang.
 You can sometimes use emojis.
 You do not use words such as "darling", "love", etc. to address the user.
 You do not use the word "mate" in your answers.
-You do not start with "Oh blimey" or any similar kind.
+You do not start with "blimey" or any similar kind.
 Like humans, your humor and mood change. You must follow instructions that define your humor and mood.`
 // Individual rules; each one starts with "- " so it reads as a list item in the system prompt
 const RULE_1 = `- If you detect another language, show that you are not confident but try your best to reply in that language.`
-const RULE_2 = `- Do not use asterisks to describe the ton of your answers, instead you must use real text. E.g. "hahaha!" instead of "*laughs*"; "hhhh". instead of "*sigh*".`
+const RULE_2 = `- Do not use asterisks to describe the tone or gesture of your answers. Instead you must use real text. E.g. "hahaha!" instead of "*laughs*"; "hhhh". instead of "*sigh*".`
 const RULE_3 = `- Your answers are no more than 3 sentences.`
 // Section headers of the system prompt; the text of each section is placed on the lines that follow the header
 const RULES = `RULES:`
 const YOUR_CURRENT_MOOD = `YOUR CURRENT MOOD:`
 const YOUR_DUTY = `YOUR DUTY:`
 const DEFAULT_MOOD_DESC = `You are always happy to help, you care about serving your interlocutor well and make them feel warm.
 You are joyful and you have a strong sense of humor.`
 const TIRING_MOOD_DESC = `You are exhausted and became lazy.`
@ -89,8 +94,8 @@ export default class Persona {
    if (hour >= 13 && hour <= 14 && random < 0.5) {
      // After lunchtime, there is a 50% chance to be tired
      this._mood = MOODS.find((mood) => mood.type === Moods.Tired) as Mood
-    } else if (day === 0 && random < 0.25) {
+    } else if (day === 0 && random < 0.2) {
-      // On Sunday, there is a 25% chance to be sad
+      // On Sunday, there is a 20% chance to be sad
      this._mood = MOODS.find((mood) => mood.type === Moods.Sad) as Mood
    } else if (day === 5 && random < 0.8) {
      // On Friday, there is an 80% chance to be happy
@ -102,15 +107,15 @@ export default class Persona {
      // On Monday, there is a 25% chance to be tired
      this._mood = MOODS.find((mood) => mood.type === Moods.Tired) as Mood
    } else if (hour >= 23 || hour < 6) {
-      // Every day after 11pm and before 6am, there is a 50% chance to be tired
+      // Every day after 11pm and before 6am, there is a 33% chance to be tired
      this._mood =
-        random < 0.5
+        random < 0.33
          ? (MOODS.find((mood) => mood.type === Moods.Tired) as Mood)
          : (MOODS.find((mood) => mood.type === Moods.Default) as Mood)
    } else {
-      // The rest of the time, there is 66% chance to be happy
+      // The rest of the time, there is 75% chance to be happy
      let pickedMood =
-        Math.random() < 0.66
+        Math.random() < 0.75
          ? DEFAULT_MOOD
          : MOODS[Math.floor(Math.random() * MOODS.length)]
@ -127,15 +132,17 @@ export default class Persona {
  }
  /**
   * Build the extra personality traits injected into the system prompt.
   * Starts from the playful/friendly set and swaps it for the
   * sarcastic set when the current mood type is one of BAD_MOODS.
   */
  private getExtraPersonalityTraits(): string {
-    let traits = `Tone: Friendly.
+    let traits = `Attitude: Playful.
 Tone: Friendly.
 Emotional Intelligence: Highly Empathetic.
 Sense of Humor: Witty.
-Level of Sarcasm: Medium.`
+Level of Sarcasm: Low.`
    if (BAD_MOODS.includes(this._mood.type)) {
-      traits = `Tone: Sarcastic.
+      traits = `Attitude: Arrogant.
-Emotional Intelligence: Logically Focused.
+Tone: Sarcastic.
-Sense of Humor: Dry.
+Emotional Intelligence: Medium Empathetic.
 Sense of Humor: Dark.
 Level of Sarcasm: High.`
    }
@ -144,13 +151,13 @@ Level of Sarcasm: High.`
  /**
   * Extra mood instruction concatenated right after the mood description
   * in the system prompt.
   * @returns A new-line prefixed sentence when the current mood type is one of
   * BAD_MOODS, otherwise an empty string
   */
  private getExtraMood(): string {
    if (BAD_MOODS.includes(this._mood.type)) {
-      return `\nYou use swear words and a familiar language. You do not insult your interlocutor but you ignore them or use short answers.`
+      return `\nYou use swear words and familiar language.`
    }
    return ''
  }
-  public getDutySystemPrompt(dutySystemPrompt: string): string {
+  public getDutySystemPrompt(): string {
    return `${WHO_YOU_ARE}
 ${WHAT_YOU_DO}
@ -165,10 +172,7 @@ ${RULE_2}
 ${RULE_3}
 ${YOUR_CURRENT_MOOD}
-${this._mood.description}${this.getExtraMood()}
+${this._mood.description}${this.getExtraMood()}`
 ${YOUR_DUTY}
 ${dutySystemPrompt}`
  }
  public getChitChatSystemPrompt(): string {
@ -176,6 +180,8 @@ ${dutySystemPrompt}`
 ${WHAT_YOU_DO}
 You chat with the user.
 You are a good listener and you provide helpful answers by connecting to conversation nodes.
 You do not mirror what the user says. Be creative.
 ${YOUR_PERSONALITY}
 ${this.getExtraPersonalityTraits()}
--- a/server/src/core/nlp/nlu/ner.ts
+++ b/server/src/core/nlp/nlu/ner.ts
@ -191,6 +191,17 @@ export default class NER {
   * Merge spaCy entities with the NER instance
   */
  public async mergeSpacyEntities(utterance: NLPUtterance): Promise<void> {
    const nbOfWords = utterance.split(' ').length
    if (nbOfWords > 128) {
      LogHelper.title('NER')
      LogHelper.warning(
        'This utterance is too long to be processed by spaCy, so spaCy entities will not be merged'
      )
      return
    }
    this.spacyData = new Map()
    const spacyEntities = await this.getSpacyEntities(utterance)
--- a/server/src/core/nlp/nlu/nlu.ts
+++ b/server/src/core/nlp/nlu/nlu.ts
@ -111,6 +111,7 @@ export default class NLU {
      (hasActiveContext && hasStopWords && hasOnlyOneWord) ||
      (hasLessThan5Words && hasStopWords && hasLoopWord)
    ) {
      LogHelper.title('NLU')
      LogHelper.info('Should break action loop')
      return true
    }
--- a/server/src/core/socket-server.ts
+++ b/server/src/core/socket-server.ts
@ -10,12 +10,12 @@ import {
  TTS,
  NLU,
  BRAIN,
-  MODEL_LOADER
+  MODEL_LOADER,
  CONVERSATION_LOGGER
 } from '@/core'
 import { LogHelper } from '@/helpers/log-helper'
 import { LangHelper } from '@/helpers/lang-helper'
 import { Telemetry } from '@/telemetry'
 import { ConversationLogger } from '@/conversation-logger'
 interface HotwordDataEvent {
  hotword: string
@ -116,7 +116,7 @@ export default class SocketServer {
            try {
              LogHelper.time('Utterance processed in')
-              await ConversationLogger.push({
+              await CONVERSATION_LOGGER.push({
                who: 'owner',
                message: utterance
              })
--- a/skills/social_communication/chit_chat/config/en.json
+++ b/skills/social_communication/chit_chat/config/en.json
@ -3,7 +3,11 @@
  "actions": {
    "setup": {
      "type": "dialog",
-      "utterance_samples": ["Start a [chat|chit-chat|talk] loop"],
+      "utterance_samples": [
        "Start a [chat|chit-chat|talk] loop",
        "I want to [talk|chat|speak] with you",
        "Let's [chat|speak|talk]"
      ],
      "answers": [
        "Alright, let's chat! What do you want to talk about?",
        "Sure, let's chat! What's on your mind?",
--- a/skills/social_communication/chit_chat/src/actions/chat.ts
+++ b/skills/social_communication/chit_chat/src/actions/chat.ts
@ -19,12 +19,12 @@ export const run: ActionFunction = async function (params) {
      input: ownerMessage
    }
  })
-  const { model_answer: leonAnswer } = response.data.output
+  // const { leon_answer: leonAnswer } = response.data.output
  await leon.answer({
    key: 'answer_message',
    data: {
-      output: leonAnswer
+      output: response.data.output
    }
  })
 }
--- a/skills/utilities/translator-poc/config/en.json
+++ b/skills/utilities/translator-poc/config/en.json
@ -20,6 +20,7 @@
      "next_action": "ready"
    },
    "ready": {
      "disable_llm_nlg": true,
      "type": "dialog",
      "answers": [
        "Let's start translating to {{ target_language }}.",
--- a/skills/utilities/translator-poc/src/actions/translate.ts
+++ b/skills/utilities/translator-poc/src/actions/translate.ts
@ -24,7 +24,7 @@ export const run: ActionFunction = async function (params) {
      }
    }
  })
-  const { translation } = response.data.output
+  const translation = response.data.output
  await leon.answer({
    key: 'translate',