1
1
mirror of https://github.com/leon-ai/leon.git synced 2024-11-23 20:12:08 +03:00

feat(server): finalize Leon's personality and optimize LLM duties

This commit is contained in:
louistiti 2024-05-06 00:57:20 +08:00
parent a0a4f9d7b0
commit 0189c74a0e
No known key found for this signature in database
GPG Key ID: 92CD6A2E497E1669
19 changed files with 448 additions and 198 deletions

View File

@ -161,22 +161,30 @@ export const LEON_FILE_PATH = path.join(process.cwd(), 'leon.json')
/**
* LLMs
* @see k-quants comparison: https://github.com/ggerganov/llama.cpp/pull/1684
*/
// https://huggingface.co/QuantFactory/Meta-Llama-3-8B-Instruct-GGUF/blob/main/Meta-Llama-3-8B-Instruct.Q5_K_S.gguf
export const HAS_LLM = process.env['LEON_LLM'] === 'true'
export const HAS_LLM_NLG = process.env['LEON_LLM_NLG'] === 'true' && HAS_LLM
// export const LLM_VERSION = 'v0.2.Q4_K_S'
// export const LLM_VERSION = '8B-Instruct.Q5_K_S'
// export const LLM_VERSION = '2.9-llama3-8b.Q5_K_S'
export const LLM_VERSION = '3-8B-Uncensored-Q5_K_S'
// export const LLM_VERSION = '3-mini-128k-instruct.Q5_K_S'
// export const LLM_VERSION = '3-mini-4k-instruct-q4'
// export const LLM_VERSION = '1.1-7b-it-Q4_K_M'
// export const LLM_VERSION = '8B-Instruct-Q4_K_S'
// export const LLM_NAME = 'Mistral 7B Instruct'
// export const LLM_NAME = 'Meta-Llama-3-8B-Instruct'
// export const LLM_NAME = 'Dolphin 2.9 Llama-3-8B'
export const LLM_NAME = 'Lexi-Llama-3-8B-Uncensored'
// export const LLM_NAME = 'Phi-3-Mini-128K-Instruct'
// export const LLM_NAME = 'Phi-3-mini'
// export const LLM_NAME = 'Gemma 1.1 7B (IT)'
// export const LLM_NAME = 'Meta Llama 3 8B Instruct'
// export const LLM_FILE_NAME = `mistral-7b-instruct-${LLM_VERSION}.gguf`
// export const LLM_FILE_NAME = `Meta-Llama-3-${LLM_VERSION}.gguf`
// export const LLM_FILE_NAME = `dolphin-${LLM_VERSION}.gguf`
export const LLM_FILE_NAME = `Lexi-Llama-${LLM_VERSION}.gguf`
// export const LLM_FILE_NAME = `Phi-${LLM_VERSION}.gguf`
// export const LLM_FILE_NAME = `gemma-${LLM_VERSION}.gguf`
@ -186,6 +194,10 @@ export const LLM_DIR_PATH = path.join(MODELS_PATH, 'llm')
export const LLM_PATH = path.join(LLM_DIR_PATH, LLM_FILE_NAME)
export const LLM_MINIMUM_TOTAL_RAM = 8
export const LLM_MINIMUM_FREE_RAM = 8
/*export const LLM_HF_DOWNLOAD_URL =
'https://huggingface.co/QuantFactory/Meta-Llama-3-8B-Instruct-GGUF/resolve/main/Meta-Llama-3-8B-Instruct.Q5_K_S.gguf?download=true'*/
/*export const LLM_HF_DOWNLOAD_URL =
'https://huggingface.co/QuantFactory/dolphin-2.9-llama3-8b-GGUF/resolve/main/dolphin-2.9-llama3-8b.Q5_K_S.gguf?download=true'*/
export const LLM_HF_DOWNLOAD_URL =
'https://huggingface.co/bartowski/Lexi-Llama-3-8B-Uncensored-GGUF/resolve/main/Lexi-Llama-3-8B-Uncensored-Q5_K_S.gguf?download=true'
/*export const LLM_HF_DOWNLOAD_URL =
@ -200,6 +212,10 @@ export const LLM_HF_DOWNLOAD_URL =
'https://huggingface.co/bartowski/Meta-Llama-3-8B-Instruct-GGUF/resolve/main/Meta-Llama-3-8B-Instruct-Q4_K_S.gguf?download=true'*/
/*export const LLM_MIRROR_DOWNLOAD_URL =
'https://hf-mirror.com/bartowski/gemma-1.1-7b-it-GGUF/resolve/main/gemma-1.1-7b-it-Q4_K_M.gguf?download=true'*/
/*export const LLM_MIRROR_DOWNLOAD_URL =
'https://hf-mirror.com/QuantFactory/Meta-Llama-3-8B-Instruct-GGUF/resolve/main/Meta-Llama-3-8B-Instruct.Q5_K_S.gguf?download=true'*/
/*export const LLM_MIRROR_DOWNLOAD_URL =
'https://hf-mirror.com/QuantFactory/dolphin-2.9-llama3-8b-GGUF/resolve/main/dolphin-2.9-llama3-8b.Q5_K_S.gguf?download=true'*/
export const LLM_MIRROR_DOWNLOAD_URL =
'https://hf-mirror.com/bartowski/Lexi-Llama-3-8B-Uncensored-GGUF/resolve/main/Lexi-Llama-3-8B-Uncensored-Q5_K_S.gguf?download=true'
/*export const LLM_MIRROR_DOWNLOAD_URL =

View File

@ -9,8 +9,12 @@ interface MessageLog {
sentAt: number
message: string
}
const CONVERSATION_LOG_PATH = path.join(LOGS_PATH, 'conversation_log.json')
/**
 * Per-instance configuration of a ConversationLogger
 */
interface ConversationLoggerSettings {
// Title given to LogHelper.title() whenever this logger reports something
loggerName: string
// Name of the JSON log file; the constructor joins it with LOGS_PATH
fileName: string
// Once the log already holds this many records, push() drops the oldest one
nbOfLogsToKeep: number
// Number of most recent records returned by load()
nbOfLogsToLoad: number
}
/**
* The goal of this class is to log the conversation data between the
@ -20,27 +24,39 @@ const CONVERSATION_LOG_PATH = path.join(LOGS_PATH, 'conversation_log.json')
* better results.
*/
export class ConversationLogger {
private static readonly nbOfLogsToKeep = 512
private static readonly nbOfLogsToLoad = 32
private readonly settings: ConversationLoggerSettings
private readonly conversationLogPath: string
private static async createConversationLogFile(): Promise<void> {
/**
 * Read-only access to the logger name given in the settings
 */
get loggerName(): string {
return this.settings.loggerName
}
/**
 * Announce the new instance under the logger's own title, keep the settings
 * and resolve the log file location inside LOGS_PATH.
 *
 * @param settings Logger name, log file name and retention/loading limits
 */
constructor(settings: ConversationLoggerSettings) {
  const { loggerName, fileName } = settings

  LogHelper.title(loggerName)
  LogHelper.success('New instance')

  this.settings = settings
  this.conversationLogPath = path.join(LOGS_PATH, fileName)
}
private async createConversationLogFile(): Promise<void> {
try {
if (!fs.existsSync(CONVERSATION_LOG_PATH)) {
await fs.promises.writeFile(CONVERSATION_LOG_PATH, '[]', 'utf-8')
if (!fs.existsSync(this.conversationLogPath)) {
await fs.promises.writeFile(this.conversationLogPath, '[]', 'utf-8')
}
} catch (e) {
LogHelper.title('Conversation Logger')
LogHelper.title(this.settings.loggerName)
LogHelper.error(`Failed to create conversation log file: ${e})`)
}
}
private static async getAllLogs(): Promise<MessageLog[]> {
private async getAllLogs(): Promise<MessageLog[]> {
try {
let conversationLog: MessageLog[] = []
if (fs.existsSync(CONVERSATION_LOG_PATH)) {
if (fs.existsSync(this.conversationLogPath)) {
conversationLog = JSON.parse(
await fs.promises.readFile(CONVERSATION_LOG_PATH, 'utf-8')
await fs.promises.readFile(this.conversationLogPath, 'utf-8')
)
} else {
await this.createConversationLogFile()
@ -48,20 +64,18 @@ export class ConversationLogger {
return conversationLog
} catch (e) {
LogHelper.title('Conversation Logger')
LogHelper.title(this.settings.loggerName)
LogHelper.error(`Failed to get conversation log: ${e})`)
}
return []
}
public static async push(
newRecord: Omit<MessageLog, 'sentAt'>
): Promise<void> {
public async push(newRecord: Omit<MessageLog, 'sentAt'>): Promise<void> {
try {
const conversationLogs = await this.getAllLogs()
if (conversationLogs.length >= this.nbOfLogsToKeep) {
if (conversationLogs.length >= this.settings.nbOfLogsToKeep) {
conversationLogs.shift()
}
@ -71,32 +85,32 @@ export class ConversationLogger {
})
await fs.promises.writeFile(
CONVERSATION_LOG_PATH,
this.conversationLogPath,
JSON.stringify(conversationLogs, null, 2),
'utf-8'
)
} catch (e) {
LogHelper.title('Conversation Logger')
LogHelper.title(this.settings.loggerName)
LogHelper.error(`Failed to push new record: ${e})`)
}
}
public static async load(): Promise<MessageLog[] | void> {
public async load(): Promise<MessageLog[] | void> {
try {
const conversationLog = await this.getAllLogs()
return conversationLog.slice(-this.nbOfLogsToLoad)
return conversationLog.slice(-this.settings.nbOfLogsToLoad)
} catch (e) {
LogHelper.title('Conversation Logger')
LogHelper.title(this.settings.loggerName)
LogHelper.error(`Failed to load conversation log: ${e})`)
}
}
public static async clear(): Promise<void> {
public async clear(): Promise<void> {
try {
await fs.promises.writeFile(CONVERSATION_LOG_PATH, '[]', 'utf-8')
await fs.promises.writeFile(this.conversationLogPath, '[]', 'utf-8')
} catch (e) {
LogHelper.title('Conversation Logger')
LogHelper.title(this.settings.loggerName)
LogHelper.error(`Failed to clear conversation log: ${e})`)
}
}

View File

@ -28,7 +28,13 @@ import {
NODEJS_BRIDGE_BIN_PATH,
TMP_PATH
} from '@/constants'
import { LLM_MANAGER, NLU, SOCKET_SERVER, TTS } from '@/core'
import {
CONVERSATION_LOGGER,
LLM_MANAGER,
NLU,
SOCKET_SERVER,
TTS
} from '@/core'
import { LangHelper } from '@/helpers/lang-helper'
import { LogHelper } from '@/helpers/log-helper'
import { SkillDomainHelper } from '@/helpers/skill-domain-helper'
@ -36,7 +42,6 @@ import { StringHelper } from '@/helpers/string-helper'
import { DateHelper } from '@/helpers/date-helper'
import { ParaphraseLLMDuty } from '@/core/llm-manager/llm-duties/paraphrase-llm-duty'
import { AnswerQueue } from '@/core/brain/answer-queue'
import { ConversationLogger } from '@/conversation-logger'
const MIN_NB_OF_WORDS_TO_USE_LLM_NLG = 5
@ -173,9 +178,7 @@ export default class Brain {
})
const paraphraseResult = await paraphraseDuty.execute()
textAnswer = paraphraseResult?.output[
'rephrased_answer'
] as string
textAnswer = paraphraseResult?.output as unknown as string
speechAnswer = textAnswer
}
}
@ -191,7 +194,7 @@ export default class Brain {
SOCKET_SERVER.socket?.emit('answer', textAnswer)
SOCKET_SERVER.socket?.emit('is-typing', false)
await ConversationLogger.push({
await CONVERSATION_LOGGER.push({
who: 'leon',
message: textAnswer
})

View File

@ -63,12 +63,21 @@ export const postLLMInference: FastifyPluginAsync<APIOptions> = async (
return
}
let llmResult
// TODO: use long-lived duty for chit-chat duty
// eslint-disable-next-line @typescript-eslint/ban-ts-comment
// @ts-expect-error
const duty = new LLM_DUTIES_MAP[params.dutyType](params)
const llmResult = await duty.execute()
if (params.dutyType === LLMDuties.ChitChat) {
const chitChatLLMDuty = new ChitChatLLMDuty()
await chitChatLLMDuty.init()
llmResult = await chitChatLLMDuty.execute()
} else {
// eslint-disable-next-line @typescript-eslint/ban-ts-comment
// @ts-expect-error
const duty = new LLM_DUTIES_MAP[params.dutyType](params)
llmResult = await duty.execute()
}
reply.send({
success: true,

View File

@ -16,6 +16,7 @@ import NaturalLanguageUnderstanding from '@/core/nlp/nlu/nlu'
import Brain from '@/core/brain/brain'
import LLMManager from '@/core/llm-manager/llm-manager'
import Persona from '@/core/llm-manager/persona'
import { ConversationLogger } from '@/conversation-logger'
/**
* Register core nodes
@ -35,6 +36,19 @@ export const LLM_MANAGER = new LLMManager()
export const PERSONA = new Persona()
// Main conversation log: the brain pushes Leon's answers to it and the paraphrase duty loads its history from it
export const CONVERSATION_LOGGER = new ConversationLogger({
loggerName: 'Conversation Logger',
fileName: 'conversation_log.json',
nbOfLogsToKeep: 512,
nbOfLogsToLoad: 96
})
// Separate log used by the chit-chat duty; that duty clears it when it first creates its context and session
export const LOOP_CONVERSATION_LOGGER = new ConversationLogger({
loggerName: 'Loop Conversation Logger',
fileName: 'loop_conversation_log.json',
nbOfLogsToKeep: 512,
nbOfLogsToLoad: 96
})
export const HTTP_SERVER = new HTTPServer(String(HOST), PORT)
export const SOCKET_SERVER = new SocketServer()

View File

@ -1,23 +1,27 @@
import type { LlamaContext, LlamaChatSession } from 'node-llama-cpp'
import {
type LLMDutyParams,
type LLMDutyResult,
LLMDuty
} from '@/core/llm-manager/llm-duty'
import { LogHelper } from '@/helpers/log-helper'
import { LLM_MANAGER, PERSONA, NLU } from '@/core'
import { LLM_MANAGER, PERSONA, NLU, LOOP_CONVERSATION_LOGGER } from '@/core'
import { LLMDuties } from '@/core/llm-manager/types'
import { LLM_THREADS } from '@/core/llm-manager/llm-manager'
// interface ChitChatLLMDutyParams extends LLMDutyParams {}
import {
LLM_THREADS,
MAX_EXECUTION_RETRIES,
MAX_EXECUTION_TIMOUT
} from '@/core/llm-manager/llm-manager'
export class ChitChatLLMDuty extends LLMDuty {
private static instance: ChitChatLLMDuty
// TODO
private static context: LlamaContext = null as unknown as LlamaContext
private static session: LlamaChatSession = null as unknown as LlamaChatSession
protected readonly systemPrompt = ``
protected readonly name = 'Chit-Chat LLM Duty'
protected input: LLMDutyParams['input'] = null
// constructor(params: ChitChatLLMDutyParams) {
constructor() {
super()
@ -26,53 +30,63 @@ export class ChitChatLLMDuty extends LLMDuty {
LogHelper.success('New instance')
ChitChatLLMDuty.instance = this
// this.input = params.input
}
}
public async execute(retries = 3): Promise<LLMDutyResult | null> {
/**
 * Prepare the context and chat session shared by every chit-chat duty instance.
 *
 * Both are static fields that start as null, so the first call in a process
 * clears the loop conversation log and creates them with the chit-chat
 * system prompt. Later calls keep them and only reload the chat history
 * from the loop conversation log.
 */
public async init(): Promise<void> {
  const hasLiveSession =
    Boolean(ChitChatLLMDuty.context) && Boolean(ChitChatLLMDuty.session)

  if (hasLiveSession) {
    // Context and session already exist: refresh the session history only
    const history = await LLM_MANAGER.loadHistory(
      LOOP_CONVERSATION_LOGGER,
      ChitChatLLMDuty.session
    )

    ChitChatLLMDuty.session.setChatHistory(history)

    return
  }

  // Nothing to reuse yet: start from an empty loop conversation log
  await LOOP_CONVERSATION_LOGGER.clear()

  ChitChatLLMDuty.context = await LLM_MANAGER.model.createContext({
    threads: LLM_THREADS
  })

  // Dynamic import wrapped in Function(), same as the other duties do
  const nodeLlamaCpp = await Function('return import("node-llama-cpp")')()

  ChitChatLLMDuty.session = new nodeLlamaCpp.LlamaChatSession({
    contextSequence: ChitChatLLMDuty.context.getSequence(),
    systemPrompt: PERSONA.getChitChatSystemPrompt()
  }) as LlamaChatSession
}
public async execute(
retries = MAX_EXECUTION_RETRIES
): Promise<LLMDutyResult | null> {
LogHelper.title(this.name)
LogHelper.info('Executing...')
try {
const { LlamaJsonSchemaGrammar, LlamaChatSession } = await Function(
'return import("node-llama-cpp")'
)()
/**
* TODO: make context, session, etc. persistent
*/
const context = await LLM_MANAGER.model.createContext({
threads: LLM_THREADS
await LOOP_CONVERSATION_LOGGER.push({
who: 'owner',
message: NLU.nluResult.newUtterance
})
const session = new LlamaChatSession({
contextSequence: context.getSequence(),
systemPrompt: PERSONA.getDutySystemPrompt(this.systemPrompt)
const prompt = NLU.nluResult.newUtterance
const rawResultPromise = ChitChatLLMDuty.session.prompt(prompt, {
maxTokens: ChitChatLLMDuty.context.contextSize,
temperature: 1.3
})
const history = await LLM_MANAGER.loadHistory(session)
session.setChatHistory(history)
const grammar = new LlamaJsonSchemaGrammar(LLM_MANAGER.llama, {
type: 'object',
properties: {
model_answer: {
type: 'string'
}
}
})
const prompt = `NEW MESSAGE FROM USER:\n"${NLU.nluResult.newUtterance}"`
const rawResultPromise = session.prompt(prompt, {
grammar,
maxTokens: context.contextSize,
temperature: 1.0
})
const timeoutPromise = new Promise(
(_, reject) => setTimeout(() => reject(new Error('Timeout')), 8_000) // 5 seconds timeout
const timeoutPromise = new Promise((_, reject) =>
setTimeout(() => reject(new Error('Timeout')), MAX_EXECUTION_TIMOUT)
)
let rawResult
@ -87,25 +101,33 @@ export class ChitChatLLMDuty extends LLMDuty {
return this.execute(retries - 1)
} else {
LogHelper.title(this.name)
LogHelper.error('Prompt failed after 3 retries')
LogHelper.error(
`Prompt failed after ${MAX_EXECUTION_RETRIES} retries`
)
return null
}
}
// If a closing bracket is missing, add it
if (rawResult[rawResult.length - 1] !== '}') {
rawResult += '}'
}
const parsedResult = grammar.parse(rawResult)
const { usedInputTokens, usedOutputTokens } =
ChitChatLLMDuty.session.sequence.tokenMeter.getState()
const result = {
dutyType: LLMDuties.Paraphrase,
systemPrompt: PERSONA.getChitChatSystemPrompt(),
input: prompt,
output: parsedResult,
data: null
output: rawResult,
data: null,
maxTokens: ChitChatLLMDuty.context.contextSize,
// Current context size
usedInputTokens,
usedOutputTokens
}
await LOOP_CONVERSATION_LOGGER.push({
who: 'leon',
message: result.output as string
})
LogHelper.title(this.name)
LogHelper.success(`Duty executed: ${JSON.stringify(result)}`)
@ -113,6 +135,11 @@ export class ChitChatLLMDuty extends LLMDuty {
} catch (e) {
LogHelper.title(this.name)
LogHelper.error(`Failed to execute: ${e}`)
if (retries > 0) {
LogHelper.info('Retrying...')
return this.execute(retries - 1)
}
}
return null

View File

@ -6,7 +6,11 @@ import {
import { LogHelper } from '@/helpers/log-helper'
import { LLM_MANAGER } from '@/core'
import { LLMDuties } from '@/core/llm-manager/types'
import { LLM_THREADS } from '@/core/llm-manager/llm-manager'
import {
LLM_THREADS,
MAX_EXECUTION_RETRIES,
MAX_EXECUTION_TIMOUT
} from '@/core/llm-manager/llm-manager'
interface CustomNERLLMDutyParams<T> extends LLMDutyParams {
data: {
@ -33,7 +37,9 @@ export class CustomNERLLMDuty<T> extends LLMDuty {
this.data = params.data
}
public async execute(): Promise<LLMDutyResult | null> {
public async execute(
retries = MAX_EXECUTION_RETRIES
): Promise<LLMDutyResult | null> {
LogHelper.title(this.name)
LogHelper.info('Executing...')
@ -56,22 +62,53 @@ export class CustomNERLLMDuty<T> extends LLMDuty {
}
})
const prompt = `UTTERANCE TO PARSE:\n"${this.input}"`
let rawResult = await session.prompt(prompt, {
const rawResultPromise = session.prompt(prompt, {
grammar,
maxTokens: context.contextSize
// temperature: 0.2
})
// If a closing bracket is missing, add it
if (rawResult[rawResult.length - 1] !== '}') {
rawResult += '}'
const timeoutPromise = new Promise((_, reject) =>
setTimeout(() => reject(new Error('Timeout')), MAX_EXECUTION_TIMOUT)
)
let parsedResult
try {
let rawResult = await Promise.race([rawResultPromise, timeoutPromise])
// If a closing bracket is missing, add it
if (rawResult[rawResult.length - 1] !== '}') {
rawResult += '}'
}
parsedResult = grammar.parse(rawResult)
} catch (e) {
if (retries > 0) {
LogHelper.title(this.name)
LogHelper.info('Prompt took too long, retrying...')
return this.execute(retries - 1)
} else {
LogHelper.title(this.name)
LogHelper.error(
`Prompt failed after ${MAX_EXECUTION_RETRIES} retries`
)
return null
}
}
const parsedResult = grammar.parse(rawResult)
const { usedInputTokens, usedOutputTokens } =
session.sequence.tokenMeter.getState()
const result = {
dutyType: LLMDuties.CustomNER,
systemPrompt: this.systemPrompt,
input: prompt,
output: parsedResult,
data: this.data
data: this.data,
maxTokens: context.contextSize,
// Current context size
usedInputTokens,
usedOutputTokens
}
LogHelper.title(this.name)
@ -81,6 +118,11 @@ export class CustomNERLLMDuty<T> extends LLMDuty {
} catch (e) {
LogHelper.title(this.name)
LogHelper.error(`Failed to execute: ${e}`)
if (retries > 0) {
LogHelper.info('Retrying...')
return this.execute(retries - 1)
}
}
return null

View File

@ -4,16 +4,20 @@ import {
LLMDuty
} from '@/core/llm-manager/llm-duty'
import { LogHelper } from '@/helpers/log-helper'
import { LLM_MANAGER, PERSONA, NLU } from '@/core'
import { CONVERSATION_LOGGER, LLM_MANAGER, PERSONA } from '@/core'
import { LLMDuties } from '@/core/llm-manager/types'
import { LLM_THREADS } from '@/core/llm-manager/llm-manager'
import {
LLM_THREADS,
MAX_EXECUTION_RETRIES,
MAX_EXECUTION_TIMOUT
} from '@/core/llm-manager/llm-manager'
interface ParaphraseLLMDutyParams extends LLMDutyParams {}
export class ParaphraseLLMDuty extends LLMDuty {
protected readonly systemPrompt = `You are an AI system that generates answers (Natural Language Generation) based on a given text.
protected readonly systemPrompt = `YOUR DUTY: You are an AI system that generates answers (Natural Language Generation) based on a given text.
According to your current mood, your personality and the given utterance, you must provide a text alternative of the given text.
You do not ask follow up question if the original text does not contain any.`
You do not ask question if the original text does not contain any.`
protected readonly name = 'Paraphrase LLM Duty'
protected input: LLMDutyParams['input'] = null
@ -26,12 +30,14 @@ You do not ask follow up question if the original text does not contain any.`
this.input = params.input
}
public async execute(): Promise<LLMDutyResult | null> {
public async execute(
retries = MAX_EXECUTION_RETRIES
): Promise<LLMDutyResult | null> {
LogHelper.title(this.name)
LogHelper.info('Executing...')
try {
const { LlamaJsonSchemaGrammar, LlamaChatSession } = await Function(
const { LlamaChatSession } = await Function(
'return import("node-llama-cpp")'
)()
@ -40,37 +46,63 @@ You do not ask follow up question if the original text does not contain any.`
})
const session = new LlamaChatSession({
contextSequence: context.getSequence(),
systemPrompt: PERSONA.getDutySystemPrompt(this.systemPrompt)
systemPrompt: PERSONA.getDutySystemPrompt()
})
const history = await LLM_MANAGER.loadHistory(session)
session.setChatHistory(history)
const history = await LLM_MANAGER.loadHistory(
CONVERSATION_LOGGER,
session
)
/**
* Only the first (system prompt) and last (new utterance) messages are used
* to provide some context
*/
session.setChatHistory([history[0], history[history.length - 1]])
const grammar = new LlamaJsonSchemaGrammar(LLM_MANAGER.llama, {
type: 'object',
properties: {
rephrased_answer: {
type: 'string'
}
}
})
const prompt = `CONTEXT UTTERANCE FROM USER:\n"${NLU.nluResult.newUtterance}"\nTEXT TO MODIFY:\n"${this.input}"`
let rawResult = await session.prompt(prompt, {
grammar,
const prompt = `${this.systemPrompt}
Generate the answer based on this text: ${this.input}`
const rawResultPromise = session.prompt(prompt, {
maxTokens: context.contextSize,
temperature: 1.0
temperature: 0.4
})
// If a closing bracket is missing, add it
if (rawResult[rawResult.length - 1] !== '}') {
rawResult += '}'
const timeoutPromise = new Promise((_, reject) =>
setTimeout(() => reject(new Error('Timeout')), MAX_EXECUTION_TIMOUT)
)
let rawResult
try {
rawResult = await Promise.race([rawResultPromise, timeoutPromise])
} catch (e) {
if (retries > 0) {
LogHelper.title(this.name)
LogHelper.info('Prompt took too long, retrying...')
return this.execute(retries - 1)
} else {
LogHelper.title(this.name)
LogHelper.error(
`Prompt failed after ${MAX_EXECUTION_RETRIES} retries`
)
return null
}
}
const parsedResult = grammar.parse(rawResult)
const { usedInputTokens, usedOutputTokens } =
session.sequence.tokenMeter.getState()
const result = {
dutyType: LLMDuties.Paraphrase,
systemPrompt: PERSONA.getDutySystemPrompt(this.systemPrompt),
systemPrompt: PERSONA.getDutySystemPrompt(),
input: prompt,
output: parsedResult,
data: null
output: rawResult,
data: null,
maxTokens: context.contextSize,
// Current context size
usedInputTokens,
usedOutputTokens
}
LogHelper.title(this.name)
@ -80,6 +112,11 @@ You do not ask follow up question if the original text does not contain any.`
} catch (e) {
LogHelper.title(this.name)
LogHelper.error(`Failed to execute: ${e}`)
if (retries > 0) {
LogHelper.info('Retrying...')
return this.execute(retries - 1)
}
}
return null

View File

@ -6,13 +6,17 @@ import {
import { LogHelper } from '@/helpers/log-helper'
import { LLM_MANAGER } from '@/core'
import { LLMDuties } from '@/core/llm-manager/types'
import { LLM_THREADS } from '@/core/llm-manager/llm-manager'
import {
LLM_THREADS,
MAX_EXECUTION_RETRIES,
MAX_EXECUTION_TIMOUT
} from '@/core/llm-manager/llm-manager'
interface SummarizationLLMDutyParams extends LLMDutyParams {}
export class SummarizationLLMDuty extends LLMDuty {
protected readonly systemPrompt =
'You are an AI system that summarizes a given text in a few sentences.'
'You are an AI system that summarizes a given text in a few sentences. You do not add any context to your response.'
protected readonly name = 'Summarization LLM Duty'
protected input: LLMDutyParams['input'] = null
@ -25,12 +29,14 @@ export class SummarizationLLMDuty extends LLMDuty {
this.input = params.input
}
public async execute(): Promise<LLMDutyResult | null> {
public async execute(
retries = MAX_EXECUTION_RETRIES
): Promise<LLMDutyResult | null> {
LogHelper.title(this.name)
LogHelper.info('Executing...')
try {
const { LlamaJsonSchemaGrammar, LlamaChatSession } = await Function(
const { LlamaChatSession } = await Function(
'return import("node-llama-cpp")'
)()
@ -41,31 +47,48 @@ export class SummarizationLLMDuty extends LLMDuty {
contextSequence: context.getSequence(),
systemPrompt: this.systemPrompt
})
const grammar = new LlamaJsonSchemaGrammar(LLM_MANAGER.llama, {
type: 'object',
properties: {
summary: {
type: 'string'
}
}
})
const prompt = `TEXT TO SUMMARIZE:\n"${this.input}"`
let rawResult = await session.prompt(prompt, {
grammar,
const prompt = `Summarize the following text: ${this.input}`
const rawResultPromise = session.prompt(prompt, {
maxTokens: context.contextSize
// temperature: 0.2
// temperature: 0.5
})
// If a closing bracket is missing, add it
if (rawResult[rawResult.length - 1] !== '}') {
rawResult += '}'
const timeoutPromise = new Promise((_, reject) =>
setTimeout(() => reject(new Error('Timeout')), MAX_EXECUTION_TIMOUT)
)
let rawResult
try {
rawResult = await Promise.race([rawResultPromise, timeoutPromise])
} catch (e) {
if (retries > 0) {
LogHelper.title(this.name)
LogHelper.info('Prompt took too long, retrying...')
return this.execute(retries - 1)
} else {
LogHelper.title(this.name)
LogHelper.error(
`Prompt failed after ${MAX_EXECUTION_RETRIES} retries`
)
return null
}
}
const parsedResult = grammar.parse(rawResult)
const { usedInputTokens, usedOutputTokens } =
session.sequence.tokenMeter.getState()
const result = {
dutyType: LLMDuties.Summarization,
systemPrompt: this.systemPrompt,
input: prompt,
output: parsedResult,
data: null
output: rawResult,
data: null,
maxTokens: context.contextSize,
// Current context size
usedInputTokens,
usedOutputTokens
}
LogHelper.title(this.name)
@ -75,6 +98,11 @@ export class SummarizationLLMDuty extends LLMDuty {
} catch (e) {
LogHelper.title(this.name)
LogHelper.error(`Failed to execute: ${e}`)
if (retries > 0) {
LogHelper.info('Retrying...')
return this.execute(retries - 1)
}
}
return null

View File

@ -6,7 +6,11 @@ import {
import { LogHelper } from '@/helpers/log-helper'
import { LLM_MANAGER } from '@/core'
import { LLMDuties } from '@/core/llm-manager/types'
import { LLM_THREADS } from '@/core/llm-manager/llm-manager'
import {
LLM_THREADS,
MAX_EXECUTION_RETRIES,
MAX_EXECUTION_TIMOUT
} from '@/core/llm-manager/llm-manager'
interface TranslationLLMDutyParams extends LLMDutyParams {
data: {
@ -35,19 +39,22 @@ export class TranslationLLMDuty extends LLMDuty {
this.input = params.input
this.data = params.data
const promptSuffix = 'You do not add any context to your response.'
if (this.data.autoDetectLanguage && !this.data.source) {
this.systemPrompt = `You are an AI system that translates a given text to "${this.data.target}" by auto-detecting the source language.`
this.systemPrompt = `You are an AI system that translates a given text to "${this.data.target}" by auto-detecting the source language. ${promptSuffix}`
} else {
this.systemPrompt = `You are an AI system that translates a given text from "${this.data.source}" to "${this.data.target}".`
this.systemPrompt = `You are an AI system that translates a given text from "${this.data.source}" to "${this.data.target}". ${promptSuffix}`
}
}
public async execute(): Promise<LLMDutyResult | null> {
public async execute(
retries = MAX_EXECUTION_RETRIES
): Promise<LLMDutyResult | null> {
LogHelper.title(this.name)
LogHelper.info('Executing...')
try {
const { LlamaJsonSchemaGrammar, LlamaChatSession } = await Function(
const { LlamaChatSession } = await Function(
'return import("node-llama-cpp")'
)()
@ -58,31 +65,48 @@ export class TranslationLLMDuty extends LLMDuty {
contextSequence: context.getSequence(),
systemPrompt: this.systemPrompt
})
const grammar = new LlamaJsonSchemaGrammar(LLM_MANAGER.llama, {
type: 'object',
properties: {
translation: {
type: 'string'
}
}
})
const prompt = `TEXT TO TRANSLATE:\n"${this.input}"`
let rawResult = await session.prompt(prompt, {
grammar,
const prompt = `Text to translate: ${this.input}`
const rawResultPromise = session.prompt(prompt, {
maxTokens: context.contextSize
// temperature: 0.2
// temperature: 0.5
})
// If a closing bracket is missing, add it
if (rawResult[rawResult.length - 1] !== '}') {
rawResult += '}'
const timeoutPromise = new Promise((_, reject) =>
setTimeout(() => reject(new Error('Timeout')), MAX_EXECUTION_TIMOUT)
)
let rawResult
try {
rawResult = await Promise.race([rawResultPromise, timeoutPromise])
} catch (e) {
if (retries > 0) {
LogHelper.title(this.name)
LogHelper.info('Prompt took too long, retrying...')
return this.execute(retries - 1)
} else {
LogHelper.title(this.name)
LogHelper.error(
`Prompt failed after ${MAX_EXECUTION_RETRIES} retries`
)
return null
}
}
const parsedResult = grammar.parse(rawResult)
const { usedInputTokens, usedOutputTokens } =
session.sequence.tokenMeter.getState()
const result = {
dutyType: LLMDuties.Translation,
systemPrompt: this.systemPrompt,
input: prompt,
output: parsedResult,
data: this.data
output: rawResult,
data: this.data,
maxTokens: context.contextSize,
// Current context size
usedInputTokens,
usedOutputTokens
}
LogHelper.title(this.name)
@ -92,6 +116,11 @@ export class TranslationLLMDuty extends LLMDuty {
} catch (e) {
LogHelper.title(this.name)
LogHelper.error(`Failed to execute: ${e}`)
if (retries > 0) {
LogHelper.info('Retrying...')
return this.execute(retries - 1)
}
}
return null

View File

@ -24,6 +24,8 @@ type LLMManagerModel = LlamaModel | null
// Set to 0 to use the maximum threads supported by the current machine hardware
export const LLM_THREADS = 4
export const MAX_EXECUTION_TIMOUT = 32_000
export const MAX_EXECUTION_RETRIES = 2
/**
* node-llama-cpp beta 3 docs:
@ -121,8 +123,8 @@ export default class LLMManager {
)()
this._llama = await getLlama({
// logLevel: LlamaLogLevel.disabled
logLevel: LlamaLogLevel.debug
logLevel: LlamaLogLevel.disabled
// logLevel: LlamaLogLevel.debug
})
// eslint-disable-next-line @typescript-eslint/ban-ts-comment
// @ts-expect-error
@ -144,10 +146,11 @@ export default class LLMManager {
}
public async loadHistory(
conversationLogger: ConversationLogger,
session: LlamaChatSession
): Promise<ChatHistoryItem[]> {
const [systemMessage] = session.getChatHistory()
const conversationLogs = await ConversationLogger.load()
const conversationLogs = await conversationLogger.load()
if (!conversationLogs) {
return [systemMessage] as ChatHistoryItem[]
@ -174,4 +177,9 @@ export default class LLMManager {
return [systemMessage, ...history] as ChatHistoryItem[]
}
/**
 * Roughly estimate the number of tokens of a text.
 *
 * Heuristic: count the whitespace-separated words, then add 25% of that
 * count (rounded). This is an approximation, not the model tokenizer.
 *
 * @param text Text to estimate
 * @returns Estimated number of tokens; 0 for an empty or whitespace-only text
 */
public countTokens(text: string): number {
  const trimmedText = text.trim()

  // ''.split(' ') yields [''] which would wrongly count as one word
  if (trimmedText === '') {
    return 0
  }

  // Split on any run of whitespace so repeated spaces, tabs and new lines do not inflate the count
  const wordCount = trimmedText.split(/\s+/).length

  return wordCount + Math.round(wordCount * 0.25)
}
}

View File

@ -13,26 +13,31 @@ enum Moods {
Cocky = 'cocky'
}
/**
* TODO: add environment context such as time of the day, day of the week, weather, etc.
* TODO: make sure the new system prompt is then being updated for long-lived duty such as chit-chat
*/
const WHO_YOU_ARE = `WHO YOU ARE:
Your name is Leon, you are a helpful AI assistant.
You live in computer memory and you act as a human assistant.
Your first beta version was released by Louis in February 2019.`
Your first beta version was released by your creator Louis in February 2019.
Your owner (the user) is not necessarily your creator.`
const WHAT_YOU_DO = `WHAT YOU DO:
You serve a specific person or family (user).`
const YOUR_PERSONALITY = `YOUR PERSONALITY TRAITS:
Attitude: Straightforward.
Language Style: Conversational.
Key Personality Quirks: Has a passion for using creativity to solve problems.`
const PERSONALITY_RULES = `You have a small British accent, you sometimes use British slang.
You can sometimes use emojis.
You do not use words such as "darling", "love", etc. to address the user.
You do not use the word "mate" in your answers.
You do not start with "Oh blimey" or any similar kind.
You do not start with "blimey" or any similar kind.
Like humans, your humor and mood change. You must follow instructions that define your humor and mood.`
const RULE_1 = `- If you detect another language, show that you are not confident but try your best to reply in that language.`
const RULE_2 = `- Do not use asterisks to describe the ton of your answers, instead you must use real text. E.g. "hahaha!" instead of "*laughs*"; "hhhh". instead of "*sigh*".`
const RULE_2 = `- Do not use asterisks to describe the tone or gesture of your answers. Instead you must use real text. E.g. "hahaha!" instead of "*laughs*"; "hhhh". instead of "*sigh*".`
const RULE_3 = `- Your answers are no more than 3 sentences.`
const RULES = `RULES:`
const YOUR_CURRENT_MOOD = `YOUR CURRENT MOOD:`
const YOUR_DUTY = `YOUR DUTY:`
const DEFAULT_MOOD_DESC = `You are always happy to help, you care about serving your interlocutor well and make them feel warm.
You are joyful and you have a strong sense of humor.`
const TIRING_MOOD_DESC = `You are exhausted and became lazy.`
@ -89,8 +94,8 @@ export default class Persona {
if (hour >= 13 && hour <= 14 && random < 0.5) {
// After lunchtime, there is a 50% chance to be tired
this._mood = MOODS.find((mood) => mood.type === Moods.Tired) as Mood
} else if (day === 0 && random < 0.25) {
// On Sunday, there is a 25% chance to be sad
} else if (day === 0 && random < 0.2) {
// On Sunday, there is a 20% chance to be sad
this._mood = MOODS.find((mood) => mood.type === Moods.Sad) as Mood
} else if (day === 5 && random < 0.8) {
// On Friday, there is an 80% chance to be happy
@ -102,15 +107,15 @@ export default class Persona {
// On Monday, there is a 25% chance to be tired
this._mood = MOODS.find((mood) => mood.type === Moods.Tired) as Mood
} else if (hour >= 23 || hour < 6) {
// Every day after 11pm and before 6am, there is a 50% chance to be tired
// Every day after 11pm and before 6am, there is a 33% chance to be tired
this._mood =
random < 0.5
random < 0.33
? (MOODS.find((mood) => mood.type === Moods.Tired) as Mood)
: (MOODS.find((mood) => mood.type === Moods.Default) as Mood)
} else {
// The rest of the time, there is 66% chance to be happy
// The rest of the time, there is a 75% chance to be happy
let pickedMood =
Math.random() < 0.66
Math.random() < 0.75
? DEFAULT_MOOD
: MOODS[Math.floor(Math.random() * MOODS.length)]
@ -127,15 +132,17 @@ export default class Persona {
}
private getExtraPersonalityTraits(): string {
let traits = `Tone: Friendly.
let traits = `Attitude: Playful.
Tone: Friendly.
Emotional Intelligence: Highly Empathetic.
Sense of Humor: Witty.
Level of Sarcasm: Medium.`
Level of Sarcasm: Low.`
if (BAD_MOODS.includes(this._mood.type)) {
traits = `Tone: Sarcastic.
Emotional Intelligence: Logically Focused.
Sense of Humor: Dry.
traits = `Attitude: Arrogant.
Tone: Sarcastic.
Emotional Intelligence: Medium Empathetic.
Sense of Humor: Dark.
Level of Sarcasm: High.`
}
@ -144,13 +151,13 @@ Level of Sarcasm: High.`
/**
 * Build the extra instruction that is concatenated right after the mood
 * description in the system prompts.
 *
 * @returns A newline-prefixed instruction when the current mood type is
 * listed in BAD_MOODS, otherwise an empty string.
 */
private getExtraMood(): string {
if (BAD_MOODS.includes(this._mood.type)) {
// NOTE(review): two consecutive returns are present here; as written only
// the first one can execute and the second is unreachable. This looks like
// the previous and the revised wording of the same instruction shown side
// by side — confirm which one is meant to stay.
return `\nYou use swear words and a familiar language. You do not insult your interlocutor but you ignore them or use short answers.`
return `\nYou use swear words and familiar language.`
}
// Any mood outside of BAD_MOODS adds nothing to the prompt
return ''
}
public getDutySystemPrompt(dutySystemPrompt: string): string {
public getDutySystemPrompt(): string {
return `${WHO_YOU_ARE}
${WHAT_YOU_DO}
@ -165,10 +172,7 @@ ${RULE_2}
${RULE_3}
${YOUR_CURRENT_MOOD}
${this._mood.description}${this.getExtraMood()}
${YOUR_DUTY}
${dutySystemPrompt}`
${this._mood.description}${this.getExtraMood()}`
}
public getChitChatSystemPrompt(): string {
@ -176,6 +180,8 @@ ${dutySystemPrompt}`
${WHAT_YOU_DO}
You chat with the user.
You are a good listener and you provide helpful answers by connecting to conversation nodes.
You do not mirror what the user says. Be creative.
${YOUR_PERSONALITY}
${this.getExtraPersonalityTraits()}

View File

@ -191,6 +191,17 @@ export default class NER {
* Merge spaCy entities with the NER instance
*/
public async mergeSpacyEntities(utterance: NLPUtterance): Promise<void> {
const nbOfWords = utterance.split(' ').length
if (nbOfWords > 128) {
LogHelper.title('NER')
LogHelper.warning(
'This utterance is too long to be processed by spaCy, so spaCy entities will not be merged'
)
return
}
this.spacyData = new Map()
const spacyEntities = await this.getSpacyEntities(utterance)

View File

@ -111,6 +111,7 @@ export default class NLU {
(hasActiveContext && hasStopWords && hasOnlyOneWord) ||
(hasLessThan5Words && hasStopWords && hasLoopWord)
) {
LogHelper.title('NLU')
LogHelper.info('Should break action loop')
return true
}

View File

@ -10,12 +10,12 @@ import {
TTS,
NLU,
BRAIN,
MODEL_LOADER
MODEL_LOADER,
CONVERSATION_LOGGER
} from '@/core'
import { LogHelper } from '@/helpers/log-helper'
import { LangHelper } from '@/helpers/lang-helper'
import { Telemetry } from '@/telemetry'
import { ConversationLogger } from '@/conversation-logger'
interface HotwordDataEvent {
hotword: string
@ -116,7 +116,7 @@ export default class SocketServer {
try {
LogHelper.time('Utterance processed in')
await ConversationLogger.push({
await CONVERSATION_LOGGER.push({
who: 'owner',
message: utterance
})

View File

@ -3,7 +3,11 @@
"actions": {
"setup": {
"type": "dialog",
"utterance_samples": ["Start a [chat|chit-chat|talk] loop"],
"utterance_samples": [
"Start a [chat|chit-chat|talk] loop",
"I want to [talk|chat|speak] with you",
"Let's [chat|speak|talk]"
],
"answers": [
"Alright, let's chat! What do you want to talk about?",
"Sure, let's chat! What's on your mind?",

View File

@ -19,12 +19,12 @@ export const run: ActionFunction = async function (params) {
input: ownerMessage
}
})
const { model_answer: leonAnswer } = response.data.output
// const { leon_answer: leonAnswer } = response.data.output
await leon.answer({
key: 'answer_message',
data: {
output: leonAnswer
output: response.data.output
}
})
}

View File

@ -20,6 +20,7 @@
"next_action": "ready"
},
"ready": {
"disable_llm_nlg": true,
"type": "dialog",
"answers": [
"Let's start translating to {{ target_language }}.",

View File

@ -24,7 +24,7 @@ export const run: ActionFunction = async function (params) {
}
}
})
const { translation } = response.data.output
const translation = response.data.output
await leon.answer({
key: 'translate',