1
1
mirror of https://github.com/leon-ai/leon.git synced 2024-08-17 06:00:33 +03:00

feat(server): enable FlashAttention for faster inference

This commit is contained in:
louistiti 2024-07-14 21:19:42 +08:00
parent 626c77d340
commit fd5e952695

View File

@@ -230,7 +230,8 @@ export default class LLMManager {
// eslint-disable-next-line @typescript-eslint/ban-ts-comment
// @ts-expect-error
this._model = await this._llama.loadModel({
-modelPath: LLM_PATH
+modelPath: LLM_PATH,
+defaultContextFlashAttention: true
})
if (HAS_LLM_NLG) {