mirror of
https://github.com/leon-ai/leon.git
synced 2024-09-11 18:27:21 +03:00
feat(server): enable FlashAttention for faster inference
This commit is contained in:
parent
626c77d340
commit
fd5e952695
@@ -230,7 +230,8 @@ export default class LLMManager {
     // eslint-disable-next-line @typescript-eslint/ban-ts-comment
     // @ts-expect-error
     this._model = await this._llama.loadModel({
-      modelPath: LLM_PATH
+      modelPath: LLM_PATH,
+      defaultContextFlashAttention: true
     })

     if (HAS_LLM_NLG) {
|
Loading…
Reference in New Issue
Block a user