embllm: fix use of llama ctx before loading (#2465)

This fixes a regression in PR #2396.

Signed-off-by: Jared Van Bortel <jared@nomic.ai>
This commit is contained in:
Jared Van Bortel 2024-06-25 11:04:01 -04:00 committed by GitHub
parent 9273b49b62
commit 1a00882276
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -84,10 +84,6 @@ bool EmbeddingLLMWorker::loadModel()
return false;
}
// FIXME(jared): the user may want this to take effect without having to restart
int n_threads = MySettings::globalInstance()->threadCount();
m_model->setThreadCount(n_threads);
// NOTE: explicitly loads model on CPU to avoid GPU OOM
// TODO(cebtenzzre): support GPU-accelerated embeddings
bool success = m_model->loadModel(filePath.toStdString(), 2048, 0);
@@ -104,6 +100,11 @@ bool EmbeddingLLMWorker::loadModel()
m_model = nullptr;
return false;
}
// FIXME(jared): the user may want this to take effect without having to restart
int n_threads = MySettings::globalInstance()->threadCount();
m_model->setThreadCount(n_threads);
return true;
}