Fallback to CPU more robustly.

Adam Treat 2023-09-14 16:52:31 -04:00
parent 79843c269e
commit aa33419c6e
3 changed files with 15 additions and 4 deletions

@@ -1 +1 @@
-Subproject commit 7ff671e149464d1a52b4f9e50a7819bc49e8fdaa
+Subproject commit 703ef9c1252aff4f6c4e1fdc60fffe6ab9def377


@@ -168,6 +168,10 @@ bool LLamaModel::loadModel(const std::string &modelPath)
     d_ptr->ctx = llama_init_from_file(modelPath.c_str(), d_ptr->params);
     if (!d_ptr->ctx) {
+#ifdef GGML_USE_KOMPUTE
+        // Explicitly free the device so next load it doesn't use it
+        ggml_vk_free_device();
+#endif
         std::cerr << "LLAMA ERROR: failed to load model from " << modelPath << std::endl;
         return false;
     }
@@ -194,7 +198,7 @@ int32_t LLamaModel::threadCount() const {
 LLamaModel::~LLamaModel()
 {
-    if(d_ptr->ctx) {
+    if (d_ptr->ctx) {
         llama_free(d_ptr->ctx);
     }
 }
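
The llamamodel.cpp hunk above is the backend half of the fallback: when llama_init_from_file() fails on a Kompute (Vulkan) build, the device is released before returning, so a follow-up load attempt can bind to the CPU instead of the GPU device that just failed. A minimal sketch of that pattern follows; the helper name initWithDeviceCleanup and the ggml-vulkan.h header path are illustrative assumptions, while llama_init_from_file, llama_context_params, and ggml_vk_free_device come from the diff itself.

    #include <iostream>
    #include <string>

    #include <llama.h>          // llama_init_from_file, llama_context_params, llama_context
    #ifdef GGML_USE_KOMPUTE
    #include <ggml-vulkan.h>    // ggml_vk_free_device (header name is an assumption)
    #endif

    // Try to create a llama context; on failure, free the Kompute device so the
    // next load attempt does not try to use it again.
    static llama_context *initWithDeviceCleanup(const std::string &modelPath,
                                                llama_context_params params)
    {
        llama_context *ctx = llama_init_from_file(modelPath.c_str(), params);
        if (!ctx) {
    #ifdef GGML_USE_KOMPUTE
            // Explicitly free the device so the next load doesn't use it.
            ggml_vk_free_device();
    #endif
            std::cerr << "LLAMA ERROR: failed to load model from " << modelPath << std::endl;
        }
        return ctx;
    }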


@@ -294,9 +294,15 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo)
         emit reportDevice(actualDevice);
         bool success = m_llModelInfo.model->loadModel(filePath.toStdString());
+        if (!success && actualDevice != "CPU") {
+            emit reportDevice("CPU");
+            success = m_llModelInfo.model->loadModel(filePath.toStdString());
+        }
         MySettings::globalInstance()->setAttemptModelLoad(QString());
         if (!success) {
-            delete std::exchange(m_llModelInfo.model, nullptr);
+            delete m_llModelInfo.model;
+            m_llModelInfo.model = nullptr;
             if (!m_isServer)
                 LLModelStore::globalInstance()->releaseModel(m_llModelInfo); // release back into the store
             m_llModelInfo = LLModelInfo();
@@ -317,7 +323,8 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo)
         case 'S': m_llModelType = LLModelType::STARCODER_; break;
         default:
             {
-                delete std::exchange(m_llModelInfo.model, nullptr);
+                delete m_llModelInfo.model;
+                m_llModelInfo.model = nullptr;
                 if (!m_isServer)
                     LLModelStore::globalInstance()->releaseModel(m_llModelInfo); // release back into the store
                 m_llModelInfo = LLModelInfo();
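
On the chat side, the new logic is a guarded retry around loadModel(): report the device actually in use, and if the first attempt fails on a non-CPU device, report "CPU" and try once more before giving up. Below is a minimal sketch of that flow assuming only a loadModel(path) -> bool interface; the helper name loadWithCpuFallback and the Model template parameter are illustrative stand-ins, not part of the commit.

    #include <functional>
    #include <string>

    // Retry-on-CPU wrapper mirroring the chatllm.cpp hunk above. `Model` stands in
    // for the LLModel pointer held by ChatLLM; only loadModel() is assumed.
    template <typename Model>
    bool loadWithCpuFallback(Model &model, const std::string &filePath,
                             const std::string &actualDevice,
                             const std::function<void(const std::string &)> &reportDevice)
    {
        reportDevice(actualDevice);                 // tell the UI which device is tried first
        bool success = model.loadModel(filePath);
        if (!success && actualDevice != "CPU") {
            reportDevice("CPU");                    // announce the fallback
            success = model.loadModel(filePath);    // second attempt on the CPU backend
        }
        return success;
    }

Pairing this retry with the backend-side ggml_vk_free_device() call is presumably what makes it meaningful: without releasing the device first, the second loadModel() could end up on the same failed GPU path.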