Only show GPU when we're actually using it.

Author: Adam Treat
Date:   2023-09-14 09:59:19 -04:00
Parent: 1fa67a585c
Commit: 3076e0bf26

6 changed files with 29 additions and 3 deletions


@@ -337,6 +337,16 @@ bool LLamaModel::hasGPUDevice()
 #endif
 }
 
+bool LLamaModel::usingGPUDevice()
+{
+#if defined(GGML_USE_KOMPUTE)
+    return ggml_vk_using_vulkan();
+#elif defined(GGML_USE_METAL)
+    return true;
+#endif
+    return false;
+}
+
 #if defined(_WIN32)
 #define DLL_EXPORT __declspec(dllexport)
 #else
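The Kompute path above cannot decide GPU use at compile time: a model can load successfully and still end up running on the CPU, so ggml_vk_using_vulkan() is consulted at runtime, while Metal is unconditional once compiled in. Below is a minimal, self-contained sketch of that compile-time/runtime split; the macros USE_KOMPUTE and USE_METAL and the probe gpu_runtime_active() are hypothetical stand-ins, not the real ggml symbols.

#include <iostream>

#if defined(USE_KOMPUTE)
// Hypothetical stand-in for ggml_vk_using_vulkan(): only the backend knows,
// at runtime, whether Vulkan is actually driving inference.
static bool gpu_runtime_active()
{
    return false; // pretend the backend fell back to CPU after load
}
#endif

bool usingGPUDevice()
{
#if defined(USE_KOMPUTE)
    return gpu_runtime_active(); // compiled for Vulkan, but verify it is really active
#elif defined(USE_METAL)
    return true;                 // Metal is all-or-nothing: compiled in means in use
#else
    return false;                // CPU-only build
#endif
}

int main()
{
    std::cout << (usingGPUDevice() ? "GPU" : "CPU") << '\n';
}

Compile with -DUSE_KOMPUTE or -DUSE_METAL to exercise each branch; with neither flag it reports CPU.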


@@ -30,6 +30,7 @@ public:
     bool initializeGPUDevice(const GPUDevice &device) override;
     bool initializeGPUDevice(int device) override;
     bool hasGPUDevice() override;
+    bool usingGPUDevice() override;
 
 private:
     LLamaPrivate *d_ptr;


@@ -100,6 +100,7 @@ public:
     virtual bool initializeGPUDevice(const GPUDevice &/*device*/) { return false; }
     virtual bool initializeGPUDevice(int /*device*/) { return false; }
     virtual bool hasGPUDevice() { return false; }
+    virtual bool usingGPUDevice() { return false; }
 
 protected:
     // These are pure virtual because subclasses need to implement as the default implementation of
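The base-class default above means CPU-only backends need no changes at all: they inherit the false answer, and only accelerated backends override the query. A short sketch of that pattern, with hypothetical class names:

#include <iostream>
#include <memory>
#include <vector>

// Base interface: the conservative default is "no GPU in use".
struct Backend {
    virtual ~Backend() = default;
    virtual bool usingGPUDevice() { return false; }
};

// A CPU-only backend needs no override at all.
struct CpuBackend : Backend {};

// An accelerated backend reports its actual runtime state.
struct VulkanBackend : Backend {
    bool active = true; // would be flipped off on a CPU fallback
    bool usingGPUDevice() override { return active; }
};

int main()
{
    std::vector<std::unique_ptr<Backend>> backends;
    backends.push_back(std::make_unique<CpuBackend>());
    backends.push_back(std::make_unique<VulkanBackend>());
    for (const auto &b : backends)
        std::cout << (b->usingGPUDevice() ? "GPU" : "CPU") << '\n';
}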


@@ -975,6 +975,14 @@ const std::vector<LLModel::Token> &Replit::endTokens() const
     return fres;
 }
 
+bool Replit::usingGPUDevice()
+{
+#if defined(GGML_USE_METAL)
+    return true;
+#endif
+    return false;
+}
+
 #if defined(_WIN32)
 #define DLL_EXPORT __declspec(dllexport)
 #else


@@ -27,6 +27,7 @@ public:
     size_t restoreState(const uint8_t *src) override;
     void setThreadCount(int32_t n_threads) override;
     int32_t threadCount() const override;
+    bool usingGPUDevice() override;
 
 private:
     ReplitPrivate *d_ptr;


@@ -302,6 +302,11 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo)
         m_llModelInfo = LLModelInfo();
         emit modelLoadingError(QString("Could not load model due to invalid model file for %1").arg(modelInfo.filename()));
     } else {
+        // We might have had to fallback to CPU after load if the model is not possible to accelerate
+        // for instance if the quantization method is not supported on Vulkan yet
+        if (actualDevice != "CPU" && !m_llModelInfo.model->usingGPUDevice())
+            emit reportDevice("CPU");
+
         switch (m_llModelInfo.model->implementation().modelType()[0]) {
         case 'L': m_llModelType = LLModelType::LLAMA_; break;
         case 'G': m_llModelType = LLModelType::GPTJ_; break;
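The chat-layer check above corrects an optimistic earlier report: the UI was told the requested device, but the backend may have silently fallen back to CPU during load. A hedged sketch of that decision without Qt; Model, actualDevice, and reportDevice here are stand-ins for the names used in the diff:

#include <iostream>
#include <string>

struct Model {
    // Stand-in for LLModel::usingGPUDevice(); false models a silent CPU fallback,
    // e.g. a quantization format the Vulkan backend cannot accelerate yet.
    bool usingGPUDevice() const { return false; }
};

// Stand-in for the reportDevice signal: tells the UI which device is in use.
static void reportDevice(const std::string &device)
{
    std::cout << "device in use: " << device << '\n';
}

int main()
{
    Model model;
    const std::string actualDevice = "Vulkan GPU"; // the device we asked to load on
    // Loading "succeeded", but only claim the GPU if inference really runs there.
    if (actualDevice != "CPU" && !model.usingGPUDevice())
        reportDevice("CPU"); // downgrade the earlier GPU report
}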