diff --git a/gpt4all-backend/llama.cpp-mainline b/gpt4all-backend/llama.cpp-mainline
index f67f4651..b2db03ac 160000
--- a/gpt4all-backend/llama.cpp-mainline
+++ b/gpt4all-backend/llama.cpp-mainline
@@ -1 +1 @@
-Subproject commit f67f4651fac0b2f377dc53fe853b1dafa96f9aa9
+Subproject commit b2db03acf299111885af2921a4230de07623eaf8
diff --git a/gpt4all-backend/llamamodel.cpp b/gpt4all-backend/llamamodel.cpp
index 0e10ab8a..ab560e89 100644
--- a/gpt4all-backend/llamamodel.cpp
+++ b/gpt4all-backend/llamamodel.cpp
@@ -371,6 +371,11 @@ bool LLamaModel::loadModel(const std::string &modelPath, int n_ctx, int ngl)
         d_ptr->model_params.main_gpu = d_ptr->device;
         d_ptr->model_params.n_gpu_layers = ngl;
         d_ptr->model_params.split_mode = LLAMA_SPLIT_MODE_NONE;
+    } else {
+#ifdef GGML_USE_CUDA
+        std::cerr << "Llama ERROR: CUDA loadModel was called without a device\n";
+        return false;
+#endif // GGML_USE_CUDA
     }
 #elif defined(GGML_USE_METAL)
     (void)ngl;
@@ -383,15 +388,17 @@ bool LLamaModel::loadModel(const std::string &modelPath, int n_ctx, int ngl)
     // always fully offload on Metal
     // TODO(cebtenzzre): use this parameter to allow using more than 53% of system RAM to load a model
     d_ptr->model_params.n_gpu_layers = 100;
-#else
+#else // !KOMPUTE && !VULKAN && !CUDA && !METAL
     (void)ngl;
 #endif

-    d_ptr->model = llama_load_model_from_file_gpt4all(modelPath.c_str(), &d_ptr->model_params);
+    d_ptr->model = llama_load_model_from_file(modelPath.c_str(), d_ptr->model_params);
     if (!d_ptr->model) {
         fflush(stdout);
+#ifndef GGML_USE_CUDA
         d_ptr->device = -1;
         d_ptr->deviceName.clear();
+#endif
         std::cerr << "LLAMA ERROR: failed to load model from " << modelPath << std::endl;
         return false;
     }
@@ -434,8 +441,10 @@ bool LLamaModel::loadModel(const std::string &modelPath, int n_ctx, int ngl)
         std::cerr << "LLAMA ERROR: failed to init context for model " << modelPath << std::endl;
         llama_free_model(d_ptr->model);
         d_ptr->model = nullptr;
+#ifndef GGML_USE_CUDA
         d_ptr->device = -1;
         d_ptr->deviceName.clear();
+#endif
         return false;
     }

@@ -723,31 +732,16 @@ bool LLamaModel::initializeGPUDevice(int device, std::string *unavail_reason) co
 #endif
 }

-bool LLamaModel::hasGPUDevice() const
-{
-#if defined(GGML_USE_KOMPUTE) || defined(GGML_USE_VULKAN) || defined(GGML_USE_CUDA)
-    return d_ptr->device != -1;
-#else
-    return false;
-#endif
-}
-
 bool LLamaModel::usingGPUDevice() const
 {
-    bool hasDevice;
+    if (!d_ptr->model)
+        return false;

+    bool usingGPU = llama_model_using_gpu(d_ptr->model);
 #ifdef GGML_USE_KOMPUTE
-    hasDevice = hasGPUDevice() && d_ptr->model_params.n_gpu_layers > 0;
-    assert(!hasDevice || ggml_vk_has_device());
-#elif defined(GGML_USE_VULKAN) || defined(GGML_USE_CUDA)
-    hasDevice = hasGPUDevice() && d_ptr->model_params.n_gpu_layers > 0;
-#elif defined(GGML_USE_METAL)
-    hasDevice = true;
-#else
-    hasDevice = false;
+    assert(!usingGPU || ggml_vk_has_device());
 #endif
-
-    return hasDevice;
+    return usingGPU;
 }

 const char *LLamaModel::backendName() const
@@ -760,6 +754,8 @@ const char *LLamaModel::gpuDeviceName() const
     if (usingGPUDevice()) {
 #if defined(GGML_USE_KOMPUTE) || defined(GGML_USE_VULKAN) || defined(GGML_USE_CUDA)
         return d_ptr->deviceName.c_str();
+#elif defined(GGML_USE_METAL)
+        return "Metal";
 #endif
     }
     return nullptr;
diff --git a/gpt4all-backend/llamamodel_impl.h b/gpt4all-backend/llamamodel_impl.h
index 9ab386bc..f162a94d 100644
--- a/gpt4all-backend/llamamodel_impl.h
+++ b/gpt4all-backend/llamamodel_impl.h
@@ -34,7 +34,6 @@ public:
     std::vector<GPUDevice> availableGPUDevices(size_t memoryRequired = 0) const override;
     bool initializeGPUDevice(size_t memoryRequired, const std::string &name) const override;
     bool initializeGPUDevice(int device, std::string *unavail_reason = nullptr) const override;
-    bool hasGPUDevice() const override;
     bool usingGPUDevice() const override;
     const char *backendName() const override;
     const char *gpuDeviceName() const override;
diff --git a/gpt4all-backend/llmodel.h b/gpt4all-backend/llmodel.h
index de5d4b5c..5a26f4a4 100644
--- a/gpt4all-backend/llmodel.h
+++ b/gpt4all-backend/llmodel.h
@@ -2,6 +2,7 @@
 #define LLMODEL_H

 #include <algorithm>
+#include <cassert>
 #include <cstddef>
 #include <cstdint>
 #include <functional>
@@ -57,23 +58,30 @@
             backend(backend), index(index), type(type), heapSize(heapSize), name(std::move(name)), vendor(std::move(vendor)) {}

-        std::string selectionName() const { return m_backendNames.at(backend) + ": " + name; }
-        std::string reportedName() const { return name + " (" + m_backendNames.at(backend) + ")"; }
+        std::string selectionName() const
+        {
+            assert(backend == "cuda"s || backend == "kompute"s);
+            return backendName() + ": " + name;
+        }
+
+        std::string backendName() const { return backendIdToName(backend); }
+
+        static std::string backendIdToName(const std::string &backend) { return s_backendNames.at(backend); }

         static std::string updateSelectionName(const std::string &name) {
             if (name == "Auto" || name == "CPU" || name == "Metal")
                 return name;
-            auto it = std::find_if(m_backendNames.begin(), m_backendNames.end(), [&name](const auto &entry) {
+            auto it = std::find_if(s_backendNames.begin(), s_backendNames.end(), [&name](const auto &entry) {
                 return name.starts_with(entry.second + ": ");
             });
-            if (it != m_backendNames.end())
+            if (it != s_backendNames.end())
                 return name;
             return "Vulkan: " + name; // previously, there were only Vulkan devices
         }

     private:
-        static inline const std::unordered_map<std::string, std::string> m_backendNames {
-            {"cuda", "CUDA"}, {"kompute", "Vulkan"},
+        static inline const std::unordered_map<std::string, std::string> s_backendNames {
+            {"cpu", "CPU"}, {"metal", "Metal"}, {"cuda", "CUDA"}, {"kompute", "Vulkan"},
         };
     };
@@ -196,7 +204,6 @@ public:
         return false;
     }

-    virtual bool hasGPUDevice() const { return false; }
     virtual bool usingGPUDevice() const { return false; }
     virtual const char *backendName() const { return "cpu"; }
     virtual const char *gpuDeviceName() const { return nullptr; }
diff --git a/gpt4all-backend/llmodel_c.cpp b/gpt4all-backend/llmodel_c.cpp
index 62663d7f..b585e4bf 100644
--- a/gpt4all-backend/llmodel_c.cpp
+++ b/gpt4all-backend/llmodel_c.cpp
@@ -287,12 +287,6 @@ bool llmodel_gpu_init_gpu_device_by_int(llmodel_model model, int device)
     return wrapper->llModel->initializeGPUDevice(device);
 }

-bool llmodel_has_gpu_device(llmodel_model model)
-{
-    const auto *wrapper = static_cast<LLModelWrapper *>(model);
-    return wrapper->llModel->hasGPUDevice();
-}
-
 const char *llmodel_model_backend_name(llmodel_model model)
 {
     const auto *wrapper = static_cast<LLModelWrapper *>(model);
diff --git a/gpt4all-backend/llmodel_c.h b/gpt4all-backend/llmodel_c.h
index 3955260a..ced23a96 100644
--- a/gpt4all-backend/llmodel_c.h
+++ b/gpt4all-backend/llmodel_c.h
@@ -291,11 +291,6 @@ bool llmodel_gpu_init_gpu_device_by_struct(llmodel_model model, const llmodel_gp
  */
 bool llmodel_gpu_init_gpu_device_by_int(llmodel_model model, int device);

-/**
- * @return True if a GPU device is successfully initialized, false otherwise.
- */
-bool llmodel_has_gpu_device(llmodel_model model);
-
 /**
  * @return The name of the llama.cpp backend currently in use. One of "cpu", "kompute", or "metal".
  */
diff --git a/gpt4all-bindings/python/gpt4all/_pyllmodel.py b/gpt4all-bindings/python/gpt4all/_pyllmodel.py
index fa879cde..892d72e7 100644
--- a/gpt4all-bindings/python/gpt4all/_pyllmodel.py
+++ b/gpt4all-bindings/python/gpt4all/_pyllmodel.py
@@ -177,9 +177,6 @@ llmodel.llmodel_gpu_init_gpu_device_by_struct.restype = ctypes.c_bool
 llmodel.llmodel_gpu_init_gpu_device_by_int.argtypes = [ctypes.c_void_p, ctypes.c_int32]
 llmodel.llmodel_gpu_init_gpu_device_by_int.restype = ctypes.c_bool

-llmodel.llmodel_has_gpu_device.argtypes = [ctypes.c_void_p]
-llmodel.llmodel_has_gpu_device.restype = ctypes.c_bool
-
 llmodel.llmodel_model_backend_name.argtypes = [ctypes.c_void_p]
 llmodel.llmodel_model_backend_name.restype = ctypes.c_char_p
diff --git a/gpt4all-chat/chat.cpp b/gpt4all-chat/chat.cpp
index b5564657..5874abe3 100644
--- a/gpt4all-chat/chat.cpp
+++ b/gpt4all-chat/chat.cpp
@@ -64,8 +64,7 @@ void Chat::connectLLM()
     connect(m_llmodel, &ChatLLM::recalcChanged, this, &Chat::handleRecalculating, Qt::QueuedConnection);
     connect(m_llmodel, &ChatLLM::generatedNameChanged, this, &Chat::generatedNameChanged, Qt::QueuedConnection);
     connect(m_llmodel, &ChatLLM::reportSpeed, this, &Chat::handleTokenSpeedChanged, Qt::QueuedConnection);
-    connect(m_llmodel, &ChatLLM::reportDevice, this, &Chat::handleDeviceChanged, Qt::QueuedConnection);
-    connect(m_llmodel, &ChatLLM::reportFallbackReason, this, &Chat::handleFallbackReasonChanged, Qt::QueuedConnection);
+    connect(m_llmodel, &ChatLLM::loadedModelInfoChanged, this, &Chat::loadedModelInfoChanged, Qt::QueuedConnection);
     connect(m_llmodel, &ChatLLM::databaseResultsChanged, this, &Chat::handleDatabaseResultsChanged, Qt::QueuedConnection);
     connect(m_llmodel, &ChatLLM::modelInfoChanged, this, &Chat::handleModelInfoChanged, Qt::QueuedConnection);
     connect(m_llmodel, &ChatLLM::trySwitchContextOfLoadedModelCompleted, this, &Chat::handleTrySwitchContextOfLoadedModelCompleted, Qt::QueuedConnection);
@@ -327,16 +326,19 @@ void Chat::handleTokenSpeedChanged(const QString &tokenSpeed)
     emit tokenSpeedChanged();
 }

-void Chat::handleDeviceChanged(const QString &device)
+QString Chat::deviceBackend() const
 {
-    m_device = device;
-    emit deviceChanged();
+    return m_llmodel->deviceBackend();
 }

-void Chat::handleFallbackReasonChanged(const QString &fallbackReason)
+QString Chat::device() const
 {
-    m_fallbackReason = fallbackReason;
-    emit fallbackReasonChanged();
+    return m_llmodel->device();
+}
+
+QString Chat::fallbackReason() const
+{
+    return m_llmodel->fallbackReason();
 }

 void Chat::handleDatabaseResultsChanged(const QList<ResultInfo> &results)
diff --git a/gpt4all-chat/chat.h b/gpt4all-chat/chat.h
index 9da04459..019caf89 100644
--- a/gpt4all-chat/chat.h
+++ b/gpt4all-chat/chat.h
@@ -33,8 +33,9 @@ class Chat : public QObject
     Q_PROPERTY(QList<QString> collectionList READ collectionList NOTIFY collectionListChanged)
     Q_PROPERTY(QString modelLoadingError READ modelLoadingError NOTIFY modelLoadingErrorChanged)
     Q_PROPERTY(QString tokenSpeed READ tokenSpeed NOTIFY tokenSpeedChanged);
-    Q_PROPERTY(QString device READ device NOTIFY deviceChanged);
-    Q_PROPERTY(QString fallbackReason READ fallbackReason NOTIFY fallbackReasonChanged);
+    Q_PROPERTY(QString deviceBackend READ deviceBackend NOTIFY loadedModelInfoChanged)
+    Q_PROPERTY(QString device READ device NOTIFY loadedModelInfoChanged)
+    Q_PROPERTY(QString fallbackReason READ fallbackReason NOTIFY loadedModelInfoChanged)
     Q_PROPERTY(LocalDocsCollectionsModel *collectionModel READ collectionModel NOTIFY collectionModelChanged)
     // 0=no, 1=waiting, 2=working
     Q_PROPERTY(int trySwitchContextInProgress READ trySwitchContextInProgress NOTIFY trySwitchContextInProgressChanged)
@@ -111,8 +112,10 @@ public:

     QString modelLoadingError() const { return m_modelLoadingError; }
     QString tokenSpeed() const { return m_tokenSpeed; }
-    QString device() const { return m_device; }
-    QString fallbackReason() const { return m_fallbackReason; }
+    QString deviceBackend() const;
+    QString device() const;
+    // not loaded -> QString(), no fallback -> QString("")
+    QString fallbackReason() const;

     int trySwitchContextInProgress() const { return m_trySwitchContextInProgress; }
@@ -149,6 +152,7 @@ Q_SIGNALS:
     void fallbackReasonChanged();
     void collectionModelChanged();
     void trySwitchContextInProgressChanged();
+    void loadedModelInfoChanged();

 private Q_SLOTS:
     void handleResponseChanged(const QString &response);
@@ -159,8 +163,6 @@
     void handleRecalculating();
     void handleModelLoadingError(const QString &error);
     void handleTokenSpeedChanged(const QString &tokenSpeed);
-    void handleDeviceChanged(const QString &device);
-    void handleFallbackReasonChanged(const QString &device);
     void handleDatabaseResultsChanged(const QList<ResultInfo> &results);
     void handleModelInfoChanged(const ModelInfo &modelInfo);
     void handleTrySwitchContextOfLoadedModelCompleted(int value);
diff --git a/gpt4all-chat/chatllm.cpp b/gpt4all-chat/chatllm.cpp
index 581eaab5..96239fdb 100644
--- a/gpt4all-chat/chatllm.cpp
+++ b/gpt4all-chat/chatllm.cpp
@@ -93,6 +93,12 @@ void LLModelStore::destroy()
     m_availableModel.reset();
 }

+void LLModelInfo::resetModel(ChatLLM *cllm, LLModel *model) {
+    this->model.reset(model);
+    fallbackReason.reset();
+    emit cllm->loadedModelInfoChanged();
+}
+
 ChatLLM::ChatLLM(Chat *parent, bool isServer)
     : QObject{nullptr}
     , m_promptResponseTokens(0)
@@ -141,7 +147,7 @@ void ChatLLM::destroy()
     // The only time we should have a model loaded here is on shutdown
     // as we explicitly unload the model in all other circumstances
     if (isModelLoaded()) {
-        m_llModelInfo.model.reset();
+        m_llModelInfo.resetModel(this);
     }
 }
@@ -208,7 +214,7 @@ void ChatLLM::trySwitchContextOfLoadedModel(const ModelInfo &modelInfo)
     QString filePath = modelInfo.dirpath + modelInfo.filename();
     QFileInfo fileInfo(filePath);

-    m_llModelInfo = LLModelStore::globalInstance()->acquireModel();
+    acquireModel();
 #if defined(DEBUG_MODEL_LOADING)
     qDebug() << "acquired model from store" << m_llmThread.objectName() << m_llModelInfo.model.get();
 #endif
@@ -251,8 +257,6 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo)
     // reset status
     emit modelLoadingPercentageChanged(std::numeric_limits<float>::min()); // small non-zero positive value
     emit modelLoadingError("");
-    emit reportFallbackReason("");
-    emit reportDevice("");
     m_pristineLoadedState = false;

     QString filePath = modelInfo.dirpath + modelInfo.filename();
@@ -265,12 +269,12 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo)
 #if defined(DEBUG_MODEL_LOADING)
         qDebug() << "already acquired model deleted" << m_llmThread.objectName() << m_llModelInfo.model.get();
 #endif
-        m_llModelInfo.model.reset();
+        m_llModelInfo.resetModel(this);
     } else if (!m_isServer) {
         // This is a blocking call that tries to retrieve the model we need from the model store.
         // If it succeeds, then we just have to restore state. If the store has never had a model
         // returned to it, then the modelInfo.model pointer should be null which will happen on startup
-        m_llModelInfo = LLModelStore::globalInstance()->acquireModel();
+        acquireModel();
 #if defined(DEBUG_MODEL_LOADING)
         qDebug() << "acquired model from store" << m_llmThread.objectName() << m_llModelInfo.model.get();
 #endif
@@ -305,7 +309,7 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo)
 #if defined(DEBUG_MODEL_LOADING)
             qDebug() << "deleting model" << m_llmThread.objectName() << m_llModelInfo.model.get();
 #endif
-            m_llModelInfo.model.reset();
+            m_llModelInfo.resetModel(this);
         }
     }

@@ -335,7 +339,7 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo)
         model->setModelName(modelName);
         model->setRequestURL(modelInfo.url());
         model->setAPIKey(apiKey);
-        m_llModelInfo.model.reset(model);
+        m_llModelInfo.resetModel(this, model);
     } else {
         QElapsedTimer modelLoadTimer;
         modelLoadTimer.start();
@@ -360,10 +364,10 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo)
 #endif

         QString constructError;
-        m_llModelInfo.model.reset();
+        m_llModelInfo.resetModel(this);
         try {
             auto *model = LLModel::Implementation::construct(filePath.toStdString(), backend, n_ctx);
-            m_llModelInfo.model.reset(model);
+            m_llModelInfo.resetModel(this, model);
         } catch (const LLModel::MissingImplementationError &e) {
             modelLoadProps.insert("error", "missing_model_impl");
             constructError = e.what();
@@ -412,14 +416,15 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo)
                 memGB = std::floor(memGB * 10.f) / 10.f; // truncate to 1 decimal place
                 modelLoadProps.insert("default_device", QString::fromStdString(defaultDevice->name));
                 modelLoadProps.insert("default_device_mem", approxDeviceMemGB(defaultDevice));
+                modelLoadProps.insert("default_device_backend", QString::fromStdString(defaultDevice->backendName()));
             }
         }

-        QString actualDevice("CPU");
+        bool actualDeviceIsCPU = true;

 #if defined(Q_OS_MAC) && defined(__aarch64__)
         if (m_llModelInfo.model->implementation().buildVariant() == "metal")
-            actualDevice = "Metal";
+            actualDeviceIsCPU = false;
 #else
         if (requestedDevice != "CPU") {
             const auto *device = defaultDevice;
@@ -437,41 +442,39 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo)
             if (!device) {
                 // GPU not available
             } else if (!m_llModelInfo.model->initializeGPUDevice(device->index, &unavail_reason)) {
-                emit reportFallbackReason(QString::fromStdString("<br>" + unavail_reason));
+                m_llModelInfo.fallbackReason = QString::fromStdString(unavail_reason);
             } else {
-                actualDevice = QString::fromStdString(device->reportedName());
+                actualDeviceIsCPU = false;
                 modelLoadProps.insert("requested_device_mem", approxDeviceMemGB(device));
             }
         }
 #endif

         // Report which device we're actually using
-        emit reportDevice(actualDevice);

         bool success = m_llModelInfo.model->loadModel(filePath.toStdString(), n_ctx, ngl);
         if (!m_shouldBeLoaded) {
-            m_llModelInfo.model.reset();
+            m_llModelInfo.resetModel(this);
             if (!m_isServer)
                 LLModelStore::globalInstance()->releaseModel(std::move(m_llModelInfo));
-            m_llModelInfo = LLModelInfo();
+            resetModel();
             emit modelLoadingPercentageChanged(0.0f);
             return false;
         }

-        if (actualDevice == "CPU") {
+        if (actualDeviceIsCPU) {
             // we asked llama.cpp to use the CPU
         } else if (!success) {
             // llama_init_from_file returned nullptr
-            emit reportDevice("CPU");
-            emit reportFallbackReason("<br>GPU loading failed (out of VRAM?)");
+            m_llModelInfo.fallbackReason = "GPU loading failed (out of VRAM?)";
             modelLoadProps.insert("cpu_fallback_reason", "gpu_load_failed");
             success = m_llModelInfo.model->loadModel(filePath.toStdString(), n_ctx, 0);
             if (!m_shouldBeLoaded) {
-                m_llModelInfo.model.reset();
+                m_llModelInfo.resetModel(this);
                 if (!m_isServer)
                     LLModelStore::globalInstance()->releaseModel(std::move(m_llModelInfo));
-                m_llModelInfo = LLModelInfo();
+                resetModel();
                 emit modelLoadingPercentageChanged(0.0f);
                 return false;
             }
@@ -479,16 +482,15 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo)
             // ggml_vk_init was not called in llama.cpp
             // We might have had to fallback to CPU after load if the model is not possible to accelerate
             // for instance if the quantization method is not supported on Vulkan yet
-            emit reportDevice("CPU");
-            emit reportFallbackReason("<br>model or quant has no GPU support");
+            m_llModelInfo.fallbackReason = "model or quant has no GPU support";
             modelLoadProps.insert("cpu_fallback_reason", "gpu_unsupported_model");
         }

         if (!success) {
-            m_llModelInfo.model.reset();
+            m_llModelInfo.resetModel(this);
             if (!m_isServer)
                 LLModelStore::globalInstance()->releaseModel(std::move(m_llModelInfo));
-            m_llModelInfo = LLModelInfo();
+            resetModel();
             emit modelLoadingError(u"Could not load model due to invalid model file for %1"_s.arg(modelInfo.filename()));
             modelLoadProps.insert("error", "loadmodel_failed");
         } else {
@@ -497,10 +499,10 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo)
             case 'G': m_llModelType = LLModelType::GPTJ_; break;
             default:
                 {
-                    m_llModelInfo.model.reset();
+                    m_llModelInfo.resetModel(this);
                     if (!m_isServer)
                         LLModelStore::globalInstance()->releaseModel(std::move(m_llModelInfo));
-                    m_llModelInfo = LLModelInfo();
+                    resetModel();
                     emit modelLoadingError(u"Could not determine model type for %1"_s.arg(modelInfo.filename()));
                 }
             }
@@ -510,7 +512,7 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo)
         } else {
             if (!m_isServer)
                 LLModelStore::globalInstance()->releaseModel(std::move(m_llModelInfo));
-            m_llModelInfo = LLModelInfo();
+            resetModel();
             emit modelLoadingError(u"Error loading %1: %2"_s.arg(modelInfo.filename(), constructError));
         }
     }
@@ -523,6 +525,7 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo)
         fflush(stdout);
 #endif
         emit modelLoadingPercentageChanged(isModelLoaded() ? 1.0f : 0.0f);
+        emit loadedModelInfoChanged();

         modelLoadProps.insert("requestedDevice", MySettings::globalInstance()->device());
         modelLoadProps.insert("model", modelInfo.filename());
@@ -530,7 +533,7 @@ bool ChatLLM::loadModel(const ModelInfo &modelInfo)
     } else {
         if (!m_isServer)
             LLModelStore::globalInstance()->releaseModel(std::move(m_llModelInfo)); // release back into the store
-        m_llModelInfo = LLModelInfo();
+        resetModel();
         emit modelLoadingError(u"Could not find file for model %1"_s.arg(modelInfo.filename()));
     }

@@ -621,6 +624,16 @@ void ChatLLM::setModelInfo(const ModelInfo &modelInfo)
     emit modelInfoChanged(modelInfo);
 }

+void ChatLLM::acquireModel() {
+    m_llModelInfo = LLModelStore::globalInstance()->acquireModel();
+    emit loadedModelInfoChanged();
+}
+
+void ChatLLM::resetModel() {
+    m_llModelInfo = {};
+    emit loadedModelInfoChanged();
+}
+
 void ChatLLM::modelChangeRequested(const ModelInfo &modelInfo)
 {
     m_shouldBeLoaded = true;
@@ -809,7 +822,7 @@ void ChatLLM::unloadModel()
 #endif

     if (m_forceUnloadModel) {
-        m_llModelInfo.model.reset();
+        m_llModelInfo.resetModel(this);
         m_forceUnloadModel = false;
     }

diff --git a/gpt4all-chat/chatllm.h b/gpt4all-chat/chatllm.h
index fde97f1c..001eae6f 100644
--- a/gpt4all-chat/chatllm.h
+++ b/gpt4all-chat/chatllm.h
@@ -20,6 +20,7 @@
 #include <atomic>
 #include <cstdint>
 #include <memory>
+#include <optional>
 #include <string>

 using namespace Qt::Literals::StringLiterals;
@@ -32,11 +33,17 @@ enum LLModelType {
     API_,
 };

+class ChatLLM;
+
 struct LLModelInfo {
     std::unique_ptr<LLModel> model;
     QFileInfo fileInfo;
+    std::optional<QString> fallbackReason;
+
     // NOTE: This does not store the model type or name on purpose as this is left for ChatLLM which
     // must be able to serialize the information even if it is in the unloaded state
+
+    void resetModel(ChatLLM *cllm, LLModel *model = nullptr);
 };

 class TokenTimer : public QObject {
@@ -84,6 +91,9 @@
 class ChatLLM : public QObject {
     Q_OBJECT
     Q_PROPERTY(bool isRecalc READ isRecalc NOTIFY recalcChanged)
+    Q_PROPERTY(QString deviceBackend READ deviceBackend NOTIFY loadedModelInfoChanged)
+    Q_PROPERTY(QString device READ device NOTIFY loadedModelInfoChanged)
+    Q_PROPERTY(QString fallbackReason READ fallbackReason NOTIFY loadedModelInfoChanged)
 public:
     ChatLLM(Chat *parent, bool isServer = false);
     virtual ~ChatLLM();
@@ -110,6 +120,30 @@ public:

     bool isRecalc() const { return m_isRecalc; }

+    void acquireModel();
+    void resetModel();
+
+    QString deviceBackend() const
+    {
+        if (!isModelLoaded()) return QString();
+        std::string name = LLModel::GPUDevice::backendIdToName(m_llModelInfo.model->backendName());
+        return QString::fromStdString(name);
+    }
+
+    QString device() const
+    {
+        if (!isModelLoaded()) return QString();
+        const char *name = m_llModelInfo.model->gpuDeviceName();
+        return name ? QString(name) : u"CPU"_s;
+    }
+
+    // not loaded -> QString(), no fallback -> QString("")
+    QString fallbackReason() const
+    {
+        if (!isModelLoaded()) return QString();
+        return m_llModelInfo.fallbackReason.value_or(u""_s);
+    }
+
     QString generatedName() const { return QString::fromStdString(m_nameResponse); }

     bool serialize(QDataStream &stream, int version, bool serializeKV);
@@ -135,6 +169,7 @@ public Q_SLOTS:

 Q_SIGNALS:
     void recalcChanged();
+    void loadedModelInfoChanged();
     void modelLoadingPercentageChanged(float);
     void modelLoadingError(const QString &error);
     void modelLoadingWarning(const QString &warning);
diff --git a/gpt4all-chat/network.cpp b/gpt4all-chat/network.cpp
index 9b99683a..b30b4fdf 100644
--- a/gpt4all-chat/network.cpp
+++ b/gpt4all-chat/network.cpp
@@ -298,6 +298,7 @@ void Network::trackChatEvent(const QString &ev, QVariantMap props)
     const auto &curChat = ChatListModel::globalInstance()->currentChat();
     if (!props.contains("model"))
         props.insert("model", curChat->modelInfo().filename());
+    props.insert("device_backend", curChat->deviceBackend());
     props.insert("actualDevice", curChat->device());
     props.insert("doc_collections_enabled", curChat->collectionList().count());
     props.insert("doc_collections_total", LocalDocs::globalInstance()->localDocsModel()->rowCount());
diff --git a/gpt4all-chat/qml/ChatView.qml b/gpt4all-chat/qml/ChatView.qml
index 93cfe936..c7f13190 100644
--- a/gpt4all-chat/qml/ChatView.qml
+++ b/gpt4all-chat/qml/ChatView.qml
@@ -1294,7 +1294,21 @@ Rectangle {
                     visible: currentChat.tokenSpeed !== ""
                     elide: Text.ElideRight
                     wrapMode: Text.WordWrap
-                    text: currentChat.tokenSpeed + " \u00B7 " + currentChat.device + currentChat.fallbackReason
+                    text: {
+                        const segments = [currentChat.tokenSpeed];
+                        const device = currentChat.device;
+                        const backend = currentChat.deviceBackend;
+                        if (device !== null) { // device is null if we have no model loaded
+                            var deviceSegment = device;
+                            if (backend === "CUDA" || backend === "Vulkan")
+                                deviceSegment += ` (${backend})`;
+                            segments.push(deviceSegment);
+                        }
+                        const fallbackReason = currentChat.fallbackReason;
+                        if (fallbackReason !== null && fallbackReason !== "")
+                            segments.push(fallbackReason);
+                        return segments.join(" \u00B7 ");
+                    }
                     font.pixelSize: theme.fontSizeSmaller
                     font.bold: true
                 }