diff --git a/gpt4all-backend/llmodel.cpp b/gpt4all-backend/llmodel.cpp
index 94be234d..8ef941ce 100644
--- a/gpt4all-backend/llmodel.cpp
+++ b/gpt4all-backend/llmodel.cpp
@@ -19,33 +19,27 @@ std::string s_implementations_search_path = ".";
 
-static bool has_at_least_minimal_hardware() {
-#if defined(__x86_64__) || defined(_M_X64)
-    #ifndef _MSC_VER
-        return __builtin_cpu_supports("avx");
-    #else
-        int cpuInfo[4];
-        __cpuid(cpuInfo, 1);
-        return cpuInfo[2] & (1 << 28);
-    #endif
-#else
-    return true; // Don't know how to handle non-x86_64
-#endif
-}
+#if !(defined(__x86_64__) || defined(_M_X64))
+    // irrelevant on non-x86_64
+    #define cpu_supports_avx() -1
+    #define cpu_supports_avx2() -1
+#elif defined(_MSC_VER)
+    // MSVC
+    static int get_cpu_info(int func_id, int reg_id) {
+        int info[4];
+        __cpuid(info, func_id);
+        return info[reg_id];
+    }
 
-static bool requires_avxonly() {
-#if defined(__x86_64__) || defined(_M_X64)
-    #ifndef _MSC_VER
-        return !__builtin_cpu_supports("avx2");
-    #else
-        int cpuInfo[4];
-        __cpuidex(cpuInfo, 7, 0);
-        return !(cpuInfo[1] & (1 << 5));
-    #endif
+    // AVX via EAX=1: Processor Info and Feature Bits, bit 28 of ECX
+    #define cpu_supports_avx() (get_cpu_info(1, 2) & (1 << 28))
+    // AVX2 via EAX=7, ECX=0: Extended Features, bit 5 of EBX
+    #define cpu_supports_avx2() (get_cpu_info(7, 1) & (1 << 5))
 #else
-    return false; // Don't know how to handle non-x86_64
+    // gcc/clang
+    #define cpu_supports_avx() __builtin_cpu_supports("avx")
+    #define cpu_supports_avx2() __builtin_cpu_supports("avx2")
 #endif
-}
 
 LLModel::Implementation::Implementation(Dlhandle &&dlhandle_)
     : m_dlhandle(new Dlhandle(std::move(dlhandle_))) {
@@ -71,21 +65,25 @@ LLModel::Implementation::Implementation(Implementation &&o)
 }
 
 LLModel::Implementation::~Implementation() {
-    if (m_dlhandle) delete m_dlhandle;
+    delete m_dlhandle;
 }
 
-bool LLModel::Implementation::isImplementation(const Dlhandle &dl) {
+static bool isImplementation(const Dlhandle &dl) {
     return dl.get("is_g4a_backend_model_implementation");
 }
 
 const std::vector<LLModel::Implementation> &LLModel::Implementation::implementationList() {
+    if (cpu_supports_avx() == 0) {
+        throw std::runtime_error("CPU does not support AVX");
+    }
+
     // NOTE: allocated on heap so we leak intentionally on exit so we have a chance to clean up the
     // individual models without the cleanup of the static list interfering
     static auto* libs = new std::vector<Implementation>([] () {
        std::vector<Implementation> fres;
 
-        std::string impl_name_re = "(bert|gptj|llamamodel-mainline)";
-        if (requires_avxonly()) {
+        std::string impl_name_re = "(gptj|llamamodel-mainline)";
+        if (cpu_supports_avx2() == 0) {
            impl_name_re += "-avxonly";
        } else {
            impl_name_re += "-(default|metal)";
        }
@@ -107,9 +105,8 @@ const std::vector<LLModel::Implementation> &LLModel::Implementation::implementat
                    // Add to list if model implementation
                    try {
                        Dlhandle dl(p.string());
-                        if (!Implementation::isImplementation(dl)) {
+                        if (!isImplementation(dl))
                            continue;
-                        }
                        fres.emplace_back(Implementation(std::move(dl)));
                    } catch (...) {}
                }
@@ -134,18 +131,13 @@ const LLModel::Implementation* LLModel::Implementation::implementation(const cha
            return &i;
        }
 
-    if (!buildVariantMatched) {
-        std::cerr << "LLModel ERROR: Could not find any implementations for build variant: " << buildVariant << "\n";
-    }
-    return nullptr;
+    if (!buildVariantMatched)
+        throw std::runtime_error("Could not find any implementations for build variant: " + buildVariant);
+
+    return nullptr; // unsupported model format
 }
 
 LLModel *LLModel::Implementation::construct(const std::string &modelPath, std::string buildVariant, int n_ctx) {
-    if (!has_at_least_minimal_hardware()) {
-        std::cerr << "LLModel ERROR: CPU does not support AVX\n";
-        return nullptr;
-    }
-
     // Get correct implementation
    const Implementation* impl = nullptr;
@@ -178,7 +170,7 @@ LLModel *LLModel::Implementation::construct(const std::string &modelPath, std::s
    if (!impl) {
        //TODO: Auto-detect CUDA/OpenCL
        if (buildVariant == "auto") {
-            if (requires_avxonly()) {
+            if (cpu_supports_avx2() == 0) {
                buildVariant = "avxonly";
            } else {
                buildVariant = "default";
@@ -196,15 +188,24 @@ LLModel *LLModel::Implementation::construct(const std::string &modelPath, std::s
 
 LLModel *LLModel::Implementation::constructDefaultLlama() {
    static std::unique_ptr<LLModel> llama([]() -> LLModel * {
+        const std::vector<LLModel::Implementation> *impls;
+        try {
+            impls = &implementationList();
+        } catch (const std::runtime_error &e) {
+            std::cerr << __func__ << ": implementationList failed: " << e.what() << "\n";
+            return nullptr;
+        }
+
        const LLModel::Implementation *impl = nullptr;
-        for (const auto &i : implementationList()) {
+        for (const auto &i: *impls) {
            if (i.m_buildVariant == "metal" || i.m_modelType != "LLaMA") continue;
            impl = &i;
        }
        if (!impl) {
-            std::cerr << "LLModel ERROR: Could not find CPU LLaMA implementation\n";
+            std::cerr << __func__ << ": could not find llama.cpp implementation\n";
            return nullptr;
        }
+
        auto fres = impl->m_construct();
        fres->m_implementation = impl;
        return fres;
@@ -240,3 +241,7 @@ void LLModel::Implementation::setImplementationsSearchPath(const std::string& pa
 const std::string& LLModel::Implementation::implementationsSearchPath() {
    return s_implementations_search_path;
 }
+
+bool LLModel::Implementation::hasSupportedCPU() {
+    return cpu_supports_avx() != 0;
+}
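The new detection macros are deliberately tri-state: they evaluate to -1 where the question is irrelevant (non-x86_64), 0 when the feature is absent, and nonzero when it is present, which is why the checks above compare against 0 explicitly instead of treating the result as a bool. A standalone illustration of that convention (sketch only, not part of the patch; the helper names are made up):

    #include <cassert>

    // Mirrors the convention in llmodel.cpp:
    //   -1 = not applicable (non-x86_64), 0 = feature missing, nonzero = feature present.
    static bool hasSupportedCpu(int avx) { return avx != 0; }  // like hasSupportedCPU()
    static bool wantsAvxOnly(int avx2)   { return avx2 == 0; } // like the "-avxonly" fallback

    int main() {
        assert(hasSupportedCpu(-1));  // non-x86_64: check does not apply, treated as supported
        assert(hasSupportedCpu(1));   // AVX present
        assert(!hasSupportedCpu(0));  // AVX absent -> implementationList() throws

        assert(wantsAvxOnly(0));      // no AVX2 -> load the "-avxonly" libraries
        assert(!wantsAvxOnly(-1));    // non-x86_64 -> "-(default|metal)" libraries
        return 0;
    }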
diff --git a/gpt4all-backend/llmodel.h b/gpt4all-backend/llmodel.h
index 0c76efd7..ac7f2055 100644
--- a/gpt4all-backend/llmodel.h
+++ b/gpt4all-backend/llmodel.h
@@ -30,7 +30,6 @@ public:
    class Implementation {
    public:
-        Implementation(Dlhandle &&);
        Implementation(const Implementation &) = delete;
        Implementation(Implementation &&);
        ~Implementation();
 
@@ -38,9 +37,6 @@ public:
        std::string_view modelType() const { return m_modelType; }
        std::string_view buildVariant() const { return m_buildVariant; }
 
-        static bool isImplementation(const Dlhandle &dl);
-        static const std::vector<Implementation> &implementationList();
-        static const Implementation *implementation(const char *fname, const std::string &buildVariant);
        static LLModel *construct(const std::string &modelPath, std::string buildVariant = "auto", int n_ctx = 2048);
        static std::vector<GPUDevice> availableGPUDevices();
        static int32_t maxContextLength(const std::string &modelPath);
@@ -48,8 +44,13 @@ public:
        static bool isEmbeddingModel(const std::string &modelPath);
        static void setImplementationsSearchPath(const std::string &path);
        static const std::string &implementationsSearchPath();
+        static bool hasSupportedCPU();
 
    private:
+        Implementation(Dlhandle &&);
+
+        static const std::vector<Implementation> &implementationList();
+        static const Implementation *implementation(const char *fname, const std::string &buildVariant);
        static LLModel *constructDefaultLlama();
 
        bool (*m_magicMatch)(const char *fname);
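With implementationList() and implementation() moved out of the public API, frontends interact with the backend through construct() and the new hasSupportedCPU() query; failures now surface either as a thrown exception (unsupported CPU, no implementation for the requested build variant) or as a null return (unrecognized model format). A minimal caller sketch, assuming llmodel.h is on the include path; the model path is a placeholder:

    #include "llmodel.h"

    #include <exception>
    #include <iostream>
    #include <memory>

    int main() {
        // Mirrors what gpt4all-chat now does for m_compatHardware.
        if (!LLModel::Implementation::hasSupportedCPU())
            std::cerr << "warning: this CPU lacks AVX; model loading will fail\n";

        try {
            std::unique_ptr<LLModel> model(
                LLModel::Implementation::construct("/path/to/model.gguf", "auto"));
            if (!model) {
                std::cerr << "model format not supported\n";
                return 1;
            }
        } catch (const std::exception &e) {
            // e.g. "CPU does not support AVX" propagated from implementationList()
            std::cerr << "backend error: " << e.what() << "\n";
            return 1;
        }
        return 0;
    }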
diff --git a/gpt4all-backend/llmodel_c.cpp b/gpt4all-backend/llmodel_c.cpp
index 40e41e82..e0809f1d 100644
--- a/gpt4all-backend/llmodel_c.cpp
+++ b/gpt4all-backend/llmodel_c.cpp
@@ -13,8 +13,6 @@ struct LLModelWrapper {
    ~LLModelWrapper() { delete llModel; }
 };
 
-thread_local static std::string last_error_message;
-
 llmodel_model llmodel_model_create(const char *model_path) {
    const char *error;
    auto fres = llmodel_model_create2(model_path, "auto", &error);
@@ -24,24 +22,30 @@ llmodel_model llmodel_model_create(const char *model_path) {
    return fres;
 }
 
+static void llmodel_set_error(const char **errptr, const char *message) {
+    thread_local static std::string last_error_message;
+    if (errptr) {
+        last_error_message = message;
+        *errptr = last_error_message.c_str();
+    }
+}
+
 llmodel_model llmodel_model_create2(const char *model_path, const char *build_variant, const char **error) {
-    auto wrapper = new LLModelWrapper;
-
+    LLModel *llModel;
    try {
-        wrapper->llModel = LLModel::Implementation::construct(model_path, build_variant);
-        if (!wrapper->llModel) {
-            last_error_message = "Model format not supported (no matching implementation found)";
-        }
+        llModel = LLModel::Implementation::construct(model_path, build_variant);
    } catch (const std::exception& e) {
-        last_error_message = e.what();
+        llmodel_set_error(error, e.what());
+        return nullptr;
    }
 
-    if (!wrapper->llModel) {
-        delete std::exchange(wrapper, nullptr);
-        if (error) {
-            *error = last_error_message.c_str();
-        }
+    if (!llModel) {
+        llmodel_set_error(error, "Model format not supported (no matching implementation found)");
+        return nullptr;
    }
+
+    auto wrapper = new LLModelWrapper;
+    wrapper->llModel = llModel;
    return wrapper;
 }
@@ -159,8 +163,7 @@ float *llmodel_embed(
    auto *wrapper = static_cast<LLModelWrapper *>(model);
 
    if (!texts || !*texts) {
-        if (error)
-            *error = strdup("'texts' is NULL or empty");
+        llmodel_set_error(error, "'texts' is NULL or empty");
        return nullptr;
    }
@@ -183,8 +186,7 @@
        embedding = new float[embd_size];
        wrapper->llModel->embed(textsVec, embedding, prefixStr, dimensionality, do_mean, atlas);
    } catch (std::exception const &e) {
-        if (error)
-            *error = strdup(e.what());
+        llmodel_set_error(error, e.what());
        return nullptr;
    }
diff --git a/gpt4all-bindings/python/gpt4all/_pyllmodel.py b/gpt4all-bindings/python/gpt4all/_pyllmodel.py
index a1cb13b2..dd090e8f 100644
--- a/gpt4all-bindings/python/gpt4all/_pyllmodel.py
+++ b/gpt4all-bindings/python/gpt4all/_pyllmodel.py
@@ -185,7 +185,7 @@ class LLModel:
        model = llmodel.llmodel_model_create2(self.model_path, b"auto", ctypes.byref(err))
        if model is None:
            s = err.value
-            raise ValueError(f"Unable to instantiate model: {'null' if s is None else s.decode()}")
+            raise RuntimeError(f"Unable to instantiate model: {'null' if s is None else s.decode()}")
        self.model = model

    def __del__(self):
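A side effect of funnelling every failure through llmodel_set_error() is a consistent ownership rule for the C API: the string returned via the error out-parameter points into a thread_local std::string owned by the backend, stays valid on that thread until the next failing llmodel_* call, and must not be freed — unlike the old llmodel_embed path, which handed out strdup'd memory. A caller-side sketch, assuming llmodel_c.h and its usual llmodel_model_destroy(); the model path is a placeholder:

    #include "llmodel_c.h"

    #include <cstdio>

    int main() {
        const char *error = nullptr;
        llmodel_model model = llmodel_model_create2("/path/to/model.gguf", "auto", &error);
        if (model == nullptr) {
            // `error` points into a thread-local buffer owned by the backend:
            // copy it if it must outlive the next llmodel_* call on this thread, never free() it.
            std::fprintf(stderr, "create failed: %s\n", error ? error : "(unknown)");
            return 1;
        }
        llmodel_model_destroy(model);
        return 0;
    }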
diff --git a/gpt4all-chat/llm.cpp b/gpt4all-chat/llm.cpp
index f9cb698b..3f562da7 100644
--- a/gpt4all-chat/llm.cpp
+++ b/gpt4all-chat/llm.cpp
@@ -1,4 +1,5 @@
 #include "llm.h"
+#include "../gpt4all-backend/llmodel.h"
 #include "../gpt4all-backend/sysinfo.h"
 
 #include
@@ -25,22 +26,8 @@ LLM *LLM::globalInstance()
 
 LLM::LLM()
    : QObject{nullptr}
-    , m_compatHardware(true)
+    , m_compatHardware(LLModel::Implementation::hasSupportedCPU())
 {
-#if defined(__x86_64__)
-    #ifndef _MSC_VER
-    const bool minimal(__builtin_cpu_supports("avx"));
-    #else
-    int cpuInfo[4];
-    __cpuid(cpuInfo, 1);
-    const bool minimal(cpuInfo[2] & (1 << 28));
-    #endif
-#else
-    const bool minimal = true; // Don't know how to handle non-x86_64
-#endif
-
-    m_compatHardware = minimal;
-
    QNetworkInformation::loadDefaultBackend();
    auto * netinfo = QNetworkInformation::instance();
    if (netinfo) {
diff --git a/gpt4all-chat/modellist.cpp b/gpt4all-chat/modellist.cpp
index 13a9d3cf..4cfd097a 100644
--- a/gpt4all-chat/modellist.cpp
+++ b/gpt4all-chat/modellist.cpp
@@ -228,11 +228,11 @@ int ModelInfo::maxContextLength() const
 {
    if (!installed || isOnline) return -1;
    if (m_maxContextLength != -1) return m_maxContextLength;
    auto path = (dirpath + filename()).toStdString();
-    int layers = LLModel::Implementation::maxContextLength(path);
-    if (layers < 0) {
-        layers = 4096; // fallback value
+    int n_ctx = LLModel::Implementation::maxContextLength(path);
+    if (n_ctx < 0) {
+        n_ctx = 4096; // fallback value
    }
-    m_maxContextLength = layers;
+    m_maxContextLength = n_ctx;
    return m_maxContextLength;
 }
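The modellist.cpp rename is cosmetic but documents the contract: LLModel::Implementation::maxContextLength() returns a negative value when the model file does not report a trained context length, and the UI falls back to 4096. The same contract as a standalone helper (sketch only; the function name and default are illustrative):

    #include "llmodel.h"

    #include <string>

    // Returns the model's reported context length, or `fallback` when the backend
    // cannot determine it (maxContextLength() returns a negative value in that case).
    static int contextLengthOrDefault(const std::string &modelPath, int fallback = 4096) {
        int n_ctx = LLModel::Implementation::maxContextLength(modelPath);
        return n_ctx < 0 ? fallback : n_ctx;
    }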