llamamodel: fix embedding crash for >512 tokens after #2310 (#2383)

Signed-off-by: Jared Van Bortel <jared@nomic.ai>
This commit is contained in:
Jared Van Bortel 2024-05-29 10:51:00 -04:00 committed by GitHub
parent f047f383d0
commit e94177ee9a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -386,7 +386,8 @@ bool LLamaModel::loadModel(const std::string &modelPath, int n_ctx, int ngl)
bool isEmbedding = is_embedding_arch(llama_model_arch(d_ptr->model));
const int n_ctx_train = llama_n_ctx_train(d_ptr->model);
if (isEmbedding) {
-        d_ptr->ctx_params.n_batch = n_ctx;
+        d_ptr->ctx_params.n_batch  = n_ctx;
+        d_ptr->ctx_params.n_ubatch = n_ctx;
} else {
if (n_ctx > n_ctx_train) {
std::cerr << "warning: model was trained on only " << n_ctx_train << " context tokens ("