typescript: fix final bugs and polishing, circle ci documentation (#960)

* fix: esm and cjs compatibility

Signed-off-by: Jacob Nguyen <76754747+jacoobes@users.noreply.github.com>

* Update prebuild.js

Signed-off-by: Jacob Nguyen <76754747+jacoobes@users.noreply.github.com>

* fix gpt4all.js

Signed-off-by: Jacob Nguyen <76754747+jacoobes@users.noreply.github.com>

* Fix compile for windows and linux again. PLEASE DON'T REVERT THIS, git gui!

* version bump

* polish up spec and build scripts

* lock file refresh

* fix: proper resource closing and error handling

* make sure libPath is not null

* add msvc build script and update readme requirements

* python workflows in circleci

* dummy python change

* no need for main

* second hold for pypi deploy

* let me deploy pls

* bring back when condition

* Typo, ignore list  (#967)

Fix typo in javadoc,
Add word to ignore list for codespellrc

---------

Co-authored-by: felix <felix@zaslavskiy.net>

* llmodel: change tokenToString to not use string_view (#968)

fixes a definite use-after-free and likely avoids some other
potential ones - a std::string will convert to a std::string_view
automatically, but as soon as the std::string in question goes out of
scope it is freed and the string_view is left pointing at freed
memory - this is *mostly* fine if it's returning a reference to the
tokenizer's internal vocab table, but it's, imo, too easy to return a
reference to a dynamically constructed string this way, as replit is
doing (and unfortunately needs to do to convert the internal whitespace
replacement symbol back to a space)

* Initial Library Loader for .NET Bindings / Update bindings to support newest changes (#763)

* Initial Library Loader

* Load library as part of Model factory

* Dynamically search and find the dlls

* Update tests to use locally built runtimes

* Fix dylib loading, add macos runtime support for sample/tests

* Bypass automatic loading by default.

* Only set CMAKE_OSX_ARCHITECTURES if not already set, allow cross-compile

* Switch Loading again

* Update build scripts for mac/linux

* Update bindings to support newest breaking changes

* Fix build

* Use llmodel for Windows

* Actually, it does need to be libllmodel

* Name

* Remove TFMs, bypass loading by default

* Fix script

* Delete mac script

---------

Co-authored-by: Tim Miller <innerlogic4321@ghmail.com>

* bump llama.cpp mainline to latest (#964)

* fix prompt context so it's preserved in class

* update setup.py

* metal replit (#931)

metal+replit

makes replit work with Metal and removes its use of `mem_per_token`
in favor of fixed size scratch buffers (closer to llama.cpp)

* update documentation scripts and generation to include readme.md

* update readme and documentation for source

* begin tests, import jest, fix listModels export

* fix typo

* chore: update spec

* fix: finally, reduced potential of empty string

* chore: add stub for createTokenStream

* refactor: protecting resources properly

* add basic jest tests

* update

* update readme

* refactor: namespace the res variable

* circleci integration to automatically build docs

* add starter docs

* typo

* more circle ci typo

* forgot to add nodejs circle ci orb

* fix circle ci

* feat: @iimez verify download and fix prebuild script

* fix: oops, option name wrong

* fix: gpt4all utils not emitting docs

* chore: fix up scripts

* fix: update docs and typings for md5 sum

* fix: macos compilation

* some refactoring

* Update index.cc

Signed-off-by: Jacob Nguyen <76754747+jacoobes@users.noreply.github.com>

* update readme and enable exceptions on mac

* circle ci progress

* basic embedding with sbert (not tested & cpp side only)

* fix circle ci

* fix circle ci

* update circle ci script

* bruh

* fix again

* fix

* fixed required workflows

* fix ci

* fix pwd

* fix pwd

* update ci

* revert

* fix

* prevent rebuild

* remove noop

* Update continue_config.yml

Signed-off-by: Jacob Nguyen <76754747+jacoobes@users.noreply.github.com>

* Update binding.gyp

Signed-off-by: Jacob Nguyen <76754747+jacoobes@users.noreply.github.com>

* fix fs not found

* remove cpp 20 standard

* fix warnings, safer way to calculate arrsize

* readd build backend

* basic embeddings and yarn test

* fix circle ci

Signed-off-by: Jacob Nguyen <76754747+jacoobes@users.noreply.github.com>

Update continue_config.yml

Signed-off-by: Jacob Nguyen <76754747+jacoobes@users.noreply.github.com>

Signed-off-by: Jacob Nguyen <76754747+jacoobes@users.noreply.github.com>

fix macos paths

update readme and roadmap

split up spec

update readme

check for url in modelsjson

update docs and inline stuff

update yarn configuration and readme

update readme

readd npm publish script

add exceptions

bruh one space broke the yaml

codespell

oops forgot to add runtimes folder

bump version

try code snippet https://support.circleci.com/hc/en-us/articles/8325075309339-How-to-install-NPM-on-Windows-images

add fallback for unknown architectures

attached to wrong workspace

hopefully fix

moving everything under backend to persist

should work now

* Update README.md

Signed-off-by: Jacob Nguyen <76754747+jacoobes@users.noreply.github.com>

---------

Signed-off-by: Jacob Nguyen <76754747+jacoobes@users.noreply.github.com>
Co-authored-by: Adam Treat <treat.adam@gmail.com>
Co-authored-by: Richard Guo <richardg7890@gmail.com>
Co-authored-by: Felix Zaslavskiy <felix.zaslavskiy@gmail.com>
Co-authored-by: felix <felix@zaslavskiy.net>
Co-authored-by: Aaron Miller <apage43@ninjawhale.com>
Co-authored-by: Tim Miller <drasticactions@users.noreply.github.com>
Co-authored-by: Tim Miller <innerlogic4321@ghmail.com>
Jacob Nguyen 2023-07-25 10:46:40 -05:00 committed by GitHub
parent b3f84c56e7
commit 545c23b4bd
30 changed files with 8091 additions and 3205 deletions

View File

@@ -12,6 +12,7 @@ workflows:
config-path: .circleci/continue_config.yml
mapping: |
gpt4all-bindings/python/.* run-python-workflow true
gpt4all-bindings/typescript/.* run-ts-workflow true
gpt4all-bindings/csharp/.* run-csharp-workflow true
gpt4all-backend/.* run-chat-workflow true
gpt4all-chat/.* run-chat-workflow true

View File

@@ -2,6 +2,7 @@ version: 2.1
orbs:
win: circleci/windows@5.0
python: circleci/python@1.2
node: circleci/node@5.1
parameters:
run-default-workflow:
@@ -13,6 +14,9 @@ parameters:
run-chat-workflow:
type: boolean
default: false
run-ts-workflow:
type: boolean
default: false
run-csharp-workflow:
type: boolean
default: false
@@ -156,12 +160,26 @@ jobs:
-S ../gpt4all-chat \
-B .
~/Qt/Tools/CMake/CMake.app/Contents/bin/cmake --build . --target all
build-ts-docs:
docker:
- image: cimg/base:stable
steps:
- checkout
- node/install:
install-yarn: true
node-version: "18.16"
- run: node --version
- node/install-packages:
pkg-manager: yarn
- run:
name: build docs ts yo
command: yarn docs:build
build-py-docs:
docker:
- image: circleci/python:3.8
steps:
- checkout
- checkout
- run:
name: Install dependencies
command: |
@@ -612,6 +630,160 @@ jobs:
- store_artifacts:
path: gpt4all-bindings/csharp/Gpt4All/bin/Release
build-nodejs-linux:
docker:
- image: cimg/base:stable
steps:
- checkout
- attach_workspace:
at: /tmp/gpt4all-backend
- node/install:
install-yarn: true
node-version: "18.16"
- run: node --version
- node/install-packages:
app-dir: gpt4all-bindings/typescript
pkg-manager: yarn
- run:
command: |
cd gpt4all-bindings/typescript
yarn prebuildify -t 18.16.0 --napi
- run:
command: |
mkdir -p gpt4all-backend/prebuilds/linux-x64
mkdir -p gpt4all-backend/runtimes/linux-x64
cp /tmp/gpt4all-backend/runtimes/linux-x64/*-*.so gpt4all-backend/runtimes/linux-x64
cp gpt4all-bindings/typescript/prebuilds/linux-x64/*.node gpt4all-backend/prebuilds/linux-x64
- persist_to_workspace:
root: gpt4all-backend
paths:
- prebuilds/linux-x64/*.node
- runtimes/linux-x64/*-*.so
build-nodejs-macos:
macos:
xcode: "14.0.0"
steps:
- checkout
- attach_workspace:
at: /tmp/gpt4all-backend
- node/install:
install-yarn: true
node-version: "18.16"
- run: node --version
- node/install-packages:
app-dir: gpt4all-bindings/typescript
pkg-manager: yarn
- run:
command: |
cd gpt4all-bindings/typescript
yarn prebuildify -t 18.16.0 --napi
- run:
name: "Persisting all necessary things to workspace"
command: |
mkdir -p gpt4all-backend/prebuilds/darwin-x64
mkdir -p gpt4all-backend/runtimes/darwin-x64
cp /tmp/gpt4all-backend/runtimes/osx-x64/*-*.* gpt4all-backend/runtimes/darwin-x64
cp gpt4all-bindings/typescript/prebuilds/darwin-x64/*.node gpt4all-backend/prebuilds/darwin-x64
- persist_to_workspace:
root: gpt4all-backend
paths:
- prebuilds/darwin-x64/*.node
- runtimes/darwin-x64/*-*.*
build-nodejs-windows:
executor:
name: win/default
size: large
shell: powershell.exe -ExecutionPolicy Bypass
steps:
- checkout
- attach_workspace:
at: /tmp/gpt4all-backend
- run: choco install wget -y
- run:
command: wget https://nodejs.org/dist/v18.16.0/node-v18.16.0-x86.msi -P C:\Users\circleci\Downloads\
shell: cmd.exe
- run: MsiExec.exe /i C:\Users\circleci\Downloads\node-v18.16.0-x86.msi /qn
- run:
command: |
Start-Process powershell -verb runAs -Args "-start GeneralProfile"
nvm install 18.16.0
nvm use 18.16.0
- run: node --version
- run:
command: |
npm install -g yarn
cd gpt4all-bindings/typescript
yarn install
- run:
command: |
cd gpt4all-bindings/typescript
yarn prebuildify -t 18.16.0 --napi
- run:
command: |
mkdir -p gpt4all-backend/prebuilds/win32-x64
mkdir -p gpt4all-backend/runtimes/win32-x64
cp /tmp/gpt4all-backend/runtimes/win-x64_msvc/*-*.dll gpt4all-backend/runtimes/win32-x64
cp gpt4all-bindings/typescript/prebuilds/win32-x64/*.node gpt4all-backend/prebuilds/win32-x64
- persist_to_workspace:
root: gpt4all-backend
paths:
- prebuilds/win32-x64/*.node
- runtimes/win32-x64/*-*.dll
prepare-npm-pkg:
docker:
- image: cimg/base:stable
steps:
- attach_workspace:
at: /tmp/gpt4all-backend
- checkout
- node/install:
install-yarn: true
node-version: "18.16"
- run: node --version
- run:
command: |
cd gpt4all-bindings/typescript
# excluding llmodel. nodejs bindings dont need llmodel.dll
mkdir -p runtimes/win32-x64/native
mkdir -p prebuilds/win32-x64/
cp /tmp/gpt4all-backend/runtimes/win-x64_msvc/*-*.dll runtimes/win32-x64/native/
cp /tmp/gpt4all-backend/prebuilds/win32-x64/*.node prebuilds/win32-x64/
mkdir -p runtimes/linux-x64/native
mkdir -p prebuilds/linux-x64/
cp /tmp/gpt4all-backend/runtimes/linux-x64/*-*.so runtimes/linux-x64/native/
cp /tmp/gpt4all-backend/prebuilds/linux-x64/*.node prebuilds/linux-x64/
mkdir -p runtimes/darwin-x64/native
mkdir -p prebuilds/darwin-x64/
cp /tmp/gpt4all-backend/runtimes/darwin-x64/*-*.* runtimes/darwin-x64/native/
cp /tmp/gpt4all-backend/prebuilds/darwin-x64/*.node prebuilds/darwin-x64/
# Fallback build if user is not on above prebuilds
mv -f binding.ci.gyp binding.gyp
mkdir gpt4all-backend
cd ../../gpt4all-backend
mv llmodel.h llmodel.cpp llmodel_c.cpp llmodel_c.h sysinfo.h dlhandle.h ../gpt4all-bindings/typescript/gpt4all-backend/
# Test install
- node/install-packages:
app-dir: gpt4all-bindings/typescript
pkg-manager: yarn
override-ci-command: yarn install
- run:
command: |
cd gpt4all-bindings/typescript
yarn run test
- run:
command: |
cd gpt4all-bindings/typescript
npm set //registry.npmjs.org/:_authToken=$NPM_TOKEN
npm publish --access public --tag alpha
workflows:
version: 2
default:
@@ -635,6 +807,11 @@ workflows:
deploy-docs:
when: << pipeline.parameters.run-python-workflow >>
jobs:
- build-ts-docs:
filters:
branches:
only:
- main
- build-py-docs:
filters:
branches:
@@ -679,11 +856,14 @@
or:
- << pipeline.parameters.run-python-workflow >>
- << pipeline.parameters.run-csharp-workflow >>
- << pipeline.parameters.run-ts-workflow >>
jobs:
- hold:
type: approval
- nuget-hold:
type: approval
- npm-hold:
type: approval
- build-bindings-backend-linux:
filters:
branches:
@@ -708,6 +888,41 @@
only:
requires:
- hold
# NodeJs Jobs
- prepare-npm-pkg:
filters:
branches:
only:
requires:
- npm-hold
- build-nodejs-linux
- build-nodejs-windows
- build-nodejs-macos
- build-nodejs-linux:
filters:
branches:
only:
requires:
- npm-hold
- build-bindings-backend-linux
- build-nodejs-windows:
filters:
branches:
only:
requires:
- npm-hold
- build-bindings-backend-windows-msvc
- build-nodejs-macos:
filters:
branches:
only:
requires:
- npm-hold
- build-bindings-backend-macos
# CSharp Jobs
- build-csharp-linux:
filters:
branches:
@@ -719,7 +934,7 @@
branches:
only:
requires:
- build-bindings-backend-windows
- build-bindings-backend-windows
- build-csharp-macos:
filters:
branches:

View File

@@ -0,0 +1,670 @@
# GPT4All Node.js API
```sh
yarn add gpt4all@alpha
npm install gpt4all@alpha
pnpm install gpt4all@alpha
```
The original [GPT4All typescript bindings](https://github.com/nomic-ai/gpt4all-ts) are now out of date.
* New bindings created by [jacoobes](https://github.com/jacoobes) and the [nomic ai community](https://home.nomic.ai) :D, for all to use.
* [Documentation](#Documentation)
### Code (alpha)
```js
import { createCompletion, loadModel } from '../src/gpt4all.js'
const ll = await loadModel('ggml-vicuna-7b-1.1-q4_2.bin', { verbose: true });
const response = await createCompletion(ll, [
{ role : 'system', content: 'You are meant to be annoying and unhelpful.' },
{ role : 'user', content: 'What is 1 + 1?' }
]);
```
### API
* The nodejs api has made strides to mirror the python api. It is not 100% mirrored, but many pieces of the api resemble its python counterpart.
* Everything should work out of the box.
* [docs](./docs/api.md)
### Build Instructions
* As of 05/21/2023, tested on Windows (MSVC). (Somehow got it to work on MSVC 🤯)
* binding.gyp is the compile config
* Tested on Ubuntu. Everything seems to work fine
* MinGW works as well to build the gpt4all-backend. **HOWEVER**, this package works only with MSVC-built DLLs.
### Requirements
* git
* [node.js >= 18.0.0](https://nodejs.org/en)
* [yarn](https://yarnpkg.com/)
* [node-gyp](https://github.com/nodejs/node-gyp)
* all of its requirements.
* (unix) gcc version 12
* These bindings use the C++20 standard.
* (win) msvc version 143
* Can be obtained with Visual Studio 2022 Build Tools
### Build
```sh
git clone https://github.com/nomic-ai/gpt4all.git
cd gpt4all-bindings/typescript
```
* The below shell commands assume the current working directory is `typescript`.
* To Build and Rebuild:
```sh
yarn
```
* The llama.cpp git submodule for gpt4all may be absent. If this is the case, make sure to run the following in the llama.cpp parent directory:
```sh
git submodule update --init --depth 1 --recursive
```
**AS OF THE NEW BACKEND**, to build the backend:
```sh
yarn build:backend
```
This will build platform-dependent dynamic libraries, which will be located in `runtimes/(platform)/native`. The only current way to use them is to put them in the current working directory of your application, that is, **WHEREVER YOU RUN YOUR NODE APPLICATION**.
* llama-xxxx.dll is required.
* Depending on which model you are using, you'll need to select the proper model loader.
* For example, if you are running a Mosaic MPT model, you will need to select the mpt-(buildvariant).(dynamiclibrary)
### Test
```sh
yarn test
```
### Source Overview
#### src/
* Extra functions to aid devex
* Typings for the native node addon
* The javascript interface
#### test/
* Simple unit tests for some exported functions.
* More advanced ai testing is not handled
#### spec/
* Average look and feel of the api
* Should work assuming a model and libraries are installed locally in the working directory
#### index.cc
* The bridge between nodejs and C. Where the bindings are.
#### prompt.cc
* Handling prompting and inference of models in a threadsafe, asynchronous way.
#### docs/
* Autogenerated documentation using the script `yarn docs:build`
### Roadmap
This package is in active development, and breaking changes may happen until the api stabilizes. Here's the todo list:
* \[x] prompt models via a threadsafe function in order to have proper non blocking behavior in nodejs
* \[ ] createTokenStream, an async iterator that streams each token emitted from the model. Planning on following this [example](https://github.com/nodejs/node-addon-examples/tree/main/threadsafe-async-iterator)
* \[ ] proper unit testing (integrate with circle ci)
* \[ ] publish to npm under alpha tag `gpt4all@alpha`
* \[ ] have more people test on other platforms (mac tester needed)
* \[x] switch to new pluggable backend
### Documentation
<!-- Generated by documentation.js. Update this documentation by updating the source code. -->
##### Table of Contents
* [ModelType](#modeltype)
* [ModelFile](#modelfile)
* [gptj](#gptj)
* [llama](#llama)
* [mpt](#mpt)
* [replit](#replit)
* [type](#type)
* [LLModel](#llmodel)
* [constructor](#constructor)
* [Parameters](#parameters)
* [type](#type-1)
* [name](#name)
* [stateSize](#statesize)
* [threadCount](#threadcount)
* [setThreadCount](#setthreadcount)
* [Parameters](#parameters-1)
* [raw\_prompt](#raw_prompt)
* [Parameters](#parameters-2)
* [embed](#embed)
* [Parameters](#parameters-3)
* [isModelLoaded](#ismodelloaded)
* [setLibraryPath](#setlibrarypath)
* [Parameters](#parameters-4)
* [getLibraryPath](#getlibrarypath)
* [loadModel](#loadmodel)
* [Parameters](#parameters-5)
* [createCompletion](#createcompletion)
* [Parameters](#parameters-6)
* [Examples](#examples)
* [createEmbedding](#createembedding)
* [Parameters](#parameters-7)
* [CompletionOptions](#completionoptions)
* [verbose](#verbose)
* [hasDefaultHeader](#hasdefaultheader)
* [hasDefaultFooter](#hasdefaultfooter)
* [PromptMessage](#promptmessage)
* [role](#role)
* [content](#content)
* [prompt\_tokens](#prompt_tokens)
* [completion\_tokens](#completion_tokens)
* [total\_tokens](#total_tokens)
* [CompletionReturn](#completionreturn)
* [model](#model)
* [usage](#usage)
* [choices](#choices)
* [CompletionChoice](#completionchoice)
* [message](#message)
* [LLModelPromptContext](#llmodelpromptcontext)
* [logits\_size](#logits_size)
* [tokens\_size](#tokens_size)
* [n\_past](#n_past)
* [n\_ctx](#n_ctx)
* [n\_predict](#n_predict)
* [top\_k](#top_k)
* [top\_p](#top_p)
* [temp](#temp)
* [n\_batch](#n_batch)
* [repeat\_penalty](#repeat_penalty)
* [repeat\_last\_n](#repeat_last_n)
* [context\_erase](#context_erase)
* [createTokenStream](#createtokenstream)
* [Parameters](#parameters-8)
* [DEFAULT\_DIRECTORY](#default_directory)
* [DEFAULT\_LIBRARIES\_DIRECTORY](#default_libraries_directory)
* [downloadModel](#downloadmodel)
* [Parameters](#parameters-9)
* [Examples](#examples-1)
* [DownloadModelOptions](#downloadmodeloptions)
* [modelPath](#modelpath)
* [debug](#debug)
* [url](#url)
* [md5sum](#md5sum)
* [DownloadController](#downloadcontroller)
* [cancel](#cancel)
* [promise](#promise)
#### ModelType
Type of the model
Type: (`"gptj"` | `"llama"` | `"mpt"` | `"replit"`)
#### ModelFile
Full list of models available
##### gptj
List of GPT-J Models
Type: (`"ggml-gpt4all-j-v1.3-groovy.bin"` | `"ggml-gpt4all-j-v1.2-jazzy.bin"` | `"ggml-gpt4all-j-v1.1-breezy.bin"` | `"ggml-gpt4all-j.bin"`)
##### llama
List Llama Models
Type: (`"ggml-gpt4all-l13b-snoozy.bin"` | `"ggml-vicuna-7b-1.1-q4_2.bin"` | `"ggml-vicuna-13b-1.1-q4_2.bin"` | `"ggml-wizardLM-7B.q4_2.bin"` | `"ggml-stable-vicuna-13B.q4_2.bin"` | `"ggml-nous-gpt4-vicuna-13b.bin"` | `"ggml-v3-13b-hermes-q5_1.bin"`)
##### mpt
List of MPT Models
Type: (`"ggml-mpt-7b-base.bin"` | `"ggml-mpt-7b-chat.bin"` | `"ggml-mpt-7b-instruct.bin"`)
##### replit
List of Replit Models
Type: `"ggml-replit-code-v1-3b.bin"`
#### type
Model architecture. This argument currently does not have any functionality and is just used as a descriptive identifier for the user.
Type: [ModelType](#modeltype)
#### LLModel
LLModel class representing a language model.
This is a base class that provides common functionality for different types of language models.
##### constructor
Initialize a new LLModel.
###### Parameters
* `path` **[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)** Absolute path to the model file.
<!---->
* Throws **[Error](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Error)** If the model file does not exist.
##### type
either 'gptj', 'mpt', or 'llama', or undefined
Returns **([ModelType](#modeltype) | [undefined](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/undefined))**&#x20;
##### name
The name of the model.
Returns **[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)**&#x20;
##### stateSize
Get the size of the internal state of the model.
NOTE: This state data is specific to the type of model you have created.
Returns **[number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number)** the size in bytes of the internal state of the model
##### threadCount
Get the number of threads used for model inference.
The default is the number of physical cores your computer has.
Returns **[number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number)** The number of threads used for model inference.
##### setThreadCount
Set the number of threads used for model inference.
###### Parameters
* `newNumber` **[number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number)** The new number of threads.
Returns **void**&#x20;
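A minimal sketch of adjusting the thread count, assuming an already-constructed `LLModel` instance named `model` (a hypothetical name, not part of the API):
```js
// `model` is a hypothetical, already-constructed LLModel instance.
model.setThreadCount(4);
console.log(model.threadCount()); // 4
```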
##### raw\_prompt
Prompt the model with a given input and optional parameters.
This is the raw output from the model.
Use the prompt function exported for a value
###### Parameters
* `q` **[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)** The prompt input.
* `params` **Partial<[LLModelPromptContext](#llmodelpromptcontext)>** Optional parameters for the prompt context.
* `callback` **function (res: [string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)): void**&#x20;
Returns **void** The result of the model prompt.
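A hedged sketch of the low-level call, again assuming a constructed `LLModel` named `model`; most users will want the higher-level `createCompletion` instead:
```js
// Generated text arrives through the callback; raw_prompt itself returns void.
let output = '';
model.raw_prompt('What is 1 + 1?', { n_predict: 16 }, (res) => {
    output += res;
});
```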
##### embed
Embed text with the model. Keep in mind that
not all models can embed text (only bert can embed as of 07/16/2023 (mm/dd/yyyy)).
Use the createEmbedding function exported for a value
###### Parameters
* `text` **[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)**&#x20;
* `q` The prompt input.
* `params` Optional parameters for the prompt context.
Returns **[Float32Array](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Float32Array)** The embedding of the input text.
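A sketch of the low-level embedder, assuming `model` wraps an embedding-capable (bert-based) model:
```js
// Returns a Float32Array holding the embedding for the given text.
const vector = model.embed('The quick brown fox');
console.log(vector.length);
```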
##### isModelLoaded
Whether the model is loaded or not.
Returns **[boolean](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Boolean)**&#x20;
##### setLibraryPath
Where to search for the pluggable backend libraries
###### Parameters
* `s` **[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)**&#x20;
Returns **void**&#x20;
##### getLibraryPath
Where to get the pluggable backend libraries
Returns **[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)**&#x20;
#### loadModel
Loads a machine learning model with the specified name. The de facto way to create a model.
By default this will download a model from the official GPT4All website if a model is not present at the given path.
##### Parameters
* `modelName` **[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)** The name of the model to load.
* `options` **(LoadModelOptions | [undefined](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/undefined))?** (Optional) Additional options for loading the model.
Returns **[Promise](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Promise)<[LLModel](#llmodel)>** A promise that resolves to an instance of the loaded LLModel.
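For example (a sketch, assuming the package is imported under the `gpt4all` name and the code runs in an async/ESM context):
```js
import { loadModel } from 'gpt4all';
// Downloads the weights first if they are not already present at the model path.
const model = await loadModel('ggml-gpt4all-j-v1.3-groovy.bin', { verbose: true });
console.log(model.isModelLoaded()); // true once loading succeeds
```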
#### createCompletion
The nodejs equivalent to python binding's chat\_completion
##### Parameters
* `llmodel` **[LLModel](#llmodel)** The language model object.
* `messages` **[Array](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Array)<[PromptMessage](#promptmessage)>** The array of messages for the conversation.
* `options` **[CompletionOptions](#completionoptions)** The options for creating the completion.
##### Examples
```javascript
const llmodel = new LLModel(model)
const messages = [
{ role: 'system', message: 'You are a weather forecaster.' },
{ role: 'user', message: 'should i go out today?' } ]
const completion = await createCompletion(llmodel, messages, {
verbose: true,
temp: 0.9,
})
console.log(completion.choices[0].message.content)
// No, it's going to be cold and rainy.
```
Returns **[CompletionReturn](#completionreturn)** The completion result.
#### createEmbedding
The nodejs moral equivalent to python binding's Embed4All().embed()
##### Parameters
* `llmodel` **[LLModel](#llmodel)** The language model object.
* `text` **[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)** text to embed
Returns **[Float32Array](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Float32Array)** The embedding result.
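A sketch mirroring the embedding example in the bindings README, assuming an embedding-capable model and the `gpt4all` import name:
```js
import { createEmbedding, loadModel } from 'gpt4all';
const embedder = await loadModel('ggml-all-MiniLM-L6-v2-f16', { verbose: true });
const fltArray = createEmbedding(embedder, 'Pain is inevitable, suffering optional');
```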
#### CompletionOptions
**Extends Partial\<LLModelPromptContext>**
The options for creating the completion.
##### verbose
Indicates if verbose logging is enabled.
Type: [boolean](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Boolean)
##### hasDefaultHeader
Indicates if the default header is included in the prompt.
Type: [boolean](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Boolean)
##### hasDefaultFooter
Indicates if the default footer is included in the prompt.
Type: [boolean](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Boolean)
#### PromptMessage
A message in the conversation, identical to OpenAI's chat message.
##### role
The role of the message.
Type: (`"system"` | `"assistant"` | `"user"`)
##### content
The message content.
Type: [string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)
#### prompt\_tokens
The number of tokens used in the prompt.
Type: [number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number)
#### completion\_tokens
The number of tokens used in the completion.
Type: [number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number)
#### total\_tokens
The total number of tokens used.
Type: [number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number)
#### CompletionReturn
The result of the completion, similar to OpenAI's format.
##### model
The model name.
Type: [ModelFile](#modelfile)
##### usage
Token usage report.
Type: {prompt\_tokens: [number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number), completion\_tokens: [number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number), total\_tokens: [number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number)}
##### choices
The generated completions.
Type: [Array](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Array)<[CompletionChoice](#completionchoice)>
#### CompletionChoice
A completion choice, similar to OpenAI's format.
##### message
Response message
Type: [PromptMessage](#promptmessage)
#### LLModelPromptContext
Model inference arguments for generating completions.
##### logits\_size
The size of the raw logits vector.
Type: [number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number)
##### tokens\_size
The size of the raw tokens vector.
Type: [number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number)
##### n\_past
The number of tokens in the past conversation.
Type: [number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number)
##### n\_ctx
The number of tokens possible in the context window.
Type: [number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number)
##### n\_predict
The number of tokens to predict.
Type: [number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number)
##### top\_k
The top-k logits to sample from.
Type: [number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number)
##### top\_p
The nucleus sampling probability threshold.
Type: [number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number)
##### temp
The temperature to adjust the model's output distribution.
Type: [number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number)
##### n\_batch
The number of predictions to generate in parallel.
Type: [number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number)
##### repeat\_penalty
The penalty factor for repeated tokens.
Type: [number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number)
##### repeat\_last\_n
The number of last tokens to penalize.
Type: [number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number)
##### context\_erase
The percentage of context to erase if the context window is exceeded.
Type: [number](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number)
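Because `CompletionOptions` extends `Partial<LLModelPromptContext>`, these fields can be passed straight to `createCompletion`; a sketch reusing the `llmodel` and `messages` from the createCompletion example above (values are illustrative, not recommended defaults):
```js
const completion = await createCompletion(llmodel, messages, {
    n_predict: 128,
    top_k: 40,
    top_p: 0.9,
    temp: 0.7,
    repeat_penalty: 1.18,
    repeat_last_n: 64,
});
```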
#### createTokenStream
TODO: Help wanted to implement this
##### Parameters
* `llmodel` **[LLModel](#llmodel)**&#x20;
* `messages` **[Array](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Array)<[PromptMessage](#promptmessage)>**&#x20;
* `options` **[CompletionOptions](#completionoptions)**&#x20;
Returns **function (ll: [LLModel](#llmodel)): AsyncGenerator<[string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)>**&#x20;
#### DEFAULT\_DIRECTORY
From python api:
models will be stored in `(homedir)/.cache/gpt4all/`
Type: [string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)
#### DEFAULT\_LIBRARIES\_DIRECTORY
From python api:
The default path for dynamic libraries to be stored.
You may separate paths by a semicolon to search in multiple areas.
This searches DEFAULT\_DIRECTORY/libraries, cwd/libraries, and finally cwd.
Type: [string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)
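Both constants are exported by the bindings and can be inspected directly; a sketch assuming the `gpt4all` import name:
```js
import { DEFAULT_DIRECTORY, DEFAULT_LIBRARIES_DIRECTORY } from 'gpt4all';
console.log(DEFAULT_DIRECTORY);           // model cache, e.g. (homedir)/.cache/gpt4all
console.log(DEFAULT_LIBRARIES_DIRECTORY); // semicolon-separated library search paths
```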
#### downloadModel
Initiates the download of a model file of a specific model type.
By default this downloads without waiting. Use the returned controller to alter this behavior.
##### Parameters
* `modelName` **[ModelFile](#modelfile)** The model file to be downloaded.
* `options` **DownloadOptions** to pass into the downloader. Default is { location: (cwd), debug: false }.
##### Examples
```javascript
const controller = download('ggml-gpt4all-j-v1.3-groovy.bin')
controller.promise().then(() => console.log('Downloaded!'))
```
* Throws **[Error](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Error)** If the model already exists in the specified location.
* Throws **[Error](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Error)** If the model cannot be found at the specified url.
Returns **[DownloadController](#downloadcontroller)** object that allows controlling the download process.
#### DownloadModelOptions
Options for the model download process.
##### modelPath
location to download the model.
Default is process.cwd(), or the current working directory
Type: [string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)
##### debug
Debug mode -- check how long it took to download in seconds
Type: [boolean](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Boolean)
##### url
Remote download url. Defaults to `https://gpt4all.io/models`
Type: [string](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String)
##### md5sum
Whether to verify the hash of the download to ensure a proper download occurred.
Type: [boolean](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Boolean)
#### DownloadController
Model download controller.
##### cancel
Cancel the request to download from gpt4all website if this is called.
Type: function (): void
##### promise
Convert the downloader into a promise, allowing people to await and manage its lifetime
Type: function (): [Promise](https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Promise)\<void>
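A sketch tying together the download function, the documented `DownloadModelOptions`, and the controller's `cancel`/`promise` members; the `downloadModel` name follows this section's heading and is assumed here, and the calls run inside an async context:
```js
const controller = downloadModel('ggml-gpt4all-j-v1.3-groovy.bin', {
    modelPath: process.cwd(), // documented default location
    md5sum: true,             // verify the downloaded file's hash
    debug: false,
});
// Either wait for the download to finish...
await controller.promise();
// ...or abort it early with controller.cancel();
```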

View File

@@ -13,6 +13,7 @@ nav:
- 'GPT4All in Python':
- 'Generation': 'gpt4all_python.md'
- 'Embedding': 'gpt4all_python_embedding.md'
- 'GPT4ALL in NodeJs': 'gpt4all_typescript.md'
- 'GPT4All Chat Client': 'gpt4all_chat.md'
- 'gpt4all_cli.md'
# - 'Tutorials':

View File

@@ -1,3 +1,10 @@
node_modules/
build/
prebuilds/
.yarn/*
!.yarn/patches
!.yarn/plugins
!.yarn/releases
!.yarn/sdks
!.yarn/versions
runtimes/

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1 @@
yarnPath: .yarn/releases/yarn-3.6.1.cjs

View File

@@ -1,17 +1,24 @@
### Javascript Bindings
# GPT4All Node.js API
```sh
yarn add gpt4all@alpha
npm install gpt4all@alpha
pnpm install gpt4all@alpha
```
The original [GPT4All typescript bindings](https://github.com/nomic-ai/gpt4all-ts) are now out of date.
- created by [jacoobes](https://github.com/jacoobes) and [nomic ai](https://home.nomic.ai) :D, for all to use.
* New bindings created by [jacoobes](https://github.com/jacoobes), [limez](https://github.com/iimez) and the [nomic ai community](https://home.nomic.ai), for all to use.
* [Documentation](#Documentation)
### Chat Completion (alpha)
### Code (alpha)
```js
import { LLModel, createCompletion, DEFAULT_DIRECTORY, DEFAULT_LIBRARIES_DIRECTORY } from '../src/gpt4all.js'
import { createCompletion, loadModel } from '../src/gpt4all.js'
const ll = new LLModel({
model_name: 'ggml-vicuna-7b-1.1-q4_2.bin',
model_path: './',
library_path: DEFAULT_LIBRARIES_DIRECTORY
});
const ll = await loadModel('ggml-vicuna-7b-1.1-q4_2', { verbose: true });
const response = await createCompletion(ll, [
{ role : 'system', content: 'You are meant to be annoying and unhelpful.' },
@@ -19,80 +26,125 @@ const response = await createCompletion(ll, [
]);
```
### API
- The nodejs api has made strides to mirror the python api. It is not 100% mirrored, but many pieces of the api resemble its python counterpart.
- [docs](./docs/api.md)
### Embedding (alpha)
```js
import { createEmbedding, loadModel } from '../src/gpt4all.js'
const ll = await loadModel('ggml-all-MiniLM-L6-v2-f16', { verbose: true });
const fltArray = createEmbedding(ll, "Pain is inevitable, suffering optional");
```
### API
* The nodejs api has made strides to mirror the python api. It is not 100% mirrored, but many pieces of the api resemble its python counterpart.
* Everything should work out the box.
* [docs](./docs/api.md)
### Build Instructions
- As of 05/21/2023, Tested on windows (MSVC). (somehow got it to work on MSVC 🤯)
- binding.gyp is compile config
- Tested on Ubuntu. Everything seems to work fine
- MingW works as well to build the gpt4all-backend. HOWEVER, this package works only with MSVC built dlls.
* binding.gyp is compile config
* Tested on Ubuntu. Everything seems to work fine
* Tested on Windows. Everything works fine.
* Sparse testing on mac os.
* MingW works as well to build the gpt4all-backend. **HOWEVER**, this package works only with MSVC built dlls.
### Requirements
- git
- [node.js >= 18.0.0](https://nodejs.org/en)
- [yarn](https://yarnpkg.com/)
- [node-gyp](https://github.com/nodejs/node-gyp)
- all of its requirements.
### Build
* git
* [node.js >= 18.0.0](https://nodejs.org/en)
* [yarn](https://yarnpkg.com/)
* [node-gyp](https://github.com/nodejs/node-gyp)
* all of its requirements.
* (unix) gcc version 12
* (win) msvc version 143
* Can be obtained with visual studio 2022 build tools
* python 3
### Build (from source)
```sh
git clone https://github.com/nomic-ai/gpt4all.git
cd gpt4all-bindings/typescript
```
- The below shell commands assume the current working directory is `typescript`.
- To Build and Rebuild:
```sh
yarn
```
- llama.cpp git submodule for gpt4all can be possibly absent. If this is the case, make sure to run in llama.cpp parent directory
```sh
* The below shell commands assume the current working directory is `typescript`.
* To Build and Rebuild:
```sh
yarn
```
* llama.cpp git submodule for gpt4all can be possibly absent. If this is the case, make sure to run in llama.cpp parent directory
```sh
git submodule update --init --depth 1 --recursive
```
```
**AS OF NEW BACKEND** to build the backend,
```sh
yarn build:backend
```
This will build platform-dependent dynamic libraries, and will be located in runtimes/(platform)/native The only current way to use them is to put them in the current working directory of your application. That is, **WHEREVER YOU RUN YOUR NODE APPLICATION**
- llama-xxxx.dll is required.
- According to whatever model you are using, you'll need to select the proper model loader.
- For example, if you running an Mosaic MPT model, you will need to select the mpt-(buildvariant).(dynamiclibrary)
* llama-xxxx.dll is required.
* According to whatever model you are using, you'll need to select the proper model loader.
* For example, if you running an Mosaic MPT model, you will need to select the mpt-(buildvariant).(dynamiclibrary)
### Test
```sh
yarn test
```
### Source Overview
#### src/
- Extra functions to help aid devex
- Typings for the native node addon
- the javascript interface
* Extra functions to help aid devex
* Typings for the native node addon
* the javascript interface
#### test/
- simple unit testings for some functions exported.
- more advanced ai testing is not handled
* simple unit testings for some functions exported.
* more advanced ai testing is not handled
#### spec/
- Average look and feel of the api
- Should work assuming a model and libraries are installed locally in working directory
* Average look and feel of the api
* Should work assuming a model and libraries are installed locally in working directory
#### index.cc
- The bridge between nodejs and c. Where the bindings are.
#### prompt.cc
- Handling prompting and inference of models in a threadsafe, asynchronous way.
#### docs/
- Autogenerated documentation using the script `yarn docs:build`
* The bridge between nodejs and c. Where the bindings are.
#### prompt.cc
* Handling prompting and inference of models in a threadsafe, asynchronous way.
#### docs/
* Autogenerated documentation using the script `yarn docs:build`
### Known Issues
* why your model may be spewing bull 💩
- The downloaded model is broken (just reinstall or download from official site)
- That's it so far
### Roadmap
This package is in active development, and breaking changes may happen until the api stabilizes. Here's what's the todo list:
- [x] prompt models via a threadsafe function in order to have proper non blocking behavior in nodejs
- [ ] createTokenStream, an async iterator that streams each token emitted from the model. Planning on following this [example](https://github.com/nodejs/node-addon-examples/tree/main/threadsafe-async-iterator)
- [ ] proper unit testing (integrate with circle ci)
- [ ] publish to npm under alpha tag `gpt4all@alpha`
- [ ] have more people test on other platforms (mac tester needed)
- [x] switch to new pluggable backend
* \[x] prompt models via a threadsafe function in order to have proper non blocking behavior in nodejs
* \[ ] ~~createTokenStream, an async iterator that streams each token emitted from the model. Planning on following this [example](https://github.com/nodejs/node-addon-examples/tree/main/threadsafe-async-iterator)~~ May not implement unless someone else can complete
* \[x] proper unit testing (integrate with circle ci)
* \[x] publish to npm under alpha tag `gpt4all@alpha`
* \[x] have more people test on other platforms (mac tester needed)
* \[x] switch to new pluggable backend
* \[ ] NPM bundle size reduction via optionalDependencies strategy (need help)
- Should include prebuilds to avoid painful node-gyp errors
* \[ ] createChatSession ( the python equivalent to create\_chat\_session )
### Documentation

View File

@@ -0,0 +1,62 @@
{
"targets": [
{
"target_name": "gpt4all", # gpt4all-ts will cause compile error
"include_dirs": [
"<!@(node -p \"require('node-addon-api').include\")",
"gpt4all-backend",
],
"sources": [
# PREVIOUS VERSION: had to required the sources, but with newest changes do not need to
#"../../gpt4all-backend/llama.cpp/examples/common.cpp",
#"../../gpt4all-backend/llama.cpp/ggml.c",
#"../../gpt4all-backend/llama.cpp/llama.cpp",
# "../../gpt4all-backend/utils.cpp",
"gpt4all-backend/llmodel_c.cpp",
"gpt4all-backend/llmodel.cpp",
"prompt.cc",
"index.cc",
],
"conditions": [
['OS=="mac"', {
'xcode_settings': {
'GCC_ENABLE_CPP_EXCEPTIONS': 'YES'
},
'defines': [
'LIB_FILE_EXT=".dylib"',
'NAPI_CPP_EXCEPTIONS',
],
'cflags_cc': [
"-fexceptions"
]
}],
['OS=="win"', {
'defines': [
'LIB_FILE_EXT=".dll"',
'NAPI_CPP_EXCEPTIONS',
],
"msvs_settings": {
"VCCLCompilerTool": {
"AdditionalOptions": [
"/std:c++20",
"/EHsc",
],
},
},
}],
['OS=="linux"', {
'defines': [
'LIB_FILE_EXT=".so"',
'NAPI_CPP_EXCEPTIONS',
],
'cflags_cc!': [
'-fno-rtti',
],
'cflags_cc': [
'-std=c++2a',
'-fexceptions'
]
}]
]
}]
}

View File

@@ -2,7 +2,6 @@
"targets": [
{
"target_name": "gpt4all", # gpt4all-ts will cause compile error
"cflags_cc!": [ "-fno-exceptions"],
"include_dirs": [
"<!@(node -p \"require('node-addon-api').include\")",
"../../gpt4all-backend",
@@ -20,9 +19,15 @@
],
"conditions": [
['OS=="mac"', {
'xcode_settings': {
'GCC_ENABLE_CPP_EXCEPTIONS': 'YES'
},
'defines': [
'LIB_FILE_EXT=".dylib"',
'NAPI_CPP_EXCEPTIONS',
],
'cflags_cc': [
"-fexceptions"
]
}],
['OS=="win"', {
@@ -48,7 +53,8 @@
'-fno-rtti',
],
'cflags_cc': [
'-std=c++20'
'-std=c++2a',
'-fexceptions'
]
}]
]

View File

@@ -1,623 +0,0 @@
<!-- Generated by documentation.js. Update this documentation by updating the source code. -->
### Table of Contents
* [download][1]
* [Parameters][2]
* [Examples][3]
* [DownloadOptions][4]
* [location][5]
* [debug][6]
* [url][7]
* [DownloadController][8]
* [cancel][9]
* [promise][10]
* [ModelType][11]
* [ModelFile][12]
* [gptj][13]
* [llama][14]
* [mpt][15]
* [type][16]
* [LLModel][17]
* [constructor][18]
* [Parameters][19]
* [type][20]
* [name][21]
* [stateSize][22]
* [threadCount][23]
* [setThreadCount][24]
* [Parameters][25]
* [raw\_prompt][26]
* [Parameters][27]
* [isModelLoaded][28]
* [setLibraryPath][29]
* [Parameters][30]
* [getLibraryPath][31]
* [createCompletion][32]
* [Parameters][33]
* [Examples][34]
* [CompletionOptions][35]
* [verbose][36]
* [hasDefaultHeader][37]
* [hasDefaultFooter][38]
* [PromptMessage][39]
* [role][40]
* [content][41]
* [prompt\_tokens][42]
* [completion\_tokens][43]
* [total\_tokens][44]
* [CompletionReturn][45]
* [model][46]
* [usage][47]
* [choices][48]
* [CompletionChoice][49]
* [message][50]
* [LLModelPromptContext][51]
* [logits\_size][52]
* [tokens\_size][53]
* [n\_past][54]
* [n\_ctx][55]
* [n\_predict][56]
* [top\_k][57]
* [top\_p][58]
* [temp][59]
* [n\_batch][60]
* [repeat\_penalty][61]
* [repeat\_last\_n][62]
* [context\_erase][63]
* [createTokenStream][64]
* [Parameters][65]
* [DEFAULT\_DIRECTORY][66]
* [DEFAULT\_LIBRARIES\_DIRECTORY][67]
## download
Initiates the download of a model file of a specific model type.
By default this downloads without waiting. use the controller returned to alter this behavior.
### Parameters
* `model` **[ModelFile][12]** The model file to be downloaded.
* `options` **[DownloadOptions][4]** to pass into the downloader. Default is { location: (cwd), debug: false }.
### Examples
```javascript
const controller = download('ggml-gpt4all-j-v1.3-groovy.bin')
controller.promise().then(() => console.log('Downloaded!'))
```
* Throws **[Error][68]** If the model already exists in the specified location.
* Throws **[Error][68]** If the model cannot be found at the specified url.
Returns **[DownloadController][8]** object that allows controlling the download process.
## DownloadOptions
Options for the model download process.
### location
location to download the model.
Default is process.cwd(), or the current working directory
Type: [string][69]
### debug
Debug mode -- check how long it took to download in seconds
Type: [boolean][70]
### url
Remote download url. Defaults to `https://gpt4all.io/models`
Type: [string][69]
## DownloadController
Model download controller.
### cancel
Cancel the request to download from gpt4all website if this is called.
Type: function (): void
### promise
Convert the downloader into a promise, allowing people to await and manage its lifetime
Type: function (): [Promise][71]\<void>
## ModelType
Type of the model
Type: (`"gptj"` | `"llama"` | `"mpt"`)
## ModelFile
Full list of models available
### gptj
List of GPT-J Models
Type: (`"ggml-gpt4all-j-v1.3-groovy.bin"` | `"ggml-gpt4all-j-v1.2-jazzy.bin"` | `"ggml-gpt4all-j-v1.1-breezy.bin"` | `"ggml-gpt4all-j.bin"`)
### llama
List Llama Models
Type: (`"ggml-gpt4all-l13b-snoozy.bin"` | `"ggml-vicuna-7b-1.1-q4_2.bin"` | `"ggml-vicuna-13b-1.1-q4_2.bin"` | `"ggml-wizardLM-7B.q4_2.bin"` | `"ggml-stable-vicuna-13B.q4_2.bin"` | `"ggml-nous-gpt4-vicuna-13b.bin"`)
### mpt
List of MPT Models
Type: (`"ggml-mpt-7b-base.bin"` | `"ggml-mpt-7b-chat.bin"` | `"ggml-mpt-7b-instruct.bin"`)
## type
Model architecture. This argument currently does not have any functionality and is just used as descriptive identifier for user.
Type: [ModelType][11]
## LLModel
LLModel class representing a language model.
This is a base class that provides common functionality for different types of language models.
### constructor
Initialize a new LLModel.
#### Parameters
* `path` **[string][69]** Absolute path to the model file.
<!---->
* Throws **[Error][68]** If the model file does not exist.
### type
either 'gpt', mpt', or 'llama' or undefined
Returns **([ModelType][11] | [undefined][72])**&#x20;
### name
The name of the model.
Returns **[ModelFile][12]**&#x20;
### stateSize
Get the size of the internal state of the model.
NOTE: This state data is specific to the type of model you have created.
Returns **[number][73]** the size in bytes of the internal state of the model
### threadCount
Get the number of threads used for model inference.
The default is the number of physical cores your computer has.
Returns **[number][73]** The number of threads used for model inference.
### setThreadCount
Set the number of threads used for model inference.
#### Parameters
* `newNumber` **[number][73]** The new number of threads.
Returns **void**&#x20;
### raw\_prompt
Prompt the model with a given input and optional parameters.
This is the raw output from std out.
Use the prompt function exported for a value
#### Parameters
* `q` **[string][69]** The prompt input.
* `params` **Partial<[LLModelPromptContext][51]>?** Optional parameters for the prompt context.
Returns **any** The result of the model prompt.
### isModelLoaded
Whether the model is loaded or not.
Returns **[boolean][70]**&#x20;
### setLibraryPath
Where to search for the pluggable backend libraries
#### Parameters
* `s` **[string][69]**&#x20;
Returns **void**&#x20;
### getLibraryPath
Where to get the pluggable backend libraries
Returns **[string][69]**&#x20;
## createCompletion
The nodejs equivalent to python binding's chat\_completion
### Parameters
* `llmodel` **[LLModel][17]** The language model object.
* `messages` **[Array][74]<[PromptMessage][39]>** The array of messages for the conversation.
* `options` **[CompletionOptions][35]** The options for creating the completion.
### Examples
```javascript
const llmodel = new LLModel(model)
const messages = [
{ role: 'system', message: 'You are a weather forecaster.' },
{ role: 'user', message: 'should i go out today?' } ]
const completion = await createCompletion(llmodel, messages, {
verbose: true,
temp: 0.9,
})
console.log(completion.choices[0].message.content)
// No, it's going to be cold and rainy.
```
Returns **[CompletionReturn][45]** The completion result.
## CompletionOptions
**Extends Partial\<LLModelPromptContext>**
The options for creating the completion.
### verbose
Indicates if verbose logging is enabled.
Type: [boolean][70]
### hasDefaultHeader
Indicates if the default header is included in the prompt.
Type: [boolean][70]
### hasDefaultFooter
Indicates if the default footer is included in the prompt.
Type: [boolean][70]
## PromptMessage
A message in the conversation, identical to OpenAI's chat message.
### role
The role of the message.
Type: (`"system"` | `"assistant"` | `"user"`)
### content
The message content.
Type: [string][69]
## prompt\_tokens
The number of tokens used in the prompt.
Type: [number][73]
## completion\_tokens
The number of tokens used in the completion.
Type: [number][73]
## total\_tokens
The total number of tokens used.
Type: [number][73]
## CompletionReturn
The result of the completion, similar to OpenAI's format.
### model
The model name.
Type: [ModelFile][12]
### usage
Token usage report.
Type: {prompt\_tokens: [number][73], completion\_tokens: [number][73], total\_tokens: [number][73]}
### choices
The generated completions.
Type: [Array][74]<[CompletionChoice][49]>
## CompletionChoice
A completion choice, similar to OpenAI's format.
### message
Response message
Type: [PromptMessage][39]
## LLModelPromptContext
Model inference arguments for generating completions.
### logits\_size
The size of the raw logits vector.
Type: [number][73]
### tokens\_size
The size of the raw tokens vector.
Type: [number][73]
### n\_past
The number of tokens in the past conversation.
Type: [number][73]
### n\_ctx
The number of tokens possible in the context window.
Type: [number][73]
### n\_predict
The number of tokens to predict.
Type: [number][73]
### top\_k
The top-k logits to sample from.
Type: [number][73]
### top\_p
The nucleus sampling probability threshold.
Type: [number][73]
### temp
The temperature to adjust the model's output distribution.
Type: [number][73]
### n\_batch
The number of predictions to generate in parallel.
Type: [number][73]
### repeat\_penalty
The penalty factor for repeated tokens.
Type: [number][73]
### repeat\_last\_n
The number of last tokens to penalize.
Type: [number][73]
### context\_erase
The percentage of context to erase if the context window is exceeded.
Type: [number][73]
## createTokenStream
TODO: Help wanted to implement this
### Parameters
* `llmodel` **[LLModel][17]**&#x20;
* `messages` **[Array][74]<[PromptMessage][39]>**&#x20;
* `options` **[CompletionOptions][35]**&#x20;
Returns **function (ll: [LLModel][17]): AsyncGenerator<[string][69]>**&#x20;
## DEFAULT\_DIRECTORY
From python api:
models will be stored in (homedir)/.cache/gpt4all/\`
Type: [string][69]
## DEFAULT\_LIBRARIES\_DIRECTORY
From python api:
The default path for dynamic libraries to be stored.
You may separate paths by a semicolon to search in multiple areas.
This searches DEFAULT\_DIRECTORY/libraries, cwd/libraries, and finally cwd.
Type: [string][69]
[1]: #download
[2]: #parameters
[3]: #examples
[4]: #downloadoptions
[5]: #location
[6]: #debug
[7]: #url
[8]: #downloadcontroller
[9]: #cancel
[10]: #promise
[11]: #modeltype
[12]: #modelfile
[13]: #gptj
[14]: #llama
[15]: #mpt
[16]: #type
[17]: #llmodel
[18]: #constructor
[19]: #parameters-1
[20]: #type-1
[21]: #name
[22]: #statesize
[23]: #threadcount
[24]: #setthreadcount
[25]: #parameters-2
[26]: #raw_prompt
[27]: #parameters-3
[28]: #ismodelloaded
[29]: #setlibrarypath
[30]: #parameters-4
[31]: #getlibrarypath
[32]: #createcompletion
[33]: #parameters-5
[34]: #examples-1
[35]: #completionoptions
[36]: #verbose
[37]: #hasdefaultheader
[38]: #hasdefaultfooter
[39]: #promptmessage
[40]: #role
[41]: #content
[42]: #prompt_tokens
[43]: #completion_tokens
[44]: #total_tokens
[45]: #completionreturn
[46]: #model
[47]: #usage
[48]: #choices
[49]: #completionchoice
[50]: #message
[51]: #llmodelpromptcontext
[52]: #logits_size
[53]: #tokens_size
[54]: #n_past
[55]: #n_ctx
[56]: #n_predict
[57]: #top_k
[58]: #top_p
[59]: #temp
[60]: #n_batch
[61]: #repeat_penalty
[62]: #repeat_last_n
[63]: #context_erase
[64]: #createtokenstream
[65]: #parameters-6
[66]: #default_directory
[67]: #default_libraries_directory
[68]: https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Error
[69]: https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/String
[70]: https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Boolean
[71]: https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Promise
[72]: https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/undefined
[73]: https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Number
[74]: https://developer.mozilla.org/docs/Web/JavaScript/Reference/Global_Objects/Array

View File

@@ -10,6 +10,7 @@ Napi::Function NodeModelWrapper::GetClass(Napi::Env env) {
InstanceMethod("stateSize", &NodeModelWrapper::StateSize),
InstanceMethod("raw_prompt", &NodeModelWrapper::Prompt),
InstanceMethod("setThreadCount", &NodeModelWrapper::SetThreadCount),
InstanceMethod("embed", &NodeModelWrapper::GenerateEmbedding),
InstanceMethod("threadCount", &NodeModelWrapper::ThreadCount),
InstanceMethod("getLibraryPath", &NodeModelWrapper::GetLibraryPath),
});
@@ -66,6 +67,7 @@ Napi::Function NodeModelWrapper::GetClass(Napi::Env env) {
if(GetInference() == nullptr) {
std::cerr << "Tried searching libraries in \"" << library_path << "\"" << std::endl;
std::cerr << "Tried searching for model weight in \"" << full_weight_path << "\"" << std::endl;
std::cerr << "Do you have runtime libraries installed?" << std::endl;
Napi::Error::New(env, "Had an issue creating llmodel object, inference is null").ThrowAsJavaScriptException();
return;
}
@@ -90,6 +92,33 @@ Napi::Function NodeModelWrapper::GetClass(Napi::Env env) {
return Napi::Number::New(info.Env(), static_cast<int64_t>(llmodel_get_state_size(GetInference())));
}
Napi::Value NodeModelWrapper::GenerateEmbedding(const Napi::CallbackInfo& info) {
auto env = info.Env();
std::string text = info[0].As<Napi::String>().Utf8Value();
size_t embedding_size = 0;
float* arr = llmodel_embedding(GetInference(), text.c_str(), &embedding_size);
if(arr == nullptr) {
Napi::Error::New(
env,
"Cannot embed. native embedder returned 'nullptr'"
).ThrowAsJavaScriptException();
return env.Undefined();
}
if(embedding_size == 0 && text.size() != 0 ) {
std::cout << "Warning: embedding length 0 but input text length > 0" << std::endl;
}
Napi::Float32Array js_array = Napi::Float32Array::New(env, embedding_size);
for (size_t i = 0; i < embedding_size; ++i) {
float element = *(arr + i);
js_array[i] = element;
}
llmodel_free_embedding(arr);
return js_array;
}
/**
* Generate a response using the model.
@@ -156,12 +185,12 @@ Napi::Function NodeModelWrapper::GetClass(Napi::Env env) {
promptContext.context_erase = inputObject.Get("context_erase").As<Napi::Number>().FloatValue();
}
//copy to protect llmodel resources when splitting to new thread
llmodel_prompt_context copiedPrompt = promptContext;
std::string copiedQuestion = question;
PromptWorkContext pc = {
copiedQuestion,
inference_.load(),
std::ref(inference_),
copiedPrompt,
};
auto threadSafeContext = new TsfnContext(env, pc);
@@ -201,7 +230,7 @@ Napi::Function NodeModelWrapper::GetClass(Napi::Env env) {
}
llmodel_model NodeModelWrapper::GetInference() {
return *inference_.load();
return *inference_;
}
//Exports Bindings
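The embed binding above copies the native float buffer into a JavaScript Float32Array and then frees the C array with llmodel_free_embedding, so the JS side never holds native memory. A minimal usage sketch through the exported createEmbedding wrapper (model name taken from the embedding spec added in this PR; everything else is illustrative):

```js
import { loadModel, createEmbedding } from 'gpt4all'

// Only embedding-capable models (BERT-based, at the time of this PR) produce useful vectors.
const embedder = await loadModel('ggml-all-MiniLM-L6-v2-f16.bin', { verbose: true })

const vec = createEmbedding(embedder, 'Accept your current situation')
console.log(vec instanceof Float32Array, vec.length) // true, <embedding size>
```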

View File

@ -23,6 +23,7 @@ public:
void SetThreadCount(const Napi::CallbackInfo& info);
Napi::Value getName(const Napi::CallbackInfo& info);
Napi::Value ThreadCount(const Napi::CallbackInfo& info);
Napi::Value GenerateEmbedding(const Napi::CallbackInfo& info);
/*
* The path that is used to search for the dynamic libraries
*/
@ -36,7 +37,7 @@ private:
/**
* The underlying inference that interfaces with the C interface
*/
std::atomic<std::shared_ptr<llmodel_model>> inference_;
std::shared_ptr<llmodel_model> inference_;
std::string type;
// corresponds to LLModel::name() in typescript

View File

@ -1,17 +1,29 @@
{
"name": "gpt4all",
"version": "2.0.0",
"packageManager": "yarn@3.5.1",
"version": "2.1.0-alpha",
"packageManager": "yarn@3.6.1",
"main": "src/gpt4all.js",
"repository": "nomic-ai/gpt4all",
"scripts": {
"test": "node ./test/index.mjs",
"build:backend": "node scripts/build.js",
"install": "node-gyp-build",
"prebuild": "node scripts/prebuild.js",
"docs:build": "documentation build ./src/gpt4all.d.ts --parse-extension d.ts --format md --output docs/api.md"
"test": "jest",
"build:backend": "node scripts/build.js",
"build": "node-gyp-build",
"predocs:build": "node scripts/docs.js",
"docs:build": "documentation readme ./src/gpt4all.d.ts --parse-extension js d.ts --format md --section documentation --readme-file ../python/docs/gpt4all_typescript.md",
"postdocs:build": "node scripts/docs.js"
},
"files": [
"src/**/*",
"runtimes/**/*",
"binding.gyp",
"prebuilds/**/*",
"*.h",
"*.cc",
"gpt4all-backend/**/*"
],
"dependencies": {
"md5-file": "^5.0.0",
"mkdirp": "^3.0.1",
"node-addon-api": "^6.1.0",
"node-gyp-build": "^4.6.0"
@ -19,14 +31,21 @@
"devDependencies": {
"@types/node": "^20.1.5",
"documentation": "^14.0.2",
"jest": "^29.5.0",
"prebuildify": "^5.0.1",
"prettier": "^2.8.8"
},
"optionalDependencies": {
"node-gyp": "9.x.x"
},
"engines": {
"node": ">= 18.x.x"
},
"prettier": {
"endOfLine": "lf",
"tabWidth": 4
},
"jest": {
"verbose": true
}
}

View File

@ -4,11 +4,12 @@
TsfnContext::TsfnContext(Napi::Env env, const PromptWorkContext& pc)
: deferred_(Napi::Promise::Deferred::New(env)), pc(pc) {
}
namespace {
static std::string *res;
}
std::mutex mtx;
static thread_local std::string res;
bool response_callback(int32_t token_id, const char *response) {
res+=response;
*res += response;
return token_id != -1;
}
bool recalculate_callback (bool isrecalculating) {
@ -21,10 +22,12 @@ bool prompt_callback (int32_t tid) {
// The thread entry point. This takes as its arguments the specific
// threadsafe-function context created inside the main thread.
void threadEntry(TsfnContext* context) {
static std::mutex mtx;
std::lock_guard<std::mutex> lock(mtx);
res = &context->pc.res;
// Perform a call into JavaScript.
napi_status status =
context->tsfn.NonBlockingCall(&context->pc,
context->tsfn.BlockingCall(&context->pc,
[](Napi::Env env, Napi::Function jsCallback, PromptWorkContext* pc) {
llmodel_prompt(
*pc->inference_,
@ -34,8 +37,6 @@ void threadEntry(TsfnContext* context) {
&recalculate_callback,
&pc->prompt_params
);
jsCallback.Call({ Napi::String::New(env, res)} );
res.clear();
});
if (status != napi_ok) {
@ -43,7 +44,6 @@ void threadEntry(TsfnContext* context) {
"ThreadEntry",
"Napi::ThreadSafeNapi::Function.NonBlockingCall() failed");
}
// Release the thread-safe function. This decrements the internal thread
// count, and will perform finalization since the count will reach 0.
context->tsfn.Release();
@ -52,11 +52,9 @@ void threadEntry(TsfnContext* context) {
void FinalizerCallback(Napi::Env env,
void* finalizeData,
TsfnContext* context) {
// Join the thread
context->nativeThread.join();
// Resolve the Promise previously returned to JS via the CreateTSFN method.
context->deferred_.Resolve(Napi::Boolean::New(env, true));
delete context;
// Resolve the Promise previously returned to JS
context->deferred_.Resolve(Napi::String::New(env, context->pc.res));
// Wait for the thread to finish executing before proceeding.
context->nativeThread.join();
delete context;
}
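With this change the response is accumulated into pc.res via response_callback, and FinalizerCallback resolves the deferred promise with the complete string only after the native thread has been joined. The typings still mark raw_prompt's return type as TODO, but gpt4all.js now awaits the value it returns, so from JavaScript the call can be treated as a promise. A hedged sketch (model name from the chat spec in this PR; parameters omitted here fall back to whatever defaults the binding sets):

```js
import { loadModel } from 'gpt4all'

const model = await loadModel('orca-mini-3b.ggmlv3.q4_0.bin', { verbose: true })

// The per-token callback is effectively unused here: the complete response arrives
// when the promise resolves, after FinalizerCallback has joined the worker thread.
// Normally you would call createCompletion instead of the raw binding.
const response = await model.raw_prompt('What is 1 + 1?', {}, () => {})
console.log(response)
```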

View File

@ -10,8 +10,10 @@
#include <memory>
struct PromptWorkContext {
std::string question;
std::shared_ptr<llmodel_model> inference_;
std::shared_ptr<llmodel_model>& inference_;
llmodel_prompt_context prompt_params;
std::string res;
};
struct TsfnContext {
@ -29,12 +31,12 @@ public:
// The thread entry point. This takes as its arguments the specific
// threadsafe-function context created inside the main thread.
void threadEntry(TsfnContext* context);
void threadEntry(TsfnContext*);
// The thread-safe function finalizer callback. This callback executes
// at destruction of thread-safe function, taking as arguments the finalizer
// data and threadsafe-function context.
void FinalizerCallback(Napi::Env env, void* finalizeData, TsfnContext* context);
void FinalizerCallback(Napi::Env, void* finalizeData, TsfnContext*);
bool response_callback(int32_t token_id, const char *response);
bool recalculate_callback (bool isrecalculating);

View File

@ -2,7 +2,6 @@ const { spawn } = require("node:child_process");
const { resolve } = require("path");
const args = process.argv.slice(2);
const platform = process.platform;
//windows 64bit or 32
if (platform === "win32") {
const path = "scripts/build_msvc.bat";
@ -10,8 +9,9 @@ if (platform === "win32") {
process.on("data", (s) => console.log(s.toString()));
} else if (platform === "linux" || platform === "darwin") {
const path = "scripts/build_unix.sh";
const bash = spawn(`sh`, [path, ...args]);
bash.stdout.on("data", (s) => console.log(s.toString()), {
spawn(`sh `, [path, args], {
shell: true,
stdio: "inherit",
});
process.on("data", (s) => console.log(s.toString()));
}

View File

@ -0,0 +1,33 @@
@echo off
set "BUILD_TYPE=Release"
set "BUILD_DIR=.\build\win-x64-msvc"
set "LIBS_DIR=.\runtimes\win32-x64"
REM Cleanup env
rmdir /s /q %BUILD_DIR%
REM Create directories
mkdir %BUILD_DIR%
mkdir %LIBS_DIR%
REM Build
cmake -DBUILD_SHARED_LIBS=ON -DCMAKE_BUILD_TYPE=%BUILD_TYPE% -S ..\..\gpt4all-backend -B %BUILD_DIR% -A x64
:BUILD
REM Build the project
cmake --build "%BUILD_DIR%" --parallel --config %BUILD_TYPE%
REM Check the exit code of the build command
if %errorlevel% neq 0 (
echo Build failed. Retrying...
goto BUILD
)
mkdir runtimes\win32-x64
REM Copy the DLLs to the desired location
del /F /A /Q %LIBS_DIR%
xcopy /Y "%BUILD_DIR%\bin\%BUILD_TYPE%\*.dll" runtimes\win32-x64\native\
echo Batch script execution completed.

View File

@ -0,0 +1,8 @@
// Maybe some command-line piping would work better, but there is no obvious platform-independent tool for it.
const fs = require('fs');
const newPath = '../python/docs/gpt4all_typescript.md';
const filepath = 'README.md';
const data = fs.readFileSync(filepath);
fs.writeFileSync(newPath, data);

View File

@ -6,6 +6,7 @@ async function createPrebuilds(combinations) {
platform,
arch,
napi: true,
targets: ["18.16.0"]
};
try {
await createPrebuild(opts);
@ -33,17 +34,24 @@ function createPrebuild(opts) {
});
}
const prebuildConfigs = [
{ platform: "win32", arch: "x64" },
{ platform: "win32", arch: "arm64" },
// { platform: 'win32', arch: 'armv7' },
{ platform: "darwin", arch: "x64" },
{ platform: "darwin", arch: "arm64" },
// { platform: 'darwin', arch: 'armv7' },
let prebuildConfigs;
if(process.platform === 'win32') {
prebuildConfigs = [
{ platform: "win32", arch: "x64" }
];
} else if(process.platform === 'linux') {
//Unsure if darwin works, need mac tester!
prebuildConfigs = [
{ platform: "linux", arch: "x64" },
{ platform: "linux", arch: "arm64" },
{ platform: "linux", arch: "armv7" },
];
//{ platform: "linux", arch: "arm64" },
//{ platform: "linux", arch: "armv7" },
]
} else if(process.platform === 'darwin') {
prebuildConfigs = [
{ platform: "darwin", arch: "x64" },
{ platform: "darwin", arch: "arm64" },
]
}
createPrebuilds(prebuildConfigs)
.then(() => console.log("All builds succeeded"))

View File

@ -0,0 +1,65 @@
import { LLModel, createCompletion, DEFAULT_DIRECTORY, DEFAULT_LIBRARIES_DIRECTORY, loadModel } from '../src/gpt4all.js'
const ll = await loadModel(
'orca-mini-3b.ggmlv3.q4_0.bin',
{ verbose: true }
);
try {
class Extended extends LLModel {
}
} catch(e) {
console.log("Extending from native class gone wrong " + e)
}
console.log("state size " + ll.stateSize())
console.log("thread count " + ll.threadCount());
ll.setThreadCount(5);
console.log("thread count " + ll.threadCount());
ll.setThreadCount(4);
console.log("thread count " + ll.threadCount());
console.log("name " + ll.name());
console.log("type: " + ll.type());
console.log("Default directory for models", DEFAULT_DIRECTORY);
console.log("Default directory for libraries", DEFAULT_LIBRARIES_DIRECTORY);
const completion1 = await createCompletion(ll, [
{ role : 'system', content: 'You are an advanced mathematician.' },
{ role : 'user', content: 'What is 1 + 1?' },
], { verbose: true })
console.log(completion1.choices[0].message)
const completion2 = await createCompletion(ll, [
{ role : 'system', content: 'You are an advanced mathematician.' },
{ role : 'user', content: 'What is two plus two?' },
], { verbose: true })
console.log(completion2.choices[0].message)
// At the moment, from testing this code, concurrent model prompting is not possible:
// the last prompt gets answered, but the rest are cancelled.
// Threading with llama.cpp may not be feasible (or even possible) here, so the
// commented-out attempt below is left as a reference for anyone who wants to take a shot at it.
//const responses = await Promise.all([
// createCompletion(ll, [
// { role : 'system', content: 'You are an advanced mathematician.' },
// { role : 'user', content: 'What is 1 + 1?' },
// ], { verbose: true }),
// createCompletion(ll, [
// { role : 'system', content: 'You are an advanced mathematician.' },
// { role : 'user', content: 'What is 1 + 1?' },
// ], { verbose: true }),
//
//createCompletion(ll, [
// { role : 'system', content: 'You are an advanced mathematician.' },
// { role : 'user', content: 'What is 1 + 1?' },
//], { verbose: true })
//
//])
//console.log(responses.map(s => s.choices[0].message))

View File

@ -0,0 +1,8 @@
import { loadModel, createEmbedding } from '../src/gpt4all.js'
const embedder = await loadModel("ggml-all-MiniLM-L6-v2-f16.bin", { verbose: true })
console.log(
createEmbedding(embedder, "Accept your current situation")
)

View File

@ -1,46 +0,0 @@
import { LLModel, createCompletion, DEFAULT_DIRECTORY, DEFAULT_LIBRARIES_DIRECTORY } from '../src/gpt4all.js'
const ll = new LLModel({
model_name: 'ggml-vicuna-7b-1.1-q4_2.bin',
model_path: './',
library_path: DEFAULT_LIBRARIES_DIRECTORY
});
try {
class Extended extends LLModel {
}
} catch(e) {
console.log("Extending from native class gone wrong " + e)
}
console.log("state size " + ll.stateSize())
console.log("thread count " + ll.threadCount());
ll.setThreadCount(5);
console.log("thread count " + ll.threadCount());
ll.setThreadCount(4);
console.log("thread count " + ll.threadCount());
console.log("name " + ll.name());
console.log("type: " + ll.type());
console.log("Default directory for models", DEFAULT_DIRECTORY);
console.log("Default directory for libraries", DEFAULT_LIBRARIES_DIRECTORY);
console.log(await createCompletion(
ll,
[
{ role : 'system', content: 'You are a girl who likes playing league of legends.' },
{ role : 'user', content: 'What is the best top laner to play right now?' },
],
{ verbose: false}
));
console.log(await createCompletion(
ll,
[
{ role : 'user', content: 'What is the best bottom laner to play right now?' },
],
))

View File

@ -1,7 +1,6 @@
/// <reference types="node" />
declare module "gpt4all";
export * from "./util.d.ts";
/** Type of the model */
type ModelType = "gptj" | "llama" | "mpt" | "replit";
@ -61,7 +60,7 @@ declare class LLModel {
type(): ModelType | undefined;
/** The name of the model. */
name(): ModelFile;
name(): string;
/**
* Get the size of the internal state of the model.
@ -85,7 +84,7 @@ declare class LLModel {
/**
* Prompt the model with a given input and optional parameters.
* This is the raw output from std out.
* This is the raw output from the model.
* Use the exported createCompletion function for a processed response.
* @param q The prompt input.
* @param params Optional parameters for the prompt context.
@ -93,6 +92,15 @@ declare class LLModel {
*/
raw_prompt(q: string, params: Partial<LLModelPromptContext>, callback: (res: string) => void): void; // TODO work on return type
/**
* Embed text with the model. Keep in mind that
* not all models can embed text (as of 2023-07-16, only BERT-based models can).
* Use the exported createEmbedding function for a friendlier wrapper.
* @param text The text to embed.
* @returns The embedding of the text as a Float32Array.
*/
embed(text: string) : Float32Array
/**
* Whether the model is loaded or not.
*/
@ -115,11 +123,20 @@ interface LoadModelOptions {
verbose?: boolean;
}
/**
* Loads a machine learning model with the specified name. This is the de facto way to create a model.
* By default this will download the model from the official GPT4All website if it is not present at the given path.
*
* @param {string} modelName - The name of the model to load.
* @param {LoadModelOptions|undefined} [options] - (Optional) Additional options for loading the model.
* @returns {Promise<LLModel>} A promise that resolves to an instance of the loaded LLModel.
*/
declare function loadModel(
modelName: string,
options?: LoadModelOptions
): Promise<LLModel>;
/**
* The nodejs equivalent to python binding's chat_completion
* @param {LLModel} llmodel - The language model object.
@ -144,6 +161,19 @@ declare function createCompletion(
options?: CompletionOptions
): Promise<CompletionReturn>;
/**
* The nodejs equivalent to python binding's Embed4All().embed()
* @param {LLModel} llmodel - The language model object.
* @param {string} text - The text to embed.
* @returns {Float32Array} The resulting embedding.
*/
declare function createEmbedding(
llmodel: LLModel,
text: string,
): Float32Array
/**
* The options for creating the completion.
*/
@ -294,6 +324,77 @@ interface PromptMessage {
role: "system" | "assistant" | "user";
content: string;
}
/**
* Initiates the download of a model file of a specific model type.
* By default this downloads without waiting. Use the returned controller to alter this behavior.
* @param {string} modelName - The model file to be downloaded.
* @param {DownloadModelOptions} options - Options to pass into the downloader. Default is { modelPath: (cwd), debug: false }.
* @returns {DownloadController} object that allows controlling the download process.
*
* @throws {Error} If the model already exists in the specified location.
* @throws {Error} If the model cannot be found at the specified url.
*
* @example
* const controller = downloadModel('ggml-gpt4all-j-v1.3-groovy.bin')
* controller.promise().then(() => console.log('Downloaded!'))
*/
declare function downloadModel(
modelName: string,
options?: DownloadModelOptions
): DownloadController;
/**
* Options for the model download process.
*/
interface DownloadModelOptions {
/**
* location to download the model.
* Default is process.cwd(), or the current working directory
*/
modelPath?: string;
/**
* Debug mode -- check how long it took to download in seconds
* @default false
*/
debug?: boolean;
/**
* Remote download url. Defaults to `https://gpt4all.io/models`
* @default https://gpt4all.io/models
*/
url?: string;
/**
* Whether to verify the hash of the download to ensure a proper download occurred.
* @default true
*/
md5sum?: boolean;
}
declare function listModels(): Promise<Record<string, string>[]>;
interface RetrieveModelOptions {
allowDownload?: boolean;
verbose?: boolean;
modelPath?: string;
}
declare function retrieveModel(
model: string,
options?: RetrieveModelOptions
): Promise<string>;
/**
* Model download controller.
*/
interface DownloadController {
/** Cancel the request to download from gpt4all website if this is called. */
cancel: () => void;
/** Convert the downloader into a promise, allowing people to await and manage its lifetime */
promise: () => Promise<void>;
}
export {
ModelType,
ModelFile,
@ -304,7 +405,14 @@ export {
LoadModelOptions,
loadModel,
createCompletion,
createEmbedding,
createTokenStream,
DEFAULT_DIRECTORY,
DEFAULT_LIBRARIES_DIRECTORY,
downloadModel,
retrieveModel,
listModels,
DownloadController,
RetrieveModelOptions,
DownloadModelOptions
};
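Besides the completion and embedding APIs, the declarations above expose the download helpers. A hedged sketch of how listModels and retrieveModel compose (model name reused from the downloadModel JSDoc example):

```js
import { listModels, retrieveModel } from 'gpt4all'

// listModels fetches https://gpt4all.io/models/models.json and returns the parsed entries.
const models = await listModels()
console.log(models.length, models[0]?.filename)

// retrieveModel resolves with a local file path, downloading the model first
// when it is not already present and allowDownload permits it.
const modelPath = await retrieveModel('ggml-gpt4all-j-v1.3-groovy.bin', { verbose: true })
console.log(modelPath)
```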

View File

@ -10,12 +10,12 @@ const {
downloadModel,
appendBinSuffixIfMissing,
} = require("./util.js");
const config = require("./config.js");
const { DEFAULT_DIRECTORY, DEFAULT_LIBRARIES_DIRECTORY } = require("./config.js");
async function loadModel(modelName, options = {}) {
const loadOptions = {
modelPath: config.DEFAULT_DIRECTORY,
librariesPath: config.DEFAULT_LIBRARIES_DIRECTORY,
modelPath: DEFAULT_DIRECTORY,
librariesPath: DEFAULT_LIBRARIES_DIRECTORY,
allowDownload: true,
verbose: true,
...options,
@ -37,7 +37,9 @@ async function loadModel(modelName, options = {}) {
break;
}
}
if(!libPath) {
throw Error("Could not find a valid path from " + libSearchPaths);
}
const llmOptions = {
model_name: appendBinSuffixIfMissing(modelName),
model_path: loadOptions.modelPath,
@ -53,38 +55,40 @@ async function loadModel(modelName, options = {}) {
}
function createPrompt(messages, hasDefaultHeader, hasDefaultFooter) {
let fullPrompt = "";
let fullPrompt = [];
for (const message of messages) {
if (message.role === "system") {
const systemMessage = message.content + "\n";
fullPrompt += systemMessage;
const systemMessage = message.content;
fullPrompt.push(systemMessage);
}
}
if (hasDefaultHeader) {
fullPrompt += `### Instruction:
The prompt below is a question to answer, a task to complete, or a conversation
to respond to; decide which and write an appropriate response.
\n### Prompt:
`;
fullPrompt.push(`### Instruction: The prompt below is a question to answer, a task to complete, or a conversation to respond to; decide which and write an appropriate response.`);
}
let prompt = "### Prompt:";
for (const message of messages) {
if (message.role === "user") {
const user_message = "\n" + message["content"];
fullPrompt += user_message;
const user_message = message["content"];
prompt += user_message;
}
if (message["role"] == "assistant") {
const assistant_message = "\nResponse: " + message["content"];
fullPrompt += assistant_message;
const assistant_message = "Response:" + message["content"];
prompt += assistant_message;
}
}
fullPrompt.push(prompt);
if (hasDefaultFooter) {
fullPrompt += "\n### Response:";
fullPrompt.push("### Response:");
}
return fullPrompt;
return fullPrompt.join('\n');
}
function createEmbedding(llmodel, text) {
return llmodel.embed(text)
}
async function createCompletion(
llmodel,
messages,
@ -98,16 +102,12 @@ async function createCompletion(
const fullPrompt = createPrompt(
messages,
options.hasDefaultHeader ?? true,
options.hasDefaultFooter
options.hasDefaultFooter ?? true
);
if (options.verbose) {
console.log("Sent: " + fullPrompt);
}
const promisifiedRawPrompt = new Promise((resolve, rej) => {
llmodel.raw_prompt(fullPrompt, options, (s) => {
resolve(s);
});
});
const promisifiedRawPrompt = llmodel.raw_prompt(fullPrompt, options, (s) => {});
return promisifiedRawPrompt.then((response) => {
return {
llmodel: llmodel.name(),
@ -128,11 +128,18 @@ async function createCompletion(
});
}
function createTokenStream() {
throw Error("This API has not been completed yet!")
}
module.exports = {
...config,
DEFAULT_LIBRARIES_DIRECTORY,
DEFAULT_DIRECTORY,
LLModel,
createCompletion,
createEmbedding,
downloadModel,
retrieveModel,
loadModel,
createTokenStream
};
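createPrompt now collects the pieces in an array and joins them with newlines instead of concatenating strings; it is internal, but the string it produces is easy to predict. For the messages below (taken from the chat spec in this PR) with the default header and footer, createCompletion logs the assembled prompt when verbose is on:

```js
import { loadModel, createCompletion } from 'gpt4all'

const model = await loadModel('orca-mini-3b.ggmlv3.q4_0.bin')

// With verbose: true this logs "Sent: ..." with the assembled prompt, which is:
//
//   You are an advanced mathematician.
//   ### Instruction: The prompt below is a question to answer, a task to complete, or a conversation to respond to; decide which and write an appropriate response.
//   ### Prompt:What is 1 + 1?
//   ### Response:
const completion = await createCompletion(model, [
    { role: 'system', content: 'You are an advanced mathematician.' },
    { role: 'user', content: 'What is 1 + 1?' },
], { verbose: true })
console.log(completion.choices[0].message)
```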

View File

@ -1,69 +0,0 @@
/// <reference types="node" />
declare module "gpt4all";
/**
* Initiates the download of a model file of a specific model type.
* By default this downloads without waiting. use the controller returned to alter this behavior.
* @param {ModelFile} model - The model file to be downloaded.
* @param {DownloadOptions} options - to pass into the downloader. Default is { location: (cwd), debug: false }.
* @returns {DownloadController} object that allows controlling the download process.
*
* @throws {Error} If the model already exists in the specified location.
* @throws {Error} If the model cannot be found at the specified url.
*
* @example
* const controller = download('ggml-gpt4all-j-v1.3-groovy.bin')
* controller.promise().then(() => console.log('Downloaded!'))
*/
declare function downloadModel(
modelName: string,
options?: DownloadModelOptions
): DownloadController;
/**
* Options for the model download process.
*/
export interface DownloadModelOptions {
/**
* location to download the model.
* Default is process.cwd(), or the current working directory
*/
modelPath?: string;
/**
* Debug mode -- check how long it took to download in seconds
* @default false
*/
debug?: boolean;
/**
* Remote download url. Defaults to `https://gpt4all.io/models`
* @default https://gpt4all.io/models
*/
url?: string;
}
declare function listModels(): Promise<Record<string, string>[]>;
interface RetrieveModelOptions {
allowDownload?: boolean;
verbose?: boolean;
modelPath?: string;
}
declare async function retrieveModel(
model: string,
options?: RetrieveModelOptions
): Promise<string>;
/**
* Model download controller.
*/
interface DownloadController {
/** Cancel the request to download from gpt4all website if this is called. */
cancel: () => void;
/** Convert the downloader into a promise, allowing people to await and manage its lifetime */
promise: () => Promise<void>;
}
export { downloadModel, DownloadModelOptions, DownloadController, listModels, retrieveModel, RetrieveModelOptions };

View File

@ -1,9 +1,10 @@
const { createWriteStream, existsSync } = require("fs");
const { createWriteStream, existsSync, statSync } = require("node:fs");
const fsp = require('node:fs/promises')
const { performance } = require("node:perf_hooks");
const path = require("node:path");
const {mkdirp} = require("mkdirp");
const { DEFAULT_DIRECTORY, DEFAULT_LIBRARIES_DIRECTORY } = require("./config.js");
const md5File = require('md5-file');
async function listModels() {
const res = await fetch("https://gpt4all.io/models/models.json");
const modelList = await res.json();
@ -31,62 +32,108 @@ function readChunks(reader) {
};
}
function downloadModel(
modelName,
options = {}
) {
function downloadModel(modelName, options = {}) {
const downloadOptions = {
modelPath: DEFAULT_DIRECTORY,
debug: false,
url: "https://gpt4all.io/models",
md5sum: true,
...options,
};
const modelFileName = appendBinSuffixIfMissing(modelName);
const fullModelPath = path.join(downloadOptions.modelPath, modelFileName);
const modelUrl = `${downloadOptions.url}/${modelFileName}`
const partialModelPath = path.join(
downloadOptions.modelPath,
modelName + ".part"
);
const finalModelPath = path.join(downloadOptions.modelPath, modelFileName);
const modelUrl = downloadOptions.url ?? `https://gpt4all.io/models/${modelFileName}`;
if (existsSync(fullModelPath)) {
throw Error(`Model already exists at ${fullModelPath}`);
if (existsSync(finalModelPath)) {
throw Error(`Model already exists at ${finalModelPath}`);
}
const headers = {
"Accept-Ranges": "arraybuffer",
"Response-Type": "arraybuffer",
};
const writeStreamOpts = {};
if (existsSync(partialModelPath)) {
console.log("Partial model exists, resuming download...");
const startRange = statSync(partialModelPath).size;
headers["Range"] = `bytes=${startRange}-`;
writeStreamOpts.flags = "a";
}
const abortController = new AbortController();
const signal = abortController.signal;
//wrapper function to get the readable stream from request
// const baseUrl = options.url ?? "https://gpt4all.io/models";
const fetchModel = () =>
// wrapper function to get the readable stream from request
const fetchModel = (fetchOpts = {}) =>
fetch(modelUrl, {
signal,
...fetchOpts,
}).then((res) => {
if (!res.ok) {
throw Error(`Failed to download model from ${modelUrl} - ${res.statusText}`);
throw Error(
`Failed to download model from ${modelUrl} - ${res.statusText}`
);
}
return res.body.getReader();
});
//a promise that executes and writes to a stream. Resolves when done writing.
// a promise that executes and writes to a stream. Resolves when done writing.
const res = new Promise((resolve, reject) => {
fetchModel()
//Resolves an array of a reader and writestream.
.then((reader) => [reader, createWriteStream(fullModelPath)])
fetchModel({ headers })
// Resolves an array of a reader and writestream.
.then((reader) => [
reader,
createWriteStream(partialModelPath, writeStreamOpts),
])
.then(async ([readable, wstream]) => {
console.log("Downloading @ ", fullModelPath);
console.log("Downloading @ ", partialModelPath);
let perf;
if (options.debug) {
perf = performance.now();
}
wstream.on("finish", () => {
if (options.debug) {
console.log(
"Time taken: ",
(performance.now() - perf).toFixed(2),
" ms"
);
}
wstream.close();
});
wstream.on("error", (e) => {
wstream.close();
reject(e);
});
for await (const chunk of readChunks(readable)) {
wstream.write(chunk);
}
if (options.debug) {
console.log(
"Time taken: ",
(performance.now() - perf).toFixed(2),
" ms"
);
if (options.md5sum) {
const fileHash = await md5File(partialModelPath);
if (fileHash !== options.md5sum) {
await fsp.unlink(partialModelPath);
return reject(
Error(`Model "${modelName}" failed verification: Hashes mismatch`)
);
}
if (options.debug) {
console.log("MD5 hash verified: ", fileHash);
}
}
resolve(fullModelPath);
await fsp.rename(partialModelPath, finalModelPath);
resolve(finalModelPath);
})
.catch(reject);
});
@ -95,7 +142,7 @@ function downloadModel(
cancel: () => abortController.abort(),
promise: () => res,
};
};
}
async function retrieveModel (
modelName,
@ -123,12 +170,13 @@ async function retrieveModel (
}
const availableModels = await listModels();
const foundModel = availableModels.find((model) => model.filename === modelFileName);
if (!foundModel) {
throw Error(`Model "${modelName}" is not available.`);
}
//todo
if (retrieveOptions.verbose) {
console.log(`Downloading ${modelName}...`);
}
@ -136,6 +184,7 @@ async function retrieveModel (
const downloadController = downloadModel(modelName, {
modelPath: retrieveOptions.modelPath,
debug: retrieveOptions.verbose,
url: foundModel.url
});
const downloadPath = await downloadController.promise();
@ -153,4 +202,5 @@ module.exports = {
appendBinSuffixIfMissing,
downloadModel,
retrieveModel,
};
listModels
};
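downloadModel now streams into a .part file, resumes an interrupted download with a Range header, optionally verifies an md5 checksum, and only renames the file to its final name on success. A hedged sketch of driving the returned controller:

```js
import { downloadModel } from 'gpt4all'

// Throws synchronously if the model already exists at the target path.
const controller = downloadModel('ggml-gpt4all-j-v1.3-groovy.bin', { debug: true })

// Abort after 60 seconds; the .part file is kept and the next call resumes from it.
const timer = setTimeout(() => controller.cancel(), 60_000)

controller
    .promise()
    .then((finalModelPath) => console.log('saved to', finalModelPath))
    .catch((err) => console.error('download failed or was cancelled:', err))
    .finally(() => clearTimeout(timer))
```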

View File

@ -0,0 +1,79 @@
const path = require('node:path');
const os = require('node:os');
const { LLModel } = require('node-gyp-build')(path.resolve(__dirname, '..'));
const {
listModels,
downloadModel,
appendBinSuffixIfMissing,
} = require('../src/util.js');
const {
DEFAULT_DIRECTORY,
DEFAULT_LIBRARIES_DIRECTORY,
} = require('../src/config.js');
const {
loadModel,
createPrompt,
createCompletion,
} = require('../src/gpt4all.js');
global.fetch = jest.fn(() =>
Promise.resolve({
json: () => Promise.resolve([{}, {}, {}]),
})
);
jest.mock('../src/util.js', () => {
const actualModule = jest.requireActual('../src/util.js');
return {
...actualModule,
downloadModel: jest.fn(() =>
({ cancel: jest.fn(), promise: jest.fn() })
)
}
})
beforeEach(() => {
downloadModel.mockClear()
});
afterEach( () => {
fetch.mockClear();
jest.clearAllMocks()
})
describe('utils', () => {
test("appendBinSuffixIfMissing", () => {
expect(appendBinSuffixIfMissing("filename")).toBe("filename.bin")
expect(appendBinSuffixIfMissing("filename.bin")).toBe("filename.bin")
})
test("default paths", () => {
expect(DEFAULT_DIRECTORY).toBe(path.resolve(os.homedir(), ".cache/gpt4all"))
const paths = [
path.join(DEFAULT_DIRECTORY, "libraries"),
path.resolve("./libraries"),
path.resolve(
__dirname,
"..",
`runtimes/${process.platform}-${process.arch}/native`
),
process.cwd(),
];
expect(typeof DEFAULT_LIBRARIES_DIRECTORY).toBe('string')
expect(DEFAULT_LIBRARIES_DIRECTORY).toBe(paths.join(';'))
})
test("listModels", async () => {
try {
await listModels();
} catch(e) {}
expect(fetch).toHaveBeenCalledTimes(1)
expect(fetch).toHaveBeenCalledWith(
"https://gpt4all.io/models/models.json"
);
})
})

View File

@ -1,8 +0,0 @@
import * as assert from 'node:assert'
import { download } from '../src/gpt4all.js'
assert.rejects(async () => download('poo.bin').promise());
console.log('OK')

(File diff suppressed because it is too large.)