mirror of
https://github.com/xtekky/gpt4free.git
synced 2024-11-26 09:57:24 +03:00
~ | support local llm inference
This commit is contained in:
parent
dd46cfdf65
commit
b7342b1f13
2
.gitignore
vendored
2
.gitignore
vendored
@ -50,3 +50,5 @@ prv.py
|
||||
x.js
|
||||
x.py
|
||||
info.txt
|
||||
local.py
|
||||
*.gguf
|
109
g4f/local/__init__.py
Normal file
109
g4f/local/__init__.py
Normal file
@ -0,0 +1,109 @@
|
||||
import random, string, time, re
|
||||
|
||||
from ..typing import Union, Iterator, Messages
|
||||
from ..stubs import ChatCompletion, ChatCompletionChunk
|
||||
from .core.engine import LocalProvider
|
||||
from .core.models import models
|
||||
|
||||
# Alias for the return annotation of iter_response: an iterator of full
# completions or streamed completion chunks.
IterResponse = Iterator[Union[ChatCompletion, ChatCompletionChunk]]
|
||||
|
||||
def read_json(text: str) -> str:
    """Extract the body of the first Markdown code fence found in *text*.

    A fence opened with either a ``json`` tag or no tag at all is accepted.
    The fenced content is returned as a raw string; it is not parsed.

    Args:
        text: Model output that may wrap its JSON payload in a code fence.

    Returns:
        The text between the fence lines when a fence is present, otherwise
        *text* unchanged.
    """
    match = re.search(r"```(?:json)?\n(?P<code>[\S\s]+?)\n```", text)
    return match.group("code") if match else text
|
||||
|
||||
def iter_response(
    response: Iterator[str],
    stream: bool,
    response_format: dict = None,
    max_tokens: int = None,
    stop: list = None
) -> IterResponse:
    """Wrap raw text chunks into completion objects, applying stop/length limits.

    In streaming mode one ChatCompletionChunk is yielded per consumed chunk,
    followed by a final chunk that carries only the finish reason. Otherwise
    a single ChatCompletion with the accumulated text is yielded.

    Args:
        response: Iterator producing the generated text piece by piece.
        stream: Whether to yield incremental chunks or one final completion.
        response_format: Optional dict; when its "type" is "json_object" the
            non-streaming content is passed through read_json.
        max_tokens: Optional cap on the number of chunks consumed; reaching it
            ends the response with finish reason "length".
        stop: Optional list of stop sequences. The text is cut at the earliest
            occurrence of any of them and the finish reason becomes "stop".

    Yields:
        ChatCompletionChunk objects when *stream* is true, else one
        ChatCompletion.
    """
    content = ""
    finish_reason = None
    completion_id = ''.join(random.choices(string.ascii_letters + string.digits, k=28))
    # An empty stop sequence matches at index 0 and would end every response
    # before any text is produced, so such entries are ignored.
    stop_words = [word for word in stop if word] if stop is not None else []

    for idx, chunk in enumerate(response):
        piece = str(chunk)
        seen = len(content)  # length of the text delivered by earlier chunks
        content += piece
        if max_tokens is not None and idx + 1 >= max_tokens:
            finish_reason = "length"

        # Earlier iterations found no stop sequence, so a new match has to
        # overlap the current piece; searching the older text again is wasted.
        hits = [
            pos
            for pos in (
                content.find(word, max(seen - len(word) + 1, 0))
                for word in stop_words
            )
            if pos != -1
        ]
        if hits:
            cut = min(hits)
            content = content[:cut]
            finish_reason = "stop"
            if stream:
                # Keep only the part of this chunk that precedes the stop
                # sequence. When the sequence started in an earlier chunk
                # (already sent), nothing of this chunk may be emitted.
                chunk = piece[:max(cut - seen, 0)]

        if stream:
            yield ChatCompletionChunk(chunk, None, completion_id, int(time.time()))
        if finish_reason is not None:
            break

    finish_reason = "stop" if finish_reason is None else finish_reason
    if stream:
        # Closing chunk: no content, only the reason the response ended.
        yield ChatCompletionChunk(None, finish_reason, completion_id, int(time.time()))
    else:
        if response_format is not None and "type" in response_format:
            if response_format["type"] == "json_object":
                content = read_json(content)
        yield ChatCompletion(content, finish_reason, completion_id, int(time.time()))
|
||||
|
||||
def filter_none(**kwargs):
    """Return the given keyword arguments as a dict, minus any whose value is None."""
    return {name: value for name, value in kwargs.items() if value is not None}
|
||||
|
||||
class LocalClient:
    """Client object for local inference; completions are reached via ``chat``."""

    def __init__(
        self,
        **kwargs
    ) -> None:
        # Keyword arguments are accepted but not used by this constructor.
        chat_api: Chat = Chat(self)
        self.chat = chat_api

    @staticmethod
    def list_models():
        """Return the keys of the ``models`` table as a list."""
        return [*models]
|
||||
|
||||
class Completions:
    """Chat-completion endpoint that delegates generation to LocalProvider."""

    def __init__(self, client: LocalClient):
        self.client: LocalClient = client

    def create(
        self,
        messages: Messages,
        model: str,
        stream: bool = False,
        response_format: dict = None,
        max_tokens: int = None,
        stop: Union[list[str], str] = None,
        **kwargs
    ) -> Union[ChatCompletion, Iterator[ChatCompletionChunk]]:
        """Request a completion for *messages* from the named local model.

        Args:
            messages: Conversation to send to the provider.
            model: Model name handed to LocalProvider.create_completion.
            stream: When true, return the iterator of chunks; otherwise
                return the first (and only) item it produces.
            response_format: Forwarded to iter_response.
            max_tokens: Forwarded to the provider (if not None) and to
                iter_response.
            stop: One stop sequence or a list of them; a single string is
                wrapped in a list.
            **kwargs: Additional options passed through to the provider.
        """
        if isinstance(stop, str):
            stop = [stop]

        # Only options that were actually supplied are handed to the provider.
        provider_options = filter_none(max_tokens=max_tokens, stop=stop)
        raw_chunks = LocalProvider.create_completion(
            model, messages, stream, **provider_options, **kwargs
        )

        results = iter_response(raw_chunks, stream, response_format, max_tokens, stop)
        if stream:
            return results
        return next(results)
|
||||
|
||||
class Chat:
    """Holder for the ``completions`` endpoint belonging to one client."""

    completions: Completions

    def __init__(self, client: LocalClient):
        endpoint = Completions(client)
        self.completions = endpoint
|
||||
|
42
g4f/local/core/engine.py
Normal file
42
g4f/local/core/engine.py
Normal file
@ -0,0 +1,42 @@
|
||||
import os
|
||||
|
||||
from gpt4all import GPT4All
|
||||
from .models import models
|
||||
|
||||
class LocalProvider:
    """Runs chat completions against a GPT4All model file stored on disk."""

    @staticmethod
    def create_completion(model, messages, stream, **kwargs):
        """Generate a reply to *messages* with the named local model.

        This is a generator function: nothing runs (including the validation
        below) until the returned iterator is first advanced.

        Args:
            model: Key into the ``models`` table.
            messages: Sequence of dicts with 'role' and 'content' keys.
            stream: When true, yield tokens as they are produced; otherwise
                yield the whole reply as a single item.
            **kwargs: Extra options. Only 'max_tokens' is used here; it is
                forwarded to the model's generate call. Others are ignored.

        Yields:
            Text produced by the model.

        Raises:
            ValueError: If *model* is not in the table, or its file is missing
                and the user declines the download prompt.
        """
        if model not in models:
            raise ValueError(f"Model '{model}' not found / not yet implemented")

        model_file = models[model]['path']
        model_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), '../models/')
        full_model_path = os.path.join(model_dir, model_file)

        if not os.path.isfile(full_model_path):
            print(f"Model file '{full_model_path}' not found.")
            download = input(f'Do you want to download {model_file} ? [y/n]')

            # Tolerate surrounding whitespace, upper case and a spelled-out "yes".
            if download.strip().lower() in ('y', 'yes'):
                GPT4All.download_model(model_file, model_dir)
            else:
                raise ValueError(f"Model '{model_file}' not found.")

        llm = GPT4All(model_name=model_file,
                      n_threads=8,
                      verbose=False,
                      allow_download=False,
                      model_path=model_dir)

        # The first system message (if any) becomes the session's system prompt.
        system_template = next((message['content'] for message in messages if message['role'] == 'system'),
                               'A chat between a curious user and an artificial intelligence assistant.')

        prompt_template = 'USER: {0}\nASSISTANT: '
        # NOTE(review): system messages also appear in this transcript, so the
        # system prompt is sent twice - confirm whether that is intended.
        conversation = '\n'.join(f"{msg['role'].upper()}: {msg['content']}" for msg in messages) + "\nASSISTANT: "

        # Honour the caller's token limit instead of the library default.
        generate_options = {}
        if kwargs.get('max_tokens') is not None:
            generate_options['max_tokens'] = kwargs['max_tokens']

        with llm.chat_session(system_template, prompt_template):
            if stream:
                yield from llm.generate(conversation, streaming=True, **generate_options)
            else:
                yield llm.generate(conversation, **generate_options)
|
86
g4f/local/core/models.py
Normal file
86
g4f/local/core/models.py
Normal file
@ -0,0 +1,86 @@
|
||||
# Table of supported local models, keyed by the short name callers pass as
# `model`. Each entry holds:
#   "path"   - file name of the model weights
#   "ram"    - memory requirement as a string (presumably gigabytes - TODO confirm)
#   "prompt" - prompt template or None ("%1" looks like the placeholder for the
#              user message - verify against the consumer)
#   "system" - system prompt text or None
models = {
    "mistral-7b": {
        "path": "mistral-7b-openorca.gguf2.Q4_0.gguf",
        "ram": "8",
        "prompt": "<|im_start|>user\n%1<|im_end|>\n<|im_start|>assistant\n",
        "system": "<|im_start|>system\nYou are MistralOrca, a large language model trained by Alignment Lab AI. For multi-step problems, write out your reasoning for each step.\n<|im_end|>"
    },
    "mistral-7b-instruct": {
        "path": "mistral-7b-instruct-v0.1.Q4_0.gguf",
        "ram": "8",
        "prompt": "[INST] %1 [/INST]",
        "system": None
    },
    "gpt4all-falcon": {
        "path": "gpt4all-falcon-newbpe-q4_0.gguf",
        "ram": "8",
        "prompt": "### Instruction:\n%1\n### Response:\n",
        "system": None
    },
    "orca-2": {
        "path": "orca-2-13b.Q4_0.gguf",
        "ram": "16",
        "prompt": None,
        "system": None
    },
    "wizardlm-13b": {
        "path": "wizardlm-13b-v1.2.Q4_0.gguf",
        "ram": "16",
        "prompt": None,
        "system": None
    },
    "nous-hermes-llama2": {
        "path": "nous-hermes-llama2-13b.Q4_0.gguf",
        "ram": "16",
        "prompt": "### Instruction:\n%1\n### Response:\n",
        "system": None
    },
    "gpt4all-13b-snoozy": {
        "path": "gpt4all-13b-snoozy-q4_0.gguf",
        "ram": "16",
        "prompt": None,
        "system": None
    },
    "mpt-7b-chat": {
        "path": "mpt-7b-chat-newbpe-q4_0.gguf",
        "ram": "8",
        "prompt": "<|im_start|>user\n%1<|im_end|>\n<|im_start|>assistant\n",
        "system": "<|im_start|>system\n- You are a helpful assistant chatbot trained by MosaicML.\n- You answer questions.\n- You are excited to be able to help the user, but will refuse to do anything that could be considered harmful to the user.\n- You are more than just an information source, you are also able to write poetry, short stories, and make jokes.<|im_end|>"
    },
    "orca-mini-3b": {
        "path": "orca-mini-3b-gguf2-q4_0.gguf",
        "ram": "4",
        "prompt": "### User:\n%1\n### Response:\n",
        "system": "### System:\nYou are an AI assistant that follows instruction extremely well. Help as much as you can.\n\n"
    },
    "replit-code-3b": {
        "path": "replit-code-v1_5-3b-newbpe-q4_0.gguf",
        "ram": "4",
        "prompt": "%1",
        "system": None
    },
    "starcoder": {
        "path": "starcoder-newbpe-q4_0.gguf",
        "ram": "4",
        "prompt": "%1",
        "system": None
    },
    "rift-coder-7b": {
        "path": "rift-coder-v0-7b-q4_0.gguf",
        "ram": "8",
        "prompt": "%1",
        "system": None
    },
    "all-MiniLM-L6-v2": {
        "path": "all-MiniLM-L6-v2-f16.gguf",
        "ram": "1",
        "prompt": None,
        "system": None
    },
    "mistral-7b-german": {
        "path": "em_german_mistral_v01.Q4_0.gguf",
        "ram": "8",
        "prompt": "USER: %1 ASSISTANT: ",
        "system": "Du bist ein hilfreicher Assistent. "
    }
}
|
1
g4f/local/models/model-here
Normal file
1
g4f/local/models/model-here
Normal file
@ -0,0 +1 @@
|
||||
.
|
Loading…
Reference in New Issue
Block a user