~ | support local llm inference

2024-11-26 09:57:24 +03:00 · 2024-03-11 18:26:34 +00:00 · 2024-03-11 18:26:34 +00:00 · b7342b1f13
commit b7342b1f13
parent dd46cfdf65
5 changed files with 240 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -50,3 +50,5 @@ prv.py
 x.js
 x.py
 info.txt
+local.py
+*.gguf
--- a/g4f/local/init.py
+++ b/g4f/local/init.py
@ -0,0 +1,109 @@
+import random, string, time, re
+
+from ..typing import Union, Iterator, Messages
+from ..stubs  import ChatCompletion, ChatCompletionChunk
+from .core.engine import LocalProvider
+from .core.models import models
+
+IterResponse = Iterator[Union[ChatCompletion, ChatCompletionChunk]]
+
+def read_json(text: str) -> str:
+    """Return the contents of the first fenced code block found in *text*.
+
+    A fence may be opened with "```json" or with a bare "```". Despite the
+    function name nothing is parsed: the result is always a string, never a
+    decoded dict.
+
+    Args:
+        text: Raw model output that may wrap its JSON in a Markdown fence.
+
+    Returns:
+        The text between the fence lines, or *text* unchanged when no such
+        fence is present.
+    """
+    match = re.search(r"```(json|)\n(?P<code>[\S\s]+?)\n```", text)
+    if match:
+        return match.group("code")
+    return text
+
+def iter_response(
+    response: Iterator[str],
+    stream: bool,
+    response_format: dict = None,
+    max_tokens: int = None,
+    stop: list = None
+) -> IterResponse:
+    """Wrap raw provider output in ChatCompletion / ChatCompletionChunk objects.
+
+    Args:
+        response: Iterator of text pieces produced by the provider.
+        stream: When true, yield one ChatCompletionChunk per piece followed by
+            a final chunk that carries only the finish reason. When false,
+            yield a single ChatCompletion holding the accumulated text.
+        response_format: Optional dict; ``{"type": "json_object"}`` makes the
+            non-streaming result pass through ``read_json``.
+        max_tokens: Optional cap on the number of pieces consumed from
+            *response*; reaching it sets the finish reason to "length".
+        stop: Optional list of stop strings. Output is cut at the earliest
+            occurrence of any of them and the finish reason becomes "stop".
+
+    Yields:
+        ChatCompletionChunk objects when streaming, otherwise exactly one
+        ChatCompletion.
+    """
+    content = ""
+    finish_reason = None
+    completion_id = ''.join(random.choices(string.ascii_letters + string.digits, k=28))
+    for idx, chunk in enumerate(response):
+        # Length of the text accumulated before this piece was appended.
+        previous_length = len(content)
+        content += str(chunk)
+        if max_tokens is not None and idx + 1 >= max_tokens:
+            finish_reason = "length"
+        if stop:
+            # Look for every stop string and cut at the earliest hit, so the
+            # result does not depend on the order of the stop list. Empty
+            # stop strings are skipped because they would match at index 0.
+            hits = [content.find(word) for word in stop if word]
+            hits = [position for position in hits if position != -1]
+            if hits:
+                content = content[:min(hits)]
+                # Emit only the part of this piece that precedes the stop
+                # string. When the stop string began in an earlier piece the
+                # slice is empty, so nothing after it leaks into the stream.
+                chunk = content[previous_length:]
+                # A stop hit takes precedence over the "length" reason.
+                finish_reason = "stop"
+        if stream:
+            yield ChatCompletionChunk(chunk, None, completion_id, int(time.time()))
+        if finish_reason is not None:
+            break
+    # An exhausted iterator without any other reason counts as a normal stop.
+    finish_reason = "stop" if finish_reason is None else finish_reason
+    if stream:
+        yield ChatCompletionChunk(None, finish_reason, completion_id, int(time.time()))
+    else:
+        if response_format is not None and "type" in response_format:
+            if response_format["type"] == "json_object":
+                content = read_json(content)
+        yield ChatCompletion(content, finish_reason, completion_id, int(time.time()))
+
+def filter_none(**kwargs):
+    """Return the given keyword arguments as a dict, minus those set to None."""
+    return {name: value for name, value in kwargs.items() if value is not None}
+
+class LocalClient:
+    """Client object for local inference; the chat API hangs off ``chat``."""
+
+    def __init__(self, **kwargs) -> None:
+        # Keyword arguments are accepted but not used by this constructor.
+        self.chat: Chat = Chat(self)
+
+    @staticmethod
+    def list_models():
+        """Return the names of all entries in the ``models`` table."""
+        return list(models)
+        
+class Completions:
+    """Creates chat completions through ``LocalProvider``."""
+
+    def __init__(self, client: LocalClient):
+        self.client: LocalClient = client
+
+    def create(
+        self,
+        messages: Messages,
+        model: str,
+        stream: bool = False,
+        response_format: dict = None,
+        max_tokens: int = None,
+        stop: Union[list[str], str] = None,
+        **kwargs
+    ) -> Union[ChatCompletion, Iterator[ChatCompletionChunk]]:
+        """Run a completion and return it in ChatCompletion form.
+
+        Args:
+            messages: Conversation passed on to the provider.
+            model: Model name passed on to the provider.
+            stream: When true, return an iterator of chunks; otherwise return
+                the single completion object.
+            response_format: Passed to ``iter_response``.
+            max_tokens: Passed to the provider (when not None) and to
+                ``iter_response``.
+            stop: One stop string or a list of them; a lone string is wrapped
+                in a list first.
+            **kwargs: Extra keyword arguments handed to the provider as-is.
+        """
+        if isinstance(stop, str):
+            stop = [stop]
+        # Only options that were actually supplied are handed to the provider.
+        provider_options = filter_none(max_tokens=max_tokens, stop=stop)
+        raw_output = LocalProvider.create_completion(
+            model, messages, stream,
+            **provider_options,
+            **kwargs
+        )
+        wrapped = iter_response(raw_output, stream, response_format, max_tokens, stop)
+        if stream:
+            return wrapped
+        # Non-streaming: the wrapper yields exactly one completion object.
+        return next(wrapped)
+    
+class Chat:
+    """Groups the chat endpoints of a client; exposes ``completions``."""
+
+    completions: Completions
+
+    def __init__(self, client: LocalClient):
+        completions_api = Completions(client)
+        self.completions = completions_api
+    
--- a/g4f/local/core/engine.py
+++ b/g4f/local/core/engine.py
@ -0,0 +1,42 @@
+import os
+
+from gpt4all import GPT4All
+from .models import models
+
+class LocalProvider:
+    """Runs chat completions against a local model file through GPT4All."""
+
+    @staticmethod
+    def create_completion(model, messages, stream, **kwargs):
+        """Generate a completion for *messages* with a locally stored model.
+
+        This is a generator function: nothing below runs, including the model
+        lookup and the errors it can raise, until the result is first
+        iterated.
+
+        Args:
+            model: Key into the ``models`` table.
+            messages: Iterable of dicts with 'role' and 'content' keys.
+            stream: When true, yield tokens as they are generated; otherwise
+                yield the whole response as a single string.
+            **kwargs: Only ``max_tokens`` is used; it is forwarded to
+                ``GPT4All.generate``. Other keys are ignored.
+
+        Yields:
+            Generated text, token by token or as one string (see *stream*).
+
+        Raises:
+            ValueError: If *model* is not in ``models``, or the model file is
+                missing and the user declines the download prompt.
+        """
+        if model not in models:
+            raise ValueError(f"Model '{model}' not found / not yet implemented")
+
+        model_info = models[model]
+        model_file = model_info['path']
+        model_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), '../models/')
+        full_model_path = os.path.join(model_dir, model_file)
+
+        if not os.path.isfile(full_model_path):
+            # Interactive fallback: ask on stdin before downloading the file.
+            print(f"Model file '{full_model_path}' not found.")
+            download = input(f'Do you want to download {model_file} ? [y/n]')
+
+            if download in ['y', 'Y']:
+                GPT4All.download_model(model_file, model_dir)
+            else:
+                raise ValueError(f"Model '{model_file}' not found.")
+
+        llm = GPT4All(model_name=model_file,
+                      n_threads=8,
+                      verbose=False,
+                      allow_download=False,
+                      model_path=model_dir)
+
+        # The first system message (if any) becomes the session's system prompt.
+        system_template = next((message['content'] for message in messages if message['role'] == 'system'),
+                               'A chat between a curious user and an artificial intelligence assistant.')
+
+        # NOTE(review): these templates are hard-coded; the per-model "prompt"
+        # and "system" entries in ``models`` are not used here. System
+        # messages also appear again inside ``conversation`` - confirm intent.
+        prompt_template = 'USER: {0}\nASSISTANT: '
+        conversation = '\n'.join(f"{msg['role'].upper()}: {msg['content']}" for msg in messages) + "\nASSISTANT: "
+
+        # Forward the caller's token limit; previously it was dropped and the
+        # library's own default limit applied regardless of the request.
+        generate_options = {}
+        if kwargs.get('max_tokens') is not None:
+            generate_options['max_tokens'] = kwargs['max_tokens']
+
+        with llm.chat_session(system_template, prompt_template):
+            if stream:
+                yield from llm.generate(conversation, streaming=True, **generate_options)
+            else:
+                yield llm.generate(conversation, **generate_options)
--- a/g4f/local/core/models.py
+++ b/g4f/local/core/models.py
@ -0,0 +1,86 @@
+# Table of supported local models, keyed by the short name callers pass as
+# ``model``. Each entry holds:
+#   "path"   - model file name; the engine joins it with the models directory
+#              and also passes it to GPT4All as the model name.
+#   "ram"    - a number stored as a string; presumably the RAM needed in GB
+#              (TODO confirm).
+#   "prompt" - per-model prompt template, or None; "%1" presumably marks where
+#              the user text goes (TODO confirm).
+#   "system" - per-model system prompt, or None.
+# NOTE(review): in the code added by this commit only "path" is read.
+models = {
+    "mistral-7b": {
+        "path": "mistral-7b-openorca.gguf2.Q4_0.gguf",
+        "ram": "8",
+        "prompt": "<|im_start|>user\n%1<|im_end|>\n<|im_start|>assistant\n",
+        "system": "<|im_start|>system\nYou are MistralOrca, a large language model trained by Alignment Lab AI. For multi-step problems, write out your reasoning for each step.\n<|im_end|>"
+    },
+    "mistral-7b-instruct": {
+        "path": "mistral-7b-instruct-v0.1.Q4_0.gguf",
+        "ram": "8",
+        "prompt": "[INST] %1 [/INST]",
+        "system": None
+    },
+    "gpt4all-falcon": {
+        "path": "gpt4all-falcon-newbpe-q4_0.gguf",
+        "ram": "8",
+        "prompt": "### Instruction:\n%1\n### Response:\n",
+        "system": None
+    },
+    "orca-2": {
+        "path": "orca-2-13b.Q4_0.gguf",
+        "ram": "16",
+        "prompt": None,
+        "system": None
+    },
+    "wizardlm-13b": {
+        "path": "wizardlm-13b-v1.2.Q4_0.gguf",
+        "ram": "16",
+        "prompt": None,
+        "system": None
+    },
+    "nous-hermes-llama2": {
+        "path": "nous-hermes-llama2-13b.Q4_0.gguf",
+        "ram": "16",
+        "prompt": "### Instruction:\n%1\n### Response:\n",
+        "system": None
+    },
+    "gpt4all-13b-snoozy": {
+        "path": "gpt4all-13b-snoozy-q4_0.gguf",
+        "ram": "16",
+        "prompt": None,
+        "system": None
+    },
+    "mpt-7b-chat": {
+        "path": "mpt-7b-chat-newbpe-q4_0.gguf",
+        "ram": "8",
+        "prompt": "<|im_start|>user\n%1<|im_end|>\n<|im_start|>assistant\n",
+        "system": "<|im_start|>system\n- You are a helpful assistant chatbot trained by MosaicML.\n- You answer questions.\n- You are excited to be able to help the user, but will refuse to do anything that could be considered harmful to the user.\n- You are more than just an information source, you are also able to write poetry, short stories, and make jokes.<|im_end|>"
+    },
+    "orca-mini-3b": {
+        "path": "orca-mini-3b-gguf2-q4_0.gguf",
+        "ram": "4",
+        "prompt": "### User:\n%1\n### Response:\n",
+        "system": "### System:\nYou are an AI assistant that follows instruction extremely well. Help as much as you can.\n\n"
+    },
+    "replit-code-3b": {
+        "path": "replit-code-v1_5-3b-newbpe-q4_0.gguf",
+        "ram": "4",
+        "prompt": "%1",
+        "system": None
+    },
+    "starcoder": {
+        "path": "starcoder-newbpe-q4_0.gguf",
+        "ram": "4",
+        "prompt": "%1",
+        "system": None
+    },
+    "rift-coder-7b": {
+        "path": "rift-coder-v0-7b-q4_0.gguf",
+        "ram": "8",
+        "prompt": "%1",
+        "system": None
+    },
+    "all-MiniLM-L6-v2": {
+        "path": "all-MiniLM-L6-v2-f16.gguf",
+        "ram": "1",
+        "prompt": None,
+        "system": None
+    },
+    "mistral-7b-german": {
+        "path": "em_german_mistral_v01.Q4_0.gguf",
+        "ram": "8",
+        "prompt": "USER: %1 ASSISTANT: ",
+        "system": "Du bist ein hilfreicher Assistent. "
+    }
+}
--- a/g4f/local/models/model-here
+++ b/g4f/local/models/model-here
@ -0,0 +1 @@
+.