new gpt-4 beating opensource models

2024-12-27 05:04:19 +03:00 · 2024-04-13 03:09:11 +01:00 · 2024-04-13 03:09:11 +01:00 · 2fad27b2c5
commit 2fad27b2c5
parent ed8afc20e8
4 changed files with 26 additions and 17 deletions
--- a/README.md
+++ b/README.md
@@ -281,13 +281,14 @@ set G4F_PROXY=http://host:port
 | [beta.theb.ai](https://beta.theb.ai) | `g4f.Provider.Theb` | ✔️ | ✔️ | ✔️ | ![Unknown](https://img.shields.io/badge/Unknown-grey) | ❌ |
 | [you.com](https://you.com) | `g4f.Provider.You` | ✔️ | ✔️ | ✔️ | ![Unknown](https://img.shields.io/badge/Unknown-grey) | ❌ |

-## New OpenSource Models
-While we wait for gpt-5, here is a list of new models that are at least better than gpt-3.5-turbo. Some rival gpt-4. Expect this list to grow.
+## Best OpenSource Models
+While we wait for gpt-5, here is a list of new models that are at least better than gpt-3.5-turbo. **Some are better than gpt-4**. Expect this list to grow.

-| Website | Provider |  parameters |
-| ------  | -------  |  ------ | 
-| [mixtral-8x22b](https://huggingface.co/mistral-community/Mixtral-8x22B-v0.1) | `g4f.Provider.DeepInfra` | 176B / 44b active |
-| [dbrx-instruct](https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm) | `g4f.Provider.DeepInfra` | 132B / 36B active|
+| Website | Provider |  parameters | better than |
+| ------  | -------  |  ------ |  ------ | 
+| [mixtral-8x22b](https://huggingface.co/mistral-community/Mixtral-8x22B-v0.1) | `g4f.Provider.DeepInfra` | 176B / 44B active | gpt-3.5-turbo |
+| [dbrx-instruct](https://www.databricks.com/blog/introducing-dbrx-new-state-art-open-llm) | `g4f.Provider.DeepInfra` | 132B / 36B active | gpt-3.5-turbo |
+| [command-r+](https://txt.cohere.com/command-r-plus-microsoft-azure/) | `g4f.Provider.HuggingChat` | 104B | gpt-4-0613 |


 ### GPT-3.5
--- a/g4f/Provider/HuggingChat.py
+++ b/g4f/Provider/HuggingChat.py
@@ -14,13 +14,12 @@ class HuggingChat(AsyncGeneratorProvider, ProviderModelMixin):
    working = True
    default_model = "mistralai/Mixtral-8x7B-Instruct-v0.1"
    models = [
-        "mistralai/Mixtral-8x7B-Instruct-v0.1",
-        "google/gemma-7b-it",
-        "meta-llama/Llama-2-70b-chat-hf",
-        "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
-        "codellama/CodeLlama-34b-Instruct-hf",
-        "mistralai/Mistral-7B-Instruct-v0.2",
-        "openchat/openchat-3.5-0106",
+        "HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1",
+        'CohereForAI/c4ai-command-r-plus',
+        'mistralai/Mixtral-8x7B-Instruct-v0.1',
+        'google/gemma-1.1-7b-it',
+        'NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO',
+        'mistralai/Mistral-7B-Instruct-v0.2'
    ]
    model_aliases = {
        "openchat/openchat_3.5": "openchat/openchat-3.5-0106",
@@ -48,6 +47,7 @@ class HuggingChat(AsyncGeneratorProvider, ProviderModelMixin):
        **kwargs
    ) -> AsyncResult:
        options = {"model": cls.get_model(model)}
+
        system_prompt = "\n".join([message["content"] for message in messages if message["role"] == "system"])
        if system_prompt:
            options["preprompt"] = system_prompt
--- a/g4f/Provider/PerplexityLabs.py
+++ b/g4f/Provider/PerplexityLabs.py
@@ -19,13 +19,14 @@ class PerplexityLabs(AsyncGeneratorProvider, ProviderModelMixin):
        "sonar-small-online", "sonar-medium-online", "sonar-small-chat", "sonar-medium-chat", "mistral-7b-instruct", 
        "codellama-70b-instruct", "llava-v1.5-7b-wrapper", "llava-v1.6-34b", "mixtral-8x7b-instruct",
        "gemma-2b-it", "gemma-7b-it"
-        "mistral-medium", "related"
+        "mistral-medium", "related", "dbrx-instruct"
    ]
    model_aliases = {
        "mistralai/Mistral-7B-Instruct-v0.1": "mistral-7b-instruct", 
        "mistralai/Mixtral-8x7B-Instruct-v0.1": "mixtral-8x7b-instruct",
        "codellama/CodeLlama-70b-Instruct-hf": "codellama-70b-instruct",
-        "llava-v1.5-7b": "llava-v1.5-7b-wrapper"
+        "llava-v1.5-7b": "llava-v1.5-7b-wrapper",
+        'databricks/dbrx-instruct': "dbrx-instruct"
    }

    @classmethod
--- a/g4f/models.py
+++ b/g4f/models.py
@@ -165,7 +165,7 @@ mistral_7b_v02 = Model(
 mixtral_8x22b = Model(
    name          = "HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1",
    base_provider = "huggingface",
-    best_provider = DeepInfra
+    best_provider = RetryProvider([HuggingChat, DeepInfra])
 )

 # Misc models
@@ -269,7 +269,13 @@ pi = Model(
 dbrx_instruct = Model(
    name = 'databricks/dbrx-instruct',
    base_provider = 'mistral',
-    best_provider = DeepInfra
+    best_provider = RetryProvider([DeepInfra, PerplexityLabs])
+)
+
+command_r_plus = Model(
+    name = 'CohereForAI/c4ai-command-r-plus',
+    base_provider = 'mistral',
+    best_provider = HuggingChat
 )

 class ModelUtils:
@@ -324,6 +330,7 @@ class ModelUtils:
        'claude-3-sonnet': claude_3_sonnet,
        
        # other
+        'command-r+': command_r_plus,
        'dbrx-instruct': dbrx_instruct,
        'lzlv-70b': lzlv_70b,
        'airoboros-70b': airoboros_70b,