Add websearch to gui (#1314)

* Add websearch to gui
* Fix version_check config
* Add version badge in README.md
* Show version in gui
* Add docker hub build
* Fix gui backend, improve style
H Lohaus 2023-12-07 07:18:05 +01:00 committed by GitHub
parent 5862d55abf
commit 484b96d850
19 changed files with 354 additions and 130 deletions

View File

@@ -9,21 +9,30 @@ jobs:
publish:
runs-on: ubuntu-latest
steps:
- name: Setup Buildx
uses: docker/setup-buildx-action@v3
- name: Checkout repository
uses: actions/checkout@v4
- name: Get metadata for Docker
id: metadata
uses: docker/metadata-action@v5
with:
images: ghcr.io/${{ github.repository }}
images: |
hlohaus789/g4f
ghcr.io/${{ github.repository }}
- name: Log in to Docker Hub
uses: docker/login-action@f4ef78c080cd8ba55a85445d5b36e214a81df20a
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}
- name: Login to GitHub Container Registry
uses: docker/login-action@v3
with:
registry: ghcr.io
username: ${{ github.repository_owner }}
password: ${{ secrets.GHCR_PAT }}
- name: Build and push image
uses: docker/build-push-action@v5
with:
@@ -32,3 +41,5 @@ jobs:
push: true
tags: ${{ steps.metadata.outputs.tags }}
labels: ${{ steps.metadata.outputs.labels }}
build-args: |
G4F_VERSION=${{ github.ref_name }}

View File

@@ -1,4 +1,4 @@
![g4f](g4f.png)
![248433934-7886223b-c1d1-4260-82aa-da5741f303bb](https://github.com/xtekky/gpt4free/assets/98614666/ea012c87-76e0-496a-8ac4-e2de090cc6c9)
<a href='https://ko-fi.com/xtekky' target='_blank'><img height='35' style='border:0px;height:46px;' src='https://az743702.vo.msecnd.net/cdn/kofi3.png?v=0' border='0' alt='Buy Me a Coffee at ko-fi.com' />
<div id="top"></div>
@@ -6,7 +6,8 @@
> By using this repository or any code related to it, you agree to the [legal notice](LEGAL_NOTICE.md). The author is not responsible for any copies, forks, re-uploads made by other users, or anything else related to GPT4Free. This is the author's only account and repository. To prevent impersonation or irresponsible actions, please comply with the GNU GPL license this Repository uses.
> [!Note]
> Latest pypi version: [`0.1.9.2`](https://pypi.org/project/g4f/0.1.9.2)
Latest version:
>> [![PyPI version](https://badge.fury.io/py/g4f.svg)](https://pypi.org/project/g4f)
```sh
pip install -U g4f
```
@@ -165,8 +166,7 @@ docker-compose down
import g4f
g4f.debug.logging = True # Enable logging
g4f.check_version = False # Disable automatic version checking
print(g4f.version) # Check version
g4f.debug.check_version = False # Disable automatic version checking
print(g4f.Provider.Ails.params) # Supported args
# Automatic selection of provider

View File

@@ -2,13 +2,13 @@ version: '3'
services:
gpt4free:
image: ghcr.io/xtekky/gpt4free:latest
image: hlohaus789/g4f:latest
shm_size: 2gb
build:
context: .
dockerfile: docker/Dockerfile
volumes:
- .:/app
# volumes:
# - .:/app
ports:
- '8080:80'
- '1337:1337'

View File

@@ -3,6 +3,8 @@ FROM selenium/node-chrome
ENV SE_SCREEN_WIDTH 1850
ENV G4F_LOGIN_URL http://localhost:7900/?autoconnect=1&resize=scale&password=secret
ENV PYTHONUNBUFFERED 1
ARG G4F_VERSION
ENV G4F_VERSION ${G4F_VERSION}
USER root
@@ -22,7 +24,7 @@ RUN rm -rf /var/lib/apt/lists/* /var/cache/apt/* \
COPY docker/supervisor.conf /etc/supervisor/conf.d/selenium.conf
# Change background image
COPY g4f.png /usr/share/images/fluxbox/ubuntu-light.png
COPY docker/background.png /usr/share/images/fluxbox/ubuntu-light.png
# Switch user
USER 1200

View File

(Binary image file changed: 152 KiB before, 152 KiB after.)

View File

@@ -49,7 +49,7 @@ stderr_logfile_backups=5
stdout_capture_maxbytes=50MB
stderr_capture_maxbytes=50MB
[program:g4f-cli]
[program:g4f-api]
priority=15
command=python -m g4f.cli api
directory=/app

View File

@@ -5,7 +5,7 @@ import time, json
from ..typing import CreateResult, Messages
from .base_provider import BaseProvider
from .helper import format_prompt
from ..webdriver import WebDriver, WebDriverSession
from ..webdriver import WebDriver, WebDriverSession, bypass_cloudflare
class MyShell(BaseProvider):
url = "https://app.myshell.ai/chat"
@@ -25,16 +25,8 @@ class MyShell(BaseProvider):
**kwargs
) -> CreateResult:
with WebDriverSession(webdriver, "", proxy=proxy) as driver:
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
driver.get(cls.url)
# Wait for page load and cloudflare validation
WebDriverWait(driver, timeout).until(
EC.presence_of_element_located((By.CSS_SELECTOR, "body:not(.no-js)"))
)
bypass_cloudflare(driver, cls.url, timeout)
# Send request with message
data = {
"botId": "4738",
@@ -58,11 +50,11 @@ window._reader = response.body.pipeThrough(new TextDecoderStream()).getReader();
driver.execute_script(script.replace("{body}", json.dumps(data)))
script = """
chunk = await window._reader.read();
if (chunk['done']) {
if (chunk.done) {
return null;
}
content = '';
chunk['value'].split('\\n').forEach((line, index) => {
chunk.value.split('\\n').forEach((line, index) => {
if (line.startsWith('data: ')) {
try {
const data = JSON.parse(line.substring('data: '.length));

View File

@@ -0,0 +1,66 @@
from __future__ import annotations
from urllib.parse import unquote
from ...typing import AsyncResult, Messages
from ..base_provider import BaseProvider
from ...webdriver import WebDriver
from ...requests import Session, get_session_from_browser
class AiChatting(BaseProvider):
url = "https://www.aichatting.net"
supports_gpt_35_turbo = True
_session: Session = None
@classmethod
def create_completion(
cls,
model: str,
messages: Messages,
stream: bool,
proxy: str = None,
timeout: int = 120,
webdriver: WebDriver = None,
**kwargs
) -> AsyncResult:
if not cls._session:
cls._session = get_session_from_browser(cls.url, webdriver, proxy, timeout)
visitorId = unquote(cls._session.cookies.get("aichatting.website.visitorId"))
headers = {
"accept": "application/json, text/plain, */*",
"lang": "en",
"source": "web"
}
data = {
"roleId": 0,
}
try:
response = cls._session.post("https://aga-api.aichatting.net/aigc/chat/record/conversation/create", json=data, headers=headers)
response.raise_for_status()
conversation_id = response.json()["data"]["conversationId"]
except Exception as e:
cls.reset()
raise e
headers = {
"authority": "aga-api.aichatting.net",
"accept": "text/event-stream,application/json, text/event-stream",
"lang": "en",
"source": "web",
"vtoken": visitorId,
}
data = {
"spaceHandle": True,
"roleId": 0,
"messages": messages,
"conversationId": conversation_id,
}
response = cls._session.post("https://aga-api.aichatting.net/aigc/chat/v2/stream", json=data, headers=headers, stream=True)
response.raise_for_status()
for chunk in response.iter_lines():
if chunk.startswith(b"data:"):
yield chunk[5:].decode().replace("-=- --", " ").replace("-=-n--", "\n").replace("--@DONE@--", "")
@classmethod
def reset(cls):
cls._session = None

View File

@@ -1,3 +1,4 @@
from .MikuChat import MikuChat
from .Komo import Komo
from .ChatAiGpt import ChatAiGpt
from .ChatAiGpt import ChatAiGpt
from .AiChatting import AiChatting

View File

@@ -1,25 +1,44 @@
from __future__ import annotations
from requests import get
from .models import Model, ModelUtils, _all_models
from .Provider import BaseProvider, AsyncGeneratorProvider, RetryProvider
from .typing import Messages, CreateResult, AsyncResult, Union, List
from . import debug
version = '0.1.9.2'
version_check = True
import os
from requests import get
from importlib.metadata import version as get_package_version, PackageNotFoundError
from subprocess import check_output, CalledProcessError, PIPE
from .models import Model, ModelUtils, _all_models
from .Provider import BaseProvider, AsyncGeneratorProvider, RetryProvider
from .typing import Messages, CreateResult, AsyncResult, Union, List
from . import debug
def get_version() -> str:
# Read from package manager
try:
return get_package_version("g4f")
except PackageNotFoundError:
pass
# Read from docker environment
current_version = os.environ.get("G4F_VERSION")
if current_version:
return current_version
# Read from git repository
try:
command = ["git", "describe", "--tags", "--abbrev=0"]
return check_output(command, text=True, stderr=PIPE).strip()
except CalledProcessError:
pass
def get_lastet_version() -> str:
response = get("https://pypi.org/pypi/g4f/json").json()
return response["info"]["version"]
def check_pypi_version() -> None:
try:
response = get("https://pypi.org/pypi/g4f/json").json()
latest_version = response["info"]["version"]
if version != latest_version:
print(f'New pypi version: {latest_version} (current: {version}) | pip install -U g4f')
return False
return True
version = get_version()
latest_version = get_lastet_version()
except Exception as e:
print(f'Failed to check g4f pypi version: {e}')
if version != latest_version:
print(f'New pypi version: {latest_version} (current: {version}) | pip install -U g4f')
def get_model_and_provider(model : Union[Model, str],
provider : Union[type[BaseProvider], None],
@@ -27,6 +46,9 @@ def get_model_and_provider(model : Union[Model, str],
ignored : List[str] = None,
ignore_working: bool = False,
ignore_stream: bool = False) -> tuple[Model, type[BaseProvider]]:
if debug.version_check:
check_pypi_version()
debug.version_check = False
if isinstance(model, str):
if model in ModelUtils.convert:
@@ -118,7 +140,4 @@ class Completion:
result = provider.create_completion(model.name, [{"role": "user", "content": prompt}], stream, **kwargs)
return result if stream else ''.join(result)
if version_check:
check_pypi_version()
return result if stream else ''.join(result)
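As a quick usage sketch (relying only on the names introduced above: get_version, get_lastet_version and debug.version_check), a caller can read the resolved version and opt out of the automatic check like this:
```python
import g4f

# Opt out of the one-time PyPI check now performed in get_model_and_provider().
g4f.debug.version_check = False

# Resolved from package metadata, the G4F_VERSION environment variable,
# or the newest git tag, in that order (see get_version() above).
print("installed:", g4f.get_version())

# Newest release published on PyPI.
print("pypi:", g4f.get_lastet_version())
```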

View File

@@ -1 +1,2 @@
logging = False
logging = False
version_check = True

View File

@@ -211,6 +211,9 @@ body {
.convo-title {
color: var(--colour-3);
font-size: 14px;
text-overflow: ellipsis;
overflow: hidden;
white-space: nowrap;
}
.message {

View File

@@ -88,6 +88,10 @@
<span class="convo-title">github ~ <a href="https://github.com/xtekky/gpt4free">@gpt4free</a>
</span>
</div>
<div class="info">
<i class="fa-solid fa-star"></i>
<span id="version_text" class="convo-title"></span>
</div>
</div>
</div>
<div class="conversation">

View File

@@ -628,4 +628,19 @@ observer.observe(message_input, { attributes: true });
option.value = option.text = provider;
select.appendChild(option);
}
})();
(async () => {
response = await fetch('/backend-api/v2/version')
versions = await response.json()
document.title = 'g4f - gui - ' + versions["version"];
text = "version ~ "
if (versions["version"] != versions["lastet_version"]) {
release_url = 'https://github.com/xtekky/gpt4free/releases/tag/' + versions["lastet_version"];
text += '<a href="' + release_url +'" target="_blank" title="New version: ' + versions["lastet_version"] +'">' + versions["version"] + ' 🆕</a>';
} else {
text += versions["version"];
}
document.getElementById("version_text").innerHTML = text
})();

View File

@@ -1,8 +1,7 @@ import g4f
import g4f
from flask import request
from .internet import search
from .config import special_instructions
from .internet import get_search_message
g4f.debug.logging = True
@@ -18,6 +17,10 @@ class Backend_Api:
'function': self.providers,
'methods' : ['GET']
},
'/backend-api/v2/version': {
'function': self.version,
'methods' : ['GET']
},
'/backend-api/v2/conversation': {
'function': self._conversation,
'methods': ['POST']
@@ -45,6 +48,12 @@
provider.__name__ for provider in g4f.Provider.__providers__
if provider.working and provider is not g4f.Provider.RetryProvider
]
def version(self):
return {
"version": g4f.get_version(),
"lastet_version": g4f.get_lastet_version(),
}
def _gen_title(self):
return {
@@ -53,14 +62,15 @@
def _conversation(self):
try:
#jailbreak = request.json['jailbreak']
#internet_access = request.json['meta']['content']['internet_access']
#conversation = request.json['meta']['content']['conversation']
#jailbreak = request.json['jailbreak']
web_search = request.json['meta']['content']['internet_access']
messages = request.json['meta']['content']['parts']
if web_search:
messages[-1]["content"] = get_search_message(messages[-1]["content"])
model = request.json.get('model')
model = model if model else g4f.models.default
provider = request.json.get('provider', 'Auto').replace('g4f.Provider.', '')
provider = provider if provider != "Auto" else None
provider = request.json.get('provider').replace('g4f.Provider.', '')
provider = provider if provider and provider != "Auto" else None
if provider != None:
provider = g4f.Provider.ProviderUtils.convert.get(provider)
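To make the new web-search toggle concrete, here is a hedged sketch of a request against this handler. It uses only the fields the code above reads (model, provider, meta.content.internet_access, meta.content.parts); the base URL and the streaming read are assumptions about a locally running GUI, not something shown in the diff:
```python
import requests

BASE_URL = "http://localhost:8080"  # assumption: adjust to where the GUI is served

payload = {
    "model": "gpt-3.5-turbo",         # optional; the handler falls back to g4f.models.default
    "provider": "Auto",               # the handler expects this key; "Auto" means auto-selection
    "meta": {
        "content": {
            "internet_access": True,  # triggers get_search_message() on the last user message
            "parts": [{"role": "user", "content": "Summarize this week's AI news."}],
        }
    },
}

# Reading the reply as a stream is an assumption about the response format.
with requests.post(f"{BASE_URL}/backend-api/v2/conversation", json=payload, stream=True) as resp:
    resp.raise_for_status()
    for chunk in resp.iter_content(chunk_size=None):
        print(chunk.decode(errors="ignore"), end="")
```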

View File

@@ -1,58 +1,149 @@
from __future__ import annotations
from datetime import datetime
from bs4 import BeautifulSoup
from aiohttp import ClientSession, ClientTimeout
from duckduckgo_search import DDGS
import asyncio
ddgs = DDGS(timeout=20)
class SearchResults():
def __init__(self, results: list):
self.results = results
def __iter__(self):
yield from self.results
def search(internet_access, prompt):
print(prompt)
def __str__(self):
search = ""
for idx, result in enumerate(self.results):
if search:
search += "\n\n\n"
search += f"Title: {result.title}\n\n"
if result.text:
search += result.text
else:
search += result.snippet
search += f"\n\nSource: [[{idx}]]({result.url})"
return search
class SearchResultEntry():
def __init__(self, title: str, url: str, snippet: str, text: str = None):
self.title = title
self.url = url
self.snippet = snippet
self.text = text
def set_text(self, text: str):
self.text = text
def scrape_text(html: str, max_words: int = None) -> str:
soup = BeautifulSoup(html, "html.parser")
for exclude in soup(["script", "style"]):
exclude.extract()
for selector in [
"main",
".main-content-wrapper",
".main-content",
".emt-container-inner",
".content-wrapper",
"#content",
"#mainContent",
]:
select = soup.select_one(selector)
if select:
soup = select
break
# Zdnet
for remove in [".c-globalDisclosure"]:
select = soup.select_one(remove)
if select:
select.extract()
clean_text = ""
for paragraph in soup.select("p"):
text = paragraph.get_text()
for line in text.splitlines():
words = []
for word in line.replace("\t", " ").split(" "):
if word:
words.append(word)
count = len(words)
if not count:
continue
if max_words:
max_words -= count
if max_words <= 0:
break
if clean_text:
clean_text += "\n"
clean_text += " ".join(words)
return clean_text
async def fetch_and_scrape(session: ClientSession, url: str, max_words: int = None) -> str:
try:
if not internet_access:
return []
async with session.get(url) as response:
if response.status == 200:
html = await response.text()
return scrape_text(html, max_words)
except:
return
results = duckduckgo_search(q=prompt)
async def search(query: str, n_results: int = 5, max_words: int = 2500, add_text: bool = True) -> SearchResults:
with DDGS() as ddgs:
results = []
for result in ddgs.text(
query,
region="wt-wt",
safesearch="moderate",
timelimit="y",
):
results.append(SearchResultEntry(
result["title"],
result["href"],
result["body"]
))
if len(results) >= n_results:
break
if not search:
return []
if add_text:
requests = []
async with ClientSession(timeout=ClientTimeout(5)) as session:
for entry in results:
requests.append(fetch_and_scrape(session, entry.url, int(max_words / (n_results - 1))))
texts = await asyncio.gather(*requests)
blob = ''.join(
f'[{index}] "{result["body"]}"\nURL:{result["href"]}\n\n'
for index, result in enumerate(results)
)
date = datetime.now().strftime('%d/%m/%y')
formatted_results = []
left_words = max_words
for i, entry in enumerate(results):
if add_text:
entry.text = texts[i]
if left_words:
left_words -= entry.title.count(" ") + 5
if entry.text:
left_words -= entry.text.count(" ")
else:
left_words -= entry.snippet.count(" ")
if 0 > left_words:
break
formatted_results.append(entry)
blob += f'Current date: {date}\n\nInstructions: Using the provided web search results, write a comprehensive reply to the next user query. Make sure to cite results using [[number](URL)] notation after the reference. If the provided search results refer to multiple subjects with the same name, write separate answers for each subject. Ignore your previous response if any.'
return SearchResults(formatted_results)
return [{'role': 'user', 'content': blob}]
def get_search_message(prompt) -> str:
try:
search_results = asyncio.run(search(prompt))
message = f"""
{search_results}
Instruction: Using the provided web search results, to write a comprehensive reply to the user request.
Make sure to add the sources of cites using [[Number]](Url) notation after the reference. Example: [[0]](http://google.com)
If the provided search results refer to multiple subjects with the same name, write separate answers for each subject.
User request:
{prompt}
"""
return message
except Exception as e:
print("Couldn't search DuckDuckGo:", e)
print(e.__traceback__.tb_next)
return []
def duckduckgo_search(q: str, max_results: int = 3, safesearch: str = "moderate", region: str = "us-en") -> list | None:
if region is None:
region = "us-en"
if safesearch is None:
safesearch = "moderate"
if q is None:
return None
results = []
try:
for r in ddgs.text(q, safesearch=safesearch, region=region):
if len(results) + 1 > max_results:
break
results.append(r)
except Exception as e:
print(e)
return results
return prompt
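A small sketch of how the GUI backend uses this module (mirroring the _conversation() change shown earlier, and assuming the file lives at g4f/gui/server/internet.py): only the last user message is rewritten before the history is handed to g4f.
```python
from g4f.gui.server.internet import get_search_message

messages = [{"role": "user", "content": "What is new in Python 3.12?"}]

# With the web-search toggle enabled, the backend replaces the last user
# message with a prompt that embeds DuckDuckGo results and citation
# instructions; earlier messages pass through unchanged.
messages[-1]["content"] = get_search_message(messages[-1]["content"])

print(messages[-1]["content"][:500])
```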

View File

@@ -6,10 +6,7 @@ from functools import partialmethod
from typing import AsyncGenerator
from urllib.parse import urlparse
from curl_cffi.requests import AsyncSession, Session, Response
from .webdriver import WebDriver, WebDriverSession
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from .webdriver import WebDriver, WebDriverSession, bypass_cloudflare
class StreamResponse:
def __init__(self, inner: Response) -> None:
@@ -58,28 +55,7 @@ class StreamSession(AsyncSession):
def get_session_from_browser(url: str, webdriver: WebDriver = None, proxy: str = None, timeout: int = 120):
with WebDriverSession(webdriver, "", proxy=proxy, virtual_display=True) as driver:
driver.get(url)
# Is cloudflare protection
if driver.find_element(By.TAG_NAME, "body").get_attribute("class") == "no-js":
try:
# Click button in iframe
WebDriverWait(driver, 5).until(
EC.presence_of_element_located((By.CSS_SELECTOR, "#turnstile-wrapper iframe"))
)
driver.switch_to.frame(driver.find_element(By.CSS_SELECTOR, "#turnstile-wrapper iframe"))
WebDriverWait(driver, 5).until(
EC.presence_of_element_located((By.CSS_SELECTOR, "#challenge-stage input"))
)
driver.find_element(By.CSS_SELECTOR, "#challenge-stage input").click()
except:
pass
finally:
driver.switch_to.default_content()
# No cloudflare protection
WebDriverWait(driver, timeout).until(
EC.presence_of_element_located((By.CSS_SELECTOR, "body:not(.no-js)"))
)
bypass_cloudflare(driver, url, timeout)
cookies = dict([(cookie["name"], cookie["value"]) for cookie in driver.get_cookies()])
user_agent = driver.execute_script("return navigator.userAgent")
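As a usage sketch (the target URL is only an example and a local Chrome/undetected-chromedriver setup is assumed), this is how a provider such as AiChatting above obtains a ready-to-use session once the Cloudflare check has passed:
```python
from g4f.requests import get_session_from_browser

# Opens the page in a (possibly virtual-display) Chrome session, waits for the
# Cloudflare challenge via bypass_cloudflare(), then copies the browser's
# cookies and user agent into a curl_cffi session.
session = get_session_from_browser("https://www.aichatting.net", proxy=None, timeout=120)

response = session.get("https://www.aichatting.net")
print(response.status_code)
```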

View File

@@ -1,10 +1,12 @@
from __future__ import annotations
import time
from platformdirs import user_config_dir
from selenium.webdriver.remote.webdriver import WebDriver
from undetected_chromedriver import Chrome, ChromeOptions
import os.path
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from os import path
from . import debug
try:
@@ -21,16 +23,47 @@ def get_browser(
) -> WebDriver:
if user_data_dir == None:
user_data_dir = user_config_dir("g4f")
if debug.logging:
print("Open browser with config dir:", user_data_dir)
if user_data_dir and debug.logging:
print("Open browser with config dir:", user_data_dir)
if not options:
options = ChromeOptions()
if proxy:
options.add_argument(f'--proxy-server={proxy}')
driver = '/usr/bin/chromedriver'
if not os.path.isfile(driver):
if not path.isfile(driver):
driver = None
return Chrome(options=options, user_data_dir=user_data_dir, driver_executable_path=driver, headless=headless)
return Chrome(
options=options,
user_data_dir=user_data_dir,
driver_executable_path=driver,
headless=headless
)
def bypass_cloudflare(driver: WebDriver, url: str, timeout: int) -> None:
# Open website
driver.get(url)
# Is cloudflare protection
if driver.find_element(By.TAG_NAME, "body").get_attribute("class") == "no-js":
if debug.logging:
print("Cloudflare protection detected:", url)
try:
# Click button in iframe
WebDriverWait(driver, 5).until(
EC.presence_of_element_located((By.CSS_SELECTOR, "#turnstile-wrapper iframe"))
)
driver.switch_to.frame(driver.find_element(By.CSS_SELECTOR, "#turnstile-wrapper iframe"))
WebDriverWait(driver, 5).until(
EC.presence_of_element_located((By.CSS_SELECTOR, "#challenge-stage input"))
)
driver.find_element(By.CSS_SELECTOR, "#challenge-stage input").click()
except:
pass
finally:
driver.switch_to.default_content()
# No cloudflare protection
WebDriverWait(driver, timeout).until(
EC.presence_of_element_located((By.CSS_SELECTOR, "body:not(.no-js)"))
)
class WebDriverSession():
def __init__(
@@ -47,7 +80,7 @@ class WebDriverSession():
self.headless = headless
self.virtual_display = None
if has_pyvirtualdisplay and virtual_display:
self.virtual_display = Display(size=(1920,1080))
self.virtual_display = Display(size=(1920, 1080))
self.proxy = proxy
self.options = options
self.default_driver = None
@@ -82,7 +115,6 @@ class WebDriverSession():
self.default_driver.close()
except:
pass
time.sleep(0.1)
self.default_driver.quit()
if self.virtual_display:
self.virtual_display.stop()
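And a minimal sketch of the direct driver pattern that MyShell above now uses (the empty string mirrors MyShell's user-data-dir argument; a local Chrome setup is assumed):
```python
from g4f.webdriver import WebDriverSession, bypass_cloudflare

# WebDriverSession creates and cleans up the undetected Chrome driver;
# bypass_cloudflare() opens the URL, clicks through a Turnstile challenge
# if one is shown, then waits for the normal page body to appear.
with WebDriverSession(None, "", proxy=None) as driver:
    bypass_cloudflare(driver, "https://app.myshell.ai/chat", 120)
    print(driver.execute_script("return navigator.userAgent"))
```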

View File

@@ -25,4 +25,5 @@ asyncstdlib
async-property
undetected-chromedriver
asyncstdlib
async_property
async_property
bs4