from __future__ import annotations

from .base_provider import AsyncGeneratorProvider, ProviderModelMixin
from .helper import format_prompt, filter_none
from ..typing import AsyncResult, Messages
from ..requests import raise_for_status
from ..requests.aiohttp import StreamSession
from ..errors import ResponseError, MissingAuthError


class Replicate(AsyncGeneratorProvider, ProviderModelMixin):
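    """Provider for Replicate.com: creates a prediction over Replicate's HTTP
    API and streams the generated text back via server-sent events (SSE)."""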
    url = "https://replicate.com"
    working = True
    needs_auth = True
    default_model = "meta/meta-llama-3-70b-instruct"
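    # Accept the Hugging Face-style model id as an alias for Replicate's slug.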
    model_aliases = {
        "meta-llama/Meta-Llama-3-70B-Instruct": default_model
    }

    @classmethod
    async def create_async_generator(
        cls,
        model: str,
        messages: Messages,
        api_key: str = None,
        proxy: str = None,
        timeout: int = 180,
        system_prompt: str = None,
        max_new_tokens: int = None,
        temperature: float = None,
        top_p: float = None,
        top_k: float = None,
        stop: list = None,
        extra_data: dict = {},
        headers: dict = {
            "accept": "application/json",
        },
        **kwargs
    ) -> AsyncResult:
        model = cls.get_model(model)
        if cls.needs_auth and api_key is None:
            raise MissingAuthError("api_key is missing")
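        # With an API key, authenticate against the official api.replicate.com
        # endpoint; otherwise fall back to the unauthenticated web API.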
        if api_key is not None:
            # Copy the dict so the mutable default `headers` argument is not
            # mutated across calls.
            headers = {**headers, "Authorization": f"Bearer {api_key}"}
            api_base = "https://api.replicate.com/v1/models/"
        else:
            api_base = "https://replicate.com/api/models/"
        async with StreamSession(
            proxy=proxy,
            headers=headers,
            timeout=timeout
        ) as session:
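            # Build the prediction payload; filter_none drops sampling
            # parameters that were left as None so they are not sent at all.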
            data = {
                "stream": True,
                "input": {
                    "prompt": format_prompt(messages),
                    **filter_none(
                        system_prompt=system_prompt,
                        max_new_tokens=max_new_tokens,
                        temperature=temperature,
                        top_p=top_p,
                        top_k=top_k,
                        stop_sequences=",".join(stop) if stop else None
                    ),
                    **extra_data
                },
            }
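            # Step 1: create the prediction. The JSON response carries the
            # prediction id and a set of URLs, including one for streaming.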
            url = f"{api_base.rstrip('/')}/{model}/predictions"
            async with session.post(url, json=data) as response:
                message = "Model not found" if response.status == 404 else None
                await raise_for_status(response, message)
                result = await response.json()
                if "id" not in result:
                    raise ResponseError(f"Invalid response: {result}")
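                # Step 2: follow the prediction's stream URL, which serves
                # server-sent events: "event:" lines name the event type and
                # "data:" lines carry the generated text.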
                async with session.get(result["urls"]["stream"], headers={"Accept": "text/event-stream"}) as response:
                    await raise_for_status(response)
                    event = None
                    async for line in response.iter_lines():
                        if line.startswith(b"event: "):
                            event = line[7:]  # strip the b"event: " prefix (7 bytes)
                            if event == b"done":
                                break
                        elif event == b"output":
                            if line.startswith(b"data: "):
                                new_text = line[6:].decode()  # strip the b"data: " prefix
                                if new_text:
                                    yield new_text
                                else:
                                    # An empty data line stands for a newline in the output.
                                    yield "\n"
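
# Minimal usage sketch (illustrative, not part of the provider). Because this
# module uses relative imports it cannot be run directly; from application
# code it would look roughly like the following, assuming the class is
# importable from g4f's Provider package and a valid Replicate API token is
# set in the REPLICATE_API_KEY environment variable:
#
#     import asyncio, os
#     from g4f.Provider import Replicate
#
#     async def demo():
#         async for chunk in Replicate.create_async_generator(
#             model="meta/meta-llama-3-70b-instruct",
#             messages=[{"role": "user", "content": "Say hello in one sentence."}],
#             api_key=os.environ["REPLICATE_API_KEY"],
#         ):
#             print(chunk, end="", flush=True)
#
#     asyncio.run(demo())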