import os

import guidance
import openai

from logger import get_logger

logger = get_logger(__name__)

openai_api_key = os.environ.get("OPENAI_API_KEY")
openai.api_key = openai_api_key

summary_llm = guidance.llms.OpenAI("gpt-3.5-turbo-0613", caching=False)
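
# NOTE: `guidance.llms.OpenAI` and the template-style `guidance("...")`
# programs in this module use the pre-0.1 guidance API; guidance >= 0.1
# replaced this interface with `guidance.models`, so an older release of
# `guidance` is assumed here.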


def llm_summerize(document):
    """Summarize a document with the summarization LLM and return the text."""
    summary = guidance(
        """
{{#system~}}
You are the world's best summarizer. \n
Condense the text, capturing essential points and core ideas. Include relevant \
examples, omit excess details, and ensure the summary's length matches the \
original's complexity.
{{/system~}}
{{#user~}}
Summarize the following text:
---
{{document}}
{{/user~}}

{{#assistant~}}
{{gen 'summarization' temperature=0.2 max_tokens=100}}
{{/assistant~}}
""",
        llm=summary_llm,
    )
    summary = summary(document=document)
    logger.info("Summarization: %s", summary)
    return summary["summarization"]
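
# Example usage of `llm_summerize` (illustrative sketch, not part of the
# original module): assumes OPENAI_API_KEY is exported and the OpenAI API is
# reachable.
#
#     text = open("article.txt", encoding="utf-8").read()
#     print(llm_summerize(text))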


def llm_evaluate_summaries(question, summaries, model):
    """Score each summary's relevance to the question and return the rows
    that scored 3-5, sorted by score in descending order."""
    if not model.startswith("gpt"):
        logger.info(f"Model {model} not supported. Using gpt-3.5-turbo-0613 instead.")
        model = "gpt-3.5-turbo-0613"
    logger.info(f"Evaluating summaries with {model}")
    evaluation_llm = guidance.llms.OpenAI(model, caching=False)
    evaluation = guidance(
        """
{{#system~}}
You are the world's best evaluator. You evaluate the relevance of summaries \
based on the user's question. Return the evaluation in the following CSV \
format; the CSV headers are [summary_id,document_id,evaluation,reason].
Evaluator Task
- Evaluation should be a score between 0 and 5.
- Reason should be a short sentence, within 20 words, explaining the evaluation.
---
Example
summary_id,document_id,evaluation,reason
1,4,3,"does not mention topic A"
2,2,4,"mostly relevant to the question"
{{/system~}}
{{#user~}}
Based on the question, perform the Evaluator Task for each summary.
---
Question: {{question}}
{{#each summaries}}
Summary
summary_id: {{this.id}}
document_id: {{this.document_id}}
evaluation: ""
reason: ""
Summary Content: {{this.content}}
File Name: {{this.metadata.file_name}}
{{/each}}
{{/user~}}
{{#assistant~}}
{{gen 'evaluation' temperature=0.2 stop='<|im_end|>'}}
{{/assistant~}}
""",
        llm=evaluation_llm,
    )
    result = evaluation(question=question, summaries=summaries)

    # Parse the generated CSV: keep only rows that start with a digit
    # (skipping the header and any commentary) and whose score is 3-5.
    evaluations = {}
    for row in result["evaluation"].split("\n"):
        if row == "" or not row[0].isdigit():
            continue
        logger.info("Evaluation Row: %s", row)
        summary_id, document_id, score, *reason = row.split(",")
        if not score.isdigit():
            continue
        score = int(score)
        if score < 3 or score > 5:
            continue
        evaluations[summary_id] = {
            "evaluation": score,
            # The reason may itself contain commas, so rejoin the remainder.
            "reason": ",".join(reason),
            "summary_id": summary_id,
            "document_id": document_id,
        }
    return sorted(evaluations.values(), key=lambda x: x["evaluation"], reverse=True)
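
# Example usage of `llm_evaluate_summaries` (illustrative sketch; the input
# shape is an assumption, not taken from the rest of the repository): each
# summary must expose the fields the template reads -- id, document_id,
# content, and metadata.file_name.
#
#     summaries = [
#         {
#             "id": 1,
#             "document_id": 4,
#             "content": "Q3 revenue grew 12% year over year ...",
#             "metadata": {"file_name": "q3_report.pdf"},
#         },
#     ]
#     ranked = llm_evaluate_summaries("How did revenue change?", summaries, "gpt-4")
#     # `ranked` contains only rows scored 3-5, best first.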