quivr/backend/llm/utils/summarization.py

import os

import guidance
import openai

from logger import get_logger

logger = get_logger(__name__)

# The OpenAI key is read from the environment and shared by both helpers below.
openai_api_key = os.environ.get("OPENAI_API_KEY")
openai.api_key = openai_api_key

# Default model used for chunk summarization (guidance pre-1.0 handlebars-style API).
summary_llm = guidance.llms.OpenAI("gpt-3.5-turbo-0613", caching=False)


def llm_summerize(document):
    """Summarize a single document chunk with the shared summarization LLM."""
    summary = guidance(
        """
{{#system~}}
You are the world's best summarizer. \n
Condense the text, capturing essential points and core ideas. Include relevant \
examples, omit excess details, and ensure the summary's length matches the \
original's complexity.
{{/system~}}
{{#user~}}
Summarize the following text:
---
{{document}}
{{/user~}}
{{#assistant~}}
{{gen 'summarization' temperature=0.2 max_tokens=100}}
{{/assistant~}}
""",
        llm=summary_llm,
    )
    # Render the template with the document and return only the generated summary.
    summary = summary(document=document)
    logger.info("Summarization: %s", summary)
    return summary["summarization"]


def llm_evaluate_summaries(question, summaries, model):
    """Score each summary's relevance to the question and return the matches, best first."""
    if not model.startswith("gpt"):
        logger.info(f"Model {model} not supported. Using gpt-3.5-turbo instead.")
        model = "gpt-3.5-turbo-0613"
    logger.info(f"Evaluating summaries with {model}")
    evaluation_llm = guidance.llms.OpenAI(model, caching=False)
    evaluation = guidance(
        """
{{#system~}}
You are the world's best evaluator. You evaluate the relevance of summaries based \
on the user's question. Return the evaluation in the following CSV format; the CSV \
headers are [summary_id,document_id,evaluation,reason].
Evaluator Task
- Evaluation should be a score between 0 and 5.
- Reason should be a short sentence (within 20 words) explaining the evaluation.
---
Example
summary_id,document_id,evaluation,reason
1,4,3,"does not mention topic A"
2,2,4,"it is not relevant to the question"
{{/system~}}
{{#user~}}
Based on the question, do the Evaluator Task for each summary.
---
Question: {{question}}
{{#each summaries}}
Summary
summary_id: {{this.id}}
document_id: {{this.document_id}}
evaluation: ""
reason: ""
Summary Content: {{this.content}}
File Name: {{this.metadata.file_name}}
{{/each}}
{{/user~}}
{{#assistant~}}
{{gen 'evaluation' temperature=0.2 stop='<|im_end|>'}}
{{/assistant~}}
""",
        llm=evaluation_llm,
    )
    result = evaluation(question=question, summaries=summaries)

    # Parse the generated CSV rows, keeping only well-formed rows with a score of 3-5.
    evaluations = {}
    for evaluation in result["evaluation"].split("\n"):
        if evaluation == "" or not evaluation[0].isdigit():
            continue
        logger.info("Evaluation Row: %s", evaluation)
        summary_id, document_id, score, *reason = evaluation.split(",")
        if not score.isdigit():
            continue
        score = int(score)
        if score < 3 or score > 5:
            continue
        evaluations[summary_id] = {
            "evaluation": score,
            "reason": ",".join(reason),
            "summary_id": summary_id,
            "document_id": document_id,
        }
    # Return the retained evaluations sorted from most to least relevant.
    return sorted(evaluations.values(), key=lambda x: x["evaluation"], reverse=True)
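

# Minimal usage sketch (hypothetical, assuming OPENAI_API_KEY is set and that each
# summary is a dict carrying the fields the evaluation template reads:
# id, document_id, content, metadata.file_name). The sample text and question are
# illustrative only; running this issues real OpenAI calls.
if __name__ == "__main__":
    sample_text = (
        "Vector stores index embeddings of document chunks so that the chunks most "
        "similar to a query can be retrieved quickly."
    )
    chunk_summary = llm_summerize(sample_text)

    sample_summaries = [
        {
            "id": 1,
            "document_id": 1,
            "content": chunk_summary,
            "metadata": {"file_name": "example.txt"},
        }
    ]
    ranked = llm_evaluate_summaries(
        question="How are similar document chunks retrieved?",
        summaries=sample_summaries,
        model="gpt-3.5-turbo-0613",
    )
    print(ranked)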