Merge pull request #315 from yu-shaonian/main

web spider
This commit is contained in:
Alpha Liu 2023-12-29 14:53:01 +08:00 committed by GitHub
commit db2e1ec854
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 102 additions and 2 deletions

View File

@ -96,7 +96,8 @@
"clear_structure": "True", "clear_structure": "True",
"gui_design": "True", "gui_design": "True",
"git_management": "False", "git_management": "False",
"web_spider": "False",
"self_improve": "False", "self_improve": "False",
"incremental_develop": "False", "incremental_develop": "False",
"background_prompt": "ChatDev is a software company powered by multiple intelligent agents, such as chief executive officer, chief human resources officer, chief product officer, chief technology officer, etc, with a multi-agent organizational structure and the mission of 'changing the digital world through programming'." "background_prompt": "ChatDev is a software company powered by multiple intelligent agents, such as chief executive officer, chief human resources officer, chief product officer, chief technology officer, etc, with a multi-agent organizational structure and the mission of 'changing the digital world through programming'."
} }

View File

@ -36,6 +36,7 @@
"phase_prompt": [ "phase_prompt": [
"According to the new user's task and our software designs listed below: ", "According to the new user's task and our software designs listed below: ",
"Task: \"{task}\".", "Task: \"{task}\".",
"Task description: \"{description}\".",
"Modality: \"{modality}\".", "Modality: \"{modality}\".",
"Programming Language: \"{language}\"", "Programming Language: \"{language}\"",
"Ideas:\"{ideas}\"", "Ideas:\"{ideas}\"",

89
camel/web_spider.py Normal file
View File

@ -0,0 +1,89 @@
import requests
from bs4 import BeautifulSoup
import openai
from openai import OpenAI
import wikipediaapi
import os
import time
# Credentials and the optional endpoint override both come from the environment.
self_api_key = os.environ.get('OPENAI_API_KEY')
BASE_URL = os.environ.get('BASE_URL')

# Pass base_url only when BASE_URL is set to a non-empty value; otherwise the
# client is built from the API key alone.
client = (
    openai.OpenAI(api_key=self_api_key, base_url=BASE_URL)
    if BASE_URL
    else openai.OpenAI(api_key=self_api_key)
)
def get_baidu_baike_content(keyword, timeout=10):
    """Fetch the Baidu Baike page for *keyword* and return one tag's ``content``.

    Args:
        keyword: Entry name; interpolated into the ``/item/`` URL as given.
        timeout: Seconds to wait for the server. ``requests`` does not time
            out on its own, so a stalled connection would otherwise block
            the caller indefinitely.

    Returns:
        The ``content`` attribute of the tag picked by the positional lookup
        below.

    Raises:
        requests.exceptions.RequestException: On network failure or timeout.
        IndexError, KeyError, AttributeError: When the page layout does not
            match the positional lookup.
    """
    url = f'https://baike.baidu.com/item/{keyword}'
    # Plain GET request for the entry page.
    response = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(response.content, 'html.parser')
    # Positional lookup: last top-level node -> its first child -> that
    # node's fifth child, then read its ``content`` attribute.
    # NOTE(review): presumably <html> -> <head> -> the description <meta>
    # tag; this breaks as soon as the page layout shifts - confirm.
    main_content = soup.contents[-1].contents[0].contents[4].attrs['content']
    return main_content
def get_wiki_content(keyword):
    """Look up *keyword* through the Wikipedia API and return the page summary.

    The client is created with the language code ``'en'``. The title and
    summary are printed when the page exists; otherwise "Page not found." is
    printed. In both cases the page object's ``summary`` is returned
    (presumably empty for a missing page - confirm against Wikipedia-API).
    """
    wiki_client = wikipediaapi.Wikipedia('MyProjectName (merlin@example.com)', 'en')
    page = wiki_client.page(keyword)

    # Guard clause: report the miss and hand back whatever summary exists.
    if not page.exists():
        print("Page not found.")
        return page.summary

    print("Page - Title:", page.title)
    print("Page - Summary:", page.summary)
    return page.summary
def _ask_model(prompt):
    """Send *prompt* as a single user message and return the reply text."""
    response = client.chat.completions.create(
        messages=[{"role": "user", "content": prompt}],
        model="gpt-3.5-turbo-16k",
        temperature=0.2,
        top_p=1.0,
        n=1,
        stream=False,
        frequency_penalty=0.0,
        presence_penalty=0.0,
        logit_bias={},
    )
    return response.choices[0].message.content


def modal_trans(task_dsp):
    """Build a short background description for a task from Wikipedia.

    Steps: ask the model for the single most important keyword in
    *task_dsp*, fetch the summary for that keyword via ``get_wiki_content``,
    then ask the model to summarize it.

    Args:
        task_dsp: The raw task text.

    Returns:
        The summarized text, or ``''`` when no summary was found or any step
        failed. This is best-effort: failures are reported on stdout and
        never raised to the caller.
    """
    try:
        keyword_prompt = "'" + task_dsp + \
            "'Just give me the most important keyword about this sentence without explaining it and your answer should be only one keyword."
        keyword = _ask_model(keyword_prompt)
        # Replies can arrive padded or wrapped in quotes, which would make
        # the title lookup miss.
        keyword = keyword.strip().strip("\"'")
        spider_content = get_wiki_content(keyword)
        if not spider_content:
            # Nothing to summarize; asking the model to summarize an empty
            # paragraph would only produce filler text.
            print("the content is none")
            return ''
        summary_prompt = "'" + spider_content + \
            "',Summarize this paragraph and return the key information."
        # The reply text may be None; normalize to the documented ''.
        result = _ask_model(summary_prompt) or ''
        print("web spider content:", result)
    except Exception as exc:
        # Catch Exception rather than everything so Ctrl-C and interpreter
        # exit still propagate, and report the cause instead of hiding it.
        result = ''
        print("the content is none")
        print("web spider error:", repr(exc))
    return result

View File

@ -11,6 +11,7 @@ from camel.configs import ChatGPTConfig
from camel.typing import TaskType, ModelType from camel.typing import TaskType, ModelType
from chatdev.chat_env import ChatEnv, ChatEnvConfig from chatdev.chat_env import ChatEnv, ChatEnvConfig
from chatdev.statistics import get_info from chatdev.statistics import get_info
from camel.web_spider import modal_trans
from chatdev.utils import log_visualize, now from chatdev.utils import log_visualize, now
@ -59,6 +60,7 @@ class ChatChain:
# init chatchain config and recruitments # init chatchain config and recruitments
self.chain = self.config["chain"] self.chain = self.config["chain"]
self.recruitments = self.config["recruitments"] self.recruitments = self.config["recruitments"]
self.web_spider = self.config["web_spider"]
# init default max chat turn # init default max chat turn
self.chat_turn_limit_default = 10 self.chat_turn_limit_default = 10
@ -243,6 +245,8 @@ class ChatChain:
self.chat_env.env_dict['task_prompt'] = self.self_task_improve(self.task_prompt_raw) self.chat_env.env_dict['task_prompt'] = self.self_task_improve(self.task_prompt_raw)
else: else:
self.chat_env.env_dict['task_prompt'] = self.task_prompt_raw self.chat_env.env_dict['task_prompt'] = self.task_prompt_raw
if(check_bool(self.web_spider)):
self.chat_env.env_dict['task_description'] = modal_trans(self.task_prompt_raw)
def post_processing(self): def post_processing(self):
""" """

View File

@ -57,6 +57,7 @@ class ChatEnv:
self.env_dict = { self.env_dict = {
"directory": "", "directory": "",
"task_prompt": "", "task_prompt": "",
"task_description":"",
"modality": "", "modality": "",
"ideas": "", "ideas": "",
"language": "", "language": "",

View File

@ -324,6 +324,7 @@ class LanguageChoose(Phase):
def update_phase_env(self, chat_env):
    """Copy task, description, modality and ideas from *chat_env* into ``self.phase_env``."""
    env = chat_env.env_dict
    self.phase_env.update({
        "task": env['task_prompt'],
        # Use the stored description itself; a quoted expression would put
        # the literal source text into the prompt placeholder.
        "description": env['task_description'],
        "modality": env['modality'],
        "ideas": env['ideas'],
    })
@ -345,6 +346,7 @@ class Coding(Phase):
gui = "" if not chat_env.config.gui_design \ gui = "" if not chat_env.config.gui_design \
else "The software should be equipped with graphical user interface (GUI) so that user can visually and graphically use it; so you must choose a GUI framework (e.g., in Python, you can implement GUI via tkinter, Pygame, Flexx, PyGUI, etc,)." else "The software should be equipped with graphical user interface (GUI) so that user can visually and graphically use it; so you must choose a GUI framework (e.g., in Python, you can implement GUI via tkinter, Pygame, Flexx, PyGUI, etc,)."
self.phase_env.update({"task": chat_env.env_dict['task_prompt'], self.phase_env.update({"task": chat_env.env_dict['task_prompt'],
"description": "chat_env.env_dict['task_description']",
"modality": chat_env.env_dict['modality'], "modality": chat_env.env_dict['modality'],
"ideas": chat_env.env_dict['ideas'], "ideas": chat_env.env_dict['ideas'],
"language": chat_env.env_dict['language'], "language": chat_env.env_dict['language'],
@ -366,6 +368,7 @@ class ArtDesign(Phase):
def update_phase_env(self, chat_env): def update_phase_env(self, chat_env):
self.phase_env = {"task": chat_env.env_dict['task_prompt'], self.phase_env = {"task": chat_env.env_dict['task_prompt'],
"description": chat_env.env_dict['task_description'],
"language": chat_env.env_dict['language'], "language": chat_env.env_dict['language'],
"codes": chat_env.get_codes()} "codes": chat_env.get_codes()}

View File

@ -11,4 +11,5 @@ tiktoken==0.4.0
virtualenv==20.23.0 virtualenv==20.23.0
Werkzeug==2.3.6 Werkzeug==2.3.6
Markdown==3.4.4 Markdown==3.4.4
Pillow==10.1.0 Pillow==10.1.0
Wikipedia-API==0.6.0