Merge pull request #315 from yu-shaonian/main

web spider
This commit is contained in:
Alpha Liu 2023-12-29 14:53:01 +08:00 committed by GitHub
commit db2e1ec854
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 102 additions and 2 deletions

View File

@ -96,7 +96,8 @@
"clear_structure": "True",
"gui_design": "True",
"git_management": "False",
"web_spider": "False",
"self_improve": "False",
"incremental_develop": "False",
"background_prompt": "ChatDev is a software company powered by multiple intelligent agents, such as chief executive officer, chief human resources officer, chief product officer, chief technology officer, etc, with a multi-agent organizational structure and the mission of 'changing the digital world through programming'."
}
}

View File

@ -36,6 +36,7 @@
"phase_prompt": [
"According to the new user's task and our software designs listed below: ",
"Task: \"{task}\".",
"Task description: \"{description}\".",
"Modality: \"{modality}\".",
"Programming Language: \"{language}\"",
"Ideas:\"{ideas}\"",

89
camel/web_spider.py Normal file
View File

@ -0,0 +1,89 @@
import requests
from bs4 import BeautifulSoup
import openai
from openai import OpenAI
import wikipediaapi
import os
import time
# The API key and the optional endpoint override are both read from the environment.
self_api_key = os.environ.get('OPENAI_API_KEY')
BASE_URL = os.environ.get('BASE_URL')

# Build the shared client once. `base_url` is forwarded only when BASE_URL is
# set to a non-empty value; otherwise the argument is omitted entirely.
_client_options = {"api_key": self_api_key}
if BASE_URL:
    _client_options["base_url"] = BASE_URL
client = openai.OpenAI(**_client_options)
def get_baidu_baike_content(keyword, timeout=10):
    """Fetch the Baidu Baike entry page for *keyword* and return one attribute from it.

    Args:
        keyword: Entry name appended to ``https://baike.baidu.com/item/``.
        timeout: Seconds to wait for the HTTP response before ``requests``
            raises; without it a stalled connection would block forever.

    Returns:
        The ``content`` attribute of the element at a fixed position in the
        parsed document.

    Raises:
        IndexError, KeyError, AttributeError: if the page layout does not
            match the fixed position that is read below.
    """
    from urllib.parse import quote

    # Percent-encode the keyword so characters such as '/', '?' or '#' stay
    # part of the entry name instead of changing the URL structure.
    entry_name = quote(str(keyword), safe='')
    url = f'https://baike.baidu.com/item/{entry_name}'
    # Plain GET request for the entry page.
    response = requests.get(url, timeout=timeout)
    # Parse the returned HTML.
    soup = BeautifulSoup(response.content, 'html.parser')
    # NOTE(review): this is a purely positional lookup (last top-level node ->
    # first child -> fifth child). Presumably it lands on the page's
    # <meta name="description"> tag; confirm against the live page, since any
    # layout change will break it.
    main_content = soup.contents[-1].contents[0].contents[4].attrs['content']
    return main_content
def get_wiki_content(keyword):
    """Look up *keyword* on English Wikipedia and return the page summary.

    Prints the page title and summary when the page exists, or
    "Page not found." otherwise. The page object's ``summary`` attribute is
    returned in both cases.
    """
    # Client for the English Wikipedia, identified by the given user-agent string.
    wiki = wikipediaapi.Wikipedia('MyProjectName (merlin@example.com)', 'en')
    page = wiki.page(keyword)

    if not page.exists():
        print("Page not found.")
    else:
        print("Page - Title:", page.title)
        print("Page - Summary:", page.summary)

    return page.summary
def _ask_llm(prompt):
    """Send *prompt* as a single user message and return the reply text ('' if empty)."""
    response = client.chat.completions.create(
        messages=[{"role": "user", "content": prompt}],
        model="gpt-3.5-turbo-16k",
        temperature=0.2,
        top_p=1.0,
        n=1,
        stream=False,
        frequency_penalty=0.0,
        presence_penalty=0.0,
        logit_bias={},
    )
    return response.choices[0].message.content or ""


def modal_trans(task_dsp):
    """Build a short background description for a task from Wikipedia.

    Steps: ask the model for the single most important keyword in
    *task_dsp*, fetch the Wikipedia summary for that keyword, then ask the
    model to summarize that text.

    Args:
        task_dsp: The raw task description text.

    Returns:
        The summarized text, or '' when no keyword or Wikipedia content was
        obtained or when any step failed. This function is best-effort and
        does not raise ordinary exceptions to the caller.
    """
    try:
        keyword_prompt = "'" + task_dsp + \
            "'Just give me the most important keyword about this sentence without explaining it and your answer should be only one keyword."
        # Drop surrounding whitespace and quotes the model may add around the keyword.
        keyword = _ask_llm(keyword_prompt).strip().strip("\"'")

        spider_content = get_wiki_content(keyword) if keyword else ""
        if not spider_content:
            # Nothing was found: do not ask the model to "summarize" an empty
            # quote, which would only produce made-up text.
            print("the content is none")
            return ''

        summary_prompt = "'" + spider_content + \
            "',Summarize this paragraph and return the key information."
        result = _ask_llm(summary_prompt)
        print("web spider content:", result)
    except Exception as exc:
        # Keep the best-effort contract, but let KeyboardInterrupt/SystemExit
        # propagate and show why the lookup failed.
        result = ''
        print("the content is none:", repr(exc))
    return result

View File

@ -11,6 +11,7 @@ from camel.configs import ChatGPTConfig
from camel.typing import TaskType, ModelType
from chatdev.chat_env import ChatEnv, ChatEnvConfig
from chatdev.statistics import get_info
from camel.web_spider import modal_trans
from chatdev.utils import log_visualize, now
@ -59,6 +60,7 @@ class ChatChain:
# init chatchain config and recruitments
self.chain = self.config["chain"]
self.recruitments = self.config["recruitments"]
self.web_spider = self.config["web_spider"]
# init default max chat turn
self.chat_turn_limit_default = 10
@ -243,6 +245,8 @@ class ChatChain:
self.chat_env.env_dict['task_prompt'] = self.self_task_improve(self.task_prompt_raw)
else:
self.chat_env.env_dict['task_prompt'] = self.task_prompt_raw
if(check_bool(self.web_spider)):
self.chat_env.env_dict['task_description'] = modal_trans(self.task_prompt_raw)
def post_processing(self):
"""

View File

@ -57,6 +57,7 @@ class ChatEnv:
self.env_dict = {
"directory": "",
"task_prompt": "",
"task_description":"",
"modality": "",
"ideas": "",
"language": "",

View File

@ -324,6 +324,7 @@ class LanguageChoose(Phase):
def update_phase_env(self, chat_env):
    """Copy the task, description, modality and ideas from the chat environment.

    Args:
        chat_env: Object whose ``env_dict`` holds the keys 'task_prompt',
            'task_description', 'modality' and 'ideas'.

    The values are merged into ``self.phase_env``; existing keys not listed
    here are left untouched.
    """
    shared = chat_env.env_dict
    self.phase_env.update({"task": shared['task_prompt'],
                           # Pass the stored description itself; a quoted
                           # expression here would put the literal text
                           # "chat_env.env_dict[...]" into the prompt.
                           "description": shared['task_description'],
                           "modality": shared['modality'],
                           "ideas": shared['ideas']})
@ -345,6 +346,7 @@ class Coding(Phase):
gui = "" if not chat_env.config.gui_design \
else "The software should be equipped with graphical user interface (GUI) so that user can visually and graphically use it; so you must choose a GUI framework (e.g., in Python, you can implement GUI via tkinter, Pygame, Flexx, PyGUI, etc,)."
self.phase_env.update({"task": chat_env.env_dict['task_prompt'],
"description": "chat_env.env_dict['task_description']",
"modality": chat_env.env_dict['modality'],
"ideas": chat_env.env_dict['ideas'],
"language": chat_env.env_dict['language'],
@ -366,6 +368,7 @@ class ArtDesign(Phase):
def update_phase_env(self, chat_env):
    """Replace ``self.phase_env`` with the task, description, language and codes.

    The first three values are read from ``chat_env.env_dict``; the codes
    come from ``chat_env.get_codes()``. Any previous contents of
    ``self.phase_env`` are discarded.
    """
    shared = chat_env.env_dict
    fresh_env = {}
    fresh_env["task"] = shared['task_prompt']
    fresh_env["description"] = shared['task_description']
    fresh_env["language"] = shared['language']
    fresh_env["codes"] = chat_env.get_codes()
    # Assign only after every value was gathered, so a failed lookup leaves
    # the previous phase_env in place.
    self.phase_env = fresh_env

View File

@ -11,4 +11,5 @@ tiktoken==0.4.0
virtualenv==20.23.0
Werkzeug==2.3.6
Markdown==3.4.4
Pillow==10.1.0
Pillow==10.1.0
Wikipedia-API==0.6.0