mirror of
https://github.com/OpenBMB/ChatDev.git
synced 2024-11-07 18:40:13 +03:00
312 lines
14 KiB
Python
312 lines
14 KiB
Python
import os
|
|
import time
|
|
from graph import Graph, Node, Edge
|
|
import sys
|
|
import openai
|
|
import numpy as np
|
|
from codes import Codes
|
|
from utils import get_easyDict_from_filepath,OpenAIModel,log_and_print_online
|
|
from embedding import OpenAIEmbedding
|
|
sys.path.append(os.path.join(os.getcwd(),"ecl"))
|
|
class Shortcut:
    """A candidate shortcut between two nodes of the graph.

    Attributes:
        sourceMID: identifier of the node the shortcut starts from.
        targetMID: identifier of the node the shortcut leads to.
        valueGain: value assigned by the caller (formatted as a float in
            ``__str__``).
        embedding: always initialised to ``None``; not set by this class.
        instructionStar: instruction text attached to the shortcut.
        edgeIDPath: sequence of edge ids associated with the shortcut.
    """

    def __init__(self, sourceMID, targetMID, valueGain, instructionStar, edgeIDPath):
        # Assignment order determines the key order of ``__dict__``; keep it
        # stable in case instances are serialised through ``__dict__``.
        self.sourceMID = sourceMID
        self.targetMID = targetMID
        self.valueGain = valueGain
        self.embedding = None
        self.instructionStar = instructionStar
        self.edgeIDPath = edgeIDPath

    def __str__(self):
        """Return a one-line summary with at most 100 chars of the instruction."""
        # Newlines are stripped from the preview so the summary stays on one line.
        preview = self.instructionStar[:100].replace("\n", "")
        path_length = len(self.edgeIDPath)
        return (
            f"{self.sourceMID} -> {self.targetMID} "
            f"valueGain={self.valueGain:.6f} "
            f"len(instructionPath)={path_length} "
            f"instructionStar={preview}"
        )
|
|
|
|
class Experience:
    """Scores the nodes of a code-version graph and extracts shortcut experiences.

    The constructor reads ``./ecl/config.yaml`` for ``experience.threshold``
    and ``experience.upper_limit``, creates the chat model and the embedding
    backend, accumulates the in-degree of every node from the graph's edges
    and resets every node value to ``1.0``.
    """

    def __init__(self, graph: "Graph", directory: str):
        """Bind the graph and task directory and initialise node bookkeeping.

        Args:
            graph: graph exposing ``nodes`` (mapping id -> node) and ``edges``.
            directory: directory that is later searched for a ``.prompt`` file.
        """
        cfg = get_easyDict_from_filepath("./ecl/config.yaml")
        self.graph: "Graph" = graph
        self.directory = directory
        self.threshold = cfg.experience.threshold
        self.upperLimit = cfg.experience.upper_limit
        self.experiences = []

        self.model = OpenAIModel(model_type="gpt-3.5-turbo-16k")
        self.embedding_method = OpenAIEmbedding()

        # Every edge contributes one unit of degree to its target node.
        for edge in self.graph.edges:
            node = self.graph.nodes[edge.targetMID]
            node.degree += 1
        assert len(self.graph.edges) * 1 == sum([self.graph.nodes[mid].degree for mid in self.graph.nodes.keys()])  # unidirectional

        for mid in self.graph.nodes.keys():
            node = self.graph.nodes[mid]
            node.value = 1.0

    def reap_zombie(self):
        """Remove every edge and node that is not on ``graph.find_shortest_path()``."""
        pathNodes, pathEdges = self.graph.find_shortest_path()

        # Collect first, mutate afterwards, so the containers are never
        # modified while they are being iterated.
        zombieEdges = [edge for edge in self.graph.edges if edge not in pathEdges]
        zombieNodes = [self.graph.nodes[mid] for mid in self.graph.nodes.keys() if mid not in pathNodes]

        log_zombieedges = "ZOMBIE EDGES: \n"
        log_zombienodes = "ZOMBIE NODES: \n"

        for edge in zombieEdges:
            self.graph.edges.remove(edge)
            log_zombieedges += "Zombie Edge {} -> {} Removed\n".format(edge.sourceMID, edge.targetMID)
        log_and_print_online(log_zombieedges)

        for node in zombieNodes:
            del self.graph.nodes[node.mID]
            log_zombienodes += "Zombie Node {} Removed\n".format(node.mID)
        log_and_print_online(log_zombienodes)

    def estimate(self):
        """Assign a value to every node relative to the target of the last edge.

        Does nothing when the graph has no edges. Nodes with empty code are
        zeroed first; every node value is then replaced by the result of
        ``_pairwise_estimate(node, final_node)``.
        """
        if len(self.graph.edges) == 0:
            return

        for mid in self.graph.nodes.keys():
            node = self.graph.nodes[mid]
            if len(node.code) == 0:
                node.value *= 0.0

        log_and_print_online()

        # Snapshot the values *before* they are overwritten; previously both
        # halves of the log message were built afterwards and were identical.
        init_values = {mid: self.graph.nodes[mid].value for mid in self.graph.nodes.keys()}

        vn = self.graph.nodes[self.graph.edges[-1].targetMID]
        for mid in self.graph.nodes.keys():
            vi = self.graph.nodes[mid]
            vi.value = self._pairwise_estimate(vi, vn)

        estimated_values = {mid: self.graph.nodes[mid].value for mid in self.graph.nodes.keys()}
        log_and_print_online("Init value:" + str(init_values) + "\n\nEstimated value:" + str(estimated_values))

    def get_cosine_similarity(self, embeddingi, embeddingj):
        """Return the cosine similarity of two vectors.

        Returns ``0.0`` when either vector has zero norm, instead of dividing
        by zero and producing ``nan``.
        """
        embeddingi = np.array(embeddingi)
        embeddingj = np.array(embeddingj)
        norm_product = np.linalg.norm(embeddingi) * np.linalg.norm(embeddingj)
        if norm_product == 0.0:
            return 0.0
        return embeddingi.dot(embeddingj) / norm_product

    def _get_node_embedding(self, node):
        """Return the node's code embedding, computing and caching it on first use."""
        if node.embedding is not None:
            return node.embedding
        start_time = time.time()
        embedding = self.embedding_method.get_code_embedding(node.code)
        end_time = time.time()
        log_and_print_online("DONE:get node embedding\ntime cost:{}\n".format(end_time - start_time))
        node.embedding = embedding
        return embedding

    def _get_task_embedding(self):
        """Return the task-prompt embedding, computing and caching it on first use.

        On a cache miss the first ``*.prompt`` file listed in
        ``self.directory`` is read; ``graph.task`` and ``graph.task_embedding``
        are then populated.

        Raises:
            IndexError: if the directory contains no ``.prompt`` file.
        """
        if self.graph.task_embedding is not None:
            return self.graph.task_embedding

        prompt_files = [name for name in os.listdir(self.directory) if name.endswith(".prompt")]
        # ``with`` guarantees the handle is closed (it used to be leaked).
        with open(os.path.join(self.directory, prompt_files[0]), "r") as prompt_file:
            task_prompt = prompt_file.read().strip()

        start_time = time.time()
        task_emb = self.embedding_method.get_text_embedding(task_prompt)
        end_time = time.time()
        log_and_print_online("DONE:get task prompt embedding\ntime cost:{}\n".format(end_time - start_time))
        self.graph.task = task_prompt
        self.graph.task_embedding = task_emb
        return task_emb

    def _pairwise_estimate(self, vi: "Node", vj: "Node"):
        """Score node ``vi`` with respect to node ``vj``.

        Returns ``0.0`` as soon as any factor is zero (current value, failed
        run of the node's code, degree, code/code similarity, code/task
        similarity). Returns ``1`` when both nodes have the same ``version``.
        Otherwise returns
        ``code_code_sim / (vj.version - vi.version) * code_text_sim
        * compile_weight * degree_weight``.
        """
        if vi.value == 0.0:
            return 0.0

        _, pathEdges = self.graph.find_shortest_path(vi.mID, vj.mID)
        # Only sanity-checked by the assertion below; not part of the score.
        distance_weight = 1.0 / len(pathEdges) if len(pathEdges) != 0 else 1.0

        codes = Codes(vi.code)
        codes._rewrite_codes()
        (exist_bugs_flag, _) = codes._run_codes()
        compile_weight = 0.0 if exist_bugs_flag else 1.0
        if compile_weight == 0.0:
            return 0.0

        maximum_degree = max([self.graph.nodes[mid].degree for mid in self.graph.nodes.keys()])
        degree_weight = vi.degree * 1.0 / maximum_degree
        if degree_weight == 0.0:
            return 0.0

        vi_code_emb = self._get_node_embedding(vi)
        vj_code_emb = self._get_node_embedding(vj)
        code_code_cos_sim = self.get_cosine_similarity(vi_code_emb, vj_code_emb)
        if code_code_cos_sim == 0.0:
            return 0.0

        task_emb = self._get_task_embedding()
        code_text_cos_sim = self.get_cosine_similarity(vi_code_emb, task_emb)
        if code_text_cos_sim == 0.0:
            return 0.0

        assert distance_weight >= 0.0 and distance_weight <= 1.0
        assert compile_weight >= 0.0 and compile_weight <= 1.0
        assert degree_weight >= 0.0 and degree_weight <= 1.0

        distance = vj.version - vi.version
        if distance == 0:
            return 1
        return code_code_cos_sim * 1.0 / distance * code_text_cos_sim * compile_weight * degree_weight

    def get_transitive_closure(self):
        """Return the reachability matrix of the edges on the shortest path.

        The result is a nested dict ``matrix[source][target]`` holding ``1``
        when ``target`` is reachable from ``source`` along the edges returned
        by ``graph.find_shortest_path()`` and ``0`` otherwise. The initial
        adjacency matrix and the closure are both printed to stdout.
        """
        def print_matrix(matrix):
            for nodei in matrix.keys():
                for nodej in matrix.keys():
                    print(matrix[nodei][nodej], end=" ")
                print()
            print()

        # Warshall Algorithm
        matrix = {}
        for mid1 in self.graph.nodes:
            matrix[mid1] = {}
            for mid2 in self.graph.nodes:
                matrix[mid1][mid2] = 0

        _, pathEdges = self.graph.find_shortest_path()
        for edge in pathEdges:
            matrix[edge.sourceMID][edge.targetMID] = 1
        print("Init Adjacent Matrix:")
        print_matrix(matrix)

        for nodek in matrix.keys():
            for nodei in matrix.keys():
                for nodej in matrix.keys():
                    if matrix[nodei][nodej] == 1 or (matrix[nodei][nodek] == 1 and matrix[nodek][nodej] == 1):
                        matrix[nodei][nodej] = 1
        print("Transitive Closure:")
        print_matrix(matrix)

        return matrix

    def extract_thresholded_experiences(self):
        """Build, rank, truncate and annotate shortcut experiences.

        A pair ``(id1, id2)`` of nodes on the shortest path becomes a
        ``Shortcut`` when the ids differ, no direct edge exists, ``id2`` is
        reachable from ``id1``, the value gain reaches ``self.threshold`` and
        the target code has no line that is exactly ``pass`` or ``todo``
        (case-insensitive, stripped). Shortcuts are sorted by value gain
        (descending), cut to ``self.upperLimit`` and each receives an
        ``instructionStar`` generated by the chat model. The result is stored
        in ``self.experiences`` and returned; an empty list is returned
        (without touching ``self.experiences``) when the graph has no edges or
        fewer than two nodes.
        """
        if len(self.graph.edges) == 0:
            return []
        if len(self.graph.nodes) < 2:
            return []
        assert len(self.graph.nodes.keys()) >= 2
        matrix = self.get_transitive_closure()

        experiences = []
        pathNodes, _ = self.graph.find_shortest_path()
        for id1 in pathNodes:
            for id2 in pathNodes:
                valueGain = self.graph.nodes[id2].value - self.graph.nodes[id1].value
                flag0 = id1 != id2
                flag1 = self.graph.exists_edge(id1, id2) == False
                flag2 = matrix[id1][id2] == 1
                flag3 = valueGain >= self.threshold

                # Reject targets that still contain placeholder lines.
                code_lines = [line.lower().strip() for line in self.graph.nodes[id2].code.split("\n")]
                flag4 = not ("pass".lower() in code_lines or "TODO".lower() in code_lines)

                if flag0 and flag1 and flag2 and flag3 and flag4:
                    _, edges = self.graph.find_shortest_path(uMID=id1, vMID=id2)
                    edgeIDPath = [edge.edgeId for edge in edges]
                    shortcut = Shortcut(sourceMID=id1, targetMID=id2, valueGain=valueGain, instructionStar="", edgeIDPath=edgeIDPath)
                    experiences.append(shortcut)

        experiences = sorted(experiences, key=lambda item: item.valueGain, reverse=True)

        if len(experiences) > self.upperLimit:
            log_and_print_online("{} experieces truncated.".format(len(experiences) - self.upperLimit))
            experiences = experiences[:self.upperLimit]

        # Used when the source node has no code: describe the target from scratch.
        prompt_template0 = """Provide detailed instructions to generate the following code:
{targetcode}

The instructions should encompass:

Modules and Classes:
- Enumerate necessary modules.
- Detail the classes, their attributes, and methods within these modules.
- Articulate the purpose and operation of each class.

Data Structures:
- Identify the requisite data structures.
- Describe their names, attributes, and operations.

Main Program Flow:
- Outline the principal progression of the program.
- Highlight the sequence for initializing and invoking other modules, classes, and methods within the primary file (e.g., main.py).
- Clarify the logical progression during runtime.

Input and Output:
- Specify the method by which the program accepts input, be it from users or external sources.
- Elaborate on the projected outputs or actions of the software.

Exception Handling:
- Instruct on the approach to manage potential anomalies or exceptions during execution to ascertain stability and robustness.

External Libraries and Dependencies:
- Explicitly list the necessary external libraries or dependencies, their versions, and their functionalities.

Please output the instructions directly."""

        # Used otherwise: describe the transition from source code to target code.
        prompt_template1 = """Please provide detailed instructions on how to transition from the initial code version represented by source code to the final version indicated by target code.

Source Code:
{sourcecode}

Target Code:
{targetcode}

The instructions should encompass:

Modules and Classes: Detail the modules to be incorporated, along with the names, attributes, and operations of any classes to be added or amended. Furthermore, describe the intended function and utility of these new or altered classes.

Data Structures: Clearly define any data structures that need introduction or alteration, elucidating their names, attributes, and functionalities.

Main Program Flow: Outline the program's primary sequence of operations, highlighting the procedures to initialize and invoke other modules, classes, and methods in the primary file (e.g., main.py). Describe the program's logic sequence during its execution.

Input and Output: Define the methodology by which the program will acquire input, whether from users or external data sources. Also, characterize the projected outputs or behaviors of the application.

Exception Handling: Provide guidance on managing potential discrepancies or exceptions that might emerge during the software's operation, ensuring its resilience and reliability.

External Libraries and Dependencies: If the implementation requires external libraries or dependencies, specify their names, versions, and their respective purposes explicitly."""

        for shortcut in experiences:
            sourcecode = self.graph.nodes[shortcut.sourceMID].code
            targetcode = self.graph.nodes[shortcut.targetMID].code
            if sourcecode == "":
                prompt = prompt_template0.replace("{targetcode}", targetcode)
            else:
                prompt = prompt_template1.replace("{sourcecode}", sourcecode).replace("{targetcode}", targetcode)
            response = self.model.run(messages=[{"role": "system", "content": prompt}])
            print("instructionstar generated")
            shortcut.instructionStar = response["choices"][0]["message"]["content"]

        self.experiences = experiences

        output = "Sorted-and-Truncated Experiences (with instructionStar):"
        output += "".join(str(experience) for experience in experiences)
        log_and_print_online(output)
        log_and_print_online("[Conclusion]:\nprompt_tokens:{}, completion_tokens:{}, total_tokens:{}".format(self.model.prompt_tokens, self.model.completion_tokens, self.model.total_tokens))
        log_and_print_online("[Conclusion]:\ntext_prompt_tokens:{}, text_total_tokens:{}\ncode_prompt_tokens:{}, code_total_tokens:{}\nprompt_tokens:{}, total_tokens:{}".format(
            self.embedding_method.text_prompt_tokens,
            self.embedding_method.text_total_tokens,
            self.embedding_method.code_prompt_tokens,
            self.embedding_method.code_total_tokens,
            self.embedding_method.prompt_tokens,
            self.embedding_method.total_tokens))

        return experiences

    def to_dict(self):
        """Return the ``__dict__`` of every stored experience, in order.

        The dictionaries are the live instance dicts, not copies.
        """
        return [ex.__dict__ for ex in self.experiences]
|