#!/usr/bin/env /usr/bin/python3

# This file was used to generate test data before GitButler was able to work
# with real user data. It is unused for now, but we'll keep it around for a
# while in case it turns out to be useful for e.g. e2e testing.

# Standard library first, then third-party, per convention.
import json
import os
import subprocess

import openai

# The OpenAI key comes from the environment. If it is unset, api_key is None
# and openai reports the problem at request time, not here.
openai.api_key = os.getenv("OPENAI_API_KEY")

# unidiff parses the raw `gh pr diff` output into files and hunks below.
# Fail fast with an install hint instead of a bare ImportError traceback.
try:
    from unidiff import PatchSet
except ImportError:
    print(
        "unidiff is not installed, please install it first with: python3 -m pip install unidiff"
    )
    exit(1)

# Everything below shells out to the GitHub CLI; verify it is available.
# A missing binary surfaces as a non-zero shell exit -> CalledProcessError.
try:
    subprocess.check_output("gh --version", shell=True, text=True)
except subprocess.CalledProcessError:
    print("gh is not installed, please install it first from https://cli.github.com/")
    exit(1)
def get_last_n_pr_nums(n_prs):
    """Return the numbers of the `n_prs` most recently merged PRs, as strings.

    The shell pipeline does the work: gh lists merged PRs newest-first,
    head keeps the first n, awk extracts the PR-number column.
    """
    return subprocess.check_output(
        "gh pr list --state merged | head -n %d | awk '{print $1}'" % n_prs,
        shell=True,
        text=True,
    ).splitlines()
def summarize_hunk(hunk):
    """Ask gpt-3.5-turbo for a short (<80 char) summary of one diff hunk."""
    # Only the first 1000 characters go into the prompt, keeping it small.
    snippet = hunk[0:1000]
    prompt = f"""
Summarize the following git diff hunk in less than 80 characters:

```
{snippet}
```
"""
    chat = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": prompt}],
        max_tokens=32,
    )
    summary = chat.choices[0].message.content
    return summary.strip()
2023-06-13 19:48:59 +03:00
|
|
|
def process_pr(pr_number):
    """Build a GitButler-style "branch" dict describing one merged PR.

    Shells out to the GitHub CLI for the PR's metadata and diff, parses the
    diff into per-file / per-hunk entries (each hunk gets an AI-generated
    name via summarize_hunk), and returns the assembled branch dict.
    """
    # Fetch all four metadata fields in a single gh round-trip instead of one
    # subprocess per field, and parse the JSON directly so multi-line values
    # survive. The previous per-field `-q '.field'` + splitlines()[0] variant
    # truncated the PR body to its first line.
    meta = json.loads(
        subprocess.check_output(
            "gh pr view %s --json headRefName,updatedAt,title,body" % pr_number,
            shell=True,
            text=True,
        )
    )
    branch_name = meta["headRefName"]
    updated_at = meta["updatedAt"]
    title = meta["title"]
    body = meta["body"]

    diff = subprocess.check_output("gh pr diff %s" % pr_number, shell=True, text=True)
    patch = PatchSet(diff)

    files = []
    for file in patch:
        hunks = []
        for hunk in file:
            hunks.append(
                {
                    # Branch + path + target line makes the id unique enough.
                    "id": branch_name + ":" + file.path + ":" + str(hunk.target_start),
                    "name": summarize_hunk(str(hunk)),
                    "diff": str(hunk),
                    "kind": "hunk",
                    "modifiedAt": updated_at,
                    "filePath": file.path,
                }
            )
        files.append(
            {
                "id": branch_name + ":" + file.path,
                "path": file.path,
                "kind": "file",
                "hunks": hunks,
            }
        )

    return {
        "id": branch_name + ":" + pr_number,
        "name": branch_name,
        "active": True,
        "kind": "branch",
        "files": files,
        "description": title + "\n" + body,
    }
# PR numbers to turn into branch test data. Paste in whichever PRs you like,
# or use get_last_n_pr_nums(4) to take the most recently merged ones.
# prs = get_last_n_pr_nums(4)
prs = [
    "425",
    "429",
    "420",
    "414",
    "409",
    "407",
]

branches = [process_pr(num) for num in prs]

# Write the generated branches where the test fixtures expect them.
with open("scripts/branch_testdata.json", "w") as out:
    json.dump(branches, out, indent=4)