mirror of
https://github.com/facebook/sapling.git
synced 2024-10-07 07:17:55 +03:00
backup commits to sapling-pr-archive branch
Summary: Because `sl pr submit` relies on force-pushing to GitHub, we run the risk of commits from previous versions of a PR getting reaped, as GitHub periodically garbage collects commits (and trees and blobs) that are no longer reachable via a public branch. To ensure we can always see previous versions of a PR in its entirety, we merge the head of the development branch to a special branch named `sapling-pr-archive-{username}`. The idea is that this branch is not intended for development, but to defend against GitHub's garbage collection process. To support this, we add some new methods to `gh_submit.py`: - `get_username()` - `create_branch()` - `merge_into_branch()` These do what their names suggest via GitHub's GraphQL API. Because `merge_into_branch()` has some "expected" error cases (such as "branch does not exist"), we had to update the logic in `make_request()` to ensure we can read the error back out of the response to check whether it is one of these *expected* errors. Over in `submit.py`, we introduce new functions to test the response contents to see if it corresponds to an expected error: - `is_already_merged_error()` - `is_branch_does_not_exist_error()` Finally, `add_pr_head_to_archives()` is where we put it all together to merge the head of the development stack into the archive branch, creating the branch, if necessary. Reviewed By: quark-zju Differential Revision: D39919020 fbshipit-source-id: 3822e7cb22d0b6f308a912fe98eec8106a87b5d9
This commit is contained in:
parent
415868f503
commit
57e8b788df
@ -267,6 +267,80 @@ mutation ($pullRequestId: ID!, $title: String!, $body: String!) {
|
||||
return Result(ok=result.ok["data"]["updatePullRequest"]["pullRequest"]["id"])
|
||||
|
||||
|
||||
async def create_branch(*, repo_id: str, branch_name: str, oid: str) -> Result[str]:
    """Create a new branch in the repository pointing at the commit ``oid``.

    Args:
        repo_id: GraphQL node ID of the repository.
        branch_name: short branch name (the ``refs/heads/`` prefix is added here).
        oid: commit hash the new branch should point to.

    Returns the ID of the newly created Ref on success; otherwise propagates
    the error Result from make_request().
    """
    mutation = """
mutation ($repositoryId: ID!, $name: String!, $oid: GitObjectID!) {
  createRef(input: {repositoryId: $repositoryId, name: $name, oid: $oid}) {
    ref {
      id
    }
  }
}
"""
    request_params: Dict[str, Union[str, int]] = {
        "query": mutation,
        "repositoryId": repo_id,
        # createRef requires the fully qualified ref name.
        "name": f"refs/heads/{branch_name}",
        "oid": oid,
    }
    result = await make_request(request_params)
    if result.is_error():
        return result
    return Result(ok=result.ok["data"]["createRef"]["ref"]["id"])
|
||||
|
||||
|
||||
async def merge_into_branch(
    *, repo_id: str, oid_to_merge: str, branch_name: str
) -> Result[str]:
    """Takes the hash, oid_to_merge, and merges it into the specified branch_name.

    Args:
        repo_id: GraphQL node ID of the repository.
        oid_to_merge: commit hash to merge (the "head" of the merge).
        branch_name: branch to merge into (the "base" of the merge).

    Returns the oid of the resulting merge commit on success; otherwise
    propagates the error Result from make_request().
    """
    mutation = """
mutation ($repositoryId: ID!, $base: String!, $head: String!) {
  mergeBranch(input: {repositoryId: $repositoryId, base: $base, head: $head}) {
    mergeCommit {
      oid
    }
  }
}
"""
    request_params: Dict[str, Union[str, int]] = {
        "query": mutation,
        "repositoryId": repo_id,
        "base": branch_name,
        "head": oid_to_merge,
    }
    result = await make_request(request_params)
    if result.is_error():
        return result
    return Result(ok=result.ok["data"]["mergeBranch"]["mergeCommit"]["oid"])
|
||||
|
||||
|
||||
async def get_username() -> Result[str]:
    """Returns the username associated with the auth token.

    NOTE(review): reading the cached login out of the local gh config
    (presumably via a helper in the graphql module — confirm the exact
    function name) is slightly faster than this network round-trip.
    """
    viewer_query = """
query {
  viewer {
    login
  }
}
"""
    request_params: Dict[str, Union[str, int]] = {
        "query": viewer_query,
    }
    result = await make_request(request_params)
    return (
        result
        if result.is_error()
        else Result(ok=result.ok["data"]["viewer"]["login"])
    )
|
||||
|
||||
|
||||
async def make_request(
|
||||
params: Dict[str, Union[str, int]], endpoint="graphql"
|
||||
) -> Result:
|
||||
@ -286,17 +360,20 @@ async def make_request(
|
||||
)
|
||||
stdout, stderr = await proc.communicate()
|
||||
|
||||
if proc.returncode != 0:
|
||||
return Result(
|
||||
error=f"Failure running {' '.join(args)}\nstdout: {stdout}\nstderr: {stderr}\n"
|
||||
)
|
||||
# If proc exits with a non-zero exit code, the stdout may still
|
||||
# be valid JSON, but we expect it to have an "errors" property defined.
|
||||
try:
|
||||
response = json.loads(stdout)
|
||||
except json.JSONDecodeError:
|
||||
response = None
|
||||
|
||||
return check_stdout(stdout)
|
||||
|
||||
|
||||
def check_stdout(stdout: bytes) -> Result:
|
||||
response = json.loads(stdout)
|
||||
if "errors" in response:
|
||||
return Result(error=json.dumps(response, indent=1))
|
||||
else:
|
||||
if proc.returncode == 0:
|
||||
assert response is not None
|
||||
assert "errors" not in response
|
||||
return Result(ok=response)
|
||||
elif response is None:
|
||||
return Result(
|
||||
error=f"exit({proc.returncode}) Failure running {' '.join(args)}\nstdout: {stdout}\nstderr: {stderr}\n"
|
||||
)
|
||||
else:
|
||||
return Result(error=json.dumps(response, indent=1))
|
||||
|
@ -100,7 +100,7 @@ async def update_commits_in_stack(ui, repo) -> int:
|
||||
|
||||
# These are set lazily because they require GraphQL calls.
|
||||
next_pull_request_number = None
|
||||
repository = None
|
||||
repository: Optional[Repository] = None
|
||||
|
||||
for partition in partitions:
|
||||
top = partition[0]
|
||||
@ -157,13 +157,14 @@ async def update_commits_in_stack(ui, repo) -> int:
|
||||
# create/update the pull request title and body, as appropriate.
|
||||
pr_numbers = [none_throws(p[0].pr).number for p in partitions]
|
||||
|
||||
rewrite_requests = [
|
||||
# Add the head of the stack to the sapling-pr-archive branch.
|
||||
tip = hex(partitions[0][0].node)
|
||||
|
||||
rewrite_and_archive_requests = [
|
||||
rewrite_pull_request_body(partition, index, pr_numbers, ui)
|
||||
for index, partition in enumerate(partitions)
|
||||
]
|
||||
await asyncio.gather(*rewrite_requests)
|
||||
|
||||
# TODO: Add the head of the stack to the reviewstack-archive branch.
|
||||
] + [add_pr_head_to_archives(origin=origin, repository=repository, tip=tip)]
|
||||
await asyncio.gather(*rewrite_and_archive_requests)
|
||||
return 0
|
||||
|
||||
|
||||
@ -318,6 +319,156 @@ async def get_pull_request_details_or_throw(pr_id: PullRequestId) -> PullRequest
|
||||
return none_throws(result.ok)
|
||||
|
||||
|
||||
async def add_pr_head_to_archives(
    *, origin: str, repository: Optional[Repository], tip: str
) -> None:
    """Takes the specified commit (tip) and merges it into the appropriate
    archive branch for the (repo, username). GitHub will periodically garbage
    collect commits that are no longer part of a public branch, but we want to
    prevent this to ensure previous versions of a PR can be viewed later, even
    after it has been updated via a force-push.

    tip is the hex version of the commit hash to be merged into the archive branch.

    Raises error.Abort when the username cannot be determined, or when GitHub
    reports an error other than the two "expected" ones ("already merged" and
    "branch does not exist").
    """
    username = await get_username()
    if not username:
        raise error.Abort(_("could not determine GitHub username"))

    # repository is resolved lazily by the caller, so it may still be None here.
    if not repository:
        repository = await get_repository_for_origin(origin)
    branch_name = f"sapling-pr-archive-{username}"
    # Try to merge the tip directly, though this may fail if tip has already
    # been merged or if the branch has not been created before. We try to merge
    # without checking for the existence of the branch to try to avoid a TOCTOU
    # error.
    result = await gh_submit.merge_into_branch(
        repo_id=repository.id, oid_to_merge=tip, branch_name=branch_name
    )
    if not result.is_error():
        return

    import json

    # The merge failed: parse the error payload to decide whether it is one of
    # the "expected" failure modes.
    # TODO: Store Result.error as Dict so we don't have to parse it again.
    err = none_throws(result.error)
    response = None
    try:
        response = json.loads(err)
    except json.JSONDecodeError:
        # response is not guaranteed to be valid JSON.
        pass

    if response and is_already_merged_error(response):
        # Nothing to do!
        return
    elif response and is_branch_does_not_exist_error(response):
        # Archive branch does not exist yet, so initialize it with the current
        # tip.
        result = await gh_submit.create_branch(
            repo_id=repository.id, branch_name=branch_name, oid=tip
        )
        if result.is_error():
            raise error.Abort(
                _("unexpected error when trying to create branch %s with commit %s: %s")
                % (branch_name, tip, result.error)
            )
    else:
        raise error.Abort(
            _("unexpected error when trying to merge %s into %s: %s")
            % (tip, branch_name, err)
        )
|
||||
|
||||
|
||||
def is_already_merged_error(response) -> bool:
    r"""Return True when the GraphQL error payload indicates the commit was
    already merged into the target branch.

    >>> response = {
    ...   "data": {
    ...     "mergeBranch": None
    ...   },
    ...   "errors": [
    ...     {
    ...       "type": "UNPROCESSABLE",
    ...       "path": [
    ...         "mergeBranch"
    ...       ],
    ...       "locations": [
    ...         {
    ...           "line": 2,
    ...           "column": 3
    ...         }
    ...       ],
    ...       "message": "Failed to merge: \"Already merged\""
    ...     }
    ...   ]
    ... }
    >>> is_already_merged_error(response)
    True
    """
    error_entries = response.get("errors")
    # Guard against a missing, empty, or non-list "errors" property.
    if not isinstance(error_entries, list) or not error_entries:
        return False
    return any(
        entry.get("type") == "UNPROCESSABLE"
        and isinstance(entry.get("message"), str)
        and "Already merged" in entry.get("message")
        for entry in error_entries
    )
|
||||
|
||||
|
||||
def is_branch_does_not_exist_error(response) -> bool:
    r"""Returns True when the GraphQL error payload indicates the merge failed
    because the base branch does not exist (GitHub reports "No such base.").

    Fix: the doctest below previously set up ``response`` but never invoked
    the function, so it verified nothing.

    >>> response = {
    ...   "data": {
    ...     "mergeBranch": None
    ...   },
    ...   "errors": [
    ...     {
    ...       "type": "NOT_FOUND",
    ...       "path": [
    ...         "mergeBranch"
    ...       ],
    ...       "locations": [
    ...         {
    ...           "line": 2,
    ...           "column": 3
    ...         }
    ...       ],
    ...       "message": "No such base."
    ...     }
    ...   ]
    ... }
    >>> is_branch_does_not_exist_error(response)
    True
    """
    errors = response.get("errors")
    # "errors" may be absent, empty, or (defensively) not a list at all.
    if not errors or not isinstance(errors, list):
        return False
    for err in errors:
        # Only NOT_FOUND entries can signal a missing base branch.
        if err.get("type") != "NOT_FOUND":
            continue
        message = err.get("message")
        if isinstance(message, str) and "No such base." in message:
            return True
    return False
|
||||
|
||||
|
||||
async def get_username() -> Optional[str]:
    """Returns the username associated with the GitHub personal access token.

    Parsing the local gh config is attempted first because it is faster, but
    we fall back to GraphQL, as it is more reliable, if the parse fails for
    some reason. Returns None when neither source yields a username.
    """
    from .graphql import try_parse_oauth_token_from_gh_config

    parsed = try_parse_oauth_token_from_gh_config()
    if parsed:
        # parsed is a (username, token) pair; only the username is needed.
        return parsed[0]

    result = await gh_submit.get_username()
    if result.is_error():
        return None
    return none_throws(result.ok)
|
||||
|
||||
|
||||
def run_git_command(args: List[str], gitdir: str):
|
||||
full_args = ["git", "--git-dir", gitdir] + args
|
||||
proc = subprocess.run(full_args, capture_output=True)
|
||||
|
Loading…
Reference in New Issue
Block a user