sapling/eden/scm/edenscm/mercurial/edenapi_upload.py

350 lines
11 KiB
Python
Raw Normal View History

# Copyright (c) Facebook, Inc. and its affiliates.
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2.
from __future__ import absolute_import
import os
import bindings
from . import node as nodemod, error, mutation
from .i18n import _, _n
# Key under which each EdenApi response item carries its upload token.
# This module treats a falsy value there as "not found / not uploaded".
TOKEN_KEY = "token"
# Key under which each EdenApi response item carries the position of the
# corresponding entry in the request list.
INDEX_KEY = "index"
def getreponame(repo):
    """Return the repository name to use for EdenApi requests.

    The name is read from the ``remotefilelog.reponame`` config. When that
    is not set, the basename of the ``paths.default`` config is used instead.

    Raises ``error.Abort`` when neither source yields a non-empty name.
    """
    # ``paths.default`` may be unset. ``os.path.basename(None)`` raises
    # TypeError, which would break even repos that do configure
    # ``remotefilelog.reponame``, so fall back to an empty name instead.
    defaultpath = repo.ui.config("paths", "default")
    fallback = os.path.basename(defaultpath) if defaultpath else ""
    reponame = repo.ui.config("remotefilelog", "reponame", fallback)
    if not reponame:
        # The first positional argument of Abort is the message itself.
        raise error.Abort(_("unknown repo"))
    return reponame
def _filtercommits(repo, nodes):
    """Return the subset of ``nodes`` that the server does not report as known.

    Every item of the ``commitknown`` response is inspected; its ``hgid`` is
    kept unless ``item["known"]["Ok"]`` is exactly ``True``.

    Rust and HTTP errors from the request are re-raised as ``error.Abort``.
    """
    try:
        with repo.ui.timesection("http.edenapi.upload_filter_commits"):
            missing = []
            for entry in repo.edenapi.commitknown(getreponame(repo), nodes):
                # Anything other than an explicit ``Ok: True`` counts as missing.
                if entry["known"].get("Ok") is not True:
                    missing.append(entry["hgid"])
            return missing
    except (error.RustError, error.HttpError) as e:
        raise error.Abort(e)
def _filteruploaded(repo, blobs, trees):
    """Return ``(missingblobs, missingtrees)``: the entries the lookup did not find.

    A single lookup request is sent with the second element of every blob
    and the first element of every tree. Response items carry an index into
    the combined request: blobs come first, so tree indices are offset by
    ``len(blobs)``.

    Rust and HTTP errors from the request are re-raised as ``error.Abort``.
    """
    try:
        with repo.ui.timesection("http.edenapi.upload_lookup"):
            reponame = getreponame(repo)
            filenodeids = [blob[1] for blob in blobs]
            treeids = [tree[0] for tree in trees]
            results = list(
                repo.edenapi.lookup_filenodes_and_trees(
                    reponame, filenodeids, treeids
                )
            )

            blobcount = len(blobs)
            knownblobs = set()
            knowntrees = set()
            for entry in results:
                token = entry[TOKEN_KEY]
                if not token:
                    continue
                tokenid = token["data"]["id"]
                if "HgFilenodeId" in tokenid:
                    knownblobs.add(entry[INDEX_KEY])
                if "HgTreeId" in tokenid:
                    # Map the combined index back to a position in ``trees``.
                    knowntrees.add(entry[INDEX_KEY] - blobcount)

            missingblobs = [
                blob for pos, blob in enumerate(blobs) if pos not in knownblobs
            ]
            missingtrees = [
                tree for pos, tree in enumerate(trees) if pos not in knowntrees
            ]
            return missingblobs, missingtrees
    except (error.RustError, error.HttpError) as e:
        raise error.Abort(e)
Use bonsai changeset upload on client Summary: ## High level goal This stack aims to add a way to upload commits directly using the bonsai format via edenapi, instead of using the hg format and converting on server. The reason this is necessary is that snapshots will be uploaded on bonsai format directly, as hg format doesn't support them. So this is a stepping stone to do that, first being implemented on commit cloud upload, as that code already uses eden api, and later will be used by the snapshotting commands. ## This diff This diff actually ties everything together from the stack and makes it work end to end. By creating the following client side changes: - Add some config to use the bonsai format when uploading via EdenApi. The config is disabled by default. - Add wrapper around new uploadfileblobs method (from D29799484 (https://github.com/facebookexperimental/eden/commit/8586ae10771c79e42c26ac22ad43372e10ff077c)) - Getting the correct data to call the bonsai changeset upload endpoint created on D29849963 (https://github.com/facebookexperimental/eden/commit/b6548a10cbe3d0bbcc475e4283ea0fae6a64149e) - Some fields are String and not bytes - Some fields are renamed - File size and type can be acquired from file context. file content id, which is also required, is obtained as a response from the uploadfileblobs method: Behaviour added on D29879617 (https://github.com/facebookexperimental/eden/commit/9aae11a5abfe316e510becf0cd381bfa54b55a14) Reviewed By: liubov-dmitrieva Differential Revision: D29849964 fbshipit-source-id: a039159f927f49bbc45d4e0160ec1d3a01334eca
2021-07-28 12:14:31 +03:00
def _uploadfilenodes(repo, keys):
"""Upload file content and filenodes"""
if not keys:
return
dpack, _hpack = repo.fileslog.getmutablelocalpacks()
try:
with repo.ui.timesection("http.edenapi.upload_files"):
stream, _stats = repo.edenapi.uploadfiles(dpack, getreponame(repo), keys)
foundindices = {item[INDEX_KEY] for item in stream if item[TOKEN_KEY]}
repo.ui.status(
_n(
"uploaded %d file\n",
"uploaded %d files\n",
len(foundindices),
)
% len(foundindices),
component="edenapi",
)
return foundindices
Use bonsai changeset upload on client Summary: ## High level goal This stack aims to add a way to upload commits directly using the bonsai format via edenapi, instead of using the hg format and converting on server. The reason this is necessary is that snapshots will be uploaded on bonsai format directly, as hg format doesn't support them. So this is a stepping stone to do that, first being implemented on commit cloud upload, as that code already uses eden api, and later will be used by the snapshotting commands. ## This diff This diff actually ties everything together from the stack and makes it work end to end. By creating the following client side changes: - Add some config to use the bonsai format when uploading via EdenApi. The config is disabled by default. - Add wrapper around new uploadfileblobs method (from D29799484 (https://github.com/facebookexperimental/eden/commit/8586ae10771c79e42c26ac22ad43372e10ff077c)) - Getting the correct data to call the bonsai changeset upload endpoint created on D29849963 (https://github.com/facebookexperimental/eden/commit/b6548a10cbe3d0bbcc475e4283ea0fae6a64149e) - Some fields are String and not bytes - Some fields are renamed - File size and type can be acquired from file context. file content id, which is also required, is obtained as a response from the uploadfileblobs method: Behaviour added on D29879617 (https://github.com/facebookexperimental/eden/commit/9aae11a5abfe316e510becf0cd381bfa54b55a14) Reviewed By: liubov-dmitrieva Differential Revision: D29849964 fbshipit-source-id: a039159f927f49bbc45d4e0160ec1d3a01334eca
2021-07-28 12:14:31 +03:00
except (error.RustError, error.HttpError) as e:
raise error.Abort(e)
def _uploadtrees(repo, trees):
"""Upload trees"""
if not trees:
return
try:
with repo.ui.timesection("http.edenapi.upload_trees"):
stream, _stats = repo.edenapi.uploadtrees(getreponame(repo), trees)
foundindices = {item[INDEX_KEY] for item in stream if item[TOKEN_KEY]}
repo.ui.status(
_n(
"uploaded %d tree\n",
"uploaded %d trees\n",
len(foundindices),
)
% len(foundindices),
component="edenapi",
)
except (error.RustError, error.HttpError) as e:
raise error.Abort(e)
def _uploadchangesets(repo, changesets, mutations):
"""Upload changesets"""
uploaded, failed = [], []
if not changesets:
return uploaded, failed
try:
with repo.ui.timesection("http.edenapi.upload_changesets"):
stream, _stats = repo.edenapi.uploadchangesets(
getreponame(repo), changesets, mutations
)
foundindices = {item[INDEX_KEY] for item in stream if item[TOKEN_KEY]}
repo.ui.status(
_n(
"uploaded %d changeset\n",
"uploaded %d changesets\n",
len(foundindices),
)
% len(foundindices),
component="edenapi",
)
for index, cs in enumerate(changesets):
if index in foundindices:
uploaded.append(cs[0])
else:
failed.append(cs[0])
return uploaded, failed
except (error.RustError, error.HttpError) as e:
raise error.Abort(e)
def _getblobs(repo, nodes):
"""Get changed files"""
toupload = set()
for node in nodes.iterrev():
ctx = repo[node]
for f in ctx.files():
if f not in ctx:
continue
fctx = ctx[f]
p1, p2 = fctx.filelog().parents(fctx.filenode())
toupload.add((fctx.path(), fctx.filenode(), p1, p2))
return toupload
def _gettrees(repo, nodes):
    """Yield ``(treenode, p1, p2, treetext)`` for trees changed by ``nodes``.

    For each commit (in ``nodes.iterrev()`` order) the commit's manifest is
    compared against the manifests of its parents through
    ``bindings.manifest.subdirdiff``. For every reported tree, its parents
    are read from the manifest history store.
    """
    # Passed to subdirdiff as its depth argument.
    maxdepth = 1 << 15
    for node in nodes.iterrev():
        parents = repo.changelog.dag.parentnames(node)
        manifestnode = repo.changelog.changelogrevision(node).manifest
        basemanifests = []
        for parent in parents:
            basemanifests.append(repo.changelog.changelogrevision(parent).manifest)
        changed = bindings.manifest.subdirdiff(
            repo.manifestlog.datastore, "", manifestnode, basemanifests, maxdepth
        )
        # Only the first three fields of each diff entry are used here.
        for subdir, treenode, treetext, _unused1, _unused2, _unused3 in changed:
            nodeinfo = repo.manifestlog.historystore.getnodeinfo(subdir, treenode)
            p1, p2, _link, _copy = nodeinfo
            yield treenode, p1, p2, treetext
def _torevs(repo, uploadednodes, failednodes):
"""Convert nodes back to revs"""
return set([repo[node].rev() for node in uploadednodes]), set(
[repo[node].rev() for node in failednodes]
)
def filetypefromfile(f):
    """Return the file type name for the file context ``f``.

    The result is ``"Executable"`` when ``f.isexec()`` is true, otherwise
    ``"Symlink"`` when ``f.islink()`` is true, otherwise ``"Regular"``.
    The executable check takes precedence over the symlink check.
    """
    if f.isexec():
        return "Executable"
    if f.islink():
        return "Symlink"
    return "Regular"
def parentsfromctx(ctx):
    """Return the non-null parents of ``ctx`` in a shape that depends on their count.

    * ``None`` when the first parent is the null id,
    * the first parent's node when only the second parent is the null id,
    * a ``(p1, p2)`` tuple when neither parent is the null id.
    """
    first = ctx.p1().node()
    second = ctx.p2().node()
    if first == nodemod.nullid:
        return None
    if second == nodemod.nullid:
        return first
    return (first, second)
def _changesetfromctx(ctx):
    """Build the upload payload dict for the changeset context ``ctx``."""
    # The "branch" extra is deliberately left out of the payload.
    extras = [
        {"key": key.encode(), "value": value.encode()}
        for key, value in ctx.extra().items()
        if key != "branch"
    ]
    (time, timezone) = ctx.date()
    return {
        "parents": parentsfromctx(ctx),
        "manifestid": ctx.manifestnode(),
        "user": ctx.user().encode(),
        "time": int(time),
        "tz": timezone,
        "extras": extras,
        "files": ctx.files(),
        "message": ctx.description().encode(),
    }


def _mutationtodict(mut):
    """Build the upload payload dict for the mutation entry ``mut``."""
    return {
        "successor": mut.succ(),
        "predecessors": mut.preds(),
        "split": mut.split(),
        "op": mut.op(),
        "user": mut.user().encode(),
        "time": mut.time(),
        "tz": mut.tz(),
        "extras": [{"key": key, "value": value} for key, value in mut.extra()],
    }


def uploadhgchangesets(repo, revs, force=False, skipknowncheck=False):
    """Upload list of revs via EdenApi Uploads protocol

    EdenApi Uploads API consists of the following:

        * Endpoint for lookup any type of data (file contents, hg filenodes,
          hg treemanifests, hg commits).
        * Endpoint for upload file contents.
        * Endpoint for upload hg filenodes.
        * Endpoint for upload hg treemanifest.
        * Endpoint for upload hg commits & mutation information.

    The upload process is split into several stages:

        * Check and skip commits that have been already uploaded, building
          ``uploadcommitqueue``.
        * Check and skip hg filenodes that have been already uploaded,
          building ``uploadblobqueue``.
        * Check and skip hg trees that have been already uploaded, building
          ``uploadtreesqueue``.
        * Calculate ContentIds hashes and upload all file contents for the
          ``uploadblobqueue`` but skipping already uploaded content ids first
          (this step also deduplicates content ids if they are the same for
          some filenodes). See edenapi.uploadfiles.
        * Upload hg filenodes (``uploadblobqueue``).
        * Upload hg trees (``uploadtreesqueue``).
        * Finally, upload hg changesets and hg mutation information
          (``uploadcommitqueue``).

    If ``force`` is True (the default is False) the lookup check isn't
    performed prior to upload for commits, filenodes and trees.
    It will be still performed for file contents.

    If ``skipknowncheck`` is True (the default is False) the lookup check
    isn't performed to filter out already uploaded commits.
    Assumed it is known already that they are missing on the server.

    Returns newly uploaded revs and failed revs.
    """
    nodes = [repo[r].node() for r in revs]

    # Build a queue of commits to upload
    uploadcommitqueue = (
        nodes if (force or skipknowncheck) else _filtercommits(repo, nodes)
    )
    repo.ui.status(
        _n(
            "queue %d commit for upload\n",
            "queue %d commits for upload\n",
            len(uploadcommitqueue),
        )
        % len(uploadcommitqueue),
        component="edenapi",
    )

    # Sort uploadcommitqueue in topological order (use iterrev() to iterate
    # from parents to children)
    uploadcommitqueue = repo.changelog.dag.sort(uploadcommitqueue)

    # Build a queue of missing filenodes to upload
    blobs = list(_getblobs(repo, uploadcommitqueue))

    # Build a queue of missing trees to upload
    trees = list(_gettrees(repo, uploadcommitqueue))

    uploadblobqueue, uploadtreesqueue = (
        (blobs, trees) if force else _filteruploaded(repo, blobs, trees)
    )

    repo.ui.status(
        _n(
            "queue %d file for upload\n",
            "queue %d files for upload\n",
            len(uploadblobqueue),
        )
        % len(uploadblobqueue),
        component="edenapi",
    )

    # Upload missing files and filenodes for the selected set of filenodes
    _uploadfilenodes(repo, uploadblobqueue)

    repo.ui.status(
        _n(
            "queue %d tree for upload\n",
            "queue %d trees for upload\n",
            len(uploadtreesqueue),
        )
        % len(uploadtreesqueue),
        component="edenapi",
    )

    # Upload missing trees
    _uploadtrees(repo, uploadtreesqueue)

    # Uploading changesets
    changesets = []
    for node in uploadcommitqueue.iterrev():
        repo.ui.status(
            _("uploading commit '%s'...\n") % nodemod.hex(node), component="edenapi"
        )
        changesets.append((node, _changesetfromctx(repo[node])))

    mutations = [
        _mutationtodict(mut)
        for mut in mutation.entriesfornodes(repo, uploadcommitqueue)
    ]

    return _torevs(repo, *_uploadchangesets(repo, changesets, mutations))