2022-01-06 01:42:23 +03:00
|
|
|
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
2021-07-22 21:51:49 +03:00
|
|
|
#
|
|
|
|
# This software may be used and distributed according to the terms of the
|
|
|
|
# GNU General Public License version 2.
|
|
|
|
|
|
|
|
from __future__ import absolute_import
|
|
|
|
|
|
|
|
import bindings
|
|
|
|
|
apply import merging for fbcode (4 of 11)
Summary:
Applies new import merging and sorting from µsort v1.0.
When merging imports, µsort will make a best-effort to move associated
comments to match merged elements, but there are known limitations due to
the dynamic nature of Python and developer tooling. These changes should
not produce any dangerous runtime changes, but may require touch-ups to
satisfy linters and other tooling.
Note that µsort uses case-insensitive, lexicographical sorting, which
results in a different ordering compared to isort. This provides a more
consistent sorting order, matching the case-insensitive order used when
sorting import statements by module name, and ensures that "frog", "FROG",
and "Frog" always sort next to each other.
For details on µsort's sorting and merging semantics, see the user guide:
https://usort.readthedocs.io/en/stable/guide.html#sorting
Reviewed By: lisroach
Differential Revision: D36402162
fbshipit-source-id: 6d180e9003d466c4f866fc9d454c6531766ca1dd
2022-05-15 22:53:03 +03:00
|
|
|
from . import error, mutation, node as nodemod
|
2021-07-22 21:51:49 +03:00
|
|
|
from .i18n import _, _n
|
|
|
|
|
|
|
|
# NOTE(review): these lookup-response field names are not referenced in this
# chunk of the file; presumably consumed by callers elsewhere — confirm before
# removing.
TOKEN_KEY = "token"
INDEX_KEY = "index"
|
|
|
|
|
|
|
|
|
|
|
|
def _filtercommits(repo, nodes):
|
|
|
|
"""Returns list of missing commits"""
|
|
|
|
try:
|
2021-09-24 15:32:01 +03:00
|
|
|
with repo.ui.timesection("http.edenapi.upload_filter_commits"):
|
2021-12-15 13:08:46 +03:00
|
|
|
stream = repo.edenapi.commitknown(nodes)
|
2021-09-24 15:32:01 +03:00
|
|
|
return [
|
|
|
|
item["hgid"] for item in stream if item["known"].get("Ok") is not True
|
|
|
|
]
|
2021-07-22 21:51:49 +03:00
|
|
|
except (error.RustError, error.HttpError) as e:
|
|
|
|
raise error.Abort(e)
|
|
|
|
|
|
|
|
|
2021-10-19 19:33:19 +03:00
|
|
|
def _filteruploaded(repo, files, trees):
|
2021-09-24 17:47:38 +03:00
|
|
|
"""Returns list of missing blobs and trees"""
|
2021-07-22 21:51:49 +03:00
|
|
|
try:
|
2021-09-24 15:32:01 +03:00
|
|
|
with repo.ui.timesection("http.edenapi.upload_lookup"):
|
2021-10-13 01:03:55 +03:00
|
|
|
stream = repo.edenapi.lookup_filenodes_and_trees(
|
2021-10-19 19:33:19 +03:00
|
|
|
[fctx.filenode() for fctx in files],
|
2021-09-24 17:47:38 +03:00
|
|
|
[tree[0] for tree in trees],
|
2021-09-24 15:32:01 +03:00
|
|
|
)
|
2021-07-22 21:51:49 +03:00
|
|
|
|
2021-09-24 17:47:38 +03:00
|
|
|
results = list(stream)
|
2021-10-19 19:33:19 +03:00
|
|
|
blobslen = len(files)
|
2021-09-24 17:47:38 +03:00
|
|
|
|
|
|
|
foundindicesblobs = {
|
Lookup client only fix
Summary:
We recently had to disable edenapi uploads, because it was failing on big commits.
## The issue is
We batch some types of edenapi requests. E.g. if we're trying to check for the existence of 80k files in the blobstore, we split this in two requests, one with 50k ids, and the other with the remaining 30k.
The problem is the client currently depends on the request being done in a single go. For lookup, the client sends a list of ids, and expects responses of type `LookupResponde {index, Option<token>}`, with indexes of each element, and the token in case it was already present.
This does not work when the request is batched: the second request will return invalid indexes when compared with the full array.
This affects **all code** that uses lookup. I believe it is being used for [commit lookup](https://fb.workplace.com/groups/3112141815538647/posts/4178113148941503), but we probably don't reach the problem because it must be rare to lookup thousands of commits at the same time.
## Possible solutions
1. Merge responses smartly: we can modify second responde to add an offset to every index.
2. Don't depend on how the request is batched.
This diff starts the fix to go with solution (1).
## What this diff does
Changes the response from
```
LookupResponse { index, Option<token> }
```
to
```
enum LookupResult {
Present(token),
NotPresent(id)
}
LookupResponse { result: LookupResult }
```
That is, we now no longer return indexes, but instead a result enum, that either has the token if blob was uploaded, or the id it was not (so client can tell which blobs are for sure not uploaded, in the future we can maybe just not return those, but it happened currently, so we can't change that)
In practice, we still need to keep all the fields for now, for the transition period.
### Transition:
1. Client supports both old and new server responses. It notices which response it was and converts all of them to the new format. Server response is unaffected. **(this diff)**
2. On server, return both new and old response. Can be landed together with this diff, since we're returning both responses.
3. Delete code that handles old format. Should be landed once (2) is rolled out.
This could go quicker (backwards incompatible change) only if we do not use this in prod, which is not the case.
Reviewed By: StanislavGlebik
Differential Revision: D31935606
fbshipit-source-id: 724098258681630e6941234befd694e6f1ef9e16
2021-11-02 20:57:09 +03:00
|
|
|
idx for idx, token in results if "HgFilenodeId" in token["data"]["id"]
|
2021-09-24 17:47:38 +03:00
|
|
|
}
|
|
|
|
foundindicestrees = {
|
Lookup client only fix
Summary:
We recently had to disable edenapi uploads, because it was failing on big commits.
## The issue is
We batch some types of edenapi requests. E.g. if we're trying to check for the existence of 80k files in the blobstore, we split this in two requests, one with 50k ids, and the other with the remaining 30k.
The problem is the client currently depends on the request being done in a single go. For lookup, the client sends a list of ids, and expects responses of type `LookupResponde {index, Option<token>}`, with indexes of each element, and the token in case it was already present.
This does not work when the request is batched: the second request will return invalid indexes when compared with the full array.
This affects **all code** that uses lookup. I believe it is being used for [commit lookup](https://fb.workplace.com/groups/3112141815538647/posts/4178113148941503), but we probably don't reach the problem because it must be rare to lookup thousands of commits at the same time.
## Possible solutions
1. Merge responses smartly: we can modify second responde to add an offset to every index.
2. Don't depend on how the request is batched.
This diff starts the fix to go with solution (1).
## What this diff does
Changes the response from
```
LookupResponse { index, Option<token> }
```
to
```
enum LookupResult {
Present(token),
NotPresent(id)
}
LookupResponse { result: LookupResult }
```
That is, we now no longer return indexes, but instead a result enum, that either has the token if blob was uploaded, or the id it was not (so client can tell which blobs are for sure not uploaded, in the future we can maybe just not return those, but it happened currently, so we can't change that)
In practice, we still need to keep all the fields for now, for the transition period.
### Transition:
1. Client supports both old and new server responses. It notices which response it was and converts all of them to the new format. Server response is unaffected. **(this diff)**
2. On server, return both new and old response. Can be landed together with this diff, since we're returning both responses.
3. Delete code that handles old format. Should be landed once (2) is rolled out.
This could go quicker (backwards incompatible change) only if we do not use this in prod, which is not the case.
Reviewed By: StanislavGlebik
Differential Revision: D31935606
fbshipit-source-id: 724098258681630e6941234befd694e6f1ef9e16
2021-11-02 20:57:09 +03:00
|
|
|
idx - blobslen
|
|
|
|
for idx, token in results
|
|
|
|
if "HgTreeId" in token["data"]["id"]
|
2021-09-24 17:47:38 +03:00
|
|
|
}
|
|
|
|
|
2021-10-19 19:33:19 +03:00
|
|
|
missingfiles = [
|
|
|
|
fctx
|
|
|
|
for index, fctx in enumerate(files)
|
2021-09-24 17:47:38 +03:00
|
|
|
if index not in foundindicesblobs
|
2021-09-24 15:32:01 +03:00
|
|
|
]
|
2021-09-24 17:47:38 +03:00
|
|
|
missingtrees = [
|
|
|
|
tree
|
|
|
|
for index, tree in enumerate(trees)
|
|
|
|
if index not in foundindicestrees
|
|
|
|
]
|
|
|
|
|
2021-10-19 19:33:19 +03:00
|
|
|
return missingfiles, missingtrees
|
2021-07-22 21:51:49 +03:00
|
|
|
except (error.RustError, error.HttpError) as e:
|
|
|
|
raise error.Abort(e)
|
|
|
|
|
|
|
|
|
2021-10-19 19:33:19 +03:00
|
|
|
def _uploadfilenodes(repo, fctxs):
|
2021-07-22 21:51:49 +03:00
|
|
|
"""Upload file content and filenodes"""
|
2021-10-19 19:33:19 +03:00
|
|
|
if not fctxs:
|
2021-07-22 21:51:49 +03:00
|
|
|
return
|
2021-10-19 19:33:19 +03:00
|
|
|
keys = []
|
|
|
|
for fctx in fctxs:
|
|
|
|
p1, p2 = fctx.filelog().parents(fctx.filenode())
|
|
|
|
keys.append((fctx.path(), fctx.filenode(), p1, p2))
|
2021-07-22 21:51:49 +03:00
|
|
|
dpack, _hpack = repo.fileslog.getmutablelocalpacks()
|
|
|
|
try:
|
2021-09-24 15:32:01 +03:00
|
|
|
with repo.ui.timesection("http.edenapi.upload_files"):
|
2021-12-15 13:08:46 +03:00
|
|
|
stream, _stats = repo.edenapi.uploadfiles(dpack, keys)
|
2021-11-03 18:56:17 +03:00
|
|
|
items = list(stream)
|
2021-09-24 15:32:01 +03:00
|
|
|
repo.ui.status(
|
|
|
|
_n(
|
|
|
|
"uploaded %d file\n",
|
|
|
|
"uploaded %d files\n",
|
2021-11-03 18:56:17 +03:00
|
|
|
len(items),
|
2021-09-24 15:32:01 +03:00
|
|
|
)
|
2021-11-03 18:56:17 +03:00
|
|
|
% len(items),
|
2021-09-24 15:32:01 +03:00
|
|
|
component="edenapi",
|
2021-07-22 21:51:49 +03:00
|
|
|
)
|
2021-07-28 12:14:31 +03:00
|
|
|
|
2021-07-22 21:51:49 +03:00
|
|
|
except (error.RustError, error.HttpError) as e:
|
|
|
|
raise error.Abort(e)
|
|
|
|
|
|
|
|
|
2021-11-12 22:15:32 +03:00
|
|
|
def _uploadtrees(repo, trees):
|
2021-07-22 21:51:49 +03:00
|
|
|
"""Upload trees"""
|
2021-11-12 22:15:32 +03:00
|
|
|
if not trees:
|
2021-07-22 21:51:49 +03:00
|
|
|
return
|
2021-10-19 19:33:19 +03:00
|
|
|
|
2021-07-22 21:51:49 +03:00
|
|
|
try:
|
2021-09-24 15:32:01 +03:00
|
|
|
with repo.ui.timesection("http.edenapi.upload_trees"):
|
2021-12-15 13:08:46 +03:00
|
|
|
stream, _stats = repo.edenapi.uploadtrees(trees)
|
2021-11-02 21:50:29 +03:00
|
|
|
trees = list(stream)
|
2021-09-24 15:32:01 +03:00
|
|
|
repo.ui.status(
|
|
|
|
_n(
|
|
|
|
"uploaded %d tree\n",
|
|
|
|
"uploaded %d trees\n",
|
2021-11-02 21:50:29 +03:00
|
|
|
len(trees),
|
2021-09-24 15:32:01 +03:00
|
|
|
)
|
2021-11-02 21:50:29 +03:00
|
|
|
% len(trees),
|
2021-09-24 15:32:01 +03:00
|
|
|
component="edenapi",
|
2021-07-22 21:51:49 +03:00
|
|
|
)
|
|
|
|
except (error.RustError, error.HttpError) as e:
|
|
|
|
raise error.Abort(e)
|
|
|
|
|
|
|
|
|
2021-08-05 19:28:23 +03:00
|
|
|
def _uploadchangesets(repo, changesets, mutations):
|
2021-07-22 21:51:49 +03:00
|
|
|
"""Upload changesets"""
|
|
|
|
uploaded, failed = [], []
|
|
|
|
if not changesets:
|
|
|
|
return uploaded, failed
|
|
|
|
try:
|
2021-09-24 15:32:01 +03:00
|
|
|
with repo.ui.timesection("http.edenapi.upload_changesets"):
|
2021-12-15 13:08:46 +03:00
|
|
|
stream, _stats = repo.edenapi.uploadchangesets(changesets, mutations)
|
2021-11-03 18:56:17 +03:00
|
|
|
foundids = {item["data"]["id"]["HgChangesetId"] for item in stream}
|
2021-09-24 15:32:01 +03:00
|
|
|
repo.ui.status(
|
|
|
|
_n(
|
|
|
|
"uploaded %d changeset\n",
|
|
|
|
"uploaded %d changesets\n",
|
2021-11-03 18:56:17 +03:00
|
|
|
len(foundids),
|
2021-09-24 15:32:01 +03:00
|
|
|
)
|
2021-11-03 18:56:17 +03:00
|
|
|
% len(foundids),
|
2021-09-24 15:32:01 +03:00
|
|
|
component="edenapi",
|
|
|
|
)
|
2021-11-03 18:56:17 +03:00
|
|
|
for cs in changesets:
|
|
|
|
if cs[0] in foundids:
|
2021-09-24 15:32:01 +03:00
|
|
|
uploaded.append(cs[0])
|
|
|
|
else:
|
|
|
|
failed.append(cs[0])
|
|
|
|
return uploaded, failed
|
2021-07-22 21:51:49 +03:00
|
|
|
except (error.RustError, error.HttpError) as e:
|
|
|
|
raise error.Abort(e)
|
|
|
|
|
|
|
|
|
2021-10-19 19:33:19 +03:00
|
|
|
def _getfiles(repo, nodes):
|
2021-07-22 21:51:49 +03:00
|
|
|
"""Get changed files"""
|
|
|
|
toupload = set()
|
|
|
|
for node in nodes.iterrev():
|
|
|
|
ctx = repo[node]
|
|
|
|
for f in ctx.files():
|
|
|
|
if f not in ctx:
|
|
|
|
continue
|
|
|
|
fctx = ctx[f]
|
2021-10-19 19:33:19 +03:00
|
|
|
toupload.add(fctx)
|
2021-07-22 21:51:49 +03:00
|
|
|
return toupload
|
|
|
|
|
|
|
|
|
|
|
|
def _gettrees(repo, nodes):
|
|
|
|
"""Get changed trees"""
|
|
|
|
treedepth = 1 << 15
|
|
|
|
for node in nodes.iterrev():
|
|
|
|
parentnodes = repo.changelog.dag.parentnames(node)
|
|
|
|
mfnode = repo.changelog.changelogrevision(node).manifest
|
|
|
|
basemfnodes = [
|
|
|
|
repo.changelog.changelogrevision(p).manifest for p in parentnodes
|
|
|
|
]
|
|
|
|
difftrees = bindings.manifest.subdirdiff(
|
|
|
|
repo.manifestlog.datastore, "", mfnode, basemfnodes, treedepth
|
|
|
|
)
|
2021-11-12 22:15:32 +03:00
|
|
|
for subdir, treenode, treetext, p1, p2 in difftrees:
|
|
|
|
yield treenode, p1, p2, treetext
|
2021-07-22 21:51:49 +03:00
|
|
|
|
|
|
|
|
2021-07-29 22:06:48 +03:00
|
|
|
def _torevs(repo, uploadednodes, failednodes):
|
|
|
|
"""Convert nodes back to revs"""
|
|
|
|
return set([repo[node].rev() for node in uploadednodes]), set(
|
|
|
|
[repo[node].rev() for node in failednodes]
|
|
|
|
)
|
|
|
|
|
|
|
|
|
2021-08-05 19:28:23 +03:00
|
|
|
def filetypefromfile(f):
    """Map a file context's mode flags to an EdenApi file type name.

    Executable wins over symlink when both flags are set, matching the
    original if/elif ordering.
    """
    if f.isexec():
        return "Executable"
    if f.islink():
        return "Symlink"
    return "Regular"
|
|
|
|
|
|
|
|
|
2021-08-05 19:28:23 +03:00
|
|
|
def parentsfromctx(ctx):
    """Return the non-null parents of ``ctx``.

    Returns a ``(p1, p2)`` tuple for a merge commit, the single ``p1`` node
    when only the first parent is set, or None for a root commit (also when
    only p2 is non-null, matching the original branch order).
    """
    p1 = ctx.p1().node()
    p2 = ctx.p2().node()
    hasp1 = p1 != nodemod.nullid
    hasp2 = p2 != nodemod.nullid
    if hasp1 and hasp2:
        return (p1, p2)
    if hasp1:
        return p1
    return None
|
|
|
|
|
|
|
|
|
2021-09-27 20:23:39 +03:00
|
|
|
def uploadhgchangesets(repo, revs, force=False, skipknowncheck=False):
    """Upload list of revs via EdenApi Uploads protocol

    EdenApi Uploads API consists of the following:

    * Endpoint for lookup any type of data (file contents, hg filenodes, hg treemanifests, hg commits).
    * Endpoint for upload file contents.
    * Endpoint for upload hg filenodes.
    * Endpoint for upload hg treemanifest.
    * Endpoint for upload hg commits & mutation information.

    The upload process is split into several stages:

    * Check and skip commits that have been already uploaded building ``uploadcommitqueue``.
    * Check and skip hg filenodes that have been already uploaded building ``uploadblobqueue``.
    * Check and skip hg trees that have been already uploaded building ``uploadtreesqueue``.
    * Calculate ContentIds hashes and upload all file contents for the ``uploadblobqueue``
      but skipping already uploaded content ids first (this step also deduplicates content ids
      if they are the same for some filenodes). See edenapi.uploadfiles.
    * Upload hg filenodes (``uploadblobqueue``).
    * Upload hg trees (``uploadtreesqueue``).
    * Finally, upload hg changesets and hg mutation information (``uploadcommitqueue``).

    If ``force`` is True (the default is False) the lookup check isn't performed prior to upload for commits, filenodes and trees.
    It will be still performed for file contents.

    If ``skipknowncheck`` is True (the default is False) the lookup check isn't performed to filter out already uploaded commits.
    Assumed it is known already that they are missing on the server.

    Returns newly uploaded revs and failed revs.
    """

    nodes = [repo[r].node() for r in revs]

    # Build a queue of commits to upload
    uploadcommitqueue = (
        nodes if (force or skipknowncheck) else _filtercommits(repo, nodes)
    )

    if not uploadcommitqueue:
        # No commits to upload
        return set(), set()

    repo.ui.status(
        _n(
            "queue %d commit for upload\n",
            "queue %d commits for upload\n",
            len(uploadcommitqueue),
        )
        % len(uploadcommitqueue),
        component="edenapi",
    )

    # Sort uploadcommitqueue in topological order (use iterrev() to iterate from parents to children)
    uploadcommitqueue = repo.changelog.dag.sort(uploadcommitqueue)

    # Build a queue of missing filenodes to upload
    files = list(_getfiles(repo, uploadcommitqueue))

    # Build a queue of missing trees to upload
    trees = list(_gettrees(repo, uploadcommitqueue))

    # When forcing, upload everything; otherwise ask the server which blobs
    # and trees it already has.
    uploadblobqueue, uploadtreesqueue = (
        (files, trees) if force else _filteruploaded(repo, files, trees)
    )

    repo.ui.status(
        _n(
            "queue %d file for upload\n",
            "queue %d files for upload\n",
            len(uploadblobqueue),
        )
        % len(uploadblobqueue),
        component="edenapi",
    )

    # Upload missing files and filenodes for the selected set of filenodes
    _uploadfilenodes(repo, uploadblobqueue)

    repo.ui.status(
        _n(
            "queue %d tree for upload\n",
            "queue %d trees for upload\n",
            len(uploadtreesqueue),
        )
        % len(uploadtreesqueue),
        component="edenapi",
    )

    # Upload missing trees
    _uploadtrees(repo, uploadtreesqueue)

    # Uploading changesets
    changesets = []
    for node in uploadcommitqueue.iterrev():
        repo.ui.status(
            _("uploading commit '%s'...\n") % nodemod.hex(node), component="edenapi"
        )
        ctx = repo[node]
        # "branch" is implicit and not part of the uploaded extras.
        extras = [
            {"key": key.encode(), "value": value.encode()}
            for key, value in ctx.extra().items()
            if key != "branch"
        ]
        (time, timezone) = ctx.date()
        changesets.append(
            (
                node,
                {
                    "parents": parentsfromctx(ctx),
                    "manifestid": ctx.manifestnode(),
                    "user": ctx.user().encode(),
                    "time": int(time),
                    "tz": timezone,
                    "extras": extras,
                    "files": ctx.files(),
                    "message": ctx.description().encode(),
                },
            )
        )

    # Collect mutation (amend/rebase provenance) entries for the queued
    # commits and reshape them into the wire format.
    mutations = mutation.entriesfornodes(repo, uploadcommitqueue)
    mutations = [
        {
            "successor": mut.succ(),
            "predecessors": mut.preds(),
            "split": mut.split(),
            "op": mut.op(),
            "user": mut.user().encode(),
            "time": mut.time(),
            "tz": mut.tz(),
            "extras": [{"key": key, "value": value} for key, value in mut.extra()],
        }
        for mut in mutations
    ]

    return _torevs(repo, *_uploadchangesets(repo, changesets, mutations))
|