join backup states for hg sl command

Summary:
Check multiple backup state files when gathering information for the `hg cloud sl` command.

I added a comment explaining why we would need this.

At the moment some commands, for example `hg cloud switch` or `hg pull -r`, go through the Mononoke backend, so they update a different backup cache file than the `hg cloud sync` command, which goes through the Mercurial backend (write path).

As a result, the `hg sl` command displays that some commits haven't been backed up.
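For context, each `BackupState` instance stores its cache in a file whose name is derived from a hash of the remote path, so traffic over different paths lands in different cache files. A minimal sketch of that derivation, mirroring the code in the diff below (the remote path strings are illustrative):

```python
# Sketch of how each remote path maps to its own backup cache file
# (mirrors BackupState in the diff below; the path strings are made up).
import hashlib


def cachefilename(remotepath):
    # The first 8 hex digits of the remote path's SHA-256 pick the cache
    # file, so read and write remote paths use different files.
    digest = hashlib.sha256(remotepath.encode("utf-8")).hexdigest()[0:8]
    return "commitcloud/backedupheads." + digest


# Two logical names for the same server yield two distinct cache files:
print(cachefilename("infinitepush"))       # one 8-digit suffix
print(cachefilename("infinitepushwrite"))  # a different 8-digit suffix
```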

Reviewed By: markbt

Differential Revision: D24046449

fbshipit-source-id: 56d924bf641833231cda6e6701e67f63c8453894
Liubov Dmitrieva 2020-10-01 09:25:27 -07:00 committed by Facebook GitHub Bot
parent 5cdca923bb
commit d5442f96df
2 changed files with 60 additions and 12 deletions


@@ -203,7 +203,7 @@ def extsetup(ui):
     localrepo.localrepository._wlockfreeprefix.add(obsmarkers._obsmarkerssyncing)
     localrepo.localrepository._wlockfreeprefix.add(backuplock.progressfilename)
     localrepo.localrepository._wlockfreeprefix.add(backupbookmarks._backupstateprefix)
-    localrepo.localrepository._wlockfreeprefix.add(backupstate.BackupState.prefix)
+    localrepo.localrepository._wlockfreeprefix.add(backupstate.BackupState.directory)
     localrepo.localrepository._wlockfreeprefix.add(background._autobackupstatefile)
     localrepo.localrepository._lockfreeprefix.add(syncstate.SyncState.prefix)
     localrepo.localrepository._lockfreeprefix.add(sync._syncstatusfile)
@@ -374,12 +374,12 @@ def missingcloudrevspull(repo, nodes):
 def backedup(repo, subset, x):
     """draft changesets that have been backed up to Commit Cloud"""
     unfi = repo
-    state = backupstate.BackupState(repo, ccutil.getremotepath(repo, None))
+    heads = backupstate.BackupState.readheadsfromallpaths(repo)
     cl = repo.changelog
     if cl.algorithmbackend == "segments":
-        backedup = repo.dageval(lambda: draft() & ancestors(state.heads))
+        backedup = repo.dageval(lambda: draft() & ancestors(heads))
         return subset & cl.torevset(backedup)
-    backedup = unfi.revs("not public() and ::%ln", state.heads)
+    backedup = unfi.revs("not public() and ::%ln", heads)
     return smartset.filteredset(subset & repo.revs("draft()"), lambda r: r in backedup)
@@ -387,12 +387,12 @@ def backedup(repo, subset, x):
 def notbackedup(repo, subset, x):
     """changesets that have not yet been backed up to Commit Cloud"""
     unfi = repo
-    state = backupstate.BackupState(repo, ccutil.getremotepath(repo, None))
+    heads = backupstate.BackupState.readheadsfromallpaths(repo)
     cl = repo.changelog
     if cl.algorithmbackend == "segments":
-        notbackedup = repo.dageval(lambda: draft() - ancestors(state.heads))
+        notbackedup = repo.dageval(lambda: draft() - ancestors(heads))
         return subset & cl.torevset(notbackedup)
-    backedup = unfi.revs("not public() and ::%ln", state.heads)
+    backedup = unfi.revs("not public() and ::%ln", heads)
     return smartset.filteredset(
         subset & repo.revs("not public() - hidden()"), lambda r: r not in backedup
     )


@@ -7,27 +7,75 @@ from __future__ import absolute_import
 import hashlib
 import os
 import time

 from edenscm.mercurial import error, node as nodemod, util
 from edenscm.mercurial.pycompat import encodeutf8

-from . import dependencies
+from . import dependencies, util as ccutil

 FORMAT_VERSION = "v1"


 class BackupState(object):
-    """Stores what commits have been successfully backed up to the cloud."""
+    """Stores what commits have been successfully backed up to the cloud.
+
+    BackupState is not the source of truth; it is a local cache of what has
+    been backed up at the given path, and it is stored in a different file
+    for each path.
+
+    Due to the migration to another backend, some commit cloud traffic uses
+    the 'infinitepush' (read) path while other traffic uses the
+    'infinitepushwrite' (write) path, and they update different local cache
+    files. Also, many commands can be run with the '--dest' option to specify
+    the path, and as a result could write the cache to a third state file.
+    So in some cases one cache or the other could be out of date, or both.
+
+    Use the `readheadsfromallpaths` class method to get the union of the
+    heads that have been backed up at the different paths.
+
+    After the migration, we should deprecate the current filename convention
+    for the backup cache files (they shouldn't depend on the remote path) and
+    move the whole extension to always use the infinitepushwrite path.
+    """

-    prefix = "commitcloud/backedupheads."
+    prefix = "backedupheads."
+    directory = "commitcloud"
+    oldbackupcache = 604800  # 1 week
+
+    @classmethod
+    def readheadsfromallpaths(cls, repo):
+        repo.sharedvfs.makedirs(cls.directory)
+        files = [
+            filename
+            for filename in repo.sharedvfs.listdir(cls.directory)
+            if filename.startswith(cls.prefix)
+        ]
+        heads = set()
+        hasnode = repo.changelog.hasnode
+        threshold = time.time() - cls.oldbackupcache
+        # union the recent cache files
+        for filename in files:
+            path = os.path.join(cls.directory, filename)
+            # skip old cache files
+            if os.path.getmtime(repo.sharedvfs.join(path)) < threshold:
+                continue
+            lines = repo.sharedvfs.readutf8(path).splitlines()
+            # skip files with an unrecognised format
+            if len(lines) < 2 or lines[0].strip() != FORMAT_VERSION:
+                continue
+            heads = heads.union([nodemod.bin(head.strip()) for head in lines[2:]])
+        heads = {h for h in heads if hasnode(h)}
+        # return the cached backed-up heads if found
+        if heads:
+            return heads
+        # if no cache was found, rebuild it based on the infinitepushwrite remote path
+        return BackupState(
+            repo, ccutil.getremotewritepath(repo, None), resetlocalstate=True
+        ).heads

     def __init__(self, repo, remotepath, resetlocalstate=False):
         self.repo = repo
         self.remotepath = remotepath
-        repo.sharedvfs.makedirs("commitcloud")
+        repo.sharedvfs.makedirs(self.directory)
         self.filename = os.path.join(
-            self.prefix + hashlib.sha256(encodeutf8(remotepath)).hexdigest()[0:8]
+            self.directory,
+            self.prefix + hashlib.sha256(encodeutf8(remotepath)).hexdigest()[0:8],
         )
         self.heads = set()
         if repo.sharedvfs.exists(self.filename) and not resetlocalstate:
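
For reference, a simplified, self-contained model of the union-read logic in `readheadsfromallpaths`, using plain files on disk instead of the repo's sharedvfs (the helper name is made up):

```python
# Simplified model of readheadsfromallpaths: union the heads recorded in
# every recent backup cache file under a directory.
import os
import time

FORMAT_VERSION = "v1"
ONE_WEEK = 604800  # matches BackupState.oldbackupcache


def read_union_of_heads(directory, prefix="backedupheads."):
    heads = set()
    threshold = time.time() - ONE_WEEK
    for filename in os.listdir(directory):
        if not filename.startswith(prefix):
            continue
        path = os.path.join(directory, filename)
        # skip cache files that have not been written for over a week
        if os.path.getmtime(path) < threshold:
            continue
        with open(path) as f:
            lines = f.read().splitlines()
        # skip files with an unrecognised format
        if len(lines) < 2 or lines[0].strip() != FORMAT_VERSION:
            continue
        # heads are stored one per line after the header lines
        heads.update(line.strip() for line in lines[2:])
    return heads
```

Because the result is a union across every path's cache file, a head backed up via either the read or the write path is counted as backed up, which is what keeps `hg sl` from flagging already-backed-up commits.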