hggit: add an external-sync command that does the bare minimum

Summary:
For our HgExternalSync jobs that pull from git, we don't really use most of the
bells and whistles of hggit. Notably, we don't care about bookmarks: we only
ever pull master, we never update to it, we only ever look at `-r tip`.

However, we do care about things that are actually much harder to fit in a
world where we try to pretend the remote git repository is actually a hg
repository we can pull from.

Notably, we'd like to enforce limits on how many commits we pull (and convert)
at a time, so that if we fall behind a little bit, we don't start falling even
more behind by having to convert bigger and bigger batches of commits. If we're
trying to pretend fetching from git and converting commits is actually a pull,
then that seems harder to pull off (we'd need to somehow rewind the remote head
we're pulling before importing it).

So, this adds a new external-sync command to hggit that does basically the bare
minimum that we do need. It lets you specify a git remote and a head you care
about, and import up to N commits from it. That's it — no bookmarks are updated
or anything (but the git-mapfile is, of course). The only thing that changes is
your commits.

If you actually want to interact with your git repository on an ongoing basis
as if it were a remote hg repository, this is completely useless, but that
isn't what we actually do, so that should be OK.

As part of this, I've modified a few other parts of git_handler to remove
places where we called a `uri` `remote_name` (which is a bit confusing), and a
place where we were asking for a `remote_name` parameter that I don't have
here, but which we also didn't actually need (in `import_git_objects`).

Reviewed By: farnz

Differential Revision: D20836601

fbshipit-source-id: 96230e6e8269d0472404414948fd2f02aa98d79c
This commit is contained in:
Thomas Orozco 2020-04-06 07:33:37 -07:00 committed by Facebook GitHub Bot
parent 5456adefe7
commit 0565c7e244
3 changed files with 101 additions and 8 deletions

View File

@ -255,6 +255,19 @@ def reposetup(ui, repo):
repo.__class__ = klass
@command("external-sync", [], _("REMOTE HEAD LIMIT"))
def externalsync(ui, repo, remote, head, limit):
    """import up to LIMIT commits from one head of a Git remote

    Fetches HEAD from the Git repository at REMOTE and converts at most
    LIMIT of the incoming commits into Mercurial commits. No bookmarks
    are updated by this command.
    """
    # LIMIT arrives from the command line as a string.
    limit = int(limit)

    repo.ui.status(
        _("importing up to %d commits from %s in %s\n") % (limit, remote, head)
    )

    # Hold both the working-copy lock and the store lock for the whole
    # fetch + convert sequence.
    with repo.wlock(), repo.lock():
        # Fetch only the requested head, then narrow the returned refs to it
        # so that nothing else is considered for import.
        refs = repo.githandler.fetch_pack(remote, [head])
        refs = repo.githandler.filter_refs(refs, [head])
        imported = repo.githandler.import_git_objects(refs, limit)
        repo.ui.status(_("imported %s commits\n") % imported)
@command("gimport")
def gimport(ui, repo, remote_name=None):
"""import commits from Git to Mercurial"""

View File

@ -277,7 +277,7 @@ class GitHandler(object):
def import_commits(self, remote_name):
refs = self.git.refs.as_dict()
filteredrefs = self.filter_min_date(refs)
self.import_git_objects(remote_name, filteredrefs)
self.import_git_objects(filteredrefs)
self.update_hg_bookmarks(refs)
self.save_map(self.map_file)
@ -292,7 +292,7 @@ class GitHandler(object):
imported = 0
if refs:
filteredrefs = self.filter_min_date(self.filter_refs(refs, heads))
imported = self.import_git_objects(remote_name, filteredrefs)
imported = self.import_git_objects(filteredrefs)
self.update_hg_bookmarks(refs)
try:
@ -860,7 +860,7 @@ class GitHandler(object):
def get_git_incoming(self, refs):
return git2hg.find_incoming(self.git.object_store, self._map, refs)
def import_git_objects(self, remote_name, refs):
def import_git_objects(self, refs, limit=None):
result = self.get_git_incoming(refs)
commits = result.commits
commit_cache = result.commit_cache
@ -871,9 +871,15 @@ class GitHandler(object):
else:
self.ui.status(_("no changes found\n"))
importcount = total
if limit is not None:
importcount = min(total, limit)
mapsavefreq = compat.config(self.ui, "int", "hggit", "mapsavefrequency")
with progress.bar(self.ui, _("importing"), "commits", total=total) as prog:
icommits = enumerate(commits)
with progress.bar(
self.ui, _("importing"), "commits", total=importcount
) as prog:
icommits = enumerate(itertools.islice(commits, limit))
while True:
isubcommits = list(itertools.islice(icommits, mapsavefreq or 1))
if not isubcommits:
@ -891,7 +897,7 @@ class GitHandler(object):
self.save_map(self.map_file)
# TODO if the tags cache is used, remove any dangling tag references
return total
return importcount
def import_git_commit(self, commit):
self.ui.debug("importing: %s\n" % commit.id)
@ -1282,8 +1288,8 @@ class GitHandler(object):
return new_refs
def fetch_pack(self, remote_name, heads=None):
localclient, path = self.get_transport_and_path(remote_name)
def fetch_pack(self, uri, heads=None):
localclient, path = self.get_transport_and_path(uri)
# The dulwich default walk only checks refs/heads/. We also want to
# consider remotes when doing discovery, so we build our own list. We

View File

@ -0,0 +1,74 @@
#require py2
Load commonly used test logic
$ . "$TESTDIR/hggit/testutil"
$ setconfig hggit.mapsavefrequency=1
# Set up the git repo
$ cd "$TESTTMP"
$ git init gitrepo
Initialized empty Git repository in $TESTTMP/gitrepo/.git/
$ cd gitrepo
$ echo commit1 > commit1
$ git add .
$ fn_git_commit -m 'commit1'
Clone the repo
$ cd "$TESTTMP"
$ hg clone -q -r master gitrepo hgrepo
Add more commits
$ cd "$TESTTMP/gitrepo"
$ echo commit2 > commit2
$ git add .
$ fn_git_commit -m 'commit2'
$ echo commit3 > commit3
$ git add .
$ fn_git_commit -m 'commit3'
$ echo commit4 > commit4
$ git add .
$ fn_git_commit -m 'commit4'
Pull one of them
$ cd "$TESTTMP/hgrepo"
$ hg log -r tip -T '{desc}\n'
commit1
$ hg external-sync "$TESTTMP/gitrepo" master 1
importing up to 1 commits from $TESTTMP/gitrepo in master
importing git objects into hg
imported 1 commits
$ hg log -r tip -T '{desc}\n'
commit2
Pull the rest
$ hg external-sync "$TESTTMP/gitrepo" master 3
importing up to 3 commits from $TESTTMP/gitrepo in master
importing git objects into hg
imported 2 commits
$ hg log -r tip -T '{desc}\n'
commit4
$ hg up tip
3 files updated, 0 files merged, 0 files removed, 0 files unresolved
(leaving bookmark master)
$ ls
commit1
commit2
commit3
commit4
Nothing left to pull
$ hg external-sync "$TESTTMP/gitrepo" master 100
importing up to 100 commits from $TESTTMP/gitrepo in master
no changes found
imported 0 commits