sapling/edenscm/hgext/perfsuite/editsgenerator.py
Jun Wu 9dc21f8d0b codemod: import from the edenscm package
Summary:
D13853115 adds `edenscm/` to `sys.path` and code still uses `import mercurial`.
That has nasty problems if both `import mercurial` and
`import edenscm.mercurial` are used, because Python would think `mercurial.foo`
and `edenscm.mercurial.foo` are different modules so code like
`try: ... except mercurial.error.Foo: ...`, or `isinstance(x, mercurial.foo.Bar)`
would fail to handle the `edenscm.mercurial` version. There are also some
module-level states (ex. `extensions._extensions`) that would cause trouble if
they have multiple versions in a single process.

Change imports to use the `edenscm` package so that, ideally, the top-level
`mercurial` package is no longer imported at all. Add checks in extensions.py
to catch extensions that unexpectedly import modules from the old (wrong)
locations when running tests.

Reviewed By: phillco

Differential Revision: D13868981

fbshipit-source-id: f4e2513766957fd81d85407994f7521a08e4de48
2019-01-29 17:25:32 -08:00

111 lines
4.1 KiB
Python

# Copyright 2018 Facebook, Inc.
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
from __future__ import absolute_import
import itertools
import random
from edenscm.mercurial import context
# TODO: make these configs

# Number of paths picked (and edited, created, or removed) per generated commit.
DESIRED_FILES_PER_COMMIT = 6
# Not referenced in the visible part of this module.
MAX_FILES_PER_COMMIT = 10000

# Upper bound handed to ``random.randrange`` when choosing how many edits to
# apply to a single file.
MAX_EDITS_PER_FILE = 6
# Chance, in percent, that an already-existing file is removed rather than
# edited.
FILE_DELETION_CHANCE = 6
# Threshold on a 0-9 draw: values above it insert text, the rest delete text.
ADD_DELETE_RATIO = 3
# Cap on the number of characters removed by a single deletion edit.
DELETION_MAX_SIZE = 2000

# Not referenced in the visible part of this module.
BLACKLIST = [
    ".hgdirsync",
    ".hgtags",
]
class randomeditsgenerator(object):
    """Generate pseudo-random file edits against a context object.

    Paths are drawn from fixed pools of lowercase directory and filename
    combinations. Only a prefix of each pool is eligible at any time; the
    prefix grows with generation progress (see ``getcompletionratio``).
    """

    def __init__(self, ctx):
        """Build the path pools; ``ctx`` is only used to reach the repo's ui."""
        self.ui = ctx.repo().ui
        self.pathchars = "abcdefghijklmnopqrstuvwxyz"
        self.dirs = self.makedirs()
        self.fnames = self.makefilenames()

    def makedirs(self):
        """Return every directory-part tuple, in shuffled order.

        The nesting depth comes from ``repogenerator.filenamedircount``
        (default 3).
        """
        dircount = self.ui.configint("repogenerator", "filenamedircount", 3)

        # We generate all path combinations up-front and then shuffle them to
        # try and distribute the paths. (``getrandompath`` only selects from
        # a prorated slice of this list based on generation progress, so if it
        # were not randomized, initial edits would all be in a/a/*.)
        #
        # One downside to this approach: third-level directories are still too
        # sparse initially.
        paths = list(itertools.product(self.pathchars, repeat=dircount))
        random.shuffle(paths)
        return paths

    def makefilenames(self):
        """Return every filename character tuple, in alphabetical order.

        The name length comes from ``repogenerator.filenameleaflength``
        (default 3).
        """
        leaflen = self.ui.configint("repogenerator", "filenameleaflength", 3)

        # Unlike with dirs, there's no upside to randomizing, so keep
        # alphabetical for simplicity.
        return list(itertools.product(self.pathchars, repeat=leaflen))

    def getrandompath(self, wctx):
        """Return a random ``dir/.../name`` path from the eligible pools.

        The dictionary of directory names (and likewise of filenames) is
        limited initially and expanded over time, based on our goal, to mimic
        the organic growth of directories and projects.
        """
        ratio = self.getcompletionratio(wctx)
        dirparts = self._pickfromprefix(self.dirs, ratio)
        fnparts = self._pickfromprefix(self.fnames, ratio)
        return "/".join(dirparts) + "/" + "".join(fnparts)

    @staticmethod
    def _pickfromprefix(items, ratio):
        """Pick uniformly from the first ``ratio`` share of ``items``.

        At least one item is always eligible, and a ratio above 1 is clamped
        to the whole list.
        """
        limit = max(1, int(len(items) * ratio))
        # Index directly instead of sampling from a slice, which would copy
        # the eligible prefix on every call.
        return items[random.randrange(min(limit, len(items)))]

    def getcompletionratio(self, wctx):
        """Return the progress towards ``repogenerator.numcommits`` as a float.

        Progress is the revision count implied by ``wctx.p1()`` divided by
        the configured goal (default 10000). A goal of 0 is reported as fully
        complete (1.0) instead of raising ``ZeroDivisionError``.
        """
        tiprev = wctx.p1().rev() + 1  # rev is 0-based
        goalrev = self.ui.configint("repogenerator", "numcommits", 10000)
        if not goalrev:
            return 1.0
        return float(tiprev) / goalrev

    def makerandomedits(self, wctx):
        """Touch ``DESIRED_FILES_PER_COMMIT`` random paths in ``wctx``.

        Each picked path is either removed (existing files only, with
        ``FILE_DELETION_CHANCE`` percent probability), rewritten with randomly
        edited content, or created. Existing files whose data is empty are
        left untouched.
        """
        for _ in range(DESIRED_FILES_PER_COMMIT):
            path = self.getrandompath(wctx)
            if isinstance(wctx, context.workingctx):
                path = path.encode("ascii", "ignore")

            if wctx[path].exists():
                # randrange(0, 100) yields 0..99, so ``<`` makes the chance
                # exactly FILE_DELETION_CHANCE percent (``<=`` would give one
                # percentage point more).
                if random.randrange(0, 100) < FILE_DELETION_CHANCE:
                    wctx[path].remove()
                    continue
                existingdata = wctx[path].data()
            else:
                existingdata = "new file\n"

            if len(existingdata) > 0:
                wctx[path].write(self._applyrandomedits(existingdata), "")

    @staticmethod
    def _applyrandomedits(data):
        """Return ``data`` after a random series of insertions and deletions."""
        # NOTE(review): randrange excludes its upper bound, so at most
        # MAX_EDITS_PER_FILE - 1 edits are applied -- confirm this is intended.
        for _ in range(random.randrange(1, MAX_EDITS_PER_FILE)):
            if len(data) <= 1:
                # Too little content left to edit: reset it and stop.
                return "new file"

            if random.randrange(0, 10) > ADD_DELETE_RATIO:
                # Insert a marker comment at a random offset.
                idx = random.randrange(0, len(data))
                marker = "/* random data: %s */\n" % random.randrange(0, 900000)
                data = data[:idx] + marker + data[idx:]
            else:
                # Delete a random span, always leaving at least one character.
                length = random.randrange(0, min(len(data) - 1, DELETION_MAX_SIZE))
                idx = random.randrange(0, len(data) - length)
                data = data[:idx] + data[idx + length :]
        return data