sapling/eden/scm/tests/test-pathencode.py
Simon Farnsworth 5232dcf9d1 py3: make .py tests continue to run on python 2
Summary:
Use sed to convert testutil.ddot requires to hghave.

ignore-conflict-markers

for test-simplemerge.py

Reviewed By: simpkins

Differential Revision: D19658355

fbshipit-source-id: afae73eb1e43ead79514dfaf9f911f51ac25972e
2020-01-31 00:18:22 -08:00

285 lines
6.6 KiB
Python

# This is a randomized test that generates different pathnames every
# time it is invoked, and tests the encoding of those pathnames.
#
# It uses a simple probabilistic model to generate valid pathnames
# that have proven likely to expose bugs and divergent behavior in
# different encoding implementations.
from __future__ import absolute_import, print_function
import binascii
import collections
import itertools
import math
import os
import random
import sys
import time
from edenscm.mercurial import pycompat, store
from hghave import require
# Declare this test's hghave requirement.
# NOTE(review): presumably the test is skipped when "py2" is unavailable --
# confirm against hghave.require.
require(["py2"])

# Fall back to ``range`` on interpreters that have no ``xrange`` builtin.
try:
    xrange
except NameError:
    xrange = range

# Pool of candidate path characters: every one-character string for code
# points 0-255, with NUL and the "/" separator taken out below.
validchars = set(chr(code) for code in range(0, 256))
# NOTE(review): alphanum is not referenced anywhere else in the visible file,
# and the range stops one short of ord("Z") -- confirm whether it is needed.
alphanum = range(ord("A"), ord("Z"))
for c in ("\0", "/"):
    validchars.remove(c)

# Reserved Windows device names: aux/con/prn/nul plus com1-com9 and lpt1-lpt9.
winreserved = "aux con prn nul".split()
winreserved += ["com%d" % i for i in xrange(1, 10)]
winreserved += ["lpt%d" % i for i in xrange(1, 10)]
def casecombinations(names):
    """Return every upper/lower-case variant of each name, sorted.

    For each name, every subset of character positions is upper-cased in
    turn (including the empty subset, which yields the name unchanged).
    Duplicates -- e.g. from characters that have no distinct upper-case
    form -- are collapsed, and the result is a sorted list.
    """

    def upperat(name, positions):
        # Upper-case the characters of ``name`` at each given index.
        for pos in positions:
            name = name[:pos] + name[pos].upper() + name[pos + 1 :]
        return name

    variants = set()
    for name in names:
        indexes = xrange(len(name))
        # size runs from 0 (nothing upper-cased) to len(name) (everything).
        for size in xrange(len(name) + 1):
            variants.update(
                upperat(name, chosen)
                for chosen in itertools.combinations(indexes, size)
            )
    return sorted(variants)
def buildprobtable(fp, cmd="hg manifest tip"):
    """Construct and print a table of probabilities for path name
    components.  The numbers are percentages.

    ``cmd`` is run through the shell and is expected to print one path per
    line.  A trailing ".i"/".d" and a leading "data/" are stripped from each
    line before its characters are counted; "\\r", "/" and "\\n" are never
    counted.  The result is written to ``fp`` as a ``probtable = (...)``
    tuple literal, most frequent character first, stopping at the first
    character whose share drops below 0.0005 percent.
    """
    counts = collections.Counter()
    # Close the pipe deterministically instead of leaking it.
    with os.popen(cmd) as pipe:
        lines = pipe.read().splitlines()
    for line in lines:
        if line[-2:] in (".i", ".d"):
            line = line[:-2]
        if line.startswith("data/"):
            line = line[5:]
        counts.update(line)
    for c in "\r/\n":
        counts.pop(c, None)
    # values()/items() behave the same on Python 2 and 3, unlike iteritems().
    t = sum(counts.values()) / 100.0
    fp.write("probtable = (")
    ranked = sorted(counts.items(), key=lambda x: x[1], reverse=True)
    for i, (k, v) in enumerate(ranked):
        # Five entries per output row.
        if (i % 5) == 0:
            fp.write("\n ")
        vt = v / t
        if vt < 0.0005:
            break
        fp.write("(%r, %.03f), " % (k, vt))
    fp.write("\n )\n")
# A table of character frequencies (as percentages), gleaned by
# looking at filelog names from a real-world, very large repo.
#
# Each entry is a (character, weight) pair, listed from most to least
# frequent.  This is the same layout that buildprobtable() writes out.
# pickfrom() scales its random draw by the sum of the weights, so the
# values do not have to add up to exactly 100.
probtable = (
    ("t", 9.828),
    ("e", 9.042),
    ("s", 8.011),
    ("a", 6.801),
    ("i", 6.618),
    ("g", 5.053),
    ("r", 5.030),
    ("o", 4.887),
    ("p", 4.363),
    ("n", 4.258),
    ("l", 3.830),
    ("h", 3.693),
    ("_", 3.659),
    (".", 3.377),
    ("m", 3.194),
    ("u", 2.364),
    ("d", 2.296),
    ("c", 2.163),
    ("b", 1.739),
    ("f", 1.625),
    ("6", 0.666),
    ("j", 0.610),
    ("y", 0.554),
    ("x", 0.487),
    ("w", 0.477),
    ("k", 0.476),
    ("v", 0.473),
    ("3", 0.336),
    ("1", 0.335),
    ("2", 0.326),
    ("4", 0.310),
    ("5", 0.305),
    ("9", 0.302),
    ("8", 0.300),
    ("7", 0.299),
    ("q", 0.298),
    ("0", 0.250),
    ("z", 0.223),
    ("-", 0.118),
    ("C", 0.095),
    ("T", 0.087),
    ("F", 0.085),
    ("B", 0.077),
    ("S", 0.076),
    ("P", 0.076),
    ("L", 0.059),
    ("A", 0.058),
    ("N", 0.051),
    ("D", 0.049),
    ("M", 0.046),
    ("E", 0.039),
    ("I", 0.035),
    ("R", 0.035),
    ("G", 0.028),
    ("U", 0.026),
    ("W", 0.025),
    ("O", 0.017),
    ("V", 0.015),
    ("H", 0.013),
    ("Q", 0.011),
    ("J", 0.007),
    ("K", 0.005),
    ("+", 0.004),
    ("X", 0.003),
    ("Y", 0.001),
)
# Characters that appear in probtable are drawn through the weighted table,
# so remove them from the uniform pool of remaining characters.
for c, _ in probtable:
    validchars.remove(c)
# rng.choice() indexes into this sequence, so its order must not depend on
# set iteration order (which varies with string hash randomization);
# sorting keeps a given seed reproducible from run to run.
validchars = sorted(validchars)
def pickfrom(rng, table):
    """Pick one item from ``table`` at random, honouring its weights.

    ``table`` is a sequence of (item, weight) pairs.  A single draw from
    ``rng.random()`` is scaled by the total weight, and the first item
    whose cumulative weight reaches that threshold is returned.  Returns
    None when ``table`` is empty.
    """
    threshold = rng.random() * sum(entry[1] for entry in table)
    running = 0
    for item, weight in table:
        running += weight
        if running >= threshold:
            return item
    return None
# Every case variant of the reserved device names, sorted.
reservedcombos = casecombinations(winreserved)


def _fromprobtable(rng):
    # A character drawn according to the frequency table.
    return pickfrom(rng, probtable)


def _fromvalidchars(rng):
    # A character drawn uniformly from the remaining valid characters.
    return rng.choice(validchars)


def _fromreserved(rng):
    # A reserved device name in some mix of upper and lower case.
    return rng.choice(reservedcombos)


def _nosuffix(rng):
    # No special suffix at all.
    return ""


def _internalsuffix(rng):
    # One of the ".hg"/".i"/".d" suffixes in some mix of case.
    return rng.choice(internalsuffixcombos)


# The first component of a name following a slash.
# Each entry is a (generator, weight) pair for pickfrom().
firsttable = (
    (_fromprobtable, 90),
    (_fromvalidchars, 5),
    (_fromreserved, 5),
)

# Components of a name following the first: same as above, but without
# the reserved-name generator.
resttable = firsttable[:-1]

# Special suffixes.
internalsuffixcombos = casecombinations(".hg .i .d".split())

# The last component of a path, before a slash or at the end of a name.
lasttable = resttable + (
    (_nosuffix, 95),
    (_internalsuffix, 5),
)
def makepart(rng, k):
    """Construct a part of a pathname, without slashes.

    A target length is drawn uniformly from 1..k.  Pieces are appended
    until the accumulated length reaches that target, then one final piece
    from ``lasttable`` is added.  The result can be longer than ``k``,
    because pieces may be multi-character and the final piece is always
    appended.
    """
    pieces = [pickfrom(rng, firsttable)(rng)]
    length = len(pieces[0])
    target = rng.randint(1, k)
    while length < target:
        piece = pickfrom(rng, resttable)(rng)
        pieces.append(piece)
        length += len(piece)
    pieces.append(pickfrom(rng, lasttable)(rng))
    return "".join(pieces)
def makepath(rng, j, k):
    """Construct a complete pathname.

    The path is "data/" followed by ``j`` slash-separated parts (each built
    by ``makepart(rng, k)``) and a randomly chosen ".d" or ".i" suffix.
    """
    # Build every part before drawing the suffix, so the rng is consumed
    # in the same order as the parts appear in the path.
    body = "/".join(makepart(rng, k) for _ in xrange(j))
    suffix = rng.choice([".d", ".i"])
    return "data/" + body + suffix
def genpath(rng, count):
    """Generate random pathnames with gradually increasing lengths.

    Yields ``count`` paths.  For the i-th path a size bound ``k`` is derived
    from the square root of the progress ``i / count``; it starts at 1 and
    tops out at roughly 65.  The number of parts and the per-part length
    bound are then each drawn uniformly from 1..k.
    """
    mink, maxk = 1, 4096
    span = maxk - mink
    for i in xrange(count):
        k = mink + int(round(math.sqrt(span * float(i) / count)))
        nparts = rng.randint(1, k)
        partlen = rng.randint(1, k)
        yield makepath(rng, nparts, partlen)
def runtests(rng, seed, count):
    """Encode ``count`` random paths with both implementations and compare.

    Every mismatch is reported on stderr; the seed is printed once, before
    the first mismatch, so the run can be repeated with ``--seed``.
    Returns the number of mismatching paths.
    """
    nerrs = 0
    for p in genpath(rng, count):
        h = store._pathencode(p)  # uses C implementation, if available
        r = store._hybridencode(p, True)  # reference implementation in Python
        if h == r:
            continue
        if nerrs == 0:
            # "%#x" gives a 0x-prefixed literal without a trailing "L" for
            # both int and long seeds.  hex(seed)[:-1] dropped the last
            # digit whenever the seed was a plain int.
            print("seed:", "%#x" % seed, file=sys.stderr)
        # The "string_escape" codec exists only on Python 2, which this
        # test requires.
        print("\np: '%s'" % p.encode("string_escape"), file=sys.stderr)
        print("h: '%s'" % h.encode("string_escape"), file=sys.stderr)
        print("r: '%s'" % r.encode("string_escape"), file=sys.stderr)
        nerrs += 1
    return nerrs
def main():
    """Parse the command line and run the randomized comparison.

    Options:
      -c N, --count=N   number of paths to generate (default 100)
      -s N, --seed=N    rng seed, as a base 10 or base 16 literal
      --build           print a fresh probability table and exit

    Exits with status 1 if any path encodes differently in the two
    implementations.
    """
    import getopt

    # Empirically observed to take about a second to run
    count = 100
    seed = None
    opts, args = getopt.getopt(sys.argv[1:], "c:s:", ["build", "count=", "seed="])
    for o, a in opts:
        if o in ("-c", "--count"):
            count = int(a)
        elif o in ("-s", "--seed"):
            seed = int(a, base=0)  # accepts base 10 or 16 strings
        elif o == "--build":
            buildprobtable(
                sys.stdout,
                "find .hg/store/data -type f && cat .hg/store/fncache 2>/dev/null",
            )
            sys.exit(0)

    if seed is None:
        try:
            seed = int(binascii.hexlify(os.urandom(16)), 16)
        except (AttributeError, NotImplementedError):
            # os.urandom is missing (AttributeError) or has no randomness
            # source to draw from (NotImplementedError): use the clock.
            seed = int(time.time() * 1000)

    rng = random.Random(seed)
    if runtests(rng, seed, count):
        sys.exit(1)


if __name__ == "__main__":
    main()