mirror of
https://github.com/facebook/sapling.git
synced 2024-10-06 06:47:41 +03:00
py3: fix revlog path encodings
Summary: This is needed to move our hg servers to Python 3.
Reviewed By: quark-zju
Differential Revision: D24204056
fbshipit-source-id: cbaf97893f8f77b535952ac290766f0fd5e14f0c
This commit is contained in:
parent
2207e27ce0
commit
43fe23f09d
@ -150,6 +150,9 @@ if sys.version_info[0] >= 3:
|
||||
# type: (str) -> str
|
||||
return value
|
||||
|
||||
def inttobyte(value):
|
||||
return bytes([value])
|
||||
|
||||
def parse_email(fp):
|
||||
# Rarely used, so let's lazy load it
|
||||
import email.parser
|
||||
@ -253,6 +256,9 @@ else:
|
||||
# type: (str) -> str
|
||||
return value.decode("utf-8", "replace").encode("utf-8")
|
||||
|
||||
def inttobyte(value):
|
||||
return chr(value)
|
||||
|
||||
def parse_email(fp):
|
||||
import email.parser
|
||||
|
||||
|
@ -24,7 +24,7 @@ from edenscmnative import parsers
|
||||
from . import encoding, error, hintutil, pycompat, util, vfs as vfsmod
|
||||
from .i18n import _
|
||||
from .node import bin
|
||||
from .pycompat import decodeutf8, encodeutf8, range
|
||||
from .pycompat import decodeutf8, encodeutf8, inttobyte, range
|
||||
|
||||
|
||||
# This avoids a collision between a file named foo and a dir named
|
||||
@ -144,14 +144,14 @@ def _buildencodefun(forfncache):
|
||||
"""
|
||||
e = "_"
|
||||
xchr = pycompat.bytechr
|
||||
asciistr = list(map(xchr, range(127)))
|
||||
asciistr = list(map(inttobyte, range(127)))
|
||||
capitals = list(range(ord("A"), ord("Z") + 1))
|
||||
|
||||
cmap = dict((x, x) for x in asciistr)
|
||||
for x in _reserved():
|
||||
cmap[xchr(x)] = "~%02x" % x
|
||||
cmap[inttobyte(x)] = encodeutf8("~%02x" % x)
|
||||
for x in capitals + [ord(e)]:
|
||||
cmap[xchr(x)] = e + xchr(x).lower()
|
||||
cmap[inttobyte(x)] = encodeutf8(e + xchr(x).lower())
|
||||
|
||||
dmap = {}
|
||||
for k, v in cmap.items():
|
||||
@ -161,28 +161,33 @@ def _buildencodefun(forfncache):
|
||||
cmaplong = cmap.copy()
|
||||
|
||||
for i in capitals:
|
||||
c = chr(i)
|
||||
c = inttobyte(i)
|
||||
cmaplong[c] = c
|
||||
assert c not in dmap
|
||||
dmap[c] = c
|
||||
|
||||
cmapverylong = cmaplong.copy()
|
||||
cmapverylong["_"] = ":"
|
||||
assert ":" not in dmap
|
||||
dmap[":"] = "_"
|
||||
cmapverylong[b"_"] = b":"
|
||||
assert b":" not in dmap
|
||||
dmap[b":"] = b"_"
|
||||
|
||||
def encodecomp(comp):
|
||||
encoded = "".join(cmap[c] for c in comp)
|
||||
assert isinstance(comp, str), "encodecomp accepts str paths"
|
||||
comp = encodeutf8(comp)
|
||||
comp = [comp[i : i + 1] for i in range(len(comp))]
|
||||
encoded = b"".join(cmap[c] for c in comp)
|
||||
if len(encoded) > 255:
|
||||
encoded = "".join(cmaplong[c] for c in comp)
|
||||
encoded = b"".join(cmaplong[c] for c in comp)
|
||||
if len(encoded) > 255:
|
||||
encoded = "".join(cmapverylong[c] for c in comp)
|
||||
return encoded
|
||||
encoded = b"".join(cmapverylong[c] for c in comp)
|
||||
return decodeutf8(encoded)
|
||||
|
||||
def encodemaybelong(path):
|
||||
assert isinstance(path, str), "encodemaybelong accepts str paths"
|
||||
return "/".join(map(encodecomp, path.split("/")))
|
||||
|
||||
def decode(s):
|
||||
assert isinstance(s, bytes), "decode accepts bytes paths"
|
||||
i = 0
|
||||
while i < len(s):
|
||||
for l in range(1, 4):
|
||||
@ -196,12 +201,18 @@ def _buildencodefun(forfncache):
|
||||
raise KeyError
|
||||
|
||||
if forfncache:
|
||||
return (
|
||||
lambda s: "".join([cmap[s[c : c + 1]] for c in range(len(s))]),
|
||||
lambda s: "".join(list(decode(s))),
|
||||
)
|
||||
|
||||
def encode(s):
|
||||
assert isinstance(s, str), "encode accepts str paths"
|
||||
s = encodeutf8(s)
|
||||
return decodeutf8(b"".join([cmap[s[c : c + 1]] for c in range(len(s))]))
|
||||
|
||||
return (encode, lambda s: decodeutf8(b"".join(list(decode(s)))))
|
||||
else:
|
||||
return (encodemaybelong, lambda s: "".join(list(decode(s))))
|
||||
return (
|
||||
encodemaybelong,
|
||||
lambda s: decodeutf8(b"".join(list(decode(encodeutf8(s))))),
|
||||
)
|
||||
|
||||
|
||||
_encodefname, _decodefname = _buildencodefun(True)
|
||||
@ -239,14 +250,15 @@ def _buildlowerencodefun():
|
||||
'the~07quick~adshot'
|
||||
"""
|
||||
xchr = pycompat.bytechr
|
||||
cmap = dict([(xchr(x), xchr(x)) for x in range(127)])
|
||||
cmap = dict([(inttobyte(x), inttobyte(x)) for x in range(127)])
|
||||
for x in _reserved():
|
||||
cmap[xchr(x)] = "~%02x" % x
|
||||
cmap[inttobyte(x)] = encodeutf8("~%02x" % x)
|
||||
for x in range(ord("A"), ord("Z") + 1):
|
||||
cmap[xchr(x)] = xchr(x).lower()
|
||||
cmap[inttobyte(x)] = encodeutf8(xchr(x).lower())
|
||||
|
||||
def lowerencode(s):
|
||||
return "".join([cmap[c] for c in iter(s)])
|
||||
s = encodeutf8(s)
|
||||
return decodeutf8(b"".join([cmap[c] for c in iter(s)]))
|
||||
|
||||
return lowerencode
|
||||
|
||||
|
@ -1,28 +1,40 @@
|
||||
# encoding: utf-8
|
||||
from __future__ import absolute_import, print_function
|
||||
|
||||
import sys
|
||||
|
||||
from edenscm.mercurial import store
|
||||
from hghave import require
|
||||
|
||||
|
||||
require(["py2"])
|
||||
if sys.version_info[0] >= 3:
|
||||
|
||||
def escape(s):
|
||||
return str(s.encode("utf-8"))[2:-1]
|
||||
|
||||
|
||||
else:
|
||||
|
||||
def escape(s):
|
||||
return s.encode("string_escape")
|
||||
|
||||
|
||||
def show(s):
|
||||
# show test input
|
||||
print("A = '%s'" % s.encode("string_escape"))
|
||||
print("A = '%s'" % escape(s))
|
||||
|
||||
# show the result of the C implementation, if available
|
||||
h = store._pathencode(s)
|
||||
print("B = '%s'" % h.encode("string_escape"))
|
||||
print("B = '%s'" % escape(h))
|
||||
|
||||
# compare it with reference implementation in Python
|
||||
r = store._hybridencode(s, True)
|
||||
if h != r:
|
||||
print("R = '%s'" % r.encode("string_escape"))
|
||||
print("R = '%s'" % escape(r))
|
||||
print()
|
||||
|
||||
|
||||
show("data/abcdefghijklmnopqrstuvwxyz0123456789 !#%&'()+,-.;=[]^`{}")
|
||||
show("data/abcdefghijklmnopqrstuvwxyz0123456789 !#%&()+,-.;=[]^`{}")
|
||||
|
||||
print("uppercase char X is encoded as _x")
|
||||
show("data/ABCDEFGHIJKLMNOPQRSTUVWXYZ")
|
||||
@ -30,6 +42,9 @@ show("data/ABCDEFGHIJKLMNOPQRSTUVWXYZ")
|
||||
print("underbar is doubled")
|
||||
show("data/_")
|
||||
|
||||
print("unicode character")
|
||||
show("data/🐐")
|
||||
|
||||
print("tilde is character-encoded")
|
||||
show("data/~")
|
||||
|
||||
@ -39,25 +54,6 @@ show(
|
||||
"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
|
||||
)
|
||||
|
||||
print("characters in ASCII code range 126..255")
|
||||
show(
|
||||
"data/\x7e\x7f"
|
||||
"\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
|
||||
"\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
|
||||
)
|
||||
show(
|
||||
"data/\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
|
||||
"\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
|
||||
)
|
||||
show(
|
||||
"data/\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
|
||||
"\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
|
||||
)
|
||||
show(
|
||||
"data/\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef"
|
||||
"\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
|
||||
)
|
||||
|
||||
print("Windows reserved characters")
|
||||
show(
|
||||
'data/less <, greater >, colon :, double-quote ", backslash \\'
|
||||
@ -207,7 +203,7 @@ show(
|
||||
|
||||
print("not hitting limit with any of these")
|
||||
show(
|
||||
"data/abcdefghijklmnopqrstuvwxyz0123456789 !#%&'()+,-.;="
|
||||
"data/abcdefghijklmnopqrstuvwxyz0123456789 !#%&()+,-.;="
|
||||
"[]^`{}xxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-"
|
||||
"123456789-12345"
|
||||
)
|
||||
|
@ -1,5 +1,5 @@
|
||||
A = 'data/abcdefghijklmnopqrstuvwxyz0123456789 !#%&\'()+,-.;=[]^`{}'
|
||||
B = 'data/abcdefghijklmnopqrstuvwxyz0123456789 !#%&\'()+,-.;=[]^`{}'
|
||||
A = 'data/abcdefghijklmnopqrstuvwxyz0123456789 !#%&()+,-.;=[]^`{}'
|
||||
B = 'data/abcdefghijklmnopqrstuvwxyz0123456789 !#%&()+,-.;=[]^`{}'
|
||||
|
||||
uppercase char X is encoded as _x
|
||||
A = 'data/ABCDEFGHIJKLMNOPQRSTUVWXYZ'
|
||||
@ -9,6 +9,10 @@ underbar is doubled
|
||||
A = 'data/_'
|
||||
B = 'data/__'
|
||||
|
||||
unicode character
|
||||
A = 'data/\xf0\x9f\x90\x90'
|
||||
B = 'data/~f0~9f~90~90'
|
||||
|
||||
tilde is character-encoded
|
||||
A = 'data/~'
|
||||
B = 'data/~7e'
|
||||
@ -17,19 +21,6 @@ characters in ASCII code range 1..31
|
||||
A = 'data/\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f'
|
||||
B = 'data/~01~02~03~04~05~06~07~08~09~0a~0b~0c~0d~0e~0f~10~11~12~13~14~15~16~17~18~19~1a~1b~1c~1d~1e~1f'
|
||||
|
||||
characters in ASCII code range 126..255
|
||||
A = 'data/~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f'
|
||||
B = 'data/~7e~7f~80~81~82~83~84~85~86~87~88~89~8a~8b~8c~8d~8e~8f~90~91~92~93~94~95~96~97~98~99~9a~9b~9c~9d~9e~9f'
|
||||
|
||||
A = 'data/\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf'
|
||||
B = 'data/~a0~a1~a2~a3~a4~a5~a6~a7~a8~a9~aa~ab~ac~ad~ae~af~b0~b1~b2~b3~b4~b5~b6~b7~b8~b9~ba~bb~bc~bd~be~bf'
|
||||
|
||||
A = 'data/\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf'
|
||||
B = 'data/~c0~c1~c2~c3~c4~c5~c6~c7~c8~c9~ca~cb~cc~cd~ce~cf~d0~d1~d2~d3~d4~d5~d6~d7~d8~d9~da~db~dc~dd~de~df'
|
||||
|
||||
A = 'data/\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff'
|
||||
B = 'data/~e0~e1~e2~e3~e4~e5~e6~e7~e8~e9~ea~eb~ec~ed~ee~ef~f0~f1~f2~f3~f4~f5~f6~f7~f8~f9~fa~fb~fc~fd~fe~ff'
|
||||
|
||||
Windows reserved characters
|
||||
A = 'data/less <, greater >, colon :, double-quote ", backslash \\, pipe |, question-mark ?, asterisk *'
|
||||
B = 'data/less ~3c, greater ~3e, colon ~3a, double-quote ~22, backslash ~5c, pipe ~7c, question-mark ~3f, asterisk ~2a'
|
||||
@ -229,8 +220,8 @@ A = 'data/z23456789-123456789-123456789-123456789-123456789-xxxxxxxxx-xxxxxxxxx-
|
||||
B = 'data/z23456789-123456789-123456789-123456789-123456789-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-123456789-12345'
|
||||
|
||||
not hitting limit with any of these
|
||||
A = 'data/abcdefghijklmnopqrstuvwxyz0123456789 !#%&\'()+,-.;=[]^`{}xxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-123456789-12345'
|
||||
B = 'data/abcdefghijklmnopqrstuvwxyz0123456789 !#%&\'()+,-.;=[]^`{}xxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-123456789-12345'
|
||||
A = 'data/abcdefghijklmnopqrstuvwxyz0123456789 !#%&()+,-.;=[]^`{}xxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-123456789-12345'
|
||||
B = 'data/abcdefghijklmnopqrstuvwxyz0123456789 !#%&()+,-.;=[]^`{}xxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-123456789-12345'
|
||||
|
||||
underbar hitting length limit due to encoding
|
||||
A = 'data/_23456789-123456789-123456789-123456789-123456789-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-123456789-12345'
|
||||
|
Loading…
Reference in New Issue
Block a user