mirror of
https://github.com/facebook/sapling.git
synced 2024-10-12 09:48:05 +03:00
48aa19eddb
This patch adds some ugly constructs. The first of them is bytesformatter, a function that formats strings the way the '%' operator does. The main motivation for this function is py3k's strange behavior:

    >>> 'foo %s' % b'bar'
    "foo b'bar'"
    >>> b'foo %s' % b'bar'
    Traceback (most recent call last):
      File "<stdin>", line 1, in <module>
    TypeError: unsupported operand type(s) for %: 'bytes' and 'bytes'
    >>> b'foo %s' % 'bar'
    Traceback (most recent call last):
      File "<stdin>", line 1, in <module>
    TypeError: unsupported operand type(s) for %: 'bytes' and 'str'

In other words, we can't format bytes with bytes, and since all mercurial strings will be converted by a fixer, things will break badly if we don't take a similar approach. The other addition in this patch is that the os.environ dictionary is monkeypatched to hold bytes items. Hopefully this won't be needed in the future, as python 3.2 might get an os.environb dictionary that holds bytes items.
98 lines
2.9 KiB
Python
98 lines
2.9 KiB
Python
"""Fixer that changes plain strings to bytes strings."""
|
|
|
|
import re
|
|
|
|
from lib2to3 import fixer_base
|
|
from lib2to3.pgen2 import token
|
|
from lib2to3.fixer_util import Name
|
|
from lib2to3.pygram import python_symbols as syms
|
|
|
|
# Matches the start of a string token that has either no prefix or only a raw
# (r/R) prefix: an optional raw marker immediately followed by a quote.
_re = re.compile(r'[rR]?[\'\"]')

# XXX: Implementing a blacklist in 2to3 itself turned out to be more
# troublesome than blacklisting some modules inside the fixers, so this is
# what I came up with.
blacklist = [
    'mercurial/demandimport.py',
    'mercurial/py3kcompat.py',  # valid python 3 already
    'mercurial/i18n.py',
]
|
|
|
|
def isdocstring(node):
    """Return True if *node* should be treated as a docstring.

    A node counts as a docstring when any of the following holds:

    * its parent is a ``simple_stmt`` that sits directly in ``file_input``,
      i.e. the node is a lone string statement at module level;
    * its parent is a ``simple_stmt`` and some ancestor is a ``funcdef`` or
      a ``classdef``, i.e. the node is a lone string statement somewhere
      inside a function or class;
    * the node is part of an ``expr_stmt`` that has the name ``__doc__``
      among its direct children (an assignment to ``__doc__``).

    A node without a parent is never a docstring and yields False.
    """
    def ancestors(start):
        # Yield start and then each successive parent, stopping at the root
        # of the tree (whose parent is None).
        current = start
        while current is not None:
            yield current
            current = current.parent

    parent = node.parent
    if parent is None:
        # Detached node: there is no enclosing statement to inspect.
        return False

    if parent.type == syms.simple_stmt:
        # The node is a statement of its own.
        grandparent = parent.parent
        if grandparent is not None and grandparent.type == syms.file_input:
            # Lone string directly in the source file.
            return True
        definitions = (syms.funcdef, syms.classdef)
        for ancestor in ancestors(parent):
            if ancestor.type in definitions:
                return True

    # Assigning to __doc__, definitely a string.
    docname = Name('__doc__')
    for ancestor in ancestors(parent):
        if ancestor.type == syms.expr_stmt and docname in ancestor.children:
            return True
    return False
|
|
|
|
def _stringcontents(value):
    """Return the text between the quotes of a string token's source text.

    Leading ``r``/``u``/``b`` prefix letters (either case) and one matching
    pair of triple or single quote delimiters are removed.  If *value* is
    not quoted it is returned unchanged.
    """
    body = value.lstrip('rRuUbB')
    # Try the triple-quoted forms first so '''x''' is not read as 'x' wrapped
    # in extra quote characters.
    for quote in ('"""', "'''", '"', "'"):
        if (len(body) >= 2 * len(quote) and
                body.startswith(quote) and body.endswith(quote)):
            return body[len(quote):-len(quote)]
    return value


def shouldtransform(node):
    """Return True if the string *node* may be given a ``b`` prefix.

    False is returned in two cases:

    * the contents of the string are one of the special names (currently
      only ``__main__``).  ``node.value`` holds the token as written in the
      source, quotes included, so the quotes are stripped before comparing;
    * the string form of the node three levels above *node* mentions
      ``getattr``, ``hasattr``, ``setattr``, ``encode`` or ``decode``.
      NOTE(review): presumably because the arguments of those calls must
      stay native strings on Python 3 -- confirm.  This is a plain
      substring test, so it is a heuristic.
    """
    specialnames = ['__main__']

    if _stringcontents(node.value) in specialnames:
        return False

    ggparent = node.parent.parent.parent
    sggparent = str(ggparent)

    markers = ('getattr', 'hasattr', 'setattr', 'encode', 'decode')
    return not any(marker in sggparent for marker in markers)
|
|
|
|
class FixBytes(fixer_base.BaseFix):
    """Fixer that prefixes plain string literals with ``b``."""

    # Pattern handed to the fixer machinery; transform() additionally checks
    # that the node it receives is a STRING token.
    PATTERN = 'STRING'

    def transform(self, node, results):
        """Return a ``b``-prefixed clone of *node*, or None to leave it alone.

        The node is left alone when the current file is blacklisted, when it
        is not a STRING token whose text starts with an optional ``r``/``R``
        followed by a quote, when it is a docstring, or when
        shouldtransform() rejects it.  *results* is not used.
        """
        if self.filename in blacklist:
            return None
        if node.type != token.STRING or not _re.match(node.value):
            return None
        if isdocstring(node) or not shouldtransform(node):
            return None

        replacement = node.clone()
        replacement.value = 'b' + replacement.value
        return replacement
|
|
|