2019-11-01 22:33:28 +03:00
|
|
|
# Portions Copyright (c) Facebook, Inc. and its affiliates.
|
|
|
|
#
|
|
|
|
# This software may be used and distributed according to the terms of the
|
|
|
|
# GNU General Public License version 2.
|
|
|
|
|
2009-04-26 03:13:08 +04:00
|
|
|
# encoding.py - character transcoding support for Mercurial
|
|
|
|
#
|
|
|
|
# Copyright 2005-2009 Matt Mackall <mpm@selenic.com> and others
|
|
|
|
#
|
|
|
|
# This software may be used and distributed according to the terms of the
|
2010-01-20 07:20:08 +03:00
|
|
|
# GNU General Public License version 2 or any later version.
|
2009-04-03 23:51:48 +04:00
|
|
|
|
2017-09-03 08:56:31 +03:00
|
|
|
from __future__ import absolute_import, print_function
|
2015-12-13 06:57:48 +03:00
|
|
|
|
py3: introduce a wrapper for __builtins__.{raw_,}input()
In order to make this work, we have to wrap the io streams in a
TextIOWrapper so that __builtins__.input() can do unicode IO on Python
3. We can't just restore the original (unicode) sys.std* because we
might be running a cmdserver, and if we blindly restore sys.* to the
original values then we end up breaking the cmdserver. Sadly,
TextIOWrapper tries to close the underlying stream during its __del__,
so we have to make a subclass to prevent that.
If you see errors like:
TypeError: a bytes-like object is required, not 'str'
On an input() or print() call on Python 3, the substitution of
sys.std* is probably the root cause.
A previous version of this change tried to put the bytesinput() method
in pycompat - it turns out we need to do some encoding handling, so we
have to be in a higher layer that's allowed to use
mercurial.encoding.encoding. As a result, this is in util for now,
with the TextIOWrapper subclass hiding in encoding.py. I'm not sure of
a better place for the time being.
Differential Revision: https://phab.mercurial-scm.org/D299
2017-07-24 21:38:40 +03:00
|
|
|
import io
|
2015-12-13 06:57:48 +03:00
|
|
|
import locale
|
|
|
|
import os
|
2019-12-05 04:02:25 +03:00
|
|
|
import sys
|
2015-12-13 06:57:48 +03:00
|
|
|
import unicodedata
|
|
|
|
|
codemod: join the auto-formatter party
Summary:
Turned on the auto formatter. Ran `arc lint --apply-patches --take BLACK **/*.py`.
Then run `arc lint` again so some other autofixers like spellchecker etc. looked
at the code base. Manually accept the changes whenever they make sense, or use
a workaround (ex. changing "dict()" to "dict constructor") where autofix is false
positive. Disabled linters on files that are hard (i18n/polib.py) to fix, or less
interesting to fix (hgsubversion tests), or cannot be fixed without breaking
OSS build (FBPYTHON4).
Conflicted linters (test-check-module-imports.t, part of test-check-code.t,
test-check-pyflakes.t) are removed or disabled.
Duplicated linters (test-check-pyflakes.t, test-check-pylint.t) are removed.
An issue of the auto-formatter is that lines are no longer guaranteed to be <= 80
chars. But that seems less important compared with the benefit the auto-formatter
provides.
As we're here, also remove test-check-py3-compat.t, as it is currently broken
if `PYTHON3=/bin/python3` is set.
Reviewed By: wez, phillco, simpkins, pkaush, singhsrb
Differential Revision: D8173629
fbshipit-source-id: 90e248ae0c5e6eaadbe25520a6ee42d32005621b
2018-05-26 07:34:37 +03:00
|
|
|
from . import error, policy, pycompat
|
|
|
|
from .pure import charencode as charencodepure
|
2019-09-27 01:28:09 +03:00
|
|
|
from .pycompat import range
|
2009-04-03 23:51:48 +04:00
|
|
|
|
2017-04-23 10:10:51 +03:00
|
|
|
|
codemod: join the auto-formatter party
Summary:
Turned on the auto formatter. Ran `arc lint --apply-patches --take BLACK **/*.py`.
Then run `arc lint` again so some other autofixers like spellchecker etc. looked
at the code base. Manually accept the changes whenever they make sense, or use
a workaround (ex. changing "dict()" to "dict constructor") where autofix is false
positive. Disabled linters on files that are hard (i18n/polib.py) to fix, or less
interesting to fix (hgsubversion tests), or cannot be fixed without breaking
OSS build (FBPYTHON4).
Conflicted linters (test-check-module-imports.t, part of test-check-code.t,
test-check-pyflakes.t) are removed or disabled.
Duplicated linters (test-check-pyflakes.t, test-check-pylint.t) are removed.
An issue of the auto-formatter is that lines are no longer guaranteed to be <= 80
chars. But that seems less important compared with the benefit the auto-formatter
provides.
As we're here, also remove test-check-py3-compat.t, as it is currently broken
if `PYTHON3=/bin/python3` is set.
Reviewed By: wez, phillco, simpkins, pkaush, singhsrb
Differential Revision: D8173629
fbshipit-source-id: 90e248ae0c5e6eaadbe25520a6ee42d32005621b
2018-05-26 07:34:37 +03:00
|
|
|
# Load the "charencode" module through policy.importmod and re-export a few of
# its helpers under module-level names.
# NOTE(review): presumably this picks a C or pure-Python implementation
# depending on the module policy -- confirm against policy.importmod.
charencode = policy.importmod(r"charencode")

isasciistr = charencode.isasciistr
asciilower = charencode.asciilower
asciiupper = charencode.asciiupper
_jsonescapeu8fast = charencode.jsonescapeu8fast

# Short private alias for pycompat.sysstr.
_sysstr = pycompat.sysstr
|
|
|
|
|
2019-12-05 04:02:25 +03:00
|
|
|
if sys.version_info[0] >= 3:
    # Python 3 has no unichr(); chr() already returns a unicode character.
    unichr = chr

# These unicode characters are ignored by HFS+ (Apple Technote 1150,
# "Unicode Subtleties"), so we need to ignore them in some places for
# sanity.
# Each entry is the UTF-8 encoded byte sequence of one ignorable codepoint.
_ignore = [
    unichr(int(x, 16)).encode("utf-8")
    for x in (
        "200c 200d 200e 200f 202a 202b 202c 202d 202e "
        "206a 206b 206c 206d 206e 206f feff"
    ).split()
]
# verify the next function will work: its fast-path check relies on every
# ignorable sequence starting with one of these two lead bytes
assert all(i.startswith((b"\xe2", b"\xef")) for i in _ignore)


def hfsignoreclean(s):
    """Remove codepoints ignored by HFS+ from s.

    ``s`` is a UTF-8 encoded byte string. The result is ``s`` with every
    occurrence of the byte sequences listed in ``_ignore`` removed. When
    ``s`` contains neither lead byte (0xe2 / 0xef) it is returned unchanged
    without scanning the table.

    >>> hfsignoreclean(u'.h\u200cg'.encode('utf-8')) == b'.hg'
    True
    >>> hfsignoreclean(u'.h\ufeffg'.encode('utf-8')) == b'.hg'
    True
    """
    # All literals here must be bytes: _ignore holds bytes, and mixing str
    # with bytes raises TypeError on Python 3 (b"" == "" on Python 2, so
    # this is backward-compatible).
    if b"\xe2" in s or b"\xef" in s:
        for c in _ignore:
            s = s.replace(c, b"")
    return s
|
|
|
|
|
codemod: join the auto-formatter party
Summary:
Turned on the auto formatter. Ran `arc lint --apply-patches --take BLACK **/*.py`.
Then run `arc lint` again so some other autofixers like spellchecker etc. looked
at the code base. Manually accept the changes whenever they make sense, or use
a workaround (ex. changing "dict()" to "dict constructor") where autofix is false
positive. Disabled linters on files that are hard (i18n/polib.py) to fix, or less
interesting to fix (hgsubversion tests), or cannot be fixed without breaking
OSS build (FBPYTHON4).
Conflicted linters (test-check-module-imports.t, part of test-check-code.t,
test-check-pyflakes.t) are removed or disabled.
Duplicated linters (test-check-pyflakes.t, test-check-pylint.t) are removed.
An issue of the auto-formatter is that lines are no longer guaranteed to be <= 80
chars. But that seems less important compared with the benefit the auto-formatter
provides.
As we're here, also remove test-check-py3-compat.t, as it is currently broken
if `PYTHON3=/bin/python3` is set.
Reviewed By: wez, phillco, simpkins, pkaush, singhsrb
Differential Revision: D8173629
fbshipit-source-id: 90e248ae0c5e6eaadbe25520a6ee42d32005621b
2018-05-26 07:34:37 +03:00
|
|
|
|
2019-07-25 00:15:16 +03:00
|
|
|
def setfromenviron():
    """Reset encoding states from environment variables.

    Rebinds the module globals ``environ``, ``encoding``, ``encodingmode``
    and ``_wide``:

    - ``encoding`` comes from ``HGENCODING``; when that is unset or empty
      the locale's preferred encoding is used (passed through
      ``_encodingfixers``), falling back to "ascii". A resulting "ascii"
      is always replaced by "utf-8".
    - ``encodingmode`` comes from ``HGENCODINGMODE`` (default "strict").
    - ``_wide`` is "WFA" when ``HGENCODINGAMBIGUOUS`` is "wide", else "WF".
    """
    global encoding, encodingmode, environ, _wide

    environ = os.environ  # re-exports

    try:
        encoding = os.environ.get("HGENCODING")
        if not encoding:
            encoding = locale.getpreferredencoding() or "ascii"
            # Map known aliases (if any) to the name we want to use.
            fixer = _encodingfixers.get(encoding)
            if fixer is not None:
                encoding = fixer()
    except locale.Error:
        encoding = "ascii"

    encodingmode = os.environ.get("HGENCODINGMODE", "strict")

    # "ascii" is never kept as the effective encoding.
    if encoding == "ascii":
        encoding = "utf-8"

    # How to treat ambiguous-width characters. Set to 'wide' to treat as wide.
    ambiguous = os.environ.get("HGENCODINGAMBIGUOUS", "narrow")
    _wide = _sysstr("WFA" if ambiguous == "wide" else "WF")
|
|
|
|
|
2016-09-28 14:05:34 +03:00
|
|
|
|
codemod: join the auto-formatter party
Summary:
Turned on the auto formatter. Ran `arc lint --apply-patches --take BLACK **/*.py`.
Then run `arc lint` again so some other autofixers like spellchecker etc. looked
at the code base. Manually accept the changes whenever they make sense, or use
a workaround (ex. changing "dict()" to "dict constructor") where autofix is false
positive. Disabled linters on files that are hard (i18n/polib.py) to fix, or less
interesting to fix (hgsubversion tests), or cannot be fixed without breaking
OSS build (FBPYTHON4).
Conflicted linters (test-check-module-imports.t, part of test-check-code.t,
test-check-pyflakes.t) are removed or disabled.
Duplicated linters (test-check-pyflakes.t, test-check-pylint.t) are removed.
An issue of the auto-formatter is that lines are no longer guaranteed to be <= 80
chars. But that seems less important compared with the benefit the auto-formatter
provides.
As we're here, also remove test-check-py3-compat.t, as it is currently broken
if `PYTHON3=/bin/python3` is set.
Reviewed By: wez, phillco, simpkins, pkaush, singhsrb
Differential Revision: D8173629
fbshipit-source-id: 90e248ae0c5e6eaadbe25520a6ee42d32005621b
2018-05-26 07:34:37 +03:00
|
|
|
# Maps an encoding name to a zero-argument callable returning the name to use
# instead. setfromenviron() consults this table for the locale's preferred
# encoding, so it must be defined before setfromenviron() is called below.
_encodingfixers = {"646": lambda: "ascii", "ANSI_X3.4-1968": lambda: "ascii"}

# cp65001 is a Windows variant of utf-8, which isn't supported on Python 2.
# No idea if it should be rewritten to the canonical name 'utf-8' on Python 3.
# https://bugs.python.org/issue13216
if pycompat.iswindows and sys.version_info[0] < 3:
    _encodingfixers["cp65001"] = lambda: "utf-8"

# Module-level encoding state: declared here, then populated from the
# environment by the setfromenviron() call at import time.
environ = encoding = encodingmode = _wide = None
setfromenviron()
fallbackencoding = "ISO-8859-1"
|
|
|
|
|
2009-04-03 23:51:48 +04:00
|
|
|
|
2017-08-14 09:50:40 +03:00
|
|
|
class localstr(bytes):
    """Local-encoding byte string that remembers its UTF-8 original.

    The bytes value of the object is the local representation ``l``; the
    original UTF-8 string ``u`` is kept in the ``_utf8`` attribute so that
    an unmodified string can be round-tripped to the local encoding and
    back.
    """

    def __new__(cls, u, l):
        # bytes is immutable, so the payload must be supplied at creation.
        obj = super(localstr, cls).__new__(cls, l)
        obj._utf8 = u
        return obj

    def __hash__(self):
        # Hash on the UTF-8 original to avoid collisions in local string
        # space (distinct originals may share one local representation).
        return hash(self._utf8)
|
|
|
|
|
2010-11-25 00:38:52 +03:00
|
|
|
|
encoding: replace 'ascii' with 'utf-8' automatically
Summary:
`ascii` was used as the default / fallback, which is not a user-friendly choice.
Nowadays utf-8 dominates:
- Rust stdlib is utf-8.
- Ruby since 1.9 is utf-8 by default.
- Python 3 is unicode by default.
- Windows 10 adds utf-8 code page.
Given the fact that:
- Our CI sets HGENCODING to utf-8
- Nuclide passes `--encoding=utf-8` to every command.
- Some people have messed up with `LC_*` and complained about hg crashes.
- utf-8 is a super set of ascii, nobody complains that they want `ascii`
encoding and the `utf-8` encoding messed their setup up.
Let's just use `utf-8` as the default encoding. More aggressively, if someone
sets `ascii` as the encoding, it's almost always a mistake. Auto-correct that
to `utf-8` too.
This should also make future integration with Rust easier (where it's enforced
utf-8 and does not have an option to change the encoding). In the future we
might just drop the flexibility of choosing customized encoding, so this diff
autofixes `ascii` to `utf-8`, instead of allowing `ascii` to be set. We cannot
enforce `utf-8` yet, because of Windows.
Here is our encoding strategy vs the upstream's:
| item | upstream | | ours | ours |
| | current | ideal | current | ideal |
| CLI argv | bytes | bytes | utf-8 [1] | utf-8 |
| path | bytes | auto [3] | migrating [2] | utf-8 |
| commit message | utf-8 | utf-8 | utf-8 | utf-8 |
| bookmark name | utf-8 | utf-8 | utf-8 | utf-8 |
| file content | bytes | bytes | bytes | bytes |
[1]: Argv was accidentally enforced utf-8 for command-line arguments by a Rust
wrapper. But it simplified a lot of things and is kind of ok: everything that
can be passed as CLI arguments are utf-8: -M commit message, -b bookmark, paths,
etc. There is no "file content" passed via CLI arguments.
[2]: Path is controversial, because it's possible for systems to have non-utf8
paths. The upstream behavior is incorrect if a repo gets shared among different
encoding systems (ex. both Linux and Windows). We have to know the encoding of
paths to be able to convert them suitable for the local system. One way is to
enforce UTF-8 for paths. The other is to keep encoding information stored with
individual paths (like Ruby strings). The UTF-8 approach is much simpler with
the tradeoff that non-utf-8 paths become unsupported, which seems to be a
reasonable trade-off.
[3]: See https://www.mercurial-scm.org/wiki/WindowsUTF8Plan.
Reviewed By: singhsrb
Differential Revision: D17098991
fbshipit-source-id: c0ff1e586a887233bd43cdb854fb3538aa9b70c2
2019-09-13 01:05:08 +03:00
|
|
|
def _setascii():
    """Force the module-level ``encoding`` to "ascii".

    Used by some doctests.
    """
    global encoding
    encoding = "ascii"
|
|
|
|
|
|
|
|
|
2020-01-28 21:21:58 +03:00
|
|
|
def _tolocal(s):
|
2009-04-03 23:51:48 +04:00
|
|
|
"""
|
|
|
|
Convert a string from internal UTF-8 to local encoding
|
|
|
|
|
|
|
|
All internal strings should be UTF-8 but some repos before the
|
|
|
|
implementation of locale support may contain latin1 or possibly
|
|
|
|
other character sets. We attempt to decode everything strictly
|
|
|
|
using UTF-8, then Latin-1, and failing that, we use UTF-8 and
|
|
|
|
replace unknown characters.
|
2010-11-25 00:38:52 +03:00
|
|
|
|
|
|
|
The localstr class is used to cache the known UTF-8 encoding of
|
|
|
|
strings next to their local representation to allow lossless
|
|
|
|
round-trip conversion back to UTF-8.
|
|
|
|
|
encoding: replace 'ascii' with 'utf-8' automatically
Summary:
`ascii` was used as the default / fallback, which is not a user-friendly choice.
Nowadays utf-8 dominates:
- Rust stdlib is utf-8.
- Ruby since 1.9 is utf-8 by default.
- Python 3 is unicode by default.
- Windows 10 adds utf-8 code page.
Given the fact that:
- Our CI sets HGENCODING to utf-8
- Nuclide passes `--encoding=utf-8` to every command.
- Some people have messed up with `LC_*` and complained about hg crashes.
- utf-8 is a super set of ascii, nobody complains that they want `ascii`
encoding and the `utf-8` encoding messed their setup up.
Let's just use `utf-8` as the default encoding. More aggressively, if someone
sets `ascii` as the encoding, it's almost always a mistake. Auto-correct that
to `utf-8` too.
This should also make future integration with Rust easier (where it's enforced
utf-8 and does not have an option to change the encoding). In the future we
might just drop the flexibility of choosing customized encoding, so this diff
autofixes `ascii` to `utf-8`, instead of allowing `ascii` to be set. We cannot
enforce `utf-8` yet, because of Windows.
Here is our encoding strategy vs the upstream's:
| item | upstream | | ours | ours |
| | current | ideal | current | ideal |
| CLI argv | bytes | bytes | utf-8 [1] | utf-8 |
| path | bytes | auto [3] | migrating [2] | utf-8 |
| commit message | utf-8 | utf-8 | utf-8 | utf-8 |
| bookmark name | utf-8 | utf-8 | utf-8 | utf-8 |
| file content | bytes | bytes | bytes | bytes |
[1]: Argv was accidentally enforced utf-8 for command-line arguments by a Rust
wrapper. But it simplified a lot of things and is kind of ok: everything that
can be passed as CLI arguments are utf-8: -M commit message, -b bookmark, paths,
etc. There is no "file content" passed via CLI arguments.
[2]: Path is controversial, because it's possible for systems to have non-utf8
paths. The upstream behavior is incorrect if a repo gets shared among different
encoding systems (ex. both Linux and Windows). We have to know the encoding of
paths to be able to convert them suitable for the local system. One way is to
enforce UTF-8 for paths. The other is to keep encoding information stored with
individual paths (like Ruby strings). The UTF-8 approach is much simpler with
the tradeoff that non-utf-8 paths become unsupported, which seems to be a
reasonable trade-off.
[3]: See https://www.mercurial-scm.org/wiki/WindowsUTF8Plan.
Reviewed By: singhsrb
Differential Revision: D17098991
fbshipit-source-id: c0ff1e586a887233bd43cdb854fb3538aa9b70c2
2019-09-13 01:05:08 +03:00
|
|
|
>>> _setascii()
|
2017-09-03 08:32:11 +03:00
|
|
|
>>> u = b'foo: \\xc3\\xa4' # utf-8
|
2010-11-25 00:38:52 +03:00
|
|
|
>>> l = tolocal(u)
|
|
|
|
>>> l
|
|
|
|
'foo: ?'
|
|
|
|
>>> fromlocal(l)
|
|
|
|
'foo: \\xc3\\xa4'
|
2017-09-03 08:32:11 +03:00
|
|
|
>>> u2 = b'foo: \\xc3\\xa1'
|
2010-11-25 00:38:52 +03:00
|
|
|
>>> d = { l: 1, tolocal(u2): 2 }
|
2013-01-15 05:59:14 +04:00
|
|
|
>>> len(d) # no collision
|
|
|
|
2
|
2017-09-03 08:32:11 +03:00
|
|
|
>>> b'foo: ?' in d
|
2010-11-25 00:38:52 +03:00
|
|
|
False
|
2017-09-03 08:32:11 +03:00
|
|
|
>>> l1 = b'foo: \\xe4' # historical latin1 fallback
|
2010-11-25 00:38:52 +03:00
|
|
|
>>> l = tolocal(l1)
|
|
|
|
>>> l
|
|
|
|
'foo: ?'
|
|
|
|
>>> fromlocal(l) # magically in utf-8
|
|
|
|
'foo: \\xc3\\xa4'
|
2009-04-03 23:51:48 +04:00
|
|
|
"""
|
2010-11-25 00:38:52 +03:00
|
|
|
|
2017-04-23 07:06:23 +03:00
|
|
|
if isasciistr(s):
|
|
|
|
return s
|
|
|
|
|
2012-03-23 01:54:46 +04:00
|
|
|
try:
|
2009-04-03 23:51:48 +04:00
|
|
|
try:
|
2012-03-23 01:54:46 +04:00
|
|
|
# make sure string is actually stored in UTF-8
|
codemod: join the auto-formatter party
Summary:
Turned on the auto formatter. Ran `arc lint --apply-patches --take BLACK **/*.py`.
Then run `arc lint` again so some other autofixers like spellchecker etc. looked
at the code base. Manually accept the changes whenever they make sense, or use
a workaround (ex. changing "dict()" to "dict constructor") where autofix is false
positive. Disabled linters on files that are hard (i18n/polib.py) to fix, or less
interesting to fix (hgsubversion tests), or cannot be fixed without breaking
OSS build (FBPYTHON4).
Conflicted linters (test-check-module-imports.t, part of test-check-code.t,
test-check-pyflakes.t) are removed or disabled.
Duplicated linters (test-check-pyflakes.t, test-check-pylint.t) are removed.
An issue of the auto-formatter is lines are no longer guarnateed to be <= 80
chars. But that seems less important comparing with the benefit auto-formatter
provides.
As we're here, also remove test-check-py3-compat.t, as it is currently broken
if `PYTHON3=/bin/python3` is set.
Reviewed By: wez, phillco, simpkins, pkaush, singhsrb
Differential Revision: D8173629
fbshipit-source-id: 90e248ae0c5e6eaadbe25520a6ee42d32005621b
2018-05-26 07:34:37 +03:00
|
|
|
u = s.decode("UTF-8")
|
encoding: replace 'ascii' with 'utf-8' automatically
Summary:
`ascii` was used as the default / fallback, which is not a user-friendly choice.
Nowadays utf-8 dominates:
- Rust stdlib is utf-8.
- Ruby since 1.9 is utf-8 by default.
- Python 3 is unicode by default.
- Windows 10 adds utf-8 code page.
Given the fact that:
- Our CI sets HGENCODING to utf-8
- Nuclide passes `--encoding=utf-8` to every command.
- Some people have messed up with `LC_*` and complained about hg crashes.
- utf-8 is a super set of ascii, nobody complains that they want `ascii`
encoding and the `utf-8` encoding messed their setup up.
Let's just use `utf-8` as the default encoding. More aggressively, if someone
sets `ascii` as the encoding, it's almost always a mistake. Auto-correct that
to `utf-8` too.
This should also make future integration with Rust easier (where it's enforced
utf-8 and does not have an option to change the encoding). In the future we
might just drop the flexibility of choosing customized encoding, so this diff
autofixes `ascii` to `utf-8`, instead of allowing `ascii` to be set. We cannot
enforce `utf-8` yet, because of Windows.
Here is our encoding strategy vs the upstream's:
| item | upstream | | ours | ours |
| | current | ideal | current | ideal |
| CLI argv | bytes | bytes | utf-8 [1] | utf-8 |
| path | bytes | auto [3] | migrating [2] | utf-8 |
| commit message | utf-8 | utf-8 | utf-8 | utf-8 |
| bookmark name | utf-8 | utf-8 | utf-8 | utf-8 |
| file content | bytes | bytes | bytes | bytes |
[1]: Argv was accidentally enforced utf-8 for command-line arguments by a Rust
wrapper. But it simplified a lot of things and is kind of ok: everything that
can be passed as CLI arguments are utf-8: -M commit message, -b bookmark, paths,
etc. There is no "file content" passed via CLI arguments.
[2]: Path is controversial, because it's possible for systems to have non-utf8
paths. The upstream behavior is incorrect if a repo gets shared among different
encoding systems (ex. both Linux and Windows). We have to know the encoding of
paths to be able to convert them suitable for the local system. One way is to
enforce UTF-8 for paths. The other is to keep encoding information stored with
individual paths (like Ruby strings). The UTF-8 approach is much simpler with
the tradeoff that non-utf-8 paths become unsupported, which seems to be a
reasonable trade-off.
[3]: See https://www.mercurial-scm.org/wiki/WindowsUTF8Plan.
Reviewed By: singhsrb
Differential Revision: D17098991
fbshipit-source-id: c0ff1e586a887233bd43cdb854fb3538aa9b70c2
2019-09-13 01:05:08 +03:00
|
|
|
if encoding == "utf-8":
|
2012-03-23 01:54:46 +04:00
|
|
|
# fast path
|
|
|
|
return s
|
2016-09-28 14:39:06 +03:00
|
|
|
r = u.encode(_sysstr(encoding), u"replace")
|
|
|
|
if u == r.decode(_sysstr(encoding)):
|
2011-04-16 08:45:41 +04:00
|
|
|
# r is a safe, non-lossy encoding of s
|
|
|
|
return r
|
2012-03-23 01:54:46 +04:00
|
|
|
return localstr(s, r)
|
2009-04-03 23:51:48 +04:00
|
|
|
except UnicodeDecodeError:
|
2012-03-23 01:54:46 +04:00
|
|
|
# we should only get here if we're looking at an ancient changeset
|
|
|
|
try:
|
2016-09-28 14:39:06 +03:00
|
|
|
u = s.decode(_sysstr(fallbackencoding))
|
|
|
|
r = u.encode(_sysstr(encoding), u"replace")
|
|
|
|
if u == r.decode(_sysstr(encoding)):
|
2012-03-23 01:54:46 +04:00
|
|
|
# r is a safe, non-lossy encoding of s
|
|
|
|
return r
|
codemod: join the auto-formatter party
Summary:
Turned on the auto formatter. Ran `arc lint --apply-patches --take BLACK **/*.py`.
Then run `arc lint` again so some other autofixers like spellchecker etc. looked
at the code base. Manually accept the changes whenever they make sense, or use
a workaround (ex. changing "dict()" to "dict constructor") where autofix is false
positive. Disabled linters on files that are hard (i18n/polib.py) to fix, or less
interesting to fix (hgsubversion tests), or cannot be fixed without breaking
OSS build (FBPYTHON4).
Conflicted linters (test-check-module-imports.t, part of test-check-code.t,
test-check-pyflakes.t) are removed or disabled.
Duplicated linters (test-check-pyflakes.t, test-check-pylint.t) are removed.
An issue of the auto-formatter is lines are no longer guarnateed to be <= 80
chars. But that seems less important comparing with the benefit auto-formatter
provides.
As we're here, also remove test-check-py3-compat.t, as it is currently broken
if `PYTHON3=/bin/python3` is set.
Reviewed By: wez, phillco, simpkins, pkaush, singhsrb
Differential Revision: D8173629
fbshipit-source-id: 90e248ae0c5e6eaadbe25520a6ee42d32005621b
2018-05-26 07:34:37 +03:00
|
|
|
return localstr(u.encode("UTF-8"), r)
|
2012-03-23 01:54:46 +04:00
|
|
|
except UnicodeDecodeError:
|
codemod: join the auto-formatter party
Summary:
Turned on the auto formatter. Ran `arc lint --apply-patches --take BLACK **/*.py`.
Then run `arc lint` again so some other autofixers like spellchecker etc. looked
at the code base. Manually accept the changes whenever they make sense, or use
a workaround (ex. changing "dict()" to "dict constructor") where autofix is false
positive. Disabled linters on files that are hard (i18n/polib.py) to fix, or less
interesting to fix (hgsubversion tests), or cannot be fixed without breaking
OSS build (FBPYTHON4).
Conflicted linters (test-check-module-imports.t, part of test-check-code.t,
test-check-pyflakes.t) are removed or disabled.
Duplicated linters (test-check-pyflakes.t, test-check-pylint.t) are removed.
An issue of the auto-formatter is lines are no longer guarnateed to be <= 80
chars. But that seems less important comparing with the benefit auto-formatter
provides.
As we're here, also remove test-check-py3-compat.t, as it is currently broken
if `PYTHON3=/bin/python3` is set.
Reviewed By: wez, phillco, simpkins, pkaush, singhsrb
Differential Revision: D8173629
fbshipit-source-id: 90e248ae0c5e6eaadbe25520a6ee42d32005621b
2018-05-26 07:34:37 +03:00
|
|
|
u = s.decode("utf-8", "replace") # last ditch
|
2016-09-28 14:39:06 +03:00
|
|
|
# can't round-trip
|
|
|
|
return u.encode(_sysstr(encoding), u"replace")
|
2015-06-24 08:20:08 +03:00
|
|
|
except LookupError as k:
|
2012-03-23 01:54:46 +04:00
|
|
|
raise error.Abort(k, hint="please check your locale settings")
|
2009-04-03 23:51:48 +04:00
|
|
|
|
codemod: join the auto-formatter party
Summary:
Turned on the auto formatter. Ran `arc lint --apply-patches --take BLACK **/*.py`.
Then run `arc lint` again so some other autofixers like spellchecker etc. looked
at the code base. Manually accept the changes whenever they make sense, or use
a workaround (ex. changing "dict()" to "dict constructor") where autofix is false
positive. Disabled linters on files that are hard (i18n/polib.py) to fix, or less
interesting to fix (hgsubversion tests), or cannot be fixed without breaking
OSS build (FBPYTHON4).
Conflicted linters (test-check-module-imports.t, part of test-check-code.t,
test-check-pyflakes.t) are removed or disabled.
Duplicated linters (test-check-pyflakes.t, test-check-pylint.t) are removed.
An issue of the auto-formatter is that lines are no longer guaranteed to be <= 80
chars. But that seems less important compared with the benefit the auto-formatter
provides.
As we're here, also remove test-check-py3-compat.t, as it is currently broken
if `PYTHON3=/bin/python3` is set.
Reviewed By: wez, phillco, simpkins, pkaush, singhsrb
Differential Revision: D8173629
fbshipit-source-id: 90e248ae0c5e6eaadbe25520a6ee42d32005621b
2018-05-26 07:34:37 +03:00
|
|
|
|
2020-01-28 21:21:58 +03:00
|
|
|
def _fromlocal(s):
    """
    Convert a string from the local character encoding to UTF-8

    Decoding honours the error-handling mode configured through
    HGENCODINGMODE (default 'strict'). Under 'strict', a character that
    cannot be decoded aborts with an error message; 'replace' substitutes
    a special Unicode character for it and 'ignore' drops it.

    Raises error.Abort when decoding fails or when the configured
    encoding name is unknown.
    """
    # a localstr carries its UTF-8 form, so the round-trip is lossless
    if isinstance(s, localstr):
        return s._utf8
    # strings accepted by isasciistr() are returned as-is
    if isasciistr(s):
        return s

    try:
        decoded = s.decode(_sysstr(encoding), _sysstr(encodingmode))
    except UnicodeDecodeError as inst:
        # quote up to 10 units on each side of the offending position
        lo = max(0, inst.start - 10)
        hi = inst.start + 10
        raise error.Abort("decoding near '%s': %s!" % (s[lo:hi], inst))
    except LookupError as k:
        raise error.Abort(k, hint="please check your locale settings")
    else:
        return decoded.encode("utf-8")
|
2009-04-03 23:51:48 +04:00
|
|
|
|
codemod: join the auto-formatter party
Summary:
Turned on the auto formatter. Ran `arc lint --apply-patches --take BLACK **/*.py`.
Then run `arc lint` again so some other autofixers like spellchecker etc. looked
at the code base. Manually accept the changes whenever they make sense, or use
a workaround (ex. changing "dict()" to "dict constructor") where autofix is false
positive. Disabled linters on files that are hard (i18n/polib.py) to fix, or less
interesting to fix (hgsubversion tests), or cannot be fixed without breaking
OSS build (FBPYTHON4).
Conflicted linters (test-check-module-imports.t, part of test-check-code.t,
test-check-pyflakes.t) are removed or disabled.
Duplicated linters (test-check-pyflakes.t, test-check-pylint.t) are removed.
An issue of the auto-formatter is that lines are no longer guaranteed to be <= 80
chars. But that seems less important compared with the benefit the auto-formatter
provides.
As we're here, also remove test-check-py3-compat.t, as it is currently broken
if `PYTHON3=/bin/python3` is set.
Reviewed By: wez, phillco, simpkins, pkaush, singhsrb
Differential Revision: D8173629
fbshipit-source-id: 90e248ae0c5e6eaadbe25520a6ee42d32005621b
2018-05-26 07:34:37 +03:00
|
|
|
|
2017-03-13 19:11:08 +03:00
|
|
|
def unitolocal(u):
    """Convert a unicode string to a byte string of local encoding

    The text is first encoded as UTF-8 and then passed through tolocal().
    """
    utf8 = u.encode("utf-8")
    return tolocal(utf8)
|
|
|
|
|
2017-03-13 19:11:08 +03:00
|
|
|
|
|
|
|
def unifromlocal(s):
    """Convert a byte string of local encoding to a unicode string

    The bytes are first passed through fromlocal() and the result is
    decoded as UTF-8.
    """
    utf8 = fromlocal(s)
    return utf8.decode("utf-8")
|
|
|
|
|
2017-03-13 19:11:08 +03:00
|
|
|
|
2017-06-24 07:48:04 +03:00
|
|
|
def unimethod(bytesfunc):
    """Create a proxy method that forwards __unicode__() and __str__() of
    Python 3 to __bytes__()

    ``bytesfunc`` is called with the object and its result is converted
    with unifromlocal().
    """

    def unifunc(obj):
        raw = bytesfunc(obj)
        return unifromlocal(raw)

    return unifunc
|
|
|
|
|
codemod: join the auto-formatter party
Summary:
Turned on the auto formatter. Ran `arc lint --apply-patches --take BLACK **/*.py`.
Then run `arc lint` again so some other autofixers like spellchecker etc. looked
at the code base. Manually accept the changes whenever they make sense, or use
a workaround (ex. changing "dict()" to "dict constructor") where autofix is false
positive. Disabled linters on files that are hard (i18n/polib.py) to fix, or less
interesting to fix (hgsubversion tests), or cannot be fixed without breaking
OSS build (FBPYTHON4).
Conflicted linters (test-check-module-imports.t, part of test-check-code.t,
test-check-pyflakes.t) are removed or disabled.
Duplicated linters (test-check-pyflakes.t, test-check-pylint.t) are removed.
An issue of the auto-formatter is that lines are no longer guaranteed to be <= 80
chars. But that seems less important compared with the benefit the auto-formatter
provides.
As we're here, also remove test-check-py3-compat.t, as it is currently broken
if `PYTHON3=/bin/python3` is set.
Reviewed By: wez, phillco, simpkins, pkaush, singhsrb
Differential Revision: D8173629
fbshipit-source-id: 90e248ae0c5e6eaadbe25520a6ee42d32005621b
2018-05-26 07:34:37 +03:00
|
|
|
|
2017-03-13 19:12:56 +03:00
|
|
|
# Converter functions between the native str type and byte strings. Use
# these when the text comes from a source that is not encoding-aware (e.g.
# an exception message) or is known to be locale dependent (e.g. date
# formatting).
if sys.version_info[0] < 3:
    # Python 2: all three converters are the identity function
    strtolocal = strfromlocal = strmethod = pycompat.identity
else:
    # Python 3: go through the unicode <-> local-encoding helpers
    strtolocal, strfromlocal, strmethod = unitolocal, unifromlocal, unimethod
|
2017-03-13 19:12:56 +03:00
|
|
|
|
2010-10-28 00:35:21 +04:00
|
|
|
|
2020-01-28 21:21:58 +03:00
|
|
|
def _colwidth(s):
    """Find the column width of a string for display in the local encoding

    ``s`` is decoded with the module-level ``encoding`` using the
    'replace' error handler, then measured with ucolwidth().
    """
    text = s.decode(_sysstr(encoding), u"replace")
    return ucolwidth(text)
|
|
|
|
|
2011-08-26 23:56:12 +04:00
|
|
|
|
|
|
|
def ucolwidth(d):
    """Find the column width of a Unicode string for display

    Each character whose East Asian Width class (as reported by
    unicodedata.east_asian_width) is listed in the module-level ``_wide``
    counts as two columns; every other character counts as one. If
    unicodedata has no east_asian_width, the plain length is returned.
    """
    eaw = getattr(unicodedata, "east_asian_width", None)
    if eaw is None:
        return len(d)
    # a conditional expression replaces the fragile "cond and 2 or 1"
    # idiom, and a generator avoids building a temporary list
    return sum(2 if eaw(ch) in _wide else 1 for ch in d)
|
|
|
|
|
codemod: join the auto-formatter party
Summary:
Turned on the auto formatter. Ran `arc lint --apply-patches --take BLACK **/*.py`.
Then run `arc lint` again so some other autofixers like spellchecker etc. looked
at the code base. Manually accept the changes whenever they make sense, or use
a workaround (ex. changing "dict()" to "dict constructor") where autofix is false
positive. Disabled linters on files that are hard (i18n/polib.py) to fix, or less
interesting to fix (hgsubversion tests), or cannot be fixed without breaking
OSS build (FBPYTHON4).
Conflicted linters (test-check-module-imports.t, part of test-check-code.t,
test-check-pyflakes.t) are removed or disabled.
Duplicated linters (test-check-pyflakes.t, test-check-pylint.t) are removed.
An issue of the auto-formatter is that lines are no longer guaranteed to be <= 80
chars. But that seems less important compared with the benefit the auto-formatter
provides.
As we're here, also remove test-check-py3-compat.t, as it is currently broken
if `PYTHON3=/bin/python3` is set.
Reviewed By: wez, phillco, simpkins, pkaush, singhsrb
Differential Revision: D8173629
fbshipit-source-id: 90e248ae0c5e6eaadbe25520a6ee42d32005621b
2018-05-26 07:34:37 +03:00
|
|
|
|
2011-09-21 22:00:46 +04:00
|
|
|
def getcols(s, start, c):
    """Use colwidth to find a c-column substring of s starting at byte
    index start

    Candidate slices ``s[start:end]`` are tried with ``end`` running from
    ``start + c`` up to, but not including, ``len(s)``; the first one
    whose colwidth() equals ``c`` is returned. Returns None when no
    candidate matches.
    """
    end = start + c
    limit = len(s)
    while end < limit:
        piece = s[start:end]
        if colwidth(piece) == c:
            return piece
        end += 1
    return None
|
|
|
|
|
codemod: join the auto-formatter party
Summary:
Turned on the auto formatter. Ran `arc lint --apply-patches --take BLACK **/*.py`.
Then run `arc lint` again so some other autofixers like spellchecker etc. looked
at the code base. Manually accept the changes whenever they make sense, or use
a workaround (ex. changing "dict()" to "dict constructor") where autofix is false
positive. Disabled linters on files that are hard (i18n/polib.py) to fix, or less
interesting to fix (hgsubversion tests), or cannot be fixed without breaking
OSS build (FBPYTHON4).
Conflicted linters (test-check-module-imports.t, part of test-check-code.t,
test-check-pyflakes.t) are removed or disabled.
Duplicated linters (test-check-pyflakes.t, test-check-pylint.t) are removed.
An issue of the auto-formatter is that lines are no longer guaranteed to be <= 80
chars. But that seems less important compared with the benefit the auto-formatter
provides.
As we're here, also remove test-check-py3-compat.t, as it is currently broken
if `PYTHON3=/bin/python3` is set.
Reviewed By: wez, phillco, simpkins, pkaush, singhsrb
Differential Revision: D8173629
fbshipit-source-id: 90e248ae0c5e6eaadbe25520a6ee42d32005621b
2018-05-26 07:34:37 +03:00
|
|
|
|
|
|
|
def trim(s, width, ellipsis="", leftside=False):
|
2014-07-05 21:56:41 +04:00
|
|
|
"""Trim string 's' to at most 'width' columns (including 'ellipsis').
|
|
|
|
|
2014-07-05 21:56:41 +04:00
|
|
|
If 'leftside' is True, left side of string 's' is trimmed.
|
|
|
|
'ellipsis' is always placed at trimmed side.
|
|
|
|
|
2017-09-03 09:47:17 +03:00
|
|
|
>>> from .node import bin
|
2017-09-03 08:56:31 +03:00
|
|
|
>>> def bprint(s):
|
|
|
|
... print(pycompat.sysstr(s))
|
2017-09-03 08:32:11 +03:00
|
|
|
>>> ellipsis = b'+++'
|
2015-12-13 06:57:48 +03:00
|
|
|
>>> from . import encoding
|
2017-09-03 08:32:11 +03:00
|
|
|
>>> encoding.encoding = b'utf-8'
|
|
|
|
>>> t = b'1234567890'
|
2017-09-03 08:56:31 +03:00
|
|
|
>>> bprint(trim(t, 12, ellipsis=ellipsis))
|
2014-07-05 21:56:41 +04:00
|
|
|
1234567890
|
2017-09-03 08:56:31 +03:00
|
|
|
>>> bprint(trim(t, 10, ellipsis=ellipsis))
|
2014-07-05 21:56:41 +04:00
|
|
|
1234567890
|
2017-09-03 08:56:31 +03:00
|
|
|
>>> bprint(trim(t, 8, ellipsis=ellipsis))
|
2014-07-05 21:56:41 +04:00
|
|
|
12345+++
|
2017-09-03 08:56:31 +03:00
|
|
|
>>> bprint(trim(t, 8, ellipsis=ellipsis, leftside=True))
|
2014-07-05 21:56:41 +04:00
|
|
|
+++67890
|
2017-09-03 08:56:31 +03:00
|
|
|
>>> bprint(trim(t, 8))
|
2014-07-05 21:56:41 +04:00
|
|
|
12345678
|
2017-09-03 08:56:31 +03:00
|
|
|
>>> bprint(trim(t, 8, leftside=True))
|
2014-07-05 21:56:41 +04:00
|
|
|
34567890
|
2017-09-03 08:56:31 +03:00
|
|
|
>>> bprint(trim(t, 3, ellipsis=ellipsis))
|
2014-07-05 21:56:41 +04:00
|
|
|
+++
|
2017-09-03 08:56:31 +03:00
|
|
|
>>> bprint(trim(t, 1, ellipsis=ellipsis))
|
2014-07-05 21:56:41 +04:00
|
|
|
+
|
|
|
|
>>> u = u'\u3042\u3044\u3046\u3048\u304a' # 2 x 5 = 10 columns
|
2017-09-03 09:42:27 +03:00
|
|
|
>>> t = u.encode(pycompat.sysstr(encoding.encoding))
|
2017-09-03 08:56:31 +03:00
|
|
|
>>> bprint(trim(t, 12, ellipsis=ellipsis))
|
2014-07-05 21:56:41 +04:00
|
|
|
\xe3\x81\x82\xe3\x81\x84\xe3\x81\x86\xe3\x81\x88\xe3\x81\x8a
|
2017-09-03 08:56:31 +03:00
|
|
|
>>> bprint(trim(t, 10, ellipsis=ellipsis))
|
2014-07-05 21:56:41 +04:00
|
|
|
\xe3\x81\x82\xe3\x81\x84\xe3\x81\x86\xe3\x81\x88\xe3\x81\x8a
|
2017-09-03 08:56:31 +03:00
|
|
|
>>> bprint(trim(t, 8, ellipsis=ellipsis))
|
2014-07-05 21:56:41 +04:00
|
|
|
\xe3\x81\x82\xe3\x81\x84+++
|
2017-09-03 08:56:31 +03:00
|
|
|
>>> bprint(trim(t, 8, ellipsis=ellipsis, leftside=True))
|
2014-07-05 21:56:41 +04:00
|
|
|
+++\xe3\x81\x88\xe3\x81\x8a
|
2017-09-03 08:56:31 +03:00
|
|
|
>>> bprint(trim(t, 5))
|
2014-07-05 21:56:41 +04:00
|
|
|
\xe3\x81\x82\xe3\x81\x84
|
2017-09-03 08:56:31 +03:00
|
|
|
>>> bprint(trim(t, 5, leftside=True))
|
2014-07-05 21:56:41 +04:00
|
|
|
\xe3\x81\x88\xe3\x81\x8a
|
2017-09-03 08:56:31 +03:00
|
|
|
>>> bprint(trim(t, 4, ellipsis=ellipsis))
|
2014-07-05 21:56:41 +04:00
|
|
|
+++
|
2017-09-03 08:56:31 +03:00
|
|
|
>>> bprint(trim(t, 4, ellipsis=ellipsis, leftside=True))
|
2014-07-05 21:56:41 +04:00
|
|
|
+++
|
2017-09-03 09:47:17 +03:00
|
|
|
>>> t = bin(b'112233445566778899aa') # invalid byte sequence
|
2017-09-03 08:56:31 +03:00
|
|
|
>>> bprint(trim(t, 12, ellipsis=ellipsis))
|
2014-07-05 21:56:41 +04:00
|
|
|
\x11\x22\x33\x44\x55\x66\x77\x88\x99\xaa
|
2017-09-03 08:56:31 +03:00
|
|
|
>>> bprint(trim(t, 10, ellipsis=ellipsis))
|
2014-07-05 21:56:41 +04:00
|
|
|
\x11\x22\x33\x44\x55\x66\x77\x88\x99\xaa
|
2017-09-03 08:56:31 +03:00
|
|
|
>>> bprint(trim(t, 8, ellipsis=ellipsis))
|
2014-07-05 21:56:41 +04:00
|
|
|
\x11\x22\x33\x44\x55+++
|
2017-09-03 08:56:31 +03:00
|
|
|
>>> bprint(trim(t, 8, ellipsis=ellipsis, leftside=True))
|
2014-07-05 21:56:41 +04:00
|
|
|
+++\x66\x77\x88\x99\xaa
|
2017-09-03 08:56:31 +03:00
|
|
|
>>> bprint(trim(t, 8))
|
2014-07-05 21:56:41 +04:00
|
|
|
\x11\x22\x33\x44\x55\x66\x77\x88
|
2017-09-03 08:56:31 +03:00
|
|
|
>>> bprint(trim(t, 8, leftside=True))
|
2014-07-05 21:56:41 +04:00
|
|
|
\x33\x44\x55\x66\x77\x88\x99\xaa
|
2017-09-03 08:56:31 +03:00
|
|
|
>>> bprint(trim(t, 3, ellipsis=ellipsis))
|
2014-07-05 21:56:41 +04:00
|
|
|
+++
|
2017-09-03 08:56:31 +03:00
|
|
|
>>> bprint(trim(t, 1, ellipsis=ellipsis))
|
2014-07-05 21:56:41 +04:00
|
|
|
+
|
|
|
|
"""
|
|
|
|
try:
|
2020-01-28 21:21:58 +03:00
|
|
|
if sys.version_info.major == 3:
|
|
|
|
u = s
|
|
|
|
else:
|
|
|
|
u = s.decode(_sysstr(encoding))
|
2014-07-05 21:56:41 +04:00
|
|
|
except UnicodeDecodeError:
|
codemod: join the auto-formatter party
Summary:
Turned on the auto formatter. Ran `arc lint --apply-patches --take BLACK **/*.py`.
Then run `arc lint` again so some other autofixers like spellchecker etc. looked
at the code base. Manually accept the changes whenever they make sense, or use
a workaround (ex. changing "dict()" to "dict constructor") where autofix is false
positive. Disabled linters on files that are hard (i18n/polib.py) to fix, or less
interesting to fix (hgsubversion tests), or cannot be fixed without breaking
OSS build (FBPYTHON4).
Conflicted linters (test-check-module-imports.t, part of test-check-code.t,
test-check-pyflakes.t) are removed or disabled.
Duplicated linters (test-check-pyflakes.t, test-check-pylint.t) are removed.
An issue of the auto-formatter is lines are no longer guarnateed to be <= 80
chars. But that seems less important comparing with the benefit auto-formatter
provides.
As we're here, also remove test-check-py3-compat.t, as it is currently broken
if `PYTHON3=/bin/python3` is set.
Reviewed By: wez, phillco, simpkins, pkaush, singhsrb
Differential Revision: D8173629
fbshipit-source-id: 90e248ae0c5e6eaadbe25520a6ee42d32005621b
2018-05-26 07:34:37 +03:00
|
|
|
if len(s) <= width: # trimming is not needed
|
2014-07-05 21:56:41 +04:00
|
|
|
return s
|
|
|
|
width -= len(ellipsis)
|
codemod: join the auto-formatter party
Summary:
Turned on the auto formatter. Ran `arc lint --apply-patches --take BLACK **/*.py`.
Then run `arc lint` again so some other autofixers like spellchecker etc. looked
at the code base. Manually accept the changes whenever they make sense, or use
a workaround (ex. changing "dict()" to "dict constructor") where autofix is false
positive. Disabled linters on files that are hard (i18n/polib.py) to fix, or less
interesting to fix (hgsubversion tests), or cannot be fixed without breaking
OSS build (FBPYTHON4).
Conflicted linters (test-check-module-imports.t, part of test-check-code.t,
test-check-pyflakes.t) are removed or disabled.
Duplicated linters (test-check-pyflakes.t, test-check-pylint.t) are removed.
An issue of the auto-formatter is lines are no longer guaranteed to be <= 80
chars. But that seems less important comparing with the benefit auto-formatter
provides.
As we're here, also remove test-check-py3-compat.t, as it is currently broken
if `PYTHON3=/bin/python3` is set.
Reviewed By: wez, phillco, simpkins, pkaush, singhsrb
Differential Revision: D8173629
fbshipit-source-id: 90e248ae0c5e6eaadbe25520a6ee42d32005621b
2018-05-26 07:34:37 +03:00
|
|
|
if width <= 0: # no enough room even for ellipsis
|
|
|
|
return ellipsis[: width + len(ellipsis)]
|
2014-07-05 21:56:41 +04:00
|
|
|
if leftside:
|
|
|
|
return ellipsis + s[-width:]
|
2014-07-05 21:56:41 +04:00
|
|
|
return s[:width] + ellipsis
|
|
|
|
|
codemod: join the auto-formatter party
Summary:
Turned on the auto formatter. Ran `arc lint --apply-patches --take BLACK **/*.py`.
Then run `arc lint` again so some other autofixers like spellchecker etc. looked
at the code base. Manually accept the changes whenever they make sense, or use
a workaround (ex. changing "dict()" to "dict constructor") where autofix is false
positive. Disabled linters on files that are hard (i18n/polib.py) to fix, or less
interesting to fix (hgsubversion tests), or cannot be fixed without breaking
OSS build (FBPYTHON4).
Conflicted linters (test-check-module-imports.t, part of test-check-code.t,
test-check-pyflakes.t) are removed or disabled.
Duplicated linters (test-check-pyflakes.t, test-check-pylint.t) are removed.
An issue of the auto-formatter is lines are no longer guaranteed to be <= 80
chars. But that seems less important comparing with the benefit auto-formatter
provides.
As we're here, also remove test-check-py3-compat.t, as it is currently broken
if `PYTHON3=/bin/python3` is set.
Reviewed By: wez, phillco, simpkins, pkaush, singhsrb
Differential Revision: D8173629
fbshipit-source-id: 90e248ae0c5e6eaadbe25520a6ee42d32005621b
2018-05-26 07:34:37 +03:00
|
|
|
if ucolwidth(u) <= width: # trimming is not needed
|
2014-07-05 21:56:41 +04:00
|
|
|
return s
|
|
|
|
|
|
|
|
width -= len(ellipsis)
|
codemod: join the auto-formatter party
Summary:
Turned on the auto formatter. Ran `arc lint --apply-patches --take BLACK **/*.py`.
Then run `arc lint` again so some other autofixers like spellchecker etc. looked
at the code base. Manually accept the changes whenever they make sense, or use
a workaround (ex. changing "dict()" to "dict constructor") where autofix is false
positive. Disabled linters on files that are hard (i18n/polib.py) to fix, or less
interesting to fix (hgsubversion tests), or cannot be fixed without breaking
OSS build (FBPYTHON4).
Conflicted linters (test-check-module-imports.t, part of test-check-code.t,
test-check-pyflakes.t) are removed or disabled.
Duplicated linters (test-check-pyflakes.t, test-check-pylint.t) are removed.
An issue of the auto-formatter is lines are no longer guaranteed to be <= 80
chars. But that seems less important comparing with the benefit auto-formatter
provides.
As we're here, also remove test-check-py3-compat.t, as it is currently broken
if `PYTHON3=/bin/python3` is set.
Reviewed By: wez, phillco, simpkins, pkaush, singhsrb
Differential Revision: D8173629
fbshipit-source-id: 90e248ae0c5e6eaadbe25520a6ee42d32005621b
2018-05-26 07:34:37 +03:00
|
|
|
if width <= 0: # no enough room even for ellipsis
|
|
|
|
return ellipsis[: width + len(ellipsis)]
|
2014-07-05 21:56:41 +04:00
|
|
|
|
2014-07-05 21:56:41 +04:00
|
|
|
if leftside:
|
|
|
|
uslice = lambda i: u[i:]
|
|
|
|
concat = lambda s: ellipsis + s
|
|
|
|
else:
|
|
|
|
uslice = lambda i: u[:-i]
|
|
|
|
concat = lambda s: s + ellipsis
|
2019-09-27 01:28:09 +03:00
|
|
|
for i in range(1, len(u)):
|
2014-07-05 21:56:41 +04:00
|
|
|
usub = uslice(i)
|
|
|
|
if ucolwidth(usub) <= width:
|
2016-09-28 14:39:06 +03:00
|
|
|
return concat(usub.encode(_sysstr(encoding)))
|
codemod: join the auto-formatter party
Summary:
Turned on the auto formatter. Ran `arc lint --apply-patches --take BLACK **/*.py`.
Then run `arc lint` again so some other autofixers like spellchecker etc. looked
at the code base. Manually accept the changes whenever they make sense, or use
a workaround (ex. changing "dict()" to "dict constructor") where autofix is false
positive. Disabled linters on files that are hard (i18n/polib.py) to fix, or less
interesting to fix (hgsubversion tests), or cannot be fixed without breaking
OSS build (FBPYTHON4).
Conflicted linters (test-check-module-imports.t, part of test-check-code.t,
test-check-pyflakes.t) are removed or disabled.
Duplicated linters (test-check-pyflakes.t, test-check-pylint.t) are removed.
An issue of the auto-formatter is lines are no longer guaranteed to be <= 80
chars. But that seems less important comparing with the benefit auto-formatter
provides.
As we're here, also remove test-check-py3-compat.t, as it is currently broken
if `PYTHON3=/bin/python3` is set.
Reviewed By: wez, phillco, simpkins, pkaush, singhsrb
Differential Revision: D8173629
fbshipit-source-id: 90e248ae0c5e6eaadbe25520a6ee42d32005621b
2018-05-26 07:34:37 +03:00
|
|
|
return ellipsis # no enough room for multi-column characters
|
|
|
|
|
2014-07-05 21:56:41 +04:00
|
|
|
|
2020-01-28 21:21:58 +03:00
|
|
|
def _lower(s):
    """best-effort encoding-aware lower-casing of local string s

    asciilower() is tried first. If it raises UnicodeDecodeError, the string
    is decoded (from its cached UTF-8 form for a localstr, otherwise using the
    configured encoding), lower-cased as unicode and re-encoded.

    The original object is returned untouched when lower-casing changes
    nothing. Raises error.Abort if the codec lookup fails.
    """
    try:
        return asciilower(s)
    except UnicodeDecodeError:
        # fall through to the unicode-aware path below
        pass

    try:
        text = (
            s._utf8.decode("utf-8")
            if isinstance(s, localstr)
            else s.decode(_sysstr(encoding), _sysstr(encodingmode))
        )
        folded = text.lower()
        if folded != text:
            return folded.encode(_sysstr(encoding))
        return s  # preserve localstring
    except UnicodeError:
        # we don't know how to fold this except in ASCII
        return s.lower()
    except LookupError as exc:
        raise error.Abort(exc, hint="please check your locale settings")
|
|
|
|
|
codemod: join the auto-formatter party
Summary:
Turned on the auto formatter. Ran `arc lint --apply-patches --take BLACK **/*.py`.
Then run `arc lint` again so some other autofixers like spellchecker etc. looked
at the code base. Manually accept the changes whenever they make sense, or use
a workaround (ex. changing "dict()" to "dict constructor") where autofix is false
positive. Disabled linters on files that are hard (i18n/polib.py) to fix, or less
interesting to fix (hgsubversion tests), or cannot be fixed without breaking
OSS build (FBPYTHON4).
Conflicted linters (test-check-module-imports.t, part of test-check-code.t,
test-check-pyflakes.t) are removed or disabled.
Duplicated linters (test-check-pyflakes.t, test-check-pylint.t) are removed.
An issue of the auto-formatter is lines are no longer guaranteed to be <= 80
chars. But that seems less important comparing with the benefit auto-formatter
provides.
As we're here, also remove test-check-py3-compat.t, as it is currently broken
if `PYTHON3=/bin/python3` is set.
Reviewed By: wez, phillco, simpkins, pkaush, singhsrb
Differential Revision: D8173629
fbshipit-source-id: 90e248ae0c5e6eaadbe25520a6ee42d32005621b
2018-05-26 07:34:37 +03:00
|
|
|
|
2020-01-28 21:21:58 +03:00
|
|
|
def _upper(s):
    """best-effort encoding-aware upper-casing of local string s

    asciiupper() is tried first; if it raises UnicodeDecodeError the work is
    handed over to upperfallback().
    """
    try:
        result = asciiupper(s)
    except UnicodeDecodeError:
        result = upperfallback(s)
    return result
|
|
|
|
|
codemod: join the auto-formatter party
Summary:
Turned on the auto formatter. Ran `arc lint --apply-patches --take BLACK **/*.py`.
Then run `arc lint` again so some other autofixers like spellchecker etc. looked
at the code base. Manually accept the changes whenever they make sense, or use
a workaround (ex. changing "dict()" to "dict constructor") where autofix is false
positive. Disabled linters on files that are hard (i18n/polib.py) to fix, or less
interesting to fix (hgsubversion tests), or cannot be fixed without breaking
OSS build (FBPYTHON4).
Conflicted linters (test-check-module-imports.t, part of test-check-code.t,
test-check-pyflakes.t) are removed or disabled.
Duplicated linters (test-check-pyflakes.t, test-check-pylint.t) are removed.
An issue of the auto-formatter is lines are no longer guaranteed to be <= 80
chars. But that seems less important comparing with the benefit auto-formatter
provides.
As we're here, also remove test-check-py3-compat.t, as it is currently broken
if `PYTHON3=/bin/python3` is set.
Reviewed By: wez, phillco, simpkins, pkaush, singhsrb
Differential Revision: D8173629
fbshipit-source-id: 90e248ae0c5e6eaadbe25520a6ee42d32005621b
2018-05-26 07:34:37 +03:00
|
|
|
|
2015-04-01 10:30:41 +03:00
|
|
|
def upperfallback(s):
    """upper-case local string s by going through unicode

    A localstr is decoded from its cached UTF-8 form; anything else is decoded
    using the configured encoding and encoding mode. The upper-cased text is
    re-encoded in the configured encoding.

    The original object is returned untouched when upper-casing changes
    nothing. Raises error.Abort if the codec lookup fails.
    """
    try:
        text = (
            s._utf8.decode("utf-8")
            if isinstance(s, localstr)
            else s.decode(_sysstr(encoding), _sysstr(encodingmode))
        )
        folded = text.upper()
        if folded != text:
            return folded.encode(_sysstr(encoding))
        return s  # preserve localstring
    except UnicodeError:
        # we don't know how to fold this except in ASCII
        return s.upper()
    except LookupError as exc:
        raise error.Abort(exc, hint="please check your locale settings")
|
2012-02-21 02:42:45 +04:00
|
|
|
|
codemod: join the auto-formatter party
Summary:
Turned on the auto formatter. Ran `arc lint --apply-patches --take BLACK **/*.py`.
Then run `arc lint` again so some other autofixers like spellchecker etc. looked
at the code base. Manually accept the changes whenever they make sense, or use
a workaround (ex. changing "dict()" to "dict constructor") where autofix is false
positive. Disabled linters on files that are hard (i18n/polib.py) to fix, or less
interesting to fix (hgsubversion tests), or cannot be fixed without breaking
OSS build (FBPYTHON4).
Conflicted linters (test-check-module-imports.t, part of test-check-code.t,
test-check-pyflakes.t) are removed or disabled.
Duplicated linters (test-check-pyflakes.t, test-check-pylint.t) are removed.
An issue of the auto-formatter is lines are no longer guaranteed to be <= 80
chars. But that seems less important comparing with the benefit auto-formatter
provides.
As we're here, also remove test-check-py3-compat.t, as it is currently broken
if `PYTHON3=/bin/python3` is set.
Reviewed By: wez, phillco, simpkins, pkaush, singhsrb
Differential Revision: D8173629
fbshipit-source-id: 90e248ae0c5e6eaadbe25520a6ee42d32005621b
2018-05-26 07:34:37 +03:00
|
|
|
|
2015-04-01 10:21:10 +03:00
|
|
|
class normcasespecs(object):
    """describes the effect of a platform's normcase on ASCII strings

    Every platform specifies one of the values below, and the choice should
    be consistent with what normcase on that platform actually does.

    lower: normcase lowercases ASCII strings
    upper: normcase uppercases ASCII strings
    other: the fallback function should always be called

    This should be kept in sync with normcase_spec in util.h."""

    # numeric values are part of the contract with util.h; do not renumber
    lower, upper, other = -1, 1, 0
|
|
|
|
|
codemod: join the auto-formatter party
Summary:
Turned on the auto formatter. Ran `arc lint --apply-patches --take BLACK **/*.py`.
Then run `arc lint` again so some other autofixers like spellchecker etc. looked
at the code base. Manually accept the changes whenever they make sense, or use
a workaround (ex. changing "dict()" to "dict constructor") where autofix is false
positive. Disabled linters on files that are hard (i18n/polib.py) to fix, or less
interesting to fix (hgsubversion tests), or cannot be fixed without breaking
OSS build (FBPYTHON4).
Conflicted linters (test-check-module-imports.t, part of test-check-code.t,
test-check-pyflakes.t) are removed or disabled.
Duplicated linters (test-check-pyflakes.t, test-check-pylint.t) are removed.
An issue of the auto-formatter is lines are no longer guaranteed to be <= 80
chars. But that seems less important comparing with the benefit auto-formatter
provides.
As we're here, also remove test-check-py3-compat.t, as it is currently broken
if `PYTHON3=/bin/python3` is set.
Reviewed By: wez, phillco, simpkins, pkaush, singhsrb
Differential Revision: D8173629
fbshipit-source-id: 90e248ae0c5e6eaadbe25520a6ee42d32005621b
2018-05-26 07:34:37 +03:00
|
|
|
|
2015-12-27 13:28:34 +03:00
|
|
|
def jsonescape(s, paranoid=False):
    """returns a string suitable for JSON

    JSON is problematic for us because it doesn't support non-Unicode
    bytes. To deal with this, we take the following approach:

    - localstr objects are converted back to UTF-8
    - valid UTF-8/ASCII strings are passed as-is
    - other strings are converted to UTF-8b surrogate encoding
    - apply JSON-specified string escaping

    (escapes are doubled in these tests)

    >>> jsonescape(b'this is a test')
    'this is a test'
    >>> jsonescape(b'escape characters: \\0 \\x0b \\x7f')
    'escape characters: \\\\u0000 \\\\u000b \\\\u007f'
    >>> jsonescape(b'escape characters: \\b \\t \\n \\f \\r \\" \\\\')
    'escape characters: \\\\b \\\\t \\\\n \\\\f \\\\r \\\\" \\\\\\\\'
    >>> jsonescape(b'a weird byte: \\xdd')
    'a weird byte: \\xed\\xb3\\x9d'
    >>> jsonescape(b'utf-8: caf\\xc3\\xa9')
    'utf-8: caf\\xc3\\xa9'
    >>> jsonescape(b'')
    ''

    If paranoid, non-ascii and common troublesome characters are also escaped.
    This is suitable for web output.

    >>> s = b'escape characters: \\0 \\x0b \\x7f'
    >>> assert jsonescape(s) == jsonescape(s, paranoid=True)
    >>> s = b'escape characters: \\b \\t \\n \\f \\r \\" \\\\'
    >>> assert jsonescape(s) == jsonescape(s, paranoid=True)
    >>> jsonescape(b'escape boundary: \\x7e \\x7f \\xc2\\x80', paranoid=True)
    'escape boundary: ~ \\\\u007f \\\\u0080'
    >>> jsonescape(b'a weird byte: \\xdd', paranoid=True)
    'a weird byte: \\\\udcdd'
    >>> jsonescape(b'utf-8: caf\\xc3\\xa9', paranoid=True)
    'utf-8: caf\\\\u00e9'
    >>> jsonescape(b'non-BMP: \\xf0\\x9d\\x84\\x9e', paranoid=True)
    'non-BMP: \\\\ud834\\\\udd1e'
    >>> jsonescape(b'<foo@example.org>', paranoid=True)
    '\\\\u003cfoo@example.org\\\\u003e'
    """

    utf8b = toutf8b(s)
    try:
        escaped = _jsonescapeu8fast(utf8b, paranoid)
    except ValueError:
        # the fast implementation gave up; use the pure fallback below
        pass
    else:
        return escaped
    return charencodepure.jsonescapeu8fallback(utf8b, paranoid)
|
2014-09-15 22:12:49 +04:00
|
|
|
|
codemod: join the auto-formatter party
Summary:
Turned on the auto formatter. Ran `arc lint --apply-patches --take BLACK **/*.py`.
Then run `arc lint` again so some other autofixers like spellchecker etc. looked
at the code base. Manually accept the changes whenever they make sense, or use
a workaround (ex. changing "dict()" to "dict constructor") where autofix is false
positive. Disabled linters on files that are hard (i18n/polib.py) to fix, or less
interesting to fix (hgsubversion tests), or cannot be fixed without breaking
OSS build (FBPYTHON4).
Conflicted linters (test-check-module-imports.t, part of test-check-code.t,
test-check-pyflakes.t) are removed or disabled.
Duplicated linters (test-check-pyflakes.t, test-check-pylint.t) are removed.
An issue of the auto-formatter is lines are no longer guaranteed to be <= 80
chars. But that seems less important comparing with the benefit auto-formatter
provides.
As we're here, also remove test-check-py3-compat.t, as it is currently broken
if `PYTHON3=/bin/python3` is set.
Reviewed By: wez, phillco, simpkins, pkaush, singhsrb
Differential Revision: D8173629
fbshipit-source-id: 90e248ae0c5e6eaadbe25520a6ee42d32005621b
2018-05-26 07:34:37 +03:00
|
|
|
|
2017-09-16 16:55:48 +03:00
|
|
|
# Invalid UTF-8 bytes are mapped into the U+DCxx range, so those code points
# have to pass through decode/encode transparently: use "surrogatepass" on
# Python 3 and plain "strict" on older interpreters.
_utf8strict = r"surrogatepass" if sys.version_info[0] >= 3 else r"strict"

# 16-entry lookup table.
# NOTE(review): presumably indexed by the high nibble of a byte to get the
# length of the UTF-8 sequence it starts -- confirm against getutf8char.
_utf8len = [0] * 8 + [1] * 4 + [2, 2, 3, 4]
|
|
|
|
|
codemod: join the auto-formatter party
Summary:
Turned on the auto formatter. Ran `arc lint --apply-patches --take BLACK **/*.py`.
Then run `arc lint` again so some other autofixers like spellchecker etc. looked
at the code base. Manually accept the changes whenever they make sense, or use
a workaround (ex. changing "dict()" to "dict constructor") where autofix is false
positive. Disabled linters on files that are hard (i18n/polib.py) to fix, or less
interesting to fix (hgsubversion tests), or cannot be fixed without breaking
OSS build (FBPYTHON4).
Conflicted linters (test-check-module-imports.t, part of test-check-code.t,
test-check-pyflakes.t) are removed or disabled.
Duplicated linters (test-check-pyflakes.t, test-check-pylint.t) are removed.
An issue of the auto-formatter is lines are no longer guaranteed to be <= 80
chars. But that seems less important comparing with the benefit auto-formatter
provides.
As we're here, also remove test-check-py3-compat.t, as it is currently broken
if `PYTHON3=/bin/python3` is set.
Reviewed By: wez, phillco, simpkins, pkaush, singhsrb
Differential Revision: D8173629
fbshipit-source-id: 90e248ae0c5e6eaadbe25520a6ee42d32005621b
2018-05-26 07:34:37 +03:00
|
|
|
|
2015-11-06 01:48:46 +03:00
|
|
|
def getutf8char(s, pos):
    """Return the complete utf-8 character of ``s`` that begins at ``pos``.

    The high nibble of the byte at ``pos`` selects, through ``_utf8len``,
    how many bytes are sliced out of ``s``.  A zero entry means the byte is
    plain ASCII; it is returned on its own without any decode attempt.

    Raises a UnicodeError if the given location does not start a valid
    utf-8 character.
    """
    lead = s[pos : pos + 1]
    # the first nibble tells us how many bytes to attempt decoding
    nbytes = _utf8len[ord(lead) >> 4]
    if nbytes == 0:  # ascii
        return lead

    char = s[pos : pos + nbytes]
    # the decode is purely a validity check; its result is thrown away
    char.decode("utf-8", _utf8strict)
    return char
|
|
|
|
|
codemod: join the auto-formatter party
Summary:
Turned on the auto formatter. Ran `arc lint --apply-patches --take BLACK **/*.py`.
Then run `arc lint` again so some other autofixers like spellchecker etc. looked
at the code base. Manually accept the changes whenever they make sense, or use
a workaround (ex. changing "dict()" to "dict constructor") where autofix is false
positive. Disabled linters on files that are hard (i18n/polib.py) to fix, or less
interesting to fix (hgsubversion tests), or cannot be fixed without breaking
OSS build (FBPYTHON4).
Conflicted linters (test-check-module-imports.t, part of test-check-code.t,
test-check-pyflakes.t) are removed or disabled.
Duplicated linters (test-check-pyflakes.t, test-check-pylint.t) are removed.
An issue of the auto-formatter is lines are no longer guaranteed to be <= 80
chars. But that seems less important comparing with the benefit auto-formatter
provides.
As we're here, also remove test-check-py3-compat.t, as it is currently broken
if `PYTHON3=/bin/python3` is set.
Reviewed By: wez, phillco, simpkins, pkaush, singhsrb
Differential Revision: D8173629
fbshipit-source-id: 90e248ae0c5e6eaadbe25520a6ee42d32005621b
2018-05-26 07:34:37 +03:00
|
|
|
|
2012-02-21 02:42:45 +04:00
|
|
|
def toutf8b(s):
    """Convert a local, possibly-binary string into UTF-8b.

    This is a generic way to preserve data when working with schemes
    such as JSON and XML that have no provision for arbitrary byte
    strings.  As Mercurial often doesn't know what encoding data is in,
    we use so-called UTF-8b.

    A string that is already valid UTF-8 (or ASCII) passes through
    unmodified.  Otherwise, unsupported bytes are mapped into the UTF-16
    surrogate range, uDC00-uDCFF.

    Principles of operation:

    - ASCII and UTF-8 data successfully round-trips and is understood
      by Unicode-oriented clients
    - filenames and file contents in arbitrary other encodings can be
      round-tripped or recovered by clueful clients
    - local strings that have a cached known UTF-8 encoding (aka
      localstr) get sent as UTF-8 so Unicode-oriented clients get the
      Unicode data they want
    - because we must preserve UTF-8 bytestrings in places such as
      filenames, metadata can't be round-tripped without help

    (Note: "UTF-8b" often refers to decoding a mix of valid UTF-8 and
    arbitrary bytes into an internal Unicode format that can be
    re-encoded back into the original.  Here we are exposing the
    internal surrogate encoding as a UTF-8 string.)
    """
    is_local = isinstance(s, localstr)
    if not is_local and isasciistr(s):
        return s

    # Without a 0xED byte there can be no pre-existing U+DCxx sequence, so
    # the cached UTF-8 form or an already-valid string can be used directly.
    if "\xed" not in s:
        if is_local:
            return s._utf8
        try:
            s.decode("utf-8", _utf8strict)
        except UnicodeDecodeError:
            pass
        else:
            return s

    s = pycompat.bytestr(s)
    pieces = []
    pos = 0
    end = len(s)
    while pos < end:
        try:
            char = getutf8char(s, pos)
        except UnicodeDecodeError:
            # no valid character starts here: escape this single byte
            char = None
        if char is None or "\xed\xb0\x80" <= char <= "\xed\xb3\xbf":
            # invalid bytes, and existing U+DCxx characters (which have to
            # be re-escaped), are emitted one byte at a time as U+DC00 + byte
            char = unichr(0xDC00 + ord(s[pos])).encode("utf-8", _utf8strict)
            pos += 1
        else:
            pos += len(char)
        pieces.append(char)
    return "".join(pieces)
|
2012-02-21 02:42:45 +04:00
|
|
|
|
codemod: join the auto-formatter party
Summary:
Turned on the auto formatter. Ran `arc lint --apply-patches --take BLACK **/*.py`.
Then run `arc lint` again so some other autofixers like spellchecker etc. looked
at the code base. Manually accept the changes whenever they make sense, or use
a workaround (ex. changing "dict()" to "dict constructor") where autofix is false
positive. Disabled linters on files that are hard (i18n/polib.py) to fix, or less
interesting to fix (hgsubversion tests), or cannot be fixed without breaking
OSS build (FBPYTHON4).
Conflicted linters (test-check-module-imports.t, part of test-check-code.t,
test-check-pyflakes.t) are removed or disabled.
Duplicated linters (test-check-pyflakes.t, test-check-pylint.t) are removed.
An issue of the auto-formatter is lines are no longer guaranteed to be <= 80
chars. But that seems less important comparing with the benefit auto-formatter
provides.
As we're here, also remove test-check-py3-compat.t, as it is currently broken
if `PYTHON3=/bin/python3` is set.
Reviewed By: wez, phillco, simpkins, pkaush, singhsrb
Differential Revision: D8173629
fbshipit-source-id: 90e248ae0c5e6eaadbe25520a6ee42d32005621b
2018-05-26 07:34:37 +03:00
|
|
|
|
2012-02-21 02:42:45 +04:00
|
|
|
def fromutf8b(s):
    """Given a UTF-8b string, return a local, possibly-binary string.

    This gives back the original binary string.  It is a round-trip
    process for strings like filenames, but metadata that was passed
    through tolocal will remain in UTF-8.

    >>> roundtrip = lambda x: fromutf8b(toutf8b(x)) == x
    >>> m = b"\\xc3\\xa9\\x99abcd"
    >>> toutf8b(m)
    '\\xc3\\xa9\\xed\\xb2\\x99abcd'
    >>> roundtrip(m)
    True
    >>> roundtrip(b"\\xc2\\xc2\\x80")
    True
    >>> roundtrip(b"\\xef\\xbf\\xbd")
    True
    >>> roundtrip(b"\\xef\\xef\\xbf\\xbd")
    True
    >>> roundtrip(b"\\xf1\\x80\\x80\\x80\\x80")
    True
    """
    # fast paths - pure ASCII, or no uDxxx prefix byte anywhere in s
    if isasciistr(s) or "\xed" not in s:
        return s

    # We could do this with the unicode type but some Python builds
    # use UTF-16 internally (issue5031) which causes non-BMP code
    # points to be escaped. Instead, we use our handy getutf8char
    # helper again to walk the string without "decoding" it.

    s = pycompat.bytestr(s)
    pieces = []
    pos = 0
    end = len(s)
    while pos < end:
        char = getutf8char(s, pos)
        pos += len(char)
        if "\xed\xb0\x80" <= char <= "\xed\xb3\xbf":
            # unescape U+DCxx characters: the low 8 bits are the raw byte
            codepoint = ord(char.decode("utf-8", _utf8strict))
            char = pycompat.bytechr(codepoint & 0xFF)
        pieces.append(char)
    return "".join(pieces)
|
py3: introduce a wrapper for __builtins__.{raw_,}input()
In order to make this work, we have to wrap the io streams in a
TextIOWrapper so that __builtins__.input() can do unicode IO on Python
3. We can't just restore the original (unicode) sys.std* because we
might be running a cmdserver, and if we blindly restore sys.* to the
original values then we end up breaking the cmdserver. Sadly,
TextIOWrapper tries to close the underlying stream during its __del__,
so we have to make a subclass to prevent that.
If you see errors like:
TypeError: a bytes-like object is required, not 'str'
On an input() or print() call on Python 3, the substitution of
sys.std* is probably the root cause.
A previous version of this change tried to put the bytesinput() method
in pycompat - it turns out we need to do some encoding handling, so we
have to be in a higher layer that's allowed to use
mercurial.encoding.encoding. As a result, this is in util for now,
with the TextIOWrapper subclass hiding in encoding.py. I'm not sure of
a better place for the time being.
Differential Revision: https://phab.mercurial-scm.org/D299
2017-07-24 21:38:40 +03:00
|
|
|
|
codemod: join the auto-formatter party
Summary:
Turned on the auto formatter. Ran `arc lint --apply-patches --take BLACK **/*.py`.
Then run `arc lint` again so some other autofixers like spellchecker etc. looked
at the code base. Manually accept the changes whenever they make sense, or use
a workaround (ex. changing "dict()" to "dict constructor") where autofix is false
positive. Disabled linters on files that are hard (i18n/polib.py) to fix, or less
interesting to fix (hgsubversion tests), or cannot be fixed without breaking
OSS build (FBPYTHON4).
Conflicted linters (test-check-module-imports.t, part of test-check-code.t,
test-check-pyflakes.t) are removed or disabled.
Duplicated linters (test-check-pyflakes.t, test-check-pylint.t) are removed.
An issue of the auto-formatter is lines are no longer guaranteed to be <= 80
chars. But that seems less important comparing with the benefit auto-formatter
provides.
As we're here, also remove test-check-py3-compat.t, as it is currently broken
if `PYTHON3=/bin/python3` is set.
Reviewed By: wez, phillco, simpkins, pkaush, singhsrb
Differential Revision: D8173629
fbshipit-source-id: 90e248ae0c5e6eaadbe25520a6ee42d32005621b
2018-05-26 07:34:37 +03:00
|
|
|
|
2019-12-05 04:02:25 +03:00
|
|
|
if sys.version_info[0] >= 3:
|
codemod: join the auto-formatter party
Summary:
Turned on the auto formatter. Ran `arc lint --apply-patches --take BLACK **/*.py`.
Then run `arc lint` again so some other autofixers like spellchecker etc. looked
at the code base. Manually accept the changes whenever they make sense, or use
a workaround (ex. changing "dict()" to "dict constructor") where autofix is false
positive. Disabled linters on files that are hard (i18n/polib.py) to fix, or less
interesting to fix (hgsubversion tests), or cannot be fixed without breaking
OSS build (FBPYTHON4).
Conflicted linters (test-check-module-imports.t, part of test-check-code.t,
test-check-pyflakes.t) are removed or disabled.
Duplicated linters (test-check-pyflakes.t, test-check-pylint.t) are removed.
An issue of the auto-formatter is lines are no longer guaranteed to be <= 80
chars. But that seems less important comparing with the benefit auto-formatter
provides.
As we're here, also remove test-check-py3-compat.t, as it is currently broken
if `PYTHON3=/bin/python3` is set.
Reviewed By: wez, phillco, simpkins, pkaush, singhsrb
Differential Revision: D8173629
fbshipit-source-id: 90e248ae0c5e6eaadbe25520a6ee42d32005621b
2018-05-26 07:34:37 +03:00
|
|
|
|
2020-01-28 21:21:58 +03:00
|
|
|
# Prefer native unicode on Python 3
|
|
|
|
colwidth = ucolwidth
|
|
|
|
fromlocal = pycompat.identity
|
|
|
|
strfromlocal = pycompat.identity
|
|
|
|
strio = pycompat.identity
|
|
|
|
strmethod = pycompat.identity
|
|
|
|
strtolocal = pycompat.identity
|
|
|
|
tolocal = pycompat.identity
|
2020-01-29 18:38:41 +03:00
|
|
|
tolocalstr = pycompat.decodeutf8 # Binary utf-8 to Python 3 str
|
2020-01-28 21:21:58 +03:00
|
|
|
unifromlocal = pycompat.identity
|
|
|
|
unitolocal = pycompat.identity
|
py3: introduce a wrapper for __builtins__.{raw_,}input()
In order to make this work, we have to wrap the io streams in a
TextIOWrapper so that __builtins__.input() can do unicode IO on Python
3. We can't just restore the original (unicode) sys.std* because we
might be running a cmdserver, and if we blindly restore sys.* to the
original values then we end up breaking the cmdserver. Sadly,
TextIOWrapper tries to close the underlying stream during its __del__,
so we have to make a subclass to prevent that.
If you see errors like:
TypeError: a bytes-like object is required, not 'str'
On an input() or print() call on Python 3, the substitution of
sys.std* is probably the root cause.
A previous version of this change tried to put the bytesinput() method
in pycompat - it turns out we need to do some encoding handling, so we
have to be in a higher layer that's allowed to use
mercurial.encoding.encoding. As a result, this is in util for now,
with the TextIOWrapper subclass hiding in encoding.py. I'm not sure of
a better place for the time being.
Differential Revision: https://phab.mercurial-scm.org/D299
2017-07-24 21:38:40 +03:00
|
|
|
|
2020-01-28 21:21:58 +03:00
|
|
|
def lower(s):
|
|
|
|
return s.lower()
|
py3: introduce a wrapper for __builtins__.{raw_,}input()
In order to make this work, we have to wrap the io streams in a
TextIOWrapper so that __builtins__.input() can do unicode IO on Python
3. We can't just restore the original (unicode) sys.std* because we
might be running a cmdserver, and if we blindly restore sys.* to the
original values then we end up breaking the cmdserver. Sadly,
TextIOWrapper tries to close the underlying stream during its __del__,
so we have to make a subclass to prevent that.
If you see errors like:
TypeError: a bytes-like object is required, not 'str'
On an input() or print() call on Python 3, the substitution of
sys.std* is probably the root cause.
A previous version of this change tried to put the bytesinput() method
in pycompat - it turns out we need to do some encoding handling, so we
have to be in a higher layer that's allowed to use
mercurial.encoding.encoding. As a result, this is in util for now,
with the TextIOWrapper subclass hiding in encoding.py. I'm not sure of
a better place for the time being.
Differential Revision: https://phab.mercurial-scm.org/D299
2017-07-24 21:38:40 +03:00
|
|
|
|
2020-01-28 21:21:58 +03:00
|
|
|
def upper(s):
|
|
|
|
return s.upper()
|
codemod: join the auto-formatter party
Summary:
Turned on the auto formatter. Ran `arc lint --apply-patches --take BLACK **/*.py`.
Then run `arc lint` again so some other autofixers like spellchecker etc. looked
at the code base. Manually accept the changes whenever they make sense, or use
a workaround (ex. changing "dict()" to "dict constructor") where autofix is false
positive. Disabled linters on files that are hard (i18n/polib.py) to fix, or less
interesting to fix (hgsubversion tests), or cannot be fixed without breaking
OSS build (FBPYTHON4).
Conflicted linters (test-check-module-imports.t, part of test-check-code.t,
test-check-pyflakes.t) are removed or disabled.
Duplicated linters (test-check-pyflakes.t, test-check-pylint.t) are removed.
An issue of the auto-formatter is lines are no longer guaranteed to be <= 80
chars. But that seems less important comparing with the benefit auto-formatter
provides.
As we're here, also remove test-check-py3-compat.t, as it is currently broken
if `PYTHON3=/bin/python3` is set.
Reviewed By: wez, phillco, simpkins, pkaush, singhsrb
Differential Revision: D8173629
fbshipit-source-id: 90e248ae0c5e6eaadbe25520a6ee42d32005621b
2018-05-26 07:34:37 +03:00
|
|
|
|
|
|
|
|
2017-08-16 07:50:11 +03:00
|
|
|
else:
|
2020-01-28 21:21:58 +03:00
|
|
|
colwidth = _colwidth
|
|
|
|
fromlocal = _fromlocal
|
|
|
|
lower = _lower
|
2017-08-16 07:50:11 +03:00
|
|
|
strio = pycompat.identity
|
2020-01-28 21:21:58 +03:00
|
|
|
tolocal = _tolocal
|
2020-01-29 18:38:41 +03:00
|
|
|
tolocalstr = _tolocal # Binary utf-8 to local byte string
|
2020-01-28 21:21:58 +03:00
|
|
|
upper = _upper
|