py3: only use binary stdin/stdout/stderr

Summary:
Drop stdoutbytes/stdinbytes. They make things unnecessarily complicated
(especially for chg / Rust dispatch entry point).

The new idea is that all IO uses bytes. Text is written in utf-8 (Python 3) or
the local encoding (Python 2). To make stdout behave reasonably on systems not
using a utf-8 locale (e.g. Windows), we might add a Rust binding to Rust's stdout,
which does the right thing:
- When writing to stdout console, expect text to be utf-8 encoded and do proper decoding.
- When writing to stdout file, write the raw bytes without translation.

Note that, unlike Rust's stdout, Python's `sys.stdout.buffer` does not do
translation when writing to the stdout console.

For now, my main motivation of this change is to fix chg on Python 3.

Reviewed By: xavierd

Differential Revision: D19702533

fbshipit-source-id: 74704c83e1b200ff66fb3a2d23d97ff21c7239c8
This commit is contained in:
Jun Wu 2020-02-03 18:24:33 -08:00 committed by Facebook Github Bot
parent 16d4739981
commit 3e0b781197
21 changed files with 46 additions and 90 deletions

View File

@ -50,7 +50,7 @@ _fixsyspath()
del globals()["_fixsyspath"]
def run(args=None, fin=None, fout=None, foutbytes=None, ferr=None):
def run(args=None, fin=None, fout=None, ferr=None):
import sys
if args is None:
@ -83,4 +83,4 @@ def run(args=None, fin=None, fout=None, foutbytes=None, ferr=None):
# so 'import dispatch' happens after demandimport
from .mercurial import dispatch
dispatch.run(args, fin, fout, foutbytes, ferr)
dispatch.run(args, fin, fout, ferr)

View File

@ -25,6 +25,7 @@ import edenscmnative
from edenscm import hgext, mercurial
from edenscm.mercurial import registrar
from edenscm.mercurial.i18n import _
from edenscm.mercurial.pycompat import decodeutf8
cmdtable = {}
@ -65,7 +66,7 @@ def debugshell(ui, repo, **opts):
return 0
if not ui.interactive():
command = ui.fin.read()
command = decodeutf8(ui.fin.read())
exec(command)
return 0

View File

@ -70,7 +70,7 @@ from edenscm.mercurial import (
)
from edenscm.mercurial.i18n import _
from edenscm.mercurial.node import bin, hex, nullid, nullrev
from edenscm.mercurial.pycompat import queue, range
from edenscm.mercurial.pycompat import encodeutf8, queue, range
wrapcommand = extensions.wrapcommand
@ -893,7 +893,7 @@ def wraprepo(repo):
for k, v in sorted(pycompat.iteritems(refs)):
if k != "tip":
v = hex(v)
sha = hashlib.sha1("%s%s%s" % (sha, k, v)).hexdigest()
sha = hashlib.sha1(encodeutf8("%s%s%s" % (sha, k, v))).hexdigest()
return sha
def _sqlsynchash(self):
@ -917,7 +917,7 @@ def wraprepo(repo):
]
# is it a new repo with empty references?
if sqlresults == [[]]:
return hashlib.sha1("%s%s" % ("tip", -1)).hexdigest()
return hashlib.sha1(encodeutf8("%s%s" % ("tip", -1))).hexdigest()
# sqlresults looks like [[('59237a7416a6a1764ea088f0bc1189ea58d5b592',)]]
sqlsynchash = sqlresults[0][0][0]
if len(sqlsynchash) != 40:

View File

@ -1137,7 +1137,7 @@ def _getgoal(opts):
def _readfile(ui, path):
if path == "-":
with ui.timeblockedsection("histedit"):
return ui.fin.read()
return pycompat.decodeutf8(ui.fin.read())
else:
with open(path, "rb") as f:
return pycompat.decodeutf8(f.read())

View File

@ -7,13 +7,13 @@
from __future__ import absolute_import
import json
import os
from edenscm.mercurial import (
blobstore,
error,
extensions,
json,
pathutil,
perftrace,
progress,

View File

@ -20,6 +20,7 @@ from edenscm.mercurial import (
util,
)
from edenscm.mercurial.i18n import _
from edenscm.mercurial.pycompat import decodeutf8
from . import editsgenerator
@ -136,7 +137,6 @@ class perftestsuite(object):
cwd=None,
env=None,
stderr=False,
utf8decode=True,
input=None,
timeout=0,
returncode=False,
@ -162,10 +162,8 @@ class perftestsuite(object):
else:
out, err = p.communicate()
if out is not None and utf8decode:
out = out.decode("utf-8")
if err is not None and utf8decode:
err = err.decode("utf-8")
out = decodeutf8(out)
err = decodeutf8(err)
if p.returncode != 0 and returncode is False:
self.ui.warn(_("run call failed!\n"))

View File

@ -948,7 +948,7 @@ def logmessage(repo, opts):
if not message and logfile:
try:
if isstdiofilename(logfile):
message = ui.fin.read()
message = pycompat.decodeutf8(ui.fin.read())
else:
message = pycompat.decodeutf8(
b"\n".join(util.readfile(logfile).splitlines())

View File

@ -81,7 +81,7 @@ from .. import (
)
from ..i18n import _
from ..node import bin, hex, nullhex, nullid, nullrev, short
from ..pycompat import range
from ..pycompat import decodeutf8, range
from .cmdtable import command
@ -184,7 +184,7 @@ def debugbuilddag(
if text is None:
ui.status(_("reading DAG from stdin\n"))
text = ui.fin.read()
text = decodeutf8(ui.fin.read())
cl = repo.changelog
if len(cl) > 0:
@ -3153,7 +3153,7 @@ def debugdrawdag(ui, repo, **opts):
Note that the revset cannot have confusing characters which can be seen as
the part of the graph edges, like `|/+-\`.
"""
text = ui.fin.read()
text = decodeutf8(ui.fin.read())
return drawdag.drawdag(repo, text, **opts)

View File

@ -65,7 +65,6 @@ class request(object):
repo=None,
fin=None,
fout=None,
foutbytes=None,
ferr=None,
prereposetups=None,
):
@ -76,7 +75,6 @@ class request(object):
# input/output/error streams
self.fin = fin
self.fout = fout
self.foutbytes = foutbytes
self.ferr = ferr
# remember options pre-parsed by _earlyparseopts()
@ -124,12 +122,12 @@ class request(object):
raise exc
def run(args=None, fin=None, fout=None, foutbytes=None, ferr=None):
def run(args=None, fin=None, fout=None, ferr=None):
"run the command in sys.argv"
_initstdio()
if args is None:
args = pycompat.sysargv
req = request(args[1:], fin=fin, fout=fout, foutbytes=foutbytes, ferr=ferr)
req = request(args[1:], fin=fin, fout=fout, ferr=ferr)
err = None
try:
status = (dispatch(req) or 0) & 255
@ -142,12 +140,6 @@ def run(args=None, fin=None, fout=None, foutbytes=None, ferr=None):
except IOError as e:
err = e
status = -1
if util.safehasattr(req.ui, "foutbytes"):
try:
req.ui.foutbytes.flush()
except IOError as e:
err = e
status = -1
if util.safehasattr(req.ui, "ferr"):
if err is not None and err.errno != errno.EPIPE:
req.ui.ferr.write("abort: %s\n" % encoding.strtolocal(err.strerror))
@ -404,8 +396,6 @@ def dispatch(req):
req.ui.fin = req.fin
if req.fout:
req.ui.fout = req.fout
if req.foutbytes:
req.ui.foutbytes = req.foutbytes
if req.ferr:
req.ui.ferr = req.ferr
except error.Abort as inst:

View File

@ -74,8 +74,12 @@ def loads(string):
if sys.version_info[0] < 3:
# XXX: This should round-trip with "dumps". But it might be non-trivial to
# do so.
return _rapply(
lambda s: pycompat.decodeutf8(s.encode("utf-8")), _sysjson.loads(string)
)
def encode(s):
if isinstance(s, type(u"")):
return pycompat.decodeutf8(s.encode("utf-8"))
else:
return s
return _rapply(encode, _sysjson.loads(string))
else:
return _sysjson.loads(string)

View File

@ -69,22 +69,15 @@ if sys.version_info[0] >= 3:
sysplatform = sys.platform
sysexecutable = sys.executable
# Upstream added stringio which conforms to their end goal of using bytes
# everywhere. We want to use String in Python 3 and Bytes/str in Python 2,
# so we need a different io abstraction here, which I call stringutf8io. We
# should probably remove stringio.
stringio = io.BytesIO
stringutf8io = io.StringIO
maplist = lambda *args: list(map(*args))
ziplist = lambda *args: list(zip(*args))
rawinput = input
range = range
stdin = sys.stdin
stdinbytes = sys.stdin.buffer
stdout = sys.stdout
stdoutbytes = sys.stdout.buffer
stderr = sys.stderr
stdin = sys.stdin.buffer
stdout = sys.stdout.buffer
stderr = sys.stderr.buffer
sysargv = sys.argv
@ -157,9 +150,7 @@ else:
ossep = os.sep
osaltsep = os.altsep
stdin = sys.stdin
stdinbytes = sys.stdin
stdout = sys.stdout
stdoutbytes = sys.stdout
stderr = sys.stderr
if getattr(sys, "argv", None) is not None:
sysargv = sys.argv
@ -168,7 +159,6 @@ else:
sysexecutable = sys.executable
shlexsplit = shlex.split
stringio = cStringIO.StringIO
stringutf8io = cStringIO.StringIO
maplist = map
ziplist = zip
rawinput = raw_input # noqa

View File

@ -7,7 +7,7 @@
from __future__ import absolute_import
import json
from . import json
class ReplayData(object):

View File

@ -27,8 +27,8 @@ class sshserver(wireproto.abstractserverproto):
self.ui = ui
self.repo = repo
self.lock = None
self.fin = ui.finbytes
self.fout = ui.foutbytes
self.fin = ui.fin
self.fout = ui.fout
self.name = "ssh"
hook.redirect(True)

View File

@ -142,12 +142,6 @@ class httppasswordmgrdbproxy(object):
return tuple(v for v in self._get_mgr().find_user_password(realm, uri))
class stdoutkind(Enum):
"used to remember the last output stream we used (we need to flush if we switch)"
TEXT = 1
BYTES = 2
def _catchterm(*args):
raise error.SignalInterrupt
@ -183,16 +177,13 @@ class ui(object):
self._colormode = None
self._terminfoparams = {}
self._styles = {}
self.laststdout = None
if src:
self._uiconfig = src._uiconfig.copy()
self.fout = src.fout
self.foutbytes = src.foutbytes
self.ferr = src.ferr
self.fin = src.fin
self.finbytes = src.finbytes
self.pageractive = src.pageractive
self._disablepager = src._disablepager
self._tweaked = src._tweaked
@ -212,10 +203,8 @@ class ui(object):
self._uiconfig = uiconfig.uiconfig()
self.fout = util.stdout
self.foutbytes = util.stdoutbytes
self.ferr = util.stderr
self.fin = util.stdin
self.finbytes = util.stdinbytes
self.pageractive = False
self._disablepager = False
self._tweaked = False
@ -655,13 +644,8 @@ class ui(object):
# type: (str) -> None
with progress.suspend():
starttime = util.timer()
if self.laststdout != stdoutkind.TEXT and not getattr(
self.foutbytes, "closed", False
):
self.foutbytes.flush()
self.laststdout = stdoutkind.TEXT
try:
self.fout.write("".join(msgs))
self.fout.write(encodeutf8("".join(msgs)))
except IOError as err:
raise error.StdioError(err)
finally:
@ -694,15 +678,8 @@ class ui(object):
def _writebytes(self, *msgs, **opts):
with progress.suspend():
starttime = util.timer()
if self.laststdout != stdoutkind.BYTES and not getattr(
self.fout, "closed", False
):
self.fout.flush()
self.laststdout = stdoutkind.BYTES
try:
if not getattr(self.fout, "closed", False):
self.fout.flush()
self.foutbytes.write(b"".join(msgs))
self.fout.write(b"".join(msgs))
except IOError as err:
raise error.StdioError(err)
finally:
@ -730,7 +707,7 @@ class ui(object):
self.fout.flush()
# Write all messages in a single operation as stderr may be
# unbuffered.
self.ferr.write("".join(msgs))
self.ferr.write(encodeutf8("".join(msgs)))
# stderr may be buffered under win32 when redirected to files,
# including stdout.
if not getattr(self.ferr, "closed", False):
@ -1153,7 +1130,7 @@ class ui(object):
# to interact with tty even if fin is not a tty.
with self.timeblockedsection("stdio"):
if self.configbool("ui", "nontty"):
l = self.fin.readline()
l = decodeutf8(self.fin.readline())
if not l:
raise EOFError
return l.rstrip("\n")

View File

@ -75,9 +75,7 @@ queue = pycompat.queue.Queue
socketserver = pycompat.socketserver
stderr = pycompat.stderr
stdin = pycompat.stdin
stdinbytes = pycompat.stdinbytes
stdout = pycompat.stdout
stdoutbytes = pycompat.stdoutbytes
stringio = pycompat.stringio
httpserver = urllibcompat.httpserver
@ -96,7 +94,7 @@ def isatty(fp):
# destined stdout with a pipe destined stdout (e.g. pager), we want line
# buffering
if isatty(stdout):
stdout = os.fdopen(stdout.fileno(), "w", 1)
stdout = os.fdopen(stdout.fileno(), "wb", 1)
if pycompat.iswindows:
from . import windows as platform

View File

@ -14,7 +14,6 @@ from __future__ import absolute_import
import functools
import hashlib
import json
import os
import tempfile
import time
@ -27,6 +26,7 @@ from . import (
error,
exchange,
extensions,
json,
peer,
pushkey as pushkeymod,
pycompat,
@ -190,6 +190,7 @@ def escapearg(plain):
.replace("=", ":e")
)
def unescapestringarg(escaped):
return (
escaped.replace(":e", "=")
@ -198,6 +199,7 @@ def unescapestringarg(escaped):
.replace(":c", ":")
)
def unescapebytearg(escaped):
return (
escaped.replace(b":e", b"=")
@ -1063,8 +1065,7 @@ def hello(repo, proto):
capabilities: space separated list of tokens
"""
return b"capabilities: %s\n" % (pycompat.encodeutf8(capabilities(repo,
proto)))
return b"capabilities: %s\n" % (pycompat.encodeutf8(capabilities(repo, proto)))
@wireprotocommand("listkeys", "namespace")

View File

@ -88,13 +88,12 @@ impl HgPython {
let call_args = {
let fin = read_to_py_object(py, &io.input);
let fout = write_to_py_object(py, &io.output);
let foutbytes = fout.clone_ref(py);
let ferr = match io.error {
None => fout.clone_ref(py),
Some(ref error) => write_to_py_object(py, error),
};
let args: Vec<Str> = args.into_iter().map(Str::from).collect();
(args, fin, fout, foutbytes, ferr).to_py_object(py)
(args, fin, fout, ferr).to_py_object(py)
};
entry_point_mod.call(py, "run", call_args, None)?;
Ok(())

View File

@ -40,9 +40,7 @@ class mockrepo(object):
class mockui(object):
def __init__(self, inbytes):
self.fin = io.BytesIO(inbytes)
self.finbytes = self.fin
self.fout = io.BytesIO()
self.foutbytes = self.fout
self.ferr = io.BytesIO()

View File

@ -24,7 +24,7 @@ ui_ = uimod.ui.load()
ui_.setconfig("ui", "formatted", "True")
# we're not interested in the output, so write that to devnull
ui_.fout = open(os.devnull, "w")
ui_.fout = open(os.devnull, "wb")
# call some arbitrary command just so we go through
# color's wrapped _runcommand twice.

View File

@ -251,13 +251,13 @@ Send unbundlereplay batch 3 (all good, this time with logging to files)
$ cat $TESTTMP/commands | hg sendunbundlereplaybatch --path ssh://user@dummy/server \
> --debug --reports $TESTTMP/reports.txt
creating a peer took: * (glob)
single wireproto command took: * (glob)
running * 'hg -R server serve --stdio' (glob)
sending hello command
sending between command
remote: * (glob)
remote: capabilities: * (glob)
remote: 1
single wireproto command took: * (glob)
using $TESTTMP/reports.txt as a reports file
sending unbundlereplay command
remote: pushing 1 changeset:

View File

@ -307,8 +307,8 @@ def _hg(*args, **kwargs):
stdin = kwargs.get("stdin") or ""
encoding.setfromenviron()
cwdbefore = os.getcwd()
fout = pycompat.stringutf8io()
fin = pycompat.stringutf8io(stdin)
fout = util.stringio()
fin = util.stringio(pycompat.encodeutf8(stdin))
sysargs = ["hg"] + list(args)
pycompat.sysargv = sysargs
status = bindings.commands.run(sysargs, fin, fout, fout)
@ -317,7 +317,7 @@ def _hg(*args, **kwargs):
# Revert side effect of --cwd
os.chdir(cwdbefore)
buf = fout.getvalue().rstrip()
return (status, buf)
return (status, pycompat.decodeutf8(buf))
# utilities in tinit.sh