py3: only use binary stdin/stdout/stderr

Summary:
Drop stdoutbytes/stdinbytes. They make things unnecessarily complicated
(especially for chg / Rust dispatch entry point).

The new idea is that all IO uses bytes. Text is written in utf-8 (Python 3) or
the local encoding (Python 2). To make stdout behave reasonably on systems not
using a utf-8 locale (e.g. Windows), we might add a Rust binding to Rust's stdout,
which does the right thing:
- When writing to stdout console, expect text to be utf-8 encoded and do proper decoding.
- When writing to stdout file, write the raw bytes without translation.

Note that, unlike Rust's stdout, Python's `sys.stdout.buffer` does not do any
translation when writing to the stdout console.

For now, my main motivation for this change is to fix chg on Python 3.

Reviewed By: xavierd

Differential Revision: D19702533

fbshipit-source-id: 74704c83e1b200ff66fb3a2d23d97ff21c7239c8
This commit is contained in:
Jun Wu 2020-02-03 18:24:33 -08:00 committed by Facebook Github Bot
parent 16d4739981
commit 3e0b781197
21 changed files with 46 additions and 90 deletions

View File

@ -50,7 +50,7 @@ _fixsyspath()
del globals()["_fixsyspath"] del globals()["_fixsyspath"]
def run(args=None, fin=None, fout=None, foutbytes=None, ferr=None): def run(args=None, fin=None, fout=None, ferr=None):
import sys import sys
if args is None: if args is None:
@ -83,4 +83,4 @@ def run(args=None, fin=None, fout=None, foutbytes=None, ferr=None):
# so 'import dispatch' happens after demandimport # so 'import dispatch' happens after demandimport
from .mercurial import dispatch from .mercurial import dispatch
dispatch.run(args, fin, fout, foutbytes, ferr) dispatch.run(args, fin, fout, ferr)

View File

@ -25,6 +25,7 @@ import edenscmnative
from edenscm import hgext, mercurial from edenscm import hgext, mercurial
from edenscm.mercurial import registrar from edenscm.mercurial import registrar
from edenscm.mercurial.i18n import _ from edenscm.mercurial.i18n import _
from edenscm.mercurial.pycompat import decodeutf8
cmdtable = {} cmdtable = {}
@ -65,7 +66,7 @@ def debugshell(ui, repo, **opts):
return 0 return 0
if not ui.interactive(): if not ui.interactive():
command = ui.fin.read() command = decodeutf8(ui.fin.read())
exec(command) exec(command)
return 0 return 0

View File

@ -70,7 +70,7 @@ from edenscm.mercurial import (
) )
from edenscm.mercurial.i18n import _ from edenscm.mercurial.i18n import _
from edenscm.mercurial.node import bin, hex, nullid, nullrev from edenscm.mercurial.node import bin, hex, nullid, nullrev
from edenscm.mercurial.pycompat import queue, range from edenscm.mercurial.pycompat import encodeutf8, queue, range
wrapcommand = extensions.wrapcommand wrapcommand = extensions.wrapcommand
@ -893,7 +893,7 @@ def wraprepo(repo):
for k, v in sorted(pycompat.iteritems(refs)): for k, v in sorted(pycompat.iteritems(refs)):
if k != "tip": if k != "tip":
v = hex(v) v = hex(v)
sha = hashlib.sha1("%s%s%s" % (sha, k, v)).hexdigest() sha = hashlib.sha1(encodeutf8("%s%s%s" % (sha, k, v))).hexdigest()
return sha return sha
def _sqlsynchash(self): def _sqlsynchash(self):
@ -917,7 +917,7 @@ def wraprepo(repo):
] ]
# is it a new repo with empty references? # is it a new repo with empty references?
if sqlresults == [[]]: if sqlresults == [[]]:
return hashlib.sha1("%s%s" % ("tip", -1)).hexdigest() return hashlib.sha1(encodeutf8("%s%s" % ("tip", -1))).hexdigest()
# sqlresults looks like [[('59237a7416a6a1764ea088f0bc1189ea58d5b592',)]] # sqlresults looks like [[('59237a7416a6a1764ea088f0bc1189ea58d5b592',)]]
sqlsynchash = sqlresults[0][0][0] sqlsynchash = sqlresults[0][0][0]
if len(sqlsynchash) != 40: if len(sqlsynchash) != 40:

View File

@ -1137,7 +1137,7 @@ def _getgoal(opts):
def _readfile(ui, path): def _readfile(ui, path):
if path == "-": if path == "-":
with ui.timeblockedsection("histedit"): with ui.timeblockedsection("histedit"):
return ui.fin.read() return pycompat.decodeutf8(ui.fin.read())
else: else:
with open(path, "rb") as f: with open(path, "rb") as f:
return pycompat.decodeutf8(f.read()) return pycompat.decodeutf8(f.read())

View File

@ -7,13 +7,13 @@
from __future__ import absolute_import from __future__ import absolute_import
import json
import os import os
from edenscm.mercurial import ( from edenscm.mercurial import (
blobstore, blobstore,
error, error,
extensions, extensions,
json,
pathutil, pathutil,
perftrace, perftrace,
progress, progress,

View File

@ -20,6 +20,7 @@ from edenscm.mercurial import (
util, util,
) )
from edenscm.mercurial.i18n import _ from edenscm.mercurial.i18n import _
from edenscm.mercurial.pycompat import decodeutf8
from . import editsgenerator from . import editsgenerator
@ -136,7 +137,6 @@ class perftestsuite(object):
cwd=None, cwd=None,
env=None, env=None,
stderr=False, stderr=False,
utf8decode=True,
input=None, input=None,
timeout=0, timeout=0,
returncode=False, returncode=False,
@ -162,10 +162,8 @@ class perftestsuite(object):
else: else:
out, err = p.communicate() out, err = p.communicate()
if out is not None and utf8decode: out = decodeutf8(out)
out = out.decode("utf-8") err = decodeutf8(err)
if err is not None and utf8decode:
err = err.decode("utf-8")
if p.returncode != 0 and returncode is False: if p.returncode != 0 and returncode is False:
self.ui.warn(_("run call failed!\n")) self.ui.warn(_("run call failed!\n"))

View File

@ -948,7 +948,7 @@ def logmessage(repo, opts):
if not message and logfile: if not message and logfile:
try: try:
if isstdiofilename(logfile): if isstdiofilename(logfile):
message = ui.fin.read() message = pycompat.decodeutf8(ui.fin.read())
else: else:
message = pycompat.decodeutf8( message = pycompat.decodeutf8(
b"\n".join(util.readfile(logfile).splitlines()) b"\n".join(util.readfile(logfile).splitlines())

View File

@ -81,7 +81,7 @@ from .. import (
) )
from ..i18n import _ from ..i18n import _
from ..node import bin, hex, nullhex, nullid, nullrev, short from ..node import bin, hex, nullhex, nullid, nullrev, short
from ..pycompat import range from ..pycompat import decodeutf8, range
from .cmdtable import command from .cmdtable import command
@ -184,7 +184,7 @@ def debugbuilddag(
if text is None: if text is None:
ui.status(_("reading DAG from stdin\n")) ui.status(_("reading DAG from stdin\n"))
text = ui.fin.read() text = decodeutf8(ui.fin.read())
cl = repo.changelog cl = repo.changelog
if len(cl) > 0: if len(cl) > 0:
@ -3153,7 +3153,7 @@ def debugdrawdag(ui, repo, **opts):
Note that the revset cannot have confusing characters which can be seen as Note that the revset cannot have confusing characters which can be seen as
the part of the graph edges, like `|/+-\`. the part of the graph edges, like `|/+-\`.
""" """
text = ui.fin.read() text = decodeutf8(ui.fin.read())
return drawdag.drawdag(repo, text, **opts) return drawdag.drawdag(repo, text, **opts)

View File

@ -65,7 +65,6 @@ class request(object):
repo=None, repo=None,
fin=None, fin=None,
fout=None, fout=None,
foutbytes=None,
ferr=None, ferr=None,
prereposetups=None, prereposetups=None,
): ):
@ -76,7 +75,6 @@ class request(object):
# input/output/error streams # input/output/error streams
self.fin = fin self.fin = fin
self.fout = fout self.fout = fout
self.foutbytes = foutbytes
self.ferr = ferr self.ferr = ferr
# remember options pre-parsed by _earlyparseopts() # remember options pre-parsed by _earlyparseopts()
@ -124,12 +122,12 @@ class request(object):
raise exc raise exc
def run(args=None, fin=None, fout=None, foutbytes=None, ferr=None): def run(args=None, fin=None, fout=None, ferr=None):
"run the command in sys.argv" "run the command in sys.argv"
_initstdio() _initstdio()
if args is None: if args is None:
args = pycompat.sysargv args = pycompat.sysargv
req = request(args[1:], fin=fin, fout=fout, foutbytes=foutbytes, ferr=ferr) req = request(args[1:], fin=fin, fout=fout, ferr=ferr)
err = None err = None
try: try:
status = (dispatch(req) or 0) & 255 status = (dispatch(req) or 0) & 255
@ -142,12 +140,6 @@ def run(args=None, fin=None, fout=None, foutbytes=None, ferr=None):
except IOError as e: except IOError as e:
err = e err = e
status = -1 status = -1
if util.safehasattr(req.ui, "foutbytes"):
try:
req.ui.foutbytes.flush()
except IOError as e:
err = e
status = -1
if util.safehasattr(req.ui, "ferr"): if util.safehasattr(req.ui, "ferr"):
if err is not None and err.errno != errno.EPIPE: if err is not None and err.errno != errno.EPIPE:
req.ui.ferr.write("abort: %s\n" % encoding.strtolocal(err.strerror)) req.ui.ferr.write("abort: %s\n" % encoding.strtolocal(err.strerror))
@ -404,8 +396,6 @@ def dispatch(req):
req.ui.fin = req.fin req.ui.fin = req.fin
if req.fout: if req.fout:
req.ui.fout = req.fout req.ui.fout = req.fout
if req.foutbytes:
req.ui.foutbytes = req.foutbytes
if req.ferr: if req.ferr:
req.ui.ferr = req.ferr req.ui.ferr = req.ferr
except error.Abort as inst: except error.Abort as inst:

View File

@ -74,8 +74,12 @@ def loads(string):
if sys.version_info[0] < 3: if sys.version_info[0] < 3:
# XXX: This should round-trip with "dumps". But it might be non-trivial to # XXX: This should round-trip with "dumps". But it might be non-trivial to
# do so. # do so.
return _rapply( def encode(s):
lambda s: pycompat.decodeutf8(s.encode("utf-8")), _sysjson.loads(string) if isinstance(s, type(u"")):
) return pycompat.decodeutf8(s.encode("utf-8"))
else:
return s
return _rapply(encode, _sysjson.loads(string))
else: else:
return _sysjson.loads(string) return _sysjson.loads(string)

View File

@ -69,22 +69,15 @@ if sys.version_info[0] >= 3:
sysplatform = sys.platform sysplatform = sys.platform
sysexecutable = sys.executable sysexecutable = sys.executable
# Upstream added stringio which conforms to their end goal of using bytes
# everywhere. We want to use String in Python 3 and Bytes/str in Python 2,
# so we need a different io abstraction here, which I call stringutf8io. We
# should probably remove stringio.
stringio = io.BytesIO stringio = io.BytesIO
stringutf8io = io.StringIO
maplist = lambda *args: list(map(*args)) maplist = lambda *args: list(map(*args))
ziplist = lambda *args: list(zip(*args)) ziplist = lambda *args: list(zip(*args))
rawinput = input rawinput = input
range = range range = range
stdin = sys.stdin stdin = sys.stdin.buffer
stdinbytes = sys.stdin.buffer stdout = sys.stdout.buffer
stdout = sys.stdout stderr = sys.stderr.buffer
stdoutbytes = sys.stdout.buffer
stderr = sys.stderr
sysargv = sys.argv sysargv = sys.argv
@ -157,9 +150,7 @@ else:
ossep = os.sep ossep = os.sep
osaltsep = os.altsep osaltsep = os.altsep
stdin = sys.stdin stdin = sys.stdin
stdinbytes = sys.stdin
stdout = sys.stdout stdout = sys.stdout
stdoutbytes = sys.stdout
stderr = sys.stderr stderr = sys.stderr
if getattr(sys, "argv", None) is not None: if getattr(sys, "argv", None) is not None:
sysargv = sys.argv sysargv = sys.argv
@ -168,7 +159,6 @@ else:
sysexecutable = sys.executable sysexecutable = sys.executable
shlexsplit = shlex.split shlexsplit = shlex.split
stringio = cStringIO.StringIO stringio = cStringIO.StringIO
stringutf8io = cStringIO.StringIO
maplist = map maplist = map
ziplist = zip ziplist = zip
rawinput = raw_input # noqa rawinput = raw_input # noqa

View File

@ -7,7 +7,7 @@
from __future__ import absolute_import from __future__ import absolute_import
import json from . import json
class ReplayData(object): class ReplayData(object):

View File

@ -27,8 +27,8 @@ class sshserver(wireproto.abstractserverproto):
self.ui = ui self.ui = ui
self.repo = repo self.repo = repo
self.lock = None self.lock = None
self.fin = ui.finbytes self.fin = ui.fin
self.fout = ui.foutbytes self.fout = ui.fout
self.name = "ssh" self.name = "ssh"
hook.redirect(True) hook.redirect(True)

View File

@ -142,12 +142,6 @@ class httppasswordmgrdbproxy(object):
return tuple(v for v in self._get_mgr().find_user_password(realm, uri)) return tuple(v for v in self._get_mgr().find_user_password(realm, uri))
class stdoutkind(Enum):
"used to remember the last output stream we used (we need to flush if we switch)"
TEXT = 1
BYTES = 2
def _catchterm(*args): def _catchterm(*args):
raise error.SignalInterrupt raise error.SignalInterrupt
@ -183,16 +177,13 @@ class ui(object):
self._colormode = None self._colormode = None
self._terminfoparams = {} self._terminfoparams = {}
self._styles = {} self._styles = {}
self.laststdout = None
if src: if src:
self._uiconfig = src._uiconfig.copy() self._uiconfig = src._uiconfig.copy()
self.fout = src.fout self.fout = src.fout
self.foutbytes = src.foutbytes
self.ferr = src.ferr self.ferr = src.ferr
self.fin = src.fin self.fin = src.fin
self.finbytes = src.finbytes
self.pageractive = src.pageractive self.pageractive = src.pageractive
self._disablepager = src._disablepager self._disablepager = src._disablepager
self._tweaked = src._tweaked self._tweaked = src._tweaked
@ -212,10 +203,8 @@ class ui(object):
self._uiconfig = uiconfig.uiconfig() self._uiconfig = uiconfig.uiconfig()
self.fout = util.stdout self.fout = util.stdout
self.foutbytes = util.stdoutbytes
self.ferr = util.stderr self.ferr = util.stderr
self.fin = util.stdin self.fin = util.stdin
self.finbytes = util.stdinbytes
self.pageractive = False self.pageractive = False
self._disablepager = False self._disablepager = False
self._tweaked = False self._tweaked = False
@ -655,13 +644,8 @@ class ui(object):
# type: (str) -> None # type: (str) -> None
with progress.suspend(): with progress.suspend():
starttime = util.timer() starttime = util.timer()
if self.laststdout != stdoutkind.TEXT and not getattr(
self.foutbytes, "closed", False
):
self.foutbytes.flush()
self.laststdout = stdoutkind.TEXT
try: try:
self.fout.write("".join(msgs)) self.fout.write(encodeutf8("".join(msgs)))
except IOError as err: except IOError as err:
raise error.StdioError(err) raise error.StdioError(err)
finally: finally:
@ -694,15 +678,8 @@ class ui(object):
def _writebytes(self, *msgs, **opts): def _writebytes(self, *msgs, **opts):
with progress.suspend(): with progress.suspend():
starttime = util.timer() starttime = util.timer()
if self.laststdout != stdoutkind.BYTES and not getattr(
self.fout, "closed", False
):
self.fout.flush()
self.laststdout = stdoutkind.BYTES
try: try:
if not getattr(self.fout, "closed", False): self.fout.write(b"".join(msgs))
self.fout.flush()
self.foutbytes.write(b"".join(msgs))
except IOError as err: except IOError as err:
raise error.StdioError(err) raise error.StdioError(err)
finally: finally:
@ -730,7 +707,7 @@ class ui(object):
self.fout.flush() self.fout.flush()
# Write all messages in a single operation as stderr may be # Write all messages in a single operation as stderr may be
# unbuffered. # unbuffered.
self.ferr.write("".join(msgs)) self.ferr.write(encodeutf8("".join(msgs)))
# stderr may be buffered under win32 when redirected to files, # stderr may be buffered under win32 when redirected to files,
# including stdout. # including stdout.
if not getattr(self.ferr, "closed", False): if not getattr(self.ferr, "closed", False):
@ -1153,7 +1130,7 @@ class ui(object):
# to interact with tty even if fin is not a tty. # to interact with tty even if fin is not a tty.
with self.timeblockedsection("stdio"): with self.timeblockedsection("stdio"):
if self.configbool("ui", "nontty"): if self.configbool("ui", "nontty"):
l = self.fin.readline() l = decodeutf8(self.fin.readline())
if not l: if not l:
raise EOFError raise EOFError
return l.rstrip("\n") return l.rstrip("\n")

View File

@ -75,9 +75,7 @@ queue = pycompat.queue.Queue
socketserver = pycompat.socketserver socketserver = pycompat.socketserver
stderr = pycompat.stderr stderr = pycompat.stderr
stdin = pycompat.stdin stdin = pycompat.stdin
stdinbytes = pycompat.stdinbytes
stdout = pycompat.stdout stdout = pycompat.stdout
stdoutbytes = pycompat.stdoutbytes
stringio = pycompat.stringio stringio = pycompat.stringio
httpserver = urllibcompat.httpserver httpserver = urllibcompat.httpserver
@ -96,7 +94,7 @@ def isatty(fp):
# destined stdout with a pipe destined stdout (e.g. pager), we want line # destined stdout with a pipe destined stdout (e.g. pager), we want line
# buffering # buffering
if isatty(stdout): if isatty(stdout):
stdout = os.fdopen(stdout.fileno(), "w", 1) stdout = os.fdopen(stdout.fileno(), "wb", 1)
if pycompat.iswindows: if pycompat.iswindows:
from . import windows as platform from . import windows as platform

View File

@ -14,7 +14,6 @@ from __future__ import absolute_import
import functools import functools
import hashlib import hashlib
import json
import os import os
import tempfile import tempfile
import time import time
@ -27,6 +26,7 @@ from . import (
error, error,
exchange, exchange,
extensions, extensions,
json,
peer, peer,
pushkey as pushkeymod, pushkey as pushkeymod,
pycompat, pycompat,
@ -190,6 +190,7 @@ def escapearg(plain):
.replace("=", ":e") .replace("=", ":e")
) )
def unescapestringarg(escaped): def unescapestringarg(escaped):
return ( return (
escaped.replace(":e", "=") escaped.replace(":e", "=")
@ -198,6 +199,7 @@ def unescapestringarg(escaped):
.replace(":c", ":") .replace(":c", ":")
) )
def unescapebytearg(escaped): def unescapebytearg(escaped):
return ( return (
escaped.replace(b":e", b"=") escaped.replace(b":e", b"=")
@ -1063,8 +1065,7 @@ def hello(repo, proto):
capabilities: space separated list of tokens capabilities: space separated list of tokens
""" """
return b"capabilities: %s\n" % (pycompat.encodeutf8(capabilities(repo, return b"capabilities: %s\n" % (pycompat.encodeutf8(capabilities(repo, proto)))
proto)))
@wireprotocommand("listkeys", "namespace") @wireprotocommand("listkeys", "namespace")

View File

@ -88,13 +88,12 @@ impl HgPython {
let call_args = { let call_args = {
let fin = read_to_py_object(py, &io.input); let fin = read_to_py_object(py, &io.input);
let fout = write_to_py_object(py, &io.output); let fout = write_to_py_object(py, &io.output);
let foutbytes = fout.clone_ref(py);
let ferr = match io.error { let ferr = match io.error {
None => fout.clone_ref(py), None => fout.clone_ref(py),
Some(ref error) => write_to_py_object(py, error), Some(ref error) => write_to_py_object(py, error),
}; };
let args: Vec<Str> = args.into_iter().map(Str::from).collect(); let args: Vec<Str> = args.into_iter().map(Str::from).collect();
(args, fin, fout, foutbytes, ferr).to_py_object(py) (args, fin, fout, ferr).to_py_object(py)
}; };
entry_point_mod.call(py, "run", call_args, None)?; entry_point_mod.call(py, "run", call_args, None)?;
Ok(()) Ok(())

View File

@ -40,9 +40,7 @@ class mockrepo(object):
class mockui(object): class mockui(object):
def __init__(self, inbytes): def __init__(self, inbytes):
self.fin = io.BytesIO(inbytes) self.fin = io.BytesIO(inbytes)
self.finbytes = self.fin
self.fout = io.BytesIO() self.fout = io.BytesIO()
self.foutbytes = self.fout
self.ferr = io.BytesIO() self.ferr = io.BytesIO()

View File

@ -24,7 +24,7 @@ ui_ = uimod.ui.load()
ui_.setconfig("ui", "formatted", "True") ui_.setconfig("ui", "formatted", "True")
# we're not interested in the output, so write that to devnull # we're not interested in the output, so write that to devnull
ui_.fout = open(os.devnull, "w") ui_.fout = open(os.devnull, "wb")
# call some arbitrary command just so we go through # call some arbitrary command just so we go through
# color's wrapped _runcommand twice. # color's wrapped _runcommand twice.

View File

@ -251,13 +251,13 @@ Send unbundlereplay batch 3 (all good, this time with logging to files)
$ cat $TESTTMP/commands | hg sendunbundlereplaybatch --path ssh://user@dummy/server \ $ cat $TESTTMP/commands | hg sendunbundlereplaybatch --path ssh://user@dummy/server \
> --debug --reports $TESTTMP/reports.txt > --debug --reports $TESTTMP/reports.txt
creating a peer took: * (glob) creating a peer took: * (glob)
single wireproto command took: * (glob)
running * 'hg -R server serve --stdio' (glob) running * 'hg -R server serve --stdio' (glob)
sending hello command sending hello command
sending between command sending between command
remote: * (glob) remote: * (glob)
remote: capabilities: * (glob) remote: capabilities: * (glob)
remote: 1 remote: 1
single wireproto command took: * (glob)
using $TESTTMP/reports.txt as a reports file using $TESTTMP/reports.txt as a reports file
sending unbundlereplay command sending unbundlereplay command
remote: pushing 1 changeset: remote: pushing 1 changeset:

View File

@ -307,8 +307,8 @@ def _hg(*args, **kwargs):
stdin = kwargs.get("stdin") or "" stdin = kwargs.get("stdin") or ""
encoding.setfromenviron() encoding.setfromenviron()
cwdbefore = os.getcwd() cwdbefore = os.getcwd()
fout = pycompat.stringutf8io() fout = util.stringio()
fin = pycompat.stringutf8io(stdin) fin = util.stringio(pycompat.encodeutf8(stdin))
sysargs = ["hg"] + list(args) sysargs = ["hg"] + list(args)
pycompat.sysargv = sysargs pycompat.sysargv = sysargs
status = bindings.commands.run(sysargs, fin, fout, fout) status = bindings.commands.run(sysargs, fin, fout, fout)
@ -317,7 +317,7 @@ def _hg(*args, **kwargs):
# Revert side effect of --cwd # Revert side effect of --cwd
os.chdir(cwdbefore) os.chdir(cwdbefore)
buf = fout.getvalue().rstrip() buf = fout.getvalue().rstrip()
return (status, buf) return (status, pycompat.decodeutf8(buf))
# utilities in tinit.sh # utilities in tinit.sh