diff --git a/eden/scm/edenscm/mercurial/patch.py b/eden/scm/edenscm/mercurial/patch.py index 5931622416..57861db407 100644 --- a/eden/scm/edenscm/mercurial/patch.py +++ b/eden/scm/edenscm/mercurial/patch.py @@ -251,13 +251,7 @@ def extract(ui, fileobj): ui.debug("Content-Type: %s\n" % content_type) if content_type not in ok_types: continue - if sys.version_info[0] >= 3: - # The message was surrogateescape encoded, so we need to undo - # that. - payload = part.get_payload() - payload = payload.encode("ascii", errors="surrogateescape") - else: - payload = part.get_payload(decode=True) + payload = part.get_payload(decode=True) m = diffre.search(payload) if m: hgpatch = False diff --git a/eden/scm/edenscm/mercurial/pycompat.py b/eden/scm/edenscm/mercurial/pycompat.py index ec46270d73..2b31262729 100644 --- a/eden/scm/edenscm/mercurial/pycompat.py +++ b/eden/scm/edenscm/mercurial/pycompat.py @@ -157,8 +157,10 @@ if sys.version_info[0] >= 3: ep = email.parser.Parser() # disable the "universal newlines" mode, which isn't binary safe. - # We'll have to use surrogateescape when encoding the string back to - # bytes later. + # Note, although we specify ascii+surrogateescape decoding here, we don't have + # to specify it elsewhere for reencoding as the email.parser detects the + # surrogates and automatically chooses the appropriate encoding. + # See: https://github.com/python/cpython/blob/3.8/Lib/email/message.py::get_payload() fp = io.TextIOWrapper( fp, encoding=r"ascii", errors=r"surrogateescape", newline=chr(10) ) diff --git a/eden/scm/tests/test-import-t.py b/eden/scm/tests/test-import-t.py index 873438b5a7..fecc86a112 100644 --- a/eden/scm/tests/test-import-t.py +++ b/eden/scm/tests/test-import-t.py @@ -1,3 +1,4 @@ +# coding=utf-8 # Copyright (c) Facebook, Inc. and its affiliates. # Copyright (c) Mercurial Contributors. # @@ -1497,3 +1498,12 @@ sh % "printf 'diff --git a/a b/b\\nrename from a\\nrename to b'" | "hg import -" a not tracked! 
abort: source file 'a' does not exist [255]""" + +# Verify that utf-8 characters in patches can be imported +open("unicode.txt", "w").write("echo 🍺") +sh % "hg commit -Aqm unicode" +sh % "hg rm unicode.txt" +sh % "hg commit -qm remove" +sh % "hg export --rev 'desc(unicode)'" | "hg import -" == r""" + applying patch from stdin +"""