sapling/eden/integration/unicode_test.py
Xavier Deguillard 8853701e91 path: forbid building non-utf8 paths
Summary:
The world has moved on to utf-8 as the default encoding for files and data, but
EdenFS still accepts non utf-8 filenames to be written to it. In fact, most of
the time when a non utf-8 file is written to the working copy, even though
EdenFS handles it properly, Mercurial ends up freaking out and crashing. In all of
these cases, non-utf8 files were not intentional, and thus refusing to create
them wouldn't be a loss of functionality.

Note that this diff makes the assumption that Mercurial's manifest only accepts
utf8 paths, and thus we only have to protect against files being created in the
working copy that aren't utf8.

The unfortunate part of this diff is that it makes importing trees a bit more
expensive as testing that a path is utf8 valid is not free.

Reviewed By: chadaustin

Differential Revision: D25442975

fbshipit-source-id: 89341a004272736a61639751da43c2e9c673d5b3
2021-02-23 11:35:12 -08:00

54 lines
1.5 KiB
Python

#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2.
# pyre-strict
import errno
import os
import sys
from contextlib import contextmanager
from typing import Generator
from .lib import testcase
@testcase.eden_repo_test
# pyre-ignore[13]: T62487924
class UnicodeTest(testcase.EdenRepoTest):
    """Check that EdenFS refuses to create entries whose names are not utf-8.

    Each test attempts one filesystem operation (mkdir, file creation,
    rename) targeting a path inside the mount whose final component starts
    with the bytes ``\\xff\\xff``, and expects the operation to fail.
    """

    # Byte path inside the mount whose last component is not valid utf-8;
    # assigned in populate_repo().
    non_utf8_path: bytes

    def populate_repo(self) -> None:
        """Commit a single file "a" and compute the non-utf8 target path."""
        self.repo.write_file("a", "a")
        self.repo.commit("Initial commit.")

        # Build the path from bytes so the invalid sequence is kept verbatim.
        mount_bytes = self.mount.encode("utf-8")
        self.non_utf8_path = os.path.join(mount_bytes, b"\xff\xfffoobar")

    @contextmanager
    def verifyUtf8Error(self) -> Generator[None, None, None]:
        """Context manager asserting that the wrapped code fails on the bad path.

        On win32 the body must raise UnicodeDecodeError. On every other
        platform it must raise OSError with errno set to EILSEQ.
        """
        if sys.platform == "win32":
            with self.assertRaises(UnicodeDecodeError):
                yield
            return

        with self.assertRaises(OSError) as raised:
            yield
        # The captured exception is only available once the block has exited.
        self.assertEqual(errno.EILSEQ, raised.exception.errno)

    def test_mkdir_non_utf8(self) -> None:
        """Creating a directory at the non-utf8 path must fail."""
        with self.verifyUtf8Error():
            os.mkdir(self.non_utf8_path)

    def test_create_file_non_utf8(self) -> None:
        """Opening the non-utf8 path for writing must fail."""
        with self.verifyUtf8Error():
            with open(self.non_utf8_path, "w") as handle:
                handle.write("foo")

    def test_rename_non_utf8(self) -> None:
        """Renaming the committed file "a" to the non-utf8 path must fail."""
        source = os.path.join(self.mount, "a")
        with self.verifyUtf8Error():
            os.rename(source, self.non_utf8_path)