add code to test saved snapshots

Summary:
Update the snapshot framework code to add tests that mount saved snapshots
with the current edenfs build and verify that they behave as expected.

This also changes the snapshot generation code a fair amount as well, in
order to be able to verify the snapshot data:
- We now more cleanly split out data that should be saved when the snapshot is
  first generated (such as the `.eden` state directory) and data that should
  be re-generated each time the snapshot is unpacked (such as the `/etc/eden`
  config directory and the system configuration for `hg`).
- The code is now capable of rewriting absolute paths in the Eden state files
  so it is usable in a new location after it has been unpacked.

This also updates the "basic" snapshot type to create a wider variety of types
of files and file changes, and to be able to verify the contents of a resumed
snapshot.

Reviewed By: strager

Differential Revision: D9955411

fbshipit-source-id: 76012c01016cf4d89dc611c5596a05f5e7f013ed
This commit is contained in:
Adam Simpkins 2018-10-31 12:08:27 -07:00 committed by Facebook Github Bot
parent e3dbed8148
commit 49a7c05d61
5 changed files with 746 additions and 66 deletions

View File

@ -9,23 +9,32 @@
import abc
import contextlib
import datetime
import json
import logging
import os
import socket
import stat
import subprocess
import tempfile
import time
import types
import typing
from pathlib import Path
from typing import Callable, Dict, Iterator, List, Optional, Type, TypeVar, Union
from typing import Any, Callable, Dict, Iterator, List, Optional, Type, TypeVar, Union
import toml
from eden.integration.lib import edenclient, hgrepo, util
from eden.integration.lib.find_executables import FindExe
from eden.integration.lib.temporary_directory import create_tmp_dir
from . import verify as verify_mod
T = TypeVar("T", bound="BaseSnapshot")
class BaseSnapshot:
class BaseSnapshot(metaclass=abc.ABCMeta):
# The NAME and DESCRIPTION class fields are intended to be overridden on subclasses
# by the @snapshot_class decorator.
NAME = "Base Snapshot Class"
@ -33,7 +42,22 @@ class BaseSnapshot:
def __init__(self, base_dir: Path) -> None:
self.base_dir = base_dir
self.eden: Optional[edenclient.EdenFS] = None
# All data inside self.data_dir will be saved as part of the snapshot
self.data_dir = self.base_dir / "data"
# Anything inside self.transient_dir will not be saved with the snapshot,
# and will always be regenerated from scratch when resuming a snapshot.
self.transient_dir = self.base_dir / "transient"
self.eden_state_dir = self.data_dir / "eden"
# We put the etc eden directory inside the transient directory.
# Whenever we resume a snapshot we want to use a current version of the edenfs
# daemon and its configuration, rather than an old copy of the edenfs
# configuration.
self.etc_eden_dir = self.transient_dir / "etc_eden"
# We put the home directory inside the transient directory as well.
self.home_dir = self.transient_dir / "home"
def __enter__(self: T) -> T:
return self
@ -44,15 +68,7 @@ class BaseSnapshot:
exc_value: Optional[BaseException],
tb: Optional[types.TracebackType],
) -> None:
self.cleanup()
def cleanup(self) -> None:
if self.eden is not None:
try:
self.eden.kill()
except Exception as ex:
logging.exception("error stopping edenfs")
self.eden = None
pass
def create_tarball(self, output_path: Path) -> None:
"""Create a tarball from the snapshot contents.
@ -85,46 +101,209 @@ class BaseSnapshot:
subprocess.check_call(cmd, cwd=self.base_dir)
def generate(self) -> None:
self._setup_directories()
"""Generate the snapshot data.
This method should normally be called after constructing the snapshot object
pointing to an empty directory.
"""
self._create_directories()
self._emit_metadata()
self.gen_before_eden_running()
self.eden = edenclient.EdenFS(
with self.edenfs() as eden:
eden.start()
self.gen_eden_running(eden)
self.gen_after_eden_stopped()
# Rewrite the config state to point to "/tmp/dummy_snapshot_path"
# This isn't really strictly necessary, but just makes the state that
# gets saved slightly more deterministic.
self._relocate_to(Path("/tmp/dummy_snapshot_path"))
def verify(self, verifier: verify_mod.SnapshotVerifier) -> None:
"""Verify that the snapshot data looks correct.
This is generally invoked by tests to confirm that an unpacked snapshot still
works properly with the current version of EdenFS.
"""
with self.edenfs() as eden:
eden.start()
print("Verifing snapshot data:")
print("=" * 60)
self.verify_snapshot_data(verifier, eden)
print("=" * 60)
def edenfs(self) -> edenclient.EdenFS:
"""Return an EdenFS object that can be used to run an edenfs daemon for this
snapshot.
The returned EdenFS object will not be started yet; the caller must explicitly
call start() on it.
"""
return edenclient.EdenFS(
eden_dir=str(self.eden_state_dir),
etc_eden_dir=str(self.etc_eden_dir),
home_dir=str(self.home_dir),
storage_engine="rocksdb",
)
try:
self.eden.start()
self.gen_eden_running()
finally:
self.eden.kill()
self.eden = None
self.gen_after_eden_stopped()
def resume(self) -> None:
"""Prepare a snapshot to be resumed after unpacking it.
def _setup_directories(self) -> None:
self.data_dir = self.base_dir / "data"
This updates the snapshot data so it can be run from its new location,
and recreates any transient state needed for the snapshot.
"""
self.create_transient_dir()
self._relocate_to(self.base_dir)
self.prep_resume()
def _create_directories(self) -> None:
self.data_dir.mkdir()
self.create_transient_dir()
self.eden_state_dir = self.data_dir / "eden"
self.etc_eden_dir = self.data_dir / "etc_eden"
def create_transient_dir(self) -> None:
self.transient_dir.mkdir()
self.etc_eden_dir.mkdir()
self.home_dir = self.data_dir / "home"
self.home_dir.mkdir()
# Set up configuration and hooks inside the etc eden directory.
hooks_dir = self.etc_eden_dir / "hooks"
hooks_dir.mkdir()
os.symlink(FindExe.EDEN_POST_CLONE_HOOK, hooks_dir / "post-clone")
config_dir = self.etc_eden_dir / "config.d"
config_dir.mkdir()
# Set the hg.edenextension path to the empty string, so that
# we use the version of the eden extension built into hg.par
toml_config = {"hooks": {"hg.edenextension": ""}}
with (config_dir / "hooks").open("w") as f:
toml.dump(toml_config, f)
def _emit_metadata(self) -> None:
now = time.time()
# In addition to recording the current time as a unix timestamp,
# we also store a tuple of (year, month, day). This is primarily to help make
# it easier for future verification code if we ever need to alter the
# verification logic for older versions of the same snapshot type.
# This will allow more human-readable time comparisons in the code, and makes it
# easier to compare just based on a prefix of this tuple.
now_date = datetime.datetime.fromtimestamp(now)
date_tuple = (
now_date.year,
now_date.month,
now_date.day,
now_date.hour,
now_date.minute,
now_date.second,
)
data = {
"type": self.NAME,
"description": self.DESCRIPTION,
"time_created": time.time(),
"time_created": int(now),
"date_created": date_tuple,
"base_dir": str(self.base_dir),
}
self._write_metadata(data)
metadata_path = self.data_dir / "info.json"
with metadata_path.open("w") as f:
@property
def _metadata_path(self) -> Path:
return self.data_dir / "info.json"
def _write_metadata(self, data: Dict[str, Any]) -> None:
with self._metadata_path.open("w") as f:
json.dump(data, f, indent=2, sort_keys=True)
def _read_metadata(self) -> Dict[str, Any]:
with self._metadata_path.open("r") as f:
return typing.cast(Dict[str, Any], json.load(f))
def _relocate_to(self, base_dir: Path) -> None:
"""Rewrite data inside an unpacked snapshot directory to refer to the base
directory using the specified path.
This replaces absolute path names in various data files to refer to the new
location. This is needed so that a snapshot originally created in one location
can be unpacked and used in another location.
"""
info = self._read_metadata()
old_base_dir = Path(info["base_dir"])
# A few files in the RocksDB directory end up with the absolute path
# embedded in them.
rocks_db_path = self.eden_state_dir / "storage" / "rocks-db"
for entry in rocks_db_path.iterdir():
if entry.name.startswith("LOG") or entry.name.startswith("OPTIONS"):
self._replace_file_contents(entry, bytes(old_base_dir), bytes(base_dir))
# Parse eden's config.json to get the list of checkouts, and update each one.
eden_config_path = self.eden_state_dir / "config.json"
with eden_config_path.open("r+") as config_file:
eden_data = json.load(config_file)
new_config_data = {}
for _old_checkout_path, checkout_name in eden_data.items():
new_checkout_path = self.data_dir / checkout_name
new_config_data[str(new_checkout_path)] = checkout_name
checkout_state_dir = self.eden_state_dir / "clients" / checkout_name
self._relocate_checkout(checkout_state_dir, old_base_dir, base_dir)
config_file.seek(0)
config_file.truncate()
json.dump(new_config_data, config_file, indent=2, sort_keys=True)
# Update the info file with the new base path
info["base_dir"] = str(base_dir)
self._write_metadata(info)
def _relocate_checkout(
    self, checkout_state_dir: Path, old_base_dir: Path, new_base_dir: Path
) -> None:
    """Rewrite absolute paths stored in a single checkout's state directory.

    Replaces old_base_dir with new_base_dir in the checkout's config.toml and
    in every file of its overlay ("local") directory.
    """
    self._replace_file_contents(
        checkout_state_dir / "config.toml", bytes(old_base_dir), bytes(new_base_dir)
    )
    overlay_dir = checkout_state_dir / "local"
    self._relocate_overlay_dir(overlay_dir, bytes(old_base_dir), bytes(new_base_dir))
def _relocate_overlay_dir(
    self, dir_path: Path, old_data: bytes, new_data: bytes
) -> None:
    """Recursively replace old_data with new_data in every regular file
    under dir_path (a checkout's overlay directory)."""
    # Recursively update the contents for every file in the overlay
    # if it contains the old path.
    #
    # This approach is pretty dumb: we aren't processing the overlay file formats at
    # all, just blindly replacing the contents if we happen to see something that
    # looks like the old path.  For now this is the easiest thing to do, and the
    # chance of other data looking like the source path should be very unlikely.
    #
    # In practice we normally need to update the overlay files for at least the
    # following inodes:
    #   .eden/root
    #   .eden/client
    #   .eden/socket
    #   .hg/sharedpath
    #
    for path in dir_path.iterdir():
        stat_info = path.lstat()
        if stat.S_ISDIR(stat_info.st_mode):
            self._relocate_overlay_dir(path, old_data, new_data)
        else:
            self._replace_file_contents(path, old_data, new_data)
def _replace_file_contents(
    self, path: Path, old_data: bytes, new_data: bytes
) -> None:
    """Replace every occurrence of old_data with new_data in the file at path.

    The file is rewritten in place, and only when a replacement actually
    occurred, so untouched files keep their original timestamps/contents.
    """
    with path.open("rb+") as f:
        file_contents = f.read()
        new_contents = file_contents.replace(old_data, new_data)
        if new_contents != file_contents:
            # Rewind and truncate so that a replacement of different length
            # fully overwrites the previous contents.
            f.seek(0)
            f.truncate()
            f.write(new_contents)
def gen_before_eden_running(self) -> None:
"""gen_before_eden_running() will be called when generating a new snapshot after
the directory structure has been set up but before edenfs is started.
@ -133,7 +312,7 @@ class BaseSnapshot:
"""
pass
def gen_eden_running(self) -> None:
def gen_eden_running(self, eden: edenclient.EdenFS) -> None:
"""gen_eden_running() will be called when generating a new snapshot once edenfs
has been started.
@ -149,37 +328,60 @@ class BaseSnapshot:
"""
pass
def prep_resume(self) -> None:
"""prep_resume() will be when preparing to resume a snapshot, before edenfs has
been started.
Subclasses of BaseSnapshot can perform any work they want here.
here.
"""
pass
@abc.abstractmethod
def verify_snapshot_data(
self, verifier: verify_mod.SnapshotVerifier, eden: edenclient.EdenFS
) -> None:
"""Verify that the snapshot data looks correct.
This method should be overridden by subclasses.
"""
pass
class HgSnapshot(BaseSnapshot, metaclass=abc.ABCMeta):
"""A helper parent class for BaseSnapshot implementations that creates a single
checkout of a mercurial repository."""
def gen_before_eden_running(self) -> None:
# Prepare the system hgrc file
self.system_hgrc_path = self.data_dir / "system_hgrc"
def create_transient_dir(self) -> None:
super().create_transient_dir()
# Note that we put the system hgrc file in self.transient_dir rather than
# self.data_dir:
# This file is not saved with the snapshot, and is instead regenerated each time
# we unpack the snapshot. This reflects the fact that we always run with the
# current system hgrc rather than an old snapshot of the system configs.
self.system_hgrc_path = self.transient_dir / "system_hgrc"
self.system_hgrc_path.write_text(hgrepo.HgRepository.get_system_hgrc_contents())
def hg_repo(self, path: Path) -> hgrepo.HgRepository:
return hgrepo.HgRepository(str(path), system_hgrc=str(self.system_hgrc_path))
def gen_before_eden_running(self) -> None:
logging.info("Creating backing repository...")
# Create the repository
backing_repo_path = self.data_dir / "repo"
backing_repo_path.mkdir()
self.backing_repo = hgrepo.HgRepository(
str(backing_repo_path), system_hgrc=str(self.system_hgrc_path)
)
self.backing_repo = self.hg_repo(backing_repo_path)
self.backing_repo.init()
self.populate_backing_repo()
def gen_eden_running(self) -> None:
assert self.eden is not None
def gen_eden_running(self, eden: edenclient.EdenFS) -> None:
logging.info("Preparing checkout...")
checkout_path = self.data_dir / "checkout"
self.eden.clone(self.backing_repo.path, str(checkout_path))
eden.clone(self.backing_repo.path, str(self.checkout_path))
self.checkout_repo = hgrepo.HgRepository(
str(checkout_path), system_hgrc=str(self.system_hgrc_path)
)
self.checkout_repo = self.hg_repo(self.checkout_path)
self.populate_checkout()
@abc.abstractmethod
@ -190,32 +392,62 @@ class HgSnapshot(BaseSnapshot, metaclass=abc.ABCMeta):
def populate_checkout(self) -> None:
pass
def checkout_path(self, *args: Union[Path, str]) -> Path:
"""Compute a path inside the checkout."""
return Path(self.checkout_repo.path, *args)
@property
def checkout_path(self) -> Path:
"""Return the path to the checkout root."""
return self.data_dir / "checkout"
def read_file(self, path: Union[Path, str]) -> bytes:
"""Helper function to read a file in the checkout.
This is primarily used to ensure that the file is loaded.
"""
file_path = self.checkout_path(path)
file_path = self.checkout_path / path
with file_path.open("rb") as f:
data: bytes = f.read()
return data
def write_file(self, path: Union[Path, str], contents: bytes) -> None:
def write_file(
self, path: Union[Path, str], contents: bytes, mode: int = 0o644
) -> None:
"""Helper function to write a file in the checkout."""
file_path = self.checkout_path(path)
file_path = self.checkout_path / path
file_path.parent.mkdir(parents=True, exist_ok=True)
with file_path.open("wb") as f:
os.fchmod(f.fileno(), mode)
f.write(contents)
def chmod(self, path: Union[Path, str], mode: int) -> None:
file_path = self.checkout_path / path
os.chmod(file_path, mode)
def mkdir(self, path: Union[Path, str], mode: int = 0o755) -> None:
dir_path = self.checkout_path / path
dir_path.mkdir(mode=mode, parents=True, exist_ok=False)
# Explicitly call chmod() to ignore any umask settings
dir_path.chmod(mode)
def list_dir(self, path: Union[Path, str]) -> List[Path]:
"""List the contents of a directory in the checkout.
This can be used to ensure the directory has been loaded by Eden.
"""
dir_path = self.checkout_path(path)
dir_path = self.checkout_path / path
return list(dir_path.iterdir())
def make_socket(self, path: Union[Path, str], mode: int = 0o755) -> None:
    """Create a unix-domain socket at the given checkout-relative path,
    with the requested permission bits."""
    socket_path = self.checkout_path / path
    with socket.socket(socket.AF_UNIX) as sock:
        # Call fchmod() before we create the socket to ensure that its initial
        # permissions are not looser than requested.  The OS will still honor the
        # umask when creating the socket.
        os.fchmod(sock.fileno(), mode)
        sock.bind(str(socket_path))
        sock.listen(10)
        # Call chmod() to update the permissions ignoring the umask.
        # Note that we unfortunately must use path.chmod() here rather than
        # os.fchmod(): Linux appears to ignore fchmod() calls after the socket has
        # already been bound.
        socket_path.chmod(mode)
snapshot_types: Dict[str, Type[BaseSnapshot]] = {}
@ -243,9 +475,43 @@ def generate(snapshot_type: Type[T]) -> Iterator[T]:
temporary directory that will be cleaned up when exiting the `with` context.
"""
with create_tmp_dir() as tmpdir:
with snapshot_type(tmpdir) as snapshot:
snapshot.generate()
yield snapshot
snapshot = snapshot_type(tmpdir)
snapshot.generate()
yield snapshot
class UnknownSnapshotTypeError(ValueError):
def __init__(self, type_name: str) -> None:
super().__init__(f"unknown snapshot type {type_name!r}")
self.type_name = type_name
def unpack_into(snapshot_path: Path, output_path: Path) -> BaseSnapshot:
"""Unpack a snapshot into the specified output directory.
Returns the appropriate BaseSnapshot subclass for this snapshot.
"""
# GNU tar is smart enough to automatically figure out the correct
# decompression method.
untar_cmd = ["tar", "-xf", str(snapshot_path)]
subprocess.check_call(untar_cmd, cwd=output_path)
data_dir = output_path / "data"
try:
with (data_dir / "info.json").open("r") as info_file:
info = json.load(info_file)
type_name = info["type"]
snapshot_type = snapshot_types.get(type_name)
if snapshot_type is None:
raise UnknownSnapshotTypeError(type_name)
snapshot = snapshot_type(output_path)
snapshot.resume()
return snapshot
except Exception as ex:
util.cleanup_tmp_dir(data_dir)
raise
def _import_snapshot_modules() -> None:

View File

@ -0,0 +1,131 @@
#!/usr/bin/env python3
#
# Copyright (c) 2016-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree. An additional grant
# of patent rights can be found in the PATENTS file in the same directory.
import os
import stat
import unittest
from pathlib import Path
from typing import Callable
from eden.integration.lib import edenclient
from . import snapshot as snapshot_mod, verify as verify_mod
class Test(unittest.TestCase):
    """Validates the contents of previously saved snapshots.

    This class intentionally defines no test_* methods of its own: one test
    function per saved snapshot file is attached dynamically by
    register_tests().
    """

    def _test_snapshot(self, snapshot_path: Path) -> None:
        # Unpack the snapshot into a scratch directory that is cleaned up
        # automatically once verification finishes.
        with snapshot_mod.create_tmp_dir() as unpack_dir:
            unpacked = snapshot_mod.unpack_into(snapshot_path, unpack_dir)
            self._run_test(unpacked)

    def _run_test(self, snapshot: snapshot_mod.BaseSnapshot) -> None:
        checker = verify_mod.SnapshotVerifier()
        snapshot.verify(checker)

        # Each error was already printed as it was discovered, so here we only
        # need to fail the test if anything was recorded.
        error_count = len(checker.errors)
        if error_count:
            self.fail(f"found {error_count} errors")
class InfraTests(unittest.TestCase):
    """Tests for the snapshot generation/verification code itself."""

    # Incremented by register_tests() for each snapshot file it discovers,
    # so that test_snapshot_list() can detect a mislocated data directory.
    NUM_SNAPSHOTS = 0

    def test_snapshot_list(self) -> None:
        # Ensure that at least one snapshot file was found, so that the tests will
        # fail if we somehow can't find the snapshot data directory correctly.
        self.assertGreater(self.NUM_SNAPSHOTS, 0)

    def test_verify_directory(self) -> None:
        """End-to-end check of SnapshotVerifier.verify_directory(): build a
        directory tree on disk, compare it to an expected-file list, and
        confirm that exactly the anticipated errors are reported."""
        File = verify_mod.ExpectedFile
        Socket = verify_mod.ExpectedSocket
        Symlink = verify_mod.ExpectedSymlink
        expected = [
            File("a/b/normal.txt", b"abc\n", 0o644),
            File("a/b/normal_exe.exe", b"abc\n", 0o755),
            File("a/b/missing.txt", b"abc\n", 0o644),
            File("a/b/wrong_perms.txt", b"abc\n", 0o644),
            File("a/b/wrong_file_type.txt", b"abc\n", 0o644),
            Socket("a/normal.sock", 0o644),
            Socket("a/exe.sock", 0o755),
            Symlink("a/normal.link", b"symlink contents", 0o777),
            Symlink("a/missing.link", b"missing symlink", 0o777),
        ]

        # Define a subclass of HgSnapshot.  We define this subclass solely so that
        # we can use its helper write_file(), make_socket(), and mkdir() methods.
        class MockSnapshot(snapshot_mod.HgSnapshot):
            def populate_backing_repo(self) -> None:
                pass

            def populate_checkout(self) -> None:
                pass

            def verify_snapshot_data(
                self, verifier: verify_mod.SnapshotVerifier, eden: edenclient.EdenFS
            ) -> None:
                pass

        with snapshot_mod.create_tmp_dir() as tmp_dir:
            snapshot = MockSnapshot(tmp_dir)
            snapshot.data_dir.mkdir()
            snapshot.checkout_path.mkdir()
            snapshot.write_file("a/b/normal.txt", b"abc\n", 0o644)
            snapshot.write_file("a/b/normal_exe.exe", b"abc\n", 0o755)
            snapshot.write_file("a/b/wrong_perms.txt", b"abc\n", 0o755)
            snapshot.make_socket("a/b/wrong_file_type.txt", 0o755)
            snapshot.make_socket("a/normal.sock", 0o644)
            snapshot.make_socket("a/exe.sock", 0o755)
            os.symlink(b"symlink contents", snapshot.checkout_path / "a/normal.link")
            # The verifier code only checks files, not directories, so it should not
            # complain about extra directories that may be present.
            snapshot.mkdir("a/b/c/extra_dir", 0o755)

            verifier = verify_mod.SnapshotVerifier()
            verifier.verify_directory(snapshot.checkout_path, expected)

            expected_errors = [
                "a/b/missing.txt: file not present in snapshot",
                "a/missing.link: file not present in snapshot",
                f"a/b/wrong_file_type.txt: expected file type to be {stat.S_IFREG:#o}, "
                f"found {stat.S_IFSOCK:#o}",
                f"a/b/wrong_file_type.txt: expected permissions to be 0o644, found 0o755",
                "a/b/wrong_perms.txt: expected permissions to be 0o644, found 0o755",
            ]
            self.assertEqual(sorted(verifier.errors), sorted(expected_errors))
def register_tests() -> None:
    """Attach one test function to the Test class per saved snapshot file,
    and record how many snapshots were found on InfraTests."""
    snapshot_dir = Path("eden/test-data/snapshots").resolve()
    for snapshot_file in snapshot_dir.iterdir():
        # Path.stem only removes the final suffix (so foo.tar.bz2 would become
        # foo.tar rather than foo); split on the first "." instead.
        base_name = snapshot_file.name.split(".", 1)[0]
        setattr(Test, f"test_{base_name}", _create_test_fn(snapshot_file))
        InfraTests.NUM_SNAPSHOTS += 1


def _create_test_fn(snapshot: Path) -> Callable[[Test], None]:
    """Build the test function that verifies a single snapshot file."""

    def test_fn(self: Test) -> None:
        self._test_snapshot(snapshot)

    return test_fn


register_tests()

View File

@ -7,6 +7,8 @@
# LICENSE file in the root directory of this source tree. An additional grant
# of patent rights can be found in the PATENTS file in the same directory.
from eden.integration.lib import edenclient
from eden.integration.snapshot import verify as verify_mod
from eden.integration.snapshot.snapshot import HgSnapshot, snapshot_class
@ -15,22 +17,129 @@ from eden.integration.snapshot.snapshot import HgSnapshot, snapshot_class
"A simple directory structure with a mix of loaded, materialized, "
"and unloaded files.",
)
class BaseSnapshot(HgSnapshot):
class BasicSnapshot(HgSnapshot):
def populate_backing_repo(self) -> None:
repo = self.backing_repo
repo.write_file("README.md", "project docs")
repo.write_file("src/main.c", 'printf("hello world!\\n");\n')
repo.write_file("src/lib.c", "void do_stuff() {}\n")
repo.write_file("src/test/test.c", 'printf("success!\\n");\n')
repo.write_file("include/lib.h", "void do_stuff();\n")
repo.write_file("other/foo.txt", "foo\n")
repo.write_file("other/bar.txt", "bar\n")
repo.write_file("other/a/b/c.txt", "abc\n")
repo.write_file(".gitignore", "ignored.txt\n")
repo.write_file("main/loaded_dir/loaded_file.c", "loaded")
repo.write_file("main/loaded_dir/not_loaded_file.c", "not loaded")
repo.write_file("main/loaded_dir/not_loaded_exe.sh", "not loaded", mode=0o755)
repo.write_file(
"main/materialized_subdir/script.sh", "original script contents", mode=0o755
)
repo.write_file("main/materialized_subdir/test.c", "original test contents")
repo.write_file("main/materialized_subdir/unmodified.txt", "original contents")
repo.write_file("main/mode_changes/normal_to_exe.txt", "will change mode")
repo.write_file(
"main/mode_changes/exe_to_normal.txt", "will change mode", mode=0o755
)
repo.write_file("main/mode_changes/normal_to_readonly.txt", "will be readonly")
repo.write_file("never_accessed/foo/bar/baz.txt", "baz\n")
repo.write_file("never_accessed/foo/bar/xyz.txt", "xyz\n")
repo.write_file("never_accessed/foo/file.txt", "data\n")
repo.commit("Initial commit.")
def populate_checkout(self) -> None:
# Load the src directory and the src/lib.c file
self.list_dir("src")
self.read_file("src/lib.c")
# Modify src/test/test.c to force it to be materialized
self.write_file("src/test/test.c", b"new test contents")
# Load the main/loaded_dir directory and the main/loaded_dir/lib.c file
# This currently allocates inode numbers for everything in main/loaded_dir/ and
# causes main/loaded_dir/ to be tracked in the overlay
self.list_dir("main/loaded_dir")
self.read_file("main/loaded_dir/loaded_file.c")
# Modify some files in main/materialized_subdir to force them to be materialized
self.write_file(
"main/materialized_subdir/script.sh", b"new script contents", 0o755
)
self.write_file("main/materialized_subdir/test.c", b"new test contents")
# Test materializing some files by changing their mode
self.chmod("main/mode_changes/normal_to_exe.txt", 0o755)
self.chmod("main/mode_changes/exe_to_normal.txt", 0o644)
self.chmod("main/mode_changes/normal_to_readonly.txt", 0o400)
# Create a new top-level directory with some new files
self.write_file("untracked/new/normal.txt", b"new src contents")
self.write_file("untracked/new/normal2.txt", b"extra src contents")
self.write_file("untracked/new/readonly.txt", b"new readonly contents", 0o400)
self.write_file("untracked/executable.exe", b"do stuff", mode=0o755)
self.make_socket("untracked/everybody.sock", mode=0o666)
self.make_socket("untracked/owner_only.sock", mode=0o600)
# Create some untracked files in an existing tracked directory
self.write_file("main/untracked.txt", b"new new untracked file")
self.write_file("main/ignored.txt", b"new ignored file")
self.write_file("main/untracked_dir/foo.txt", b"foobar")
def verify_snapshot_data(
self, verifier: verify_mod.SnapshotVerifier, eden: edenclient.EdenFS
) -> None:
# Confirm that `hg status` reports the correct information
self.verify_hg_status(verifier)
# Confirm that the files look like what we expect
File = verify_mod.ExpectedFile
Socket = verify_mod.ExpectedSocket
Symlink = verify_mod.ExpectedSymlink
expected_files = [
# TODO: These symlink permissions should ideally be 0o777
Symlink(".eden/root", bytes(self.checkout_path), 0o770),
Symlink(
".eden/client",
bytes(self.eden_state_dir / "clients" / "checkout"),
0o770,
),
Symlink(".eden/socket", bytes(self.eden_state_dir / "socket"), 0o770),
File("README.md", b"project docs", 0o644),
File(".gitignore", b"ignored.txt\n", 0o644),
File("main/loaded_dir/loaded_file.c", b"loaded", 0o644),
File("main/loaded_dir/not_loaded_file.c", b"not loaded", 0o644),
File("main/loaded_dir/not_loaded_exe.sh", b"not loaded", 0o755),
File("main/materialized_subdir/script.sh", b"new script contents", 0o755),
File("main/materialized_subdir/test.c", b"new test contents", 0o644),
File(
"main/materialized_subdir/unmodified.txt", b"original contents", 0o644
),
File("main/mode_changes/normal_to_exe.txt", b"will change mode", 0o755),
File("main/mode_changes/exe_to_normal.txt", b"will change mode", 0o644),
File(
"main/mode_changes/normal_to_readonly.txt", b"will be readonly", 0o400
),
File("main/untracked.txt", b"new new untracked file", 0o644),
File("main/ignored.txt", b"new ignored file", 0o644),
File("main/untracked_dir/foo.txt", b"foobar", 0o644),
File("never_accessed/foo/bar/baz.txt", b"baz\n", 0o644),
File("never_accessed/foo/bar/xyz.txt", b"xyz\n", 0o644),
File("never_accessed/foo/file.txt", b"data\n", 0o644),
File("untracked/new/normal.txt", b"new src contents", 0o644),
File("untracked/new/normal2.txt", b"extra src contents", 0o644),
File("untracked/new/readonly.txt", b"new readonly contents", 0o400),
File("untracked/executable.exe", b"do stuff", 0o755),
Socket("untracked/everybody.sock", 0o666),
Socket("untracked/owner_only.sock", 0o600),
]
verifier.verify_directory(self.checkout_path, expected_files)
def verify_hg_status(self, verifier: verify_mod.SnapshotVerifier) -> None:
expected_status = {
"main/materialized_subdir/script.sh": "M",
"main/materialized_subdir/test.c": "M",
"main/mode_changes/normal_to_exe.txt": "M",
"main/mode_changes/exe_to_normal.txt": "M",
# We changed the mode on main/mode_changes/normal_to_readonly.txt,
# but the change isn't significant to mercurial.
"untracked/new/normal.txt": "?",
"untracked/new/normal2.txt": "?",
"untracked/new/readonly.txt": "?",
"untracked/executable.exe": "?",
"untracked/everybody.sock": "?",
"untracked/owner_only.sock": "?",
"main/untracked.txt": "?",
"main/ignored.txt": "I",
"main/untracked_dir/foo.txt": "?",
}
repo = self.hg_repo(self.checkout_path)
verifier.verify_hg_status(repo, expected_status)

View File

@ -0,0 +1,174 @@
#!/usr/bin/env python3
#
# Copyright (c) 2016-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree. An additional grant
# of patent rights can be found in the PATENTS file in the same directory.
import abc
import os
import stat as stat_mod
from pathlib import Path
from typing import Dict, List
from eden.integration.lib import hgrepo
class ExpectedFileBase(metaclass=abc.ABCMeta):
    """Describes one filesystem entry that a snapshot is expected to contain.

    The base class checks the permission bits and the file type; each subclass
    supplies a type-specific content check via _verify_contents().
    """

    def __init__(self, path: str, perms: int, file_type: int) -> None:
        # Checkout-relative location of the entry.
        self.path = Path(path)
        # Expected permission bits (S_IMODE portion of st_mode).
        self.permissions = perms
        # Expected file type (S_IFMT portion of st_mode).
        self.file_type = file_type

    def verify(
        self, verifier: "SnapshotVerifier", path: Path, stat_info: os.stat_result
    ) -> None:
        """Compare stat_info against the expected permissions and file type,
        reporting any mismatches through verifier."""
        actual_perms = stat_mod.S_IMODE(stat_info.st_mode)
        if actual_perms != self.permissions:
            verifier.error(
                f"{self.path}: expected permissions to be {self.permissions:#o}, "
                f"found {actual_perms:#o}"
            )

        actual_type = stat_mod.S_IFMT(stat_info.st_mode)
        if actual_type == self.file_type:
            # Only inspect the contents when the entry has the right type.
            self._verify_contents(verifier, path)
        else:
            verifier.error(
                f"{self.path}: expected file type to be {self.file_type:#o}, "
                f"found {actual_type:#o}"
            )

    @abc.abstractmethod
    def _verify_contents(self, verifier: "SnapshotVerifier", path: Path) -> None:
        """Type-specific content check; implemented by each subclass."""

    def _error(self, msg: str) -> None:
        raise ValueError(msg)


class ExpectedFile(ExpectedFileBase):
    """A regular file whose byte contents must match exactly."""

    def __init__(self, path: str, contents: bytes, perms: int = 0o644) -> None:
        super().__init__(path, perms, stat_mod.S_IFREG)
        self.contents = contents

    def _verify_contents(self, verifier: "SnapshotVerifier", path: Path) -> None:
        with path.open("rb") as f:
            actual_contents = f.read()
        if actual_contents != self.contents:
            verifier.error(
                f"file contents mismatch for {self.path}:\n"
                f"expected: {self.contents!r}\n"
                f"actual: {actual_contents!r}"
            )


class ExpectedSymlink(ExpectedFileBase):
    """A symlink whose target must match exactly."""

    def __init__(self, path: str, contents: bytes, perms: int = 0o777) -> None:
        super().__init__(path, perms, stat_mod.S_IFLNK)
        self.contents = contents

    def _verify_contents(self, verifier: "SnapshotVerifier", path: Path) -> None:
        actual_contents = os.readlink(bytes(path))
        if actual_contents != self.contents:
            verifier.error(
                f"symlink contents mismatch for {self.path}:\n"
                f"expected: {self.contents!r}\n"
                f"actual: {actual_contents!r}"
            )


class ExpectedSocket(ExpectedFileBase):
    """A unix-domain socket; only type and permission bits are checked."""

    def __init__(self, path: str, perms: int = 0o755) -> None:
        super().__init__(path, perms, stat_mod.S_IFSOCK)

    def _verify_contents(self, verifier: "SnapshotVerifier", path: Path) -> None:
        # Sockets have no on-disk contents to compare.
        pass
class SnapshotVerifier:
    """Collects and reports mismatches found while verifying a snapshot.

    Errors are accumulated in self.errors and (unless self.quiet is set)
    echoed to stdout as they are found, so a caller can both see progress
    and make a final pass/fail decision afterwards.
    """

    def __init__(self) -> None:
        # All error messages recorded so far, in discovery order.
        self.errors: List[str] = []
        # When True, errors are recorded but not printed.
        self.quiet = False

    def error(self, message: str) -> None:
        """Record a verification failure, echoing it unless quiet is set."""
        self.errors.append(message)
        if not self.quiet:
            print(f"==ERROR== {message}")

    # NOTE: the annotations below use forward-reference strings so this class
    # does not evaluate names defined later in the module (or project types)
    # at class-definition time; the runtime behavior is unchanged.
    def verify_directory(
        self, path: Path, expected: List["ExpectedFileBase"]
    ) -> None:
        """Confirm that the contents of a directory match the expected file state."""
        found_files = enumerate_directory(path)
        for expected_entry in expected:
            file_stat = found_files.pop(expected_entry.path, None)
            if file_stat is None:
                self.error(f"{expected_entry.path}: file not present in snapshot")
                continue

            full_path = path / expected_entry.path
            try:
                expected_entry.verify(self, full_path, file_stat)
            except AssertionError as ex:
                self.error(f"{expected_entry.path}: {ex}")
                continue

        # Anything still left in found_files was not listed in `expected`.
        # (Use a distinct loop variable so the `path` parameter above is not
        # shadowed — the original code reused the name `path` here.)
        for extra_path, stat_info in found_files.items():
            if stat_mod.S_ISDIR(stat_info.st_mode):
                # Don't require directories to be listed explicitly in the
                # input files.
                continue
            if str(extra_path.parents[0]) == ".hg":
                # Don't complain about files inside the .hg directory that the caller
                # did not explicitly specify.  Mercurial can create a variety of files
                # here, and we don't care about checking the exact list of files it
                # happened to create when the snapshot was generated.
                continue
            self.error(f"{extra_path}: unexpected file present in snapshot")

    def verify_hg_status(
        self,
        repo: "hgrepo.HgRepository",
        expected: Dict[str, str],
        check_ignored: bool = True,
    ) -> None:
        """Compare `hg status` output from repo against the expected mapping.

        expected maps checkout-relative paths to their expected one-letter
        status code.  Both mismatched/missing entries and entries reported by
        hg but absent from expected are recorded as errors.
        """
        actual_status = repo.status(include_ignored=check_ignored)

        for status_path, expected_char in expected.items():
            actual_char = actual_status.pop(status_path, None)
            if expected_char != actual_char:
                self.error(
                    f"{status_path}: unexpected hg status difference: "
                    f"reported as {actual_char}, expected {expected_char}"
                )

        for status_path, actual_char in actual_status.items():
            self.error(
                f"{status_path}: unexpected hg status difference: "
                f"reported as {actual_char}, expected None"
            )
def enumerate_directory(path: Path) -> Dict[Path, os.stat_result]:
    """
    Recursively walk a directory and return a dictionary of all of the files
    and directories it contains.

    Returns a dictionary of [path -> os.stat_result].
    The returned paths are relative to the input directory.
    """
    found: Dict[Path, os.stat_result] = {}
    _enumerate_directory_helper(path, Path(), found)
    return found


def _enumerate_directory_helper(
    root_path: Path, rel_path: Path, results: Dict[Path, os.stat_result]
) -> None:
    # Depth-first preorder walk: record each entry, then descend into
    # subdirectories.  Symlinks are recorded via lstat and never followed.
    for dir_entry in os.scandir(root_path / rel_path):
        # Current versions of typeshed don't know about the follow_symlinks
        # argument, so ignore type errors on the next line.
        lstat_info = dir_entry.stat(follow_symlinks=False)  # type: ignore
        child_path = rel_path / dir_entry.name
        results[child_path] = lstat_info
        if stat_mod.S_ISDIR(lstat_info.st_mode):
            _enumerate_directory_helper(root_path, child_path, results)

Binary file not shown.