Mirror of https://github.com/facebook/sapling.git (synced 2024-12-30 16:42:57 +03:00)
add code to test saved snapshots
Summary:
Update the snapshot framework code to add tests that mount saved snapshots with the current edenfs build and verify that they behave as expected.

This also changes the snapshot generation code a fair amount, in order to be able to verify the snapshot data:
- We now more cleanly split out data that should be saved when the snapshot is first generated (such as the `.eden` state directory) from data that should be re-generated each time the snapshot is unpacked (such as the `/etc/eden` config directory and the system configuration for `hg`).
- The code is now capable of rewriting absolute paths in the Eden state files, so a snapshot is usable in a new location after it has been unpacked.

This also updates the "basic" snapshot type to create a wider variety of file types and file changes, and to be able to verify the contents of a resumed snapshot.

Reviewed By: strager

Differential Revision: D9955411

fbshipit-source-id: 76012c01016cf4d89dc611c5596a05f5e7f013ed
Parent: e3dbed8148
Commit: 49a7c05d61
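For orientation before the diff itself, here is a minimal sketch (not part of the diff) of how the new verification flow fits together, based on the unpack_into() and SnapshotVerifier APIs added below; the snapshot_path argument is a placeholder for one of the saved tarballs under eden/test-data/snapshots.

from pathlib import Path

from eden.integration.snapshot import snapshot as snapshot_mod, verify as verify_mod


def check_saved_snapshot(snapshot_path: Path) -> None:
    # Unpack the saved tarball into a scratch directory, resume it there, then
    # mount it with the current edenfs build and verify its contents.
    with snapshot_mod.create_tmp_dir() as tmp_dir:
        snapshot = snapshot_mod.unpack_into(snapshot_path, tmp_dir)
        verifier = verify_mod.SnapshotVerifier()
        snapshot.verify(verifier)
        if verifier.errors:
            raise AssertionError(f"found {len(verifier.errors)} errors")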
@@ -9,23 +9,32 @@

import abc
import contextlib
import datetime
import json
import logging
import os
import socket
import stat
import subprocess
import tempfile
import time
import types
import typing
from pathlib import Path
from typing import Callable, Dict, Iterator, List, Optional, Type, TypeVar, Union
from typing import Any, Callable, Dict, Iterator, List, Optional, Type, TypeVar, Union

import toml
from eden.integration.lib import edenclient, hgrepo, util
from eden.integration.lib.find_executables import FindExe
from eden.integration.lib.temporary_directory import create_tmp_dir

from . import verify as verify_mod


T = TypeVar("T", bound="BaseSnapshot")


class BaseSnapshot:
class BaseSnapshot(metaclass=abc.ABCMeta):
    # The NAME and DESCRIPTION class fields are intended to be overridden on subclasses
    # by the @snapshot_class decorator.
    NAME = "Base Snapshot Class"
@@ -33,7 +42,22 @@ class BaseSnapshot:

    def __init__(self, base_dir: Path) -> None:
        self.base_dir = base_dir
        self.eden: Optional[edenclient.EdenFS] = None
        # All data inside self.data_dir will be saved as part of the snapshot
        self.data_dir = self.base_dir / "data"
        # Anything inside self.transient_dir will not be saved with the snapshot,
        # and will always be regenerated from scratch when resuming a snapshot.
        self.transient_dir = self.base_dir / "transient"

        self.eden_state_dir = self.data_dir / "eden"

        # We put the etc eden directory inside the transient directory.
        # Whenever we resume a snapshot we want to use a current version of the edenfs
        # daemon and its configuration, rather than an old copy of the edenfs
        # configuration.
        self.etc_eden_dir = self.transient_dir / "etc_eden"

        # We put the home directory inside the transient directory as well.
        self.home_dir = self.transient_dir / "home"

    def __enter__(self: T) -> T:
        return self

@@ -44,15 +68,7 @@ class BaseSnapshot:
        exc_value: Optional[BaseException],
        tb: Optional[types.TracebackType],
    ) -> None:
        self.cleanup()

    def cleanup(self) -> None:
        if self.eden is not None:
            try:
                self.eden.kill()
            except Exception as ex:
                logging.exception("error stopping edenfs")
            self.eden = None
        pass

    def create_tarball(self, output_path: Path) -> None:
        """Create a tarball from the snapshot contents.
@@ -85,46 +101,209 @@ class BaseSnapshot:
        subprocess.check_call(cmd, cwd=self.base_dir)

    def generate(self) -> None:
        self._setup_directories()
        """Generate the snapshot data.

        This method should normally be called after constructing the snapshot object
        pointing to an empty directory.
        """
        self._create_directories()
        self._emit_metadata()
        self.gen_before_eden_running()

        self.eden = edenclient.EdenFS(
        with self.edenfs() as eden:
            eden.start()
            self.gen_eden_running(eden)

        self.gen_after_eden_stopped()

        # Rewrite the config state to point to "/tmp/dummy_snapshot_path"
        # This isn't really strictly necessary, but just makes the state that
        # gets saved slightly more deterministic.
        self._relocate_to(Path("/tmp/dummy_snapshot_path"))

    def verify(self, verifier: verify_mod.SnapshotVerifier) -> None:
        """Verify that the snapshot data looks correct.

        This is generally invoked by tests to confirm that an unpacked snapshot still
        works properly with the current version of EdenFS.
        """
        with self.edenfs() as eden:
            eden.start()
            print("Verifying snapshot data:")
            print("=" * 60)
            self.verify_snapshot_data(verifier, eden)
            print("=" * 60)

    def edenfs(self) -> edenclient.EdenFS:
        """Return an EdenFS object that can be used to run an edenfs daemon for this
        snapshot.

        The returned EdenFS object will not be started yet; the caller must explicitly
        call start() on it.
        """
        return edenclient.EdenFS(
            eden_dir=str(self.eden_state_dir),
            etc_eden_dir=str(self.etc_eden_dir),
            home_dir=str(self.home_dir),
            storage_engine="rocksdb",
        )
        try:
            self.eden.start()
            self.gen_eden_running()
        finally:
            self.eden.kill()
            self.eden = None

        self.gen_after_eden_stopped()
    def resume(self) -> None:
        """Prepare a snapshot to be resumed after unpacking it.

    def _setup_directories(self) -> None:
        self.data_dir = self.base_dir / "data"
        This updates the snapshot data so it can be run from its new location,
        and recreates any transient state needed for the snapshot.
        """
        self.create_transient_dir()
        self._relocate_to(self.base_dir)
        self.prep_resume()

    def _create_directories(self) -> None:
        self.data_dir.mkdir()
        self.create_transient_dir()

        self.eden_state_dir = self.data_dir / "eden"
        self.etc_eden_dir = self.data_dir / "etc_eden"
    def create_transient_dir(self) -> None:
        self.transient_dir.mkdir()
        self.etc_eden_dir.mkdir()
        self.home_dir = self.data_dir / "home"
        self.home_dir.mkdir()

        # Set up configuration and hooks inside the etc eden directory.
        hooks_dir = self.etc_eden_dir / "hooks"
        hooks_dir.mkdir()
        os.symlink(FindExe.EDEN_POST_CLONE_HOOK, hooks_dir / "post-clone")
        config_dir = self.etc_eden_dir / "config.d"
        config_dir.mkdir()

        # Set the hg.edenextension path to the empty string, so that
        # we use the version of the eden extension built into hg.par
        toml_config = {"hooks": {"hg.edenextension": ""}}
        with (config_dir / "hooks").open("w") as f:
            toml.dump(toml_config, f)

    def _emit_metadata(self) -> None:
        now = time.time()

        # In addition to recording the current time as a unix timestamp,
        # we also store a tuple of (year, month, day). This is primarily to help make
        # it easier for future verification code if we ever need to alter the
        # verification logic for older versions of the same snapshot type.
        # This will allow more human-readable time comparisons in the code, and makes it
        # easier to compare just based on a prefix of this tuple.
        now_date = datetime.datetime.fromtimestamp(now)
        date_tuple = (
            now_date.year,
            now_date.month,
            now_date.day,
            now_date.hour,
            now_date.minute,
            now_date.second,
        )

        data = {
            "type": self.NAME,
            "description": self.DESCRIPTION,
            "time_created": time.time(),
            "time_created": int(now),
            "date_created": date_tuple,
            "base_dir": str(self.base_dir),
        }
        self._write_metadata(data)

        metadata_path = self.data_dir / "info.json"
        with metadata_path.open("w") as f:
    @property
    def _metadata_path(self) -> Path:
        return self.data_dir / "info.json"

    def _write_metadata(self, data: Dict[str, Any]) -> None:
        with self._metadata_path.open("w") as f:
            json.dump(data, f, indent=2, sort_keys=True)

    def _read_metadata(self) -> Dict[str, Any]:
        with self._metadata_path.open("r") as f:
            return typing.cast(Dict[str, Any], json.load(f))

    def _relocate_to(self, base_dir: Path) -> None:
        """Rewrite data inside an unpacked snapshot directory to refer to the base
        directory using the specified path.

        This replaces absolute path names in various data files to refer to the new
        location. This is needed so that a snapshot originally created in one location
        can be unpacked and used in another location.
        """
        info = self._read_metadata()
        old_base_dir = Path(info["base_dir"])

        # A few files in the RocksDB directory end up with the absolute path
        # embedded in them.
        rocks_db_path = self.eden_state_dir / "storage" / "rocks-db"
        for entry in rocks_db_path.iterdir():
            if entry.name.startswith("LOG") or entry.name.startswith("OPTIONS"):
                self._replace_file_contents(entry, bytes(old_base_dir), bytes(base_dir))

        # Parse eden's config.json to get the list of checkouts, and update each one.
        eden_config_path = self.eden_state_dir / "config.json"
        with eden_config_path.open("r+") as config_file:
            eden_data = json.load(config_file)
            new_config_data = {}
            for _old_checkout_path, checkout_name in eden_data.items():
                new_checkout_path = self.data_dir / checkout_name
                new_config_data[str(new_checkout_path)] = checkout_name
                checkout_state_dir = self.eden_state_dir / "clients" / checkout_name
                self._relocate_checkout(checkout_state_dir, old_base_dir, base_dir)

            config_file.seek(0)
            config_file.truncate()
            json.dump(new_config_data, config_file, indent=2, sort_keys=True)

        # Update the info file with the new base path
        info["base_dir"] = str(base_dir)
        self._write_metadata(info)

    def _relocate_checkout(
        self, checkout_state_dir: Path, old_base_dir: Path, new_base_dir: Path
    ) -> None:
        self._replace_file_contents(
            checkout_state_dir / "config.toml", bytes(old_base_dir), bytes(new_base_dir)
        )
        overlay_dir = checkout_state_dir / "local"
        self._relocate_overlay_dir(
            overlay_dir, bytes(old_base_dir), bytes(new_base_dir)
        )

    def _relocate_overlay_dir(
        self, dir_path: Path, old_data: bytes, new_data: bytes
    ) -> None:
        # Recursively update the contents for every file in the overlay
        # if it contains the old path.
        #
        # This approach is pretty dumb: we aren't processing the overlay file formats at
        # all, just blindly replacing the contents if we happen to see something that
        # looks like the old path. For now this is the easiest thing to do, and the
        # chance of other data looking like the source path is very low.
        #
        # In practice we normally need to update the overlay files for at least the
        # following inodes:
        #   .eden/root
        #   .eden/client
        #   .eden/socket
        #   .hg/sharedpath
        #
        for path in dir_path.iterdir():
            stat_info = path.lstat()
            if stat.S_ISDIR(stat_info.st_mode):
                self._relocate_overlay_dir(path, old_data, new_data)
            else:
                self._replace_file_contents(path, old_data, new_data)

    def _replace_file_contents(
        self, path: Path, old_data: bytes, new_data: bytes
    ) -> None:
        with path.open("rb+") as f:
            file_contents = f.read()
            new_contents = file_contents.replace(old_data, new_data)
            if new_contents != file_contents:
                f.seek(0)
                f.truncate()
                f.write(new_contents)

    def gen_before_eden_running(self) -> None:
        """gen_before_eden_running() will be called when generating a new snapshot after
        the directory structure has been set up but before edenfs is started.
@@ -133,7 +312,7 @@ class BaseSnapshot:
        """
        pass

    def gen_eden_running(self) -> None:
    def gen_eden_running(self, eden: edenclient.EdenFS) -> None:
        """gen_eden_running() will be called when generating a new snapshot once edenfs
        has been started.

@@ -149,37 +328,60 @@ class BaseSnapshot:
        """
        pass

    def prep_resume(self) -> None:
        """prep_resume() will be called when preparing to resume a snapshot, before
        edenfs has been started.

        Subclasses of BaseSnapshot can perform any work they want here.
        """
        pass

    @abc.abstractmethod
    def verify_snapshot_data(
        self, verifier: verify_mod.SnapshotVerifier, eden: edenclient.EdenFS
    ) -> None:
        """Verify that the snapshot data looks correct.

        This method should be overridden by subclasses.
        """
        pass


class HgSnapshot(BaseSnapshot, metaclass=abc.ABCMeta):
    """A helper parent class for BaseSnapshot implementations that creates a single
    checkout of a mercurial repository."""

    def gen_before_eden_running(self) -> None:
        # Prepare the system hgrc file
        self.system_hgrc_path = self.data_dir / "system_hgrc"
    def create_transient_dir(self) -> None:
        super().create_transient_dir()

        # Note that we put the system hgrc file in self.transient_dir rather than
        # self.data_dir:
        # This file is not saved with the snapshot, and is instead regenerated each time
        # we unpack the snapshot. This reflects the fact that we always run with the
        # current system hgrc rather than an old snapshot of the system configs.
        self.system_hgrc_path = self.transient_dir / "system_hgrc"
        self.system_hgrc_path.write_text(hgrepo.HgRepository.get_system_hgrc_contents())

    def hg_repo(self, path: Path) -> hgrepo.HgRepository:
        return hgrepo.HgRepository(str(path), system_hgrc=str(self.system_hgrc_path))

    def gen_before_eden_running(self) -> None:
        logging.info("Creating backing repository...")
        # Create the repository
        backing_repo_path = self.data_dir / "repo"
        backing_repo_path.mkdir()
        self.backing_repo = hgrepo.HgRepository(
            str(backing_repo_path), system_hgrc=str(self.system_hgrc_path)
        )
        self.backing_repo = self.hg_repo(backing_repo_path)
        self.backing_repo.init()

        self.populate_backing_repo()

    def gen_eden_running(self) -> None:
        assert self.eden is not None
    def gen_eden_running(self, eden: edenclient.EdenFS) -> None:
        logging.info("Preparing checkout...")

        checkout_path = self.data_dir / "checkout"
        self.eden.clone(self.backing_repo.path, str(checkout_path))
        eden.clone(self.backing_repo.path, str(self.checkout_path))

        self.checkout_repo = hgrepo.HgRepository(
            str(checkout_path), system_hgrc=str(self.system_hgrc_path)
        )
        self.checkout_repo = self.hg_repo(self.checkout_path)
        self.populate_checkout()

    @abc.abstractmethod
@@ -190,32 +392,62 @@ class HgSnapshot(BaseSnapshot, metaclass=abc.ABCMeta):
    def populate_checkout(self) -> None:
        pass

    def checkout_path(self, *args: Union[Path, str]) -> Path:
        """Compute a path inside the checkout."""
        return Path(self.checkout_repo.path, *args)
    @property
    def checkout_path(self) -> Path:
        """Return the path to the checkout root."""
        return self.data_dir / "checkout"

    def read_file(self, path: Union[Path, str]) -> bytes:
        """Helper function to read a file in the checkout.
        This is primarily used to ensure that the file is loaded.
        """
        file_path = self.checkout_path(path)
        file_path = self.checkout_path / path
        with file_path.open("rb") as f:
            data: bytes = f.read()
        return data

    def write_file(self, path: Union[Path, str], contents: bytes) -> None:
    def write_file(
        self, path: Union[Path, str], contents: bytes, mode: int = 0o644
    ) -> None:
        """Helper function to write a file in the checkout."""
        file_path = self.checkout_path(path)
        file_path = self.checkout_path / path
        file_path.parent.mkdir(parents=True, exist_ok=True)
        with file_path.open("wb") as f:
            os.fchmod(f.fileno(), mode)
            f.write(contents)

    def chmod(self, path: Union[Path, str], mode: int) -> None:
        file_path = self.checkout_path / path
        os.chmod(file_path, mode)

    def mkdir(self, path: Union[Path, str], mode: int = 0o755) -> None:
        dir_path = self.checkout_path / path
        dir_path.mkdir(mode=mode, parents=True, exist_ok=False)
        # Explicitly call chmod() to ignore any umask settings
        dir_path.chmod(mode)

    def list_dir(self, path: Union[Path, str]) -> List[Path]:
        """List the contents of a directory in the checkout.
        This can be used to ensure the directory has been loaded by Eden.
        """
        dir_path = self.checkout_path(path)
        dir_path = self.checkout_path / path
        return list(dir_path.iterdir())

    def make_socket(self, path: Union[Path, str], mode: int = 0o755) -> None:
        socket_path = self.checkout_path / path
        with socket.socket(socket.AF_UNIX) as sock:
            # Call fchmod() before we create the socket to ensure that its initial
            # permissions are not looser than requested. The OS will still honor the
            # umask when creating the socket.
            os.fchmod(sock.fileno(), mode)
            sock.bind(str(socket_path))
            sock.listen(10)
            # Call chmod() to update the permissions, ignoring the umask.
            # Note that we unfortunately must use path.chmod() here rather than
            # os.fchmod(): Linux appears to ignore fchmod() calls after the socket has
            # already been bound.
            socket_path.chmod(mode)


snapshot_types: Dict[str, Type[BaseSnapshot]] = {}

@@ -243,9 +475,43 @@ def generate(snapshot_type: Type[T]) -> Iterator[T]:
    temporary directory that will be cleaned up when exiting the `with` context.
    """
    with create_tmp_dir() as tmpdir:
        with snapshot_type(tmpdir) as snapshot:
            snapshot.generate()
            yield snapshot
        snapshot = snapshot_type(tmpdir)
        snapshot.generate()
        yield snapshot


class UnknownSnapshotTypeError(ValueError):
    def __init__(self, type_name: str) -> None:
        super().__init__(f"unknown snapshot type {type_name!r}")
        self.type_name = type_name


def unpack_into(snapshot_path: Path, output_path: Path) -> BaseSnapshot:
    """Unpack a snapshot into the specified output directory.

    Returns the appropriate BaseSnapshot subclass for this snapshot.
    """
    # GNU tar is smart enough to automatically figure out the correct
    # decompression method.
    untar_cmd = ["tar", "-xf", str(snapshot_path)]
    subprocess.check_call(untar_cmd, cwd=output_path)

    data_dir = output_path / "data"
    try:
        with (data_dir / "info.json").open("r") as info_file:
            info = json.load(info_file)

        type_name = info["type"]
        snapshot_type = snapshot_types.get(type_name)
        if snapshot_type is None:
            raise UnknownSnapshotTypeError(type_name)

        snapshot = snapshot_type(output_path)
        snapshot.resume()
        return snapshot
    except Exception as ex:
        util.cleanup_tmp_dir(data_dir)
        raise


def _import_snapshot_modules() -> None:
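For context on how the pieces above are meant to be combined, here is a minimal usage sketch (not part of the diff). It assumes generate() is wrapped with contextlib.contextmanager, which its Iterator return type and docstring suggest, and that snapshot_type is any registered BaseSnapshot subclass such as the BasicSnapshot class shown later in this commit; the output path is a placeholder.

from pathlib import Path
from typing import Type

from eden.integration.snapshot import snapshot as snapshot_mod


def save_snapshot(
    snapshot_type: Type[snapshot_mod.BaseSnapshot], output_path: Path
) -> None:
    # generate() builds the snapshot in a scratch directory and yields it; while
    # it is still alive, create_tarball() packs the snapshot contents into a
    # tarball that the tests in test_snapshots.py can later unpack and verify.
    with snapshot_mod.generate(snapshot_type) as snapshot:
        snapshot.create_tarball(output_path)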
eden/integration/snapshot/test_snapshots.py (new file, 131 lines)
@@ -0,0 +1,131 @@
#!/usr/bin/env python3
#
# Copyright (c) 2016-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree. An additional grant
# of patent rights can be found in the PATENTS file in the same directory.

import os
import stat
import unittest
from pathlib import Path
from typing import Callable

from eden.integration.lib import edenclient

from . import snapshot as snapshot_mod, verify as verify_mod


class Test(unittest.TestCase):
    """Tests to verify the contents of various saved snapshots.

    All of the test functions in this class are dynamically added by register_tests()
    """

    def _test_snapshot(self, snapshot_path: Path) -> None:
        with snapshot_mod.create_tmp_dir() as tmp_dir:
            snapshot = snapshot_mod.unpack_into(snapshot_path, tmp_dir)
            self._run_test(snapshot)

    def _run_test(self, snapshot: snapshot_mod.BaseSnapshot) -> None:
        verifier = verify_mod.SnapshotVerifier()
        snapshot.verify(verifier)

        # Fail the test if any errors were found.
        # The individual errors will have been printed out previously
        # as they were found.
        if verifier.errors:
            self.fail(f"found {len(verifier.errors)} errors")


class InfraTests(unittest.TestCase):
    """Tests for the snapshot generation/verification code itself."""

    NUM_SNAPSHOTS = 0

    def test_snapshot_list(self) -> None:
        # Ensure that at least one snapshot file was found, so that the tests will
        # fail if we somehow can't find the snapshot data directory correctly.
        self.assertGreater(self.NUM_SNAPSHOTS, 0)

    def test_verify_directory(self) -> None:
        File = verify_mod.ExpectedFile
        Socket = verify_mod.ExpectedSocket
        Symlink = verify_mod.ExpectedSymlink

        expected = [
            File("a/b/normal.txt", b"abc\n", 0o644),
            File("a/b/normal_exe.exe", b"abc\n", 0o755),
            File("a/b/missing.txt", b"abc\n", 0o644),
            File("a/b/wrong_perms.txt", b"abc\n", 0o644),
            File("a/b/wrong_file_type.txt", b"abc\n", 0o644),
            Socket("a/normal.sock", 0o644),
            Socket("a/exe.sock", 0o755),
            Symlink("a/normal.link", b"symlink contents", 0o777),
            Symlink("a/missing.link", b"missing symlink", 0o777),
        ]

        # Define a subclass of HgSnapshot. We define this solely so we can use its
        # helper write_file(), make_socket(), and mkdir() methods
        class MockSnapshot(snapshot_mod.HgSnapshot):
            def populate_backing_repo(self) -> None:
                pass

            def populate_checkout(self) -> None:
                pass

            def verify_snapshot_data(
                self, verifier: verify_mod.SnapshotVerifier, eden: edenclient.EdenFS
            ) -> None:
                pass

        with snapshot_mod.create_tmp_dir() as tmp_dir:
            snapshot = MockSnapshot(tmp_dir)
            snapshot.data_dir.mkdir()
            snapshot.checkout_path.mkdir()
            snapshot.write_file("a/b/normal.txt", b"abc\n", 0o644)
            snapshot.write_file("a/b/normal_exe.exe", b"abc\n", 0o755)
            snapshot.write_file("a/b/wrong_perms.txt", b"abc\n", 0o755)
            snapshot.make_socket("a/b/wrong_file_type.txt", 0o755)
            snapshot.make_socket("a/normal.sock", 0o644)
            snapshot.make_socket("a/exe.sock", 0o755)
            os.symlink(b"symlink contents", snapshot.checkout_path / "a/normal.link")
            # The verifier code only checks files, not directories, so it should not
            # complain about extra directories that may be present.
            snapshot.mkdir("a/b/c/extra_dir", 0o755)

            verifier = verify_mod.SnapshotVerifier()
            verifier.verify_directory(snapshot.checkout_path, expected)

        expected_errors = [
            "a/b/missing.txt: file not present in snapshot",
            "a/missing.link: file not present in snapshot",
            f"a/b/wrong_file_type.txt: expected file type to be {stat.S_IFREG:#o}, "
            f"found {stat.S_IFSOCK:#o}",
            f"a/b/wrong_file_type.txt: expected permissions to be 0o644, found 0o755",
            "a/b/wrong_perms.txt: expected permissions to be 0o644, found 0o755",
        ]
        self.assertEqual(sorted(verifier.errors), sorted(expected_errors))


def register_tests() -> None:
    # Create one test function for each snapshot
    snapshot_dir = Path("eden/test-data/snapshots").resolve()
    for snapshot in snapshot_dir.iterdir():
        # We don't use Path.stem here since it only strips off the very last suffix,
        # so foo.tar.bz2 becomes foo.tar rather than foo.
        stem = snapshot.name.split(".", 1)[0]
        setattr(Test, f"test_{stem}", _create_test_fn(snapshot))
        InfraTests.NUM_SNAPSHOTS += 1


def _create_test_fn(snapshot: Path) -> Callable[[Test], None]:
    def test_fn(self: Test) -> None:
        self._test_snapshot(snapshot)

    return test_fn


register_tests()
@@ -7,6 +7,8 @@
# LICENSE file in the root directory of this source tree. An additional grant
# of patent rights can be found in the PATENTS file in the same directory.

from eden.integration.lib import edenclient
from eden.integration.snapshot import verify as verify_mod
from eden.integration.snapshot.snapshot import HgSnapshot, snapshot_class


@@ -15,22 +17,129 @@ from eden.integration.snapshot.snapshot import HgSnapshot, snapshot_class
    "A simple directory structure with a mix of loaded, materialized, "
    "and unloaded files.",
)
class BaseSnapshot(HgSnapshot):
class BasicSnapshot(HgSnapshot):
    def populate_backing_repo(self) -> None:
        repo = self.backing_repo
        repo.write_file("README.md", "project docs")
        repo.write_file("src/main.c", 'printf("hello world!\\n");\n')
        repo.write_file("src/lib.c", "void do_stuff() {}\n")
        repo.write_file("src/test/test.c", 'printf("success!\\n");\n')
        repo.write_file("include/lib.h", "void do_stuff();\n")
        repo.write_file("other/foo.txt", "foo\n")
        repo.write_file("other/bar.txt", "bar\n")
        repo.write_file("other/a/b/c.txt", "abc\n")
        repo.write_file(".gitignore", "ignored.txt\n")

        repo.write_file("main/loaded_dir/loaded_file.c", "loaded")
        repo.write_file("main/loaded_dir/not_loaded_file.c", "not loaded")
        repo.write_file("main/loaded_dir/not_loaded_exe.sh", "not loaded", mode=0o755)

        repo.write_file(
            "main/materialized_subdir/script.sh", "original script contents", mode=0o755
        )
        repo.write_file("main/materialized_subdir/test.c", "original test contents")
        repo.write_file("main/materialized_subdir/unmodified.txt", "original contents")
        repo.write_file("main/mode_changes/normal_to_exe.txt", "will change mode")
        repo.write_file(
            "main/mode_changes/exe_to_normal.txt", "will change mode", mode=0o755
        )
        repo.write_file("main/mode_changes/normal_to_readonly.txt", "will be readonly")

        repo.write_file("never_accessed/foo/bar/baz.txt", "baz\n")
        repo.write_file("never_accessed/foo/bar/xyz.txt", "xyz\n")
        repo.write_file("never_accessed/foo/file.txt", "data\n")
        repo.commit("Initial commit.")

    def populate_checkout(self) -> None:
        # Load the src directory and the src/lib.c file
        self.list_dir("src")
        self.read_file("src/lib.c")
        # Modify src/test/test.c to force it to be materialized
        self.write_file("src/test/test.c", b"new test contents")
        # Load the main/loaded_dir directory and the main/loaded_dir/lib.c file
        # This currently allocates inode numbers for everything in main/loaded_dir/ and
        # causes main/loaded_dir/ to be tracked in the overlay
        self.list_dir("main/loaded_dir")
        self.read_file("main/loaded_dir/loaded_file.c")

        # Modify some files in main/materialized_subdir to force them to be materialized
        self.write_file(
            "main/materialized_subdir/script.sh", b"new script contents", 0o755
        )
        self.write_file("main/materialized_subdir/test.c", b"new test contents")

        # Test materializing some files by changing their mode
        self.chmod("main/mode_changes/normal_to_exe.txt", 0o755)
        self.chmod("main/mode_changes/exe_to_normal.txt", 0o644)
        self.chmod("main/mode_changes/normal_to_readonly.txt", 0o400)

        # Create a new top-level directory with some new files
        self.write_file("untracked/new/normal.txt", b"new src contents")
        self.write_file("untracked/new/normal2.txt", b"extra src contents")
        self.write_file("untracked/new/readonly.txt", b"new readonly contents", 0o400)
        self.write_file("untracked/executable.exe", b"do stuff", mode=0o755)
        self.make_socket("untracked/everybody.sock", mode=0o666)
        self.make_socket("untracked/owner_only.sock", mode=0o600)

        # Create some untracked files in an existing tracked directory
        self.write_file("main/untracked.txt", b"new new untracked file")
        self.write_file("main/ignored.txt", b"new ignored file")
        self.write_file("main/untracked_dir/foo.txt", b"foobar")

    def verify_snapshot_data(
        self, verifier: verify_mod.SnapshotVerifier, eden: edenclient.EdenFS
    ) -> None:
        # Confirm that `hg status` reports the correct information
        self.verify_hg_status(verifier)

        # Confirm that the files look like what we expect
        File = verify_mod.ExpectedFile
        Socket = verify_mod.ExpectedSocket
        Symlink = verify_mod.ExpectedSymlink
        expected_files = [
            # TODO: These symlink permissions should ideally be 0o777
            Symlink(".eden/root", bytes(self.checkout_path), 0o770),
            Symlink(
                ".eden/client",
                bytes(self.eden_state_dir / "clients" / "checkout"),
                0o770,
            ),
            Symlink(".eden/socket", bytes(self.eden_state_dir / "socket"), 0o770),
            File("README.md", b"project docs", 0o644),
            File(".gitignore", b"ignored.txt\n", 0o644),
            File("main/loaded_dir/loaded_file.c", b"loaded", 0o644),
            File("main/loaded_dir/not_loaded_file.c", b"not loaded", 0o644),
            File("main/loaded_dir/not_loaded_exe.sh", b"not loaded", 0o755),
            File("main/materialized_subdir/script.sh", b"new script contents", 0o755),
            File("main/materialized_subdir/test.c", b"new test contents", 0o644),
            File(
                "main/materialized_subdir/unmodified.txt", b"original contents", 0o644
            ),
            File("main/mode_changes/normal_to_exe.txt", b"will change mode", 0o755),
            File("main/mode_changes/exe_to_normal.txt", b"will change mode", 0o644),
            File(
                "main/mode_changes/normal_to_readonly.txt", b"will be readonly", 0o400
            ),
            File("main/untracked.txt", b"new new untracked file", 0o644),
            File("main/ignored.txt", b"new ignored file", 0o644),
            File("main/untracked_dir/foo.txt", b"foobar", 0o644),
            File("never_accessed/foo/bar/baz.txt", b"baz\n", 0o644),
            File("never_accessed/foo/bar/xyz.txt", b"xyz\n", 0o644),
            File("never_accessed/foo/file.txt", b"data\n", 0o644),
            File("untracked/new/normal.txt", b"new src contents", 0o644),
            File("untracked/new/normal2.txt", b"extra src contents", 0o644),
            File("untracked/new/readonly.txt", b"new readonly contents", 0o400),
            File("untracked/executable.exe", b"do stuff", 0o755),
            Socket("untracked/everybody.sock", 0o666),
            Socket("untracked/owner_only.sock", 0o600),
        ]
        verifier.verify_directory(self.checkout_path, expected_files)

    def verify_hg_status(self, verifier: verify_mod.SnapshotVerifier) -> None:
        expected_status = {
            "main/materialized_subdir/script.sh": "M",
            "main/materialized_subdir/test.c": "M",
            "main/mode_changes/normal_to_exe.txt": "M",
            "main/mode_changes/exe_to_normal.txt": "M",
            # We changed the mode on main/mode_changes/normal_to_readonly.txt,
            # but the change isn't significant to mercurial.
            "untracked/new/normal.txt": "?",
            "untracked/new/normal2.txt": "?",
            "untracked/new/readonly.txt": "?",
            "untracked/executable.exe": "?",
            "untracked/everybody.sock": "?",
            "untracked/owner_only.sock": "?",
            "main/untracked.txt": "?",
            "main/ignored.txt": "I",
            "main/untracked_dir/foo.txt": "?",
        }
        repo = self.hg_repo(self.checkout_path)
        verifier.verify_hg_status(repo, expected_status)
eden/integration/snapshot/verify.py (new file, 174 lines)
@@ -0,0 +1,174 @@
#!/usr/bin/env python3
#
# Copyright (c) 2016-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree. An additional grant
# of patent rights can be found in the PATENTS file in the same directory.

import abc
import os
import stat as stat_mod
from pathlib import Path
from typing import Dict, List

from eden.integration.lib import hgrepo


class ExpectedFileBase(metaclass=abc.ABCMeta):
    def __init__(self, path: str, perms: int, file_type: int) -> None:
        self.path = Path(path)
        self.permissions = perms
        self.file_type = file_type

    def verify(
        self, verifier: "SnapshotVerifier", path: Path, stat_info: os.stat_result
    ) -> None:
        found_perms = stat_mod.S_IMODE(stat_info.st_mode)
        if found_perms != self.permissions:
            verifier.error(
                f"{self.path}: expected permissions to be {self.permissions:#o}, "
                f"found {found_perms:#o}"
            )
        found_file_type = stat_mod.S_IFMT(stat_info.st_mode)
        if found_file_type != self.file_type:
            verifier.error(
                f"{self.path}: expected file type to be {self.file_type:#o}, "
                f"found {found_file_type:#o}"
            )
        else:
            self._verify_contents(verifier, path)

    @abc.abstractmethod
    def _verify_contents(self, verifier: "SnapshotVerifier", path: Path) -> None:
        pass

    def _error(self, msg: str) -> None:
        raise ValueError(msg)


class ExpectedFile(ExpectedFileBase):
    def __init__(self, path: str, contents: bytes, perms: int = 0o644) -> None:
        super().__init__(path, perms, stat_mod.S_IFREG)
        self.contents = contents

    def _verify_contents(self, verifier: "SnapshotVerifier", path: Path) -> None:
        with path.open("rb") as f:
            actual_contents = f.read()
        if actual_contents != self.contents:
            verifier.error(
                f"file contents mismatch for {self.path}:\n"
                f"expected: {self.contents!r}\n"
                f"actual: {actual_contents!r}"
            )


class ExpectedSymlink(ExpectedFileBase):
    def __init__(self, path: str, contents: bytes, perms: int = 0o777) -> None:
        super().__init__(path, perms, stat_mod.S_IFLNK)
        self.contents = contents

    def _verify_contents(self, verifier: "SnapshotVerifier", path: Path) -> None:
        actual_contents = os.readlink(bytes(path))
        if actual_contents != self.contents:
            verifier.error(
                f"symlink contents mismatch for {self.path}:\n"
                f"expected: {self.contents!r}\n"
                f"actual: {actual_contents!r}"
            )


class ExpectedSocket(ExpectedFileBase):
    def __init__(self, path: str, perms: int = 0o755) -> None:
        super().__init__(path, perms, stat_mod.S_IFSOCK)

    def _verify_contents(self, verifier: "SnapshotVerifier", path: Path) -> None:
        pass


class SnapshotVerifier:
    def __init__(self) -> None:
        self.errors: List[str] = []
        self.quiet = False

    def error(self, message: str) -> None:
        self.errors.append(message)
        if not self.quiet:
            print(f"==ERROR== {message}")

    def verify_directory(self, path: Path, expected: List[ExpectedFileBase]) -> None:
        """Confirm that the contents of a directory match the expected file state."""
        found_files = enumerate_directory(path)
        for expected_entry in expected:
            file_stat = found_files.pop(expected_entry.path, None)
            if file_stat is None:
                self.error(f"{expected_entry.path}: file not present in snapshot")
                continue

            full_path = path / expected_entry.path
            try:
                expected_entry.verify(self, full_path, file_stat)
            except AssertionError as ex:
                self.error(f"{expected_entry.path}: {ex}")
                continue

        for path, stat_info in found_files.items():
            if stat_mod.S_ISDIR(stat_info.st_mode):
                # Don't require directories to be listed explicitly in the input files
                continue
            if str(path.parents[0]) == ".hg":
                # Don't complain about files inside the .hg directory that the caller
                # did not explicitly specify. Mercurial can create a variety of files
                # here, and we don't care about checking the exact list of files it
                # happened to create when the snapshot was generated.
                continue
            self.error(f"{path}: unexpected file present in snapshot")

    def verify_hg_status(
        self,
        repo: hgrepo.HgRepository,
        expected: Dict[str, str],
        check_ignored: bool = True,
    ) -> None:
        actual_status = repo.status(include_ignored=check_ignored)

        for path, expected_char in expected.items():
            actual_char = actual_status.pop(path, None)
            if expected_char != actual_char:
                self.error(
                    f"{path}: unexpected hg status difference: "
                    f"reported as {actual_char}, expected {expected_char}"
                )

        for path, actual_char in actual_status.items():
            self.error(
                f"{path}: unexpected hg status difference: "
                f"reported as {actual_char}, expected None"
            )


def enumerate_directory(path: Path) -> Dict[Path, os.stat_result]:
    """
    Recursively walk a directory and return a dictionary of all of the files and
    directories it contains.

    Returns a dictionary of [path -> os.stat_result]
    The returned paths are relative to the input directory.
    """
    entries: Dict[Path, os.stat_result] = {}
    _enumerate_directory_helper(path, Path(), entries)
    return entries


def _enumerate_directory_helper(
    root_path: Path, rel_path: Path, results: Dict[Path, os.stat_result]
) -> None:
    for entry in os.scandir(root_path / rel_path):
        # Current versions of typeshed don't know about the follow_symlinks argument,
        # so ignore type errors on the next line.
        stat_info = entry.stat(follow_symlinks=False)  # type: ignore
        entry_path = rel_path / entry.name
        results[entry_path] = stat_info
        if stat_mod.S_ISDIR(stat_info.st_mode):
            _enumerate_directory_helper(root_path, entry_path, results)
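As a standalone illustration of the verify helpers above (not part of the diff), the sketch below checks an arbitrary directory tree against a small list of expected entries; the paths, contents, and permissions used here are placeholders.

from pathlib import Path

from eden.integration.snapshot import verify as verify_mod


def check_tree(root: Path) -> None:
    # Describe the files we expect to find under `root`, then let the verifier
    # report any missing, extra, or mismatched entries.
    expected = [
        verify_mod.ExpectedFile("README.md", b"project docs", 0o644),
        verify_mod.ExpectedSocket("run/daemon.sock", 0o600),
    ]
    verifier = verify_mod.SnapshotVerifier()
    verifier.verify_directory(root, expected)
    for error in verifier.errors:
        print(error)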
eden/test-data/snapshots/basic-20181030.tar.xz (new binary file, not shown)