have the snapshot code also fix up stored UIDs and GIDs

Summary:
When unpacking a snapshot, rewrite Eden's inode metadata table to change the
UID and GID values to the current user.

This is needed so that the current user can access files inside the mounted
checkout correctly.

Reviewed By: wez

Differential Revision: D12966640

fbshipit-source-id: eec4aba690117bf7b8f944221b31b7c7cc66fc0c
This commit is contained in:
Adam Simpkins 2018-11-07 19:17:53 -08:00 committed by Facebook Github Bot
parent 134f5dfc2b
commit b5690cbec0
2 changed files with 215 additions and 10 deletions

View File

@ -0,0 +1,191 @@
#!/usr/bin/env python3
#
# Copyright (c) 2016-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree. An additional grant
# of patent rights can be found in the PATENTS file in the same directory.
import os
import shutil
import struct
import tempfile
import typing
from pathlib import Path
from typing import BinaryIO, Type
class MdvHeader:
    """Header record found at the start of an Eden inode metadata table file.

    Instances hold the decoded header fields; the trailing padding word is not
    stored and is always written back as 0.
    """

    # On-disk layout. The "=" prefix selects native byte order with standard
    # sizes and no alignment padding, so the packed size is exactly 32 bytes.
    #   uint32_t magic;
    #   uint32_t version;        // 1
    #   uint32_t recordVersion;  // T::VERSION
    #   uint32_t recordSize;     // sizeof(T)
    #   uint64_t entryCount;     // end() - begin()
    #   uint64_t padding;
    FORMAT = struct.Struct("=4sIIIQQ")
    MAGIC = b"MDV\0"
    VERSION_1 = 1

    def __init__(
        self,
        magic: bytes,
        version: int,
        record_version: int,
        record_size: int,
        entry_count: int,
    ) -> None:
        self.magic = magic
        self.version = version
        self.record_version = record_version
        self.record_size = record_size
        self.entry_count = entry_count

    def serialize(self) -> bytes:
        """Return the packed on-disk representation of this header."""
        return self.FORMAT.pack(
            self.magic,
            self.version,
            self.record_version,
            self.record_size,
            self.entry_count,
            0,  # padding
        )

    @classmethod
    def parse(cls: Type["MdvHeader"], data: bytes) -> "MdvHeader":
        """Decode a header from exactly ``FORMAT.size`` bytes.

        Raises struct.error if ``data`` has any other length.
        """
        fields = cls.FORMAT.unpack(data)
        (magic, version, record_version, record_size, entry_count, _padding) = fields
        return cls(magic, version, record_version, record_size, entry_count)

    @classmethod
    def read(cls: Type["MdvHeader"], input_file: BinaryIO) -> "MdvHeader":
        """Read and decode a header from the current position of ``input_file``.

        Raises Exception if fewer than ``FORMAT.size`` bytes are available.
        """
        data = input_file.read(cls.FORMAT.size)
        # Check the length up front so that a truncated file produces a clear
        # error rather than an opaque struct.error from unpack().
        if len(data) != cls.FORMAT.size:
            raise Exception(f"short inode metadata table header: size={len(data)}")
        return cls.parse(data)
class InodeMetadataV0:
    """A single version-0 inode record from an Eden inode metadata table.

    Instances hold the decoded fields; the padding word is not stored and is
    always written back as 0.
    """

    # On-disk layout. The "=" prefix selects native byte order with standard
    # sizes and no alignment padding, so the packed size is exactly 48 bytes.
    #   uint64_t inode_number
    #   mode_t mode
    #   uid_t uid
    #   gid_t gid
    #   uint32_t padding
    #   uint64_t atime  # encoded as EdenTimestamp (nanoseconds from 1901-12-13)
    #   uint64_t mtime  # EdenTimestamp
    #   uint64_t ctime  # EdenTimestamp
    FORMAT = struct.Struct("=QIIIIQQQ")
    VERSION = 0

    def __init__(
        self,
        inode_number: int,
        mode: int,
        uid: int,
        gid: int,
        atime: int,
        mtime: int,
        ctime: int,
    ) -> None:
        self.inode_number = inode_number
        self.mode = mode
        self.uid = uid
        self.gid = gid
        self.atime = atime
        self.mtime = mtime
        self.ctime = ctime

    def serialize(self) -> bytes:
        """Return the packed on-disk representation of this record."""
        return self.FORMAT.pack(
            self.inode_number,
            self.mode,
            self.uid,
            self.gid,
            0,  # padding
            self.atime,
            self.mtime,
            self.ctime,
        )

    @classmethod
    def parse(cls: Type["InodeMetadataV0"], data: bytes) -> "InodeMetadataV0":
        """Decode a record from exactly ``FORMAT.size`` bytes.

        Raises struct.error if ``data`` has any other length.
        """
        fields = cls.FORMAT.unpack(data)
        (inode_number, mode, uid, gid, _padding, atime, mtime, ctime) = fields
        return cls(inode_number, mode, uid, gid, atime, mtime, ctime)

    @classmethod
    def read(cls: Type["InodeMetadataV0"], input_file: BinaryIO) -> "InodeMetadataV0":
        """Read and decode one record from the current position of ``input_file``.

        Raises Exception if fewer than ``FORMAT.size`` bytes are available.
        """
        data = input_file.read(cls.FORMAT.size)
        if len(data) != cls.FORMAT.size:
            # This reads an inode entry, not the table header, so say so.
            raise Exception(f"short inode metadata table entry: size={len(data)}")
        return cls.parse(data)
def update_ownership(metadata_path: Path, uid: int, gid: int) -> None:
    """Update an Eden inode metadata table file, replacing the UID and GID fields
    for all inodes with the specified values.

    The updated table is written to a temporary file in the same directory as
    metadata_path and then renamed over the original, so the original file is
    left untouched if anything fails part way through.

    Raises an Exception if the file header does not describe a supported table
    format, or if the table contains fewer entries than the header claims.
    """
    with typing.cast(BinaryIO, metadata_path.open("rb")) as input_file:
        header = MdvHeader.read(input_file)
        _check_supported_header(header)

        # Create the temporary file next to the original so that the final
        # rename stays within a single directory.
        tmp_fd, tmp_file_name = tempfile.mkstemp(
            dir=str(metadata_path.parent), prefix=metadata_path.name + "."
        )
        try:
            # The with block closes (and therefore flushes) the temporary file
            # before it is renamed into place.
            with os.fdopen(tmp_fd, "wb") as tmp_file:
                tmp_file.write(header.serialize())
                _rewrite_ownership_v0(input_file, tmp_file, header, uid, gid)
            # os.replace() overwrites an existing destination on all platforms.
            os.replace(tmp_file_name, metadata_path)
        except BaseException:
            # Best-effort removal of the partially written temporary file; the
            # original error is what matters, so ignore failures to unlink.
            try:
                os.unlink(tmp_file_name)
            except OSError:
                pass
            raise


def _check_supported_header(header: MdvHeader) -> None:
    """Raise an Exception unless header describes a table of InodeMetadataV0 records."""
    if header.magic != MdvHeader.MAGIC:
        raise Exception(
            "unsupported inode metadata table file format: "
            f"magic={header.magic!r}"
        )
    if header.version != MdvHeader.VERSION_1:
        raise Exception(
            "unsupported inode metadata table file format: "
            f"version={header.version}"
        )
    if header.record_version != InodeMetadataV0.VERSION:
        raise Exception(
            "unsupported inode metadata table file format: "
            f"record_version={header.record_version}"
        )
    if header.record_size != InodeMetadataV0.FORMAT.size:
        raise Exception(
            "unsupported inode metadata table file format: "
            f"record_size: {header.record_size} != {InodeMetadataV0.FORMAT.size}"
        )
def _rewrite_ownership_v0(
    input_file: BinaryIO, new_file: BinaryIO, header: MdvHeader, uid: int, gid: int
) -> None:
    """Copy the table entries from input_file to new_file with new ownership.

    input_file must be positioned just after the table header. Exactly
    header.entry_count InodeMetadataV0 records are read, their uid and gid
    fields are replaced with the given values, and the records are written to
    new_file. Whatever follows the last record is then copied unchanged.

    Raises an Exception if input_file ends before all entries have been read.
    """
    entry_size = InodeMetadataV0.FORMAT.size
    for _ in range(header.entry_count):
        entry_data = input_file.read(entry_size)
        if len(entry_data) != entry_size:
            raise Exception("inode metadata table appears truncated")

        entry = InodeMetadataV0.parse(entry_data)
        entry.uid = uid
        entry.gid = gid
        new_file.write(entry.serialize())

    # Copy the remaining file contents as is. This is normally all 0-filled data
    # that provides space for new entries to be written in the future. Stream it
    # in chunks rather than loading the whole remainder into memory at once.
    shutil.copyfileobj(input_file, new_file)

View File

@ -16,7 +16,6 @@ import os
import socket
import stat
import subprocess
import tempfile
import time
import types
import typing
@ -28,7 +27,7 @@ from eden.integration.lib import edenclient, hgrepo, util
from eden.integration.lib.find_executables import FindExe
from eden.integration.lib.temporary_directory import create_tmp_dir
from . import verify as verify_mod
from . import inode_metadata as inode_metadata_mod, verify as verify_mod
T = TypeVar("T", bound="BaseSnapshot")
@ -119,7 +118,10 @@ class BaseSnapshot(metaclass=abc.ABCMeta):
# Rewrite the config state to point to "/tmp/dummy_snapshot_path"
# This isn't really strictly necessary, but just makes the state that
# gets saved slightly more deterministic.
self._relocate_to(Path("/tmp/dummy_snapshot_path"))
#
# Also update uid and gid information to 99.
# This is commonly the UID & GID for "nobody" on many systems.
self._update_eden_state(Path("/tmp/dummy_snapshot_path"), uid=99, gid=99)
def verify(self, verifier: verify_mod.SnapshotVerifier) -> None:
"""Verify that the snapshot data looks correct.
@ -155,7 +157,7 @@ class BaseSnapshot(metaclass=abc.ABCMeta):
and recreates any transient state needed for the snapshot.
"""
self.create_transient_dir()
self._relocate_to(self.base_dir)
self._update_eden_state(self.base_dir, uid=os.getuid(), gid=os.getgid())
self.prep_resume()
def _create_directories(self) -> None:
@ -220,13 +222,18 @@ class BaseSnapshot(metaclass=abc.ABCMeta):
with self._metadata_path.open("r") as f:
return typing.cast(Dict[str, Any], json.load(f))
def _relocate_to(self, base_dir: Path) -> None:
"""Rewrite data inside an unpacked snapshot directory to refer to the base
directory using the specified path.
def _update_eden_state(self, base_dir: Path, uid: int, gid: int) -> None:
"""Update Eden's stored state for the snapshot so it will work in a new
location.
This replaces absolute path names in various data files to refer to the new
location. This is needed so that a snapshot originally created in one location
can be unpacked and used in another location.
- Replace absolute path names in various data files to refer to the new
location. This is needed so that a snapshot originally created in one
location can be unpacked and used in another location.
- Update UID and GID values stored by Eden to reflect the specified values.
This is needed so that unpacked snapshots can be used by the current user
without getting permissions errors when they try to access files inside the
Eden checkouts.
"""
info = self._read_metadata()
old_base_dir = Path(info["base_dir"])
@ -248,6 +255,7 @@ class BaseSnapshot(metaclass=abc.ABCMeta):
new_config_data[str(new_checkout_path)] = checkout_name
checkout_state_dir = self.eden_state_dir / "clients" / checkout_name
self._relocate_checkout(checkout_state_dir, old_base_dir, base_dir)
self._update_ownership(checkout_state_dir, uid, gid)
config_file.seek(0)
config_file.truncate()
@ -257,6 +265,12 @@ class BaseSnapshot(metaclass=abc.ABCMeta):
info["base_dir"] = str(base_dir)
self._write_metadata(info)
def _update_ownership(self, checkout_state_dir: Path, uid: int, gid: int) -> None:
    """Rewrite the checkout's inode metadata table so that every inode is
    recorded with the given uid and gid.

    The table is looked up at local/metadata.table underneath
    checkout_state_dir.
    """
    inode_metadata_mod.update_ownership(
        checkout_state_dir / "local" / "metadata.table", uid, gid
    )
def _relocate_checkout(
self, checkout_state_dir: Path, old_base_dir: Path, new_base_dir: Path
) -> None: