sapling/eden/integration/takeover_test.py

235 lines
9.2 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
#
# Copyright (c) 2016-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree. An additional grant
# of patent rights can be found in the PATENTS file in the same directory.
import os
import resource
import sys
import threading
from .lib import testcase
@testcase.eden_repo_test
class TakeoverTest(testcase.EdenRepoTest):
    """Tests that exercise ``self.eden.graceful_restart()`` while the mount
    is in use.

    Each test opens (or actively writes to) files inside ``self.mount``,
    restarts edenfs, and then checks that the handles opened before the
    restart still behave correctly afterwards.
    """

    def populate_repo(self):
        """Create the two commits that the tests in this class rely on.

        The ``tree/hello`` and ``tree/deleted`` files are each exactly two
        pages long (``self.page1`` followed by ``self.page2``) so that tests
        can read one page before a restart and the other page after it.
        The resulting commit identifiers are stored in ``self.commit1`` and
        ``self.commit2``.
        """
        self.pagesize = resource.getpagesize()
        self.page1 = "1" * self.pagesize
        self.page2 = "2" * self.pagesize
        self.repo.write_file('tree/hello', self.page1 + self.page2)
        self.repo.write_file('tree/deleted', self.page1 + self.page2)
        self.repo.write_file('src/main.c', 'hello world')
        self.commit1 = self.repo.commit('Initial commit.')

        self.repo.write_file('src/main.c', 'hello world v2')
        self.repo.write_file('src/test/test1.py', 'test1')
        self.repo.write_file('src/test/test2.py', 'test2')
        self.commit2 = self.repo.commit('Initial commit.')

    def select_storage_engine(self):
        """Return the storage engine name to use for these tests.

        We need to persist data across restarts, so use 'sqlite'.
        """
        return 'sqlite'

    def edenfs_logging_settings(self):
        """Return a dict mapping logging category names to level names.

        NOTE(review): presumably consumed by the EdenRepoTest harness when
        starting edenfs -- confirm against the base class.
        """
        if self._testMethodName == 'test_takeover_with_io':
            # test_takeover_with_io causes lots of I/O, so do not enable
            # verbose logging of I/O operations in this test.
            return {}
        return {'eden.strace': 'DBG7', 'eden.fs.fuse': 'DBG7'}

    def do_takeover_test(self):
        """Restart edenfs while holding open handles and verify they survive.

        Handles are held on: a regular file, a file from the commit that has
        been unlinked, a locally created file that has been unlinked, and an
        unlinked (empty) directory.  After the restart every handle must
        still be usable, and the regular file must be re-openable by path.
        """
        hello = os.path.join(self.mount, 'tree/hello')
        deleted = os.path.join(self.mount, 'tree/deleted')
        deleted_local = os.path.join(self.mount, 'deleted-local')

        # To test our handling of unlinked inodes, in addition
        # to unlinking something that is in the manifest we
        # need to check that we handle the case of a local
        # file being deleted to make sure that we cover both
        # code paths for FileInode.
        with open(deleted_local, 'w') as dl:
            dl.write(self.page1)
            dl.write(self.page2)

        # We'd like to make sure that we do something reasonable
        # for directories that have been unlinked and that are
        # still referenced via a file descriptor.  Ideally we'd call
        # opendir() here and then readdir() it after we've performed
        # the graceful restart, but we can't directly call those
        # functions from python.  The approach used here is to
        # open a file descriptor to the directory and then try
        # to stat() it after the restart.  Since the directory
        # has to be empty in order to be unlinked, a readdir
        # from it wouldn't return any interesting results anyway.
        deleted_dir = os.path.join(self.mount, 'deleted-dir')
        os.mkdir(deleted_dir)
        deleted_dir_fd = os.open(deleted_dir, os.O_RDONLY)
        try:
            os.rmdir(deleted_dir)

            with open(hello, 'r') as f, \
                    open(deleted, 'r') as d, \
                    open(deleted_local, 'r') as dl:
                # Read the first page only (rather than the whole file)
                # before we restart the process.
                # This is so that we can check that the kernel really
                # does call in to us for the second page and that we're
                # really servicing the read for the second page and that
                # it isn't just getting served from the kernel buffer cache
                self.assertEqual(self.page1, f.read(self.pagesize))

                # Let's make sure that unlinked inodes continue to
                # work appropriately too.  We've opened the file
                # handles and are holding them alive in `d` and `dl`,
                # so now let's unlink them from the filesystem
                os.unlink(deleted)
                os.unlink(deleted_local)

                print('=== beginning restart ===', file=sys.stderr)
                self.eden.graceful_restart()
                print('=== restart complete ===', file=sys.stderr)

                # Ensure that our file handle is still live across
                # the restart boundary
                f.seek(0)
                self.assertEqual(self.page1, f.read(self.pagesize))
                self.assertEqual(self.page2, f.read(self.pagesize))

                # We should be able to read from the `d` file handle
                # even though we deleted the file from the tree
                self.assertEqual(self.page1, d.read(self.pagesize))
                self.assertEqual(self.page2, d.read(self.pagesize))
                # Likewise for the `dl` file handle
                self.assertEqual(self.page1, dl.read(self.pagesize))
                self.assertEqual(self.page2, dl.read(self.pagesize))

            # Now check that the unlinked directory handle still seems
            # connected.  This is difficult to do directly in python;
            # the directory had to be empty in order to be removed
            # so even if we could read its directory entries there
            # wouldn't be anything to read.
            # Note that os.stat() will throw if the fd is deemed
            # bad either by the kernel or the eden instance,
            # so we're just calling it and discarding the return
            # value.
            os.stat(deleted_dir_fd)
        finally:
            # Always release the descriptor, even when an assertion above
            # fails, so that a failing test does not leave an open handle
            # into the mount behind.
            os.close(deleted_dir_fd)

        # Let's also test opening the same file up again,
        # just to make sure that that is still working after
        # the graceful restart.
        with open(hello, 'r') as f:
            self.assertEqual(self.page1, f.read(self.pagesize))
            self.assertEqual(self.page2, f.read(self.pagesize))

    def test_takeover(self):
        """Run the basic takeover scenario with no extra setup."""
        return self.do_takeover_test()

    def test_takeover_after_diff_revisions(self):
        """Run the takeover scenario after a revision-to-revision status
        call."""
        # Make a getScmStatusBetweenRevisions() call to Eden.
        # Previously this thrift call caused Eden to create temporary inode
        # objects outside of the normal root inode tree, and this would cause
        # Eden to crash when shutting down afterwards.
        with self.get_thrift_client() as client:
            client.getScmStatusBetweenRevisions(
                self.mount, self.commit1, self.commit2
            )

        return self.do_takeover_test()

    def test_takeover_with_io(self):
        """Restart edenfs while several threads continuously write to files
        in the mount.

        Any exception raised inside a writer thread is collected and
        reported as a test failure once all threads have been joined.
        """
        num_threads = 4
        write_chunk_size = 1024 * 1024
        max_file_length = write_chunk_size * 100

        # TODO: Setting this higher than 1 currently makes it likely that
        # edenfs will crash during restart.
        # There are still some other bugs we need to track down in the restart
        # ordering.
        num_restarts = 1

        stop = threading.Event()
        bufs = [b'x' * write_chunk_size, b'y' * write_chunk_size]

        # (thread_id, exception) pairs recorded by failed writer threads.
        errors = []

        def do_io(thread_id, running_event):
            """Write to data<thread_id>.log until `stop` is set."""
            path = os.path.join(
                self.mount, 'src', 'test', 'data%d.log' % thread_id
            )
            try:
                with open(path, 'wb') as f:
                    # Use raw file descriptors to avoid going through
                    # python's I/O buffering code.
                    fd = f.fileno()

                    buf_idx = 0
                    buf = bufs[buf_idx]
                    offset = 0

                    # Repeatedly write and rewrite the same file,
                    # alternating between two different data buffers.
                    running_event.set()
                    while True:
                        os.pwrite(fd, buf, offset)
                        if stop.is_set():
                            return
                        offset += len(buf)
                        if offset >= max_file_length:
                            buf_idx += 1
                            buf = bufs[buf_idx % len(bufs)]
                            offset = 0
            except Exception as ex:
                # Exceptions raised in a worker thread never reach the test
                # runner on their own, so record them for the main thread.
                errors.append((thread_id, ex))
            finally:
                # If we failed before reaching the write loop the main
                # thread would otherwise block forever waiting on this event.
                running_event.set()

        # Log the mount point's device ID at the start of the test
        # (Just in case anything hangs and we need to abort the mount
        # using /sys/fs/fuse/connections/<dev>/)
        st = os.lstat(self.mount)
        print('=== eden mount device=%d ===' % st.st_dev, file=sys.stderr)

        # Start several threads doing I/O while we perform a takeover
        threads = []
        try:
            running_events = []
            for n in range(num_threads):
                running = threading.Event()
                thread = threading.Thread(target=do_io, args=(n, running))
                thread.start()
                threads.append(thread)
                running_events.append(running)

            # Wait until all threads have started and are doing I/O
            for event in running_events:
                event.wait()

            # Restart edenfs
            for n in range(num_restarts):
                print('=== beginning restart %d ===' % n, file=sys.stderr)
                self.eden.graceful_restart()
                print('=== restart %d complete ===' % n, file=sys.stderr)
        finally:
            stop.set()
            for thread in threads:
                thread.join()

        if errors:
            self.fail('I/O thread(s) failed during takeover: %r' % (errors,))

    def test_takeover_preserves_inode_numbers_for_open_nonmaterialized_files(
        self
    ):
        """Check that the inode number of an open file from the commit is
        unchanged after a restart, both via the already-open descriptor and
        via a freshly opened one.
        """
        hello = os.path.join(self.mount, 'tree/hello')

        fd = os.open(hello, os.O_RDONLY)
        try:
            inode_number = os.fstat(fd).st_ino

            self.eden.graceful_restart()

            self.assertEqual(inode_number, os.fstat(fd).st_ino)
        finally:
            os.close(fd)

        # Re-open by path after the restart; the inode number must match.
        fd = os.open(hello, os.O_RDONLY)
        try:
            self.assertEqual(inode_number, os.fstat(fd).st_ino)
        finally:
            os.close(fd)