2017-11-20 22:34:37 +03:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
#
|
|
|
|
# Copyright (c) 2016-present, Facebook, Inc.
|
|
|
|
# All rights reserved.
|
|
|
|
#
|
|
|
|
# This source code is licensed under the BSD-style license found in the
|
|
|
|
# LICENSE file in the root directory of this source tree. An additional grant
|
|
|
|
# of patent rights can be found in the PATENTS file in the same directory.
|
|
|
|
|
|
|
|
import os
|
2018-01-10 09:01:00 +03:00
|
|
|
import resource
|
2017-11-20 22:34:37 +03:00
|
|
|
import sys
|
2018-03-29 08:10:46 +03:00
|
|
|
import threading
|
2017-11-20 22:34:37 +03:00
|
|
|
|
|
|
|
from .lib import testcase
|
|
|
|
|
|
|
|
|
|
|
|
@testcase.eden_repo_test
class TakeoverTest(testcase.EdenRepoTest):
    """Exercise edenfs graceful restart ("takeover") against a live mount.

    Each test opens files or directories inside the mount, asks the running
    daemon to perform ``self.eden.graceful_restart()``, and then verifies that
    the previously opened handles (and freshly opened ones) still behave as
    expected afterwards.
    """

    def populate_repo(self) -> None:
        """Create the two commits used by the tests.

        The first commit contains two files under ``tree/`` that are exactly
        two pages long (one page of ``"1"`` followed by one page of ``"2"``)
        plus ``src/main.c``.  The second commit rewrites ``src/main.c`` and
        adds two files under ``src/test/``.  The page size, page contents and
        both commit identifiers are stored on ``self`` for later use.
        """
        self.pagesize = resource.getpagesize()
        self.page1 = "1" * self.pagesize
        self.page2 = "2" * self.pagesize
        self.repo.write_file('tree/hello', self.page1 + self.page2)
        self.repo.write_file('tree/deleted', self.page1 + self.page2)
        self.repo.write_file('src/main.c', 'hello world')
        self.commit1 = self.repo.commit('Initial commit.')

        self.repo.write_file('src/main.c', 'hello world v2')
        self.repo.write_file('src/test/test1.py', 'test1')
        self.repo.write_file('src/test/test2.py', 'test2')
        # NOTE(review): this message duplicates the first commit's message;
        # it is kept unchanged because nothing here depends on the text.
        self.commit2 = self.repo.commit('Initial commit.')

    def select_storage_engine(self) -> str:
        """Return ``'sqlite'``: we need to persist data across restarts."""
        return 'sqlite'

    def edenfs_logging_settings(self) -> dict:
        """Return the edenfs log-category settings for the current test.

        Returns:
            An empty dict for ``test_takeover_with_io`` and verbose (``DBG7``)
            settings for ``eden.strace`` and ``eden.fs.fuse`` otherwise.
        """
        if self._testMethodName == 'test_takeover_with_io':
            # test_takeover_with_io causes lots of I/O, so do not enable
            # verbose logging of I/O operations in this test.
            return {}
        return {'eden.strace': 'DBG7', 'eden.fs.fuse': 'DBG7'}

    def do_takeover_test(self) -> None:
        """Check that open handles keep working across a graceful restart.

        Covers a regular file, a file from the commit that is unlinked while
        open, a locally created file that is unlinked while open, and an
        unlinked directory that is still referenced by a file descriptor.
        """
        hello = os.path.join(self.mount, 'tree/hello')
        deleted = os.path.join(self.mount, 'tree/deleted')
        deleted_local = os.path.join(self.mount, 'deleted-local')

        # To test our handling of unlinked inodes, in addition
        # to unlinking something that is in the manifest we
        # need to check that we handle the case of a local
        # file being deleted to make sure that we cover both
        # code paths for FileInode.
        with open(deleted_local, 'w') as dl:
            dl.write(self.page1)
            dl.write(self.page2)

        # We'd like to make sure that we do something reasonable
        # for directories that have been unlinked and that are
        # still referenced via a file descriptor.  Ideally we'd call
        # opendir() here and then readdir() it after we've performed
        # the graceful restart, but we can't directly call those
        # functions from python.  The approach used here is to
        # open a file descriptor to the directory and then try
        # to stat() it after the restart.  Since the directory
        # has to be empty in order to be unlinked, a readdir
        # from it wouldn't return any interesting results anyway.
        deleted_dir = os.path.join(self.mount, 'deleted-dir')
        os.mkdir(deleted_dir)
        deleted_dir_fd = os.open(deleted_dir, os.O_RDONLY)
        try:
            os.rmdir(deleted_dir)

            with open(hello, 'r') as f, \
                    open(deleted, 'r') as d, \
                    open(deleted_local, 'r') as dl:
                # Read the first page only (rather than the whole file)
                # before we restart the process.
                # This is so that we can check that the kernel really
                # does call in to us for the second page and that we're
                # really servicing the read for the second page and that
                # it isn't just getting served from the kernel buffer cache
                self.assertEqual(self.page1, f.read(self.pagesize))

                # Let's make sure that unlinked inodes continue to
                # work appropriately too.  We've opened the file
                # handles and are holding them alive in `d` and `dl`,
                # so now let's unlink them from the filesystem
                os.unlink(deleted)
                os.unlink(deleted_local)

                print('=== beginning restart ===', file=sys.stderr)
                self.eden.graceful_restart()
                print('=== restart complete ===', file=sys.stderr)

                # Ensure that our file handle is still live across
                # the restart boundary
                f.seek(0)
                self.assertEqual(self.page1, f.read(self.pagesize))
                self.assertEqual(self.page2, f.read(self.pagesize))

                # We should be able to read from the `d` file handle
                # even though we deleted the file from the tree
                self.assertEqual(self.page1, d.read(self.pagesize))
                self.assertEqual(self.page2, d.read(self.pagesize))
                # Likewise for the `dl` file handle
                self.assertEqual(self.page1, dl.read(self.pagesize))
                self.assertEqual(self.page2, dl.read(self.pagesize))

            # Now check that the unlinked directory handle still seems
            # connected.  This is difficult to do directly in python;
            # the directory had to be empty in order to be removed
            # so even if we could read its directory entries there
            # wouldn't be anything to read.
            # Note that os.stat() will throw if the fd is deemed
            # bad either by the kernel or the eden instance,
            # so we're just calling it and discarding the return
            # value.
            os.stat(deleted_dir_fd)
        finally:
            # Always release the descriptor, even when an assertion or the
            # restart itself fails, so a failing test does not leak it.
            os.close(deleted_dir_fd)

        # Let's also test opening the same file up again,
        # just to make sure that that is still working after
        # the graceful restart.
        with open(hello, 'r') as f:
            self.assertEqual(self.page1, f.read(self.pagesize))
            self.assertEqual(self.page2, f.read(self.pagesize))

    def test_takeover(self) -> None:
        """Run the basic takeover scenario."""
        return self.do_takeover_test()

    def test_takeover_after_diff_revisions(self) -> None:
        """Run the takeover scenario after diffing the two commits."""
        # Make a getScmStatusBetweenRevisions() call to Eden.
        # Previously this thrift call caused Eden to create temporary inode
        # objects outside of the normal root inode tree, and this would cause
        # Eden to crash when shutting down afterwards.
        with self.get_thrift_client() as client:
            client.getScmStatusBetweenRevisions(
                self.mount, self.commit1, self.commit2
            )

        return self.do_takeover_test()

    def test_takeover_with_io(self) -> None:
        """Perform a graceful restart while several threads write to the mount."""
        num_threads = 4
        write_chunk_size = 1024 * 1024
        max_file_length = write_chunk_size * 100

        # TODO: Setting this higher than 1 currently makes it likely that
        # edenfs will crash during restart.
        # There are still some other bugs we need to track down in the restart
        # ordering.
        num_restarts = 1

        stop = threading.Event()
        bufs = [b'x' * write_chunk_size, b'y' * write_chunk_size]

        def do_io(thread_id, running_event):
            """Write to a per-thread file in a loop until ``stop`` is set.

            ``running_event`` is set once the file is open and the write loop
            is about to start.
            """
            path = os.path.join(
                self.mount, 'src', 'test', 'data%d.log' % thread_id
            )
            try:
                with open(path, 'wb') as f:
                    # Use raw file descriptors to avoid going through
                    # python's I/O buffering code.
                    fd = f.fileno()

                    buf_idx = 0
                    buf = bufs[buf_idx]
                    offset = 0

                    # Repeatedly write and rewrite the same file,
                    # alternating between two different data buffers.
                    running_event.set()
                    while True:
                        os.pwrite(fd, buf, offset)
                        if stop.is_set():
                            return
                        offset += len(buf)
                        if offset >= max_file_length:
                            buf_idx += 1
                            buf = bufs[buf_idx % len(bufs)]
                            offset = 0
            finally:
                # If opening the file failed, the event was never set and the
                # main thread would block forever waiting for this worker.
                # Setting an already-set event is harmless.
                running_event.set()

        # Log the mount point's device ID at the start of the test
        # (Just in case anything hangs and we need to abort the mount
        # using /sys/fs/fuse/connections/<dev>/)
        st = os.lstat(self.mount)
        print('=== eden mount device=%d ===' % st.st_dev, file=sys.stderr)

        # Start several threads doing I/O while we perform a takeover
        threads = []
        try:
            running_events = []
            for n in range(num_threads):
                running = threading.Event()
                thread = threading.Thread(target=do_io, args=(n, running))
                thread.start()
                threads.append(thread)
                running_events.append(running)

            # Wait until all threads have started and are doing I/O
            for event in running_events:
                event.wait()

            # Restart edenfs
            for n in range(num_restarts):
                print('=== beginning restart %d ===' % n, file=sys.stderr)
                self.eden.graceful_restart()
                print('=== restart %d complete ===' % n, file=sys.stderr)
        finally:
            # Tell the workers to stop and wait for them, whether or not the
            # restart succeeded.
            stop.set()
            for thread in threads:
                thread.join()

    def test_takeover_preserves_inode_numbers_for_open_nonmaterialized_files(
        self
    ) -> None:
        """Check that ``st_ino`` of ``tree/hello`` is stable across a restart.

        The inode number is compared both through a descriptor held open
        across the restart and through one opened afterwards.
        """
        hello = os.path.join(self.mount, 'tree/hello')

        fd = os.open(hello, os.O_RDONLY)
        try:
            inode_number = os.fstat(fd).st_ino

            self.eden.graceful_restart()

            self.assertEqual(inode_number, os.fstat(fd).st_ino)
        finally:
            os.close(fd)

        fd = os.open(hello, os.O_RDONLY)
        try:
            self.assertEqual(inode_number, os.fstat(fd).st_ino)
        finally:
            os.close(fd)
|