Kernel: Support read-only filesystem mounts

This adds support for MS_RDONLY, a mount flag that tells the kernel to disallow
any attempts to write to the newly mounted filesystem. As this flag is
per-mount, and different mounts of the same filesystems (such as in case of bind
mounts) can have different mutability settings, you have to go though a custody
to find out if the filesystem is mounted read-only, instead of just asking the
filesystem itself whether it's inherently read-only.

This also adds a lot of checks we were previously missing; and moves some of
them to happen after more specific checks (such as regular permission checks).

One outstanding hole in this system is sys$mprotect(PROT_WRITE), as there's no
way we can know if the original file description this region has been mounted
from had been opened through a readonly mount point. Currently, we always allow
such sys$mprotect() calls to succeed, which effectively allows anyone to
circumvent the effect of MS_RDONLY. We should solve this one way or another.
This commit is contained in:
Sergey Bugaev 2020-05-28 17:56:25 +03:00 committed by Andreas Kling
parent b905126365
commit fdb71cdf8f
Notes: sideshowbarker 2024-07-19 06:00:51 +09:00
6 changed files with 60 additions and 16 deletions

View File

@ -59,4 +59,12 @@ String Custody::absolute_path() const
return builder.to_string();
}
bool Custody::is_readonly() const
{
if (m_mount_flags & MS_RDONLY)
return true;
return m_inode->fs().is_readonly();
}
}

View File

@ -54,6 +54,7 @@ public:
String absolute_path() const;
int mount_flags() const { return m_mount_flags; }
bool is_readonly() const;
private:
Custody(Custody* parent, const StringView& name, Inode&, int mount_flags);

View File

@ -177,14 +177,15 @@ void VFS::traverse_directory_inode(Inode& dir_inode, Function<bool(const FS::Dir
KResult VFS::utime(StringView path, Custody& base, time_t atime, time_t mtime)
{
auto descriptor_or_error = VFS::the().open(move(path), 0, 0, base);
if (descriptor_or_error.is_error())
return descriptor_or_error.error();
auto& inode = *descriptor_or_error.value()->inode();
if (inode.fs().is_readonly())
return KResult(-EROFS);
auto custody_or_error = VFS::the().resolve_path(move(path), base);
if (custody_or_error.is_error())
return custody_or_error.error();
auto& custody = *custody_or_error.value();
auto& inode = custody.inode();
if (!Process::current->is_superuser() && inode.metadata().uid != Process::current->euid())
return KResult(-EACCES);
if (custody.is_readonly())
return KResult(-EROFS);
int error = inode.set_atime(atime);
if (error)
@ -264,6 +265,12 @@ KResultOr<NonnullRefPtr<FileDescription>> VFS::open(StringView path, int options
descriptor_or_error.value()->set_original_inode({}, inode);
return descriptor_or_error;
}
// Check for read-only FS. Do this after handling preopen FD and devices,
// but before modifying the inode in any way.
if ((options & O_WRONLY) && custody.is_readonly())
return KResult(-EROFS);
if (should_truncate_file) {
inode.truncate(0);
inode.set_mtime(kgettimeofday().tv_sec);
@ -290,6 +297,8 @@ KResult VFS::mknod(StringView path, mode_t mode, dev_t dev, Custody& base)
auto& parent_inode = parent_custody->inode();
if (!parent_inode.metadata().may_write(*Process::current))
return KResult(-EACCES);
if (parent_custody->is_readonly())
return KResult(-EROFS);
LexicalPath p(path);
dbg() << "VFS::mknod: '" << p.basename() << "' mode=" << mode << " dev=" << dev << " in " << parent_inode.identifier();
@ -310,6 +319,9 @@ KResultOr<NonnullRefPtr<FileDescription>> VFS::create(StringView path, int optio
auto& parent_inode = parent_custody.inode();
if (!parent_inode.metadata().may_write(*Process::current))
return KResult(-EACCES);
if (parent_custody.is_readonly())
return KResult(-EROFS);
LexicalPath p(path);
#ifdef VFS_DEBUG
dbg() << "VFS::create: '" << p.basename() << "' in " << parent_inode.identifier();
@ -348,6 +360,8 @@ KResult VFS::mkdir(StringView path, mode_t mode, Custody& base)
auto& parent_inode = parent_custody->inode();
if (!parent_inode.metadata().may_write(*Process::current))
return KResult(-EACCES);
if (parent_custody->is_readonly())
return KResult(-EROFS);
LexicalPath p(path);
#ifdef VFS_DEBUG
@ -371,6 +385,8 @@ KResult VFS::access(StringView path, int mode, Custody& base)
if (mode & W_OK) {
if (!metadata.may_write(*Process::current))
return KResult(-EACCES);
if (custody.is_readonly())
return KResult(-EROFS);
}
if (mode & X_OK) {
if (!metadata.may_execute(*Process::current))
@ -396,11 +412,11 @@ KResultOr<NonnullRefPtr<Custody>> VFS::open_directory(StringView path, Custody&
KResult VFS::chmod(Custody& custody, mode_t mode)
{
auto& inode = custody.inode();
if (inode.fs().is_readonly())
return KResult(-EROFS);
if (Process::current->euid() != inode.metadata().uid && !Process::current->is_superuser())
return KResult(-EPERM);
if (custody.is_readonly())
return KResult(-EROFS);
// Only change the permission bits.
mode = (inode.mode() & ~04777u) | (mode & 04777u);
@ -449,6 +465,9 @@ KResult VFS::rename(StringView old_path, StringView new_path, Custody& base)
return KResult(-EACCES);
}
if (old_parent_custody->is_readonly() || new_parent_custody->is_readonly())
return KResult(-EROFS);
auto new_basename = LexicalPath(new_path).basename();
if (!new_custody_or_error.is_error()) {
@ -482,9 +501,6 @@ KResult VFS::rename(StringView old_path, StringView new_path, Custody& base)
KResult VFS::chown(Custody& custody, uid_t a_uid, gid_t a_gid)
{
auto& inode = custody.inode();
if (inode.fs().is_readonly())
return KResult(-EROFS);
auto metadata = inode.metadata();
if (Process::current->euid() != metadata.uid && !Process::current->is_superuser())
@ -504,6 +520,9 @@ KResult VFS::chown(Custody& custody, uid_t a_uid, gid_t a_gid)
new_gid = a_gid;
}
if (custody.is_readonly())
return KResult(-EROFS);
dbg() << "VFS::chown(): inode " << inode.identifier() << " <- uid:" << new_uid << " gid:" << new_gid;
if (metadata.is_setuid() || metadata.is_setgid()) {
@ -546,15 +565,15 @@ KResult VFS::link(StringView old_path, StringView new_path, Custody& base)
if (parent_inode.fsid() != old_inode.fsid())
return KResult(-EXDEV);
if (parent_inode.fs().is_readonly())
return KResult(-EROFS);
if (!parent_inode.metadata().may_write(*Process::current))
return KResult(-EACCES);
if (old_inode.is_directory())
return KResult(-EPERM);
if (parent_custody->is_readonly())
return KResult(-EROFS);
return parent_inode.add_child(old_inode.identifier(), LexicalPath(new_path).basename(), old_inode.mode());
}
@ -570,6 +589,11 @@ KResult VFS::unlink(StringView path, Custody& base)
if (inode.is_directory())
return KResult(-EISDIR);
// We have just checked that the inode is not a directory, and thus it's not
// the root. So it should have a parent. Note that this would be invalidated
// if we were to support bind-mounting regular files on top of the root.
ASSERT(parent_custody);
auto& parent_inode = parent_custody->inode();
if (!parent_inode.metadata().may_write(*Process::current))
return KResult(-EACCES);
@ -579,6 +603,9 @@ KResult VFS::unlink(StringView path, Custody& base)
return KResult(-EACCES);
}
if (parent_custody->is_readonly())
return KResult(-EROFS);
auto result = parent_inode.remove_child(LexicalPath(path).basename());
if (result.is_error())
return result;
@ -599,6 +626,8 @@ KResult VFS::symlink(StringView target, StringView linkpath, Custody& base)
auto& parent_inode = parent_custody->inode();
if (!parent_inode.metadata().may_write(*Process::current))
return KResult(-EACCES);
if (parent_custody->is_readonly())
return KResult(-EROFS);
LexicalPath p(linkpath);
dbg() << "VFS::symlink: '" << p.basename() << "' (-> '" << target << "') in " << parent_inode.identifier();
@ -621,8 +650,6 @@ KResult VFS::rmdir(StringView path, Custody& base)
auto& custody = *custody_or_error.value();
auto& inode = custody.inode();
if (inode.fs().is_readonly())
return KResult(-EROFS);
// FIXME: We should return EINVAL if the last component of the path is "."
// FIXME: We should return ENOTEMPTY if the last component of the path is ".."
@ -641,6 +668,9 @@ KResult VFS::rmdir(StringView path, Custody& base)
if (inode.directory_entry_count() != 2)
return KResult(-ENOTEMPTY);
if (custody.is_readonly())
return KResult(-EROFS);
auto result = inode.remove_child(".");
if (result.is_error())
return result;

View File

@ -318,6 +318,9 @@ static bool validate_inode_mmap_prot(const Process& process, int prot, const Ino
return false;
if (map_shared) {
// FIXME: What about readonly filesystem mounts? We cannot make a
// decision here without knowing the mount flags, so we would need to
// keep a Custody or something from mmap time.
if ((prot & PROT_WRITE) && !metadata.may_write(process))
return false;
InterruptDisabler disabler;

View File

@ -52,6 +52,7 @@
#define MS_NOEXEC (1 << 1)
#define MS_NOSUID (1 << 2)
#define MS_BIND (1 << 3)
#define MS_RDONLY (1 << 4)
#define PERF_EVENT_MALLOC 1
#define PERF_EVENT_FREE 2

View File

@ -153,6 +153,7 @@ enum {
#define MS_NOEXEC (1 << 1)
#define MS_NOSUID (1 << 2)
#define MS_BIND (1 << 3)
#define MS_RDONLY (1 << 4)
/*
* We aren't fully compliant (don't support policies, and don't have a wide