workingcopy: add a file metadata abstraction

Summary: Add a Metadata type to abstract across the various places we get file metadata from. This doesn't buy us much right now, but will allow us to easily inject file metadata from Watchman into our file comparison logic.

Reviewed By: quark-zju

Differential Revision: D44033617

fbshipit-source-id: ed08e44df9ff44bb23e8fff031d431c4a519eaa4
This commit is contained in:
Muir Manders 2023-03-27 19:03:49 -07:00 committed by Facebook GitHub Bot
parent c7b7ddcd61
commit cd760409ef
7 changed files with 210 additions and 93 deletions

View File

@ -14,7 +14,5 @@ pub use util::lock::PathLock;
pub use crate::async_vfs::AsyncVfsWriter;
pub use crate::pathauditor::AuditError;
pub use crate::pathauditor::PathAuditor;
pub use crate::vfs::is_executable;
pub use crate::vfs::is_symlink;
pub use crate::vfs::UpdateFlag;
pub use crate::vfs::VFS;

View File

@ -492,35 +492,6 @@ fn metadata_eq(m1: &Metadata, m2: &Metadata) -> Result<bool> {
&& m1.file_type() == m2.file_type())
}
/// Executable-bit detection is not available on Windows; calling this panics.
#[cfg(windows)]
pub fn is_executable(_metadata: &Metadata) -> bool {
    panic!("is_executable is not supported on Windows")
}

/// Returns true when `metadata` describes a non-symlink entry with at least
/// one of the user/group/other execute permission bits set.
#[cfg(unix)]
pub fn is_executable(metadata: &Metadata) -> bool {
    // Any of the user, group or other execute bits.
    const ANY_EXEC_BIT: u32 = 0o111;

    // A symlink's mode reports as executable, but the manifest treats
    // "executable" and "symlink" as mutually exclusive, so a symlink is
    // never reported as executable here.
    !is_symlink(metadata) && metadata.permissions().mode() & ANY_EXEC_BIT != 0
}
/// Returns true when `metadata` describes a symbolic link.
///
/// Only implemented for unix targets; on Windows this panics.
pub fn is_symlink(metadata: &Metadata) -> bool {
    #[cfg(unix)]
    {
        let kind = metadata.file_type();
        return kind.is_symlink();
    }
    #[cfg(target_os = "windows")]
    {
        // Keep the parameter "used" so the Windows build has no warning.
        let _ = metadata;
        panic!("is_symlink is not supported on Windows");
    }
}
#[cfg(test)]
mod tests {
use super::*;

View File

@ -8,6 +8,7 @@ edition = "2021"
[dependencies]
anyhow = "1.0.65"
async-runtime = { version = "0.1.0", path = "../async-runtime" }
bitflags = "1.3"
configmodel = { version = "0.1.0", path = "../config/model" }
crossbeam = "0.8"
edenfs_client = { version = "0.1.0", path = "../edenfs-client", optional = true }

View File

@ -6,7 +6,6 @@
*/
use std::collections::HashMap;
use std::fs::Metadata;
use std::sync::Arc;
use anyhow::anyhow;
@ -16,7 +15,6 @@ use crossbeam::channel::Receiver;
use crossbeam::channel::SendError;
use crossbeam::channel::Sender;
use futures::StreamExt;
use manifest::FileType;
use manifest::Manifest;
use manifest_tree::TreeManifest;
use parking_lot::RwLock;
@ -29,17 +27,15 @@ use treestate::filestate::StateFlags;
use types::Key;
use types::RepoPath;
use types::RepoPathBuf;
use vfs::is_executable;
use vfs::is_symlink;
use vfs::VFS;
use crate::filesystem::ChangeType;
use crate::metadata::HgModifiedTime;
use crate::walker::WalkError;
use crate::metadata::Metadata;
pub type ArcReadFileContents = Arc<dyn ReadFileContents<Error = anyhow::Error> + Send + Sync>;
pub enum FileChangeResult {
pub(crate) enum FileChangeResult {
Yes(ChangeType),
No,
Maybe(Metadata),
@ -67,7 +63,9 @@ impl ResolvedFileChangeResult {
}
}
pub trait FileChangeDetectorTrait: IntoIterator<Item = Result<ResolvedFileChangeResult>> {
pub(crate) trait FileChangeDetectorTrait:
IntoIterator<Item = Result<ResolvedFileChangeResult>>
{
fn submit(&mut self, state: Option<FileStateV2>, path: &RepoPath);
fn total_work_hint(&self, _hint: u64) {}
}
@ -103,7 +101,7 @@ impl FileChangeDetector {
const NEED_CHECK: StateFlags = StateFlags::NEED_CHECK;
const EXIST_P1: StateFlags = StateFlags::EXIST_P1;
pub fn file_changed_given_metadata(
pub(crate) fn file_changed_given_metadata(
vfs: &VFS,
path: &RepoPath,
last_write: HgModifiedTime,
@ -111,7 +109,7 @@ pub fn file_changed_given_metadata(
state: Option<FileStateV2>,
) -> Result<FileChangeResult> {
// First handle when metadata is None (i.e. file doesn't exist).
let (metadata, state) = match (metadata, state) {
let (fs_meta, state) = match (metadata, state) {
// File was untracked during crawl but no longer exists.
(None, None) => {
tracing::trace!(?path, "neither on disk nor in treestate");
@ -138,7 +136,7 @@ pub fn file_changed_given_metadata(
// Don't check EXIST_P2. If file is only in P2 we want to report "changed"
// even if its contents happen to match an untracked file on disk.
let in_parent = matches!(&state, Some(s) if s.state.intersects(EXIST_P1));
let is_trackable_file = metadata.is_file() || metadata.is_symlink();
let is_trackable_file = fs_meta.is_file(vfs) || fs_meta.is_symlink(vfs);
let state = match (in_parent, is_trackable_file) {
// If the file is not valid (e.g. a directory or a weird file like
@ -164,6 +162,8 @@ pub fn file_changed_given_metadata(
let flags = state.state;
let ts_meta: Metadata = state.into();
// If working copy file size or flags are different from what is in treestate, it has changed.
// Note: state.size is i32 since Mercurial uses negative numbers to indicate special files.
// A -1 indicates the file is either in a merge state or a lookup state.
@ -173,31 +173,29 @@ pub fn file_changed_given_metadata(
// Regardless, if the size is negative, we'll do a lookup comparison since we can't
// determine if the file has changed relative to p1. This logic is a mess and we should get
// rid of all these negative numbers.
let valid_size = state.size >= 0;
if valid_size {
let size_different = metadata.len() != state.size.try_into().unwrap_or(std::u64::MAX);
let exec_different =
vfs.supports_executables() && is_executable(&metadata) != state.is_executable();
let symlink_different =
vfs.supports_symlinks() && is_symlink(&metadata) != state.is_symlink();
if let Some(ts_size) = ts_meta.len() {
let size_different = fs_meta.len() != Some(ts_size);
let exec_different = fs_meta.is_executable(vfs) != ts_meta.is_executable(vfs);
let symlink_different = fs_meta.is_symlink(vfs) != ts_meta.is_symlink(vfs);
if size_different || exec_different || symlink_different {
tracing::trace!(
?path,
size_different,
exec_different,
symlink_different,
"changed"
"changed (metadata mismatch)"
);
return Ok(FileChangeResult::changed(path.to_owned()));
}
} else {
tracing::trace!(?path, "maybe (no size)");
return Ok(FileChangeResult::Maybe(fs_meta));
}
// If it's marked NEED_CHECK, we always need to do a lookup, regardless of the mtime.
let needs_check = flags.intersects(NEED_CHECK) || !valid_size;
if needs_check {
if flags.intersects(NEED_CHECK) {
tracing::trace!(?path, "maybe (NEED_CHECK)");
return Ok(FileChangeResult::Maybe(metadata));
return Ok(FileChangeResult::Maybe(fs_meta));
}
// If the mtime has changed or matches the last normal() write time, we need to compare the
@ -205,18 +203,17 @@ pub fn file_changed_given_metadata(
// the file is in a lookup state. Since a -1 will always cause the equality comparison
// below to fail and force a lookup, the -1 is handled correctly without special casing. In
// theory all -1 files should be marked NEED_CHECK above (I think).
if state.mtime < 0 {
tracing::trace!(?path, "maybe (mtime < 0)");
return Ok(FileChangeResult::Maybe(metadata));
}
let ts_mtime = match ts_meta.mtime() {
None => {
tracing::trace!(?path, "maybe (no mtime)");
return Ok(FileChangeResult::Maybe(fs_meta));
}
Some(ts) => ts,
};
let state_mtime: Result<HgModifiedTime> = state.mtime.try_into();
let state_mtime = state_mtime.map_err(|e| WalkError::InvalidMTime(path.to_owned(), e))?;
let mtime: HgModifiedTime = metadata.modified()?.into();
if mtime != state_mtime || mtime == last_write {
if Some(ts_mtime) != fs_meta.mtime() || ts_mtime == last_write {
tracing::trace!(?path, "maybe (mtime doesn't match)");
return Ok(FileChangeResult::Maybe(metadata));
return Ok(FileChangeResult::Maybe(fs_meta));
}
tracing::trace!(?path, "no (fallthrough)");
@ -257,7 +254,7 @@ fn compare_repo_bytes_to_disk(
}
impl FileChangeDetector {
pub fn has_changed_with_fresh_metadata(
pub(crate) fn has_changed_with_fresh_metadata(
&mut self,
state: Option<FileStateV2>,
path: &RepoPath,
@ -276,7 +273,7 @@ impl FileChangeDetector {
impl FileChangeDetectorTrait for FileChangeDetector {
fn submit(&mut self, state: Option<FileStateV2>, path: &RepoPath) {
let metadata = match self.vfs.metadata(path) {
Ok(metadata) => Some(metadata),
Ok(metadata) => Some(metadata.into()),
Err(e) => match e.downcast_ref::<std::io::Error>() {
Some(e) if e.kind() == std::io::ErrorKind::NotFound => None,
_ => {
@ -303,24 +300,9 @@ impl FileChangeDetectorTrait for FileChangeDetector {
}
}
fn manifest_flags_mismatch(vfs: &VFS, mf_type: FileType, fs_meta: &Metadata) -> bool {
if vfs.supports_symlinks() {
let is_symlink = is_symlink(fs_meta);
if is_symlink != (mf_type == FileType::Symlink) {
return true;
}
// Ignore executable check since symlinks always appear executable.
if is_symlink {
return false;
}
}
if vfs.supports_executables() && is_executable(fs_meta) != (mf_type == FileType::Executable) {
return true;
}
false
fn manifest_flags_mismatch(vfs: &VFS, mf_meta: Metadata, fs_meta: &Metadata) -> bool {
mf_meta.is_symlink(vfs) != fs_meta.is_symlink(vfs)
|| mf_meta.is_executable(vfs) != fs_meta.is_executable(vfs)
}
// Allows case insensitive tracking of RepoPathBuf->V. We need this because we
@ -377,7 +359,7 @@ impl IntoIterator for FileChangeDetector {
Ok(file) => {
if manifest_flags_mismatch(
&self.vfs,
file.meta.file_type,
file.meta.file_type.into(),
self.lookups.get(&file.path).unwrap(),
) {
tracing::trace!(path=?file.path, "changed (mf flags mismatch disk)");
@ -568,7 +550,7 @@ impl ParallelDetector {
Ok(file) => {
if manifest_flags_mismatch(
&self.vfs,
file.meta.file_type,
file.meta.file_type.into(),
lookups.get(&file.path).unwrap(),
) {
tracing::trace!(path=?file.path, "changed (mf flags mismatch disk)");
@ -623,7 +605,7 @@ impl ParallelDetector {
lookup_send: &Sender<(RepoPathBuf, Metadata)>,
) -> Result<()> {
let metadata = match vfs.metadata(&path) {
Ok(metadata) => Some(metadata),
Ok(metadata) => Some(metadata.into()),
Err(e) => match e.downcast_ref::<std::io::Error>() {
Some(e) if e.kind() == std::io::ErrorKind::NotFound => None,
_ => {

View File

@ -5,13 +5,174 @@
* GNU General Public License version 2.
*/
#[cfg(unix)]
use std::os::unix::prelude::PermissionsExt;
use std::time::SystemTime;
use anyhow::Error;
use anyhow::Result;
use bitflags::bitflags;
use manifest::FileType;
use treestate::filestate::FileStateV2;
use vfs::VFS;
bitflags! {
    /// Bit set describing the kind of a file and which optional fields of a
    /// `Metadata` value actually carry data.
    pub(crate) struct MetadataFlags: u8 {
        /// The entry is a symbolic link.
        const IS_SYMLINK = 0b0000_0001;
        /// The entry has an execute permission bit set.
        const IS_EXEC = 0b0000_0010;
        /// The entry is a regular file.
        const IS_REGULAR = 0b0000_0100;
        /// The stored modification time is meaningful.
        const HAS_MTIME = 0b0000_1000;
        /// The stored size is meaningful.
        const HAS_SIZE = 0b0001_0000;
    }
}
/// Metadata abstracts across the different places file metadata can come from.
///
/// Values are built from raw stat fields (`from_stat`), a treestate
/// `FileStateV2`, a `std::fs::Metadata`, or a manifest `FileType`. Not every
/// source knows the size or mtime, so those are exposed through accessors
/// returning `Option` rather than read directly.
#[derive(Debug, Clone)]
pub(crate) struct Metadata {
// File kind bits plus the HAS_SIZE / HAS_MTIME markers that say whether the
// two fields below hold real data.
flags: MetadataFlags,
// File size; only reported by `len()` when HAS_SIZE is set.
size: u64,
// Modification time; only reported by `mtime()` when HAS_MTIME is set.
mtime: HgModifiedTime,
}
impl Metadata {
    /// Returns true if this entry is a symlink and `vfs` supports symlinks.
    ///
    /// Always false when the filesystem cannot represent symlinks, so that
    /// comparisons between two `Metadata` values ignore the symlink bit there.
    pub fn is_symlink(&self, vfs: &VFS) -> bool {
        vfs.supports_symlinks() && self.flags.intersects(MetadataFlags::IS_SYMLINK)
    }

    /// Returns true if this entry is executable and `vfs` supports the
    /// executable bit.
    pub fn is_executable(&self, vfs: &VFS) -> bool {
        vfs.supports_executables() && self.flags.intersects(MetadataFlags::IS_EXEC)
    }

    /// Returns true if this entry is a regular file and is not considered a
    /// symlink on `vfs`.
    pub fn is_file(&self, vfs: &VFS) -> bool {
        !self.is_symlink(vfs) && self.flags.intersects(MetadataFlags::IS_REGULAR)
    }

    /// Returns the file size, or `None` if the source of this metadata did
    /// not provide one.
    pub fn len(&self) -> Option<u64> {
        self.flags
            .intersects(MetadataFlags::HAS_SIZE)
            .then_some(self.size)
    }

    /// Returns the modification time, or `None` if the source of this
    /// metadata did not provide one.
    pub fn mtime(&self) -> Option<HgModifiedTime> {
        self.flags
            .intersects(MetadataFlags::HAS_MTIME)
            .then_some(self.mtime)
    }

    /// Builds metadata from raw stat-style fields.
    ///
    /// `mode` is a `mode_t` style value (file type bits plus permission
    /// bits), `size` is the file size and `mtime` is the modification time,
    /// which is masked into the range Mercurial expects. Size and mtime are
    /// always marked as present.
    pub fn from_stat(mode: u32, size: u64, mtime: i64) -> Self {
        // Watchman sends mode_t even on Windows where they aren't fully
        // reflected in libc. Let's just hardcode the values we need.
        const S_IFLNK: u32 = 0o120000;
        const S_IFMT: u32 = 0o170000;
        const S_IFREG: u32 = 0o100000;
        // Any of the user, group or other execute bits.
        const ANY_EXEC_BIT: u32 = 0o111;

        let mut flags = MetadataFlags::HAS_SIZE | MetadataFlags::HAS_MTIME;

        let file_kind = mode & S_IFMT;
        if file_kind == S_IFLNK {
            flags |= MetadataFlags::IS_SYMLINK;
        } else if file_kind == S_IFREG {
            flags |= MetadataFlags::IS_REGULAR;
            // Only regular files can be "executable". Directories and other
            // special entries routinely carry exec bits, and the other
            // conversions into `Metadata` never mark those as executable.
            if mode & ANY_EXEC_BIT != 0 {
                flags |= MetadataFlags::IS_EXEC;
            }
        }

        Self {
            flags,
            size,
            mtime: mask_stat_mtime(mtime),
        }
    }
}
impl From<FileStateV2> for Metadata {
    /// Converts a treestate entry into `Metadata`.
    ///
    /// A negative size or mtime in the treestate entry is treated as
    /// "unknown": the field is stored as zero and the matching `HAS_*` flag
    /// is left unset.
    fn from(s: FileStateV2) -> Self {
        // Kind bits: a symlink is never also regular/executable.
        let mut flags = if s.is_symlink() {
            MetadataFlags::IS_SYMLINK
        } else if s.is_executable() {
            MetadataFlags::IS_REGULAR | MetadataFlags::IS_EXEC
        } else {
            MetadataFlags::IS_REGULAR
        };

        let size = if s.size < 0 {
            0
        } else {
            flags |= MetadataFlags::HAS_SIZE;
            s.size as u64
        };

        let mtime = if s.mtime < 0 {
            HgModifiedTime(0)
        } else {
            flags |= MetadataFlags::HAS_MTIME;
            HgModifiedTime(s.mtime as u64)
        };

        Self { flags, size, mtime }
    }
}
impl From<std::fs::Metadata> for Metadata {
fn from(m: std::fs::Metadata) -> Self {
let mut flags = MetadataFlags::HAS_SIZE;
if m.is_symlink() {
flags |= MetadataFlags::IS_SYMLINK;
} else if m.is_file() {
flags |= MetadataFlags::IS_REGULAR;
#[cfg(unix)]
if m.permissions().mode() & 0o111 != 0 {
flags |= MetadataFlags::IS_EXEC;
}
}
let mtime = match m.modified() {
Err(_) => HgModifiedTime(0),
Ok(mtime) => {
flags |= MetadataFlags::HAS_MTIME;
mtime.into()
}
};
Self {
flags,
mtime,
size: m.len(),
}
}
}
impl From<FileType> for Metadata {
    /// Converts a manifest file type into `Metadata`.
    ///
    /// Only the kind bits are populated; neither `HAS_SIZE` nor `HAS_MTIME`
    /// is set, so `len()` and `mtime()` return `None`.
    fn from(ft: FileType) -> Self {
        Self {
            flags: match ft {
                FileType::Symlink => MetadataFlags::IS_SYMLINK,
                FileType::Regular => MetadataFlags::IS_REGULAR,
                FileType::Executable => MetadataFlags::IS_REGULAR | MetadataFlags::IS_EXEC,
                FileType::GitSubmodule => MetadataFlags::empty(),
            },
            size: 0,
            mtime: HgModifiedTime(0),
        }
    }
}
/// Represents a file modification time in Mercurial, in seconds since the unix epoch.
#[derive(Clone, Copy, PartialEq)]
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct HgModifiedTime(u64);
impl From<u64> for HgModifiedTime {
@ -32,6 +193,13 @@ impl From<u32> for HgModifiedTime {
// the original upstream introduction of this workaround.
const CRAZY_MTIME_MASK: i64 = 0x7FFFFFFF;
/// Converts a raw signed mtime into an `HgModifiedTime`.
///
/// Crazy mtimes are handled by masking them into a reasonable range. This is
/// what dirstate.py does, so using the same approach may give some modicum
/// of compatibility.
fn mask_stat_mtime(mtime: i64) -> HgModifiedTime {
    let masked = mtime & CRAZY_MTIME_MASK;
    HgModifiedTime(masked as u64)
}
impl From<SystemTime> for HgModifiedTime {
fn from(value: SystemTime) -> Self {
let signed_epoch = match value.duration_since(SystemTime::UNIX_EPOCH) {
@ -40,10 +208,7 @@ impl From<SystemTime> for HgModifiedTime {
Err(err) => -(err.duration().as_secs() as i64),
};
// Handle crazy mtimes by masking into reasonable range. This is what
// dirstate.py does, so we may get some modicum of compatibility by
// using the same approach.
HgModifiedTime((signed_epoch & CRAZY_MTIME_MASK) as u64)
mask_stat_mtime(signed_epoch)
}
}

View File

@ -157,7 +157,7 @@ impl<M: Matcher + Clone + Send + Sync + 'static> PendingChanges<M> {
.has_changed_with_fresh_metadata(
ts.normalized_get(path)?,
path,
Some(metadata),
Some(metadata.into()),
)?;
if let FileChangeResult::Yes(change_type) = changed {

View File

@ -371,7 +371,7 @@ fn warn_about_fresh_instance(io: &IO, old_pid: Option<u32>, new_pid: Option<u32>
// figure out all the files that may have changed and check them for
// changes. Also track paths we need to mark or unmark as NEED_CHECK
// in the treestate.
pub fn detect_changes(
pub(crate) fn detect_changes(
matcher: Arc<dyn Matcher + Send + Sync + 'static>,
ignore_matcher: Arc<dyn Matcher + Send + Sync + 'static>,
mut file_change_detector: impl FileChangeDetectorTrait + 'static,