filesystem: add filesystem walking to PendingChanges

Summary:
The first phase of pending changes is inspecting the filesystem for
changes. This diff adds that logic.

Reviewed By: xavierd

Differential Revision: D20546909

fbshipit-source-id: 1c2c0fa7f700dbff4acfce4d5271b4472a13571f
This commit is contained in:
Durham Goode 2020-04-24 13:56:51 -07:00 committed by Facebook GitHub Bot
parent be9628b2bc
commit 73a45b695b
6 changed files with 190 additions and 12 deletions

View File

@ -109,6 +109,16 @@ pub struct FileStateV2 {
pub copied: Option<Box<[u8]>>,
}
impl FileStateV2 {
    /// Reports whether the `0o100` bit is set in the recorded `mode`.
    pub fn is_executable(&self) -> bool {
        let exec_bit = 0o100;
        (self.mode & exec_bit) == exec_bit
    }

    /// Reports whether every bit of the `0o120000` pattern is set in the recorded `mode`.
    pub fn is_symlink(&self) -> bool {
        let symlink_bits = 0o120000;
        (self.mode & symlink_bits) == symlink_bits
    }
}
#[cfg(test)]
impl rand::distributions::Distribution<FileStateV2> for rand::distributions::Standard {
fn sample<R: rand::Rng + ?Sized>(&self, rng: &mut R) -> FileStateV2 {

View File

@ -9,4 +9,4 @@ mod pathauditor;
mod vfs;
pub use crate::pathauditor::PathAuditor;
pub use crate::vfs::{UpdateFlag, VFS};
pub use crate::vfs::{is_executable, is_symlink, UpdateFlag, VFS};

View File

@ -11,6 +11,8 @@ use std::{
path::{Path, PathBuf},
};
use std::fs::Metadata;
#[cfg(not(windows))]
use std::{
fs::{set_permissions, Permissions},
@ -30,7 +32,8 @@ use crate::pathauditor::PathAuditor;
/// Filesystem access rooted at a directory, together with the capabilities detected for the
/// filesystem that directory lives on.
pub struct VFS {
/// Root directory this `VFS` was constructed with.
root: PathBuf,
/// Path auditor created from `root` by `VFS::new`.
auditor: PathAuditor,
// NOTE(review): `can_symlink` appears next to `supports_symlinks` in this view; it looks like
// the earlier name for the same capability flag — confirm which one is current.
can_symlink: bool,
/// Whether real symlinks may be created; `false` when the detected filesystem type is NTFS.
supports_symlinks: bool,
/// Whether the executable bit is meaningful; `false` when the detected filesystem type is NTFS.
supports_executables: bool,
}
#[derive(Clone, Copy)]
@ -42,13 +45,16 @@ pub enum UpdateFlag {
impl VFS {
/// Builds a `VFS` for `root`.
///
/// A `PathAuditor` is created for the root, and the filesystem type of `root` is looked up
/// once with `fstype`; that type decides the symlink and executable-bit capability flags.
///
/// # Errors
///
/// Returns the lookup error, wrapped with the context "Can't construct a VFS for <root>",
/// when the filesystem type cannot be determined.
pub fn new(root: PathBuf) -> Result<Self> {
let auditor = PathAuditor::new(&root);
// NOTE(review): the next two lines probe symlink support directly from the path and look like
// the earlier form of the `fstype`-based detection that follows — confirm which is current.
let can_symlink = supports_symlinks(&root)
.with_context(|| format!("Can't construct a VFS for {:?}", root))?;
// Detect the filesystem type once and derive both capability flags from it.
let fs_type =
fstype(&root).with_context(|| format!("Can't construct a VFS for {:?}", root))?;
let supports_symlinks = supports_symlinks(&fs_type);
let supports_executables = supports_executables(&fs_type);
Ok(Self {
root,
auditor,
can_symlink,
supports_symlinks,
supports_executables,
})
}
@ -136,7 +142,7 @@ impl VFS {
let result = Self::plain_symlink_file(link_name, link_dest);
#[cfg(not(windows))]
let result = if self.can_symlink {
let result = if self.supports_symlinks {
std::os::unix::fs::symlink(link_dest, link_name).map_err(Into::into)
} else {
Self::plain_symlink_file(link_name, link_dest)
@ -202,6 +208,14 @@ impl VFS {
}
Ok(())
}
/// Returns the symlink capability flag stored on this `VFS`.
pub fn supports_symlinks(&self) -> bool {
self.supports_symlinks
}
/// Returns the executable-bit capability flag stored on this `VFS`.
pub fn supports_executables(&self) -> bool {
self.supports_executables
}
}
/// Since Windows doesn't support symlinks (without Windows' Developer Mode), and NTFS on unices is
@ -210,6 +224,28 @@ impl VFS {
///
/// Once the need to use NTFS on unices is gone (because this module solves the slowness), this
/// hack will be removed.
fn supports_symlinks(path: &Path) -> Result<bool> {
Ok(fstype(path)? != FsType::NTFS)
fn supports_symlinks(fs_type: &FsType) -> bool {
    // NTFS is the only filesystem type treated as lacking usable symlinks.
    let is_ntfs = *fs_type == FsType::NTFS;
    !is_ntfs
}
/// Reports whether files on a filesystem of type `fs_type` can be marked executable.
///
/// Windows decides executability from the file extension rather than from a permission bit, so
/// NTFS is reported as unsupported; every other filesystem type is reported as supported.
fn supports_executables(fs_type: &FsType) -> bool {
    let is_ntfs = *fs_type == FsType::NTFS;
    !is_ntfs
}
/// Reports whether `metadata` describes a file with any execute permission bit set.
///
/// On Unix this checks the owner, group and other execute bits (`0o111`) of the permission
/// mode.
///
/// # Panics
///
/// Panics on every non-Unix target, because there is no permission mode to inspect there.
/// Callers are expected to consult `VFS::supports_executables` before calling this.
pub fn is_executable(metadata: &Metadata) -> bool {
    #[cfg(unix)]
    return metadata.permissions().mode() & 0o111 != 0;
    #[cfg(windows)]
    panic!("is_executable is not supported on Windows");
    // Without this arm the function has an empty body (and so fails to compile) on targets
    // that are neither Unix nor Windows.
    #[cfg(not(any(unix, windows)))]
    panic!("is_executable is not supported on this platform");
}
/// Reports whether `metadata` describes a symbolic link.
///
/// The answer comes from the file type stored in `metadata`, so it is only ever `true` for
/// metadata obtained without following links (for example `std::fs::symlink_metadata`).
///
/// # Panics
///
/// Panics on Windows. Callers are expected to consult `VFS::supports_symlinks` before calling
/// this.
pub fn is_symlink(metadata: &Metadata) -> bool {
    #[cfg(windows)]
    panic!("is_symlink is not supported on Windows");
    // `FileType::is_symlink` is available on every target, so gate on "not Windows" rather than
    // "unix"; otherwise the body is empty (and fails to compile) on other targets.
    #[cfg(not(windows))]
    return metadata.file_type().is_symlink();
}

View File

@ -8,6 +8,7 @@ anyhow = "1.0.20"
parking_lot = "0.9"
pathmatcher = { path = "../pathmatcher"}
thiserror = "1.0.5"
treestate = { path = "../treestate"}
types = { path = "../types" }
vfs = { path = "../vfs" }

View File

@ -6,15 +6,24 @@
*/
use std::{
collections::HashSet,
convert::{TryFrom, TryInto},
fs::Metadata,
path::PathBuf,
sync::Arc,
time::SystemTime,
};
use anyhow::{Error, Result};
use parking_lot::Mutex;
use pathmatcher::Matcher;
use types::RepoPathBuf;
use vfs::VFS;
use treestate::filestate::StateFlags;
use treestate::treestate::TreeState;
use types::{RepoPath, RepoPathBuf};
use vfs::{is_executable, is_symlink, VFS};
use crate::walker::{WalkEntry, WalkError, Walker};
/// Represents a file modification time in Mercurial, in seconds since the unix epoch.
#[derive(PartialEq)]
@ -61,19 +70,38 @@ impl PhysicalFileSystem {
})
}
pub fn pending_changes<M: Matcher + Clone>(&self, matcher: M) -> PendingChanges<M> {
/// Creates a `PendingChanges` iterator over this filesystem.
///
/// The iterator starts in the `Walk` stage with a `Walker` over the VFS root that uses a clone
/// of `matcher`. `treestate`, `include_directories` and `last_write` are stored as given; the
/// set of seen paths and the list of pending lookups start out empty.
pub fn pending_changes<M: Matcher + Clone>(
    &self,
    treestate: Arc<Mutex<TreeState>>,
    matcher: M,
    include_directories: bool,
    last_write: HgModifiedTime,
) -> PendingChanges<M> {
    let root = self.vfs.root().to_path_buf();
    let fs_walker = Walker::new(root, matcher.clone(), false);
    PendingChanges {
        vfs: self.vfs.clone(),
        walker: fs_walker,
        matcher,
        treestate,
        stage: PendingChangesStage::Walk,
        include_directories,
        seen: HashSet::new(),
        lookups: Vec::new(),
        last_write,
    }
}
}
/// Iterator state for computing pending changes, created by
/// `PhysicalFileSystem::pending_changes`.
pub struct PendingChanges<M: Matcher + Clone> {
/// Consulted for whether executable bits and symlinks are supported when comparing file flags.
vfs: VFS,
/// Source of the file and directory entries consumed during the walk stage.
walker: Walker<M>,
/// The matcher this iterator was created with (the walker holds its own clone).
matcher: M,
/// Shared treestate, locked to look up the recorded state of each walked file.
treestate: Arc<Mutex<TreeState>>,
/// Current stage of the iteration; starts as `PendingChangesStage::Walk`.
stage: PendingChangesStage,
/// When true, directories yielded by the walker are reported as `SeenDirectory`.
include_directories: bool,
/// Every file path the walker has yielded so far.
seen: HashSet<RepoPathBuf>,
/// Paths whose metadata was inconclusive and that are queued for a later lookup comparison.
lookups: Vec<RepoPathBuf>,
/// Files whose mtime equals this value are queued for a lookup even if the mtime matches the
/// treestate.
last_write: HgModifiedTime,
}
#[derive(PartialEq)]
@ -106,8 +134,102 @@ pub enum PendingChangeResult {
}
impl<M: Matcher + Clone> PendingChanges<M> {
/// Decides from the treestate entry and the on-disk `metadata` whether `path` is changed.
///
/// Returns `Ok(true)` when the file is certainly changed: it has no treestate entry, it is not
/// in P1, or its size / executable flag / symlink flag differ from the recorded state.
///
/// Returns `Ok(false)` otherwise. In that case the path may have been appended to
/// `self.lookups`, meaning metadata alone was inconclusive and the contents must be compared in
/// the later lookup phase.
///
/// # Errors
///
/// Fails if the treestate lookup fails, if the recorded mtime cannot be converted
/// (`WalkError::InvalidMTime`), or if the on-disk modification time cannot be read or converted.
fn is_changed(&mut self, path: &RepoPath, metadata: &Metadata) -> Result<bool> {
    let mut treestate = self.treestate.lock();
    let state = match treestate.get(path)? {
        Some(entry) => entry,
        // On disk but unknown to the treestate: untracked.
        None => return Ok(true),
    };

    // Anything not in P1 (added or untracked) counts as changed.
    // TODO: Also check against P2?
    let flags = state.state;
    if !flags.intersects(StateFlags::EXIST_P1) {
        return Ok(true);
    }

    // state.size is an i32 because Mercurial stores negative sentinels in it: -1 means the file
    // is in a merge or lookup state, -2 means it comes from the other parent (and may or may not
    // exist in the current one). A negative size cannot be compared against the file on disk,
    // so such entries skip the metadata comparison and are sent to the lookup phase below.
    let size_is_usable = state.size >= 0;
    if size_is_usable {
        let recorded_len: u64 = state.size.try_into().unwrap_or(std::u64::MAX);
        let size_differs = metadata.len() != recorded_len;
        // Flag comparisons only apply when the filesystem can represent the flag at all.
        let exec_differs =
            self.vfs.supports_executables() && is_executable(metadata) != state.is_executable();
        let symlink_differs =
            self.vfs.supports_symlinks() && is_symlink(metadata) != state.is_symlink();
        if size_differs || exec_differs || symlink_differs {
            return Ok(true);
        }
    }

    // Entries marked NEED_CHECK, entries with an unusable size, and entries with a negative
    // mtime always need a content comparison, whatever the mtime on disk says.
    let must_look_up =
        flags.intersects(StateFlags::NEED_CHECK) || !size_is_usable || state.mtime < 0;
    if must_look_up {
        self.lookups.push(path.to_owned());
        return Ok(false);
    }

    // Otherwise a lookup is needed only if the mtime moved, or if it equals the last normal()
    // write time (in which case an unchanged mtime proves nothing).
    let recorded_mtime: Result<HgModifiedTime> = state.mtime.try_into();
    let recorded_mtime =
        recorded_mtime.map_err(|e| WalkError::InvalidMTime(path.to_owned(), e))?;
    let disk_mtime: HgModifiedTime = metadata.modified()?.try_into()?;
    if disk_mtime != recorded_mtime || disk_mtime == self.last_write {
        self.lookups.push(path.to_owned());
    }

    Ok(false)
}
/// Advances the filesystem walk until there is something to report.
///
/// Returns `Some(Ok(..))` for the next changed file (or the next directory, when
/// `include_directories` is set), `Some(Err(..))` if the walker or the change check fails, and
/// `None` once the walker is exhausted.
fn next_walk(&mut self) -> Option<Result<PendingChangeResult>> {
// NOTE(review): this bare `None` sits ahead of the loop in this view; it looks like the
// previous stub body rather than part of the new implementation — confirm.
None
loop {
match self.walker.next() {
Some(Ok(WalkEntry::File(file, metadata))) => {
let file = normalize(file);
// Record every walked file, changed or not.
self.seen.insert(file.to_owned());
let changed = match self.is_changed(&file, &metadata) {
Ok(result) => result,
Err(e) => return Some(Err(e)),
};
// Unchanged files produce no result; keep walking.
if changed {
return Some(Ok(PendingChangeResult::File(ChangeType::Changed(file))));
}
}
Some(Ok(WalkEntry::Directory(dir))) => {
// Directories are only surfaced when the caller asked for them.
if self.include_directories {
let dir = normalize(dir);
return Some(Ok(PendingChangeResult::SeenDirectory(dir)));
}
}
Some(Err(e)) => {
return Some(Err(e));
}
None => {
return None;
}
};
}
}
fn next_tree(&mut self) -> Option<Result<PendingChangeResult>> {
@ -143,3 +265,8 @@ impl<M: Matcher + Clone> Iterator for PendingChanges<M> {
}
}
}
/// Normalizes a repository path yielded by the walker.
///
/// Currently this returns `path` unchanged.
fn normalize(path: RepoPathBuf) -> RepoPathBuf {
// TODO: Support path normalization on case insensitive file systems
path
}

View File

@ -26,6 +26,8 @@ pub enum WalkError {
RepoPathError(String, #[source] ParseError),
#[error("invalid file type at '{0}'")]
InvalidFileType(RepoPathBuf),
#[error("invalid mtime for '{0}': {1}")]
InvalidMTime(RepoPathBuf, #[source] anyhow::Error),
}
impl WalkError {
@ -35,6 +37,7 @@ impl WalkError {
WalkError::IOError(path, _) => path.to_string(),
WalkError::RepoPathError(path, _) => path.to_string(),
WalkError::InvalidFileType(path) => path.to_string(),
WalkError::InvalidMTime(path, _) => path.to_string(),
}
}
@ -44,6 +47,7 @@ impl WalkError {
WalkError::IOError(_, error) => error.to_string(),
WalkError::RepoPathError(_, error) => error.to_string(),
WalkError::InvalidFileType(_) => "invalid file type".to_string(),
WalkError::InvalidMTime(_, error) => format!("invalid mtime - {}", error.to_string()),
}
}
}