pathmatcher: allow errors in match function definition

Summary:
The most common scenario where we see matcher errors is when we iterate through
a manifest and the user sends SIGTERM to the process. The matcher may be both
Rust and Python code. The Python code handles the interrupt and prevents future
function calls. The iterating Rust code will continue to call matcher functions
through this time so we get matcher errors from the terminated Python stack.

As long as we have Python matcher code, errors are valid.
It is unclear to me whether the matcher trait should have `Result` return
values when all implementations are Rust. It is easy to imagine implementations
that can fail in different circumstances but the ones that we use if we just
port the Python code wouldn't fail.
All in all, I think that this is a reasonable step forward.

Reviewed By: quark-zju

Differential Revision: D25697099

fbshipit-source-id: f61c80bd0a8caa58040a447ed02d48a1ae84ad60
This commit is contained in:
Stefan Filip 2021-01-07 16:21:11 -08:00 committed by Facebook GitHub Bot
parent 9c3d099697
commit 02606da6c5
13 changed files with 125 additions and 102 deletions

View File

@ -4,6 +4,7 @@ version = "0.1.0"
edition = "2018"
[dependencies]
anyhow = "1.0"
cpython-ext = { path = "../../../../lib/cpython-ext", default-features = false }
cpython = { version = "0.5", default-features = false }
pathmatcher = { path = "../../../../lib/pathmatcher" }

View File

@ -10,9 +10,10 @@
use std::path::Path;
use cpython::*;
use cpython_ext::error::ResultPyErrExt;
use cpython_ext::error::{AnyhowResultExt, ResultPyErrExt};
use cpython_ext::{PyPath, PyPathBuf, Str};
use anyhow::Result;
use pathmatcher::{DirectoryMatch, GitignoreMatcher, Matcher, TreeMatcher};
use types::RepoPath;
@ -97,12 +98,12 @@ impl<'a> PythonMatcher<'a> {
}
impl<'a> Matcher for PythonMatcher<'a> {
fn matches_directory(&self, path: &RepoPath) -> DirectoryMatch {
matches_directory_impl(self.py, &self.py_matcher, &path)
fn matches_directory(&self, path: &RepoPath) -> Result<DirectoryMatch> {
matches_directory_impl(self.py, &self.py_matcher, &path).into_anyhow_result()
}
fn matches_file(&self, path: &RepoPath) -> bool {
matches_file_impl(self.py, &self.py_matcher, &path)
fn matches_file(&self, path: &RepoPath) -> Result<bool> {
matches_file_impl(self.py, &self.py_matcher, &path).into_anyhow_result()
}
}
@ -129,29 +130,31 @@ impl Clone for UnsafePythonMatcher {
}
impl<'a> Matcher for UnsafePythonMatcher {
fn matches_directory(&self, path: &RepoPath) -> DirectoryMatch {
fn matches_directory(&self, path: &RepoPath) -> Result<DirectoryMatch> {
let assumed_py = unsafe { Python::assume_gil_acquired() };
matches_directory_impl(assumed_py, &self.py_matcher, &path)
matches_directory_impl(assumed_py, &self.py_matcher, &path).into_anyhow_result()
}
fn matches_file(&self, path: &RepoPath) -> bool {
fn matches_file(&self, path: &RepoPath) -> Result<bool> {
let assumed_py = unsafe { Python::assume_gil_acquired() };
matches_file_impl(assumed_py, &self.py_matcher, &path)
matches_file_impl(assumed_py, &self.py_matcher, &path).into_anyhow_result()
}
}
fn matches_directory_impl(py: Python, py_matcher: &PyObject, path: &RepoPath) -> DirectoryMatch {
fn matches_directory_impl(
py: Python,
py_matcher: &PyObject,
path: &RepoPath,
) -> PyResult<DirectoryMatch> {
let py_path = PyPathBuf::from(path);
// PANICS! The interface in Rust doesn't expose exceptions. Unwrapping seems fine since
// it crashes the rust stuff and returns a rust exception to Python.
let py_value = py_matcher
.call_method(py, "visitdir", (py_path,), None)
.unwrap();
let py_value = py_matcher.call_method(py, "visitdir", (py_path,), None)?;
let is_all = PyString::extract(py, &py_value)
.and_then(|py_str| py_str.to_string(py).map(|s| s == "all"))
.unwrap_or(false);
if is_all {
let matches = if is_all {
DirectoryMatch::Everything
} else {
if py_value.is_true(py).unwrap() {
@ -159,16 +162,14 @@ fn matches_directory_impl(py: Python, py_matcher: &PyObject, path: &RepoPath) ->
} else {
DirectoryMatch::Nothing
}
}
};
Ok(matches)
}
fn matches_file_impl(py: Python, py_matcher: &PyObject, path: &RepoPath) -> bool {
fn matches_file_impl(py: Python, py_matcher: &PyObject, path: &RepoPath) -> PyResult<bool> {
let py_path = PyPathBuf::from(path);
// PANICS! The interface in Rust doesn't expose exceptions. Unwrapping seems fine since
// it crashes the rust stuff and returns a rust exception to Python.
py_matcher
.call(py, (py_path,), None)
.unwrap()
.is_true(py)
.unwrap()
let matches = py_matcher.call(py, (py_path,), None)?.is_true(py)?;
Ok(matches)
}

View File

@ -7,8 +7,6 @@ edition = "2018"
anyhow = "1.0.20"
cpython = { version = "0.5", default-features = false }
cpython-ext = { path = "../../../../lib/cpython-ext", default-features = false }
pathmatcher = { path = "../../../../lib/pathmatcher" }
pypathmatcher = { path = "../pypathmatcher" }
pytreestate = { path = "../pytreestate" }
types = { path = "../../../../lib/types" }
workingcopy = { path = "../../../../lib/workingcopy" }

View File

@ -42,7 +42,9 @@ py_class!(class physicalfilesystem |py| {
let fs = self.filesystem(py);
let treestate = pytreestate.get_state(py);
let last_write = last_write.into();
let pending = fs.borrow().pending_changes(treestate, matcher, include_directories, last_write);
let pending = fs.borrow()
.pending_changes(treestate, matcher, include_directories, last_write)
.map_pyerr(py)?;
pendingchanges::create_instance(py, RefCell::new(pending))
}
});
@ -80,7 +82,8 @@ py_class!(class walker |py| {
data _errors: RefCell<Vec<Error>>;
def __new__(_cls, root: PyPathBuf, pymatcher: PyObject, include_directories: bool) -> PyResult<walker> {
let matcher = UnsafePythonMatcher::new(pymatcher);
walker::create_instance(py, RefCell::new(Walker::new(root.to_path_buf(), matcher, include_directories)), RefCell::new(Vec::new()))
let walker = Walker::new(root.to_path_buf(), matcher, include_directories).map_pyerr(py)?;
walker::create_instance(py, RefCell::new(walker), RefCell::new(Vec::new()))
}
def __iter__(&self) -> PyResult<Self> {

View File

@ -206,21 +206,21 @@ fn diff_single<'a>(
) -> Result<Vec<DiffEntry>> {
let (files, dirs) = dir.list(store)?;
let items = dirs
.into_iter()
.filter(|d| matcher.matches_directory(&d.path) != DirectoryMatch::Nothing)
.map(|d| DiffItem::Single(d, side));
next.extend(items);
let entries = files
.into_iter()
.filter(|f| matcher.matches_file(&f.path))
.map(|f| match side {
Side::Left => DiffEntry::left(f),
Side::Right => DiffEntry::right(f),
})
.collect();
for d in dirs.into_iter() {
if matcher.matches_directory(&d.path)? != DirectoryMatch::Nothing {
next.push_back(DiffItem::Single(d, side));
}
}
let mut entries = Vec::new();
for f in files.into_iter() {
if matcher.matches_file(&f.path)? {
let entry = match side {
Side::Left => DiffEntry::left(f),
Side::Right => DiffEntry::right(f),
};
entries.push(entry);
}
}
Ok(entries)
}
@ -239,8 +239,8 @@ fn diff<'a>(
) -> Result<Vec<DiffEntry>> {
let (lfiles, ldirs) = left.list(lstore)?;
let (rfiles, rdirs) = right.list(rstore)?;
next.extend(diff_dirs(ldirs, rdirs, matcher));
Ok(diff_files(lfiles, rfiles, matcher))
next.extend(diff_dirs(ldirs, rdirs, matcher)?);
diff_files(lfiles, rfiles, matcher)
}
/// Given two sorted file lists, return diff entries for non-matching files.
@ -248,13 +248,14 @@ fn diff_files<'a>(
lfiles: Vec<File>,
rfiles: Vec<File>,
matcher: &'a dyn Matcher,
) -> Vec<DiffEntry> {
) -> Result<Vec<DiffEntry>> {
let mut output = Vec::new();
let mut add_to_output = |entry: DiffEntry| {
if matcher.matches_file(&entry.path) {
let mut add_to_output = |entry: DiffEntry| -> Result<()> {
if matcher.matches_file(&entry.path)? {
output.push(entry);
}
Ok(())
};
debug_assert!(is_sorted(&lfiles));
@ -269,30 +270,30 @@ fn diff_files<'a>(
match (lfile, rfile) {
(Some(l), Some(r)) => match l.path.cmp(&r.path) {
Ordering::Less => {
add_to_output(DiffEntry::left(l));
add_to_output(DiffEntry::left(l))?;
lfile = lfiles.next();
rfile = Some(r);
}
Ordering::Greater => {
add_to_output(DiffEntry::right(r));
add_to_output(DiffEntry::right(r))?;
lfile = Some(l);
rfile = rfiles.next();
}
Ordering::Equal => {
if l.meta != r.meta {
add_to_output(DiffEntry::changed(l, r));
add_to_output(DiffEntry::changed(l, r))?;
}
lfile = lfiles.next();
rfile = rfiles.next();
}
},
(Some(l), None) => {
add_to_output(DiffEntry::left(l));
add_to_output(DiffEntry::left(l))?;
lfile = lfiles.next();
rfile = None;
}
(None, Some(r)) => {
add_to_output(DiffEntry::right(r));
add_to_output(DiffEntry::right(r))?;
lfile = None;
rfile = rfiles.next();
}
@ -300,7 +301,7 @@ fn diff_files<'a>(
}
}
output
Ok(output)
}
/// Given two sorted directory lists, return diff items for non-matching directories.
@ -308,13 +309,14 @@ fn diff_dirs<'a>(
ldirs: Vec<DirLink<'a>>,
rdirs: Vec<DirLink<'a>>,
matcher: &'a dyn Matcher,
) -> Vec<DiffItem<'a>> {
) -> Result<Vec<DiffItem<'a>>> {
let mut output = Vec::new();
let mut add_to_output = |item: DiffItem<'a>| {
if matcher.matches_directory(item.path()) != DirectoryMatch::Nothing {
let mut add_to_output = |item: DiffItem<'a>| -> Result<()> {
if matcher.matches_directory(item.path())? != DirectoryMatch::Nothing {
output.push(item);
}
Ok(())
};
debug_assert!(is_sorted(&ldirs));
@ -329,12 +331,12 @@ fn diff_dirs<'a>(
match (ldir, rdir) {
(Some(l), Some(r)) => match l.path.cmp(&r.path) {
Ordering::Less => {
add_to_output(DiffItem::left(l));
add_to_output(DiffItem::left(l))?;
ldir = ldirs.next();
rdir = Some(r);
}
Ordering::Greater => {
add_to_output(DiffItem::right(r));
add_to_output(DiffItem::right(r))?;
ldir = Some(l);
rdir = rdirs.next();
}
@ -344,19 +346,19 @@ fn diff_dirs<'a>(
// have not yet been persisted), in which case we must manually compare
// all of the entries since we can't tell if they are the same.
if l.hgid() != r.hgid() || l.hgid().is_none() {
add_to_output(DiffItem::Changed(l, r));
add_to_output(DiffItem::Changed(l, r))?;
}
ldir = ldirs.next();
rdir = rdirs.next();
}
},
(Some(l), None) => {
add_to_output(DiffItem::left(l));
add_to_output(DiffItem::left(l))?;
ldir = ldirs.next();
rdir = None;
}
(None, Some(r)) => {
add_to_output(DiffItem::right(r));
add_to_output(DiffItem::right(r))?;
ldir = None;
rdir = rdirs.next();
}
@ -364,7 +366,7 @@ fn diff_dirs<'a>(
}
}
output
Ok(output)
}
fn is_sorted<T: Ord>(iter: impl IntoIterator<Item = T>) -> bool {
@ -478,7 +480,7 @@ mod tests {
];
let matcher = AlwaysMatcher::new();
let entries = diff_files(lfiles, rfiles, &matcher);
let entries = diff_files(lfiles, rfiles, &matcher).unwrap();
let expected = vec![
DiffEntry::new(
repo_path_buf("b"),

View File

@ -79,8 +79,10 @@ impl<'a> Iterator for BfsIter<'a> {
for (component, link) in children.iter() {
let mut child_path = path.clone();
child_path.push(component.as_ref());
if link.matches(&self.matcher, &child_path) {
self.queue.push_back((child_path, &link));
match link.matches(&self.matcher, &child_path) {
Ok(true) => self.queue.push_back((child_path, &link)),
Ok(false) => {}
Err(e) => return Some(Err(e)),
}
}
Some(Ok((path, FsNodeMetadata::Directory(hgid))))

View File

@ -94,11 +94,11 @@ impl Link {
}
}
pub fn matches(&self, matcher: &impl Matcher, path: &RepoPath) -> bool {
pub fn matches(&self, matcher: &impl Matcher, path: &RepoPath) -> Result<bool> {
match self {
Link::Leaf(_) => matcher.matches_file(path),
Link::Durable(_) | Link::Ephemeral(_) => {
matcher.matches_directory(path) != DirectoryMatch::Nothing
Ok(matcher.matches_directory(path)? != DirectoryMatch::Nothing)
}
}
}

View File

@ -4,6 +4,7 @@ version = "0.1.0"
edition = "2018"
[dependencies]
anyhow = "1.0"
bitflags = "1.0"
globset = "0.4.2"
ignore = "0.4"

View File

@ -9,6 +9,7 @@ use std::cell::RefCell;
use std::collections::HashMap;
use std::path::{Component, Path, PathBuf};
use anyhow::Result;
use ignore::{
self,
gitignore::{self, Glob},
@ -301,16 +302,17 @@ impl Explain {
}
impl Matcher for GitignoreMatcher {
fn matches_directory(&self, path: &RepoPath) -> DirectoryMatch {
match self.match_path(path.as_str(), true, self, &mut None) {
fn matches_directory(&self, path: &RepoPath) -> Result<DirectoryMatch> {
let dm = match self.match_path(path.as_str(), true, self, &mut None) {
MatchResult::Ignored => DirectoryMatch::Everything,
MatchResult::Included => DirectoryMatch::Nothing,
MatchResult::Unspecified => DirectoryMatch::ShouldTraverse,
}
};
Ok(dm)
}
fn matches_file(&self, path: &RepoPath) -> bool {
self.match_relative(path.as_str(), false)
fn matches_file(&self, path: &RepoPath) -> Result<bool> {
Ok(self.match_relative(path.as_str(), false))
}
}

View File

@ -11,6 +11,8 @@ mod utils;
use std::ops::Deref;
use anyhow::Result;
use types::RepoPath;
/// Limits the set of files to be operated on.
@ -19,11 +21,11 @@ pub trait Matcher {
/// It allows for fast paths where whole subtrees are skipped.
/// It should be noted that the DirectoryMatch::ShouldTraverse return value is always correct.
/// Other values enable fast code paths only (performance).
fn matches_directory(&self, path: &RepoPath) -> DirectoryMatch;
fn matches_directory(&self, path: &RepoPath) -> Result<DirectoryMatch>;
/// Returns true when the file path should be kept in the file set and returns false when
/// it has to be removed.
fn matches_file(&self, path: &RepoPath) -> bool;
fn matches_file(&self, path: &RepoPath) -> Result<bool>;
}
/// Allows for fast code paths when dealing with patterns selecting directories.
@ -41,11 +43,11 @@ pub enum DirectoryMatch {
}
impl<T: Matcher + ?Sized, U: Deref<Target = T>> Matcher for U {
fn matches_directory(&self, path: &RepoPath) -> DirectoryMatch {
fn matches_directory(&self, path: &RepoPath) -> Result<DirectoryMatch> {
T::matches_directory(self, path)
}
fn matches_file(&self, path: &RepoPath) -> bool {
fn matches_file(&self, path: &RepoPath) -> Result<bool> {
T::matches_file(self, path)
}
}
@ -59,11 +61,11 @@ impl AlwaysMatcher {
}
impl Matcher for AlwaysMatcher {
fn matches_directory(&self, _path: &RepoPath) -> DirectoryMatch {
DirectoryMatch::Everything
fn matches_directory(&self, _path: &RepoPath) -> Result<DirectoryMatch> {
Ok(DirectoryMatch::Everything)
}
fn matches_file(&self, _path: &RepoPath) -> bool {
true
fn matches_file(&self, _path: &RepoPath) -> Result<bool> {
Ok(true)
}
}
@ -76,11 +78,11 @@ impl NeverMatcher {
}
impl Matcher for NeverMatcher {
fn matches_directory(&self, _path: &RepoPath) -> DirectoryMatch {
DirectoryMatch::Nothing
fn matches_directory(&self, _path: &RepoPath) -> Result<DirectoryMatch> {
Ok(DirectoryMatch::Nothing)
}
fn matches_file(&self, _path: &RepoPath) -> bool {
false
fn matches_file(&self, _path: &RepoPath) -> Result<bool> {
Ok(false)
}
}

View File

@ -9,6 +9,7 @@
//!
//! [TreeMatcher] is the main structure.
use anyhow::Result;
use bitflags::bitflags;
use globset::{Glob, GlobBuilder, GlobSet, GlobSetBuilder};
use std::path::Path;
@ -235,16 +236,17 @@ impl TreeMatcher {
}
impl Matcher for TreeMatcher {
fn matches_directory(&self, path: &RepoPath) -> DirectoryMatch {
match self.match_recursive(path.as_str()) {
fn matches_directory(&self, path: &RepoPath) -> Result<DirectoryMatch> {
let dm = match self.match_recursive(path.as_str()) {
Some(true) => DirectoryMatch::Everything,
Some(false) => DirectoryMatch::Nothing,
None => DirectoryMatch::ShouldTraverse,
}
};
Ok(dm)
}
fn matches_file(&self, path: &RepoPath) -> bool {
self.matches(path.as_str())
fn matches_file(&self, path: &RepoPath) -> Result<bool> {
Ok(self.matches(path.as_str()))
}
}

View File

@ -77,9 +77,9 @@ impl PhysicalFileSystem {
matcher: M,
include_directories: bool,
last_write: HgModifiedTime,
) -> PendingChanges<M> {
let walker = Walker::new(self.vfs.root().to_path_buf(), matcher.clone(), false);
PendingChanges {
) -> Result<PendingChanges<M>> {
let walker = Walker::new(self.vfs.root().to_path_buf(), matcher.clone(), false)?;
let pending_changes = PendingChanges {
vfs: self.vfs.clone(),
walker,
matcher,
@ -90,7 +90,8 @@ impl PhysicalFileSystem {
lookups: vec![],
tree_iter: None,
last_write,
}
};
Ok(pending_changes)
}
}
@ -253,8 +254,15 @@ impl<M: Matcher + Clone> PendingChanges<M> {
let tracked = tracked.unwrap();
for path in tracked.into_iter() {
if self.seen.contains(&path) || !self.matcher.matches_file(&path) {
continue;
if self.seen.contains(&path) {
match self.matcher.matches_file(&path) {
Err(e) => {
results.push(Err(e));
continue;
}
Ok(false) => continue,
Ok(true) => {}
}
}
// If it's behind a symlink consider it deleted.

View File

@ -80,18 +80,19 @@ impl<M> Walker<M>
where
M: Matcher,
{
pub fn new(root: PathBuf, matcher: M, include_directories: bool) -> Self {
pub fn new(root: PathBuf, matcher: M, include_directories: bool) -> Result<Self> {
let mut dir_matches = vec![];
if matcher.matches_directory(&RepoPathBuf::new()) != DirectoryMatch::Nothing {
if matcher.matches_directory(&RepoPathBuf::new())? != DirectoryMatch::Nothing {
dir_matches.push(RepoPathBuf::new());
}
Walker {
let walker = Walker {
root,
dir_matches,
results: Vec::new(),
matcher,
include_directories,
}
};
Ok(walker)
}
fn match_entry(&mut self, next_dir: &RepoPathBuf, entry: DirEntry) -> Result<()> {
@ -110,7 +111,7 @@ where
let mut candidate_path = next_dir.clone();
candidate_path.push(filename);
if filetype.is_file() || filetype.is_symlink() {
if self.matcher.matches_file(candidate_path.as_repo_path()) {
if self.matcher.matches_file(candidate_path.as_repo_path())? {
self.results
.push(Ok(WalkEntry::File(candidate_path, entry.metadata()?)));
}
@ -118,12 +119,12 @@ where
if filename.as_str() != ".hg"
&& self
.matcher
.matches_directory(candidate_path.as_repo_path())
.matches_directory(candidate_path.as_repo_path())?
!= DirectoryMatch::Nothing
{
self.dir_matches.push(candidate_path);
}
} else if self.matcher.matches_file(candidate_path.as_repo_path()) {
} else if self.matcher.matches_file(candidate_path.as_repo_path())? {
return Err(WalkError::InvalidFileType(filename.to_owned()).into());
}
Ok(())
@ -202,7 +203,7 @@ mod tests {
let files = vec!["dirA/a.txt", "dirA/b.txt", "dirB/dirC/dirD/c.txt"];
let root_dir = create_directory(&directories, &files)?;
let root_path = PathBuf::from(root_dir.path());
let walker = Walker::new(root_path, AlwaysMatcher::new(), false);
let walker = Walker::new(root_path, AlwaysMatcher::new(), false)?;
let walked_files: Result<Vec<_>> = walker.collect();
let walked_files = walked_files?;
assert_eq!(walked_files.len(), 3);
@ -218,7 +219,7 @@ mod tests {
let files = vec!["dirA/a.txt", "b.txt"];
let root_dir = create_directory(&directories, &files)?;
let root_path = PathBuf::from(root_dir.path());
let walker = Walker::new(root_path, NeverMatcher::new(), false);
let walker = Walker::new(root_path, NeverMatcher::new(), false)?;
let walked_files: Vec<_> = walker.collect();
assert!(walked_files.is_empty());
Ok(())