workingcopy: add error handling and reporting to rust walker

Summary:
Previously, if the rust walker encountered any issues with individual
files it would stop and throw an exception or in some cases panic. To replace
the python walker we need to handle errors more gracefully, so let's add logic
for reporting which files had issues and what those issues were.

This also contains a minor one-line fix to prevent the walker from traversing
directories that contain a '.hg' directory.

Reviewed By: quark-zju

Differential Revision: D19376540

fbshipit-source-id: ede40f2b31aa1e856c564dd9956b9f593f21cf27
This commit is contained in:
Durham Goode 2020-01-13 15:24:17 -08:00 committed by Facebook Github Bot
parent aa64745a35
commit 1c573e7569
5 changed files with 83 additions and 16 deletions

View File

@ -212,6 +212,11 @@ class physicalfilesystem(object):
st = util.lstat(join(fn))
yield fn, st
for path, walkerror in walker.errors():
path = encoding.unitolocal(path)
walkerror = encoding.unitolocal(walkerror)
match.bad(path, walkerror)
@util.timefunction("fswalk", 0, "ui")
def _walk(self, match, listignored=False):
join = self.opener.join

View File

@ -4,6 +4,7 @@ version = "0.1.0"
edition = "2018"
[dependencies]
anyhow = "1.0.20"
cpython = { version = "0.3", features = ["python27-sys"], default-features = false }
cpython-ext = { path = "../../../../lib/cpython-ext" }
encoding = { path = "../../../../lib/encoding" }

View File

@ -9,12 +9,13 @@
use std::cell::RefCell;
use anyhow::Error;
use cpython::*;
use cpython_ext::ResultPyErrExt;
use encoding::{local_bytes_to_path, repo_path_to_local_bytes};
use pypathmatcher::UnsafePythonMatcher;
use workingcopy::Walker;
use workingcopy::{WalkError, Walker};
pub fn init_module(py: Python, package: &str) -> PyResult<PyModule> {
let name = [package, "workingcopy"].join(".");
@ -25,12 +26,13 @@ pub fn init_module(py: Python, package: &str) -> PyResult<PyModule> {
py_class!(class walker |py| {
data walker: RefCell<Walker<UnsafePythonMatcher>>;
data _errors: RefCell<Vec<Error>>;
def __new__(_cls, root: PyBytes, pymatcher: PyObject) -> PyResult<walker> {
let root = local_bytes_to_path(root.data(py))
.map_pyerr(py)?
.to_path_buf();
let matcher = UnsafePythonMatcher::new(pymatcher);
walker::create_instance(py, RefCell::new(Walker::new(root, matcher)))
walker::create_instance(py, RefCell::new(Walker::new(root, matcher)), RefCell::new(Vec::new()))
}
def __iter__(&self) -> PyResult<Self> {
@ -38,11 +40,24 @@ py_class!(class walker |py| {
}
def __next__(&self) -> PyResult<Option<PyBytes>> {
let item = match self.walker(py).borrow_mut().next() {
Some(path) => Some(PyBytes::new(py, repo_path_to_local_bytes(path.map_pyerr(py)?.as_ref()))),
None => None,
};
Ok(item)
loop {
match self.walker(py).borrow_mut().next() {
Some(Ok(path)) => {
return Ok(Some(PyBytes::new(py, repo_path_to_local_bytes(path.as_ref()))));
},
Some(Err(e)) => {
self._errors(py).borrow_mut().push(e)
},
None => return Ok(None),
};
}
}
def errors(&self) -> PyResult<Vec<(String, String)>> {
Ok(self._errors(py).borrow().iter().map(|e| match e.downcast_ref::<WalkError>() {
Some(e) => (e.filename(), e.message()),
None => ("unknown".to_string(), e.to_string()),
}).collect::<Vec<(String, String)>>())
}
});

View File

@ -6,6 +6,7 @@ edition = "2018"
[dependencies]
anyhow = "1.0.20"
pathmatcher = { path = "../pathmatcher"}
thiserror = "1.0.5"
types = { path = "../types"}
[dev-dependencies]

View File

@ -7,13 +7,43 @@
use std::fs::{self, DirEntry};
use std::io;
use std::path::PathBuf;
use std::path::{Path, PathBuf};
use anyhow::Result;
use thiserror::Error;
use pathmatcher::{DirectoryMatch, Matcher};
use types::path::ParseError;
use types::{RepoPath, RepoPathBuf};
#[derive(Error, Debug)]
pub enum WalkError {
#[error("invalid file name encoding '{0}'")]
FsUtf8Error(String),
#[error("IO error at '{0}': {1}")]
IOError(RepoPathBuf, #[source] io::Error),
#[error("path error at '{0}': {1}")]
RepoPathError(String, #[source] ParseError),
}
impl WalkError {
pub fn filename(&self) -> String {
match self {
WalkError::FsUtf8Error(path) => path.to_string(),
WalkError::IOError(path, _) => path.to_string(),
WalkError::RepoPathError(path, _) => path.to_string(),
}
}
pub fn message(&self) -> String {
match self {
WalkError::FsUtf8Error(_) => "invalid file name encoding".to_string(),
WalkError::IOError(_, error) => error.to_string(),
WalkError::RepoPathError(_, error) => error.to_string(),
}
}
}
/// Walker traverses the working copy, starting at the root of the repo,
/// finding files matched by matcher
pub struct Walker<M> {
@ -40,11 +70,19 @@ where
}
}
fn match_entry(&mut self, next_dir: &RepoPathBuf, entry: io::Result<DirEntry>) -> Result<()> {
let entry = entry?;
fn match_entry(&mut self, next_dir: &RepoPathBuf, entry: DirEntry) -> Result<()> {
// It'd be nice to move all this conversion noise to a function, but having it here saves
// us from allocating filename repeatedly.
let filename = entry.file_name();
let filename = RepoPath::from_str(filename.to_str().unwrap())?;
let filetype = entry.file_type()?;
let filename = filename.to_str().ok_or(WalkError::FsUtf8Error(
filename.to_string_lossy().into_owned(),
))?;
let filename = RepoPath::from_str(filename)
.map_err(|e| WalkError::RepoPathError(filename.to_owned(), e))?;
let filetype = entry
.file_type()
.map_err(|e| WalkError::IOError(filename.to_owned(), e))?;
let mut candidate_path = next_dir.clone();
candidate_path.push(filename);
if filetype.is_file() || filetype.is_symlink() {
@ -67,10 +105,17 @@ where
/// Lazy traversal to find matching files
fn walk(&mut self) -> Result<()> {
while self.file_matches.is_empty() && !self.dir_matches.is_empty() {
let mut next_dir = self.dir_matches.pop().unwrap();
for entry in fs::read_dir(self.root.join(next_dir.as_str()))? {
if let Err(e) = self.match_entry(&mut next_dir, entry) {
self.file_matches.push(Err(e));
let next_dir = self.dir_matches.pop().unwrap();
let abs_next_dir = self.root.join(next_dir.as_str());
// Don't process the directory if it contains a .hg directory, unless it's the root.
if next_dir.is_empty() || !Path::exists(&abs_next_dir.join(".hg")) {
for entry in fs::read_dir(abs_next_dir)
.map_err(|e| WalkError::IOError(next_dir.clone(), e))?
{
let entry = entry.map_err(|e| WalkError::IOError(next_dir.clone(), e))?;
if let Err(e) = self.match_entry(&next_dir, entry) {
self.file_matches.push(Err(e));
}
}
}
}