pathmatcher: add Matcher trait

Summary:
"matcher.py" is used in several places where the file tree is traversed.
We need to implement the matcher functionality in the Rust manifest
implementation. We define a common type to be used in our internal code.

In our current state of interfacing a lot with Python, fast paths for matching
full trees are useful, so we have 3 states for matching a directory:
 * everything in the directory subtree should be returned
 * nothing in the directory subtree should be returned
 * another state where there is no fast path and the directory should be
 traversed recursively; this state is always valid to return and does not
 impact correctness

The interface for the Matcher is defined in relationship to RepoPath. We store
paths internally in the same binary format no matter the operating system path
representation. Using std::path would incur a translation cost.

Reviewed By: quark-zju

Differential Revision: D16352528

fbshipit-source-id: 61b259f4347cfaf6f74ee36fa5955e45e4beb739
This commit is contained in:
Stefan Filip 2019-07-22 12:56:51 -07:00 committed by Facebook Github Bot
parent cb57f5d7ac
commit 75fd92d1a2
4 changed files with 118 additions and 5 deletions

View File

@ -7,6 +7,7 @@ edition = "2018"
bitflags = "1.0"
globset = "0.4.2"
ignore = "0.4"
types = { path = "../types" }
[dev-dependencies]
tempfile = "3.0.7"

View File

@ -3,14 +3,18 @@
// This software may be used and distributed according to the terms of the
// GNU General Public License version 2 or any later version.
use std::cell::RefCell;
use std::collections::HashMap;
use std::path::{Component, Path, PathBuf};
use ignore::{
self,
gitignore::{self, Glob},
Match,
};
use std::cell::RefCell;
use std::collections::HashMap;
use std::path::{Component, Path, PathBuf};
use types::RepoPath;
use crate::{DirectoryMatch, Matcher};
/// Lazy `.gitignore` matcher that loads `.gitignore` files on demand.
pub struct GitignoreMatcher {
@ -114,13 +118,14 @@ impl GitignoreMatcher {
}
/// Check .gitignore for the relative path.
fn match_path(
fn match_path<P: AsRef<Path>>(
&self,
path: &Path,
path: P,
is_dir: bool,
root: &GitignoreMatcher,
explain: &mut Option<&mut Explain>,
) -> MatchResult {
let path = path.as_ref();
// Everything is ignored regardless if this directory is ignored.
if self.ignored {
if let Some(ref mut explain) = explain {
@ -293,6 +298,20 @@ impl Explain {
}
}
impl Matcher for GitignoreMatcher {
    /// Classifies the directory `path` for a tree traversal.
    ///
    /// The result of `match_path` (queried with `is_dir = true`) is mapped as follows:
    /// * `Ignored` -> `DirectoryMatch::Everything`
    /// * `Whitelisted` / `Unspecified` -> `DirectoryMatch::ShouldTraverse`
    fn matches_directory(&self, path: &RepoPath) -> DirectoryMatch {
        match self.match_path(path.as_str(), true, self, &mut None) {
            MatchResult::Ignored => DirectoryMatch::Everything,
            // A whitelisted directory says nothing about the files below it: they can
            // still be ignored by other rules (or by nested `.gitignore` files), so the
            // `Nothing` fast path would be incorrect here. `ShouldTraverse` is always a
            // valid answer and lets every descendant be checked individually.
            MatchResult::Whitelisted | MatchResult::Unspecified => DirectoryMatch::ShouldTraverse,
        }
    }

    /// Returns the result of `match_relative` for `path`, queried as a file
    /// (`is_dir = false`).
    fn matches_file(&self, path: &RepoPath) -> bool {
        self.match_relative(path.as_str(), false)
    }
}
#[cfg(test)]
mod tests {
use super::*;

View File

@ -7,6 +7,81 @@ mod gitignore_matcher;
mod tree_matcher;
mod utils;
use std::ops::Deref;
use types::RepoPath;
/// Limits the set of files to be operated on.
///
/// Paths are expressed as `RepoPath` values. Implementations answer two questions:
/// how a whole directory subtree relates to the file set (`matches_directory`) and
/// whether a single file belongs to the file set (`matches_file`).
pub trait Matcher {
/// This method is intended for tree traversals of the file system.
/// It allows for fast paths where whole subtrees are either taken or skipped.
/// It should be noted that the `DirectoryMatch::ShouldTraverse` return value is always
/// correct. The other values only enable fast code paths (performance).
fn matches_directory(&self, path: &RepoPath) -> DirectoryMatch;
/// Returns `true` when the file path should be kept in the file set and `false` when
/// it has to be removed.
fn matches_file(&self, path: &RepoPath) -> bool;
}
/// Result of matching a directory; allows for fast code paths when dealing with patterns
/// that select whole directories.
///
/// `ShouldTraverse` is always a valid value to return. The other two variants are
/// shortcuts that let a traversal avoid looking at individual entries of a subtree.
#[derive(Debug, Clone, Copy, Eq, Hash, PartialEq)]
pub enum DirectoryMatch {
/// All the files in the subtree of the given directory need to be part of the
/// returned file set.
Everything,
/// No files in the subtree of the given directory will be part of the returned
/// file set. Recursive traversal can be stopped at this point.
Nothing,
/// No additional information is available. The subtree should be traversed and the
/// matcher should continue to be queried for the entries found there.
ShouldTraverse,
}
/// Forwards `Matcher` through any pointer-like type that dereferences to a `Matcher`,
/// so wrappers exposing `Deref<Target = T>` can be used wherever a matcher is expected.
impl<T, U> Matcher for U
where
    T: Matcher + ?Sized,
    U: Deref<Target = T>,
{
    /// Delegates to the `matches_directory` of the dereferenced target.
    fn matches_directory(&self, path: &RepoPath) -> DirectoryMatch {
        // `self` is `&U`; reborrow explicitly through `Deref` to reach the inner `&T`.
        let target: &T = &**self;
        T::matches_directory(target, path)
    }

    /// Delegates to the `matches_file` of the dereferenced target.
    fn matches_file(&self, path: &RepoPath) -> bool {
        let target: &T = &**self;
        T::matches_file(target, path)
    }
}
/// A stateless matcher value; see its `Matcher` implementation for the answers it gives.
///
/// The type carries no data, so it is cheap to copy and can be built with either
/// `AlwaysMatcher::new()` or `AlwaysMatcher::default()`.
#[derive(Debug, Clone, Copy, Default)]
pub struct AlwaysMatcher {}

impl AlwaysMatcher {
    /// Creates a new `AlwaysMatcher`.
    pub fn new() -> Self {
        Self {}
    }
}
impl Matcher for AlwaysMatcher {
    /// Every directory is reported as fully matched, whatever the path.
    fn matches_directory(&self, _: &RepoPath) -> DirectoryMatch {
        DirectoryMatch::Everything
    }

    /// Every file is kept in the file set, whatever the path.
    fn matches_file(&self, _: &RepoPath) -> bool {
        true
    }
}
/// A stateless matcher value; see its `Matcher` implementation for the answers it gives.
///
/// The type carries no data, so it is cheap to copy and can be built with either
/// `NeverMatcher::new()` or `NeverMatcher::default()`.
#[derive(Debug, Clone, Copy, Default)]
pub struct NeverMatcher {}

impl NeverMatcher {
    /// Creates a new `NeverMatcher`.
    pub fn new() -> Self {
        Self {}
    }
}
impl Matcher for NeverMatcher {
    /// No directory subtree contributes any file, whatever the path.
    fn matches_directory(&self, _: &RepoPath) -> DirectoryMatch {
        DirectoryMatch::Nothing
    }

    /// Every file is removed from the file set, whatever the path.
    fn matches_file(&self, _: &RepoPath) -> bool {
        false
    }
}
pub use gitignore_matcher::GitignoreMatcher;
pub use tree_matcher::TreeMatcher;
pub use utils::expand_curly_brackets;

View File

@ -11,6 +11,10 @@ use bitflags::bitflags;
use globset::{Glob, GlobBuilder, GlobSet, GlobSetBuilder};
use std::path::Path;
use types::RepoPath;
use crate::{DirectoryMatch, Matcher};
bitflags! {
struct RuleFlags: u8 {
// A negative rule.
@ -217,6 +221,20 @@ impl TreeMatcher {
}
}
impl Matcher for TreeMatcher {
    /// Translates the tri-state answer of `match_recursive` into a `DirectoryMatch`:
    /// `Some(true)` -> `Everything`, `Some(false)` -> `Nothing`, `None` -> `ShouldTraverse`.
    fn matches_directory(&self, path: &RepoPath) -> DirectoryMatch {
        let verdict = self.match_recursive(path.as_str());
        if let Some(whole_subtree) = verdict {
            if whole_subtree {
                DirectoryMatch::Everything
            } else {
                DirectoryMatch::Nothing
            }
        } else {
            // No definite answer for the subtree: entries must be checked one by one.
            DirectoryMatch::ShouldTraverse
        }
    }

    /// Reports whether `path` is selected, as decided by `matches`.
    fn matches_file(&self, path: &RepoPath) -> bool {
        let raw_path = path.as_str();
        self.matches(raw_path)
    }
}
fn build_glob(pat: &str) -> Option<Glob> {
GlobBuilder::new(pat)
.literal_separator(true) // `*` or `?` should not match `/`