From f618d8a7964d6d0631239f12717f70e67c7d7def Mon Sep 17 00:00:00 2001
From: Joshua Clayton
Date: Thu, 26 May 2016 15:25:43 -0400
Subject: [PATCH] Use .gitignore to determine files for fingerprinting a project

Why?
====

Because a .gitignore file captures a fair number of project-specific
directories and files to ignore, we can use this list to reduce the
number of files to look at when determining a fingerprint for a project.

Because the fingerprint should be based on files we care about changing,
the project-specific .gitignore is a great place to start.

This drastically reduces fingerprint timing - for larger projects, or
projects with a massive number of files (e.g. anything doing anything
significant with NPM and a front-end framework), this will help make
caching usable. For normal projects, this cuts fingerprint calculation
to 10%-20% of what it was previously.

Closes #38
---
 src/Unused/Cache/DirectoryFingerprint.hs      | 11 ++++-
 src/Unused/Cache/FindArgsFromIgnoredPaths.hs  | 60 +++++++++++++++++++
 src/Unused/Util.hs                            | 10 ++++
 .../Cache/FindArgsFromIgnoredPathsSpec.hs     | 27 +++++++++++
 unused.cabal                                  |  1 +
 5 files changed, 108 insertions(+), 1 deletion(-)
 create mode 100644 src/Unused/Cache/FindArgsFromIgnoredPaths.hs
 create mode 100644 test/Unused/Cache/FindArgsFromIgnoredPathsSpec.hs

diff --git a/src/Unused/Cache/DirectoryFingerprint.hs b/src/Unused/Cache/DirectoryFingerprint.hs
index 067860e..62aa6ba 100644
--- a/src/Unused/Cache/DirectoryFingerprint.hs
+++ b/src/Unused/Cache/DirectoryFingerprint.hs
@@ -3,6 +3,9 @@ module Unused.Cache.DirectoryFingerprint
     ) where
 
 import System.Process
+import Data.Maybe (fromMaybe)
+import Unused.Cache.FindArgsFromIgnoredPaths
+import Unused.Util (readIfFileExists)
 
 sha :: IO String
 sha =
@@ -13,10 +16,16 @@ sha =
     head' _ = ""
 
 fileList :: IO String
-fileList = readProcess "find" [".", "-type", "f", "-not", "-path", "*/tmp/unused/*", "-exec", "md5", "{}", "+"] ""
+fileList = do
+    filterNamePathArgs <- findArgs <$> ignoredPaths
+    let args = [".", "-type", "f", "-not", "-path", "*/.git/*"] ++ filterNamePathArgs ++ ["-exec", "md5", "{}", "+"]
+    readProcess "find" args ""
 
 sortInput :: String -> IO String
 sortInput = readProcess "sort" ["-k", "2"]
 
 md5Result :: String -> IO String
 md5Result = readProcess "md5" []
+
+ignoredPaths :: IO [String]
+ignoredPaths = fromMaybe [] <$> (fmap lines <$> readIfFileExists ".gitignore")
diff --git a/src/Unused/Cache/FindArgsFromIgnoredPaths.hs b/src/Unused/Cache/FindArgsFromIgnoredPaths.hs
new file mode 100644
index 0000000..a842212
--- /dev/null
+++ b/src/Unused/Cache/FindArgsFromIgnoredPaths.hs
@@ -0,0 +1,60 @@
+module Unused.Cache.FindArgsFromIgnoredPaths
+    ( findArgs
+    ) where
+
+import Data.Char (isAlphaNum)
+import Data.List (isSuffixOf)
+import System.FilePath
+
+-- | Turn ignore-file entries into @find@ tests that exclude them.
+findArgs :: [String] -> [String]
+findArgs = concatMap ignoreToFindArgs . validIgnoreOptions
+
+-- | Make a pattern start with @*/@ so it can match at any depth.
+wildcardPrefix :: String -> String
+wildcardPrefix a@('*':'/':_) = a
+wildcardPrefix ('*':s) = "*/" ++ s
+wildcardPrefix ('/':s) = "*/" ++ s
+wildcardPrefix a = "*/" ++ a
+
+-- | Build the @-not@ tests for an already-prefixed pattern.
+--
+-- A plain entry is excluded both as a file and as a directory. Both tests
+-- use @-path@ because @-name@ only sees the base name, so a pattern that
+-- contains a slash (every prefixed pattern does) could never match it.
+toExclusions :: String -> [String]
+toExclusions s =
+    case (isWildcardFilename s, isMissingFilename s) of
+        (True, _) -> ["-not", "-path", s]
+        (_, True) -> ["-not", "-path", wildcardSuffix s]
+        (_, False) -> ["-not", "-path", s, "-not", "-path", wildcardSuffix s]
+
+-- | Convert one ignore entry into its @find@ exclusion arguments.
+ignoreToFindArgs :: String -> [String]
+ignoreToFindArgs = toExclusions . wildcardPrefix
+
+-- | Extend a directory pattern with a trailing @*@ to cover its contents.
+wildcardSuffix :: String -> String
+wildcardSuffix s
+    | isWildcardFilename s = s
+    | "/" `isSuffixOf` s = s ++ "*"
+    | otherwise = s ++ "/*"
+
+-- | Whether the final path component contains a @*@.
+isWildcardFilename :: String -> Bool
+isWildcardFilename = elem '*' . takeFileName
+
+-- | Whether the pattern has an empty final path component.
+isMissingFilename :: String -> Bool
+isMissingFilename s = takeFileName s == ""
+
+-- | Keep only entries that start with a slash, a dot or an alphanumeric
+-- character; every other line of the ignore file is dropped.
+validIgnoreOptions :: [String] -> [String]
+validIgnoreOptions =
+    filter isPath
+  where
+    isPath "" = False
+    isPath ('/':_) = True
+    isPath ('.':_) = True
+    isPath (c:_) = isAlphaNum c
diff --git a/src/Unused/Util.hs b/src/Unused/Util.hs
index ac1894c..efb48ff 100644
--- a/src/Unused/Util.hs
+++ b/src/Unused/Util.hs
@@ -1,8 +1,10 @@
 module Unused.Util
     ( groupBy
     , stringToInt
+    , readIfFileExists
     ) where
 
+import System.Directory (doesFileExist)
 import Control.Arrow ((&&&))
 import qualified Data.List as L
 import Data.Function
@@ -19,3 +21,11 @@ stringToInt xs
     | otherwise = Nothing
   where
     loop = foldl (\acc x -> acc * 10 + digitToInt x)
+
+readIfFileExists :: String -> IO (Maybe String)
+readIfFileExists path = do
+    exists <- doesFileExist path
+
+    if exists
+        then Just <$> readFile path
+        else return Nothing
diff --git a/test/Unused/Cache/FindArgsFromIgnoredPathsSpec.hs b/test/Unused/Cache/FindArgsFromIgnoredPathsSpec.hs
new file mode 100644
index 0000000..079cbe4
--- /dev/null
+++ b/test/Unused/Cache/FindArgsFromIgnoredPathsSpec.hs
@@ -0,0 +1,27 @@
+module Unused.Cache.FindArgsFromIgnoredPathsSpec
+    ( main
+    , spec
+    ) where
+
+import Test.Hspec
+import Unused.Cache.FindArgsFromIgnoredPaths
+
+main :: IO ()
+main = hspec spec
+
+spec :: Spec
+spec = parallel $
+    describe "findArgs" $ do
+        it "converts paths" $
+            findArgs ["a/*", "/b/*", "c/"] `shouldBe` [ "-not", "-path", "*/a/*"
+                                                      , "-not", "-path", "*/b/*"
+                                                      , "-not", "-path", "*/c/*"]
+
+        it "converts wildcards" $
+            findArgs ["a/*.csv", "/b/*.csv"] `shouldBe` [ "-not", "-path", "*/a/*.csv"
+                                                        , "-not", "-path", "*/b/*.csv"]
+
+        it "filenames and paths at the same time" $
+            findArgs ["/.foreman", ".bundle/"] `shouldBe` [ "-not", "-path", "*/.foreman"
+                                                          , "-not", "-path", "*/.foreman/*"
+                                                          , "-not", "-path", "*/.bundle/*"]
diff --git a/unused.cabal b/unused.cabal
index 5c389f6..38e4f20 100644
--- a/unused.cabal
+++ b/unused.cabal
@@ -33,6 +33,7 @@ library
                      , Unused.LikelihoodCalculator
                      , Unused.Cache
                      , Unused.Cache.DirectoryFingerprint
+                     , Unused.Cache.FindArgsFromIgnoredPaths
                      , Unused.TagsSource
                      , Unused.CLI
                      , Unused.CLI.Search