Use .gitignore to determine files for fingerprinting a project

Why?
====

Because a .gitignore file captures a fair number of project-specific
directories and files to ignore, we can use this list to reduce the
number of files to look at when determining a fingerprint for a project.

Because the fingerprint should be based only on files whose changes we
care about, the project-specific .gitignore is a great place to start.

This drastically reduces the time it takes to compute a fingerprint. For
larger projects, or projects with a massive number of files (e.g. anything
making significant use of NPM and a front-end framework), this will help
make caching usable. For normal projects, this cuts fingerprint
calculation to 10%-20% of what it was previously.

Closes #38
This commit is contained in:
Joshua Clayton 2016-05-26 15:25:43 -04:00
parent 279cdfa494
commit f618d8a796
5 changed files with 95 additions and 1 deletions

View File

@ -3,6 +3,9 @@ module Unused.Cache.DirectoryFingerprint
) where
import System.Process
import Data.Maybe (fromMaybe)
import Unused.Cache.FindArgsFromIgnoredPaths
import Unused.Util (readIfFileExists)
sha :: IO String
sha =
@ -13,10 +16,16 @@ sha =
head' _ = ""
fileList :: IO String
fileList = readProcess "find" [".", "-type", "f", "-not", "-path", "*/tmp/unused/*", "-exec", "md5", "{}", "+"] ""
-- Runs `find` from the current directory over regular files, skipping
-- anything under a .git directory plus whatever exclusions 'findArgs'
-- derives from the lines of .gitignore, and hands the surviving files to
-- `md5` via -exec. The process's stdout is returned.
fileList =
    ignoredPaths >>= \ignored ->
        readProcess "find" (baseArgs ++ findArgs ignored ++ hashArgs) ""
  where
    baseArgs = [".", "-type", "f", "-not", "-path", "*/.git/*"]
    hashArgs = ["-exec", "md5", "{}", "+"]
-- | Pipe the given text through @sort -k 2@ and return the sorted output.
sortInput :: String -> IO String
sortInput input = readProcess "sort" ["-k", "2"] input
-- | Feed the given text to the @md5@ executable on stdin and return what it
-- prints.
md5Result :: String -> IO String
md5Result input = readProcess "md5" [] input
-- | The lines of @.gitignore@ in the current directory, or an empty list
-- when 'readIfFileExists' reports that there is no such file.
ignoredPaths :: IO [String]
ignoredPaths = maybe [] lines <$> readIfFileExists ".gitignore"

View File

@ -0,0 +1,47 @@
module Unused.Cache.FindArgsFromIgnoredPaths
( findArgs
) where
import Data.Char (isAlphaNum)
import Data.List (isSuffixOf)
import System.FilePath
-- | Turn ignore-file entries into a flat list of @find@ arguments.
--
-- Entries rejected by 'validIgnoreOptions' are dropped; each remaining
-- entry contributes the arguments produced by 'ignoreToFindArgs', in order.
findArgs :: [String] -> [String]
findArgs entries =
    [ arg
    | entry <- validIgnoreOptions entries
    , arg <- ignoreToFindArgs entry
    ]
-- | Make an entry start with @*/@.
--
-- An entry already starting with @*/@ is returned untouched. A single
-- leading @*@ or @/@ is replaced by @*/@; anything else simply gets @*/@
-- prepended.
wildcardPrefix :: String -> String
wildcardPrefix entry =
    case entry of
        '*':'/':_ -> entry
        '*':rest -> anyDir rest
        '/':rest -> anyDir rest
        _ -> anyDir entry
  where
    anyDir = ("*/" ++)
-- | Build the @find@ exclusion arguments for one (already prefixed) entry.
--
-- * Final path component contains @*@: exclude by @-path@ using the entry
--   as-is.
-- * Final path component is empty (entry ends in a separator): exclude by
--   @-path@ using 'wildcardSuffix'.
-- * Otherwise: exclude both by @-name@ with the entry itself and by @-path@
--   with 'wildcardSuffix'.
--
-- NOTE(review): in the last case the @-name@ argument is the whole entry,
-- which 'ignoreToFindArgs' has already prefixed with @*/@. @find -name@
-- compares against the basename only, so a pattern containing @/@ probably
-- never matches -- confirm whether @takeFileName@ was intended here.
toExclusions :: String -> [String]
toExclusions glob
    | isWildcardFilename glob = notPath glob
    | isMissingFilename glob = notPath (wildcardSuffix glob)
    | otherwise = ["-not", "-name", glob] ++ notPath (wildcardSuffix glob)
  where
    notPath p = ["-not", "-path", p]
-- | Convert a single ignore entry into @find@ arguments: first normalise its
-- prefix with 'wildcardPrefix', then expand it with 'toExclusions'.
ignoreToFindArgs :: String -> [String]
ignoreToFindArgs entry = toExclusions (wildcardPrefix entry)
-- | Make an entry match everything beneath it.
--
-- Entries whose final component already contains @*@ are returned as-is.
-- Otherwise @*@ is appended when the entry ends in @/@, and @/*@ when it
-- does not.
wildcardSuffix :: String -> String
wildcardSuffix entry =
    if isWildcardFilename entry
        then entry
        else entry ++ trailer
  where
    trailer = if "/" `isSuffixOf` entry then "*" else "/*"
-- | True when the final path component (per 'takeFileName') contains a @*@.
isWildcardFilename :: String -> Bool
isWildcardFilename entry = '*' `elem` takeFileName entry
-- | True when 'takeFileName' yields nothing for the entry, i.e. the entry
-- has no final path component.
isMissingFilename :: String -> Bool
isMissingFilename = null . takeFileName
-- | Keep only the ignore entries this module knows how to translate.
--
-- An entry is kept when its first character is @/@, @.@ or alphanumeric.
-- Empty lines and entries starting with any other character (for example
-- @#@, @!@ or @*@) are dropped.
validIgnoreOptions :: [String] -> [String]
validIgnoreOptions = filter isPath
  where
    -- Match on the first character directly instead of calling the partial
    -- 'head', so the empty-string case is handled by the pattern itself.
    isPath [] = False
    isPath (c:_) = c == '/' || c == '.' || isAlphaNum c

View File

@ -1,8 +1,10 @@
module Unused.Util
( groupBy
, stringToInt
, readIfFileExists
) where
import System.Directory (doesFileExist)
import Control.Arrow ((&&&))
import qualified Data.List as L
import Data.Function
@ -19,3 +21,11 @@ stringToInt xs
| otherwise = Nothing
where
loop = foldl (\acc x -> acc * 10 + digitToInt x)
-- | Read a file's contents if 'doesFileExist' says the path is a file.
--
-- Yields @Just contents@ (via 'readFile') when it exists and @Nothing@
-- otherwise.
readIfFileExists :: String -> IO (Maybe String)
readIfFileExists path =
    doesFileExist path >>= \present ->
        if present
            then fmap Just (readFile path)
            else return Nothing

View File

@ -0,0 +1,27 @@
module Unused.Cache.FindArgsFromIgnoredPathsSpec
( main
, spec
) where
import Test.Hspec
import Unused.Cache.FindArgsFromIgnoredPaths
-- Entry point that runs this module's 'spec' through hspec.
main :: IO ()
main = hspec spec
-- Examples pinning how ignore entries are translated into `find` exclusion
-- arguments by 'findArgs'.
spec :: Spec
spec =
    parallel . describe "findArgs" $ do
        it "converts paths" $
            findArgs ["a/*", "/b/*", "c/"]
                `shouldBe` concatMap notPath ["*/a/*", "*/b/*", "*/c/*"]
        it "converts wildcards" $
            findArgs ["a/*.csv", "/b/*.csv"]
                `shouldBe` concatMap notPath ["*/a/*.csv", "*/b/*.csv"]
        it "filenames and paths at the same time" $
            findArgs ["/.foreman", ".bundle/"]
                `shouldBe` (["-not", "-name", "*/.foreman"]
                                ++ concatMap notPath ["*/.foreman/*", "*/.bundle/*"])
  where
    -- One `-not -path GLOB` triple.
    notPath glob = ["-not", "-path", glob]

View File

@ -33,6 +33,7 @@ library
, Unused.LikelihoodCalculator
, Unused.Cache
, Unused.Cache.DirectoryFingerprint
, Unused.Cache.FindArgsFromIgnoredPaths
, Unused.TagsSource
, Unused.CLI
, Unused.CLI.Search