Simplify parsing and caching of results

Why?
====

Parsec is overkill when all that's really needed is splitting on
semicolons and converting a string to a non-negative Int.

One side-effect of this is to convert the caching mechanism from flat
text to CSV, with cassava handling (de-)serialization.

Additional
==========

Introduce ReaderT to calculate sha once per cache interaction

Previously, we were calculating the fingerprint (SHA) for match results
potentially twice, once when reading from the cache, and a second time
if no cache was found. This introduces a ReaderT to manage cache
interaction with a single fingerprint calculation.

This also abstracts what's being cached to only care about the fact that
the data can be converted to/from csv.
This commit is contained in:
Joshua Clayton 2016-05-23 17:21:08 -04:00
parent f618d8a796
commit 0d2470815d
15 changed files with 118 additions and 167 deletions

View File

@ -3,12 +3,13 @@ module Main where
import Options.Applicative import Options.Applicative
import System.IO (hSetBuffering, BufferMode(NoBuffering), stdout) import System.IO (hSetBuffering, BufferMode(NoBuffering), stdout)
import Data.Maybe (fromMaybe) import Data.Maybe (fromMaybe)
import Unused.Parser (parseLines) import Unused.Parser (parseResults)
import Unused.Types (TermMatchSet, RemovalLikelihood(..)) import Unused.Types (TermMatchSet, RemovalLikelihood(..))
import Unused.TermSearch (SearchResults(..), fromResults)
import Unused.ResultsClassifier import Unused.ResultsClassifier
import Unused.ResponseFilter (withOneOccurrence, withLikelihoods, ignoringPaths) import Unused.ResponseFilter (withOneOccurrence, withLikelihoods, ignoringPaths)
import Unused.Grouping (CurrentGrouping(..), groupedResponses) import Unused.Grouping (CurrentGrouping(..), groupedResponses)
import Unused.CLI (SearchRunner(..), withoutCursor, renderHeader, executeSearch, printParseError, printMissingTagsFileError, printSearchResults, resetScreen, withInterruptHandler) import Unused.CLI (SearchRunner(..), withoutCursor, renderHeader, executeSearch, printMissingTagsFileError, printSearchResults, resetScreen, withInterruptHandler)
import Unused.Cache import Unused.Cache
import Unused.TagsSource import Unused.TagsSource
@ -47,13 +48,13 @@ run options = withoutCursor $ do
languageConfig <- loadLanguageConfig languageConfig <- loadLanguageConfig
results <- withCache options $ unlines <$> executeSearch (oSearchRunner options) terms results <- withCache options $ executeSearch (oSearchRunner options) terms
let response = parseLines languageConfig results let response = parseResults languageConfig results
resetScreen resetScreen
either printParseError (printResults options) response printResults options response
return () return ()
@ -67,8 +68,8 @@ calculateTagInput :: Options -> IO (Either TagSearchOutcome [String])
calculateTagInput Options{ oFromStdIn = True } = loadTagsFromPipe calculateTagInput Options{ oFromStdIn = True } = loadTagsFromPipe
calculateTagInput Options{ oFromStdIn = False } = loadTagsFromFile calculateTagInput Options{ oFromStdIn = False } = loadTagsFromFile
withCache :: Options -> IO String -> IO String withCache :: Options -> IO SearchResults -> IO SearchResults
withCache Options{ oWithCache = True } = cached withCache Options{ oWithCache = True } = fmap SearchResults . cached "term-matches" . fmap fromResults
withCache Options{ oWithCache = False } = id withCache Options{ oWithCache = False } = id
withInfo :: Parser a -> String -> String -> String -> ParserInfo a withInfo :: Parser a -> String -> String -> String -> ParserInfo a

View File

@ -3,7 +3,6 @@ module Unused.CLI
) where ) where
import Unused.CLI.Search as X import Unused.CLI.Search as X
import Unused.CLI.SearchError as X
import Unused.CLI.MissingTagsFileError as X import Unused.CLI.MissingTagsFileError as X
import Unused.CLI.SearchResult as X import Unused.CLI.SearchResult as X
import Unused.CLI.Util as X import Unused.CLI.Util as X

View File

@ -4,7 +4,7 @@ module Unused.CLI.Search
, executeSearch , executeSearch
) where ) where
import Unused.TermSearch (search) import Unused.TermSearch (SearchResults, search)
import Unused.CLI.Util import Unused.CLI.Util
import Unused.CLI.ProgressIndicator import Unused.CLI.ProgressIndicator
@ -15,7 +15,7 @@ renderHeader terms = do
resetScreen resetScreen
printAnalysisHeader terms printAnalysisHeader terms
executeSearch :: SearchRunner -> [String] -> IO [String] executeSearch :: SearchRunner -> [String] -> IO SearchResults
executeSearch runner terms = do executeSearch runner terms = do
renderHeader terms renderHeader terms
runSearch runner terms <* resetScreen runSearch runner terms <* resetScreen
@ -33,6 +33,6 @@ printAnalysisHeader terms = do
setSGR [Reset] setSGR [Reset]
putStr " terms" putStr " terms"
runSearch :: SearchRunner -> [String] -> IO [String] runSearch :: SearchRunner -> [String] -> IO SearchResults
runSearch SearchWithProgress = progressWithIndicator search createProgressBar runSearch SearchWithProgress = progressWithIndicator search createProgressBar
runSearch SearchWithoutProgress = progressWithIndicator search createSpinner runSearch SearchWithoutProgress = progressWithIndicator search createSpinner

View File

@ -1,24 +0,0 @@
module Unused.CLI.SearchError
( printParseError
) where
import Unused.Parser (ParseError)
import Unused.CLI.Util
printParseError :: ParseError -> IO ()
printParseError e = do
setSGR [SetColor Background Vivid Red]
setSGR [SetColor Foreground Vivid White]
setSGR [SetConsoleIntensity BoldIntensity]
putStrLn "\nThere was a problem parsing the data:\n"
setSGR [Reset]
setSGR [SetColor Foreground Vivid Red]
setSGR [SetConsoleIntensity BoldIntensity]
print e
putStr "\n"
setSGR [Reset]

View File

@ -2,33 +2,45 @@ module Unused.Cache
( cached ( cached
) where ) where
import Control.Monad.Trans.Reader
import System.Directory import System.Directory
import Data.Csv (FromRecord, ToRecord, HasHeader(..), encode, decode)
import Data.Vector (toList)
import qualified Data.ByteString.Lazy as BS
import Unused.Cache.DirectoryFingerprint (sha) import Unused.Cache.DirectoryFingerprint (sha)
cached :: IO String -> IO String cached :: (FromRecord a, ToRecord a) => String -> IO [a] -> IO [a]
cached f = maybe (writeCache =<< f) return =<< readCache cached context f =
runReaderT fromCache =<< cacheFileName context
where
fromCache = maybe (writeCache =<< liftReaderT f) return =<< readCache
writeCache :: String -> IO String writeCache :: ToRecord a => [a] -> ReaderT String IO [a]
writeCache contents = do writeCache contents = do
createDirectoryIfMissing True cacheDirectory liftReaderT $ createDirectoryIfMissing True cacheDirectory
fileName <- cacheFileName fileName <- ask
writeFile fileName contents liftReaderT $ BS.writeFile fileName $ encode contents
return contents return contents
readCache :: IO (Maybe String) readCache :: FromRecord a => ReaderT String IO (Maybe [a])
readCache = do readCache = do
putStrLn "\n\nCalculating cache fingerprint... " fileName <- ask
fileName <- cacheFileName exists <- liftReaderT $ doesFileExist fileName
exists <- doesFileExist fileName
if exists if exists
then Just <$> readFile fileName then fmap processCsv (decode NoHeader <$> liftReaderT (BS.readFile fileName))
else return Nothing else return Nothing
where
processCsv = either (const Nothing) (Just . toList)
cacheFileName :: IO String cacheFileName :: String -> IO String
cacheFileName = do cacheFileName context = do
putStrLn "\n\nCalculating cache fingerprint... "
currentSha <- sha currentSha <- sha
return $ cacheDirectory ++ "/" ++ currentSha ++ ".cache" return $ cacheDirectory ++ "/" ++ context ++ "-" ++ currentSha ++ ".csv"
cacheDirectory :: String cacheDirectory :: String
cacheDirectory = "tmp/unused" cacheDirectory = "tmp/unused"
liftReaderT :: m a -> ReaderT r m a
liftReaderT m = ReaderT $ const m

View File

@ -1,19 +1,14 @@
module Unused.Parser module Unused.Parser
( parseLines ( parseResults
, ParseError
) where ) where
import Data.Bifunctor (second) import Data.Bifunctor (second)
import qualified Data.Map.Strict as Map import qualified Data.Map.Strict as Map
import Unused.Util (groupBy) import Unused.Util (groupBy)
import Unused.Types (ParseResponse, TermMatchSet, TermMatch, resultsFromMatches, tmTerm) import Unused.TermSearch (SearchResults, fromResults)
import Unused.Types (TermMatchSet, resultsFromMatches, tmTerm)
import Unused.LikelihoodCalculator import Unused.LikelihoodCalculator
import Unused.Parser.Internal
parseLines :: [LanguageConfiguration] -> String -> ParseResponse parseResults :: [LanguageConfiguration] -> SearchResults -> TermMatchSet
parseLines lcs = parseResults lcs =
fmap (matchesToMatchSet lcs) . parse parseTermMatches "matches" Map.fromList . map (second $ calculateLikelihood lcs . resultsFromMatches) . groupBy tmTerm . fromResults
matchesToMatchSet :: [LanguageConfiguration] -> [TermMatch] -> TermMatchSet
matchesToMatchSet lcs =
Map.fromList . map (second $ calculateLikelihood lcs . resultsFromMatches) . groupBy tmTerm

View File

@ -1,47 +0,0 @@
module Unused.Parser.Internal
( parseTermMatches
, parseTermMatch
, parse
, ParseError
) where
import Control.Monad (void)
import Data.Maybe (fromMaybe)
import Text.Parsec
import Text.Parsec.String (Parser)
import Unused.Types (TermMatch(..))
import Unused.Util (stringToInt)
parseTermMatches :: Parser [TermMatch]
parseTermMatches = many1 parseTermMatch <* eof
parseTermMatch :: Parser TermMatch
parseTermMatch = do
colonSep
path <- pathParser
colonSep
occurrences <- occurrenceParser
colonSep
term <- termParser
void eol
return $ TermMatch term path occurrences
where
colonSep = void $ try $ char ':'
termParser :: Parser String
termParser = many1 (noneOf "\n")
pathParser :: Parser String
pathParser = many1 (noneOf ":")
occurrenceParser :: Parser Int
occurrenceParser =
fromMaybe 0 . stringToInt <$> many1 digit
eol :: Parser String
eol = try (string "\n\r")
<|> try (string "\r\n")
<|> string "\n"
<|> string "\r"
<?> "end of line"

View File

@ -1,22 +1,18 @@
module Unused.TermSearch module Unused.TermSearch
( search ( SearchResults(..)
, fromResults
, search
) where ) where
import System.Process import System.Process
import Unused.TermSearch.Internal (commandLineOptions) import Data.Maybe (mapMaybe)
import Unused.TermSearch.Types
import Unused.TermSearch.Internal
search :: String -> IO [String] search :: String -> IO SearchResults
search t = do search t = do
results <- ag t results <- lines <$> ag t
return $ linesMap suffixTerm results return $ SearchResults $ mapMaybe (parseSearchResult t) results
where
suffixTerm = (++ (":" ++ t))
linesMap :: (String -> String) -> String -> [String]
linesMap f =
filter empty . map f . lines
where
empty = not . null
ag :: String -> IO String ag :: String -> IO String
ag t = do ag t = do

View File

@ -1,14 +1,31 @@
{-# LANGUAGE OverloadedStrings #-}
module Unused.TermSearch.Internal module Unused.TermSearch.Internal
( commandLineOptions ( commandLineOptions
, parseSearchResult
) where ) where
import Data.Maybe (fromMaybe)
import qualified Data.Text as T
import Unused.Types (TermMatch(..))
import Unused.Regex import Unused.Regex
import Unused.Util (stringToInt)
commandLineOptions :: String -> [String] commandLineOptions :: String -> [String]
commandLineOptions t = commandLineOptions t =
case regexSafeTerm t of case regexSafeTerm t of
True -> ["(\\W|^)" ++ t ++ "(\\W|$)", ".", "-c", "--ackmate"] True -> ["(\\W|^)" ++ t ++ "(\\W|$)", "."] ++ baseFlags
False -> [t, ".", "-c", "-Q", "--ackmate"] False -> [t, ".", "-Q"] ++ baseFlags
where
baseFlags = ["-c", "--ackmate", "--ignore-dir", "tmp/unused"]
parseSearchResult :: String -> String -> Maybe TermMatch
parseSearchResult term s =
toTermMatch $ map T.unpack $ T.splitOn ":" $ T.pack s
where
toTermMatch [_, path, count] = Just $ TermMatch term path (countInt count)
toTermMatch _ = Nothing
countInt = fromMaybe 0 . stringToInt
regexSafeTerm :: String -> Bool regexSafeTerm :: String -> Bool
regexSafeTerm = regexSafeTerm =

View File

@ -0,0 +1,13 @@
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
module Unused.TermSearch.Types
( SearchResults(..)
, fromResults
) where
import Unused.Types (TermMatch)
newtype SearchResults = SearchResults [TermMatch] deriving (Monoid)
fromResults :: SearchResults -> [TermMatch]
fromResults (SearchResults a) = a

View File

@ -1,8 +1,9 @@
{-# LANGUAGE DeriveGeneric #-}
module Unused.Types module Unused.Types
( TermMatch(..) ( TermMatch(..)
, TermResults(..) , TermResults(..)
, TermMatchSet , TermMatchSet
, ParseResponse
, RemovalLikelihood(..) , RemovalLikelihood(..)
, Removal(..) , Removal(..)
, Occurrences(..) , Occurrences(..)
@ -12,15 +13,19 @@ module Unused.Types
, appOccurrenceCount , appOccurrenceCount
) where ) where
import Text.Parsec (ParseError)
import qualified Data.Map.Strict as Map import qualified Data.Map.Strict as Map
import Data.Csv
import GHC.Generics
import Unused.Regex import Unused.Regex
data TermMatch = TermMatch data TermMatch = TermMatch
{ tmTerm :: String { tmTerm :: String
, tmPath :: String , tmPath :: String
, tmOccurrences :: Int , tmOccurrences :: Int
} deriving (Eq, Show) } deriving (Eq, Show, Generic)
instance FromRecord TermMatch
instance ToRecord TermMatch
data Occurrences = Occurrences data Occurrences = Occurrences
{ oFiles :: Int { oFiles :: Int
@ -45,8 +50,6 @@ data RemovalLikelihood = High | Medium | Low | Unknown | NotCalculated deriving
type TermMatchSet = Map.Map String TermResults type TermMatchSet = Map.Map String TermResults
type ParseResponse = Either ParseError TermMatchSet
totalFileCount :: TermResults -> Int totalFileCount :: TermResults -> Int
totalFileCount = oFiles . trTotalOccurrences totalFileCount = oFiles . trTotalOccurrences

View File

@ -1,21 +0,0 @@
module Unused.Parser.InternalSpec where
import Test.Hspec
import Unused.Types
import Unused.Parser.Internal
main :: IO ()
main = hspec spec
spec :: Spec
spec = parallel $
describe "parseTermMatch" $ do
it "parses normal lines" $ do
let (Right result) = parse parseTermMatch "source" ":app/files/location:12:simple\n"
result `shouldBe` TermMatch "simple" "app/files/location" 12
it "parses weird lines" $ do
let (Right result) = parse parseTermMatch "source" ":app/files/location:12:Foo::Bar\n"
result `shouldBe` TermMatch "Foo::Bar" "app/files/location" 12

View File

@ -3,6 +3,7 @@ module Unused.ParserSpec where
import Test.Hspec import Test.Hspec
import Unused.Types import Unused.Types
import Unused.Parser import Unused.Parser
import Unused.TermSearch
import Unused.ResultsClassifier import Unused.ResultsClassifier
import qualified Data.Map.Strict as Map import qualified Data.Map.Strict as Map
@ -11,13 +12,8 @@ main = hspec spec
spec :: Spec spec :: Spec
spec = parallel $ spec = parallel $
describe "parseLines" $ do describe "parseResults" $ do
it "parses from the correct format" $ do it "parses from the correct format" $ do
let input = ":app/path/foo.rb:1:method_name\n\
\:app/path/other.rb:1:other\n\
\:app/path/other.rb:5:method_name\n\
\:spec/path/foo_spec.rb:10:method_name\n"
let r1Matches = [ TermMatch "method_name" "app/path/foo.rb" 1 let r1Matches = [ TermMatch "method_name" "app/path/foo.rb" 1
, TermMatch "method_name" "app/path/other.rb" 5 , TermMatch "method_name" "app/path/other.rb" 5
, TermMatch "method_name" "spec/path/foo_spec.rb" 10 , TermMatch "method_name" "spec/path/foo_spec.rb" 10
@ -28,12 +24,14 @@ spec = parallel $
let r2Results = TermResults "other" r2Matches (Occurrences 0 0) (Occurrences 1 1) (Occurrences 1 1) (Removal High "used once") let r2Results = TermResults "other" r2Matches (Occurrences 0 0) (Occurrences 1 1) (Occurrences 1 1) (Removal High "used once")
(Right config) <- loadConfig (Right config) <- loadConfig
let (Right result) = parseLines config input let searchResults = r1Matches ++ r2Matches
let result = parseResults config $ SearchResults searchResults
result `shouldBe` result `shouldBe`
Map.fromList [ ("method_name", r1Results), ("other", r2Results) ] Map.fromList [ ("method_name", r1Results), ("other", r2Results) ]
it "handles empty input" $ do it "handles empty input" $ do
(Right config) <- loadConfig (Right config) <- loadConfig
let (Left result) = parseLines config "" let result = parseResults config $ SearchResults []
show result `shouldContain` "unexpected end of input" result `shouldBe` Map.fromList []

View File

@ -4,19 +4,27 @@ module Unused.TermSearch.InternalSpec
) where ) where
import Test.Hspec import Test.Hspec
import Unused.Types
import Unused.TermSearch.Internal import Unused.TermSearch.Internal
main :: IO () main :: IO ()
main = hspec spec main = hspec spec
spec :: Spec spec :: Spec
spec = parallel $ spec = parallel $ do
describe "commandLineOptions" $ do describe "commandLineOptions" $ do
it "does not use regular expressions when the term contains non-word characters" $ do it "does not use regular expressions when the term contains non-word characters" $ do
commandLineOptions "can_do_things?" `shouldBe` ["can_do_things?", ".", "-c", "-Q", "--ackmate"] commandLineOptions "can_do_things?" `shouldBe` ["can_do_things?", ".", "-Q", "-c", "--ackmate", "--ignore-dir", "tmp/unused"]
commandLineOptions "no_way!" `shouldBe` ["no_way!", ".", "-c", "-Q", "--ackmate"] commandLineOptions "no_way!" `shouldBe` ["no_way!", ".", "-Q", "-c", "--ackmate", "--ignore-dir", "tmp/unused"]
commandLineOptions "[]=" `shouldBe` ["[]=", ".", "-c", "-Q", "--ackmate"] commandLineOptions "[]=" `shouldBe` ["[]=", ".", "-Q", "-c", "--ackmate", "--ignore-dir", "tmp/unused"]
commandLineOptions "window.globalOverride" `shouldBe` ["window.globalOverride", ".", "-c", "-Q", "--ackmate"] commandLineOptions "window.globalOverride" `shouldBe` ["window.globalOverride", ".", "-Q", "-c", "--ackmate", "--ignore-dir", "tmp/unused"]
it "uses regular expression match with surrounding non-word matches for accuracy" $ it "uses regular expression match with surrounding non-word matches for accuracy" $
commandLineOptions "awesome_method" `shouldBe` ["(\\W|^)awesome_method(\\W|$)", ".", "-c", "--ackmate"] commandLineOptions "awesome_method" `shouldBe` ["(\\W|^)awesome_method(\\W|$)", ".", "-c", "--ackmate", "--ignore-dir", "tmp/unused"]
describe "parseSearchResult" $ do
it "parses normal results from `ag` to a TermMatch" $
parseSearchResult "method_name" ":app/models/foo.rb:123" `shouldBe` (Just $ TermMatch "method_name" "app/models/foo.rb" 123)
it "returns Nothing when it cannot parse" $
parseSearchResult "method_name" "" `shouldBe` Nothing

View File

@ -17,9 +17,9 @@ data-files: data/config.yml
library library
hs-source-dirs: src hs-source-dirs: src
exposed-modules: Unused.TermSearch exposed-modules: Unused.TermSearch
, Unused.TermSearch.Types
, Unused.TermSearch.Internal , Unused.TermSearch.Internal
, Unused.Parser , Unused.Parser
, Unused.Parser.Internal
, Unused.Types , Unused.Types
, Unused.Util , Unused.Util
, Unused.Regex , Unused.Regex
@ -37,7 +37,6 @@ library
, Unused.TagsSource , Unused.TagsSource
, Unused.CLI , Unused.CLI
, Unused.CLI.Search , Unused.CLI.Search
, Unused.CLI.SearchError
, Unused.CLI.MissingTagsFileError , Unused.CLI.MissingTagsFileError
, Unused.CLI.SearchResult , Unused.CLI.SearchResult
, Unused.CLI.SearchResult.ColumnFormatter , Unused.CLI.SearchResult.ColumnFormatter
@ -48,7 +47,6 @@ library
other-modules: Paths_unused other-modules: Paths_unused
build-depends: base >= 4.7 && < 5 build-depends: base >= 4.7 && < 5
, process , process
, parsec
, containers , containers
, filepath , filepath
, directory , directory
@ -61,6 +59,10 @@ library
, bytestring , bytestring
, text , text
, unordered-containers , unordered-containers
, cassava
, vector
, mtl
, transformers
ghc-options: -Wall -Werror -O2 ghc-options: -Wall -Werror -O2
default-language: Haskell2010 default-language: Haskell2010
@ -82,7 +84,6 @@ test-suite unused-test
, hspec , hspec
, containers , containers
other-modules: Unused.ParserSpec other-modules: Unused.ParserSpec
, Unused.Parser.InternalSpec
, Unused.ResponseFilterSpec , Unused.ResponseFilterSpec
, Unused.TypesSpec , Unused.TypesSpec
, Unused.LikelihoodCalculatorSpec , Unused.LikelihoodCalculatorSpec