Simplify parsing and caching of results

Why?
====

Parsec is overkill when all that's really needed is splitting on
semicolons and converting a string to a non-negative Int.

One side-effect of this is to convert the caching mechanism from flat
text to CSV, with cassava handling (de-)serialization.

Additional
==========

Introduce ReaderT to calculate sha once per cache interaction

Previously, we were calculating the fingerprint (SHA) for match results
potentially twice, once when reading from the cache, and a second time
if no cache was found. This introduces a ReaderT to manage cache
interaction with a single fingerprint calculation.

This also abstracts what's being cached to only care about the fact that
the data can be converted to/from csv.
This commit is contained in:
Joshua Clayton 2016-05-23 17:21:08 -04:00
parent f618d8a796
commit 0d2470815d
15 changed files with 118 additions and 167 deletions

View File

@ -3,12 +3,13 @@ module Main where
import Options.Applicative
import System.IO (hSetBuffering, BufferMode(NoBuffering), stdout)
import Data.Maybe (fromMaybe)
import Unused.Parser (parseLines)
import Unused.Parser (parseResults)
import Unused.Types (TermMatchSet, RemovalLikelihood(..))
import Unused.TermSearch (SearchResults(..), fromResults)
import Unused.ResultsClassifier
import Unused.ResponseFilter (withOneOccurrence, withLikelihoods, ignoringPaths)
import Unused.Grouping (CurrentGrouping(..), groupedResponses)
import Unused.CLI (SearchRunner(..), withoutCursor, renderHeader, executeSearch, printParseError, printMissingTagsFileError, printSearchResults, resetScreen, withInterruptHandler)
import Unused.CLI (SearchRunner(..), withoutCursor, renderHeader, executeSearch, printMissingTagsFileError, printSearchResults, resetScreen, withInterruptHandler)
import Unused.Cache
import Unused.TagsSource
@ -47,13 +48,13 @@ run options = withoutCursor $ do
languageConfig <- loadLanguageConfig
results <- withCache options $ unlines <$> executeSearch (oSearchRunner options) terms
results <- withCache options $ executeSearch (oSearchRunner options) terms
let response = parseLines languageConfig results
let response = parseResults languageConfig results
resetScreen
either printParseError (printResults options) response
printResults options response
return ()
@ -67,8 +68,8 @@ calculateTagInput :: Options -> IO (Either TagSearchOutcome [String])
calculateTagInput Options{ oFromStdIn = True } = loadTagsFromPipe
calculateTagInput Options{ oFromStdIn = False } = loadTagsFromFile
withCache :: Options -> IO String -> IO String
withCache Options{ oWithCache = True } = cached
withCache :: Options -> IO SearchResults -> IO SearchResults
withCache Options{ oWithCache = True } = fmap SearchResults . cached "term-matches" . fmap fromResults
withCache Options{ oWithCache = False } = id
withInfo :: Parser a -> String -> String -> String -> ParserInfo a

View File

@ -3,7 +3,6 @@ module Unused.CLI
) where
import Unused.CLI.Search as X
import Unused.CLI.SearchError as X
import Unused.CLI.MissingTagsFileError as X
import Unused.CLI.SearchResult as X
import Unused.CLI.Util as X

View File

@ -4,7 +4,7 @@ module Unused.CLI.Search
, executeSearch
) where
import Unused.TermSearch (search)
import Unused.TermSearch (SearchResults, search)
import Unused.CLI.Util
import Unused.CLI.ProgressIndicator
@ -15,7 +15,7 @@ renderHeader terms = do
resetScreen
printAnalysisHeader terms
executeSearch :: SearchRunner -> [String] -> IO [String]
executeSearch :: SearchRunner -> [String] -> IO SearchResults
executeSearch runner terms = do
renderHeader terms
runSearch runner terms <* resetScreen
@ -33,6 +33,6 @@ printAnalysisHeader terms = do
setSGR [Reset]
putStr " terms"
runSearch :: SearchRunner -> [String] -> IO [String]
runSearch :: SearchRunner -> [String] -> IO SearchResults
runSearch SearchWithProgress = progressWithIndicator search createProgressBar
runSearch SearchWithoutProgress = progressWithIndicator search createSpinner

View File

@ -1,24 +0,0 @@
module Unused.CLI.SearchError
( printParseError
) where
import Unused.Parser (ParseError)
import Unused.CLI.Util
printParseError :: ParseError -> IO ()
printParseError e = do
setSGR [SetColor Background Vivid Red]
setSGR [SetColor Foreground Vivid White]
setSGR [SetConsoleIntensity BoldIntensity]
putStrLn "\nThere was a problem parsing the data:\n"
setSGR [Reset]
setSGR [SetColor Foreground Vivid Red]
setSGR [SetConsoleIntensity BoldIntensity]
print e
putStr "\n"
setSGR [Reset]

View File

@ -2,33 +2,45 @@ module Unused.Cache
( cached
) where
import Control.Monad.Trans.Reader
import System.Directory
import Data.Csv (FromRecord, ToRecord, HasHeader(..), encode, decode)
import Data.Vector (toList)
import qualified Data.ByteString.Lazy as BS
import Unused.Cache.DirectoryFingerprint (sha)
cached :: IO String -> IO String
cached f = maybe (writeCache =<< f) return =<< readCache
cached :: (FromRecord a, ToRecord a) => String -> IO [a] -> IO [a]
cached context f =
runReaderT fromCache =<< cacheFileName context
where
fromCache = maybe (writeCache =<< liftReaderT f) return =<< readCache
writeCache :: String -> IO String
writeCache :: ToRecord a => [a] -> ReaderT String IO [a]
writeCache contents = do
createDirectoryIfMissing True cacheDirectory
fileName <- cacheFileName
writeFile fileName contents
liftReaderT $ createDirectoryIfMissing True cacheDirectory
fileName <- ask
liftReaderT $ BS.writeFile fileName $ encode contents
return contents
readCache :: IO (Maybe String)
readCache :: FromRecord a => ReaderT String IO (Maybe [a])
readCache = do
putStrLn "\n\nCalculating cache fingerprint... "
fileName <- cacheFileName
exists <- doesFileExist fileName
fileName <- ask
exists <- liftReaderT $ doesFileExist fileName
if exists
then Just <$> readFile fileName
then fmap processCsv (decode NoHeader <$> liftReaderT (BS.readFile fileName))
else return Nothing
where
processCsv = either (const Nothing) (Just . toList)
cacheFileName :: IO String
cacheFileName = do
cacheFileName :: String -> IO String
cacheFileName context = do
putStrLn "\n\nCalculating cache fingerprint... "
currentSha <- sha
return $ cacheDirectory ++ "/" ++ currentSha ++ ".cache"
return $ cacheDirectory ++ "/" ++ context ++ "-" ++ currentSha ++ ".csv"
cacheDirectory :: String
cacheDirectory = "tmp/unused"
liftReaderT :: m a -> ReaderT r m a
liftReaderT m = ReaderT $ const m

View File

@ -1,19 +1,14 @@
module Unused.Parser
( parseLines
, ParseError
( parseResults
) where
import Data.Bifunctor (second)
import qualified Data.Map.Strict as Map
import Unused.Util (groupBy)
import Unused.Types (ParseResponse, TermMatchSet, TermMatch, resultsFromMatches, tmTerm)
import Unused.TermSearch (SearchResults, fromResults)
import Unused.Types (TermMatchSet, resultsFromMatches, tmTerm)
import Unused.LikelihoodCalculator
import Unused.Parser.Internal
parseLines :: [LanguageConfiguration] -> String -> ParseResponse
parseLines lcs =
fmap (matchesToMatchSet lcs) . parse parseTermMatches "matches"
matchesToMatchSet :: [LanguageConfiguration] -> [TermMatch] -> TermMatchSet
matchesToMatchSet lcs =
Map.fromList . map (second $ calculateLikelihood lcs . resultsFromMatches) . groupBy tmTerm
parseResults :: [LanguageConfiguration] -> SearchResults -> TermMatchSet
parseResults lcs =
Map.fromList . map (second $ calculateLikelihood lcs . resultsFromMatches) . groupBy tmTerm . fromResults

View File

@ -1,47 +0,0 @@
module Unused.Parser.Internal
( parseTermMatches
, parseTermMatch
, parse
, ParseError
) where
import Control.Monad (void)
import Data.Maybe (fromMaybe)
import Text.Parsec
import Text.Parsec.String (Parser)
import Unused.Types (TermMatch(..))
import Unused.Util (stringToInt)
parseTermMatches :: Parser [TermMatch]
parseTermMatches = many1 parseTermMatch <* eof
parseTermMatch :: Parser TermMatch
parseTermMatch = do
colonSep
path <- pathParser
colonSep
occurrences <- occurrenceParser
colonSep
term <- termParser
void eol
return $ TermMatch term path occurrences
where
colonSep = void $ try $ char ':'
termParser :: Parser String
termParser = many1 (noneOf "\n")
pathParser :: Parser String
pathParser = many1 (noneOf ":")
occurrenceParser :: Parser Int
occurrenceParser =
fromMaybe 0 . stringToInt <$> many1 digit
eol :: Parser String
eol = try (string "\n\r")
<|> try (string "\r\n")
<|> string "\n"
<|> string "\r"
<?> "end of line"

View File

@ -1,22 +1,18 @@
module Unused.TermSearch
( search
( SearchResults(..)
, fromResults
, search
) where
import System.Process
import Unused.TermSearch.Internal (commandLineOptions)
import Data.Maybe (mapMaybe)
import Unused.TermSearch.Types
import Unused.TermSearch.Internal
search :: String -> IO [String]
search :: String -> IO SearchResults
search t = do
results <- ag t
return $ linesMap suffixTerm results
where
suffixTerm = (++ (":" ++ t))
linesMap :: (String -> String) -> String -> [String]
linesMap f =
filter empty . map f . lines
where
empty = not . null
results <- lines <$> ag t
return $ SearchResults $ mapMaybe (parseSearchResult t) results
ag :: String -> IO String
ag t = do

View File

@ -1,14 +1,31 @@
{-# LANGUAGE OverloadedStrings #-}
module Unused.TermSearch.Internal
( commandLineOptions
, parseSearchResult
) where
import Data.Maybe (fromMaybe)
import qualified Data.Text as T
import Unused.Types (TermMatch(..))
import Unused.Regex
import Unused.Util (stringToInt)
commandLineOptions :: String -> [String]
commandLineOptions t =
case regexSafeTerm t of
True -> ["(\\W|^)" ++ t ++ "(\\W|$)", ".", "-c", "--ackmate"]
False -> [t, ".", "-c", "-Q", "--ackmate"]
True -> ["(\\W|^)" ++ t ++ "(\\W|$)", "."] ++ baseFlags
False -> [t, ".", "-Q"] ++ baseFlags
where
baseFlags = ["-c", "--ackmate", "--ignore-dir", "tmp/unused"]
parseSearchResult :: String -> String -> Maybe TermMatch
parseSearchResult term s =
toTermMatch $ map T.unpack $ T.splitOn ":" $ T.pack s
where
toTermMatch [_, path, count] = Just $ TermMatch term path (countInt count)
toTermMatch _ = Nothing
countInt = fromMaybe 0 . stringToInt
regexSafeTerm :: String -> Bool
regexSafeTerm =

View File

@ -0,0 +1,13 @@
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
module Unused.TermSearch.Types
( SearchResults(..)
, fromResults
) where
import Unused.Types (TermMatch)
newtype SearchResults = SearchResults [TermMatch] deriving (Monoid)
fromResults :: SearchResults -> [TermMatch]
fromResults (SearchResults a) = a

View File

@ -1,8 +1,9 @@
{-# LANGUAGE DeriveGeneric #-}
module Unused.Types
( TermMatch(..)
, TermResults(..)
, TermMatchSet
, ParseResponse
, RemovalLikelihood(..)
, Removal(..)
, Occurrences(..)
@ -12,15 +13,19 @@ module Unused.Types
, appOccurrenceCount
) where
import Text.Parsec (ParseError)
import qualified Data.Map.Strict as Map
import Data.Csv
import GHC.Generics
import Unused.Regex
data TermMatch = TermMatch
{ tmTerm :: String
, tmPath :: String
, tmOccurrences :: Int
} deriving (Eq, Show)
} deriving (Eq, Show, Generic)
instance FromRecord TermMatch
instance ToRecord TermMatch
data Occurrences = Occurrences
{ oFiles :: Int
@ -45,8 +50,6 @@ data RemovalLikelihood = High | Medium | Low | Unknown | NotCalculated deriving
type TermMatchSet = Map.Map String TermResults
type ParseResponse = Either ParseError TermMatchSet
totalFileCount :: TermResults -> Int
totalFileCount = oFiles . trTotalOccurrences

View File

@ -1,21 +0,0 @@
module Unused.Parser.InternalSpec where
import Test.Hspec
import Unused.Types
import Unused.Parser.Internal
main :: IO ()
main = hspec spec
spec :: Spec
spec = parallel $
describe "parseTermMatch" $ do
it "parses normal lines" $ do
let (Right result) = parse parseTermMatch "source" ":app/files/location:12:simple\n"
result `shouldBe` TermMatch "simple" "app/files/location" 12
it "parses weird lines" $ do
let (Right result) = parse parseTermMatch "source" ":app/files/location:12:Foo::Bar\n"
result `shouldBe` TermMatch "Foo::Bar" "app/files/location" 12

View File

@ -3,6 +3,7 @@ module Unused.ParserSpec where
import Test.Hspec
import Unused.Types
import Unused.Parser
import Unused.TermSearch
import Unused.ResultsClassifier
import qualified Data.Map.Strict as Map
@ -11,13 +12,8 @@ main = hspec spec
spec :: Spec
spec = parallel $
describe "parseLines" $ do
describe "parseResults" $ do
it "parses from the correct format" $ do
let input = ":app/path/foo.rb:1:method_name\n\
\:app/path/other.rb:1:other\n\
\:app/path/other.rb:5:method_name\n\
\:spec/path/foo_spec.rb:10:method_name\n"
let r1Matches = [ TermMatch "method_name" "app/path/foo.rb" 1
, TermMatch "method_name" "app/path/other.rb" 5
, TermMatch "method_name" "spec/path/foo_spec.rb" 10
@ -28,12 +24,14 @@ spec = parallel $
let r2Results = TermResults "other" r2Matches (Occurrences 0 0) (Occurrences 1 1) (Occurrences 1 1) (Removal High "used once")
(Right config) <- loadConfig
let (Right result) = parseLines config input
let searchResults = r1Matches ++ r2Matches
let result = parseResults config $ SearchResults searchResults
result `shouldBe`
Map.fromList [ ("method_name", r1Results), ("other", r2Results) ]
it "handles empty input" $ do
(Right config) <- loadConfig
let (Left result) = parseLines config ""
show result `shouldContain` "unexpected end of input"
let result = parseResults config $ SearchResults []
result `shouldBe` Map.fromList []

View File

@ -4,19 +4,27 @@ module Unused.TermSearch.InternalSpec
) where
import Test.Hspec
import Unused.Types
import Unused.TermSearch.Internal
main :: IO ()
main = hspec spec
spec :: Spec
spec = parallel $
spec = parallel $ do
describe "commandLineOptions" $ do
it "does not use regular expressions when the term contains non-word characters" $ do
commandLineOptions "can_do_things?" `shouldBe` ["can_do_things?", ".", "-c", "-Q", "--ackmate"]
commandLineOptions "no_way!" `shouldBe` ["no_way!", ".", "-c", "-Q", "--ackmate"]
commandLineOptions "[]=" `shouldBe` ["[]=", ".", "-c", "-Q", "--ackmate"]
commandLineOptions "window.globalOverride" `shouldBe` ["window.globalOverride", ".", "-c", "-Q", "--ackmate"]
commandLineOptions "can_do_things?" `shouldBe` ["can_do_things?", ".", "-Q", "-c", "--ackmate", "--ignore-dir", "tmp/unused"]
commandLineOptions "no_way!" `shouldBe` ["no_way!", ".", "-Q", "-c", "--ackmate", "--ignore-dir", "tmp/unused"]
commandLineOptions "[]=" `shouldBe` ["[]=", ".", "-Q", "-c", "--ackmate", "--ignore-dir", "tmp/unused"]
commandLineOptions "window.globalOverride" `shouldBe` ["window.globalOverride", ".", "-Q", "-c", "--ackmate", "--ignore-dir", "tmp/unused"]
it "uses regular expression match with surrounding non-word matches for accuracy" $
commandLineOptions "awesome_method" `shouldBe` ["(\\W|^)awesome_method(\\W|$)", ".", "-c", "--ackmate"]
commandLineOptions "awesome_method" `shouldBe` ["(\\W|^)awesome_method(\\W|$)", ".", "-c", "--ackmate", "--ignore-dir", "tmp/unused"]
describe "parseSearchResult" $ do
it "parses normal results from `ag` to a TermMatch" $
parseSearchResult "method_name" ":app/models/foo.rb:123" `shouldBe` (Just $ TermMatch "method_name" "app/models/foo.rb" 123)
it "returns Nothing when it cannot parse" $
parseSearchResult "method_name" "" `shouldBe` Nothing

View File

@ -17,9 +17,9 @@ data-files: data/config.yml
library
hs-source-dirs: src
exposed-modules: Unused.TermSearch
, Unused.TermSearch.Types
, Unused.TermSearch.Internal
, Unused.Parser
, Unused.Parser.Internal
, Unused.Types
, Unused.Util
, Unused.Regex
@ -37,7 +37,6 @@ library
, Unused.TagsSource
, Unused.CLI
, Unused.CLI.Search
, Unused.CLI.SearchError
, Unused.CLI.MissingTagsFileError
, Unused.CLI.SearchResult
, Unused.CLI.SearchResult.ColumnFormatter
@ -48,7 +47,6 @@ library
other-modules: Paths_unused
build-depends: base >= 4.7 && < 5
, process
, parsec
, containers
, filepath
, directory
@ -61,6 +59,10 @@ library
, bytestring
, text
, unordered-containers
, cassava
, vector
, mtl
, transformers
ghc-options: -Wall -Werror -O2
default-language: Haskell2010
@ -82,7 +84,6 @@ test-suite unused-test
, hspec
, containers
other-modules: Unused.ParserSpec
, Unused.Parser.InternalSpec
, Unused.ResponseFilterSpec
, Unused.TypesSpec
, Unused.LikelihoodCalculatorSpec