From 2650e1f040898768f008b148246b6a4ce6fcce23 Mon Sep 17 00:00:00 2001 From: Joshua Clayton Date: Sun, 8 May 2016 08:16:32 -0400 Subject: [PATCH] Improve likelihood calculation and include reasons for evaluation Why? ==== A simple calculation ("yes, this should be removed" or "no, this is probably fine") is frankly not enough information for someone evaluating their codebase to understand why we made the decision. This introduces a removal reason, so a user understands why we ranked it the way we did, and adds additional logic around a method and its tests to determine if a method exists and is only being used in the tests (if so, it should probably be deleted). This is done with an Occurrances record, which is created for total files, test code, and non-test code. The test code logic is somewhat naive but works in most cases. It doesn't ensure a particular directory, in the case that tests live alongside source code (e.g. Go), and captures RSpec cases as well. --- src/Unused/CLI/SearchResult.hs | 9 ++- .../CLI/SearchResult/ColumnFormatter.hs | 6 +- src/Unused/LikelihoodCalculator.hs | 32 +++++--- src/Unused/ResponseFilter.hs | 6 +- src/Unused/Types.hs | 74 ++++++++++++++++--- test/Unused/LikelihoodCalculatorSpec.hs | 55 ++++++++++++++ test/Unused/ParserSpec.hs | 6 +- test/Unused/TypesSpec.hs | 2 +- unused.cabal | 1 + 9 files changed, 160 insertions(+), 31 deletions(-) create mode 100644 test/Unused/LikelihoodCalculatorSpec.hs diff --git a/src/Unused/CLI/SearchResult.hs b/src/Unused/CLI/SearchResult.hs index 41b5b74..058d5e8 100644 --- a/src/Unused/CLI/SearchResult.hs +++ b/src/Unused/CLI/SearchResult.hs @@ -44,26 +44,31 @@ likelihoodColor High = Red likelihoodColor Medium = Yellow likelihoodColor Low = Green likelihoodColor Unknown = Black +likelihoodColor NotCalculated = Magenta printMatches :: ColumnFormat -> TermResults -> [TermMatch] -> IO () printMatches cf r ms = forM_ ms $ \m -> do - setSGR [SetColor Foreground Dull (likelihoodColor $ trRemovalLikelihood r)] + setSGR [SetColor Foreground Dull (termColor r)] setSGR [SetConsoleIntensity NormalIntensity] putStr $ " " ++ printTerm (tmTerm m) setSGR [Reset] setSGR [SetColor Foreground Vivid Cyan] setSGR [SetConsoleIntensity NormalIntensity] - putStr $ " " ++ printNumber (trTotalFiles r) ++ ", " ++ printNumber (trTotalOccurrences r) + putStr $ " " ++ printNumber (totalFileCount r) ++ ", " ++ printNumber (totalOccurrenceCount r) setSGR [Reset] setSGR [SetColor Foreground Dull Cyan] setSGR [SetConsoleIntensity FaintIntensity] putStr $ " " ++ printPath (tmPath m) setSGR [Reset] + + putStr $ " " ++ removalReason r putStr "\n" where printTerm = cfPrintTerm cf printPath = cfPrintPath cf printNumber = cfPrintNumber cf + termColor = likelihoodColor . rLikelihood . trRemoval + removalReason = rReason . trRemoval diff --git a/src/Unused/CLI/SearchResult/ColumnFormatter.hs b/src/Unused/CLI/SearchResult/ColumnFormatter.hs index 116dc92..958e432 100644 --- a/src/Unused/CLI/SearchResult/ColumnFormatter.hs +++ b/src/Unused/CLI/SearchResult/ColumnFormatter.hs @@ -4,7 +4,7 @@ module Unused.CLI.SearchResult.ColumnFormatter ) where import Text.Printf -import Unused.Types (TermResults(..), TermMatch(..)) +import Unused.Types (TermResults(..), TermMatch(..), totalFileCount, totalOccurrenceCount) data ColumnFormat = ColumnFormat { cfPrintTerm :: String -> String @@ -37,8 +37,8 @@ numberFormat rs = numberWidth = maximum [fileWidth, occurrenceWidth] fileWidth = maximum $ fileLength =<< rs occurrenceWidth = maximum $ occurrenceLength =<< rs - fileLength = return . numberLength . trTotalFiles - occurrenceLength = return . numberLength . trTotalOccurrences + fileLength = return . numberLength . totalFileCount + occurrenceLength = return . numberLength . totalOccurrenceCount numberLength :: Int -> Int numberLength i = diff --git a/src/Unused/LikelihoodCalculator.hs b/src/Unused/LikelihoodCalculator.hs index 936dfca..297e705 100644 --- a/src/Unused/LikelihoodCalculator.hs +++ b/src/Unused/LikelihoodCalculator.hs @@ -2,19 +2,31 @@ module Unused.LikelihoodCalculator ( calculateLikelihood ) where -import Unused.Types (TermResults, RemovalLikelihood(..), trRemovalLikelihood, trTotalOccurrences) +import Control.Monad (ap) +import Unused.Types import Unused.ResponseFilter (railsSingleOkay, elixirSingleOkay) calculateLikelihood :: TermResults -> TermResults calculateLikelihood r = - r { trRemovalLikelihood = newLikelihood } + r { trRemoval = uncurry Removal newLikelihood } where - baseScore = trTotalOccurrences r - railsScore = if railsSingleOkay r then 5 else 0 - elixirScore = if elixirSingleOkay r then 5 else 0 - totalScore = baseScore + railsScore + elixirScore + baseScore = totalOccurrenceCount r + totalScore = baseScore newLikelihood - | totalScore < 3 = High - | totalScore < 6 = Medium - | totalScore < 9 = Low - | otherwise = Low + | railsSingleOkay r = (Low, "a class, module, or migration that often occurs in only one file") + | elixirSingleOkay r = (Low, "a class, module, or migration that often occurs in only one file") + | singleNonTestUsage r && testsExist r = (High, "only the definition and corresponding tests exist") + | doubleNonTestUsage r && testsExist r = (Medium, "only the definition and one other use, along with tests, exists") + | totalScore < 2 = (High, "used infrequently") + | totalScore < 6 = (Medium, "used semi-frequently") + | totalScore >= 6 = (Low, "used frequently") + | otherwise = (Unknown, "could not determine likelihood") + +singleNonTestUsage :: TermResults -> Bool +singleNonTestUsage = (1 ==) . oOccurrences . trAppOccurrences + +doubleNonTestUsage :: TermResults -> Bool +doubleNonTestUsage = (2 ==) . oOccurrences . trAppOccurrences + +testsExist :: TermResults -> Bool +testsExist = (> 0) . oOccurrences . trTestOccurrences diff --git a/src/Unused/ResponseFilter.hs b/src/Unused/ResponseFilter.hs index 2dfeafa..ddf611c 100644 --- a/src/Unused/ResponseFilter.hs +++ b/src/Unused/ResponseFilter.hs @@ -23,7 +23,7 @@ withOneOccurrence :: ParseResponse -> ParseResponse withOneOccurrence = applyFilter (const oneOccurence) oneOccurence :: TermResults -> Bool -oneOccurence = (== 1) . trTotalOccurrences +oneOccurence = (== 1) . totalOccurrenceCount withLikelihoods :: [RemovalLikelihood] -> ParseResponse -> ParseResponse withLikelihoods [] = id @@ -37,10 +37,10 @@ ignoringPaths xs = matchesPath p = any (`isInfixOf` p) xs oneFile :: TermResults -> Bool -oneFile = (== 1) . trTotalFiles +oneFile = (== 1) . totalFileCount includesLikelihood :: [RemovalLikelihood] -> TermResults -> Bool -includesLikelihood l = (`elem` l) . trRemovalLikelihood +includesLikelihood l = (`elem` l) . rLikelihood . trRemoval isClassOrModule :: TermResults -> Bool isClassOrModule = matchRegex "^[A-Z]" . trTerm diff --git a/src/Unused/Types.hs b/src/Unused/Types.hs index 8900c7a..b39fec7 100644 --- a/src/Unused/Types.hs +++ b/src/Unused/Types.hs @@ -4,11 +4,16 @@ module Unused.Types , TermMatchSet , ParseResponse , RemovalLikelihood(..) + , Removal(..) + , Occurrences(..) , resultsFromMatches + , totalFileCount + , totalOccurrenceCount ) where import Text.Parsec (ParseError) import qualified Data.Map.Strict as Map +import Unused.Regex data TermMatch = TermMatch { tmTerm :: String @@ -16,32 +21,83 @@ data TermMatch = TermMatch , tmOccurrences :: Int } deriving (Eq, Show) +data Occurrences = Occurrences + { oFiles :: Int + , oOccurrences :: Int + } deriving (Eq, Show) + data TermResults = TermResults { trTerm :: String , trMatches :: [TermMatch] - , trTotalFiles :: Int - , trTotalOccurrences :: Int - , trRemovalLikelihood :: RemovalLikelihood + , trTestOccurrences :: Occurrences + , trAppOccurrences :: Occurrences + , trTotalOccurrences :: Occurrences + , trRemoval :: Removal } deriving (Eq, Show) -data RemovalLikelihood = High | Medium | Low | Unknown deriving (Eq, Show) +data Removal = Removal + { rLikelihood :: RemovalLikelihood + , rReason :: String + } deriving (Eq, Show) + +data RemovalLikelihood = High | Medium | Low | Unknown | NotCalculated deriving (Eq, Show) type TermMatchSet = Map.Map String TermResults type ParseResponse = Either ParseError TermMatchSet +totalFileCount :: TermResults -> Int +totalFileCount = oFiles . trTotalOccurrences + +totalOccurrenceCount :: TermResults -> Int +totalOccurrenceCount = oOccurrences . trTotalOccurrences + resultsFromMatches :: [TermMatch] -> TermResults resultsFromMatches m = TermResults { trTerm = resultTerm terms , trMatches = m - , trTotalFiles = totalFiles - , trTotalOccurrences = totalOccurrences - , trRemovalLikelihood = Unknown + , trAppOccurrences = appOccurrence + , trTestOccurrences = testOccurrence + , trTotalOccurrences = Occurrences (sum $ map oFiles [appOccurrence, testOccurrence]) (sum $ map oOccurrences [appOccurrence, testOccurrence]) + , trRemoval = Removal NotCalculated "Likelihood not calculated" } where - totalFiles = length m - totalOccurrences = sum $ fmap tmOccurrences m + testOccurrence = testOccurrences m + appOccurrence = appOccurrences m terms = map tmTerm m resultTerm (x:_) = x resultTerm _ = "" + +appOccurrences :: [TermMatch] -> Occurrences +appOccurrences ms = + Occurrences appFiles appOccurrences' + where + totalFiles = length ms + totalOccurrences = sum $ map tmOccurrences ms + tests = testOccurrences ms + appFiles = totalFiles - oFiles tests + appOccurrences' = totalOccurrences - oOccurrences tests + +testOccurrences :: [TermMatch] -> Occurrences +testOccurrences ms = + Occurrences totalFiles totalOccurrences + where + testMatches = filter termMatchIsTest ms + totalFiles = length testMatches + totalOccurrences = sum $ map tmOccurrences testMatches + +testDir :: String -> Bool +testDir = matchRegex "(spec|tests?)\\/" + +testSnakeCaseFilename :: String -> Bool +testSnakeCaseFilename = matchRegex ".*(_spec|_test)\\." + +testCamelCaseFilename :: String -> Bool +testCamelCaseFilename = matchRegex ".*(Spec|Test)\\." + +termMatchIsTest :: TermMatch -> Bool +termMatchIsTest m = + testDir path || testSnakeCaseFilename path || testCamelCaseFilename path + where + path = tmPath m diff --git a/test/Unused/LikelihoodCalculatorSpec.hs b/test/Unused/LikelihoodCalculatorSpec.hs new file mode 100644 index 0000000..1e59bde --- /dev/null +++ b/test/Unused/LikelihoodCalculatorSpec.hs @@ -0,0 +1,55 @@ +module Unused.LikelihoodCalculatorSpec + ( main + , spec + ) where + +import Test.Hspec +import Unused.Types +import Unused.LikelihoodCalculator + +main :: IO () +main = hspec spec + +spec :: Spec +spec = parallel $ + describe "calculateLikelihood" $ do + it "prefers language-specific checks first" $ do + let railsMatches = [ TermMatch "ApplicationController" "app/controllers/application_controller.rb" 1 ] + removalLikelihood railsMatches `shouldBe` Low + + let elixirMatches = [ TermMatch "AwesomeView" "web/views/awesome_view.ex" 1 ] + removalLikelihood elixirMatches `shouldBe` Low + + it "weighs widely-used methods as low likelihood" $ do + let matches = [ TermMatch "full_name" "app/models/user.rb" 4 + , TermMatch "full_name" "app/views/application/_auth_header.rb" 1 + , TermMatch "full_name" "app/mailers/user_mailer.rb" 1 + , TermMatch "full_name" "spec/models/user_spec.rb" 10 + ] + + removalLikelihood matches `shouldBe` Low + + it "weighs only-used-once methods as high likelihood" $ do + let matches = [ TermMatch "obscure_method" "app/models/user.rb" 1 ] + + removalLikelihood matches `shouldBe` High + + it "weighs methods that seem to only be tested and never used as high likelihood" $ do + let matches = [ TermMatch "obscure_method" "app/models/user.rb" 1 + , TermMatch "obscure_method" "spec/models/user_spec.rb" 5 + ] + + removalLikelihood matches `shouldBe` High + + it "weighs methods that seem to only be tested and used in one other area as medium likelihood" $ do + let matches = [ TermMatch "obscure_method" "app/models/user.rb" 1 + , TermMatch "obscure_method" "app/controllers/user_controller.rb" 1 + , TermMatch "obscure_method" "spec/models/user_spec.rb" 5 + , TermMatch "obscure_method" "spec/controllers/user_controller_spec.rb" 5 + ] + + removalLikelihood matches `shouldBe` Medium + +removalLikelihood :: [TermMatch] -> RemovalLikelihood +removalLikelihood = + rLikelihood . trRemoval . calculateLikelihood . resultsFromMatches diff --git a/test/Unused/ParserSpec.hs b/test/Unused/ParserSpec.hs index b60c512..6826d66 100644 --- a/test/Unused/ParserSpec.hs +++ b/test/Unused/ParserSpec.hs @@ -1,7 +1,7 @@ module Unused.ParserSpec where import Test.Hspec -import Unused.Types (TermResults(..), TermMatch(..), RemovalLikelihood(..)) +import Unused.Types import Unused.Parser import qualified Data.Map.Strict as Map @@ -21,10 +21,10 @@ spec = parallel $ , TermMatch "method_name" "app/path/other.rb" 5 , TermMatch "method_name" "spec/path/foo_spec.rb" 10 ] - let r1Results = TermResults "method_name" r1Matches 3 16 Low + let r1Results = TermResults "method_name" r1Matches (Occurrences 1 10) (Occurrences 2 6) (Occurrences 3 16) (Removal Low "used frequently") let r2Matches = [ TermMatch "other" "app/path/other.rb" 1 ] - let r2Results = TermResults "other" r2Matches 1 1 High + let r2Results = TermResults "other" r2Matches (Occurrences 0 0) (Occurrences 1 1) (Occurrences 1 1) (Removal High "used infrequently") let (Right result) = parseLines input diff --git a/test/Unused/TypesSpec.hs b/test/Unused/TypesSpec.hs index 71153f3..a14c373 100644 --- a/test/Unused/TypesSpec.hs +++ b/test/Unused/TypesSpec.hs @@ -15,4 +15,4 @@ spec = parallel $ ] resultsFromMatches matches `shouldBe` - TermResults "ApplicationController" matches 2 11 Unknown + TermResults "ApplicationController" matches (Occurrences 1 10) (Occurrences 1 1) (Occurrences 2 11) (Removal NotCalculated "Likelihood not calculated") diff --git a/unused.cabal b/unused.cabal index 0fe3362..b5d24ba 100644 --- a/unused.cabal +++ b/unused.cabal @@ -64,6 +64,7 @@ test-suite unused-test other-modules: Unused.ParserSpec , Unused.ResponseFilterSpec , Unused.TypesSpec + , Unused.LikelihoodCalculatorSpec ghc-options: -threaded -rtsopts -with-rtsopts=-N -Wall -Werror default-language: Haskell2010