Improve likelihood calculation and include reasons for evaluation

Why? ==== A simple calculation ("yes, this should be removed" or "no, this is probably fine") is frankly not enough information for someone evaluating their codebase to understand why we made the decision. This introduces a removal reason, so a user understands why we ranked it the way we did, and adds additional logic around a method and its tests to determine if a method exists and is only being used in the tests (if so, it should probably be deleted). This is done with an Occurrances record, which is created for total files, test code, and non-test code. The test code logic is somewhat naive but works in most cases. It doesn't ensure a particular directory, in the case that tests live alongside source code (e.g. Go), and captures RSpec cases as well.
2024-08-15 15:50:26 +03:00 · 2016-05-08 08:16:32 -04:00 · 2016-05-08 08:16:32 -04:00 · 2650e1f040
commit 2650e1f040
parent cbd5af5954
9 changed files with 160 additions and 31 deletions
--- a/src/Unused/CLI/SearchResult.hs
+++ b/src/Unused/CLI/SearchResult.hs
@ -44,26 +44,31 @@ likelihoodColor High = Red
 likelihoodColor Medium = Yellow
 likelihoodColor Low = Green
 likelihoodColor Unknown = Black
+likelihoodColor NotCalculated = Magenta

 printMatches :: ColumnFormat -> TermResults -> [TermMatch] -> IO ()
 printMatches cf r ms =
    forM_ ms $ \m -> do
-        setSGR [SetColor Foreground Dull (likelihoodColor $ trRemovalLikelihood r)]
+        setSGR [SetColor Foreground Dull (termColor r)]
        setSGR [SetConsoleIntensity NormalIntensity]
        putStr $ "     " ++ printTerm (tmTerm m)
        setSGR [Reset]

        setSGR [SetColor Foreground Vivid Cyan]
        setSGR [SetConsoleIntensity NormalIntensity]
-        putStr $ "  " ++ printNumber (trTotalFiles r) ++ ", " ++ printNumber (trTotalOccurrences r)
+        putStr $ "  " ++ printNumber (totalFileCount r) ++ ", " ++ printNumber (totalOccurrenceCount r)
        setSGR [Reset]

        setSGR [SetColor Foreground Dull Cyan]
        setSGR [SetConsoleIntensity FaintIntensity]
        putStr $ "  " ++ printPath (tmPath m)
        setSGR [Reset]
+
+        putStr $ "  " ++ removalReason r
        putStr "\n"
  where
    printTerm = cfPrintTerm cf
    printPath = cfPrintPath cf
    printNumber = cfPrintNumber cf
+    termColor = likelihoodColor . rLikelihood . trRemoval
+    removalReason = rReason . trRemoval
--- a/src/Unused/CLI/SearchResult/ColumnFormatter.hs
+++ b/src/Unused/CLI/SearchResult/ColumnFormatter.hs
@ -4,7 +4,7 @@ module Unused.CLI.SearchResult.ColumnFormatter
    ) where

 import Text.Printf
-import Unused.Types (TermResults(..), TermMatch(..))
+import Unused.Types (TermResults(..), TermMatch(..), totalFileCount, totalOccurrenceCount)

 data ColumnFormat = ColumnFormat
    { cfPrintTerm :: String -> String
@ -37,8 +37,8 @@ numberFormat rs =
    numberWidth = maximum [fileWidth, occurrenceWidth]
    fileWidth = maximum $ fileLength =<< rs
    occurrenceWidth = maximum $ occurrenceLength =<< rs
-    fileLength = return . numberLength . trTotalFiles
-    occurrenceLength = return . numberLength . trTotalOccurrences
+    fileLength = return . numberLength . totalFileCount
+    occurrenceLength = return . numberLength . totalOccurrenceCount

 numberLength :: Int -> Int
 numberLength i =
--- a/src/Unused/LikelihoodCalculator.hs
+++ b/src/Unused/LikelihoodCalculator.hs
@ -2,19 +2,31 @@ module Unused.LikelihoodCalculator
    ( calculateLikelihood
    ) where

-import Unused.Types (TermResults, RemovalLikelihood(..), trRemovalLikelihood, trTotalOccurrences)
+import Control.Monad (ap)
+import Unused.Types
 import Unused.ResponseFilter (railsSingleOkay, elixirSingleOkay)

 calculateLikelihood :: TermResults -> TermResults
 calculateLikelihood r =
-    r { trRemovalLikelihood = newLikelihood }
+    r { trRemoval = uncurry Removal newLikelihood }
  where
-    baseScore = trTotalOccurrences r
-    railsScore = if railsSingleOkay r then 5 else 0
-    elixirScore = if elixirSingleOkay r then 5 else 0
-    totalScore = baseScore + railsScore + elixirScore
+    baseScore = totalOccurrenceCount r
+    totalScore = baseScore
    newLikelihood
-        | totalScore < 3 = High
-        | totalScore < 6 = Medium
-        | totalScore < 9 = Low
-        | otherwise = Low
+        | railsSingleOkay r = (Low, "a class, module, or migration that often occurs in only one file")
+        | elixirSingleOkay r = (Low, "a class, module, or migration that often occurs in only one file")
+        | singleNonTestUsage r && testsExist r = (High, "only the definition and corresponding tests exist")
+        | doubleNonTestUsage r && testsExist r = (Medium, "only the definition and one other use, along with tests, exists")
+        | totalScore < 2 = (High, "used infrequently")
+        | totalScore < 6 = (Medium, "used semi-frequently")
+        | totalScore >= 6 = (Low, "used frequently")
+        | otherwise = (Unknown, "could not determine likelihood")
+
+singleNonTestUsage :: TermResults -> Bool
+singleNonTestUsage = (1 ==) . oOccurrences . trAppOccurrences
+
+doubleNonTestUsage :: TermResults -> Bool
+doubleNonTestUsage = (2 ==) . oOccurrences . trAppOccurrences
+
+testsExist :: TermResults -> Bool
+testsExist = (> 0) . oOccurrences . trTestOccurrences
--- a/src/Unused/ResponseFilter.hs
+++ b/src/Unused/ResponseFilter.hs
@ -23,7 +23,7 @@ withOneOccurrence :: ParseResponse -> ParseResponse
 withOneOccurrence = applyFilter (const oneOccurence)

 oneOccurence :: TermResults -> Bool
-oneOccurence = (== 1) . trTotalOccurrences
+oneOccurence = (== 1) . totalOccurrenceCount

 withLikelihoods :: [RemovalLikelihood] -> ParseResponse -> ParseResponse
 withLikelihoods [] = id
@ -37,10 +37,10 @@ ignoringPaths xs =
    matchesPath p = any (`isInfixOf` p) xs

 oneFile :: TermResults -> Bool
-oneFile = (== 1) . trTotalFiles
+oneFile = (== 1) . totalFileCount

 includesLikelihood :: [RemovalLikelihood] -> TermResults -> Bool
-includesLikelihood l = (`elem` l) . trRemovalLikelihood
+includesLikelihood l = (`elem` l) . rLikelihood . trRemoval

 isClassOrModule :: TermResults -> Bool
 isClassOrModule = matchRegex "^[A-Z]" . trTerm
--- a/src/Unused/Types.hs
+++ b/src/Unused/Types.hs
@ -4,11 +4,16 @@ module Unused.Types
    , TermMatchSet
    , ParseResponse
    , RemovalLikelihood(..)
+    , Removal(..)
+    , Occurrences(..)
    , resultsFromMatches
+    , totalFileCount
+    , totalOccurrenceCount
    ) where

 import Text.Parsec (ParseError)
 import qualified Data.Map.Strict as Map
+import Unused.Regex

 data TermMatch = TermMatch
    { tmTerm :: String
@ -16,32 +21,83 @@ data TermMatch = TermMatch
    , tmOccurrences :: Int
    } deriving (Eq, Show)

+data Occurrences = Occurrences
+    { oFiles :: Int
+    , oOccurrences :: Int
+    } deriving (Eq, Show)
+
 data TermResults = TermResults
    { trTerm :: String
    , trMatches :: [TermMatch]
-    , trTotalFiles :: Int
-    , trTotalOccurrences :: Int
-    , trRemovalLikelihood :: RemovalLikelihood
+    , trTestOccurrences :: Occurrences
+    , trAppOccurrences :: Occurrences
+    , trTotalOccurrences :: Occurrences
+    , trRemoval :: Removal
    } deriving (Eq, Show)

-data RemovalLikelihood = High | Medium | Low | Unknown deriving (Eq, Show)
+data Removal = Removal
+    { rLikelihood :: RemovalLikelihood
+    , rReason :: String
+    } deriving (Eq, Show)
+
+data RemovalLikelihood = High | Medium | Low | Unknown | NotCalculated deriving (Eq, Show)

 type TermMatchSet = Map.Map String TermResults

 type ParseResponse = Either ParseError TermMatchSet

+totalFileCount :: TermResults -> Int
+totalFileCount = oFiles . trTotalOccurrences
+
+totalOccurrenceCount :: TermResults -> Int
+totalOccurrenceCount = oOccurrences . trTotalOccurrences
+
 resultsFromMatches :: [TermMatch] -> TermResults
 resultsFromMatches m =
    TermResults
        { trTerm = resultTerm terms
        , trMatches = m
-        , trTotalFiles = totalFiles
-        , trTotalOccurrences = totalOccurrences
-        , trRemovalLikelihood = Unknown
+        , trAppOccurrences = appOccurrence
+        , trTestOccurrences = testOccurrence
+        , trTotalOccurrences = Occurrences (sum $ map oFiles [appOccurrence, testOccurrence]) (sum $ map oOccurrences [appOccurrence, testOccurrence])
+        , trRemoval = Removal NotCalculated "Likelihood not calculated"
        }
  where
-    totalFiles = length m
-    totalOccurrences = sum $ fmap tmOccurrences m
+    testOccurrence = testOccurrences m
+    appOccurrence = appOccurrences m
    terms = map tmTerm m
    resultTerm (x:_) = x
    resultTerm _ = ""
+
+appOccurrences :: [TermMatch] -> Occurrences
+appOccurrences ms =
+    Occurrences appFiles appOccurrences'
+  where
+    totalFiles = length ms
+    totalOccurrences = sum $ map tmOccurrences ms
+    tests = testOccurrences ms
+    appFiles = totalFiles - oFiles tests
+    appOccurrences' = totalOccurrences - oOccurrences tests
+
+testOccurrences :: [TermMatch] -> Occurrences
+testOccurrences ms =
+    Occurrences totalFiles totalOccurrences
+  where
+    testMatches = filter termMatchIsTest ms
+    totalFiles = length testMatches
+    totalOccurrences = sum $ map tmOccurrences testMatches
+
+testDir :: String -> Bool
+testDir = matchRegex "(spec|tests?)\\/"
+
+testSnakeCaseFilename :: String -> Bool
+testSnakeCaseFilename = matchRegex ".*(_spec|_test)\\."
+
+testCamelCaseFilename :: String -> Bool
+testCamelCaseFilename = matchRegex ".*(Spec|Test)\\."
+
+termMatchIsTest :: TermMatch -> Bool
+termMatchIsTest m =
+    testDir path || testSnakeCaseFilename path || testCamelCaseFilename path
+  where
+    path = tmPath m
--- a/test/Unused/LikelihoodCalculatorSpec.hs
+++ b/test/Unused/LikelihoodCalculatorSpec.hs
@ -0,0 +1,55 @@
+module Unused.LikelihoodCalculatorSpec
+    ( main
+    , spec
+    ) where
+
+import Test.Hspec
+import Unused.Types
+import Unused.LikelihoodCalculator
+
+main :: IO ()
+main = hspec spec
+
+spec :: Spec
+spec = parallel $
+    describe "calculateLikelihood" $ do
+        it "prefers language-specific checks first" $ do
+            let railsMatches = [ TermMatch "ApplicationController" "app/controllers/application_controller.rb" 1 ]
+            removalLikelihood railsMatches `shouldBe` Low
+
+            let elixirMatches = [ TermMatch "AwesomeView" "web/views/awesome_view.ex" 1 ]
+            removalLikelihood elixirMatches `shouldBe` Low
+
+        it "weighs widely-used methods as low likelihood" $ do
+            let matches = [ TermMatch "full_name" "app/models/user.rb" 4
+                          , TermMatch "full_name" "app/views/application/_auth_header.rb" 1
+                          , TermMatch "full_name" "app/mailers/user_mailer.rb" 1
+                          , TermMatch "full_name" "spec/models/user_spec.rb" 10
+                          ]
+
+            removalLikelihood matches `shouldBe` Low
+
+        it "weighs only-used-once methods as high likelihood" $ do
+            let matches = [ TermMatch "obscure_method" "app/models/user.rb" 1 ]
+
+            removalLikelihood matches `shouldBe` High
+
+        it "weighs methods that seem to only be tested and never used as high likelihood" $ do
+            let matches = [ TermMatch "obscure_method" "app/models/user.rb" 1
+                          , TermMatch "obscure_method" "spec/models/user_spec.rb" 5
+                          ]
+
+            removalLikelihood matches `shouldBe` High
+
+        it "weighs methods that seem to only be tested and used in one other area as medium likelihood" $ do
+            let matches = [ TermMatch "obscure_method" "app/models/user.rb" 1
+                          , TermMatch "obscure_method" "app/controllers/user_controller.rb" 1
+                          , TermMatch "obscure_method" "spec/models/user_spec.rb" 5
+                          , TermMatch "obscure_method" "spec/controllers/user_controller_spec.rb" 5
+                          ]
+
+            removalLikelihood matches `shouldBe` Medium
+
+removalLikelihood :: [TermMatch] -> RemovalLikelihood
+removalLikelihood =
+    rLikelihood . trRemoval . calculateLikelihood . resultsFromMatches
--- a/test/Unused/ParserSpec.hs
+++ b/test/Unused/ParserSpec.hs
@ -1,7 +1,7 @@
 module Unused.ParserSpec where

 import Test.Hspec
-import Unused.Types (TermResults(..), TermMatch(..), RemovalLikelihood(..))
+import Unused.Types
 import Unused.Parser
 import qualified Data.Map.Strict as Map

@ -21,10 +21,10 @@ spec = parallel $
                            , TermMatch "method_name" "app/path/other.rb" 5
                            , TermMatch "method_name" "spec/path/foo_spec.rb" 10
                            ]
-            let r1Results = TermResults "method_name" r1Matches 3 16 Low
+            let r1Results = TermResults "method_name" r1Matches (Occurrences 1 10) (Occurrences 2 6) (Occurrences 3 16) (Removal Low "used frequently")

            let r2Matches = [ TermMatch "other" "app/path/other.rb" 1 ]
-            let r2Results = TermResults "other" r2Matches 1 1 High
+            let r2Results = TermResults "other" r2Matches (Occurrences 0 0) (Occurrences 1 1) (Occurrences 1 1) (Removal High "used infrequently")

            let (Right result) = parseLines input

--- a/test/Unused/TypesSpec.hs
+++ b/test/Unused/TypesSpec.hs
@ -15,4 +15,4 @@ spec = parallel $
                          ]

            resultsFromMatches matches `shouldBe`
-                TermResults "ApplicationController" matches 2 11 Unknown
+                TermResults "ApplicationController" matches (Occurrences 1 10) (Occurrences 1 1) (Occurrences 2 11) (Removal NotCalculated "Likelihood not calculated")
--- a/unused.cabal
+++ b/unused.cabal
@ -64,6 +64,7 @@ test-suite unused-test
  other-modules:       Unused.ParserSpec
                     , Unused.ResponseFilterSpec
                     , Unused.TypesSpec
+                     , Unused.LikelihoodCalculatorSpec
  ghc-options:         -threaded -rtsopts -with-rtsopts=-N -Wall -Werror
  default-language:    Haskell2010