Improve likelihood calculation and include reasons for evaluation

Why?
====

A simple calculation ("yes, this should be removed" or "no, this is
probably fine") is frankly not enough information for someone evaluating
their codebase to understand why we made the decision.

This introduces a removal reason, so a user understands why we ranked it
the way we did, and adds additional logic around a method and its tests
to determine if a method exists and is only being used in the tests (if
so, it should probably be deleted).

This is done with an Occurrances record, which is created for total
files, test code, and non-test code. The test code logic is somewhat
naive but works in most cases. It doesn't ensure a particular directory,
in the case that tests live alongside source code (e.g. Go), and
captures RSpec cases as well.
This commit is contained in:
Joshua Clayton 2016-05-08 08:16:32 -04:00
parent cbd5af5954
commit 2650e1f040
9 changed files with 160 additions and 31 deletions

View File

@ -44,26 +44,31 @@ likelihoodColor High = Red
likelihoodColor Medium = Yellow
likelihoodColor Low = Green
likelihoodColor Unknown = Black
likelihoodColor NotCalculated = Magenta
printMatches :: ColumnFormat -> TermResults -> [TermMatch] -> IO ()
printMatches cf r ms =
forM_ ms $ \m -> do
setSGR [SetColor Foreground Dull (likelihoodColor $ trRemovalLikelihood r)]
setSGR [SetColor Foreground Dull (termColor r)]
setSGR [SetConsoleIntensity NormalIntensity]
putStr $ " " ++ printTerm (tmTerm m)
setSGR [Reset]
setSGR [SetColor Foreground Vivid Cyan]
setSGR [SetConsoleIntensity NormalIntensity]
putStr $ " " ++ printNumber (trTotalFiles r) ++ ", " ++ printNumber (trTotalOccurrences r)
putStr $ " " ++ printNumber (totalFileCount r) ++ ", " ++ printNumber (totalOccurrenceCount r)
setSGR [Reset]
setSGR [SetColor Foreground Dull Cyan]
setSGR [SetConsoleIntensity FaintIntensity]
putStr $ " " ++ printPath (tmPath m)
setSGR [Reset]
putStr $ " " ++ removalReason r
putStr "\n"
where
printTerm = cfPrintTerm cf
printPath = cfPrintPath cf
printNumber = cfPrintNumber cf
termColor = likelihoodColor . rLikelihood . trRemoval
removalReason = rReason . trRemoval

View File

@ -4,7 +4,7 @@ module Unused.CLI.SearchResult.ColumnFormatter
) where
import Text.Printf
import Unused.Types (TermResults(..), TermMatch(..))
import Unused.Types (TermResults(..), TermMatch(..), totalFileCount, totalOccurrenceCount)
data ColumnFormat = ColumnFormat
{ cfPrintTerm :: String -> String
@ -37,8 +37,8 @@ numberFormat rs =
numberWidth = maximum [fileWidth, occurrenceWidth]
fileWidth = maximum $ fileLength =<< rs
occurrenceWidth = maximum $ occurrenceLength =<< rs
fileLength = return . numberLength . trTotalFiles
occurrenceLength = return . numberLength . trTotalOccurrences
fileLength = return . numberLength . totalFileCount
occurrenceLength = return . numberLength . totalOccurrenceCount
numberLength :: Int -> Int
numberLength i =

View File

@ -2,19 +2,31 @@ module Unused.LikelihoodCalculator
( calculateLikelihood
) where
import Unused.Types (TermResults, RemovalLikelihood(..), trRemovalLikelihood, trTotalOccurrences)
import Control.Monad (ap)
import Unused.Types
import Unused.ResponseFilter (railsSingleOkay, elixirSingleOkay)
calculateLikelihood :: TermResults -> TermResults
calculateLikelihood r =
r { trRemovalLikelihood = newLikelihood }
r { trRemoval = uncurry Removal newLikelihood }
where
baseScore = trTotalOccurrences r
railsScore = if railsSingleOkay r then 5 else 0
elixirScore = if elixirSingleOkay r then 5 else 0
totalScore = baseScore + railsScore + elixirScore
baseScore = totalOccurrenceCount r
totalScore = baseScore
newLikelihood
| totalScore < 3 = High
| totalScore < 6 = Medium
| totalScore < 9 = Low
| otherwise = Low
| railsSingleOkay r = (Low, "a class, module, or migration that often occurs in only one file")
| elixirSingleOkay r = (Low, "a class, module, or migration that often occurs in only one file")
| singleNonTestUsage r && testsExist r = (High, "only the definition and corresponding tests exist")
| doubleNonTestUsage r && testsExist r = (Medium, "only the definition and one other use, along with tests, exists")
| totalScore < 2 = (High, "used infrequently")
| totalScore < 6 = (Medium, "used semi-frequently")
| totalScore >= 6 = (Low, "used frequently")
| otherwise = (Unknown, "could not determine likelihood")
singleNonTestUsage :: TermResults -> Bool
singleNonTestUsage = (1 ==) . oOccurrences . trAppOccurrences
doubleNonTestUsage :: TermResults -> Bool
doubleNonTestUsage = (2 ==) . oOccurrences . trAppOccurrences
testsExist :: TermResults -> Bool
testsExist = (> 0) . oOccurrences . trTestOccurrences

View File

@ -23,7 +23,7 @@ withOneOccurrence :: ParseResponse -> ParseResponse
withOneOccurrence = applyFilter (const oneOccurence)
oneOccurence :: TermResults -> Bool
oneOccurence = (== 1) . trTotalOccurrences
oneOccurence = (== 1) . totalOccurrenceCount
withLikelihoods :: [RemovalLikelihood] -> ParseResponse -> ParseResponse
withLikelihoods [] = id
@ -37,10 +37,10 @@ ignoringPaths xs =
matchesPath p = any (`isInfixOf` p) xs
oneFile :: TermResults -> Bool
oneFile = (== 1) . trTotalFiles
oneFile = (== 1) . totalFileCount
includesLikelihood :: [RemovalLikelihood] -> TermResults -> Bool
includesLikelihood l = (`elem` l) . trRemovalLikelihood
includesLikelihood l = (`elem` l) . rLikelihood . trRemoval
isClassOrModule :: TermResults -> Bool
isClassOrModule = matchRegex "^[A-Z]" . trTerm

View File

@ -4,11 +4,16 @@ module Unused.Types
, TermMatchSet
, ParseResponse
, RemovalLikelihood(..)
, Removal(..)
, Occurrences(..)
, resultsFromMatches
, totalFileCount
, totalOccurrenceCount
) where
import Text.Parsec (ParseError)
import qualified Data.Map.Strict as Map
import Unused.Regex
data TermMatch = TermMatch
{ tmTerm :: String
@ -16,32 +21,83 @@ data TermMatch = TermMatch
, tmOccurrences :: Int
} deriving (Eq, Show)
data Occurrences = Occurrences
{ oFiles :: Int
, oOccurrences :: Int
} deriving (Eq, Show)
data TermResults = TermResults
{ trTerm :: String
, trMatches :: [TermMatch]
, trTotalFiles :: Int
, trTotalOccurrences :: Int
, trRemovalLikelihood :: RemovalLikelihood
, trTestOccurrences :: Occurrences
, trAppOccurrences :: Occurrences
, trTotalOccurrences :: Occurrences
, trRemoval :: Removal
} deriving (Eq, Show)
data RemovalLikelihood = High | Medium | Low | Unknown deriving (Eq, Show)
data Removal = Removal
{ rLikelihood :: RemovalLikelihood
, rReason :: String
} deriving (Eq, Show)
data RemovalLikelihood = High | Medium | Low | Unknown | NotCalculated deriving (Eq, Show)
type TermMatchSet = Map.Map String TermResults
type ParseResponse = Either ParseError TermMatchSet
totalFileCount :: TermResults -> Int
totalFileCount = oFiles . trTotalOccurrences
totalOccurrenceCount :: TermResults -> Int
totalOccurrenceCount = oOccurrences . trTotalOccurrences
resultsFromMatches :: [TermMatch] -> TermResults
resultsFromMatches m =
TermResults
{ trTerm = resultTerm terms
, trMatches = m
, trTotalFiles = totalFiles
, trTotalOccurrences = totalOccurrences
, trRemovalLikelihood = Unknown
, trAppOccurrences = appOccurrence
, trTestOccurrences = testOccurrence
, trTotalOccurrences = Occurrences (sum $ map oFiles [appOccurrence, testOccurrence]) (sum $ map oOccurrences [appOccurrence, testOccurrence])
, trRemoval = Removal NotCalculated "Likelihood not calculated"
}
where
totalFiles = length m
totalOccurrences = sum $ fmap tmOccurrences m
testOccurrence = testOccurrences m
appOccurrence = appOccurrences m
terms = map tmTerm m
resultTerm (x:_) = x
resultTerm _ = ""
appOccurrences :: [TermMatch] -> Occurrences
appOccurrences ms =
Occurrences appFiles appOccurrences'
where
totalFiles = length ms
totalOccurrences = sum $ map tmOccurrences ms
tests = testOccurrences ms
appFiles = totalFiles - oFiles tests
appOccurrences' = totalOccurrences - oOccurrences tests
testOccurrences :: [TermMatch] -> Occurrences
testOccurrences ms =
Occurrences totalFiles totalOccurrences
where
testMatches = filter termMatchIsTest ms
totalFiles = length testMatches
totalOccurrences = sum $ map tmOccurrences testMatches
testDir :: String -> Bool
testDir = matchRegex "(spec|tests?)\\/"
testSnakeCaseFilename :: String -> Bool
testSnakeCaseFilename = matchRegex ".*(_spec|_test)\\."
testCamelCaseFilename :: String -> Bool
testCamelCaseFilename = matchRegex ".*(Spec|Test)\\."
termMatchIsTest :: TermMatch -> Bool
termMatchIsTest m =
testDir path || testSnakeCaseFilename path || testCamelCaseFilename path
where
path = tmPath m

View File

@ -0,0 +1,55 @@
module Unused.LikelihoodCalculatorSpec
( main
, spec
) where
import Test.Hspec
import Unused.Types
import Unused.LikelihoodCalculator
main :: IO ()
main = hspec spec
spec :: Spec
spec = parallel $
describe "calculateLikelihood" $ do
it "prefers language-specific checks first" $ do
let railsMatches = [ TermMatch "ApplicationController" "app/controllers/application_controller.rb" 1 ]
removalLikelihood railsMatches `shouldBe` Low
let elixirMatches = [ TermMatch "AwesomeView" "web/views/awesome_view.ex" 1 ]
removalLikelihood elixirMatches `shouldBe` Low
it "weighs widely-used methods as low likelihood" $ do
let matches = [ TermMatch "full_name" "app/models/user.rb" 4
, TermMatch "full_name" "app/views/application/_auth_header.rb" 1
, TermMatch "full_name" "app/mailers/user_mailer.rb" 1
, TermMatch "full_name" "spec/models/user_spec.rb" 10
]
removalLikelihood matches `shouldBe` Low
it "weighs only-used-once methods as high likelihood" $ do
let matches = [ TermMatch "obscure_method" "app/models/user.rb" 1 ]
removalLikelihood matches `shouldBe` High
it "weighs methods that seem to only be tested and never used as high likelihood" $ do
let matches = [ TermMatch "obscure_method" "app/models/user.rb" 1
, TermMatch "obscure_method" "spec/models/user_spec.rb" 5
]
removalLikelihood matches `shouldBe` High
it "weighs methods that seem to only be tested and used in one other area as medium likelihood" $ do
let matches = [ TermMatch "obscure_method" "app/models/user.rb" 1
, TermMatch "obscure_method" "app/controllers/user_controller.rb" 1
, TermMatch "obscure_method" "spec/models/user_spec.rb" 5
, TermMatch "obscure_method" "spec/controllers/user_controller_spec.rb" 5
]
removalLikelihood matches `shouldBe` Medium
removalLikelihood :: [TermMatch] -> RemovalLikelihood
removalLikelihood =
rLikelihood . trRemoval . calculateLikelihood . resultsFromMatches

View File

@ -1,7 +1,7 @@
module Unused.ParserSpec where
import Test.Hspec
import Unused.Types (TermResults(..), TermMatch(..), RemovalLikelihood(..))
import Unused.Types
import Unused.Parser
import qualified Data.Map.Strict as Map
@ -21,10 +21,10 @@ spec = parallel $
, TermMatch "method_name" "app/path/other.rb" 5
, TermMatch "method_name" "spec/path/foo_spec.rb" 10
]
let r1Results = TermResults "method_name" r1Matches 3 16 Low
let r1Results = TermResults "method_name" r1Matches (Occurrences 1 10) (Occurrences 2 6) (Occurrences 3 16) (Removal Low "used frequently")
let r2Matches = [ TermMatch "other" "app/path/other.rb" 1 ]
let r2Results = TermResults "other" r2Matches 1 1 High
let r2Results = TermResults "other" r2Matches (Occurrences 0 0) (Occurrences 1 1) (Occurrences 1 1) (Removal High "used infrequently")
let (Right result) = parseLines input

View File

@ -15,4 +15,4 @@ spec = parallel $
]
resultsFromMatches matches `shouldBe`
TermResults "ApplicationController" matches 2 11 Unknown
TermResults "ApplicationController" matches (Occurrences 1 10) (Occurrences 1 1) (Occurrences 2 11) (Removal NotCalculated "Likelihood not calculated")

View File

@ -64,6 +64,7 @@ test-suite unused-test
other-modules: Unused.ParserSpec
, Unused.ResponseFilterSpec
, Unused.TypesSpec
, Unused.LikelihoodCalculatorSpec
ghc-options: -threaded -rtsopts -with-rtsopts=-N -Wall -Werror
default-language: Haskell2010