From 97f083fc2c231ec5927fed67bca3f704531e7333 Mon Sep 17 00:00:00 2001 From: Joshua Clayton Date: Sat, 14 May 2016 08:04:35 -0400 Subject: [PATCH] Use regex in ag for simple words Why? ==== ag supports using regular expressions for searches; however, the -Q flag, which was previously always used, resulted in literal search results. Because a literal search matches the token anywhere, including inside longer words, it could return too many results. For example, with a `me` method in a controller, it'd match words like `awesome` or `method`. This introduces a check where, if the token being searched is composed only of word characters (`[A-Za-z0-9_]`), it'll switch over to use regular expressions with ag and surround the token with non-word matches on either end. The goal here is to reduce false positives in matches. --- src/Unused/TermSearch.hs | 3 ++- src/Unused/TermSearch/Internal.hs | 15 +++++++++++++++ test/Unused/TermSearch/InternalSpec.hs | 22 ++++++++++++++++++++++ unused.cabal | 2 ++ 4 files changed, 41 insertions(+), 1 deletion(-) create mode 100644 src/Unused/TermSearch/Internal.hs create mode 100644 test/Unused/TermSearch/InternalSpec.hs diff --git a/src/Unused/TermSearch.hs b/src/Unused/TermSearch.hs index 2e85119..a5a5b79 100644 --- a/src/Unused/TermSearch.hs +++ b/src/Unused/TermSearch.hs @@ -3,6 +3,7 @@ module Unused.TermSearch ) where import System.Process +import Unused.TermSearch.Internal (commandLineOptions) search :: String -> IO [String] search t = do @@ -19,5 +20,5 @@ linesMap f = ag :: String -> IO String ag t = do - (_, results, _) <- readProcessWithExitCode "ag" [t, ".", "-c", "-Q", "--ackmate"] "" + (_, results, _) <- readProcessWithExitCode "ag" (commandLineOptions t) "" return results diff --git a/src/Unused/TermSearch/Internal.hs b/src/Unused/TermSearch/Internal.hs new file mode 100644 index 0000000..60a5844 --- /dev/null +++ b/src/Unused/TermSearch/Internal.hs @@ -0,0 +1,15 @@ +module Unused.TermSearch.Internal + ( commandLineOptions + ) where + +import Unused.Regex + 
+-- | Arguments for @ag@: a word-boundary regex when @t@ is only word characters, otherwise a literal (@-Q@) search.
+commandLineOptions :: String -> [String]
+commandLineOptions t
+    | regexSafeTerm t = ["\\b" ++ t ++ "\\b", ".", "-c", "--ackmate"] -- \b is zero-width, so input edges match too
+    | otherwise = [t, ".", "-c", "-Q", "--ackmate"] -- -Q: treat the term as a literal string
+
+-- | Whether the term consists solely of word characters, so it can be embedded in a regex without escaping.
+regexSafeTerm :: String -> Bool
+regexSafeTerm = matchRegex "^[[:word:]]+$"
diff --git a/test/Unused/TermSearch/InternalSpec.hs b/test/Unused/TermSearch/InternalSpec.hs
new file mode 100644
index 0000000..83ffb8a
--- /dev/null
+++ b/test/Unused/TermSearch/InternalSpec.hs
@@ -0,0 +1,22 @@
+module Unused.TermSearch.InternalSpec
+    ( main
+    , spec
+    ) where
+
+import Test.Hspec
+import Unused.TermSearch.Internal
+
+main :: IO ()
+main = hspec spec
+
+spec :: Spec
+spec = parallel $
+    describe "commandLineOptions" $ do
+        it "does not use regular expressions when the term contains non-word characters" $ do
+            commandLineOptions "can_do_things?" `shouldBe` ["can_do_things?", ".", "-c", "-Q", "--ackmate"]
+            commandLineOptions "no_way!" `shouldBe` ["no_way!", ".", "-c", "-Q", "--ackmate"]
+            commandLineOptions "[]=" `shouldBe` ["[]=", ".", "-c", "-Q", "--ackmate"]
+            commandLineOptions "window.globalOverride" `shouldBe` ["window.globalOverride", ".", "-c", "-Q", "--ackmate"]
+
+        it "uses regular expression match with surrounding word boundaries for accuracy" $
+            commandLineOptions "awesome_method" `shouldBe` ["\\bawesome_method\\b", ".", "-c", "--ackmate"]
diff --git a/unused.cabal b/unused.cabal
index 02e33b5..64c6b85 100644
--- a/unused.cabal
+++ b/unused.cabal
@@ -16,6 +16,7 @@ cabal-version: >=1.10
 library
   hs-source-dirs:      src
   exposed-modules:     Unused.TermSearch
+                     , Unused.TermSearch.Internal
                      , Unused.Parser
                      , Unused.Parser.Internal
                      , Unused.Types
@@ -71,6 +72,7 @@ test-suite unused-test
                      , Unused.TypesSpec
                      , Unused.LikelihoodCalculatorSpec
                      , Unused.Grouping.InternalSpec
+                     , Unused.TermSearch.InternalSpec
   ghc-options:         -threaded -rtsopts -with-rtsopts=-N -Wall -Werror
   default-language:    Haskell2010