Fix bug with reversed alignments. Add test.

This commit is contained in:
Rúnar 2021-04-13 21:50:23 -04:00
parent b606ee5fc6
commit 03d2bb276b
4 changed files with 58 additions and 24 deletions

View File

@ -41,3 +41,16 @@ executable bench
criterion ==1.5.*, criterion ==1.5.*,
deepseq ==1.4.*, deepseq ==1.4.*,
fuzzyfind fuzzyfind
test-suite spec
type: exitcode-stdio-1.0
default-language: Haskell2010
hs-source-dirs: tests
ghc-options: -Wall
main-is: Spec.hs
other-modules: FuzzyFindSpec
build-depends: base == 4.*
, hspec == 2.7.*
, containers == 0.6.*
, QuickCheck == 2.*
, fuzzyfind

View File

@ -36,6 +36,10 @@ import Data.Char (isAlphaNum, isLower, isUpper, toLower)
import Data.Foldable (maximumBy, toList, foldl') import Data.Foldable (maximumBy, toList, foldl')
import Data.Function (on) import Data.Function (on)
import Data.Maybe (fromMaybe) import Data.Maybe (fromMaybe)
import GHC.Generics (Generic)
import Data.Text (Text)
import qualified Data.Text as Text
import Control.Monad.ST (runST)
import Data.Sequence import Data.Sequence
( Seq (..), ( Seq (..),
ViewL (..), ViewL (..),
@ -45,10 +49,7 @@ import Data.Sequence
(<|) (<|)
) )
import qualified Data.Sequence as Seq import qualified Data.Sequence as Seq
import GHC.Generics (Generic)
import Data.Text (Text)
import qualified Data.Text as Text
import Control.Monad.ST (runST)
-- | @bestMatch query string@ will return 'Nothing' if @query@ is not a -- | @bestMatch query string@ will return 'Nothing' if @query@ is not a
-- subsequence of @string@. Otherwise, it will return the "best" way to line up -- subsequence of @string@. Otherwise, it will return the "best" way to line up
@ -182,19 +183,21 @@ defaultGapPenalty = 3
defaultConsecutiveBonus :: Int defaultConsecutiveBonus :: Int
defaultConsecutiveBonus = 11 defaultConsecutiveBonus = 11
segmentToString :: ResultSegment -> String
segmentToString (Gap xs) = xs
segmentToString (Match xs) = xs
-- | Renders an 'Alignment' as a pair of lines with "*" on the lower line -- | Renders an 'Alignment' as a pair of lines with "*" on the lower line
-- indicating the location of pattern matches. -- indicating the location of pattern matches.
-- highlight' :: Alignment -> Text highlight :: Alignment -> String
-- highlight' (Alignment s (Result segments)) = highlight (Alignment s (Result segments)) =
-- foldMap prettySegment segments <> "\n" <> foldMap showGaps segments foldMap segmentToString segments <> "\n" <> foldMap showGaps segments
-- where where
-- prettySegment (Gap xs) = xs showGaps (Gap xs) = replicate (length xs) ' '
-- prettySegment (Match xs) = xs showGaps (Match xs) = replicate (length xs) '*'
-- showGaps (Gap xs) = Text.pack $ replicate (Text.length xs) ' '
-- showGaps (Match xs) = Text.pack $ replicate (Text.length xs) '*'
-- highlight :: Alignment -> String highlight' :: Alignment -> Text
-- highlight = Text.unpack . highlight' highlight' = Text.pack . highlight
-- | A highly configurable version of 'bestMatch'. -- | A highly configurable version of 'bestMatch'.
bestMatch' bestMatch'
@ -211,7 +214,7 @@ bestMatch'
-> String -- ^ The input string. -> String -- ^ The input string.
-> Maybe Alignment -> Maybe Alignment
bestMatch' matchScore mismatchScore gapPenalty boundaryBonus camelCaseBonus firstCharBonusMultiplier consecutiveBonus query str bestMatch' matchScore mismatchScore gapPenalty boundaryBonus camelCaseBonus firstCharBonusMultiplier consecutiveBonus query str
= Alignment (totalScore m nx) . Result <$> traceback = Alignment (totalScore m nx) . (Result . Seq.fromList) <$> traceback
where where
totalScore i j = totalScore i j =
if i > m then 0 else (A.index' hs (i :. j)) + (A.index' bonuses (i :. j)) if i > m then 0 else (A.index' hs (i :. j)) + (A.index' bonuses (i :. j))
@ -223,23 +226,23 @@ bestMatch' matchScore mismatchScore gapPenalty boundaryBonus camelCaseBonus firs
-- ] -- ]
similarity a b = similarity a b =
if a == b || a == toLower b then matchScore else mismatchScore if a == b || a == toLower b then matchScore else mismatchScore
traceback :: Maybe (Seq ResultSegment) traceback :: Maybe [ResultSegment]
traceback = (<> gaps (drop nx str)) <$> go [] [] (-1) m nx traceback = go [Gap $ drop nx str] [] (-1) m nx
go r m currOp 0 j = (gaps (take j str) <>) <$> case m of go r m currOp 0 j = (Gap (take j str) :) <$> case m of
[] -> Just r [] -> Just r
_ -> case currOp of _ -> case currOp of
1 -> Just (r :|> Match (reverse m)) 1 -> Just (Match m : r)
0 -> Just (r :|> Gap (reverse m)) 0 -> Just (Gap m : r)
-1 -> Nothing -1 -> Nothing
go _ _ _ _ 0 = Nothing go _ _ _ _ 0 = Nothing
go r m currOp i j = go r m currOp i j =
if similarity (A.index' query' (i - 1)) (A.index' str' (j - 1)) > 0 if similarity (A.index' query' (i - 1)) (A.index' str' (j - 1)) > 0
then case currOp of then case currOp of
0 -> 0 ->
go (r :|> Gap (reverse m)) [A.index' str' (j - 1)] 1 (i - 1) (j - 1) go (Gap m : r) [A.index' str' (j - 1)] 1 (i - 1) (j - 1)
_ -> go r (A.index' str' (j - 1) : m) 1 (i - 1) (j - 1) _ -> go r (A.index' str' (j - 1) : m) 1 (i - 1) (j - 1)
else case currOp of else case currOp of
1 -> go (r :|> Match (reverse m)) [A.index' str' (j - 1)] 0 i (j - 1) 1 -> go (Match m : r) [A.index' str' (j - 1)] 0 i (j - 1)
_ -> go r (A.index' str' (j - 1) : m) 0 i (j - 1) _ -> go r (A.index' str' (j - 1) : m) 0 i (j - 1)
nx = localMax m n 1 0 0 nx = localMax m n 1 0 0
localMax m n j r s = if j > n localMax m n j r s = if j > n
@ -311,8 +314,8 @@ bestMatch' matchScore mismatchScore gapPenalty boundaryBonus camelCaseBonus firs
in in
if similar && (afterMatch || beforeMatch) then consecutiveBonus else 0 if similar && (afterMatch || beforeMatch) then consecutiveBonus else 0
gaps :: String -> Seq ResultSegment -- gaps :: String -> Seq ResultSegment
gaps s = [Gap s] -- gaps s = [Gap s]
data ResultSegment = Gap !String | Match !String data ResultSegment = Gap !String | Match !String
deriving (Eq, Ord, Show, Generic) deriving (Eq, Ord, Show, Generic)

16
tests/FuzzyFindSpec.hs Normal file
View File

@ -0,0 +1,16 @@
module FuzzyFindSpec (spec) where
import Test.Hspec
import Test.Hspec.QuickCheck
import Text.FuzzyFind
import Data.Sequence()
spec :: Spec
spec = do
describe "FuzzyFind" $ do
modifyMaxSuccess (const 1000)
$ prop "Alignment contains original"
$ \q d -> case bestMatch q d of
Nothing -> True
Just (Alignment _ (Result r)) -> foldMap segmentToString r == d

2
tests/Spec.hs Normal file
View File

@ -0,0 +1,2 @@
{-# OPTIONS_GHC -F -pgmF hspec-discover #-}