mirror of
https://github.com/runarorama/fuzzyfind.git
synced 2024-10-26 07:22:31 +03:00
Fix bug with reversed alignments. Add test.
This commit is contained in:
parent
b606ee5fc6
commit
03d2bb276b
@ -41,3 +41,16 @@ executable bench
|
|||||||
criterion ==1.5.*,
|
criterion ==1.5.*,
|
||||||
deepseq ==1.4.*,
|
deepseq ==1.4.*,
|
||||||
fuzzyfind
|
fuzzyfind
|
||||||
|
|
||||||
|
test-suite spec
|
||||||
|
type: exitcode-stdio-1.0
|
||||||
|
default-language: Haskell2010
|
||||||
|
hs-source-dirs: tests
|
||||||
|
ghc-options: -Wall
|
||||||
|
main-is: Spec.hs
|
||||||
|
other-modules: FuzzyFindSpec
|
||||||
|
build-depends: base == 4.*
|
||||||
|
, hspec == 2.7.*
|
||||||
|
, containers == 0.6.*
|
||||||
|
, QuickCheck == 2.*
|
||||||
|
, fuzzyfind
|
||||||
|
@ -36,6 +36,10 @@ import Data.Char (isAlphaNum, isLower, isUpper, toLower)
|
|||||||
import Data.Foldable (maximumBy, toList, foldl')
|
import Data.Foldable (maximumBy, toList, foldl')
|
||||||
import Data.Function (on)
|
import Data.Function (on)
|
||||||
import Data.Maybe (fromMaybe)
|
import Data.Maybe (fromMaybe)
|
||||||
|
import GHC.Generics (Generic)
|
||||||
|
import Data.Text (Text)
|
||||||
|
import qualified Data.Text as Text
|
||||||
|
import Control.Monad.ST (runST)
|
||||||
import Data.Sequence
|
import Data.Sequence
|
||||||
( Seq (..),
|
( Seq (..),
|
||||||
ViewL (..),
|
ViewL (..),
|
||||||
@ -45,10 +49,7 @@ import Data.Sequence
|
|||||||
(<|)
|
(<|)
|
||||||
)
|
)
|
||||||
import qualified Data.Sequence as Seq
|
import qualified Data.Sequence as Seq
|
||||||
import GHC.Generics (Generic)
|
|
||||||
import Data.Text (Text)
|
|
||||||
import qualified Data.Text as Text
|
|
||||||
import Control.Monad.ST (runST)
|
|
||||||
|
|
||||||
-- | @bestMatch query string@ will return 'Nothing' if @query@ is not a
|
-- | @bestMatch query string@ will return 'Nothing' if @query@ is not a
|
||||||
-- subsequence of @string@. Otherwise, it will return the "best" way to line up
|
-- subsequence of @string@. Otherwise, it will return the "best" way to line up
|
||||||
@ -182,19 +183,21 @@ defaultGapPenalty = 3
|
|||||||
defaultConsecutiveBonus :: Int
|
defaultConsecutiveBonus :: Int
|
||||||
defaultConsecutiveBonus = 11
|
defaultConsecutiveBonus = 11
|
||||||
|
|
||||||
|
segmentToString :: ResultSegment -> String
|
||||||
|
segmentToString (Gap xs) = xs
|
||||||
|
segmentToString (Match xs) = xs
|
||||||
|
|
||||||
-- | Renders an 'Alignment' as a pair of lines with "*" on the lower line
|
-- | Renders an 'Alignment' as a pair of lines with "*" on the lower line
|
||||||
-- indicating the location of pattern matches.
|
-- indicating the location of pattern matches.
|
||||||
-- highlight' :: Alignment -> Text
|
highlight :: Alignment -> String
|
||||||
-- highlight' (Alignment s (Result segments)) =
|
highlight (Alignment s (Result segments)) =
|
||||||
-- foldMap prettySegment segments <> "\n" <> foldMap showGaps segments
|
foldMap segmentToString segments <> "\n" <> foldMap showGaps segments
|
||||||
-- where
|
where
|
||||||
-- prettySegment (Gap xs) = xs
|
showGaps (Gap xs) = replicate (length xs) ' '
|
||||||
-- prettySegment (Match xs) = xs
|
showGaps (Match xs) = replicate (length xs) '*'
|
||||||
-- showGaps (Gap xs) = Text.pack $ replicate (Text.length xs) ' '
|
|
||||||
-- showGaps (Match xs) = Text.pack $ replicate (Text.length xs) '*'
|
|
||||||
|
|
||||||
-- highlight :: Alignment -> String
|
highlight' :: Alignment -> Text
|
||||||
-- highlight = Text.unpack . highlight'
|
highlight' = Text.pack . highlight
|
||||||
|
|
||||||
-- | A highly configurable version of 'bestMatch'.
|
-- | A highly configurable version of 'bestMatch'.
|
||||||
bestMatch'
|
bestMatch'
|
||||||
@ -211,7 +214,7 @@ bestMatch'
|
|||||||
-> String -- ^ The input string.
|
-> String -- ^ The input string.
|
||||||
-> Maybe Alignment
|
-> Maybe Alignment
|
||||||
bestMatch' matchScore mismatchScore gapPenalty boundaryBonus camelCaseBonus firstCharBonusMultiplier consecutiveBonus query str
|
bestMatch' matchScore mismatchScore gapPenalty boundaryBonus camelCaseBonus firstCharBonusMultiplier consecutiveBonus query str
|
||||||
= Alignment (totalScore m nx) . Result <$> traceback
|
= Alignment (totalScore m nx) . (Result . Seq.fromList) <$> traceback
|
||||||
where
|
where
|
||||||
totalScore i j =
|
totalScore i j =
|
||||||
if i > m then 0 else (A.index' hs (i :. j)) + (A.index' bonuses (i :. j))
|
if i > m then 0 else (A.index' hs (i :. j)) + (A.index' bonuses (i :. j))
|
||||||
@ -223,23 +226,23 @@ bestMatch' matchScore mismatchScore gapPenalty boundaryBonus camelCaseBonus firs
|
|||||||
-- ]
|
-- ]
|
||||||
similarity a b =
|
similarity a b =
|
||||||
if a == b || a == toLower b then matchScore else mismatchScore
|
if a == b || a == toLower b then matchScore else mismatchScore
|
||||||
traceback :: Maybe (Seq ResultSegment)
|
traceback :: Maybe [ResultSegment]
|
||||||
traceback = (<> gaps (drop nx str)) <$> go [] [] (-1) m nx
|
traceback = go [Gap $ drop nx str] [] (-1) m nx
|
||||||
go r m currOp 0 j = (gaps (take j str) <>) <$> case m of
|
go r m currOp 0 j = (Gap (take j str) :) <$> case m of
|
||||||
[] -> Just r
|
[] -> Just r
|
||||||
_ -> case currOp of
|
_ -> case currOp of
|
||||||
1 -> Just (r :|> Match (reverse m))
|
1 -> Just (Match m : r)
|
||||||
0 -> Just (r :|> Gap (reverse m))
|
0 -> Just (Gap m : r)
|
||||||
-1 -> Nothing
|
-1 -> Nothing
|
||||||
go _ _ _ _ 0 = Nothing
|
go _ _ _ _ 0 = Nothing
|
||||||
go r m currOp i j =
|
go r m currOp i j =
|
||||||
if similarity (A.index' query' (i - 1)) (A.index' str' (j - 1)) > 0
|
if similarity (A.index' query' (i - 1)) (A.index' str' (j - 1)) > 0
|
||||||
then case currOp of
|
then case currOp of
|
||||||
0 ->
|
0 ->
|
||||||
go (r :|> Gap (reverse m)) [A.index' str' (j - 1)] 1 (i - 1) (j - 1)
|
go (Gap m : r) [A.index' str' (j - 1)] 1 (i - 1) (j - 1)
|
||||||
_ -> go r (A.index' str' (j - 1) : m) 1 (i - 1) (j - 1)
|
_ -> go r (A.index' str' (j - 1) : m) 1 (i - 1) (j - 1)
|
||||||
else case currOp of
|
else case currOp of
|
||||||
1 -> go (r :|> Match (reverse m)) [A.index' str' (j - 1)] 0 i (j - 1)
|
1 -> go (Match m : r) [A.index' str' (j - 1)] 0 i (j - 1)
|
||||||
_ -> go r (A.index' str' (j - 1) : m) 0 i (j - 1)
|
_ -> go r (A.index' str' (j - 1) : m) 0 i (j - 1)
|
||||||
nx = localMax m n 1 0 0
|
nx = localMax m n 1 0 0
|
||||||
localMax m n j r s = if j > n
|
localMax m n j r s = if j > n
|
||||||
@ -311,8 +314,8 @@ bestMatch' matchScore mismatchScore gapPenalty boundaryBonus camelCaseBonus firs
|
|||||||
in
|
in
|
||||||
if similar && (afterMatch || beforeMatch) then consecutiveBonus else 0
|
if similar && (afterMatch || beforeMatch) then consecutiveBonus else 0
|
||||||
|
|
||||||
gaps :: String -> Seq ResultSegment
|
-- gaps :: String -> Seq ResultSegment
|
||||||
gaps s = [Gap s]
|
-- gaps s = [Gap s]
|
||||||
|
|
||||||
data ResultSegment = Gap !String | Match !String
|
data ResultSegment = Gap !String | Match !String
|
||||||
deriving (Eq, Ord, Show, Generic)
|
deriving (Eq, Ord, Show, Generic)
|
||||||
|
16
tests/FuzzyFindSpec.hs
Normal file
16
tests/FuzzyFindSpec.hs
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
module FuzzyFindSpec (spec) where
|
||||||
|
|
||||||
|
import Test.Hspec
|
||||||
|
import Test.Hspec.QuickCheck
|
||||||
|
import Text.FuzzyFind
|
||||||
|
import Data.Sequence()
|
||||||
|
|
||||||
|
spec :: Spec
|
||||||
|
spec = do
|
||||||
|
describe "FuzzyFind" $ do
|
||||||
|
modifyMaxSuccess (const 1000)
|
||||||
|
$ prop "Alignment contains original"
|
||||||
|
$ \q d -> case bestMatch q d of
|
||||||
|
Nothing -> True
|
||||||
|
Just (Alignment _ (Result r)) -> foldMap segmentToString r == d
|
||||||
|
|
2
tests/Spec.hs
Normal file
2
tests/Spec.hs
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
{-# OPTIONS_GHC -F -pgmF hspec-discover #-}
|
||||||
|
|
Loading…
Reference in New Issue
Block a user