1
1
mirror of https://github.com/github/semantic.git synced 2025-01-03 21:16:12 +03:00

Inline mostSimilarMatching.

This commit is contained in:
Rob Rix 2017-10-24 13:00:18 -04:00
parent 8e6ec02a37
commit fb39252959

View File

@ -87,6 +87,14 @@ rws canCompare equivalent as bs
(as, bs) = (zip [0..] as', zip [0..] bs')
(kdMapA, kdMapB) = (toKdMap as, toKdMap bs)
-- Find the most similar term matching a predicate, if any.
--
-- RWS can produce false positives in the case of e.g. hash collisions. Therefore, we find the _l_ nearest candidates (_l_ being defaultL), filter out any which don't match the predicate, and select the minimum of the remaining by (a constant-time approximation of) edit distance.
--
-- Yields Nothing when none of the nearest candidates satisfies the predicate.
--
-- cf §4.2 of RWS-Diff
mostSimilarMatching isEligible tree term = listToMaybe (sortOn (editDistanceUpTo defaultM term . snd) candidates)
-- The nearest neighbours looked up in the k-d map (keyed on the head of the term's annotation record), restricted to those the predicate accepts.
where candidates = filter (uncurry isEligible) (snd <$> KdMap.kNearest tree defaultL (rhead (extract term)))
-- | Tunable settings; at present the record carries a single field, the lookahead distance.
data Options = Options
{ optionsLookaheadPlaces :: {-# UNPACK #-} !Int -- ^ How many places ahead should we look for similar terms?
}
@ -96,19 +104,6 @@ defaultOptions = Options
{ optionsLookaheadPlaces = 0
}
-- | Looks up the term most similar to the given one among those accepted by a predicate.
--
-- Nearest-neighbour lookup alone is not trustworthy: RWS can report false positives (e.g. on hash collisions). So the search proceeds in stages: take the _l_ nearest candidates from the k-d map, discard those the predicate rejects, and keep whichever remaining candidate has the smallest (constant-time approximation of) edit distance to the term.
--
-- Returns 'Nothing' when no candidate survives the predicate.
--
-- cf §4.2 of RWS-Diff
mostSimilarMatching
  :: (Foldable syntax, Functor syntax, GAlign syntax)
  => (Int -> Term syntax ann1 -> Bool) -- ^ Decides whether an indexed term may be matched against.
  -> KdMap.KdMap Double FeatureVector (Int, Term syntax ann1) -- ^ The k-d map in which nearest neighbours are searched.
  -> Term syntax (Record (FeatureVector ': fields2)) -- ^ The term whose nearest neighbour is wanted.
  -> Maybe (Int, Term syntax ann1) -- ^ The closest eligible term, if there is one.
mostSimilarMatching isEligible tree term = listToMaybe ranked
  where
    -- The key to search for: the head of the term's annotation record.
    needle = rhead (extract term)
    -- The _l_ nearest entries, stripped of their map keys.
    neighbours = snd <$> KdMap.kNearest tree defaultL needle
    -- Only the neighbours the predicate accepts.
    eligible = filter (uncurry isEligible) neighbours
    -- Closest first, by approximate edit distance to the term.
    ranked = sortOn (editDistanceUpTo defaultM term . snd) eligible
defaultD, defaultL, defaultM, defaultP, defaultQ :: Int
defaultD = 15
defaultL = 2