mirror of
https://github.com/github/semantic.git
synced 2024-12-21 05:41:54 +03:00
Rename constantTimeEditDistance to editDistanceUpTo.
This commit is contained in:
parent
627ae0552c
commit
2706ed262e
@ -3,7 +3,7 @@ module Data.RandomWalkSimilarity
|
||||
( rws
|
||||
, pqGramDecorator
|
||||
, featureVectorDecorator
|
||||
, constantTimeEditDistance
|
||||
, editDistanceUpTo
|
||||
, stripDiff
|
||||
, stripTerm
|
||||
, Gram(..)
|
||||
@ -61,7 +61,7 @@ rws compare as bs
|
||||
-- RWS can produce false positives in the case of e.g. hash collisions. Therefore, we find the _l_ nearest candidates, filter out any which have already been mapped, and select the minimum of the remaining by (a constant-time approximation of) edit distance.
|
||||
--
|
||||
-- cf §4.2 of RWS-Diff
|
||||
nearestUnmapped unmapped tree key = getFirst $ foldMap (First . Just) (sortOn (maybe maxBound (constantTimeEditDistance m) . compare (term key) . term) (intersectBy ((==) `on` termIndex) unmapped (KdTree.kNearest tree l key)))
|
||||
nearestUnmapped unmapped tree key = getFirst $ foldMap (First . Just) (sortOn (maybe maxBound (editDistanceUpTo m) . compare (term key) . term) (intersectBy ((==) `on` termIndex) unmapped (KdTree.kNearest tree l key)))
|
||||
|
||||
insertion previous unmappedA unmappedB kv@(UnmappedTerm _ _ b) = do
|
||||
put (previous, unmappedA, List.delete kv unmappedB)
|
||||
@ -75,8 +75,8 @@ rws compare as bs
|
||||
m = 10
|
||||
|
||||
-- | Computes a constant-time approximation to the edit distance of a diff by comparing at most _m_ nodes and assuming the rest are zero-cost.
|
||||
-- Pre-rename spelling of this function: the commit title renames it to editDistanceUpTo, and the signature and equation are otherwise the same as the editDistanceUpTo lines that follow.
-- Takes the node limit as an Integer and the diff, and returns the summed cost as an Int.
constantTimeEditDistance :: (Prologue.Foldable f, Functor f) => Integer -> Free (CofreeF f (Both a)) (Patch (Cofree f a)) -> Int
|
||||
-- `cutoff m` is applied to the diff first; `diffSum` (bound in the `where` clause further down) then totals it using `patchSum termSize` as the per-patch cost.
constantTimeEditDistance m = diffSum (patchSum termSize) . cutoff m
|
||||
-- Post-rename spelling of constantTimeEditDistance (per the commit title); only the name differs from the lines above.
-- Takes the node limit `m` as an Integer and a diff, and returns the summed cost as an Int.
editDistanceUpTo :: (Prologue.Foldable f, Functor f) => Integer -> Free (CofreeF f (Both a)) (Patch (Cofree f a)) -> Int
|
||||
-- `cutoff m` is applied to the diff first; `diffSum` then totals the result using `patchSum termSize` as the per-patch cost.
-- NOTE(review): `cutoff`, `patchSum` and `termSize` are defined outside this hunk; presumably `cutoff m` leaves `Nothing` in place of everything past the first m nodes — confirm against its definition.
editDistanceUpTo m = diffSum (patchSum termSize) . cutoff m
|
||||
-- diffSum maps every value held in the diff to a cost (0 for `Nothing`, `patchCost p` for `Just p`) and sums those costs.
where diffSum patchCost diff = sum $ fmap (maybe 0 patchCost) diff
|
||||
|
||||
-- | A term which has not yet been mapped by `rws`, along with its feature vector summary & index.
|
||||
|
Loading…
Reference in New Issue
Block a user