1
1
mirror of https://github.com/github/semantic.git synced 2025-01-02 04:10:29 +03:00

Rename constantTimeEditDistance to editDistanceUpTo.

This commit is contained in:
Rob Rix 2016-08-18 13:44:27 -04:00
parent 627ae0552c
commit 2706ed262e

View File

@@ -3,7 +3,7 @@ module Data.RandomWalkSimilarity
( rws ( rws
, pqGramDecorator , pqGramDecorator
, featureVectorDecorator , featureVectorDecorator
, constantTimeEditDistance , editDistanceUpTo
, stripDiff , stripDiff
, stripTerm , stripTerm
, Gram(..) , Gram(..)
@@ -61,7 +61,7 @@ rws compare as bs
-- RWS can produce false positives in the case of e.g. hash collisions. Therefore, we find the _l_ nearest candidates, filter out any which have already been mapped, and select the minimum of the remaining by (a constant-time approximation of) edit distance. -- RWS can produce false positives in the case of e.g. hash collisions. Therefore, we find the _l_ nearest candidates, filter out any which have already been mapped, and select the minimum of the remaining by (a constant-time approximation of) edit distance.
-- --
-- cf §4.2 of RWS-Diff -- cf §4.2 of RWS-Diff
nearestUnmapped unmapped tree key = getFirst $ foldMap (First . Just) (sortOn (maybe maxBound (constantTimeEditDistance m) . compare (term key) . term) (intersectBy ((==) `on` termIndex) unmapped (KdTree.kNearest tree l key))) nearestUnmapped unmapped tree key = getFirst $ foldMap (First . Just) (sortOn (maybe maxBound (editDistanceUpTo m) . compare (term key) . term) (intersectBy ((==) `on` termIndex) unmapped (KdTree.kNearest tree l key)))
insertion previous unmappedA unmappedB kv@(UnmappedTerm _ _ b) = do insertion previous unmappedA unmappedB kv@(UnmappedTerm _ _ b) = do
put (previous, unmappedA, List.delete kv unmappedB) put (previous, unmappedA, List.delete kv unmappedB)
@@ -75,8 +75,8 @@ rws compare as bs
m = 10 m = 10
-- | Computes a constant-time approximation to the edit distance of a diff. This is done by comparing at most _m_ nodes, & assuming the rest are zero-cost. -- | Computes a constant-time approximation to the edit distance of a diff. This is done by comparing at most _m_ nodes, & assuming the rest are zero-cost.
constantTimeEditDistance :: (Prologue.Foldable f, Functor f) => Integer -> Free (CofreeF f (Both a)) (Patch (Cofree f a)) -> Int editDistanceUpTo :: (Prologue.Foldable f, Functor f) => Integer -> Free (CofreeF f (Both a)) (Patch (Cofree f a)) -> Int
constantTimeEditDistance m = diffSum (patchSum termSize) . cutoff m editDistanceUpTo m = diffSum (patchSum termSize) . cutoff m
where diffSum patchCost diff = sum $ fmap (maybe 0 patchCost) diff where diffSum patchCost diff = sum $ fmap (maybe 0 patchCost) diff
-- | A term which has not yet been mapped by `rws`, along with its feature vector summary & index. -- | A term which has not yet been mapped by `rws`, along with its feature vector summary & index.