mirror of
https://github.com/github/semantic.git
synced 2024-12-30 10:27:45 +03:00
Rename constantTimeEditDistance to editDistanceUpTo.
This commit is contained in:
parent
627ae0552c
commit
2706ed262e
@ -3,7 +3,7 @@ module Data.RandomWalkSimilarity
|
|||||||
( rws
|
( rws
|
||||||
, pqGramDecorator
|
, pqGramDecorator
|
||||||
, featureVectorDecorator
|
, featureVectorDecorator
|
||||||
, constantTimeEditDistance
|
, editDistanceUpTo
|
||||||
, stripDiff
|
, stripDiff
|
||||||
, stripTerm
|
, stripTerm
|
||||||
, Gram(..)
|
, Gram(..)
|
||||||
@ -61,7 +61,7 @@ rws compare as bs
|
|||||||
-- RWS can produce false positives in the case of e.g. hash collisions. Therefore, we find the _l_ nearest candidates, filter out any which have already been mapped, and select the minimum of the remaining by (a constant-time approximation of) edit distance.
|
-- RWS can produce false positives in the case of e.g. hash collisions. Therefore, we find the _l_ nearest candidates, filter out any which have already been mapped, and select the minimum of the remaining by (a constant-time approximation of) edit distance.
|
||||||
--
|
--
|
||||||
-- cf §4.2 of RWS-Diff
|
-- cf §4.2 of RWS-Diff
|
||||||
nearestUnmapped unmapped tree key = getFirst $ foldMap (First . Just) (sortOn (maybe maxBound (constantTimeEditDistance m) . compare (term key) . term) (intersectBy ((==) `on` termIndex) unmapped (KdTree.kNearest tree l key)))
|
nearestUnmapped unmapped tree key = getFirst $ foldMap (First . Just) (sortOn (maybe maxBound (editDistanceUpTo m) . compare (term key) . term) (intersectBy ((==) `on` termIndex) unmapped (KdTree.kNearest tree l key)))
|
||||||
|
|
||||||
insertion previous unmappedA unmappedB kv@(UnmappedTerm _ _ b) = do
|
insertion previous unmappedA unmappedB kv@(UnmappedTerm _ _ b) = do
|
||||||
put (previous, unmappedA, List.delete kv unmappedB)
|
put (previous, unmappedA, List.delete kv unmappedB)
|
||||||
@ -75,8 +75,8 @@ rws compare as bs
|
|||||||
m = 10
|
m = 10
|
||||||
|
|
||||||
-- | Computes a constant-time approximation to the edit distance of a diff. This is done by comparing at most _m_ nodes, & assuming the rest are zero-cost.
|
-- | Computes a constant-time approximation to the edit distance of a diff. This is done by comparing at most _m_ nodes, & assuming the rest are zero-cost.
--
-- The diff is first passed through @cutoff m@; the result is then summed, with every 'Just' patch costed by @patchSum termSize@ and every 'Nothing' counted as 0.
--
-- NOTE(review): @cutoff@ is not defined in this view. If it is the depth cutoff from "Control.Monad.Free", then _m_ bounds the depth explored rather than the number of nodes — confirm.
|
||||||
constantTimeEditDistance :: (Prologue.Foldable f, Functor f) => Integer -> Free (CofreeF f (Both a)) (Patch (Cofree f a)) -> Int
|
editDistanceUpTo :: (Prologue.Foldable f, Functor f) => Integer -> Free (CofreeF f (Both a)) (Patch (Cofree f a)) -> Int
|
||||||
constantTimeEditDistance m = diffSum (patchSum termSize) . cutoff m
|
editDistanceUpTo m = diffSum (patchSum termSize) . cutoff m
|
||||||
where diffSum patchCost diff = sum $ fmap (maybe 0 patchCost) diff
|
where diffSum patchCost diff = sum $ fmap (maybe 0 patchCost) diff
|
||||||
|
|
||||||
-- | A term which has not yet been mapped by `rws`, along with its feature vector summary & index.
|
-- | A term which has not yet been mapped by `rws`, along with its feature vector summary & index.
|
||||||
|
Loading…
Reference in New Issue
Block a user