1
1
mirror of https://github.com/github/semantic.git synced 2025-01-09 00:56:32 +03:00

🔥 the old rws code path.

This commit is contained in:
Rob Rix 2017-10-23 23:00:45 -04:00
parent d77aed7c2b
commit 171decf71f

View File

@ -25,7 +25,6 @@ import Data.Function ((&))
import Data.Functor.Classes import Data.Functor.Classes
import Data.Functor.Foldable import Data.Functor.Foldable
import Data.Hashable import Data.Hashable
import qualified Data.IntMap as IntMap
import Data.KdMap.Static hiding (elems, empty, inRange, null) import Data.KdMap.Static hiding (elems, empty, inRange, null)
import Data.List (sortOn) import Data.List (sortOn)
import Data.Maybe import Data.Maybe
@ -54,9 +53,6 @@ data UnmappedTerm syntax ann = UnmappedTerm
, term :: Term syntax ann -- ^ The unmapped term , term :: Term syntax ann -- ^ The unmapped term
} }
-- | Either a `term`, an index of a matched term, or nil.
data TermOrIndexOrNone term = Term term | Index {-# UNPACK #-} !Int | None
rws :: (Foldable syntax, Functor syntax, GAlign syntax) rws :: (Foldable syntax, Functor syntax, GAlign syntax)
=> ComparabilityRelation syntax (Record (FeatureVector ': fields1)) (Record (FeatureVector ': fields2)) => ComparabilityRelation syntax (Record (FeatureVector ': fields1)) (Record (FeatureVector ': fields2))
-> (Term syntax (Record (FeatureVector ': fields1)) -> Term syntax (Record (FeatureVector ': fields2)) -> Bool) -> (Term syntax (Record (FeatureVector ': fields1)) -> Term syntax (Record (FeatureVector ': fields2)) -> Bool)
@ -70,17 +66,6 @@ rws canCompare equivalent as bs
= ses equivalent as bs = ses equivalent as bs
& mapContiguous canCompare & mapContiguous canCompare
& fmap (bimap snd snd) & fmap (bimap snd snd)
rws canCompare equivalent as bs
= ses equivalent as bs
& genFeaturizedTermsAndDiffs
& \ (featureAs, featureBs, mappedDiffs, allDiffs) ->
findNearestNeighboursToDiff canCompare allDiffs featureAs featureBs
& uncurry deleteRemaining
& insertMapped mappedDiffs
& fmap (bimap snd snd)
-- | An IntMap of unmapped terms keyed by their position in a list of terms.
type UnmappedTerms syntax ann = IntMap.IntMap (UnmappedTerm syntax ann)
type Edit syntax ann1 ann2 = These (Term syntax ann1) (Term syntax ann2) type Edit syntax ann1 ann2 = These (Term syntax ann1) (Term syntax ann2)
@ -89,98 +74,13 @@ type MappedDiff syntax ann1 ann2 = These (Int, Term syntax ann1) (Int, Term synt
type RWSEditScript syntax ann1 ann2 = [Edit syntax ann1 ann2] type RWSEditScript syntax ann1 ann2 = [Edit syntax ann1 ann2]
insertMapped :: Foldable t
=> t (MappedDiff syntax ann1 ann2)
-> [MappedDiff syntax ann1 ann2]
-> [MappedDiff syntax ann1 ann2]
insertMapped diffs into = foldl' (flip insertDiff) into diffs
deleteRemaining :: Traversable t
=> [MappedDiff syntax ann1 ann2]
-> t (UnmappedTerm syntax ann1)
-> [MappedDiff syntax ann1 ann2]
deleteRemaining diffs remaining = insertMapped (This . (termIndex &&& term) <$> remaining) diffs
-- | Inserts an index and diff pair into a list of indices and diffs.
insertDiff :: MappedDiff syntax ann1 ann2
-> [MappedDiff syntax ann1 ann2]
-> [MappedDiff syntax ann1 ann2]
insertDiff inserted [] = [ inserted ]
insertDiff a (b:rest) = case (bimap fst fst a, bimap fst fst b) of
(These i1 i2, These j1 j2) -> if i1 <= j1 && i2 <= j2 then a : b : rest else b : insertDiff a rest
(This i, This j) -> if i <= j then a : b : rest else b : insertDiff a rest
(That i, That j) -> if i <= j then a : b : rest else b : insertDiff a rest
(This i, These j _) -> if i <= j then a : b : rest else b : insertDiff a rest
(That i, These _ j) -> if i <= j then a : b : rest else b : insertDiff a rest
(This _, That _) -> b : insertDiff a rest
(That _, This _) -> b : insertDiff a rest
(These i1 i2, _) -> case break isThese rest of
(rest, tail) -> let (before, after) = foldr' (combine i1 i2) ([], []) (b : rest) in
case after of
[] -> before <> insertDiff a tail
_ -> before <> (a : after) <> tail
where
combine i1 i2 each (before, after) = case bimap fst fst each of
This j1 -> if i1 <= j1 then (before, each : after) else (each : before, after)
That j2 -> if i2 <= j2 then (before, each : after) else (each : before, after)
These _ _ -> (before, after)
findNearestNeighboursToDiff :: (Foldable syntax, Functor syntax, GAlign syntax)
=> ComparabilityRelation syntax ann1 ann2 -- ^ A relation determining whether two terms can be compared.
-> [TermOrIndexOrNone (UnmappedTerm syntax ann2)]
-> [UnmappedTerm syntax ann1]
-> [UnmappedTerm syntax ann2]
-> ([MappedDiff syntax ann1 ann2], UnmappedTerms syntax ann1)
findNearestNeighboursToDiff canCompare allDiffs featureAs featureBs = (diffs, remaining)
where
(diffs, (_, remaining, _)) =
traverse (findNearestNeighbourToDiff' canCompare (toKdMap featureAs) (toKdMap featureBs)) allDiffs &
fmap catMaybes &
(`runState` (pred (maybe 0 termIndex (listToMaybe featureAs)), toMap featureAs, toMap featureBs))
findNearestNeighbourToDiff' :: (Foldable syntax, Functor syntax, GAlign syntax)
=> ComparabilityRelation syntax ann1 ann2 -- ^ A relation determining whether two terms can be compared.
-> KdMap Double FeatureVector (UnmappedTerm syntax ann1)
-> KdMap Double FeatureVector (UnmappedTerm syntax ann2)
-> TermOrIndexOrNone (UnmappedTerm syntax ann2)
-> State (Int, UnmappedTerms syntax ann1, UnmappedTerms syntax ann2)
(Maybe (MappedDiff syntax ann1 ann2))
findNearestNeighbourToDiff' canCompare kdTreeA kdTreeB termThing = case termThing of
None -> pure Nothing
RWS.Term term -> Just <$> findNearestNeighbourTo canCompare kdTreeA kdTreeB term
Index i -> modify' (\ (_, unA, unB) -> (i, unA, unB)) >> pure Nothing
-- | Construct a diff for a term in B by matching it against the most similar eligible term in A (if any), marking both as ineligible for future matches. -- | Construct a diff for a term in B by matching it against the most similar eligible term in A (if any), marking both as ineligible for future matches.
findNearestNeighbourTo :: (Foldable syntax, Functor syntax, GAlign syntax) findNearestNeighbourTo :: (Foldable syntax, Functor syntax, GAlign syntax)
=> ComparabilityRelation syntax ann1 ann2 -- ^ A relation determining whether two terms can be compared. => ComparabilityRelation syntax ann1 ann2 -- ^ A relation determining whether two terms can be compared.
-> KdMap Double FeatureVector (UnmappedTerm syntax ann1) -> [UnmappedTerm syntax ann1]
-> KdMap Double FeatureVector (UnmappedTerm syntax ann2) -> [UnmappedTerm syntax ann2]
-> UnmappedTerm syntax ann2 -> [MappedDiff syntax ann1 ann2]
-> State (Int, UnmappedTerms syntax ann1, UnmappedTerms syntax ann2) findNearestNeighbourTo canCompare as bs = go (pred (termIndex (head as))) as bs
(MappedDiff syntax ann1 ann2)
findNearestNeighbourTo canCompare kdTreeA kdTreeB term@(UnmappedTerm j _ b) = do
(previous, unmappedA, unmappedB) <- get
fromMaybe (insertion previous unmappedA unmappedB term) $ do
guard (not (null unmappedA))
let (minA, _) = IntMap.findMin unmappedA
-- Look up the nearest unmapped term in `unmappedA`.
foundA@(UnmappedTerm i _ a) <- nearestUnmapped (\ a -> isNearAndComparableTo canCompare previous b a && termIndex a >= minA) kdTreeA term
-- Look up the nearest `foundA` in `unmappedB`
UnmappedTerm j' _ _ <- nearestUnmapped (\ b -> isNearAndComparableTo (flip canCompare) (pred j) a b && termIndex b >= j) kdTreeB foundA
-- Return Nothing if their indices don't match
guard (j == j')
pure $! do
put (i, IntMap.delete i unmappedA, IntMap.delete j unmappedB)
pure (These (i, a) (j, b))
findNearestNeighbourTo' :: (Foldable syntax, Functor syntax, GAlign syntax)
=> ComparabilityRelation syntax ann1 ann2 -- ^ A relation determining whether two terms can be compared.
-> [UnmappedTerm syntax ann1]
-> [UnmappedTerm syntax ann2]
-> [MappedDiff syntax ann1 ann2]
findNearestNeighbourTo' canCompare as bs = go (pred (termIndex (head as))) as bs
where go _ as [] = This . (termIndex &&& term) <$> as where go _ as [] = This . (termIndex &&& term) <$> as
go _ [] bs = That . (termIndex &&& term) <$> bs go _ [] bs = That . (termIndex &&& term) <$> bs
go previous unmappedA@(UnmappedTerm minA _ _ : _) (termB@(UnmappedTerm j _ b) : restUnmappedB) = go previous unmappedA@(UnmappedTerm minA _ _ : _) (termB@(UnmappedTerm j _ b) : restUnmappedB) =
@ -221,18 +121,6 @@ defaultQ = 3
defaultMoveBound = 1 defaultMoveBound = 1
-- Returns a state (insertion index, old unmapped terms, new unmapped terms), and value of (index, inserted diff),
-- given a previous index, two sets of umapped terms, and an unmapped term to insert.
insertion :: Int
-> UnmappedTerms syntax ann1
-> UnmappedTerms syntax ann2
-> UnmappedTerm syntax ann2
-> State (Int, UnmappedTerms syntax ann1, UnmappedTerms syntax ann2)
(MappedDiff syntax ann1 ann2)
insertion previous unmappedA unmappedB (UnmappedTerm j _ b) = do
put (previous, unmappedA, IntMap.delete j unmappedB)
pure (That (j, b))
mapContiguous :: (Foldable syntax, Functor syntax, GAlign syntax) mapContiguous :: (Foldable syntax, Functor syntax, GAlign syntax)
=> ComparabilityRelation syntax (Record (FeatureVector ': fields1)) (Record (FeatureVector ': fields2)) => ComparabilityRelation syntax (Record (FeatureVector ': fields1)) (Record (FeatureVector ': fields2))
-> RWSEditScript syntax (Record (FeatureVector ': fields1)) (Record (FeatureVector ': fields2)) -> RWSEditScript syntax (Record (FeatureVector ': fields1)) (Record (FeatureVector ': fields2))
@ -245,31 +133,9 @@ mapContiguous canCompare = go 0 0 [] []
These a b -> mapChunk as bs <> (These (i, a) (j, b) : go (succ i) (succ j) [] [] rest) These a b -> mapChunk as bs <> (These (i, a) (j, b) : go (succ i) (succ j) [] [] rest)
mapChunk as [] = This . (termIndex &&& term) <$> reverse as mapChunk as [] = This . (termIndex &&& term) <$> reverse as
mapChunk [] bs = That . (termIndex &&& term) <$> reverse bs mapChunk [] bs = That . (termIndex &&& term) <$> reverse bs
mapChunk as bs = findNearestNeighbourTo' canCompare (reverse as) (reverse bs) mapChunk as bs = findNearestNeighbourTo canCompare (reverse as) (reverse bs)
genFeaturizedTermsAndDiffs :: Functor syntax
=> RWSEditScript syntax (Record (FeatureVector ': fields1)) (Record (FeatureVector ': fields2))
-> ( [UnmappedTerm syntax (Record (FeatureVector ': fields1))]
, [UnmappedTerm syntax (Record (FeatureVector ': fields2))]
, [MappedDiff syntax (Record (FeatureVector ': fields1)) (Record (FeatureVector ': fields2))]
, [TermOrIndexOrNone (UnmappedTerm syntax (Record (FeatureVector ': fields2)))]
)
genFeaturizedTermsAndDiffs sesDiffs = let Mapping _ _ a b c d = foldl' combine (Mapping 0 0 [] [] [] []) sesDiffs in (reverse a, reverse b, reverse c, reverse d)
where combine (Mapping counterA counterB as bs mappedDiffs allDiffs) diff = case diff of
This term -> Mapping (succ counterA) counterB (featurize counterA term : as) bs mappedDiffs (None : allDiffs)
That term -> Mapping counterA (succ counterB) as (featurize counterB term : bs) mappedDiffs (RWS.Term (featurize counterB term) : allDiffs)
These a b -> Mapping (succ counterA) (succ counterB) as bs ((These (counterA, a) (counterB, b)) : mappedDiffs) (Index counterA : allDiffs)
data Mapping syntax ann1 ann2
= Mapping
{-# UNPACK #-} !Int
{-# UNPACK #-} !Int
![UnmappedTerm syntax ann1]
![UnmappedTerm syntax ann2]
![MappedDiff syntax ann1 ann2]
![TermOrIndexOrNone (UnmappedTerm syntax ann2)]
featurize :: Functor syntax => Int -> Term syntax (Record (FeatureVector ': fields)) -> UnmappedTerm syntax (Record (FeatureVector ': fields)) featurize :: Functor syntax => Int -> Term syntax (Record (FeatureVector ': fields)) -> UnmappedTerm syntax (Record (FeatureVector ': fields))
featurize index term = UnmappedTerm index (getField (extract term)) (eraseFeatureVector term) featurize index term = UnmappedTerm index (getField (extract term)) (eraseFeatureVector term)
@ -282,8 +148,6 @@ nullFeatureVector = FV $ listArray (0, 0) [0]
setFeatureVector :: Record (FeatureVector ': fields) -> FeatureVector -> Record (FeatureVector ': fields) setFeatureVector :: Record (FeatureVector ': fields) -> FeatureVector -> Record (FeatureVector ': fields)
setFeatureVector = setField setFeatureVector = setField
toMap :: [UnmappedTerm syntax ann] -> IntMap.IntMap (UnmappedTerm syntax ann)
toMap = IntMap.fromList . fmap (termIndex &&& id)
toKdMap :: [UnmappedTerm syntax ann] -> KdMap Double FeatureVector (UnmappedTerm syntax ann) toKdMap :: [UnmappedTerm syntax ann] -> KdMap Double FeatureVector (UnmappedTerm syntax ann)
toKdMap = build (elems . unFV) . fmap (feature &&& id) toKdMap = build (elems . unFV) . fmap (feature &&& id)