mirror of
https://github.com/github/semantic.git
synced 2025-01-09 00:56:32 +03:00
🔥 the old rws code path.
This commit is contained in:
parent
d77aed7c2b
commit
171decf71f
146
src/RWS.hs
146
src/RWS.hs
@ -25,7 +25,6 @@ import Data.Function ((&))
|
|||||||
import Data.Functor.Classes
|
import Data.Functor.Classes
|
||||||
import Data.Functor.Foldable
|
import Data.Functor.Foldable
|
||||||
import Data.Hashable
|
import Data.Hashable
|
||||||
import qualified Data.IntMap as IntMap
|
|
||||||
import Data.KdMap.Static hiding (elems, empty, inRange, null)
|
import Data.KdMap.Static hiding (elems, empty, inRange, null)
|
||||||
import Data.List (sortOn)
|
import Data.List (sortOn)
|
||||||
import Data.Maybe
|
import Data.Maybe
|
||||||
@ -54,9 +53,6 @@ data UnmappedTerm syntax ann = UnmappedTerm
|
|||||||
, term :: Term syntax ann -- ^ The unmapped term
|
, term :: Term syntax ann -- ^ The unmapped term
|
||||||
}
|
}
|
||||||
|
|
||||||
-- | Either a `term`, an index of a matched term, or nil.
|
|
||||||
data TermOrIndexOrNone term = Term term | Index {-# UNPACK #-} !Int | None
|
|
||||||
|
|
||||||
rws :: (Foldable syntax, Functor syntax, GAlign syntax)
|
rws :: (Foldable syntax, Functor syntax, GAlign syntax)
|
||||||
=> ComparabilityRelation syntax (Record (FeatureVector ': fields1)) (Record (FeatureVector ': fields2))
|
=> ComparabilityRelation syntax (Record (FeatureVector ': fields1)) (Record (FeatureVector ': fields2))
|
||||||
-> (Term syntax (Record (FeatureVector ': fields1)) -> Term syntax (Record (FeatureVector ': fields2)) -> Bool)
|
-> (Term syntax (Record (FeatureVector ': fields1)) -> Term syntax (Record (FeatureVector ': fields2)) -> Bool)
|
||||||
@ -70,17 +66,6 @@ rws canCompare equivalent as bs
|
|||||||
= ses equivalent as bs
|
= ses equivalent as bs
|
||||||
& mapContiguous canCompare
|
& mapContiguous canCompare
|
||||||
& fmap (bimap snd snd)
|
& fmap (bimap snd snd)
|
||||||
rws canCompare equivalent as bs
|
|
||||||
= ses equivalent as bs
|
|
||||||
& genFeaturizedTermsAndDiffs
|
|
||||||
& \ (featureAs, featureBs, mappedDiffs, allDiffs) ->
|
|
||||||
findNearestNeighboursToDiff canCompare allDiffs featureAs featureBs
|
|
||||||
& uncurry deleteRemaining
|
|
||||||
& insertMapped mappedDiffs
|
|
||||||
& fmap (bimap snd snd)
|
|
||||||
|
|
||||||
-- | An IntMap of unmapped terms keyed by their position in a list of terms.
|
|
||||||
type UnmappedTerms syntax ann = IntMap.IntMap (UnmappedTerm syntax ann)
|
|
||||||
|
|
||||||
type Edit syntax ann1 ann2 = These (Term syntax ann1) (Term syntax ann2)
|
type Edit syntax ann1 ann2 = These (Term syntax ann1) (Term syntax ann2)
|
||||||
|
|
||||||
@ -89,98 +74,13 @@ type MappedDiff syntax ann1 ann2 = These (Int, Term syntax ann1) (Int, Term synt
|
|||||||
|
|
||||||
type RWSEditScript syntax ann1 ann2 = [Edit syntax ann1 ann2]
|
type RWSEditScript syntax ann1 ann2 = [Edit syntax ann1 ann2]
|
||||||
|
|
||||||
insertMapped :: Foldable t
|
|
||||||
=> t (MappedDiff syntax ann1 ann2)
|
|
||||||
-> [MappedDiff syntax ann1 ann2]
|
|
||||||
-> [MappedDiff syntax ann1 ann2]
|
|
||||||
insertMapped diffs into = foldl' (flip insertDiff) into diffs
|
|
||||||
|
|
||||||
deleteRemaining :: Traversable t
|
|
||||||
=> [MappedDiff syntax ann1 ann2]
|
|
||||||
-> t (UnmappedTerm syntax ann1)
|
|
||||||
-> [MappedDiff syntax ann1 ann2]
|
|
||||||
deleteRemaining diffs remaining = insertMapped (This . (termIndex &&& term) <$> remaining) diffs
|
|
||||||
|
|
||||||
-- | Inserts an index and diff pair into a list of indices and diffs.
|
|
||||||
insertDiff :: MappedDiff syntax ann1 ann2
|
|
||||||
-> [MappedDiff syntax ann1 ann2]
|
|
||||||
-> [MappedDiff syntax ann1 ann2]
|
|
||||||
insertDiff inserted [] = [ inserted ]
|
|
||||||
insertDiff a (b:rest) = case (bimap fst fst a, bimap fst fst b) of
|
|
||||||
(These i1 i2, These j1 j2) -> if i1 <= j1 && i2 <= j2 then a : b : rest else b : insertDiff a rest
|
|
||||||
(This i, This j) -> if i <= j then a : b : rest else b : insertDiff a rest
|
|
||||||
(That i, That j) -> if i <= j then a : b : rest else b : insertDiff a rest
|
|
||||||
(This i, These j _) -> if i <= j then a : b : rest else b : insertDiff a rest
|
|
||||||
(That i, These _ j) -> if i <= j then a : b : rest else b : insertDiff a rest
|
|
||||||
|
|
||||||
(This _, That _) -> b : insertDiff a rest
|
|
||||||
(That _, This _) -> b : insertDiff a rest
|
|
||||||
|
|
||||||
(These i1 i2, _) -> case break isThese rest of
|
|
||||||
(rest, tail) -> let (before, after) = foldr' (combine i1 i2) ([], []) (b : rest) in
|
|
||||||
case after of
|
|
||||||
[] -> before <> insertDiff a tail
|
|
||||||
_ -> before <> (a : after) <> tail
|
|
||||||
where
|
|
||||||
combine i1 i2 each (before, after) = case bimap fst fst each of
|
|
||||||
This j1 -> if i1 <= j1 then (before, each : after) else (each : before, after)
|
|
||||||
That j2 -> if i2 <= j2 then (before, each : after) else (each : before, after)
|
|
||||||
These _ _ -> (before, after)
|
|
||||||
|
|
||||||
findNearestNeighboursToDiff :: (Foldable syntax, Functor syntax, GAlign syntax)
|
|
||||||
=> ComparabilityRelation syntax ann1 ann2 -- ^ A relation determining whether two terms can be compared.
|
|
||||||
-> [TermOrIndexOrNone (UnmappedTerm syntax ann2)]
|
|
||||||
-> [UnmappedTerm syntax ann1]
|
|
||||||
-> [UnmappedTerm syntax ann2]
|
|
||||||
-> ([MappedDiff syntax ann1 ann2], UnmappedTerms syntax ann1)
|
|
||||||
findNearestNeighboursToDiff canCompare allDiffs featureAs featureBs = (diffs, remaining)
|
|
||||||
where
|
|
||||||
(diffs, (_, remaining, _)) =
|
|
||||||
traverse (findNearestNeighbourToDiff' canCompare (toKdMap featureAs) (toKdMap featureBs)) allDiffs &
|
|
||||||
fmap catMaybes &
|
|
||||||
(`runState` (pred (maybe 0 termIndex (listToMaybe featureAs)), toMap featureAs, toMap featureBs))
|
|
||||||
|
|
||||||
findNearestNeighbourToDiff' :: (Foldable syntax, Functor syntax, GAlign syntax)
|
|
||||||
=> ComparabilityRelation syntax ann1 ann2 -- ^ A relation determining whether two terms can be compared.
|
|
||||||
-> KdMap Double FeatureVector (UnmappedTerm syntax ann1)
|
|
||||||
-> KdMap Double FeatureVector (UnmappedTerm syntax ann2)
|
|
||||||
-> TermOrIndexOrNone (UnmappedTerm syntax ann2)
|
|
||||||
-> State (Int, UnmappedTerms syntax ann1, UnmappedTerms syntax ann2)
|
|
||||||
(Maybe (MappedDiff syntax ann1 ann2))
|
|
||||||
findNearestNeighbourToDiff' canCompare kdTreeA kdTreeB termThing = case termThing of
|
|
||||||
None -> pure Nothing
|
|
||||||
RWS.Term term -> Just <$> findNearestNeighbourTo canCompare kdTreeA kdTreeB term
|
|
||||||
Index i -> modify' (\ (_, unA, unB) -> (i, unA, unB)) >> pure Nothing
|
|
||||||
|
|
||||||
-- | Construct a diff for a term in B by matching it against the most similar eligible term in A (if any), marking both as ineligible for future matches.
|
-- | Construct a diff for a term in B by matching it against the most similar eligible term in A (if any), marking both as ineligible for future matches.
|
||||||
findNearestNeighbourTo :: (Foldable syntax, Functor syntax, GAlign syntax)
|
findNearestNeighbourTo :: (Foldable syntax, Functor syntax, GAlign syntax)
|
||||||
=> ComparabilityRelation syntax ann1 ann2 -- ^ A relation determining whether two terms can be compared.
|
=> ComparabilityRelation syntax ann1 ann2 -- ^ A relation determining whether two terms can be compared.
|
||||||
-> KdMap Double FeatureVector (UnmappedTerm syntax ann1)
|
-> [UnmappedTerm syntax ann1]
|
||||||
-> KdMap Double FeatureVector (UnmappedTerm syntax ann2)
|
-> [UnmappedTerm syntax ann2]
|
||||||
-> UnmappedTerm syntax ann2
|
-> [MappedDiff syntax ann1 ann2]
|
||||||
-> State (Int, UnmappedTerms syntax ann1, UnmappedTerms syntax ann2)
|
findNearestNeighbourTo canCompare as bs = go (pred (termIndex (head as))) as bs
|
||||||
(MappedDiff syntax ann1 ann2)
|
|
||||||
findNearestNeighbourTo canCompare kdTreeA kdTreeB term@(UnmappedTerm j _ b) = do
|
|
||||||
(previous, unmappedA, unmappedB) <- get
|
|
||||||
fromMaybe (insertion previous unmappedA unmappedB term) $ do
|
|
||||||
guard (not (null unmappedA))
|
|
||||||
let (minA, _) = IntMap.findMin unmappedA
|
|
||||||
-- Look up the nearest unmapped term in `unmappedA`.
|
|
||||||
foundA@(UnmappedTerm i _ a) <- nearestUnmapped (\ a -> isNearAndComparableTo canCompare previous b a && termIndex a >= minA) kdTreeA term
|
|
||||||
-- Look up the nearest `foundA` in `unmappedB`
|
|
||||||
UnmappedTerm j' _ _ <- nearestUnmapped (\ b -> isNearAndComparableTo (flip canCompare) (pred j) a b && termIndex b >= j) kdTreeB foundA
|
|
||||||
-- Return Nothing if their indices don't match
|
|
||||||
guard (j == j')
|
|
||||||
pure $! do
|
|
||||||
put (i, IntMap.delete i unmappedA, IntMap.delete j unmappedB)
|
|
||||||
pure (These (i, a) (j, b))
|
|
||||||
|
|
||||||
findNearestNeighbourTo' :: (Foldable syntax, Functor syntax, GAlign syntax)
|
|
||||||
=> ComparabilityRelation syntax ann1 ann2 -- ^ A relation determining whether two terms can be compared.
|
|
||||||
-> [UnmappedTerm syntax ann1]
|
|
||||||
-> [UnmappedTerm syntax ann2]
|
|
||||||
-> [MappedDiff syntax ann1 ann2]
|
|
||||||
findNearestNeighbourTo' canCompare as bs = go (pred (termIndex (head as))) as bs
|
|
||||||
where go _ as [] = This . (termIndex &&& term) <$> as
|
where go _ as [] = This . (termIndex &&& term) <$> as
|
||||||
go _ [] bs = That . (termIndex &&& term) <$> bs
|
go _ [] bs = That . (termIndex &&& term) <$> bs
|
||||||
go previous unmappedA@(UnmappedTerm minA _ _ : _) (termB@(UnmappedTerm j _ b) : restUnmappedB) =
|
go previous unmappedA@(UnmappedTerm minA _ _ : _) (termB@(UnmappedTerm j _ b) : restUnmappedB) =
|
||||||
@ -221,18 +121,6 @@ defaultQ = 3
|
|||||||
defaultMoveBound = 1
|
defaultMoveBound = 1
|
||||||
|
|
||||||
|
|
||||||
-- Returns a state (insertion index, old unmapped terms, new unmapped terms), and value of (index, inserted diff),
|
|
||||||
-- given a previous index, two sets of umapped terms, and an unmapped term to insert.
|
|
||||||
insertion :: Int
|
|
||||||
-> UnmappedTerms syntax ann1
|
|
||||||
-> UnmappedTerms syntax ann2
|
|
||||||
-> UnmappedTerm syntax ann2
|
|
||||||
-> State (Int, UnmappedTerms syntax ann1, UnmappedTerms syntax ann2)
|
|
||||||
(MappedDiff syntax ann1 ann2)
|
|
||||||
insertion previous unmappedA unmappedB (UnmappedTerm j _ b) = do
|
|
||||||
put (previous, unmappedA, IntMap.delete j unmappedB)
|
|
||||||
pure (That (j, b))
|
|
||||||
|
|
||||||
mapContiguous :: (Foldable syntax, Functor syntax, GAlign syntax)
|
mapContiguous :: (Foldable syntax, Functor syntax, GAlign syntax)
|
||||||
=> ComparabilityRelation syntax (Record (FeatureVector ': fields1)) (Record (FeatureVector ': fields2))
|
=> ComparabilityRelation syntax (Record (FeatureVector ': fields1)) (Record (FeatureVector ': fields2))
|
||||||
-> RWSEditScript syntax (Record (FeatureVector ': fields1)) (Record (FeatureVector ': fields2))
|
-> RWSEditScript syntax (Record (FeatureVector ': fields1)) (Record (FeatureVector ': fields2))
|
||||||
@ -245,31 +133,9 @@ mapContiguous canCompare = go 0 0 [] []
|
|||||||
These a b -> mapChunk as bs <> (These (i, a) (j, b) : go (succ i) (succ j) [] [] rest)
|
These a b -> mapChunk as bs <> (These (i, a) (j, b) : go (succ i) (succ j) [] [] rest)
|
||||||
mapChunk as [] = This . (termIndex &&& term) <$> reverse as
|
mapChunk as [] = This . (termIndex &&& term) <$> reverse as
|
||||||
mapChunk [] bs = That . (termIndex &&& term) <$> reverse bs
|
mapChunk [] bs = That . (termIndex &&& term) <$> reverse bs
|
||||||
mapChunk as bs = findNearestNeighbourTo' canCompare (reverse as) (reverse bs)
|
mapChunk as bs = findNearestNeighbourTo canCompare (reverse as) (reverse bs)
|
||||||
|
|
||||||
|
|
||||||
genFeaturizedTermsAndDiffs :: Functor syntax
|
|
||||||
=> RWSEditScript syntax (Record (FeatureVector ': fields1)) (Record (FeatureVector ': fields2))
|
|
||||||
-> ( [UnmappedTerm syntax (Record (FeatureVector ': fields1))]
|
|
||||||
, [UnmappedTerm syntax (Record (FeatureVector ': fields2))]
|
|
||||||
, [MappedDiff syntax (Record (FeatureVector ': fields1)) (Record (FeatureVector ': fields2))]
|
|
||||||
, [TermOrIndexOrNone (UnmappedTerm syntax (Record (FeatureVector ': fields2)))]
|
|
||||||
)
|
|
||||||
genFeaturizedTermsAndDiffs sesDiffs = let Mapping _ _ a b c d = foldl' combine (Mapping 0 0 [] [] [] []) sesDiffs in (reverse a, reverse b, reverse c, reverse d)
|
|
||||||
where combine (Mapping counterA counterB as bs mappedDiffs allDiffs) diff = case diff of
|
|
||||||
This term -> Mapping (succ counterA) counterB (featurize counterA term : as) bs mappedDiffs (None : allDiffs)
|
|
||||||
That term -> Mapping counterA (succ counterB) as (featurize counterB term : bs) mappedDiffs (RWS.Term (featurize counterB term) : allDiffs)
|
|
||||||
These a b -> Mapping (succ counterA) (succ counterB) as bs ((These (counterA, a) (counterB, b)) : mappedDiffs) (Index counterA : allDiffs)
|
|
||||||
|
|
||||||
data Mapping syntax ann1 ann2
|
|
||||||
= Mapping
|
|
||||||
{-# UNPACK #-} !Int
|
|
||||||
{-# UNPACK #-} !Int
|
|
||||||
![UnmappedTerm syntax ann1]
|
|
||||||
![UnmappedTerm syntax ann2]
|
|
||||||
![MappedDiff syntax ann1 ann2]
|
|
||||||
![TermOrIndexOrNone (UnmappedTerm syntax ann2)]
|
|
||||||
|
|
||||||
featurize :: Functor syntax => Int -> Term syntax (Record (FeatureVector ': fields)) -> UnmappedTerm syntax (Record (FeatureVector ': fields))
|
featurize :: Functor syntax => Int -> Term syntax (Record (FeatureVector ': fields)) -> UnmappedTerm syntax (Record (FeatureVector ': fields))
|
||||||
featurize index term = UnmappedTerm index (getField (extract term)) (eraseFeatureVector term)
|
featurize index term = UnmappedTerm index (getField (extract term)) (eraseFeatureVector term)
|
||||||
|
|
||||||
@ -282,8 +148,6 @@ nullFeatureVector = FV $ listArray (0, 0) [0]
|
|||||||
setFeatureVector :: Record (FeatureVector ': fields) -> FeatureVector -> Record (FeatureVector ': fields)
|
setFeatureVector :: Record (FeatureVector ': fields) -> FeatureVector -> Record (FeatureVector ': fields)
|
||||||
setFeatureVector = setField
|
setFeatureVector = setField
|
||||||
|
|
||||||
toMap :: [UnmappedTerm syntax ann] -> IntMap.IntMap (UnmappedTerm syntax ann)
|
|
||||||
toMap = IntMap.fromList . fmap (termIndex &&& id)
|
|
||||||
|
|
||||||
toKdMap :: [UnmappedTerm syntax ann] -> KdMap Double FeatureVector (UnmappedTerm syntax ann)
|
toKdMap :: [UnmappedTerm syntax ann] -> KdMap Double FeatureVector (UnmappedTerm syntax ann)
|
||||||
toKdMap = build (elems . unFV) . fmap (feature &&& id)
|
toKdMap = build (elems . unFV) . fmap (feature &&& id)
|
||||||
|
Loading…
Reference in New Issue
Block a user