1
1
mirror of https://github.com/github/semantic.git synced 2025-01-06 06:46:07 +03:00

Move the p,q-gram decorator to the RWS module.

This commit is contained in:
Rob Rix 2016-08-02 15:08:21 -04:00
parent 3d3874ee14
commit f546b21ea2
2 changed files with 13 additions and 10 deletions

View File

@ -1,4 +1,4 @@
{-# LANGUAGE RankNTypes #-} {-# LANGUAGE DataKinds, RankNTypes, TypeOperators #-}
module Data.RandomWalkSimilarity module Data.RandomWalkSimilarity
( rws ( rws
, pqGrams , pqGrams
@ -15,6 +15,7 @@ import Data.Functor.Foldable as Foldable
import Data.Hashable import Data.Hashable
import qualified Data.KdTree.Static as KdTree import qualified Data.KdTree.Static as KdTree
import qualified Data.List as List import qualified Data.List as List
import Data.Record
import qualified Data.Vector as Vector import qualified Data.Vector as Vector
import Patch import Patch
import Prologue import Prologue
@ -80,6 +81,17 @@ pqGrams getLabel p q = uncurry DList.cons . cata merge . setRootBase . setRootSt
setRootBase term = let (a :< f) = runCofree term in cofree (setBase a (base a) :< f) setRootBase term = let (a :< f) = runCofree term in cofree (setBase a (base a) :< f)
setRootStem = foldr (\ p rest -> assignParent Nothing p . rest) identity [0..p] setRootStem = foldr (\ p rest -> assignParent Nothing p . rest) identity [0..p]
type TermDecorator f fields field = CofreeF f (Record fields) (Record (field ': fields)) -> field
pqGramDecorator :: (Prologue.Foldable f, Functor f) => (forall b. CofreeF f (Record a) b -> label) -> Int -> Int -> TermDecorator f a (Gram label, DList.DList (Gram label))
pqGramDecorator getLabel p q c@(a :< s) = (Gram [] [ Just label ], foldMap (childGrams label) s)
where childGrams :: HasField fields (Gram label, DList.DList (Gram label)) => label -> Record fields -> DList.DList (Gram label)
childGrams label record = let (child, grandchildren) = getField record in
DList.singleton (prependParent label child) <> grandchildren
prependParent label gram = gram { stem = Just label : stem gram }
label = getLabel c
-- | A sliding-window fold over _n_ items of a list per iteration. -- | A sliding-window fold over _n_ items of a list per iteration.
windowed :: Int -> (a -> [a] -> b -> b) -> b -> [a] -> b windowed :: Int -> (a -> [a] -> b -> b) -> b -> [a] -> b
windowed n f seed = para alg windowed n f seed = para alg

View File

@ -4,10 +4,8 @@ module Diffing where
import qualified Prologue import qualified Prologue
import Prologue hiding (fst, snd) import Prologue hiding (fst, snd)
import qualified Data.ByteString.Char8 as B1 import qualified Data.ByteString.Char8 as B1
import qualified Data.DList as DList
import Data.Functor.Both import Data.Functor.Both
import Data.Functor.Foldable import Data.Functor.Foldable
import Data.RandomWalkSimilarity
import Data.Record import Data.Record
import qualified Data.Text.IO as TextIO import qualified Data.Text.IO as TextIO
import qualified Data.Text.ICU.Detect as Detect import qualified Data.Text.ICU.Detect as Detect
@ -128,13 +126,6 @@ compareCategoryEq = (==) `on` category . extract
termCostDecorator :: (Prologue.Foldable f, Functor f) => TermDecorator f a Cost termCostDecorator :: (Prologue.Foldable f, Functor f) => TermDecorator f a Cost
termCostDecorator c = 1 + sum (cost <$> tailF c) termCostDecorator c = 1 + sum (cost <$> tailF c)
pqGramDecorator :: (Prologue.Foldable f, Functor f) => (forall b. CofreeF f (Record a) b -> label) -> Int -> Int -> TermDecorator f a (Gram label, DList.DList (Gram label))
pqGramDecorator getLabel p q c@(a :< s) = (Gram [] [ Just label ], foldMap (childGrams label) s)
where childGrams :: HasField fields (Gram label, DList.DList (Gram label)) => label -> Record fields -> DList.DList (Gram label)
childGrams label record = let (child, grandchildren) = getField record in
DList.singleton (prependParent label child) <> grandchildren
prependParent label gram = gram { stem = Just label : stem gram }
label = getLabel c
-- | The sum of the node count of the diffs patches. -- | The sum of the node count of the diffs patches.
diffCostWithCachedTermCosts :: HasField fields Cost => Diff leaf (Record fields) -> Integer diffCostWithCachedTermCosts :: HasField fields Cost => Diff leaf (Record fields) -> Integer