1
1
mirror of https://github.com/github/semantic.git synced 2025-01-03 04:51:57 +03:00

Compute the label in the p,q-gram decorator.

This commit is contained in:
Rob Rix 2016-08-09 16:05:42 -04:00
parent 404c0b7fd5
commit 6012dfd354

View File

@ -63,7 +63,7 @@ data Gram label = Gram { stem :: [Maybe label], base :: [Maybe label] }
-- | Compute the bag of grams with stems of length _p_ and bases of length _q_, with labels computed from annotations, which summarize the entire subtree of a term. -- | Compute the bag of grams with stems of length _p_ and bases of length _q_, with labels computed from annotations, which summarize the entire subtree of a term.
pqGrams :: Traversable f => (forall b. CofreeF f (Record fields) b -> label) -> Int -> Int -> Cofree f (Record fields) -> DList.DList (Gram label) pqGrams :: Traversable f => (forall b. CofreeF f (Record fields) b -> label) -> Int -> Int -> Cofree f (Record fields) -> DList.DList (Gram label)
pqGrams getLabel p q = foldMap (pure . getField) . decorateTermWithPQGram p q . labelDecorator getLabel pqGrams getLabel p q = foldMap (pure . getField) . decorateTermWithPQGram getLabel p q
-- | Compute a vector with the specified number of dimensions, as an approximation of a bag of `Gram`s summarizing a tree. -- | Compute a vector with the specified number of dimensions, as an approximation of a bag of `Gram`s summarizing a tree.
@ -76,10 +76,9 @@ labelDecorator :: Functor f => (forall b. CofreeF f (Record fields) b -> label)
labelDecorator getLabel = cata $ \ c -> cofree ((getLabel c .: headF c) :< tailF c) labelDecorator getLabel = cata $ \ c -> cofree ((getLabel c .: headF c) :< tailF c)
-- | Replaces labels in a terms annotations with corresponding p,q-grams. -- | Replaces labels in a terms annotations with corresponding p,q-grams.
decorateTermWithPQGram :: Traversable f => Int -> Int -> Cofree f (Record (label ': fields)) -> Cofree f (Record (Gram label ': fields)) decorateTermWithPQGram :: Traversable f => (forall b. CofreeF f (Record fields) b -> label) -> Int -> Int -> Cofree f (Record fields) -> Cofree f (Record (Gram label ': fields))
decorateTermWithPQGram p q = cata algebra decorateTermWithPQGram getLabel p q = cata algebra
where algebra :: Traversable f => CofreeF f (Record (label ': fields)) (Cofree f (Record (Gram label ': fields))) -> Cofree f (Record (Gram label ': fields)) where algebra (rest :< functor) = let label = getLabel (rest :< functor) in cofree ((Gram (padToSize p []) (padToSize q (pure (Just label))) .: rest) :< (`evalState` (siblingLabels functor)) (for functor (assignLabels label)))
algebra (RCons label rest :< functor) = cofree ((Gram (padToSize p []) (padToSize q (pure (Just label))) .: rest) :< (`evalState` (siblingLabels functor)) (for functor (assignLabels label)))
assignLabels :: label -> Cofree f (Record (Gram label ': fields)) -> State [Maybe label] (Cofree f (Record (Gram label ': fields))) assignLabels :: label -> Cofree f (Record (Gram label ': fields)) -> State [Maybe label] (Cofree f (Record (Gram label ': fields)))
assignLabels label a = case runCofree a of assignLabels label a = case runCofree a of
RCons gram rest :< functor -> do RCons gram rest :< functor -> do
@ -104,8 +103,7 @@ unitVector d hash = normalize ((`evalRand` mkQCGen hash) (sequenceA (Vector.repl
featureVectorDecorator :: (Hashable label, Traversable f) => (forall b. CofreeF f (Record fields) b -> label) -> Int -> Int -> Int -> Cofree f (Record fields) -> Cofree f (Record (Vector.Vector Double ': fields)) featureVectorDecorator :: (Hashable label, Traversable f) => (forall b. CofreeF f (Record fields) b -> label) -> Int -> Int -> Int -> Cofree f (Record fields) -> Cofree f (Record (Vector.Vector Double ': fields))
featureVectorDecorator getLabel p q d featureVectorDecorator getLabel p q d
= decorateTermWithFeatureVector d = decorateTermWithFeatureVector d
. decorateTermWithPQGram p q . decorateTermWithPQGram getLabel p q
. labelDecorator getLabel
-- | Pads a list of Alternative values to exactly n elements. -- | Pads a list of Alternative values to exactly n elements.
padToSize :: Alternative f => Int -> [f a] -> [f a] padToSize :: Alternative f => Int -> [f a] -> [f a]