1
1
mirror of https://github.com/github/semantic.git synced 2024-12-22 14:21:31 +03:00

🔥 pqGrams.

This commit is contained in:
Rob Rix 2016-08-09 16:24:05 -04:00
parent 46c2e4d8e0
commit 67034c067d

View File

@ -1,7 +1,6 @@
{-# LANGUAGE DataKinds, GADTs, RankNTypes, TypeOperators #-}
module Data.RandomWalkSimilarity
( rws
, pqGrams
, featureVector
, pqGramDecorator
, featureVectorDecorator
@ -62,11 +61,6 @@ data UnmappedTerm a = UnmappedTerm { termIndex :: {-# UNPACK #-} !Int, feature :
data Gram label = Gram { stem :: [Maybe label], base :: [Maybe label] }
deriving (Eq, Show)
-- | Compute the bag of grams with stems of length _p_ and bases of length _q_, with labels computed from annotations, which summarize the entire subtree of a term.
pqGrams :: Traversable f => (forall b. CofreeF f (Record fields) b -> label) -> Int -> Int -> Cofree f (Record fields) -> DList.DList (Gram label)
pqGrams getLabel p q = foldMap (pure . getField) . pqGramDecorator getLabel p q
-- | Compute a vector with the specified number of dimensions, as an approximation of a bag of `Gram`s summarizing a tree.
featureVector :: Hashable label => Int -> DList.DList (Gram label) -> Vector.Vector Double
featureVector d bag = sumVectors $ unitVector d . hash <$> bag