mirror of
https://github.com/github/semantic.git
synced 2024-12-24 15:35:14 +03:00
Compute feature vectors from Bags of p,q-grams.
This commit is contained in:
parent
226be486c8
commit
686f94d53e
@ -1,9 +1,11 @@
|
||||
module Data.Gram where
|
||||
|
||||
import Data.DList
|
||||
import Control.Monad.Random
|
||||
import Data.DList as DList
|
||||
import Data.Hashable
|
||||
import Data.Vector as Vector
|
||||
import Prologue
|
||||
import Test.QuickCheck.Random
|
||||
|
||||
-- | A gram over some label type, made up of two lists of labels.
--
-- NOTE(review): presumably 'stem' holds the ancestor labels and 'base' the
-- sibling labels of a p,q-gram — confirm against the code that builds grams.
data Gram label = Gram
  { stem :: [label] -- ^ Labels making up the stem of the gram.
  , base :: [label] -- ^ Labels making up the base of the gram.
  }
|
||||
|
||||
@ -13,6 +15,14 @@ serialize gram = stem gram <> base gram
|
||||
|
||||
-- | A bag of elements, represented as a difference list ('DList').
type Bag = DList
|
||||
|
||||
|
||||
-- | Compute a @d@-dimensional feature vector for a bag of grams.
--
-- Each gram is hashed and the hash is used to seed a generator ('mkQCGen'),
-- from which @d@ random components are drawn. That vector is divided by its
-- magnitude, and the per-gram vectors are summed component-wise. Because the
-- generator is seeded only by the hash, grams with equal hashes contribute
-- identical vectors.
featureVector :: Hashable label => Bag (Gram label) -> Int -> Vector Rational
featureVector bag d = sumVectors (unitDVector . hash <$> bag)
  where
    -- Pseudo-random vector of length @d@ determined entirely by the seed,
    -- scaled by its magnitude. The parameter is named @seed@ so it does not
    -- shadow the 'hash' function used above.
    unitDVector seed = normalize (evalRand (Vector.replicateM d getRandom) (mkQCGen seed))

    -- Divide every component by the vector's magnitude. The magnitude is bound
    -- once so it is not recomputed per component, and a zero magnitude leaves
    -- the vector untouched instead of dividing by zero.
    normalize vec
      | norm == 0 = vec
      | otherwise = fmap (/ norm) vec
      where
        norm = magnitude vec

    -- Euclidean magnitude; the square root is taken in 'Double', so the result
    -- is an approximation converted back to 'Rational'.
    magnitude vec = toRational (sqrtDouble (fromRational (Vector.sum (fmap (^^ (2 :: Integer)) vec))))

    -- Component-wise sum of all vectors in the bag, starting from the zero
    -- vector of length @d@.
    sumVectors = DList.foldr (Vector.zipWith (+)) (Vector.replicate d 0)
|
||||
|
||||
-- | Grams are hashed via their serialized label sequence ('serialize').
instance Hashable label => Hashable (Gram label) where
  -- Thread the caller's salt through to the serialized labels rather than
  -- discarding it, so the salt actually participates in the result.
  hashWithSalt salt = hashWithSalt salt . serialize
  -- The unsalted hash remains the hash of the serialized labels.
  hash = hash . serialize
|
||||
|
Loading…
Reference in New Issue
Block a user