diff --git a/src/Data/Gram.hs b/src/Data/Gram.hs
index 319c0049d..782f609fb 100644
--- a/src/Data/Gram.hs
+++ b/src/Data/Gram.hs
@@ -16,16 +16,13 @@ serialize gram = stem gram <> base gram
 
 type Bag = DList
 
-featureVector :: Hashable label => Bag (Gram label) -> Int -> Vector Rational
+featureVector :: Hashable label => Bag (Gram label) -> Int -> Vector Double
 featureVector bag d = sumVectors $ unitDVector . hash <$> bag
   where unitDVector hash = normalize . (`evalRand` mkQCGen hash) $ Prologue.sequence (Vector.replicate d getRandom)
         normalize vec = fmap (/ magnitude vec) vec
-        magnitude vec = toRational (sqrtDouble (fromRational (Vector.sum (fmap (^^ (2 :: Integer)) vec))))
+        magnitude vec = sqrtDouble (Vector.sum (fmap (** 2) vec))
         sumVectors = DList.foldr (Vector.zipWith (+)) (Vector.replicate d 0)
 
 instance Hashable label => Hashable (Gram label) where
   hashWithSalt _ = hash
   hash = hash . serialize
-
-instance (Random a, Integral a) => Random (Ratio a) where
-  random = first (% 1) . random