duckling/Duckling/Ranking/Types.hs
Steven Troxler 81ab073acf Move Candidate to Ranking/Types.hs
Summary:
In my opinion putting `Candidate` into the core `Types.hs`
is a mistake - it's used exclusively in the ranking stage, so cluttering
the core tokenizing and recursive parsing / value resolution logic in
`Duckling.Types` with this irrelevant datatype makes things less clear
than if we keep it in the `Ranking` modules.

Reviewed By: chessai

Differential Revision: D28462902

fbshipit-source-id: cd4bb88c4a16945265e8f21c8808b06ae3383559
2021-05-18 11:50:17 -07:00

106 lines
2.5 KiB
Haskell

-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree.
{-# LANGUAGE NoRebindableSyntax #-}
-- | Types used by the ranking stage: the feature and dataset aliases,
-- the classifier records, and the 'Candidate' values compared by the ranker.
module Duckling.Ranking.Types
( Feature
, BagOfFeatures
, Class
, Datum
, Dataset
, Classifier(..)
, Classifiers
, ClassData(..)
, Candidate(..)
, infinity
) where
import Data.HashMap.Strict (HashMap)
import Data.Text (Text)
import Prelude
import Duckling.Types
( Node(..)
, Range(..)
, ResolvedToken(..)
, Token(..)
, isDimension
)
-- -----------------------------------------------------------------
-- Aliases
-- | A feature, identified by a 'Text' value.
type Feature = Text
-- | Associates each 'Feature' with an 'Int'
-- (presumably an occurrence count -- TODO confirm against the feature extractor).
type BagOfFeatures = HashMap Feature Int
-- | The label attached to a training example
-- (presumably 'True' for an accepted parse -- TODO confirm).
type Class = Bool
-- | One labelled example: a bag of features paired with its 'Class'.
type Datum = (BagOfFeatures, Class)
-- | Labelled examples grouped under a 'Text' key
-- (presumably the rule name -- TODO confirm against the training code).
type Dataset = HashMap Text [Datum]
-- -----------------------------------------------------------------
-- Classification
-- | A classifier made of two 'ClassData' records, one per outcome.
data Classifier = Classifier
  { okData :: ClassData
    -- ^ Statistics for the \"ok\" outcome
    -- (presumably the positive class -- TODO confirm).
  , koData :: ClassData
    -- ^ Statistics for the \"ko\" outcome
    -- (presumably the negative class -- TODO confirm).
  }
  deriving (Eq, Show)
-- | A collection of 'Classifier's indexed by a 'Text' key
-- (presumably the rule name -- TODO confirm against the callers).
type Classifiers = HashMap Text Classifier
-- | Per-class statistics held by a 'Classifier'.
--
-- NOTE(review): the field meanings below are inferred from their names and
-- types only; confirm against the training and scoring code.
data ClassData = ClassData
  { prior :: Double
    -- ^ Prior for this class (presumably in log space).
  , unseen :: Double
    -- ^ Value used for a feature that has no entry in 'likelihoods'
    -- (presumably a log-likelihood).
  , likelihoods :: HashMap Feature Double
    -- ^ One 'Double' per known 'Feature'.
  , n :: Int
    -- ^ An integer count (presumably the number of examples seen).
  }
  deriving (Eq, Show)
-- | Positive infinity as a 'Double': the reciprocal of zero.
infinity :: Double
infinity = recip 0
-- -----------------------------------------------------------------
-- Candidate
-- | A potential match handed to the ranker.
--
-- The constructor is positional; its three fields are, in order:
--
--   1. the resolved token under consideration,
--   2. its naive Bayes log-likelihood (the sum of the log-likelihoods of
--      every rule used to produce it),
--   3. whether the token's dimension matches what the caller asked for.
data Candidate = Candidate
  ResolvedToken -- ^ The resolved token being ranked
  Double        -- ^ Naive Bayes log-likelihood: sum of LL of all rules used
  Bool          -- ^ Does the token's dimension match the caller's request?
  deriving (Eq, Show)
-- | Ordering used to rank candidates by the span of text they occupy.
--
-- Two spans are compared by coverage: a candidate is greater than another
-- when its range covers the other's range (it starts no later and ends no
-- earlier, without being the same span). Ranges that merely overlap, or are
-- disjoint, compare as 'EQ'.
--
-- * When @isDimension@ accepts the first token's dimension for the second
--   token (presumably: both tokens have the same dimension -- confirm
--   against "Duckling.Types"), identical spans are ordered by score and
--   all other spans by coverage.
-- * Otherwise, if both candidates carry the same \"matches the caller's
--   request\" flag, they are ordered by coverage alone.
-- * Otherwise only the flagged candidate can win, and only when it also
--   wins on coverage; every remaining case is 'EQ'.
--
-- NOTE(review): because overlapping spans compare as 'EQ', this is not a
-- total order in the usual sense; callers appear to rely on that.
instance Ord Candidate where
  compare
    (Candidate Resolved{range = Range start1 end1, node = Node{token = Token dim1 _}} ll1 wanted1)
    (Candidate Resolved{range = Range start2 end2, node = Node{token = other}} ll2 wanted2)
      | isDimension dim1 other = sameDimension
      | wanted1 == wanted2 = byCoverage
      | wanted1 && byCoverage == GT = GT
      | wanted2 && byCoverage == LT = LT
      | otherwise = EQ
    where
      byStart = compare start1 start2
      byEnd = compare end1 end2

      -- Identical spans are decided by score; everything else by coverage.
      sameDimension = case (byStart, byEnd) of
        (EQ, EQ) -> compare ll1 ll2
        _ -> byCoverage

      -- a > b if a covers b
      byCoverage = case (byStart, byEnd) of
        (EQ, _) -> byEnd
        (LT, LT) -> EQ
        (LT, _) -> GT
        (GT, GT) -> EQ
        (GT, _) -> LT