duckling/Duckling/Types.hs
Bartosz Nitka 8db73688d7 Move Document and helpers to a fresh module
Summary:
Document had its internal details leaked over 2 files.
This consolidates it.

It took a long time to make this perf neutral (now it's even a tiny
win), for reasons I don't completely understand.
The INLINE pragma on byteStringFromPos I semi-understand,
but I also had to move isRangeValid to Document and that's
a bit of a mystery.

Reviewed By: patapizza

Differential Revision: D4948449

fbshipit-source-id: ffb251a
2017-04-25 16:49:18 -07:00

154 lines
4.0 KiB
Haskell

-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree. An additional grant
-- of patent rights can be found in the PATENTS file in the same directory.
{-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE ExistentialQuantification #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE NoRebindableSyntax #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE StandaloneDeriving #-}
module Duckling.Types where
import Control.DeepSeq
import Data.Aeson
import qualified Data.ByteString.Lazy as LB
import Data.GADT.Compare
import Data.Hashable
import Data.Maybe
import Data.String
import Data.Text (Text)
import qualified Data.Text.Encoding as Text
import Data.Typeable ((:~:)(Refl), Typeable)
import GHC.Generics
import Prelude
import qualified Text.Regex.Base as R
import qualified Text.Regex.PCRE as PCRE
import Duckling.Dimensions.Types
import Duckling.Resolve
-- -----------------------------------------------------------------
-- Token
data Token = forall a . (Resolve a, Eq a, Hashable a, Show a, NFData a) =>
Token (Dimension a) a
deriving instance Show Token
instance Eq Token where
Token d1 v1 == Token d2 v2 = case geq d1 d2 of
Just Refl -> v1 == v2
Nothing -> False
instance Hashable Token where
hashWithSalt s (Token dim v) = hashWithSalt s (dim, v)
instance NFData Token where
rnf (Token _ v) = rnf v
isDimension :: Dimension a -> Token -> Bool
isDimension dim (Token dim' _) = isJust $ geq dim dim'
data Node = Node
{ nodeRange :: Range
, token :: Token
, children :: [Node]
, rule :: Maybe Text
} deriving (Eq, Generic, Hashable, Show, NFData)
data ResolvedToken = Resolved
{ range :: Range
, node :: Node
, jsonValue :: Value
} deriving (Eq, Show)
instance Ord ResolvedToken where
compare (Resolved range1 _ json1) (Resolved range2 _ json2) =
case compare range1 range2 of
EQ -> compare (toJText json1) (toJText json2)
z -> z
data Candidate = Candidate ResolvedToken Double Bool
deriving (Eq, Show)
instance Ord Candidate where
compare (Candidate (Resolved{range = Range s1 e1, node = Node{token = Token d1 _}}) score1 t1)
(Candidate (Resolved{range = Range s2 e2, node = Node{token = tok2}}) score2 t2)
| isDimension d1 tok2 = case starts of
EQ -> case ends of
EQ -> compare score1 score2
z -> z
LT -> case ends of
LT -> EQ
_ -> GT
GT -> case ends of
GT -> EQ
_ -> LT
| t1 == t2 = compRange
| t1 && compRange == GT = GT
| t2 && compRange == LT = LT
| otherwise = EQ
where
starts = compare s1 s2
ends = compare e1 e2
-- a > b if a recovers b
compRange = case starts of
EQ -> ends
LT -> case ends of
LT -> EQ
_ -> GT
GT -> case ends of
GT -> EQ
_ -> LT
data Range = Range Int Int
deriving (Eq, Ord, Generic, Hashable, Show, NFData)
type Production = [Token] -> Maybe Token
type Predicate = Token -> Bool
data PatternItem = Regex PCRE.Regex | Predicate Predicate
type Pattern = [PatternItem]
data Rule = Rule
{ name :: Text
, pattern :: Pattern
, prod :: Production
}
instance Show Rule where
show (Rule name _ _) = show name
data Entity = Entity
{ dim :: Text
, body :: Text
, value :: Text
, start :: Int
, end :: Int
} deriving (Eq, Generic, Show, NFData)
instance ToJSON Entity where
toEncoding = genericToEncoding defaultOptions
toJText :: ToJSON x => x -> Text
toJText j = Text.decodeUtf8 $ LB.toStrict $ encode j
-- -----------------------------------------------------------------
-- Predicates helpers
regex :: String -> PatternItem
regex = Regex . R.makeRegexOpts compOpts execOpts
where
compOpts = PCRE.defaultCompOpt + PCRE.compCaseless
execOpts = PCRE.defaultExecOpt
dimension :: Typeable a => Dimension a -> PatternItem
dimension value = Predicate $ isDimension value