mirror of
https://github.com/facebook/duckling.git
synced 2024-12-25 05:01:31 +03:00
8db73688d7
Summary: Document had its internal details leaked over 2 files. This consolidates it. It took a long time to make this perf neutral (now it's even a tiny win), for reasons I don't completely understand. The INLINE pragma on byteStringFromPos I semi-understand, but I also had to move isRangeValid to Document and that's a bit of a mystery. Reviewed By: patapizza Differential Revision: D4948449 fbshipit-source-id: ffb251a
154 lines
4.0 KiB
Haskell
154 lines
4.0 KiB
Haskell
-- Copyright (c) 2016-present, Facebook, Inc.
|
|
-- All rights reserved.
|
|
--
|
|
-- This source code is licensed under the BSD-style license found in the
|
|
-- LICENSE file in the root directory of this source tree. An additional grant
|
|
-- of patent rights can be found in the PATENTS file in the same directory.
|
|
|
|
|
|
{-# LANGUAGE DeriveAnyClass #-}
|
|
{-# LANGUAGE DeriveGeneric #-}
|
|
{-# LANGUAGE ExistentialQuantification #-}
|
|
{-# LANGUAGE FlexibleContexts #-}
|
|
{-# LANGUAGE GADTs #-}
|
|
{-# LANGUAGE NoRebindableSyntax #-}
|
|
{-# LANGUAGE RankNTypes #-}
|
|
{-# LANGUAGE StandaloneDeriving #-}
|
|
|
|
module Duckling.Types where
|
|
|
|
import Control.DeepSeq
|
|
import Data.Aeson
|
|
import qualified Data.ByteString.Lazy as LB
|
|
import Data.GADT.Compare
|
|
import Data.Hashable
|
|
import Data.Maybe
|
|
import Data.String
|
|
import Data.Text (Text)
|
|
import qualified Data.Text.Encoding as Text
|
|
import Data.Typeable ((:~:)(Refl), Typeable)
|
|
import GHC.Generics
|
|
import Prelude
|
|
import qualified Text.Regex.Base as R
|
|
import qualified Text.Regex.PCRE as PCRE
|
|
|
|
import Duckling.Dimensions.Types
|
|
import Duckling.Resolve
|
|
|
|
-- -----------------------------------------------------------------
|
|
-- Token
|
|
|
|
data Token = forall a . (Resolve a, Eq a, Hashable a, Show a, NFData a) =>
|
|
Token (Dimension a) a
|
|
|
|
deriving instance Show Token
|
|
instance Eq Token where
|
|
Token d1 v1 == Token d2 v2 = case geq d1 d2 of
|
|
Just Refl -> v1 == v2
|
|
Nothing -> False
|
|
|
|
instance Hashable Token where
|
|
hashWithSalt s (Token dim v) = hashWithSalt s (dim, v)
|
|
|
|
instance NFData Token where
|
|
rnf (Token _ v) = rnf v
|
|
|
|
isDimension :: Dimension a -> Token -> Bool
|
|
isDimension dim (Token dim' _) = isJust $ geq dim dim'
|
|
|
|
data Node = Node
|
|
{ nodeRange :: Range
|
|
, token :: Token
|
|
, children :: [Node]
|
|
, rule :: Maybe Text
|
|
} deriving (Eq, Generic, Hashable, Show, NFData)
|
|
|
|
data ResolvedToken = Resolved
|
|
{ range :: Range
|
|
, node :: Node
|
|
, jsonValue :: Value
|
|
} deriving (Eq, Show)
|
|
|
|
instance Ord ResolvedToken where
|
|
compare (Resolved range1 _ json1) (Resolved range2 _ json2) =
|
|
case compare range1 range2 of
|
|
EQ -> compare (toJText json1) (toJText json2)
|
|
z -> z
|
|
|
|
data Candidate = Candidate ResolvedToken Double Bool
|
|
deriving (Eq, Show)
|
|
|
|
instance Ord Candidate where
|
|
compare (Candidate (Resolved{range = Range s1 e1, node = Node{token = Token d1 _}}) score1 t1)
|
|
(Candidate (Resolved{range = Range s2 e2, node = Node{token = tok2}}) score2 t2)
|
|
| isDimension d1 tok2 = case starts of
|
|
EQ -> case ends of
|
|
EQ -> compare score1 score2
|
|
z -> z
|
|
LT -> case ends of
|
|
LT -> EQ
|
|
_ -> GT
|
|
GT -> case ends of
|
|
GT -> EQ
|
|
_ -> LT
|
|
| t1 == t2 = compRange
|
|
| t1 && compRange == GT = GT
|
|
| t2 && compRange == LT = LT
|
|
| otherwise = EQ
|
|
where
|
|
starts = compare s1 s2
|
|
ends = compare e1 e2
|
|
-- a > b if a recovers b
|
|
compRange = case starts of
|
|
EQ -> ends
|
|
LT -> case ends of
|
|
LT -> EQ
|
|
_ -> GT
|
|
GT -> case ends of
|
|
GT -> EQ
|
|
_ -> LT
|
|
|
|
data Range = Range Int Int
|
|
deriving (Eq, Ord, Generic, Hashable, Show, NFData)
|
|
|
|
type Production = [Token] -> Maybe Token
|
|
type Predicate = Token -> Bool
|
|
data PatternItem = Regex PCRE.Regex | Predicate Predicate
|
|
|
|
type Pattern = [PatternItem]
|
|
|
|
data Rule = Rule
|
|
{ name :: Text
|
|
, pattern :: Pattern
|
|
, prod :: Production
|
|
}
|
|
|
|
instance Show Rule where
|
|
show (Rule name _ _) = show name
|
|
|
|
data Entity = Entity
|
|
{ dim :: Text
|
|
, body :: Text
|
|
, value :: Text
|
|
, start :: Int
|
|
, end :: Int
|
|
} deriving (Eq, Generic, Show, NFData)
|
|
|
|
instance ToJSON Entity where
|
|
toEncoding = genericToEncoding defaultOptions
|
|
|
|
toJText :: ToJSON x => x -> Text
|
|
toJText j = Text.decodeUtf8 $ LB.toStrict $ encode j
|
|
|
|
-- -----------------------------------------------------------------
|
|
-- Predicates helpers
|
|
|
|
regex :: String -> PatternItem
|
|
regex = Regex . R.makeRegexOpts compOpts execOpts
|
|
where
|
|
compOpts = PCRE.defaultCompOpt + PCRE.compCaseless
|
|
execOpts = PCRE.defaultExecOpt
|
|
|
|
dimension :: Typeable a => Dimension a -> PatternItem
|
|
dimension value = Predicate $ isDimension value
|