mirror of
https://github.com/facebook/duckling.git
synced 2025-01-08 15:00:59 +03:00
Duckling monad for Engine
Summary: This converts the code to monadic style, so that we can in the future: * stop threading the `Document` parameter everywhere * keep some state, like regexp match cache (I've already checked that it makes a substantial difference) There should be no difference in performance or behavior at this point. Reviewed By: patapizza Differential Revision: D4778808 fbshipit-source-id: a167ed8
This commit is contained in:
parent
78228dea83
commit
e37bb7c186
@ -14,12 +14,15 @@
|
|||||||
module Duckling.Engine
|
module Duckling.Engine
|
||||||
( parseAndResolve
|
( parseAndResolve
|
||||||
, lookupRegex
|
, lookupRegex
|
||||||
|
, runDuckling
|
||||||
) where
|
) where
|
||||||
|
|
||||||
import Control.DeepSeq
|
import Control.DeepSeq
|
||||||
|
import Control.Monad.Extra
|
||||||
import Data.Aeson
|
import Data.Aeson
|
||||||
import Data.ByteString (ByteString)
|
import Data.ByteString (ByteString)
|
||||||
import qualified Data.Char as Char
|
import qualified Data.Char as Char
|
||||||
|
import Data.Functor.Identity
|
||||||
import Data.Maybe
|
import Data.Maybe
|
||||||
import Data.Text (Text)
|
import Data.Text (Text)
|
||||||
import qualified Data.Text as Text
|
import qualified Data.Text as Text
|
||||||
@ -41,9 +44,14 @@ import Duckling.Stash (Stash)
|
|||||||
-- -----------------------------------------------------------------
|
-- -----------------------------------------------------------------
|
||||||
-- Engine
|
-- Engine
|
||||||
|
|
||||||
|
type Duckling a = Identity a
|
||||||
|
|
||||||
|
runDuckling :: Duckling a -> a
|
||||||
|
runDuckling ma = runIdentity ma
|
||||||
|
|
||||||
parseAndResolve :: [Rule] -> Text -> Context -> [ResolvedToken]
|
parseAndResolve :: [Rule] -> Text -> Context -> [ResolvedToken]
|
||||||
parseAndResolve rules input context = mapMaybe (resolveNode context) .
|
parseAndResolve rules input context = mapMaybe (resolveNode context) .
|
||||||
force $ Stash.toPosOrderedList $
|
force $ Stash.toPosOrderedList $ runDuckling $
|
||||||
parseString rules (mkDocument input) Stash.empty Stash.empty []
|
parseString rules (mkDocument input) Stash.empty Stash.empty []
|
||||||
|
|
||||||
produce :: Match -> Maybe Node
|
produce :: Match -> Maybe Node
|
||||||
@ -77,8 +85,9 @@ isRangeValid Document { indexable = s } (Range start end) =
|
|||||||
isDifferent :: Char -> Char -> Bool
|
isDifferent :: Char -> Char -> Bool
|
||||||
isDifferent a b = charClass a /= charClass b
|
isDifferent a b = charClass a /= charClass b
|
||||||
|
|
||||||
lookupRegex :: PCRE.Regex -> Int -> Document -> [Node]
|
lookupRegex :: PCRE.Regex -> Int -> Document -> Duckling [Node]
|
||||||
lookupRegex regex position Document { rawInput = s } = nodes
|
lookupRegex regex position Document { rawInput = s } =
|
||||||
|
return nodes
|
||||||
where
|
where
|
||||||
ss = Text.drop position s
|
ss = Text.drop position s
|
||||||
(nodes, _, _) = L.foldl' f ([], ss, position) $ match regex ss
|
(nodes, _, _) = L.foldl' f ([], ss, position) $ match regex ss
|
||||||
@ -98,12 +107,13 @@ lookupRegex regex position Document { rawInput = s } = nodes
|
|||||||
, rule = Nothing
|
, rule = Nothing
|
||||||
}
|
}
|
||||||
|
|
||||||
lookupItem :: Document -> PatternItem -> Stash -> Int -> [Node]
|
lookupItem :: Document -> PatternItem -> Stash -> Int -> Duckling [Node]
|
||||||
lookupItem s (Regex re) _ position =
|
lookupItem s (Regex re) _ position =
|
||||||
filter (\node -> isRangeValid s (nodeRange node) &&
|
filter (\node -> isRangeValid s (nodeRange node) &&
|
||||||
isPositionValid position s node) $
|
isPositionValid position s node) <$>
|
||||||
lookupRegex re position s
|
lookupRegex re position s
|
||||||
lookupItem s (Predicate p) stash position =
|
lookupItem s (Predicate p) stash position =
|
||||||
|
return $
|
||||||
filter (p . token) $
|
filter (p . token) $
|
||||||
takeWhile (isPositionValid position s) $
|
takeWhile (isPositionValid position s) $
|
||||||
Stash.toPosOrderedListFrom stash position
|
Stash.toPosOrderedListFrom stash position
|
||||||
@ -118,61 +128,62 @@ type Match = (Rule, Int, [Node])
|
|||||||
|
|
||||||
-- | Recursively augments `matches`.
|
-- | Recursively augments `matches`.
|
||||||
-- Discards partial matches stuck by a regex.
|
-- Discards partial matches stuck by a regex.
|
||||||
matchAll :: Document -> Stash -> [Match] -> [Match]
|
matchAll :: Document -> Stash -> [Match] -> Duckling [Match]
|
||||||
matchAll sentence stash matches = concatMap mkNextMatches matches
|
matchAll sentence stash matches = concatMapM mkNextMatches matches
|
||||||
where
|
where
|
||||||
mkNextMatches :: Match -> [Match]
|
mkNextMatches :: Match -> Duckling [Match]
|
||||||
mkNextMatches match@(Rule {pattern = []}, _, _) = [ match ]
|
mkNextMatches match@(Rule {pattern = []}, _, _) = return [ match ]
|
||||||
mkNextMatches match@(Rule {pattern = p:_}, _, _) =
|
mkNextMatches match@(Rule {pattern = p:_}, _, _) = do
|
||||||
let nextMatches = matchAll sentence stash $ matchFirst sentence stash match
|
nextMatches <- matchAll sentence stash =<< matchFirst sentence stash match
|
||||||
in case p of
|
return $ case p of
|
||||||
Regex _ -> nextMatches
|
Regex _ -> nextMatches
|
||||||
Predicate _ -> match:nextMatches
|
Predicate _ -> match:nextMatches
|
||||||
|
|
||||||
-- | Returns all matches matching the first pattern item of `match`.
|
-- | Returns all matches matching the first pattern item of `match`.
|
||||||
matchFirst :: Document -> Stash -> Match -> [Match]
|
matchFirst :: Document -> Stash -> Match -> Duckling [Match]
|
||||||
matchFirst _ _ (Rule {pattern = []}, _, _) = []
|
matchFirst _ _ (Rule {pattern = []}, _, _) = return []
|
||||||
matchFirst sentence stash (rule@(Rule {pattern = p:ps}), position, route) =
|
matchFirst sentence stash (rule@(Rule {pattern = p:ps}), position, route) =
|
||||||
map (\node@Node {nodeRange = Range _ pos'} ->
|
map (\node@Node {nodeRange = Range _ pos'} ->
|
||||||
(rule {pattern = ps}, pos', node:route)
|
let newRoute = node:route
|
||||||
) $ lookupItem sentence p stash position
|
in newRoute `seq` (newRule, pos', newRoute)
|
||||||
|
) <$> lookupItem sentence p stash position
|
||||||
|
where
|
||||||
|
newRule = rule { pattern = ps }
|
||||||
|
|
||||||
-- | Finds new matches resulting from newly added tokens.
|
-- | Finds new matches resulting from newly added tokens.
|
||||||
-- Produces new tokens from full matches.
|
-- Produces new tokens from full matches.
|
||||||
parseString1
|
parseString1
|
||||||
:: [Rule] -> Document -> Stash -> Stash -> [Match] -> (Stash, [Match])
|
:: [Rule] -> Document -> Stash -> Stash -> [Match]
|
||||||
parseString1 rules sentence stash new matches =
|
-> Duckling (Stash, [Match])
|
||||||
|
parseString1 rules sentence stash new matches = do
|
||||||
|
-- Recursively match patterns.
|
||||||
|
-- Find which `matches` can advance because of `new`.
|
||||||
|
newPartial <- concatMapM (matchFirst sentence new) matches
|
||||||
|
|
||||||
|
-- Find new matches resulting from newly added tokens (`new`)
|
||||||
|
-- For the first pass, pass through all rules.
|
||||||
|
-- For subsequent passes, only try rules starting with a predicate.
|
||||||
|
let match rule = matchFirst sentence new (rule, 0, [])
|
||||||
|
newMatches <- if Stash.null stash
|
||||||
|
then concatMapM match rules
|
||||||
|
else concatMapM match
|
||||||
|
[ rule | rule@(Rule {pattern = (Predicate _:_)}) <- rules ]
|
||||||
|
|
||||||
|
(full, partial) <- L.partition (\(Rule {pattern}, _, _) -> null pattern)
|
||||||
|
<$> matchAll sentence stash (newPartial ++ newMatches)
|
||||||
|
|
||||||
-- Produce full matches as new tokens
|
-- Produce full matches as new tokens
|
||||||
( Stash.fromList $ mapMaybe produce full
|
return ( Stash.fromList $ mapMaybe produce full
|
||||||
, matches ++ partial
|
, matches ++ partial
|
||||||
)
|
)
|
||||||
where
|
|
||||||
-- Recursively match patterns.
|
|
||||||
(full, partial) = L.partition (\(Rule {pattern}, _, _) -> null pattern)
|
|
||||||
. matchAll sentence stash
|
|
||||||
$ newPartial ++ newMatches
|
|
||||||
|
|
||||||
-- Find which `matches` can advance because of `new`.
|
|
||||||
newPartial = concatMap (matchFirst sentence new) matches
|
|
||||||
|
|
||||||
-- Find new matches resulting from newly added tokens (`new`)
|
|
||||||
-- For the first pass, pass through all rules.
|
|
||||||
-- For subsequent passes, only try rules starting with a predicate.
|
|
||||||
newMatches = if Stash.null stash
|
|
||||||
then [ x | rule <- rules, x <- matchFirst sentence new (rule, 0, []) ]
|
|
||||||
else [ x
|
|
||||||
| rule@(Rule {pattern = (Predicate _:_)}) <- rules
|
|
||||||
, x <- matchFirst sentence new (rule, 0, [])
|
|
||||||
]
|
|
||||||
|
|
||||||
-- | Produces all tokens recursively.
|
-- | Produces all tokens recursively.
|
||||||
parseString :: [Rule] -> Document -> Stash -> Stash -> [Match] -> Stash
|
parseString :: [Rule] -> Document -> Stash -> Stash -> [Match] -> Duckling Stash
|
||||||
parseString rules sentence stash new matches
|
parseString rules sentence stash new matches = do
|
||||||
| Stash.null new' = stash
|
(new', matches') <- parseString1 rules sentence stash new matches
|
||||||
| otherwise = parseString rules sentence stash' new' matches'
|
if Stash.null new'
|
||||||
where
|
then return stash
|
||||||
stash' = Stash.union stash new'
|
else parseString rules sentence (Stash.union stash new') new' matches'
|
||||||
(new', matches') = parseString1 rules sentence stash new matches
|
|
||||||
|
|
||||||
resolveNode :: Context -> Node -> Maybe ResolvedToken
|
resolveNode :: Context -> Node -> Maybe ResolvedToken
|
||||||
resolveNode context n@Node{token = (Token _ dd), nodeRange = nodeRange} = do
|
resolveNode context n@Node{token = (Token _ dd), nodeRange = nodeRange} = do
|
||||||
|
@ -438,6 +438,7 @@ library
|
|||||||
, containers >= 0.5.6.2 && < 0.6
|
, containers >= 0.5.6.2 && < 0.6
|
||||||
, deepseq >= 1.4.1.1 && < 1.5
|
, deepseq >= 1.4.1.1 && < 1.5
|
||||||
, dependent-sum >= 0.3.2.2 && < 0.5
|
, dependent-sum >= 0.3.2.2 && < 0.5
|
||||||
|
, extra >= 1.4.10 && < 1.5
|
||||||
, hashable >= 1.2.4.0 && < 1.3
|
, hashable >= 1.2.4.0 && < 1.3
|
||||||
, regex-base >= 0.93.2 && < 0.94
|
, regex-base >= 0.93.2 && < 0.94
|
||||||
, regex-pcre >= 0.94.4 && < 0.95
|
, regex-pcre >= 0.94.4 && < 0.95
|
||||||
|
@ -28,5 +28,6 @@ tests = testGroup "Engine Tests"
|
|||||||
emptyRegexTest :: TestTree
|
emptyRegexTest :: TestTree
|
||||||
emptyRegexTest = testCase "Empty Regex Test" $
|
emptyRegexTest = testCase "Empty Regex Test" $
|
||||||
case regex "()" of
|
case regex "()" of
|
||||||
Regex regex -> assertEqual "empty result" [] $ lookupRegex regex 0 "hey"
|
Regex regex -> assertEqual "empty result" [] $
|
||||||
|
runDuckling $ lookupRegex regex 0 "hey"
|
||||||
_ -> assertFailure "expected a regex"
|
_ -> assertFailure "expected a regex"
|
||||||
|
Loading…
Reference in New Issue
Block a user