Duckling monad for Engine

Summary:
This converts the code to monadic style, so that
we can in the future:
* stop threading the `Document` parameter everywhere
* keep some state, like regexp match cache (I've already checked that it makes a substantial difference)

There should be no difference in performance or behavior
at this point.

Reviewed By: patapizza

Differential Revision: D4778808

fbshipit-source-id: a167ed8
This commit is contained in:
Bartosz Nitka 2017-03-31 14:05:57 -07:00 committed by Facebook Github Bot
parent 78228dea83
commit e37bb7c186
3 changed files with 60 additions and 47 deletions

View File

@ -14,12 +14,15 @@
module Duckling.Engine module Duckling.Engine
( parseAndResolve ( parseAndResolve
, lookupRegex , lookupRegex
, runDuckling
) where ) where
import Control.DeepSeq import Control.DeepSeq
import Control.Monad.Extra
import Data.Aeson import Data.Aeson
import Data.ByteString (ByteString) import Data.ByteString (ByteString)
import qualified Data.Char as Char import qualified Data.Char as Char
import Data.Functor.Identity
import Data.Maybe import Data.Maybe
import Data.Text (Text) import Data.Text (Text)
import qualified Data.Text as Text import qualified Data.Text as Text
@ -41,9 +44,14 @@ import Duckling.Stash (Stash)
-- ----------------------------------------------------------------- -- -----------------------------------------------------------------
-- Engine -- Engine
type Duckling a = Identity a
runDuckling :: Duckling a -> a
runDuckling ma = runIdentity ma
parseAndResolve :: [Rule] -> Text -> Context -> [ResolvedToken] parseAndResolve :: [Rule] -> Text -> Context -> [ResolvedToken]
parseAndResolve rules input context = mapMaybe (resolveNode context) . parseAndResolve rules input context = mapMaybe (resolveNode context) .
force $ Stash.toPosOrderedList $ force $ Stash.toPosOrderedList $ runDuckling $
parseString rules (mkDocument input) Stash.empty Stash.empty [] parseString rules (mkDocument input) Stash.empty Stash.empty []
produce :: Match -> Maybe Node produce :: Match -> Maybe Node
@ -77,8 +85,9 @@ isRangeValid Document { indexable = s } (Range start end) =
isDifferent :: Char -> Char -> Bool isDifferent :: Char -> Char -> Bool
isDifferent a b = charClass a /= charClass b isDifferent a b = charClass a /= charClass b
lookupRegex :: PCRE.Regex -> Int -> Document -> [Node] lookupRegex :: PCRE.Regex -> Int -> Document -> Duckling [Node]
lookupRegex regex position Document { rawInput = s } = nodes lookupRegex regex position Document { rawInput = s } =
return nodes
where where
ss = Text.drop position s ss = Text.drop position s
(nodes, _, _) = L.foldl' f ([], ss, position) $ match regex ss (nodes, _, _) = L.foldl' f ([], ss, position) $ match regex ss
@ -98,12 +107,13 @@ lookupRegex regex position Document { rawInput = s } = nodes
, rule = Nothing , rule = Nothing
} }
lookupItem :: Document -> PatternItem -> Stash -> Int -> [Node] lookupItem :: Document -> PatternItem -> Stash -> Int -> Duckling [Node]
lookupItem s (Regex re) _ position = lookupItem s (Regex re) _ position =
filter (\node -> isRangeValid s (nodeRange node) && filter (\node -> isRangeValid s (nodeRange node) &&
isPositionValid position s node) $ isPositionValid position s node) <$>
lookupRegex re position s lookupRegex re position s
lookupItem s (Predicate p) stash position = lookupItem s (Predicate p) stash position =
return $
filter (p . token) $ filter (p . token) $
takeWhile (isPositionValid position s) $ takeWhile (isPositionValid position s) $
Stash.toPosOrderedListFrom stash position Stash.toPosOrderedListFrom stash position
@ -118,61 +128,62 @@ type Match = (Rule, Int, [Node])
-- | Recursively augments `matches`. -- | Recursively augments `matches`.
-- Discards partial matches stuck by a regex. -- Discards partial matches stuck by a regex.
matchAll :: Document -> Stash -> [Match] -> [Match] matchAll :: Document -> Stash -> [Match] -> Duckling [Match]
matchAll sentence stash matches = concatMap mkNextMatches matches matchAll sentence stash matches = concatMapM mkNextMatches matches
where where
mkNextMatches :: Match -> [Match] mkNextMatches :: Match -> Duckling [Match]
mkNextMatches match@(Rule {pattern = []}, _, _) = [ match ] mkNextMatches match@(Rule {pattern = []}, _, _) = return [ match ]
mkNextMatches match@(Rule {pattern = p:_}, _, _) = mkNextMatches match@(Rule {pattern = p:_}, _, _) = do
let nextMatches = matchAll sentence stash $ matchFirst sentence stash match nextMatches <- matchAll sentence stash =<< matchFirst sentence stash match
in case p of return $ case p of
Regex _ -> nextMatches Regex _ -> nextMatches
Predicate _ -> match:nextMatches Predicate _ -> match:nextMatches
-- | Returns all matches matching the first pattern item of `match`. -- | Returns all matches matching the first pattern item of `match`.
matchFirst :: Document -> Stash -> Match -> [Match] matchFirst :: Document -> Stash -> Match -> Duckling [Match]
matchFirst _ _ (Rule {pattern = []}, _, _) = [] matchFirst _ _ (Rule {pattern = []}, _, _) = return []
matchFirst sentence stash (rule@(Rule {pattern = p:ps}), position, route) = matchFirst sentence stash (rule@(Rule {pattern = p:ps}), position, route) =
map (\node@Node {nodeRange = Range _ pos'} -> map (\node@Node {nodeRange = Range _ pos'} ->
(rule {pattern = ps}, pos', node:route) let newRoute = node:route
) $ lookupItem sentence p stash position in newRoute `seq` (newRule, pos', newRoute)
) <$> lookupItem sentence p stash position
where
newRule = rule { pattern = ps }
-- | Finds new matches resulting from newly added tokens. -- | Finds new matches resulting from newly added tokens.
-- Produces new tokens from full matches. -- Produces new tokens from full matches.
parseString1 parseString1
:: [Rule] -> Document -> Stash -> Stash -> [Match] -> (Stash, [Match]) :: [Rule] -> Document -> Stash -> Stash -> [Match]
parseString1 rules sentence stash new matches = -> Duckling (Stash, [Match])
parseString1 rules sentence stash new matches = do
-- Recursively match patterns.
-- Find which `matches` can advance because of `new`.
newPartial <- concatMapM (matchFirst sentence new) matches
-- Find new matches resulting from newly added tokens (`new`)
-- For the first pass, pass through all rules.
-- For subsequent passes, only try rules starting with a predicate.
let match rule = matchFirst sentence new (rule, 0, [])
newMatches <- if Stash.null stash
then concatMapM match rules
else concatMapM match
[ rule | rule@(Rule {pattern = (Predicate _:_)}) <- rules ]
(full, partial) <- L.partition (\(Rule {pattern}, _, _) -> null pattern)
<$> matchAll sentence stash (newPartial ++ newMatches)
-- Produce full matches as new tokens -- Produce full matches as new tokens
( Stash.fromList $ mapMaybe produce full return ( Stash.fromList $ mapMaybe produce full
, matches ++ partial , matches ++ partial
) )
where
-- Recursively match patterns.
(full, partial) = L.partition (\(Rule {pattern}, _, _) -> null pattern)
. matchAll sentence stash
$ newPartial ++ newMatches
-- Find which `matches` can advance because of `new`.
newPartial = concatMap (matchFirst sentence new) matches
-- Find new matches resulting from newly added tokens (`new`)
-- For the first pass, pass through all rules.
-- For subsequent passes, only try rules starting with a predicate.
newMatches = if Stash.null stash
then [ x | rule <- rules, x <- matchFirst sentence new (rule, 0, []) ]
else [ x
| rule@(Rule {pattern = (Predicate _:_)}) <- rules
, x <- matchFirst sentence new (rule, 0, [])
]
-- | Produces all tokens recursively. -- | Produces all tokens recursively.
parseString :: [Rule] -> Document -> Stash -> Stash -> [Match] -> Stash parseString :: [Rule] -> Document -> Stash -> Stash -> [Match] -> Duckling Stash
parseString rules sentence stash new matches parseString rules sentence stash new matches = do
| Stash.null new' = stash (new', matches') <- parseString1 rules sentence stash new matches
| otherwise = parseString rules sentence stash' new' matches' if Stash.null new'
where then return stash
stash' = Stash.union stash new' else parseString rules sentence (Stash.union stash new') new' matches'
(new', matches') = parseString1 rules sentence stash new matches
resolveNode :: Context -> Node -> Maybe ResolvedToken resolveNode :: Context -> Node -> Maybe ResolvedToken
resolveNode context n@Node{token = (Token _ dd), nodeRange = nodeRange} = do resolveNode context n@Node{token = (Token _ dd), nodeRange = nodeRange} = do

View File

@ -438,6 +438,7 @@ library
, containers >= 0.5.6.2 && < 0.6 , containers >= 0.5.6.2 && < 0.6
, deepseq >= 1.4.1.1 && < 1.5 , deepseq >= 1.4.1.1 && < 1.5
, dependent-sum >= 0.3.2.2 && < 0.5 , dependent-sum >= 0.3.2.2 && < 0.5
, extra >= 1.4.10 && < 1.5
, hashable >= 1.2.4.0 && < 1.3 , hashable >= 1.2.4.0 && < 1.3
, regex-base >= 0.93.2 && < 0.94 , regex-base >= 0.93.2 && < 0.94
, regex-pcre >= 0.94.4 && < 0.95 , regex-pcre >= 0.94.4 && < 0.95

View File

@ -28,5 +28,6 @@ tests = testGroup "Engine Tests"
emptyRegexTest :: TestTree emptyRegexTest :: TestTree
emptyRegexTest = testCase "Empty Regex Test" $ emptyRegexTest = testCase "Empty Regex Test" $
case regex "()" of case regex "()" of
Regex regex -> assertEqual "empty result" [] $ lookupRegex regex 0 "hey" Regex regex -> assertEqual "empty result" [] $
runDuckling $ lookupRegex regex 0 "hey"
_ -> assertFailure "expected a regex" _ -> assertFailure "expected a regex"