Duckling monad for Engine

Summary: This converts the code to monadic style, so that we can in the future: * stop threading the `Document` parameter everywhere * keep some state, like regexp match cache (I've already checked that it makes a substantial difference) There should be no difference in performance or behavior at this point. Reviewed By: patapizza Differential Revision: D4778808 fbshipit-source-id: a167ed8
2025-01-06 04:53:13 +03:00 · 2017-03-31 14:05:57 -07:00 · 2017-03-31 14:05:57 -07:00 · e37bb7c186
commit e37bb7c186
parent 78228dea83
3 changed files with 60 additions and 47 deletions
--- a/Duckling/Engine.hs
+++ b/Duckling/Engine.hs
@ -14,12 +14,15 @@
 module Duckling.Engine
  ( parseAndResolve
  , lookupRegex
+  , runDuckling
  ) where

 import Control.DeepSeq
+import Control.Monad.Extra
 import Data.Aeson
 import Data.ByteString (ByteString)
 import qualified Data.Char as Char
+import Data.Functor.Identity
 import Data.Maybe
 import Data.Text (Text)
 import qualified Data.Text as Text
@ -41,9 +44,14 @@ import Duckling.Stash (Stash)
 -- -----------------------------------------------------------------
 -- Engine

+type Duckling a = Identity a
+
+runDuckling :: Duckling a -> a
+runDuckling ma = runIdentity ma
+
 parseAndResolve :: [Rule] -> Text -> Context -> [ResolvedToken]
 parseAndResolve rules input context = mapMaybe (resolveNode context) .
-  force $ Stash.toPosOrderedList $
+  force $ Stash.toPosOrderedList $ runDuckling $
  parseString rules (mkDocument input) Stash.empty Stash.empty []

 produce :: Match -> Maybe Node
@ -77,8 +85,9 @@ isRangeValid Document { indexable = s } (Range start end) =
    isDifferent :: Char -> Char -> Bool
    isDifferent a b = charClass a /= charClass b

-lookupRegex :: PCRE.Regex -> Int -> Document -> [Node]
-lookupRegex regex position Document { rawInput = s } = nodes
+lookupRegex :: PCRE.Regex -> Int -> Document -> Duckling [Node]
+lookupRegex regex position Document { rawInput = s } =
+  return nodes
  where
    ss = Text.drop position s
    (nodes, _, _) = L.foldl' f ([], ss, position) $ match regex ss
@ -98,12 +107,13 @@ lookupRegex regex position Document { rawInput = s } = nodes
          , rule = Nothing
          }

-lookupItem :: Document -> PatternItem -> Stash -> Int -> [Node]
+lookupItem :: Document -> PatternItem -> Stash -> Int -> Duckling [Node]
 lookupItem s (Regex re) _ position =
  filter (\node -> isRangeValid s (nodeRange node) &&
-                   isPositionValid position s node) $
+                   isPositionValid position s node) <$>
  lookupRegex re position s
 lookupItem s (Predicate p) stash position =
+  return $
  filter (p . token) $
  takeWhile (isPositionValid position s) $
  Stash.toPosOrderedListFrom stash position
@ -118,61 +128,62 @@ type Match = (Rule, Int, [Node])

 -- | Recursively augments `matches`.
 -- Discards partial matches stuck by a regex.
-matchAll :: Document -> Stash -> [Match] -> [Match]
-matchAll sentence stash matches = concatMap mkNextMatches matches
+matchAll :: Document -> Stash -> [Match] -> Duckling [Match]
+matchAll sentence stash matches = concatMapM mkNextMatches matches
  where
-    mkNextMatches :: Match -> [Match]
-    mkNextMatches match@(Rule {pattern = []}, _, _) = [ match ]
-    mkNextMatches match@(Rule {pattern = p:_}, _, _) =
-      let nextMatches = matchAll sentence stash $ matchFirst sentence stash match
-      in case p of
+    mkNextMatches :: Match -> Duckling [Match]
+    mkNextMatches match@(Rule {pattern = []}, _, _) = return [ match ]
+    mkNextMatches match@(Rule {pattern = p:_}, _, _) = do
+      nextMatches <- matchAll sentence stash =<< matchFirst sentence stash match
+      return $ case p of
        Regex _ -> nextMatches
        Predicate _ -> match:nextMatches

 -- | Returns all matches matching the first pattern item of `match`.
-matchFirst :: Document -> Stash -> Match -> [Match]
-matchFirst _ _ (Rule {pattern = []}, _, _) = []
+matchFirst :: Document -> Stash -> Match -> Duckling [Match]
+matchFirst _ _ (Rule {pattern = []}, _, _) = return []
 matchFirst sentence stash (rule@(Rule {pattern = p:ps}), position, route) =
  map (\node@Node {nodeRange = Range _ pos'} ->
-    (rule {pattern = ps}, pos', node:route)
-  ) $ lookupItem sentence p stash position
+    let newRoute = node:route
+    in newRoute `seq` (newRule, pos', newRoute)
+  ) <$> lookupItem sentence p stash position
+  where
+  newRule = rule { pattern = ps }

 -- | Finds new matches resulting from newly added tokens.
 -- Produces new tokens from full matches.
 parseString1
-  :: [Rule] -> Document -> Stash -> Stash -> [Match] -> (Stash, [Match])
-parseString1 rules sentence stash new matches =
+  :: [Rule] -> Document -> Stash -> Stash -> [Match]
+  -> Duckling (Stash, [Match])
+parseString1 rules sentence stash new matches = do
+  -- Recursively match patterns.
+  -- Find which `matches` can advance because of `new`.
+  newPartial <- concatMapM (matchFirst sentence new) matches
+
+  -- Find new matches resulting from newly added tokens (`new`)
+  -- For the first pass, pass through all rules.
+  -- For subsequent passes, only try rules starting with a predicate.
+  let match rule = matchFirst sentence new (rule, 0, [])
+  newMatches <- if Stash.null stash
+    then concatMapM match rules
+    else concatMapM match
+           [ rule | rule@(Rule {pattern = (Predicate _:_)}) <- rules ]
+
+  (full, partial) <- L.partition (\(Rule {pattern}, _, _) -> null pattern)
+    <$> matchAll sentence stash (newPartial ++ newMatches)
+
  -- Produce full matches as new tokens
-  ( Stash.fromList $ mapMaybe produce full
-  , matches ++ partial
-  )
-  where
-    -- Recursively match patterns.
-    (full, partial) = L.partition (\(Rule {pattern}, _, _) -> null pattern)
-      . matchAll sentence stash
-      $ newPartial ++ newMatches
-
-    -- Find which `matches` can advance because of `new`.
-    newPartial = concatMap (matchFirst sentence new) matches
-
-    -- Find new matches resulting from newly added tokens (`new`)
-    -- For the first pass, pass through all rules.
-    -- For subsequent passes, only try rules starting with a predicate.
-    newMatches = if Stash.null stash
-      then [ x | rule <- rules, x <- matchFirst sentence new (rule, 0, []) ]
-      else [ x
-           | rule@(Rule {pattern = (Predicate _:_)}) <- rules
-           , x <- matchFirst sentence new (rule, 0, [])
-           ]
+  return ( Stash.fromList $ mapMaybe produce full
+         , matches ++ partial
+         )

 -- | Produces all tokens recursively.
-parseString :: [Rule] -> Document -> Stash -> Stash -> [Match] -> Stash
-parseString rules sentence stash new matches
-  | Stash.null new' = stash
-  | otherwise = parseString rules sentence stash' new' matches'
-  where
-    stash' = Stash.union stash new'
-    (new', matches') = parseString1 rules sentence stash new matches
+parseString :: [Rule] -> Document -> Stash -> Stash -> [Match] -> Duckling Stash
+parseString rules sentence stash new matches = do
+  (new', matches') <- parseString1 rules sentence stash new matches
+  if Stash.null new'
+    then return stash
+    else parseString rules sentence (Stash.union stash new') new' matches'

 resolveNode :: Context -> Node -> Maybe ResolvedToken
 resolveNode context n@Node{token = (Token _ dd), nodeRange = nodeRange} = do
--- a/duckling.cabal
+++ b/duckling.cabal
@ -438,6 +438,7 @@ library
                     , containers            >= 0.5.6.2 && < 0.6
                     , deepseq               >= 1.4.1.1 && < 1.5
                     , dependent-sum         >= 0.3.2.2 && < 0.5
+                     , extra                 >= 1.4.10 && < 1.5
                     , hashable              >= 1.2.4.0 && < 1.3
                     , regex-base            >= 0.93.2 && < 0.94
                     , regex-pcre            >= 0.94.4 && < 0.95
--- a/tests/Duckling/Engine/Tests.hs
+++ b/tests/Duckling/Engine/Tests.hs
@ -28,5 +28,6 @@ tests = testGroup "Engine Tests"
 emptyRegexTest :: TestTree
 emptyRegexTest = testCase "Empty Regex Test" $
  case regex "()" of
-    Regex regex -> assertEqual "empty result" [] $ lookupRegex regex 0 "hey"
+    Regex regex -> assertEqual "empty result" [] $
+      runDuckling $ lookupRegex regex 0 "hey"
    _ -> assertFailure "expected a regex"