Duckling monad for Engine

Summary:
This converts the code to monadic style, so that
we can in the future:
* stop threading the `Document` parameter everywhere
* keep some state, like regexp match cache (I've already checked that it makes a substantial difference)

There should be no difference in performance or behavior
at this point.

Reviewed By: patapizza

Differential Revision: D4778808

fbshipit-source-id: a167ed8
This commit is contained in:
Bartosz Nitka 2017-03-31 14:05:57 -07:00 committed by Facebook Github Bot
parent 78228dea83
commit e37bb7c186
3 changed files with 60 additions and 47 deletions

View File

@ -14,12 +14,15 @@
module Duckling.Engine
( parseAndResolve
, lookupRegex
, runDuckling
) where
import Control.DeepSeq
import Control.Monad.Extra
import Data.Aeson
import Data.ByteString (ByteString)
import qualified Data.Char as Char
import Data.Functor.Identity
import Data.Maybe
import Data.Text (Text)
import qualified Data.Text as Text
@ -41,9 +44,14 @@ import Duckling.Stash (Stash)
-- -----------------------------------------------------------------
-- Engine
type Duckling a = Identity a
runDuckling :: Duckling a -> a
runDuckling ma = runIdentity ma
parseAndResolve :: [Rule] -> Text -> Context -> [ResolvedToken]
parseAndResolve rules input context = mapMaybe (resolveNode context) .
force $ Stash.toPosOrderedList $
force $ Stash.toPosOrderedList $ runDuckling $
parseString rules (mkDocument input) Stash.empty Stash.empty []
produce :: Match -> Maybe Node
@ -77,8 +85,9 @@ isRangeValid Document { indexable = s } (Range start end) =
isDifferent :: Char -> Char -> Bool
isDifferent a b = charClass a /= charClass b
lookupRegex :: PCRE.Regex -> Int -> Document -> [Node]
lookupRegex regex position Document { rawInput = s } = nodes
lookupRegex :: PCRE.Regex -> Int -> Document -> Duckling [Node]
lookupRegex regex position Document { rawInput = s } =
return nodes
where
ss = Text.drop position s
(nodes, _, _) = L.foldl' f ([], ss, position) $ match regex ss
@ -98,12 +107,13 @@ lookupRegex regex position Document { rawInput = s } = nodes
, rule = Nothing
}
lookupItem :: Document -> PatternItem -> Stash -> Int -> [Node]
lookupItem :: Document -> PatternItem -> Stash -> Int -> Duckling [Node]
lookupItem s (Regex re) _ position =
filter (\node -> isRangeValid s (nodeRange node) &&
isPositionValid position s node) $
isPositionValid position s node) <$>
lookupRegex re position s
lookupItem s (Predicate p) stash position =
return $
filter (p . token) $
takeWhile (isPositionValid position s) $
Stash.toPosOrderedListFrom stash position
@ -118,61 +128,62 @@ type Match = (Rule, Int, [Node])
-- | Recursively augments `matches`.
-- Discards partial matches stuck by a regex.
matchAll :: Document -> Stash -> [Match] -> [Match]
matchAll sentence stash matches = concatMap mkNextMatches matches
matchAll :: Document -> Stash -> [Match] -> Duckling [Match]
matchAll sentence stash matches = concatMapM mkNextMatches matches
where
mkNextMatches :: Match -> [Match]
mkNextMatches match@(Rule {pattern = []}, _, _) = [ match ]
mkNextMatches match@(Rule {pattern = p:_}, _, _) =
let nextMatches = matchAll sentence stash $ matchFirst sentence stash match
in case p of
mkNextMatches :: Match -> Duckling [Match]
mkNextMatches match@(Rule {pattern = []}, _, _) = return [ match ]
mkNextMatches match@(Rule {pattern = p:_}, _, _) = do
nextMatches <- matchAll sentence stash =<< matchFirst sentence stash match
return $ case p of
Regex _ -> nextMatches
Predicate _ -> match:nextMatches
-- | Returns all matches matching the first pattern item of `match`.
matchFirst :: Document -> Stash -> Match -> [Match]
matchFirst _ _ (Rule {pattern = []}, _, _) = []
matchFirst :: Document -> Stash -> Match -> Duckling [Match]
matchFirst _ _ (Rule {pattern = []}, _, _) = return []
matchFirst sentence stash (rule@(Rule {pattern = p:ps}), position, route) =
map (\node@Node {nodeRange = Range _ pos'} ->
(rule {pattern = ps}, pos', node:route)
) $ lookupItem sentence p stash position
let newRoute = node:route
in newRoute `seq` (newRule, pos', newRoute)
) <$> lookupItem sentence p stash position
where
newRule = rule { pattern = ps }
-- | Finds new matches resulting from newly added tokens.
-- Produces new tokens from full matches.
parseString1
:: [Rule] -> Document -> Stash -> Stash -> [Match] -> (Stash, [Match])
parseString1 rules sentence stash new matches =
:: [Rule] -> Document -> Stash -> Stash -> [Match]
-> Duckling (Stash, [Match])
parseString1 rules sentence stash new matches = do
-- Recursively match patterns.
-- Find which `matches` can advance because of `new`.
newPartial <- concatMapM (matchFirst sentence new) matches
-- Find new matches resulting from newly added tokens (`new`)
-- For the first pass, pass through all rules.
-- For subsequent passes, only try rules starting with a predicate.
let match rule = matchFirst sentence new (rule, 0, [])
newMatches <- if Stash.null stash
then concatMapM match rules
else concatMapM match
[ rule | rule@(Rule {pattern = (Predicate _:_)}) <- rules ]
(full, partial) <- L.partition (\(Rule {pattern}, _, _) -> null pattern)
<$> matchAll sentence stash (newPartial ++ newMatches)
-- Produce full matches as new tokens
( Stash.fromList $ mapMaybe produce full
, matches ++ partial
)
where
-- Recursively match patterns.
(full, partial) = L.partition (\(Rule {pattern}, _, _) -> null pattern)
. matchAll sentence stash
$ newPartial ++ newMatches
-- Find which `matches` can advance because of `new`.
newPartial = concatMap (matchFirst sentence new) matches
-- Find new matches resulting from newly added tokens (`new`)
-- For the first pass, pass through all rules.
-- For subsequent passes, only try rules starting with a predicate.
newMatches = if Stash.null stash
then [ x | rule <- rules, x <- matchFirst sentence new (rule, 0, []) ]
else [ x
| rule@(Rule {pattern = (Predicate _:_)}) <- rules
, x <- matchFirst sentence new (rule, 0, [])
]
return ( Stash.fromList $ mapMaybe produce full
, matches ++ partial
)
-- | Produces all tokens recursively.
parseString :: [Rule] -> Document -> Stash -> Stash -> [Match] -> Stash
parseString rules sentence stash new matches
| Stash.null new' = stash
| otherwise = parseString rules sentence stash' new' matches'
where
stash' = Stash.union stash new'
(new', matches') = parseString1 rules sentence stash new matches
parseString :: [Rule] -> Document -> Stash -> Stash -> [Match] -> Duckling Stash
parseString rules sentence stash new matches = do
(new', matches') <- parseString1 rules sentence stash new matches
if Stash.null new'
then return stash
else parseString rules sentence (Stash.union stash new') new' matches'
resolveNode :: Context -> Node -> Maybe ResolvedToken
resolveNode context n@Node{token = (Token _ dd), nodeRange = nodeRange} = do

View File

@ -438,6 +438,7 @@ library
, containers >= 0.5.6.2 && < 0.6
, deepseq >= 1.4.1.1 && < 1.5
, dependent-sum >= 0.3.2.2 && < 0.5
, extra >= 1.4.10 && < 1.5
, hashable >= 1.2.4.0 && < 1.3
, regex-base >= 0.93.2 && < 0.94
, regex-pcre >= 0.94.4 && < 0.95

View File

@ -28,5 +28,6 @@ tests = testGroup "Engine Tests"
emptyRegexTest :: TestTree
emptyRegexTest = testCase "Empty Regex Test" $
case regex "()" of
Regex regex -> assertEqual "empty result" [] $ lookupRegex regex 0 "hey"
Regex regex -> assertEqual "empty result" [] $
runDuckling $ lookupRegex regex 0 "hey"
_ -> assertFailure "expected a regex"