From 323a7df023d3aafc16918fd0b4387153aaa49a49 Mon Sep 17 00:00:00 2001 From: Steven Troxler Date: Fri, 14 May 2021 11:40:31 -0700 Subject: [PATCH] Rearrange Engine.hs to top-down ordering Summary: Make the code reflect the call graph, which looks roughly like this: ``` parseAndResolve runDuckling resolveNode parseString saturateParseString parseString1 matchFirst ... low level stuff matchFirstAnywhere ... low level stuff ``` I found the existing order pretty hard to untangle when I was writing some architecture notes on this module, I think the new ordering will help Reviewed By: chessai Differential Revision: D28441933 fbshipit-source-id: 07c722aa6d4038baa7f14fec84660ecc2736ed2e --- Duckling/Engine.hs | 246 ++++++++++++++++++++++----------------------- 1 file changed, 123 insertions(+), 123 deletions(-) diff --git a/Duckling/Engine.hs b/Duckling/Engine.hs index 47b28b35..7fda0eb6 100644 --- a/Duckling/Engine.hs +++ b/Duckling/Engine.hs @@ -39,11 +39,6 @@ import qualified Duckling.Types.Stash as Stash -- ----------------------------------------------------------------- -- Engine -type Duckling a = Identity a - -runDuckling :: Duckling a -> a -runDuckling ma = runIdentity ma - parseAndResolve :: [Rule] -> Text -> Context -> Options -> [ResolvedToken] parseAndResolve rules input context options = mapMaybe @@ -51,6 +46,84 @@ parseAndResolve rules input context options = $ force $ Stash.toPosOrderedList $ runDuckling $ parseString rules (Document.fromText input) +type Duckling a = Identity a + +runDuckling :: Duckling a -> a +runDuckling ma = runIdentity ma + +-- | A match is full if its rule pattern is empty. +-- (rule, endPosition, reversedRoute) +type Match = (Rule, Int, [Node]) + +resolveNode :: Context -> Options -> Node -> Maybe ResolvedToken +resolveNode context options n@Node{token = (Token dim dd), nodeRange = r} + = do + (val, latent) <- resolve context options dd + Just Resolved + { range = r + , node = n + , rval = RVal dim val + , isLatent = latent + } + +parseString :: [Rule] -> Document -> Duckling Stash +parseString rules sentence = do + (new, partialMatches) <- + -- One the first pass we try all the rules + parseString1 rules sentence Stash.empty Stash.empty [] + if Stash.null new + then return Stash.empty + else + -- For subsequent passes, we only try rules starting with a predicate. + saturateParseString headPredicateRules sentence new new partialMatches + where + headPredicateRules = + [ rule | rule@Rule{pattern = (Predicate _ : _)} <- rules ] + +-- | Produces all tokens recursively. +saturateParseString + :: [Rule] -> Document -> Stash -> Stash -> [Match] -> Duckling Stash +saturateParseString rules sentence stash new matches = do + (new', matches') <- parseString1 rules sentence stash new matches + let stash' = Stash.union stash new' + if Stash.null new' + then return stash + else saturateParseString rules sentence stash' new' matches' + +-- | Finds new matches resulting from newly added tokens. +-- Produces new tokens from full matches. +parseString1 + :: [Rule] -> Document -> Stash -> Stash -> [Match] + -> Duckling (Stash, [Match]) +parseString1 rules sentence stash new matches = do + -- Recursively match patterns. + -- Find which `matches` can advance because of `new`. + newPartial <- concatMapM (matchFirst sentence new) matches + + -- Find new matches resulting from newly added tokens (`new`) + newMatches <- concatMapM (matchFirstAnywhere sentence new) rules + + (full, partial) <- L.partition (\(Rule {pattern}, _, _) -> null pattern) + <$> matchAll sentence stash (newPartial ++ newMatches) + + -- Produce full matches as new tokens + return ( Stash.fromList $ mapMaybe produce full + , partial ++ matches + ) + +-- | Recursively augments `matches`. +-- Discards partial matches stuck by a regex. +matchAll :: Document -> Stash -> [Match] -> Duckling [Match] +matchAll sentence stash matches = concatMapM mkNextMatches matches + where + mkNextMatches :: Match -> Duckling [Match] + mkNextMatches match@(Rule {pattern = []}, _, _) = return [ match ] + mkNextMatches match@(Rule {pattern = p:_}, _, _) = do + nextMatches <- matchAll sentence stash =<< matchFirst sentence stash match + return $ case p of + Regex _ -> nextMatches + Predicate _ -> match:nextMatches + produce :: Match -> Maybe Node produce (_, _, []) = Nothing produce (Rule name _ production, _, etuor@(Node {nodeRange = Range _ e}:_)) = do @@ -65,6 +138,51 @@ produce (Rule name _ production, _, etuor@(Node {nodeRange = Range _ e}:_)) = do } [] -> Nothing +-- | Returns all matches matching the first pattern item of `match`, +-- resuming from a Match position +matchFirst :: Document -> Stash -> Match -> Duckling [Match] +matchFirst _ _ (Rule {pattern = []}, _, _) = return [] +matchFirst sentence stash (rule@Rule{pattern = p : ps}, position, route) = + map (mkMatch route newRule) <$> lookupItem sentence p stash position + where + newRule = rule { pattern = ps } + +-- | Returns all matches matching the first pattern item of `match`, +-- starting anywhere +matchFirstAnywhere :: Document -> Stash -> Rule -> Duckling [Match] +matchFirstAnywhere _sentence _stash Rule {pattern = []} = return [] +matchFirstAnywhere sentence stash rule@Rule{pattern = p : ps} = + map (mkMatch [] newRule) <$> lookupItemAnywhere sentence p stash + where + newRule = rule { pattern = ps } + +-- | Handle one PatternItem at a given position +lookupItem :: Document -> PatternItem -> Stash -> Int -> Duckling [Node] +lookupItem doc (Regex re) _ position = + filter (isPositionValid position doc) <$> + lookupRegex doc re position +lookupItem doc (Predicate p) stash position = + return $ + filter (p . token) $ + takeWhile (isPositionValid position doc) $ + Stash.toPosOrderedListFrom stash position + +-- | Handle one PatternItem anywhere in the text +lookupItemAnywhere :: Document -> PatternItem -> Stash -> Duckling [Node] +lookupItemAnywhere doc (Regex re) _ = lookupRegexAnywhere doc re +lookupItemAnywhere _doc (Predicate p) stash = + return $ filter (p . token) $ Stash.toPosOrderedList stash + +isPositionValid :: Int -> Document -> Node -> Bool +isPositionValid position sentence Node{nodeRange = Range start _} = + Document.isAdjacent sentence position start + +{-# INLINE mkMatch #-} +mkMatch :: [Node] -> Rule -> Node -> Match +mkMatch route newRule node@Node{nodeRange = Range _ pos'} = + newRoute `seq` (newRule, pos', newRoute) + where newRoute = node:route + -- | Handle a regex match at a given position lookupRegex :: Document -> PCRE.Regex -> Int -> Duckling [Node] lookupRegex doc _regex position | position >= Document.length doc = return [] @@ -109,121 +227,3 @@ lookupRegexCommon doc regex position matchFun = return nodes , children = [] , rule = Nothing } - --- | Handle one PatternItem at a given position -lookupItem :: Document -> PatternItem -> Stash -> Int -> Duckling [Node] -lookupItem doc (Regex re) _ position = - filter (isPositionValid position doc) <$> - lookupRegex doc re position -lookupItem doc (Predicate p) stash position = - return $ - filter (p . token) $ - takeWhile (isPositionValid position doc) $ - Stash.toPosOrderedListFrom stash position - --- | Handle one PatternItem anywhere in the text -lookupItemAnywhere :: Document -> PatternItem -> Stash -> Duckling [Node] -lookupItemAnywhere doc (Regex re) _ = lookupRegexAnywhere doc re -lookupItemAnywhere _doc (Predicate p) stash = - return $ filter (p . token) $ Stash.toPosOrderedList stash - -isPositionValid :: Int -> Document -> Node -> Bool -isPositionValid position sentence Node{nodeRange = Range start _} = - Document.isAdjacent sentence position start - --- | A match is full if its rule pattern is empty. --- (rule, endPosition, reversedRoute) -type Match = (Rule, Int, [Node]) - --- | Recursively augments `matches`. --- Discards partial matches stuck by a regex. -matchAll :: Document -> Stash -> [Match] -> Duckling [Match] -matchAll sentence stash matches = concatMapM mkNextMatches matches - where - mkNextMatches :: Match -> Duckling [Match] - mkNextMatches match@(Rule {pattern = []}, _, _) = return [ match ] - mkNextMatches match@(Rule {pattern = p:_}, _, _) = do - nextMatches <- matchAll sentence stash =<< matchFirst sentence stash match - return $ case p of - Regex _ -> nextMatches - Predicate _ -> match:nextMatches - --- | Returns all matches matching the first pattern item of `match`, --- resuming from a Match position -matchFirst :: Document -> Stash -> Match -> Duckling [Match] -matchFirst _ _ (Rule {pattern = []}, _, _) = return [] -matchFirst sentence stash (rule@Rule{pattern = p : ps}, position, route) = - map (mkMatch route newRule) <$> lookupItem sentence p stash position - where - newRule = rule { pattern = ps } - --- | Returns all matches matching the first pattern item of `match`, --- starting anywhere -matchFirstAnywhere :: Document -> Stash -> Rule -> Duckling [Match] -matchFirstAnywhere _sentence _stash Rule {pattern = []} = return [] -matchFirstAnywhere sentence stash rule@Rule{pattern = p : ps} = - map (mkMatch [] newRule) <$> lookupItemAnywhere sentence p stash - where - newRule = rule { pattern = ps } - -{-# INLINE mkMatch #-} -mkMatch :: [Node] -> Rule -> Node -> Match -mkMatch route newRule node@Node{nodeRange = Range _ pos'} = - newRoute `seq` (newRule, pos', newRoute) - where newRoute = node:route - --- | Finds new matches resulting from newly added tokens. --- Produces new tokens from full matches. -parseString1 - :: [Rule] -> Document -> Stash -> Stash -> [Match] - -> Duckling (Stash, [Match]) -parseString1 rules sentence stash new matches = do - -- Recursively match patterns. - -- Find which `matches` can advance because of `new`. - newPartial <- concatMapM (matchFirst sentence new) matches - - -- Find new matches resulting from newly added tokens (`new`) - newMatches <- concatMapM (matchFirstAnywhere sentence new) rules - - (full, partial) <- L.partition (\(Rule {pattern}, _, _) -> null pattern) - <$> matchAll sentence stash (newPartial ++ newMatches) - - -- Produce full matches as new tokens - return ( Stash.fromList $ mapMaybe produce full - , partial ++ matches - ) - --- | Produces all tokens recursively. -saturateParseString - :: [Rule] -> Document -> Stash -> Stash -> [Match] -> Duckling Stash -saturateParseString rules sentence stash new matches = do - (new', matches') <- parseString1 rules sentence stash new matches - let stash' = Stash.union stash new' - if Stash.null new' - then return stash - else saturateParseString rules sentence stash' new' matches' - -parseString :: [Rule] -> Document -> Duckling Stash -parseString rules sentence = do - (new, partialMatches) <- - -- One the first pass we try all the rules - parseString1 rules sentence Stash.empty Stash.empty [] - if Stash.null new - then return Stash.empty - else - -- For subsequent passes, we only try rules starting with a predicate. - saturateParseString headPredicateRules sentence new new partialMatches - where - headPredicateRules = - [ rule | rule@Rule{pattern = (Predicate _ : _)} <- rules ] - -resolveNode :: Context -> Options -> Node -> Maybe ResolvedToken -resolveNode context options n@Node{token = (Token dim dd), nodeRange = r} - = do - (val, latent) <- resolve context options dd - Just Resolved - { range = r - , node = n - , rval = RVal dim val - , isLatent = latent - }