Rearrange Engine.hs to top-down ordering

Summary:
Make the code reflect the call graph, which looks roughly like this:
```
parseAndResolve
  runDuckling
  resolveNode
  parseString
    saturateParseString
    parseString1
      matchFirst
         ... low level stuff
      matchFirstAnywhere
         ... low level stuff
```

I found the existing order pretty hard to untangle when I was writing some architecture notes on this module, I think the new ordering will help

Reviewed By: chessai

Differential Revision: D28441933

fbshipit-source-id: 07c722aa6d4038baa7f14fec84660ecc2736ed2e
This commit is contained in:
Steven Troxler 2021-05-14 11:40:31 -07:00 committed by Facebook GitHub Bot
parent 13513d30a5
commit 323a7df023

View File

@ -39,11 +39,6 @@ import qualified Duckling.Types.Stash as Stash
-- -----------------------------------------------------------------
-- Engine
type Duckling a = Identity a
runDuckling :: Duckling a -> a
runDuckling ma = runIdentity ma
parseAndResolve :: [Rule] -> Text -> Context -> Options -> [ResolvedToken]
parseAndResolve rules input context options =
mapMaybe
@ -51,6 +46,84 @@ parseAndResolve rules input context options =
$ force $ Stash.toPosOrderedList
$ runDuckling $ parseString rules (Document.fromText input)
type Duckling a = Identity a
runDuckling :: Duckling a -> a
runDuckling ma = runIdentity ma
-- | A match is full if its rule pattern is empty.
-- (rule, endPosition, reversedRoute)
type Match = (Rule, Int, [Node])
resolveNode :: Context -> Options -> Node -> Maybe ResolvedToken
resolveNode context options n@Node{token = (Token dim dd), nodeRange = r}
= do
(val, latent) <- resolve context options dd
Just Resolved
{ range = r
, node = n
, rval = RVal dim val
, isLatent = latent
}
parseString :: [Rule] -> Document -> Duckling Stash
parseString rules sentence = do
(new, partialMatches) <-
-- One the first pass we try all the rules
parseString1 rules sentence Stash.empty Stash.empty []
if Stash.null new
then return Stash.empty
else
-- For subsequent passes, we only try rules starting with a predicate.
saturateParseString headPredicateRules sentence new new partialMatches
where
headPredicateRules =
[ rule | rule@Rule{pattern = (Predicate _ : _)} <- rules ]
-- | Produces all tokens recursively.
saturateParseString
:: [Rule] -> Document -> Stash -> Stash -> [Match] -> Duckling Stash
saturateParseString rules sentence stash new matches = do
(new', matches') <- parseString1 rules sentence stash new matches
let stash' = Stash.union stash new'
if Stash.null new'
then return stash
else saturateParseString rules sentence stash' new' matches'
-- | Finds new matches resulting from newly added tokens.
-- Produces new tokens from full matches.
parseString1
:: [Rule] -> Document -> Stash -> Stash -> [Match]
-> Duckling (Stash, [Match])
parseString1 rules sentence stash new matches = do
-- Recursively match patterns.
-- Find which `matches` can advance because of `new`.
newPartial <- concatMapM (matchFirst sentence new) matches
-- Find new matches resulting from newly added tokens (`new`)
newMatches <- concatMapM (matchFirstAnywhere sentence new) rules
(full, partial) <- L.partition (\(Rule {pattern}, _, _) -> null pattern)
<$> matchAll sentence stash (newPartial ++ newMatches)
-- Produce full matches as new tokens
return ( Stash.fromList $ mapMaybe produce full
, partial ++ matches
)
-- | Recursively augments `matches`.
-- Discards partial matches stuck by a regex.
matchAll :: Document -> Stash -> [Match] -> Duckling [Match]
matchAll sentence stash matches = concatMapM mkNextMatches matches
where
mkNextMatches :: Match -> Duckling [Match]
mkNextMatches match@(Rule {pattern = []}, _, _) = return [ match ]
mkNextMatches match@(Rule {pattern = p:_}, _, _) = do
nextMatches <- matchAll sentence stash =<< matchFirst sentence stash match
return $ case p of
Regex _ -> nextMatches
Predicate _ -> match:nextMatches
produce :: Match -> Maybe Node
produce (_, _, []) = Nothing
produce (Rule name _ production, _, etuor@(Node {nodeRange = Range _ e}:_)) = do
@ -65,6 +138,51 @@ produce (Rule name _ production, _, etuor@(Node {nodeRange = Range _ e}:_)) = do
}
[] -> Nothing
-- | Returns all matches matching the first pattern item of `match`,
-- resuming from a Match position
matchFirst :: Document -> Stash -> Match -> Duckling [Match]
matchFirst _ _ (Rule {pattern = []}, _, _) = return []
matchFirst sentence stash (rule@Rule{pattern = p : ps}, position, route) =
map (mkMatch route newRule) <$> lookupItem sentence p stash position
where
newRule = rule { pattern = ps }
-- | Returns all matches matching the first pattern item of `match`,
-- starting anywhere
matchFirstAnywhere :: Document -> Stash -> Rule -> Duckling [Match]
matchFirstAnywhere _sentence _stash Rule {pattern = []} = return []
matchFirstAnywhere sentence stash rule@Rule{pattern = p : ps} =
map (mkMatch [] newRule) <$> lookupItemAnywhere sentence p stash
where
newRule = rule { pattern = ps }
-- | Handle one PatternItem at a given position
lookupItem :: Document -> PatternItem -> Stash -> Int -> Duckling [Node]
lookupItem doc (Regex re) _ position =
filter (isPositionValid position doc) <$>
lookupRegex doc re position
lookupItem doc (Predicate p) stash position =
return $
filter (p . token) $
takeWhile (isPositionValid position doc) $
Stash.toPosOrderedListFrom stash position
-- | Handle one PatternItem anywhere in the text
lookupItemAnywhere :: Document -> PatternItem -> Stash -> Duckling [Node]
lookupItemAnywhere doc (Regex re) _ = lookupRegexAnywhere doc re
lookupItemAnywhere _doc (Predicate p) stash =
return $ filter (p . token) $ Stash.toPosOrderedList stash
isPositionValid :: Int -> Document -> Node -> Bool
isPositionValid position sentence Node{nodeRange = Range start _} =
Document.isAdjacent sentence position start
{-# INLINE mkMatch #-}
mkMatch :: [Node] -> Rule -> Node -> Match
mkMatch route newRule node@Node{nodeRange = Range _ pos'} =
newRoute `seq` (newRule, pos', newRoute)
where newRoute = node:route
-- | Handle a regex match at a given position
lookupRegex :: Document -> PCRE.Regex -> Int -> Duckling [Node]
lookupRegex doc _regex position | position >= Document.length doc = return []
@ -109,121 +227,3 @@ lookupRegexCommon doc regex position matchFun = return nodes
, children = []
, rule = Nothing
}
-- | Handle one PatternItem at a given position
lookupItem :: Document -> PatternItem -> Stash -> Int -> Duckling [Node]
lookupItem doc (Regex re) _ position =
filter (isPositionValid position doc) <$>
lookupRegex doc re position
lookupItem doc (Predicate p) stash position =
return $
filter (p . token) $
takeWhile (isPositionValid position doc) $
Stash.toPosOrderedListFrom stash position
-- | Handle one PatternItem anywhere in the text
lookupItemAnywhere :: Document -> PatternItem -> Stash -> Duckling [Node]
lookupItemAnywhere doc (Regex re) _ = lookupRegexAnywhere doc re
lookupItemAnywhere _doc (Predicate p) stash =
return $ filter (p . token) $ Stash.toPosOrderedList stash
isPositionValid :: Int -> Document -> Node -> Bool
isPositionValid position sentence Node{nodeRange = Range start _} =
Document.isAdjacent sentence position start
-- | A match is full if its rule pattern is empty.
-- (rule, endPosition, reversedRoute)
type Match = (Rule, Int, [Node])
-- | Recursively augments `matches`.
-- Discards partial matches stuck by a regex.
matchAll :: Document -> Stash -> [Match] -> Duckling [Match]
matchAll sentence stash matches = concatMapM mkNextMatches matches
where
mkNextMatches :: Match -> Duckling [Match]
mkNextMatches match@(Rule {pattern = []}, _, _) = return [ match ]
mkNextMatches match@(Rule {pattern = p:_}, _, _) = do
nextMatches <- matchAll sentence stash =<< matchFirst sentence stash match
return $ case p of
Regex _ -> nextMatches
Predicate _ -> match:nextMatches
-- | Returns all matches matching the first pattern item of `match`,
-- resuming from a Match position
matchFirst :: Document -> Stash -> Match -> Duckling [Match]
matchFirst _ _ (Rule {pattern = []}, _, _) = return []
matchFirst sentence stash (rule@Rule{pattern = p : ps}, position, route) =
map (mkMatch route newRule) <$> lookupItem sentence p stash position
where
newRule = rule { pattern = ps }
-- | Returns all matches matching the first pattern item of `match`,
-- starting anywhere
matchFirstAnywhere :: Document -> Stash -> Rule -> Duckling [Match]
matchFirstAnywhere _sentence _stash Rule {pattern = []} = return []
matchFirstAnywhere sentence stash rule@Rule{pattern = p : ps} =
map (mkMatch [] newRule) <$> lookupItemAnywhere sentence p stash
where
newRule = rule { pattern = ps }
{-# INLINE mkMatch #-}
mkMatch :: [Node] -> Rule -> Node -> Match
mkMatch route newRule node@Node{nodeRange = Range _ pos'} =
newRoute `seq` (newRule, pos', newRoute)
where newRoute = node:route
-- | Finds new matches resulting from newly added tokens.
-- Produces new tokens from full matches.
parseString1
:: [Rule] -> Document -> Stash -> Stash -> [Match]
-> Duckling (Stash, [Match])
parseString1 rules sentence stash new matches = do
-- Recursively match patterns.
-- Find which `matches` can advance because of `new`.
newPartial <- concatMapM (matchFirst sentence new) matches
-- Find new matches resulting from newly added tokens (`new`)
newMatches <- concatMapM (matchFirstAnywhere sentence new) rules
(full, partial) <- L.partition (\(Rule {pattern}, _, _) -> null pattern)
<$> matchAll sentence stash (newPartial ++ newMatches)
-- Produce full matches as new tokens
return ( Stash.fromList $ mapMaybe produce full
, partial ++ matches
)
-- | Produces all tokens recursively.
saturateParseString
:: [Rule] -> Document -> Stash -> Stash -> [Match] -> Duckling Stash
saturateParseString rules sentence stash new matches = do
(new', matches') <- parseString1 rules sentence stash new matches
let stash' = Stash.union stash new'
if Stash.null new'
then return stash
else saturateParseString rules sentence stash' new' matches'
parseString :: [Rule] -> Document -> Duckling Stash
parseString rules sentence = do
(new, partialMatches) <-
-- One the first pass we try all the rules
parseString1 rules sentence Stash.empty Stash.empty []
if Stash.null new
then return Stash.empty
else
-- For subsequent passes, we only try rules starting with a predicate.
saturateParseString headPredicateRules sentence new new partialMatches
where
headPredicateRules =
[ rule | rule@Rule{pattern = (Predicate _ : _)} <- rules ]
resolveNode :: Context -> Options -> Node -> Maybe ResolvedToken
resolveNode context options n@Node{token = (Token dim dd), nodeRange = r}
= do
(val, latent) <- resolve context options dd
Just Resolved
{ range = r
, node = n
, rval = RVal dim val
, isLatent = latent
}