mirror of
https://github.com/facebook/duckling.git
synced 2024-11-24 07:23:03 +03:00
Rearrange Engine.hs to top-down ordering
Summary: Make the code reflect the call graph, which looks roughly like this: ``` parseAndResolve runDuckling resolveNode parseString saturateParseString parseString1 matchFirst ... low level stuff matchFirstAnywhere ... low level stuff ``` I found the existing order pretty hard to untangle when I was writing some architecture notes on this module, I think the new ordering will help Reviewed By: chessai Differential Revision: D28441933 fbshipit-source-id: 07c722aa6d4038baa7f14fec84660ecc2736ed2e
This commit is contained in:
parent
13513d30a5
commit
323a7df023
@ -39,11 +39,6 @@ import qualified Duckling.Types.Stash as Stash
|
||||
-- -----------------------------------------------------------------
|
||||
-- Engine
|
||||
|
||||
type Duckling a = Identity a
|
||||
|
||||
runDuckling :: Duckling a -> a
|
||||
runDuckling ma = runIdentity ma
|
||||
|
||||
parseAndResolve :: [Rule] -> Text -> Context -> Options -> [ResolvedToken]
|
||||
parseAndResolve rules input context options =
|
||||
mapMaybe
|
||||
@ -51,6 +46,84 @@ parseAndResolve rules input context options =
|
||||
$ force $ Stash.toPosOrderedList
|
||||
$ runDuckling $ parseString rules (Document.fromText input)
|
||||
|
||||
type Duckling a = Identity a
|
||||
|
||||
runDuckling :: Duckling a -> a
|
||||
runDuckling ma = runIdentity ma
|
||||
|
||||
-- | A match is full if its rule pattern is empty.
|
||||
-- (rule, endPosition, reversedRoute)
|
||||
type Match = (Rule, Int, [Node])
|
||||
|
||||
resolveNode :: Context -> Options -> Node -> Maybe ResolvedToken
|
||||
resolveNode context options n@Node{token = (Token dim dd), nodeRange = r}
|
||||
= do
|
||||
(val, latent) <- resolve context options dd
|
||||
Just Resolved
|
||||
{ range = r
|
||||
, node = n
|
||||
, rval = RVal dim val
|
||||
, isLatent = latent
|
||||
}
|
||||
|
||||
parseString :: [Rule] -> Document -> Duckling Stash
|
||||
parseString rules sentence = do
|
||||
(new, partialMatches) <-
|
||||
-- One the first pass we try all the rules
|
||||
parseString1 rules sentence Stash.empty Stash.empty []
|
||||
if Stash.null new
|
||||
then return Stash.empty
|
||||
else
|
||||
-- For subsequent passes, we only try rules starting with a predicate.
|
||||
saturateParseString headPredicateRules sentence new new partialMatches
|
||||
where
|
||||
headPredicateRules =
|
||||
[ rule | rule@Rule{pattern = (Predicate _ : _)} <- rules ]
|
||||
|
||||
-- | Produces all tokens recursively.
|
||||
saturateParseString
|
||||
:: [Rule] -> Document -> Stash -> Stash -> [Match] -> Duckling Stash
|
||||
saturateParseString rules sentence stash new matches = do
|
||||
(new', matches') <- parseString1 rules sentence stash new matches
|
||||
let stash' = Stash.union stash new'
|
||||
if Stash.null new'
|
||||
then return stash
|
||||
else saturateParseString rules sentence stash' new' matches'
|
||||
|
||||
-- | Finds new matches resulting from newly added tokens.
|
||||
-- Produces new tokens from full matches.
|
||||
parseString1
|
||||
:: [Rule] -> Document -> Stash -> Stash -> [Match]
|
||||
-> Duckling (Stash, [Match])
|
||||
parseString1 rules sentence stash new matches = do
|
||||
-- Recursively match patterns.
|
||||
-- Find which `matches` can advance because of `new`.
|
||||
newPartial <- concatMapM (matchFirst sentence new) matches
|
||||
|
||||
-- Find new matches resulting from newly added tokens (`new`)
|
||||
newMatches <- concatMapM (matchFirstAnywhere sentence new) rules
|
||||
|
||||
(full, partial) <- L.partition (\(Rule {pattern}, _, _) -> null pattern)
|
||||
<$> matchAll sentence stash (newPartial ++ newMatches)
|
||||
|
||||
-- Produce full matches as new tokens
|
||||
return ( Stash.fromList $ mapMaybe produce full
|
||||
, partial ++ matches
|
||||
)
|
||||
|
||||
-- | Recursively augments `matches`.
|
||||
-- Discards partial matches stuck by a regex.
|
||||
matchAll :: Document -> Stash -> [Match] -> Duckling [Match]
|
||||
matchAll sentence stash matches = concatMapM mkNextMatches matches
|
||||
where
|
||||
mkNextMatches :: Match -> Duckling [Match]
|
||||
mkNextMatches match@(Rule {pattern = []}, _, _) = return [ match ]
|
||||
mkNextMatches match@(Rule {pattern = p:_}, _, _) = do
|
||||
nextMatches <- matchAll sentence stash =<< matchFirst sentence stash match
|
||||
return $ case p of
|
||||
Regex _ -> nextMatches
|
||||
Predicate _ -> match:nextMatches
|
||||
|
||||
produce :: Match -> Maybe Node
|
||||
produce (_, _, []) = Nothing
|
||||
produce (Rule name _ production, _, etuor@(Node {nodeRange = Range _ e}:_)) = do
|
||||
@ -65,6 +138,51 @@ produce (Rule name _ production, _, etuor@(Node {nodeRange = Range _ e}:_)) = do
|
||||
}
|
||||
[] -> Nothing
|
||||
|
||||
-- | Returns all matches matching the first pattern item of `match`,
|
||||
-- resuming from a Match position
|
||||
matchFirst :: Document -> Stash -> Match -> Duckling [Match]
|
||||
matchFirst _ _ (Rule {pattern = []}, _, _) = return []
|
||||
matchFirst sentence stash (rule@Rule{pattern = p : ps}, position, route) =
|
||||
map (mkMatch route newRule) <$> lookupItem sentence p stash position
|
||||
where
|
||||
newRule = rule { pattern = ps }
|
||||
|
||||
-- | Returns all matches matching the first pattern item of `match`,
|
||||
-- starting anywhere
|
||||
matchFirstAnywhere :: Document -> Stash -> Rule -> Duckling [Match]
|
||||
matchFirstAnywhere _sentence _stash Rule {pattern = []} = return []
|
||||
matchFirstAnywhere sentence stash rule@Rule{pattern = p : ps} =
|
||||
map (mkMatch [] newRule) <$> lookupItemAnywhere sentence p stash
|
||||
where
|
||||
newRule = rule { pattern = ps }
|
||||
|
||||
-- | Handle one PatternItem at a given position
|
||||
lookupItem :: Document -> PatternItem -> Stash -> Int -> Duckling [Node]
|
||||
lookupItem doc (Regex re) _ position =
|
||||
filter (isPositionValid position doc) <$>
|
||||
lookupRegex doc re position
|
||||
lookupItem doc (Predicate p) stash position =
|
||||
return $
|
||||
filter (p . token) $
|
||||
takeWhile (isPositionValid position doc) $
|
||||
Stash.toPosOrderedListFrom stash position
|
||||
|
||||
-- | Handle one PatternItem anywhere in the text
|
||||
lookupItemAnywhere :: Document -> PatternItem -> Stash -> Duckling [Node]
|
||||
lookupItemAnywhere doc (Regex re) _ = lookupRegexAnywhere doc re
|
||||
lookupItemAnywhere _doc (Predicate p) stash =
|
||||
return $ filter (p . token) $ Stash.toPosOrderedList stash
|
||||
|
||||
isPositionValid :: Int -> Document -> Node -> Bool
|
||||
isPositionValid position sentence Node{nodeRange = Range start _} =
|
||||
Document.isAdjacent sentence position start
|
||||
|
||||
{-# INLINE mkMatch #-}
|
||||
mkMatch :: [Node] -> Rule -> Node -> Match
|
||||
mkMatch route newRule node@Node{nodeRange = Range _ pos'} =
|
||||
newRoute `seq` (newRule, pos', newRoute)
|
||||
where newRoute = node:route
|
||||
|
||||
-- | Handle a regex match at a given position
|
||||
lookupRegex :: Document -> PCRE.Regex -> Int -> Duckling [Node]
|
||||
lookupRegex doc _regex position | position >= Document.length doc = return []
|
||||
@ -109,121 +227,3 @@ lookupRegexCommon doc regex position matchFun = return nodes
|
||||
, children = []
|
||||
, rule = Nothing
|
||||
}
|
||||
|
||||
-- | Handle one PatternItem at a given position
|
||||
lookupItem :: Document -> PatternItem -> Stash -> Int -> Duckling [Node]
|
||||
lookupItem doc (Regex re) _ position =
|
||||
filter (isPositionValid position doc) <$>
|
||||
lookupRegex doc re position
|
||||
lookupItem doc (Predicate p) stash position =
|
||||
return $
|
||||
filter (p . token) $
|
||||
takeWhile (isPositionValid position doc) $
|
||||
Stash.toPosOrderedListFrom stash position
|
||||
|
||||
-- | Handle one PatternItem anywhere in the text
|
||||
lookupItemAnywhere :: Document -> PatternItem -> Stash -> Duckling [Node]
|
||||
lookupItemAnywhere doc (Regex re) _ = lookupRegexAnywhere doc re
|
||||
lookupItemAnywhere _doc (Predicate p) stash =
|
||||
return $ filter (p . token) $ Stash.toPosOrderedList stash
|
||||
|
||||
isPositionValid :: Int -> Document -> Node -> Bool
|
||||
isPositionValid position sentence Node{nodeRange = Range start _} =
|
||||
Document.isAdjacent sentence position start
|
||||
|
||||
-- | A match is full if its rule pattern is empty.
|
||||
-- (rule, endPosition, reversedRoute)
|
||||
type Match = (Rule, Int, [Node])
|
||||
|
||||
-- | Recursively augments `matches`.
|
||||
-- Discards partial matches stuck by a regex.
|
||||
matchAll :: Document -> Stash -> [Match] -> Duckling [Match]
|
||||
matchAll sentence stash matches = concatMapM mkNextMatches matches
|
||||
where
|
||||
mkNextMatches :: Match -> Duckling [Match]
|
||||
mkNextMatches match@(Rule {pattern = []}, _, _) = return [ match ]
|
||||
mkNextMatches match@(Rule {pattern = p:_}, _, _) = do
|
||||
nextMatches <- matchAll sentence stash =<< matchFirst sentence stash match
|
||||
return $ case p of
|
||||
Regex _ -> nextMatches
|
||||
Predicate _ -> match:nextMatches
|
||||
|
||||
-- | Returns all matches matching the first pattern item of `match`,
|
||||
-- resuming from a Match position
|
||||
matchFirst :: Document -> Stash -> Match -> Duckling [Match]
|
||||
matchFirst _ _ (Rule {pattern = []}, _, _) = return []
|
||||
matchFirst sentence stash (rule@Rule{pattern = p : ps}, position, route) =
|
||||
map (mkMatch route newRule) <$> lookupItem sentence p stash position
|
||||
where
|
||||
newRule = rule { pattern = ps }
|
||||
|
||||
-- | Returns all matches matching the first pattern item of `match`,
|
||||
-- starting anywhere
|
||||
matchFirstAnywhere :: Document -> Stash -> Rule -> Duckling [Match]
|
||||
matchFirstAnywhere _sentence _stash Rule {pattern = []} = return []
|
||||
matchFirstAnywhere sentence stash rule@Rule{pattern = p : ps} =
|
||||
map (mkMatch [] newRule) <$> lookupItemAnywhere sentence p stash
|
||||
where
|
||||
newRule = rule { pattern = ps }
|
||||
|
||||
{-# INLINE mkMatch #-}
|
||||
mkMatch :: [Node] -> Rule -> Node -> Match
|
||||
mkMatch route newRule node@Node{nodeRange = Range _ pos'} =
|
||||
newRoute `seq` (newRule, pos', newRoute)
|
||||
where newRoute = node:route
|
||||
|
||||
-- | Finds new matches resulting from newly added tokens.
|
||||
-- Produces new tokens from full matches.
|
||||
parseString1
|
||||
:: [Rule] -> Document -> Stash -> Stash -> [Match]
|
||||
-> Duckling (Stash, [Match])
|
||||
parseString1 rules sentence stash new matches = do
|
||||
-- Recursively match patterns.
|
||||
-- Find which `matches` can advance because of `new`.
|
||||
newPartial <- concatMapM (matchFirst sentence new) matches
|
||||
|
||||
-- Find new matches resulting from newly added tokens (`new`)
|
||||
newMatches <- concatMapM (matchFirstAnywhere sentence new) rules
|
||||
|
||||
(full, partial) <- L.partition (\(Rule {pattern}, _, _) -> null pattern)
|
||||
<$> matchAll sentence stash (newPartial ++ newMatches)
|
||||
|
||||
-- Produce full matches as new tokens
|
||||
return ( Stash.fromList $ mapMaybe produce full
|
||||
, partial ++ matches
|
||||
)
|
||||
|
||||
-- | Produces all tokens recursively.
|
||||
saturateParseString
|
||||
:: [Rule] -> Document -> Stash -> Stash -> [Match] -> Duckling Stash
|
||||
saturateParseString rules sentence stash new matches = do
|
||||
(new', matches') <- parseString1 rules sentence stash new matches
|
||||
let stash' = Stash.union stash new'
|
||||
if Stash.null new'
|
||||
then return stash
|
||||
else saturateParseString rules sentence stash' new' matches'
|
||||
|
||||
parseString :: [Rule] -> Document -> Duckling Stash
|
||||
parseString rules sentence = do
|
||||
(new, partialMatches) <-
|
||||
-- One the first pass we try all the rules
|
||||
parseString1 rules sentence Stash.empty Stash.empty []
|
||||
if Stash.null new
|
||||
then return Stash.empty
|
||||
else
|
||||
-- For subsequent passes, we only try rules starting with a predicate.
|
||||
saturateParseString headPredicateRules sentence new new partialMatches
|
||||
where
|
||||
headPredicateRules =
|
||||
[ rule | rule@Rule{pattern = (Predicate _ : _)} <- rules ]
|
||||
|
||||
resolveNode :: Context -> Options -> Node -> Maybe ResolvedToken
|
||||
resolveNode context options n@Node{token = (Token dim dd), nodeRange = r}
|
||||
= do
|
||||
(val, latent) <- resolve context options dd
|
||||
Just Resolved
|
||||
{ range = r
|
||||
, node = n
|
||||
, rval = RVal dim val
|
||||
, isLatent = latent
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user