parse latent year intervals

Summary: adds a new rule that parses year intervals such as "1960 - 1961". see inline comments for heuristics.

Reviewed By: patapizza

Differential Revision: D25840835

fbshipit-source-id: 851a5b1c78440cbf065bf9f20a05c78d4967ea3c
This commit is contained in:
Daniel Cartwright 2021-01-29 16:32:05 -08:00 committed by Facebook GitHub Bot
parent 33f0c17ee2
commit 7193caafb9
6 changed files with 55 additions and 14 deletions

View File

@ -21,7 +21,7 @@ import GHC.Generics
import Prelude
import Duckling.Resolve (Resolve(..))
data GroupMatch = GroupMatch [Text]
newtype GroupMatch = GroupMatch [Text]
deriving (Eq, Generic, Hashable, Ord, Show, NFData)
instance Resolve GroupMatch where

View File

@ -131,9 +131,9 @@ latentCorpus = (testContext, testOptions {withLatent = True}, xs)
, examples (datetime (2013, 2, 12, 10, 30, 0) Minute)
[ "ten thirty"
]
--, examples (datetime (1954, 1, 1, 0, 0, 0) Year)
-- [ "1954"
-- ]
, examples (datetime (1974, 1, 1, 0, 0, 0) Year)
[ "1974"
]
, examples (datetime (2013, 5, 1, 0, 0, 0) Month)
[ "May"
]
@ -162,6 +162,9 @@ latentCorpus = (testContext, testOptions {withLatent = True}, xs)
, "twelve ou three"
, "twelve oh three"
]
, examples (datetimeInterval ((1960, 1, 1, 0, 0, 0), (1962, 1, 1, 0, 0, 0)) Year)
[ "1960 - 1961"
]
]
diffContext :: Context

View File

@ -14,6 +14,7 @@
module Duckling.Time.EN.Rules where
import Control.Applicative ((<|>))
import Control.Monad (guard)
import Data.Maybe
import Prelude
import qualified Data.Text as Text
@ -388,10 +389,10 @@ ruleYearLatent = Rule
[ Predicate $
or . sequence [isIntegerBetween (- 10000) 0, isIntegerBetween 25 10000]
]
, prod = \tokens -> case tokens of
, prod = \case
(token:_) -> do
n <- getIntValue token
tt . mkLatent $ year n
tt $ mkLatent $ year n
_ -> Nothing
}
@ -1290,7 +1291,45 @@ ruleIntervalFromDDDDOfMonth = Rule
_ -> Nothing
}
-- Blocked for :latent time. May need to accept certain latents only, like hours
-- In order to support latent year ranges, e.g. "1960 - 1961", we impose
-- the following constraints:
-- 1. Neither year can be negative.
-- 2. The first year must be less than the second year.
-- 3. The years must be within the interval [1000,10000].
-- (1): We could try to allow negative years, but years in natural language are
-- almost never written as negative, so it's unnecessary complication.
-- (2): Year ranges in natural language are written
-- <earlier year> - <later year>. No need to derive the ordering from
-- something which is not likely a year range.
-- (3): Four+ digits prevents phone numbers (e.g. "333-444-5555") from
-- registering false positives.
-- In everyday language, people are more likely to mention years closer
-- to the present than very far in the past or future (closer times are
-- more relevant).
-- Of course, this means we do not have the ability to parse something like
-- "300 - 600" as a year interval. But, prior to implementing this
-- rule, we already did not have that ability.
--
-- These guidelines are not perfect, but they work. They can be iterated on and
-- improved going forward.
ruleIntervalYearLatent :: Rule
ruleIntervalYearLatent = Rule
{ name = "<year> (latent) - <year> (latent) (interval)"
, pattern =
[ Predicate $ isIntegerBetween 1000 10000
, regex "\\-|to|th?ru|through|(un)?til(l)?"
, Predicate $ isIntegerBetween 1000 10000
]
, prod = \case
(t1:_:t2:_) -> do
y1 <- getIntValue t1
y2 <- getIntValue t2
guard (y1 < y2)
Token Time <$> interval TTime.Closed (year y1) (year y2)
_ -> Nothing
}
-- Blocked for :latent time.
ruleIntervalDash :: Rule
ruleIntervalDash = Rule
{ name = "<datetime> - <datetime> (interval)"
@ -1299,7 +1338,7 @@ ruleIntervalDash = Rule
, regex "\\-|to|th?ru|through|(un)?til(l)?"
, Predicate isNotLatent
]
, prod = \tokens -> case tokens of
, prod = \case
(Token Time td1:_:Token Time td2:_) ->
Token Time <$> interval TTime.Closed td1 td2
_ -> Nothing
@ -1358,7 +1397,7 @@ ruleIntervalTODDash = Rule
, regex "\\-|:|to|th?ru|through|(un)?til(l)?"
, Predicate isATimeOfDay
]
, prod = \tokens -> case tokens of
, prod = \case
(Token Time td1:_:Token Time td2:_) ->
Token Time <$> interval TTime.Closed td1 td2
_ -> Nothing
@ -1415,7 +1454,7 @@ ruleIntervalTODBetween = Rule
, regex "and"
, Predicate isATimeOfDay
]
, prod = \tokens -> case tokens of
, prod = \case
(_:Token Time td1:_:Token Time td2:_) ->
Token Time <$> interval TTime.Closed td1 td2
_ -> Nothing
@ -1428,7 +1467,7 @@ ruleIntervalBy = Rule
[ regex "by"
, dimension Time
]
, prod = \tokens -> case tokens of
, prod = \case
(_:Token Time td:_) -> Token Time <$> interval TTime.Open now td
_ -> Nothing
}
@ -2693,6 +2732,7 @@ rules =
, ruleIntervalFromDDDDOfMonth
, ruleIntervalMonthDDDD
, ruleIntervalDDDDMonth
, ruleIntervalYearLatent
, ruleIntervalDash
, ruleIntervalSlash
, ruleIntervalFrom

View File

@ -15,7 +15,6 @@ import Prelude
import Duckling.Testing.Types hiding (examples)
import Duckling.Time.Corpus
import Duckling.Time.Types hiding (Month)
import Duckling.TimeGrain.Types hiding (add)
allExamples :: [Example]

View File

@ -21,7 +21,6 @@ import Duckling.Numeral.Helpers (parseInt)
import Duckling.Regex.Types
import Duckling.Time.Computed
import Duckling.Time.Helpers
import Duckling.Time.Types (TimeData (..))
import Duckling.Types
import qualified Duckling.TimeGrain.Types as TG

View File

@ -111,7 +111,7 @@ instance Resolve TimeData where
ahead:nextAhead:_
| notImmediate && isJust (timeIntersect ahead refTime) -> Just nextAhead
ahead:_ -> Just ahead
values <- Just . take 3 $ if List.null future then past else future
values <- Just $ take 3 $ if List.null future then past else future
Just $ case direction of
Nothing -> (TimeValue (timeValue tzSeries value)
(map (timeValue tzSeries) values) holiday, latent)