parse latent year intervals

Summary: adds a new rule that parses year intervals such as "1960 - 1961". see inline comments for heuristics.

Reviewed By: patapizza

Differential Revision: D25840835

fbshipit-source-id: 851a5b1c78440cbf065bf9f20a05c78d4967ea3c
This commit is contained in:
Daniel Cartwright 2021-01-29 16:32:05 -08:00 committed by Facebook GitHub Bot
parent 33f0c17ee2
commit 7193caafb9
6 changed files with 55 additions and 14 deletions

View File

@ -21,7 +21,7 @@ import GHC.Generics
import Prelude import Prelude
import Duckling.Resolve (Resolve(..)) import Duckling.Resolve (Resolve(..))
data GroupMatch = GroupMatch [Text] newtype GroupMatch = GroupMatch [Text]
deriving (Eq, Generic, Hashable, Ord, Show, NFData) deriving (Eq, Generic, Hashable, Ord, Show, NFData)
instance Resolve GroupMatch where instance Resolve GroupMatch where

View File

@ -131,9 +131,9 @@ latentCorpus = (testContext, testOptions {withLatent = True}, xs)
, examples (datetime (2013, 2, 12, 10, 30, 0) Minute) , examples (datetime (2013, 2, 12, 10, 30, 0) Minute)
[ "ten thirty" [ "ten thirty"
] ]
--, examples (datetime (1954, 1, 1, 0, 0, 0) Year) , examples (datetime (1974, 1, 1, 0, 0, 0) Year)
-- [ "1954" [ "1974"
-- ] ]
, examples (datetime (2013, 5, 1, 0, 0, 0) Month) , examples (datetime (2013, 5, 1, 0, 0, 0) Month)
[ "May" [ "May"
] ]
@ -162,6 +162,9 @@ latentCorpus = (testContext, testOptions {withLatent = True}, xs)
, "twelve ou three" , "twelve ou three"
, "twelve oh three" , "twelve oh three"
] ]
, examples (datetimeInterval ((1960, 1, 1, 0, 0, 0), (1962, 1, 1, 0, 0, 0)) Year)
[ "1960 - 1961"
]
] ]
diffContext :: Context diffContext :: Context

View File

@ -14,6 +14,7 @@
module Duckling.Time.EN.Rules where module Duckling.Time.EN.Rules where
import Control.Applicative ((<|>)) import Control.Applicative ((<|>))
import Control.Monad (guard)
import Data.Maybe import Data.Maybe
import Prelude import Prelude
import qualified Data.Text as Text import qualified Data.Text as Text
@ -388,10 +389,10 @@ ruleYearLatent = Rule
[ Predicate $ [ Predicate $
or . sequence [isIntegerBetween (- 10000) 0, isIntegerBetween 25 10000] or . sequence [isIntegerBetween (- 10000) 0, isIntegerBetween 25 10000]
] ]
, prod = \tokens -> case tokens of , prod = \case
(token:_) -> do (token:_) -> do
n <- getIntValue token n <- getIntValue token
tt . mkLatent $ year n tt $ mkLatent $ year n
_ -> Nothing _ -> Nothing
} }
@ -1290,7 +1291,45 @@ ruleIntervalFromDDDDOfMonth = Rule
_ -> Nothing _ -> Nothing
} }
-- Blocked for :latent time. May need to accept certain latents only, like hours -- In order to support latent year ranges, e.g. "1960 - 1961", we impose
-- the following constraints:
-- 1. Neither year can be negative.
-- 2. The first year must be less than the second year.
-- 3. The years must be within the interval [1000,10000].
-- (1): We could try to allow negative years, but years in natural language are
-- almost never written as negative, so it's unnecessary complication.
-- (2): Year ranges in natural language are written
-- <earlier year> - <later year>. No need to derive the ordering from
-- something which is not likely a year range.
-- (3): Four+ digits prevents phone numbers (e.g. "333-444-5555") from
-- registering false positives.
-- In everyday language, people are more likely to mention years closer
-- to the present than very far in the past or future (closer times are
-- more relevant).
-- Of course, this means we do not have the ability to parse something like
-- "300 - 600" as a year interval. But, prior to implementing this
-- rule, we already did not have that ability.
--
-- These guidelines are not perfect, but they work. They can be iterated on and
-- improved going forward.
ruleIntervalYearLatent :: Rule
ruleIntervalYearLatent = Rule
{ name = "<year> (latent) - <year> (latent) (interval)"
, pattern =
[ Predicate $ isIntegerBetween 1000 10000
, regex "\\-|to|th?ru|through|(un)?til(l)?"
, Predicate $ isIntegerBetween 1000 10000
]
, prod = \case
(t1:_:t2:_) -> do
y1 <- getIntValue t1
y2 <- getIntValue t2
guard (y1 < y2)
Token Time <$> interval TTime.Closed (year y1) (year y2)
_ -> Nothing
}
-- Blocked for :latent time.
ruleIntervalDash :: Rule ruleIntervalDash :: Rule
ruleIntervalDash = Rule ruleIntervalDash = Rule
{ name = "<datetime> - <datetime> (interval)" { name = "<datetime> - <datetime> (interval)"
@ -1299,7 +1338,7 @@ ruleIntervalDash = Rule
, regex "\\-|to|th?ru|through|(un)?til(l)?" , regex "\\-|to|th?ru|through|(un)?til(l)?"
, Predicate isNotLatent , Predicate isNotLatent
] ]
, prod = \tokens -> case tokens of , prod = \case
(Token Time td1:_:Token Time td2:_) -> (Token Time td1:_:Token Time td2:_) ->
Token Time <$> interval TTime.Closed td1 td2 Token Time <$> interval TTime.Closed td1 td2
_ -> Nothing _ -> Nothing
@ -1358,7 +1397,7 @@ ruleIntervalTODDash = Rule
, regex "\\-|:|to|th?ru|through|(un)?til(l)?" , regex "\\-|:|to|th?ru|through|(un)?til(l)?"
, Predicate isATimeOfDay , Predicate isATimeOfDay
] ]
, prod = \tokens -> case tokens of , prod = \case
(Token Time td1:_:Token Time td2:_) -> (Token Time td1:_:Token Time td2:_) ->
Token Time <$> interval TTime.Closed td1 td2 Token Time <$> interval TTime.Closed td1 td2
_ -> Nothing _ -> Nothing
@ -1415,7 +1454,7 @@ ruleIntervalTODBetween = Rule
, regex "and" , regex "and"
, Predicate isATimeOfDay , Predicate isATimeOfDay
] ]
, prod = \tokens -> case tokens of , prod = \case
(_:Token Time td1:_:Token Time td2:_) -> (_:Token Time td1:_:Token Time td2:_) ->
Token Time <$> interval TTime.Closed td1 td2 Token Time <$> interval TTime.Closed td1 td2
_ -> Nothing _ -> Nothing
@ -1428,7 +1467,7 @@ ruleIntervalBy = Rule
[ regex "by" [ regex "by"
, dimension Time , dimension Time
] ]
, prod = \tokens -> case tokens of , prod = \case
(_:Token Time td:_) -> Token Time <$> interval TTime.Open now td (_:Token Time td:_) -> Token Time <$> interval TTime.Open now td
_ -> Nothing _ -> Nothing
} }
@ -2693,6 +2732,7 @@ rules =
, ruleIntervalFromDDDDOfMonth , ruleIntervalFromDDDDOfMonth
, ruleIntervalMonthDDDD , ruleIntervalMonthDDDD
, ruleIntervalDDDDMonth , ruleIntervalDDDDMonth
, ruleIntervalYearLatent
, ruleIntervalDash , ruleIntervalDash
, ruleIntervalSlash , ruleIntervalSlash
, ruleIntervalFrom , ruleIntervalFrom

View File

@ -15,7 +15,6 @@ import Prelude
import Duckling.Testing.Types hiding (examples) import Duckling.Testing.Types hiding (examples)
import Duckling.Time.Corpus import Duckling.Time.Corpus
import Duckling.Time.Types hiding (Month)
import Duckling.TimeGrain.Types hiding (add) import Duckling.TimeGrain.Types hiding (add)
allExamples :: [Example] allExamples :: [Example]

View File

@ -21,7 +21,6 @@ import Duckling.Numeral.Helpers (parseInt)
import Duckling.Regex.Types import Duckling.Regex.Types
import Duckling.Time.Computed import Duckling.Time.Computed
import Duckling.Time.Helpers import Duckling.Time.Helpers
import Duckling.Time.Types (TimeData (..))
import Duckling.Types import Duckling.Types
import qualified Duckling.TimeGrain.Types as TG import qualified Duckling.TimeGrain.Types as TG

View File

@ -111,7 +111,7 @@ instance Resolve TimeData where
ahead:nextAhead:_ ahead:nextAhead:_
| notImmediate && isJust (timeIntersect ahead refTime) -> Just nextAhead | notImmediate && isJust (timeIntersect ahead refTime) -> Just nextAhead
ahead:_ -> Just ahead ahead:_ -> Just ahead
values <- Just . take 3 $ if List.null future then past else future values <- Just $ take 3 $ if List.null future then past else future
Just $ case direction of Just $ case direction of
Nothing -> (TimeValue (timeValue tzSeries value) Nothing -> (TimeValue (timeValue tzSeries value)
(map (timeValue tzSeries) values) holiday, latent) (map (timeValue tzSeries) values) holiday, latent)