mirror of
https://github.com/facebook/duckling.git
synced 2024-11-24 07:23:03 +03:00
parse latent year intervals
Summary: adds a new rule that parses year intervals such as "1960 - 1961". see inline comments for heuristics. Reviewed By: patapizza Differential Revision: D25840835 fbshipit-source-id: 851a5b1c78440cbf065bf9f20a05c78d4967ea3c
This commit is contained in:
parent
33f0c17ee2
commit
7193caafb9
@ -21,7 +21,7 @@ import GHC.Generics
|
||||
import Prelude
|
||||
import Duckling.Resolve (Resolve(..))
|
||||
|
||||
data GroupMatch = GroupMatch [Text]
|
||||
newtype GroupMatch = GroupMatch [Text]
|
||||
deriving (Eq, Generic, Hashable, Ord, Show, NFData)
|
||||
|
||||
instance Resolve GroupMatch where
|
||||
|
@ -131,9 +131,9 @@ latentCorpus = (testContext, testOptions {withLatent = True}, xs)
|
||||
, examples (datetime (2013, 2, 12, 10, 30, 0) Minute)
|
||||
[ "ten thirty"
|
||||
]
|
||||
--, examples (datetime (1954, 1, 1, 0, 0, 0) Year)
|
||||
-- [ "1954"
|
||||
-- ]
|
||||
, examples (datetime (1974, 1, 1, 0, 0, 0) Year)
|
||||
[ "1974"
|
||||
]
|
||||
, examples (datetime (2013, 5, 1, 0, 0, 0) Month)
|
||||
[ "May"
|
||||
]
|
||||
@ -162,6 +162,9 @@ latentCorpus = (testContext, testOptions {withLatent = True}, xs)
|
||||
, "twelve ou three"
|
||||
, "twelve oh three"
|
||||
]
|
||||
, examples (datetimeInterval ((1960, 1, 1, 0, 0, 0), (1962, 1, 1, 0, 0, 0)) Year)
|
||||
[ "1960 - 1961"
|
||||
]
|
||||
]
|
||||
|
||||
diffContext :: Context
|
||||
|
@ -14,6 +14,7 @@
|
||||
module Duckling.Time.EN.Rules where
|
||||
|
||||
import Control.Applicative ((<|>))
|
||||
import Control.Monad (guard)
|
||||
import Data.Maybe
|
||||
import Prelude
|
||||
import qualified Data.Text as Text
|
||||
@ -388,10 +389,10 @@ ruleYearLatent = Rule
|
||||
[ Predicate $
|
||||
or . sequence [isIntegerBetween (- 10000) 0, isIntegerBetween 25 10000]
|
||||
]
|
||||
, prod = \tokens -> case tokens of
|
||||
, prod = \case
|
||||
(token:_) -> do
|
||||
n <- getIntValue token
|
||||
tt . mkLatent $ year n
|
||||
tt $ mkLatent $ year n
|
||||
_ -> Nothing
|
||||
}
|
||||
|
||||
@ -1290,7 +1291,45 @@ ruleIntervalFromDDDDOfMonth = Rule
|
||||
_ -> Nothing
|
||||
}
|
||||
|
||||
-- Blocked for :latent time. May need to accept certain latents only, like hours
|
||||
-- In order to support latent year ranges, e.g. "1960 - 1961", we impose
|
||||
-- the following constraints:
|
||||
-- 1. Neither year can be negative.
|
||||
-- 2. The first year must be less than the second year.
|
||||
-- 3. The years must be within the interval [1000,10000].
|
||||
-- (1): We could try to allow negative years, but years in natural language are
|
||||
-- almost never written as negative, so it's unnecessary complication.
|
||||
-- (2): Year ranges in natural language are written
|
||||
-- <earlier year> - <later year>. No need to derive the ordering from
|
||||
-- something which is not likely a year range.
|
||||
-- (3): Four+ digits prevents phone numbers (e.g. "333-444-5555") from
|
||||
-- registering false positives.
|
||||
-- In everyday language, people are more likely to mention years closer
|
||||
-- to the present than very far in the past or future (closer times are
|
||||
-- more relevant).
|
||||
-- Of course, this means we do not have the ability to parse something like
|
||||
-- "300 - 600" as a year interval. But, prior to implementing this
|
||||
-- rule, we already did not have that ability.
|
||||
--
|
||||
-- These guidelines are not perfect, but they work. They can be iterated on and
|
||||
-- improved going forward.
|
||||
ruleIntervalYearLatent :: Rule
|
||||
ruleIntervalYearLatent = Rule
|
||||
{ name = "<year> (latent) - <year> (latent) (interval)"
|
||||
, pattern =
|
||||
[ Predicate $ isIntegerBetween 1000 10000
|
||||
, regex "\\-|to|th?ru|through|(un)?til(l)?"
|
||||
, Predicate $ isIntegerBetween 1000 10000
|
||||
]
|
||||
, prod = \case
|
||||
(t1:_:t2:_) -> do
|
||||
y1 <- getIntValue t1
|
||||
y2 <- getIntValue t2
|
||||
guard (y1 < y2)
|
||||
Token Time <$> interval TTime.Closed (year y1) (year y2)
|
||||
_ -> Nothing
|
||||
}
|
||||
|
||||
-- Blocked for :latent time.
|
||||
ruleIntervalDash :: Rule
|
||||
ruleIntervalDash = Rule
|
||||
{ name = "<datetime> - <datetime> (interval)"
|
||||
@ -1299,7 +1338,7 @@ ruleIntervalDash = Rule
|
||||
, regex "\\-|to|th?ru|through|(un)?til(l)?"
|
||||
, Predicate isNotLatent
|
||||
]
|
||||
, prod = \tokens -> case tokens of
|
||||
, prod = \case
|
||||
(Token Time td1:_:Token Time td2:_) ->
|
||||
Token Time <$> interval TTime.Closed td1 td2
|
||||
_ -> Nothing
|
||||
@ -1358,7 +1397,7 @@ ruleIntervalTODDash = Rule
|
||||
, regex "\\-|:|to|th?ru|through|(un)?til(l)?"
|
||||
, Predicate isATimeOfDay
|
||||
]
|
||||
, prod = \tokens -> case tokens of
|
||||
, prod = \case
|
||||
(Token Time td1:_:Token Time td2:_) ->
|
||||
Token Time <$> interval TTime.Closed td1 td2
|
||||
_ -> Nothing
|
||||
@ -1415,7 +1454,7 @@ ruleIntervalTODBetween = Rule
|
||||
, regex "and"
|
||||
, Predicate isATimeOfDay
|
||||
]
|
||||
, prod = \tokens -> case tokens of
|
||||
, prod = \case
|
||||
(_:Token Time td1:_:Token Time td2:_) ->
|
||||
Token Time <$> interval TTime.Closed td1 td2
|
||||
_ -> Nothing
|
||||
@ -1428,7 +1467,7 @@ ruleIntervalBy = Rule
|
||||
[ regex "by"
|
||||
, dimension Time
|
||||
]
|
||||
, prod = \tokens -> case tokens of
|
||||
, prod = \case
|
||||
(_:Token Time td:_) -> Token Time <$> interval TTime.Open now td
|
||||
_ -> Nothing
|
||||
}
|
||||
@ -2693,6 +2732,7 @@ rules =
|
||||
, ruleIntervalFromDDDDOfMonth
|
||||
, ruleIntervalMonthDDDD
|
||||
, ruleIntervalDDDDMonth
|
||||
, ruleIntervalYearLatent
|
||||
, ruleIntervalDash
|
||||
, ruleIntervalSlash
|
||||
, ruleIntervalFrom
|
||||
|
@ -15,7 +15,6 @@ import Prelude
|
||||
|
||||
import Duckling.Testing.Types hiding (examples)
|
||||
import Duckling.Time.Corpus
|
||||
import Duckling.Time.Types hiding (Month)
|
||||
import Duckling.TimeGrain.Types hiding (add)
|
||||
|
||||
allExamples :: [Example]
|
||||
|
@ -21,7 +21,6 @@ import Duckling.Numeral.Helpers (parseInt)
|
||||
import Duckling.Regex.Types
|
||||
import Duckling.Time.Computed
|
||||
import Duckling.Time.Helpers
|
||||
import Duckling.Time.Types (TimeData (..))
|
||||
import Duckling.Types
|
||||
import qualified Duckling.TimeGrain.Types as TG
|
||||
|
||||
|
@ -111,7 +111,7 @@ instance Resolve TimeData where
|
||||
ahead:nextAhead:_
|
||||
| notImmediate && isJust (timeIntersect ahead refTime) -> Just nextAhead
|
||||
ahead:_ -> Just ahead
|
||||
values <- Just . take 3 $ if List.null future then past else future
|
||||
values <- Just $ take 3 $ if List.null future then past else future
|
||||
Just $ case direction of
|
||||
Nothing -> (TimeValue (timeValue tzSeries value)
|
||||
(map (timeValue tzSeries) values) holiday, latent)
|
||||
|
Loading…
Reference in New Issue
Block a user