mirror of
https://github.com/facebook/duckling.git
synced 2024-09-11 21:27:13 +03:00
parse latent year intervals
Summary: adds a new rule that parses year intervals such as "1960 - 1961". see inline comments for heuristics. Reviewed By: patapizza Differential Revision: D25840835 fbshipit-source-id: 851a5b1c78440cbf065bf9f20a05c78d4967ea3c
This commit is contained in:
parent
33f0c17ee2
commit
7193caafb9
@ -21,7 +21,7 @@ import GHC.Generics
|
|||||||
import Prelude
|
import Prelude
|
||||||
import Duckling.Resolve (Resolve(..))
|
import Duckling.Resolve (Resolve(..))
|
||||||
|
|
||||||
data GroupMatch = GroupMatch [Text]
|
newtype GroupMatch = GroupMatch [Text]
|
||||||
deriving (Eq, Generic, Hashable, Ord, Show, NFData)
|
deriving (Eq, Generic, Hashable, Ord, Show, NFData)
|
||||||
|
|
||||||
instance Resolve GroupMatch where
|
instance Resolve GroupMatch where
|
||||||
|
@ -131,9 +131,9 @@ latentCorpus = (testContext, testOptions {withLatent = True}, xs)
|
|||||||
, examples (datetime (2013, 2, 12, 10, 30, 0) Minute)
|
, examples (datetime (2013, 2, 12, 10, 30, 0) Minute)
|
||||||
[ "ten thirty"
|
[ "ten thirty"
|
||||||
]
|
]
|
||||||
--, examples (datetime (1954, 1, 1, 0, 0, 0) Year)
|
, examples (datetime (1974, 1, 1, 0, 0, 0) Year)
|
||||||
-- [ "1954"
|
[ "1974"
|
||||||
-- ]
|
]
|
||||||
, examples (datetime (2013, 5, 1, 0, 0, 0) Month)
|
, examples (datetime (2013, 5, 1, 0, 0, 0) Month)
|
||||||
[ "May"
|
[ "May"
|
||||||
]
|
]
|
||||||
@ -162,6 +162,9 @@ latentCorpus = (testContext, testOptions {withLatent = True}, xs)
|
|||||||
, "twelve ou three"
|
, "twelve ou three"
|
||||||
, "twelve oh three"
|
, "twelve oh three"
|
||||||
]
|
]
|
||||||
|
, examples (datetimeInterval ((1960, 1, 1, 0, 0, 0), (1962, 1, 1, 0, 0, 0)) Year)
|
||||||
|
[ "1960 - 1961"
|
||||||
|
]
|
||||||
]
|
]
|
||||||
|
|
||||||
diffContext :: Context
|
diffContext :: Context
|
||||||
|
@ -14,6 +14,7 @@
|
|||||||
module Duckling.Time.EN.Rules where
|
module Duckling.Time.EN.Rules where
|
||||||
|
|
||||||
import Control.Applicative ((<|>))
|
import Control.Applicative ((<|>))
|
||||||
|
import Control.Monad (guard)
|
||||||
import Data.Maybe
|
import Data.Maybe
|
||||||
import Prelude
|
import Prelude
|
||||||
import qualified Data.Text as Text
|
import qualified Data.Text as Text
|
||||||
@ -388,10 +389,10 @@ ruleYearLatent = Rule
|
|||||||
[ Predicate $
|
[ Predicate $
|
||||||
or . sequence [isIntegerBetween (- 10000) 0, isIntegerBetween 25 10000]
|
or . sequence [isIntegerBetween (- 10000) 0, isIntegerBetween 25 10000]
|
||||||
]
|
]
|
||||||
, prod = \tokens -> case tokens of
|
, prod = \case
|
||||||
(token:_) -> do
|
(token:_) -> do
|
||||||
n <- getIntValue token
|
n <- getIntValue token
|
||||||
tt . mkLatent $ year n
|
tt $ mkLatent $ year n
|
||||||
_ -> Nothing
|
_ -> Nothing
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1290,7 +1291,45 @@ ruleIntervalFromDDDDOfMonth = Rule
|
|||||||
_ -> Nothing
|
_ -> Nothing
|
||||||
}
|
}
|
||||||
|
|
||||||
-- Blocked for :latent time. May need to accept certain latents only, like hours
|
-- In order to support latent year ranges, e.g. "1960 - 1961", we impose
|
||||||
|
-- the following constraints:
|
||||||
|
-- 1. Neither year can be negative.
|
||||||
|
-- 2. The first year must be less than the second year.
|
||||||
|
-- 3. The years must be within the interval [1000,10000].
|
||||||
|
-- (1): We could try to allow negative years, but years in natural language are
|
||||||
|
-- almost never written as negative, so it's unnecessary complication.
|
||||||
|
-- (2): Year ranges in natural language are written
|
||||||
|
-- <earlier year> - <later year>. No need to derive the ordering from
|
||||||
|
-- something which is not likely a year range.
|
||||||
|
-- (3): Four+ digits prevents phone numbers (e.g. "333-444-5555") from
|
||||||
|
-- registering false positives.
|
||||||
|
-- In everyday language, people are more likely to mention years closer
|
||||||
|
-- to the present than very far in the past or future (closer times are
|
||||||
|
-- more relevant).
|
||||||
|
-- Of course, this means we do not have the ability to parse something like
|
||||||
|
-- "300 - 600" as a year interval. But, prior to implementing this
|
||||||
|
-- rule, we already did not have that ability.
|
||||||
|
--
|
||||||
|
-- These guidelines are not perfect, but they work. They can be iterated on and
|
||||||
|
-- improved going forward.
|
||||||
|
ruleIntervalYearLatent :: Rule
|
||||||
|
ruleIntervalYearLatent = Rule
|
||||||
|
{ name = "<year> (latent) - <year> (latent) (interval)"
|
||||||
|
, pattern =
|
||||||
|
[ Predicate $ isIntegerBetween 1000 10000
|
||||||
|
, regex "\\-|to|th?ru|through|(un)?til(l)?"
|
||||||
|
, Predicate $ isIntegerBetween 1000 10000
|
||||||
|
]
|
||||||
|
, prod = \case
|
||||||
|
(t1:_:t2:_) -> do
|
||||||
|
y1 <- getIntValue t1
|
||||||
|
y2 <- getIntValue t2
|
||||||
|
guard (y1 < y2)
|
||||||
|
Token Time <$> interval TTime.Closed (year y1) (year y2)
|
||||||
|
_ -> Nothing
|
||||||
|
}
|
||||||
|
|
||||||
|
-- Blocked for :latent time.
|
||||||
ruleIntervalDash :: Rule
|
ruleIntervalDash :: Rule
|
||||||
ruleIntervalDash = Rule
|
ruleIntervalDash = Rule
|
||||||
{ name = "<datetime> - <datetime> (interval)"
|
{ name = "<datetime> - <datetime> (interval)"
|
||||||
@ -1299,7 +1338,7 @@ ruleIntervalDash = Rule
|
|||||||
, regex "\\-|to|th?ru|through|(un)?til(l)?"
|
, regex "\\-|to|th?ru|through|(un)?til(l)?"
|
||||||
, Predicate isNotLatent
|
, Predicate isNotLatent
|
||||||
]
|
]
|
||||||
, prod = \tokens -> case tokens of
|
, prod = \case
|
||||||
(Token Time td1:_:Token Time td2:_) ->
|
(Token Time td1:_:Token Time td2:_) ->
|
||||||
Token Time <$> interval TTime.Closed td1 td2
|
Token Time <$> interval TTime.Closed td1 td2
|
||||||
_ -> Nothing
|
_ -> Nothing
|
||||||
@ -1358,7 +1397,7 @@ ruleIntervalTODDash = Rule
|
|||||||
, regex "\\-|:|to|th?ru|through|(un)?til(l)?"
|
, regex "\\-|:|to|th?ru|through|(un)?til(l)?"
|
||||||
, Predicate isATimeOfDay
|
, Predicate isATimeOfDay
|
||||||
]
|
]
|
||||||
, prod = \tokens -> case tokens of
|
, prod = \case
|
||||||
(Token Time td1:_:Token Time td2:_) ->
|
(Token Time td1:_:Token Time td2:_) ->
|
||||||
Token Time <$> interval TTime.Closed td1 td2
|
Token Time <$> interval TTime.Closed td1 td2
|
||||||
_ -> Nothing
|
_ -> Nothing
|
||||||
@ -1415,7 +1454,7 @@ ruleIntervalTODBetween = Rule
|
|||||||
, regex "and"
|
, regex "and"
|
||||||
, Predicate isATimeOfDay
|
, Predicate isATimeOfDay
|
||||||
]
|
]
|
||||||
, prod = \tokens -> case tokens of
|
, prod = \case
|
||||||
(_:Token Time td1:_:Token Time td2:_) ->
|
(_:Token Time td1:_:Token Time td2:_) ->
|
||||||
Token Time <$> interval TTime.Closed td1 td2
|
Token Time <$> interval TTime.Closed td1 td2
|
||||||
_ -> Nothing
|
_ -> Nothing
|
||||||
@ -1428,7 +1467,7 @@ ruleIntervalBy = Rule
|
|||||||
[ regex "by"
|
[ regex "by"
|
||||||
, dimension Time
|
, dimension Time
|
||||||
]
|
]
|
||||||
, prod = \tokens -> case tokens of
|
, prod = \case
|
||||||
(_:Token Time td:_) -> Token Time <$> interval TTime.Open now td
|
(_:Token Time td:_) -> Token Time <$> interval TTime.Open now td
|
||||||
_ -> Nothing
|
_ -> Nothing
|
||||||
}
|
}
|
||||||
@ -2693,6 +2732,7 @@ rules =
|
|||||||
, ruleIntervalFromDDDDOfMonth
|
, ruleIntervalFromDDDDOfMonth
|
||||||
, ruleIntervalMonthDDDD
|
, ruleIntervalMonthDDDD
|
||||||
, ruleIntervalDDDDMonth
|
, ruleIntervalDDDDMonth
|
||||||
|
, ruleIntervalYearLatent
|
||||||
, ruleIntervalDash
|
, ruleIntervalDash
|
||||||
, ruleIntervalSlash
|
, ruleIntervalSlash
|
||||||
, ruleIntervalFrom
|
, ruleIntervalFrom
|
||||||
|
@ -15,7 +15,6 @@ import Prelude
|
|||||||
|
|
||||||
import Duckling.Testing.Types hiding (examples)
|
import Duckling.Testing.Types hiding (examples)
|
||||||
import Duckling.Time.Corpus
|
import Duckling.Time.Corpus
|
||||||
import Duckling.Time.Types hiding (Month)
|
|
||||||
import Duckling.TimeGrain.Types hiding (add)
|
import Duckling.TimeGrain.Types hiding (add)
|
||||||
|
|
||||||
allExamples :: [Example]
|
allExamples :: [Example]
|
||||||
|
@ -21,7 +21,6 @@ import Duckling.Numeral.Helpers (parseInt)
|
|||||||
import Duckling.Regex.Types
|
import Duckling.Regex.Types
|
||||||
import Duckling.Time.Computed
|
import Duckling.Time.Computed
|
||||||
import Duckling.Time.Helpers
|
import Duckling.Time.Helpers
|
||||||
import Duckling.Time.Types (TimeData (..))
|
|
||||||
import Duckling.Types
|
import Duckling.Types
|
||||||
import qualified Duckling.TimeGrain.Types as TG
|
import qualified Duckling.TimeGrain.Types as TG
|
||||||
|
|
||||||
|
@ -111,7 +111,7 @@ instance Resolve TimeData where
|
|||||||
ahead:nextAhead:_
|
ahead:nextAhead:_
|
||||||
| notImmediate && isJust (timeIntersect ahead refTime) -> Just nextAhead
|
| notImmediate && isJust (timeIntersect ahead refTime) -> Just nextAhead
|
||||||
ahead:_ -> Just ahead
|
ahead:_ -> Just ahead
|
||||||
values <- Just . take 3 $ if List.null future then past else future
|
values <- Just $ take 3 $ if List.null future then past else future
|
||||||
Just $ case direction of
|
Just $ case direction of
|
||||||
Nothing -> (TimeValue (timeValue tzSeries value)
|
Nothing -> (TimeValue (timeValue tzSeries value)
|
||||||
(map (timeValue tzSeries) values) holiday, latent)
|
(map (timeValue tzSeries) values) holiday, latent)
|
||||||
|
Loading…
Reference in New Issue
Block a user