diff --git a/Duckling/Regex/Types.hs b/Duckling/Regex/Types.hs index fd87b783..9ce7306b 100644 --- a/Duckling/Regex/Types.hs +++ b/Duckling/Regex/Types.hs @@ -21,7 +21,7 @@ import GHC.Generics import Prelude import Duckling.Resolve (Resolve(..)) -data GroupMatch = GroupMatch [Text] +newtype GroupMatch = GroupMatch [Text] deriving (Eq, Generic, Hashable, Ord, Show, NFData) instance Resolve GroupMatch where diff --git a/Duckling/Time/EN/Corpus.hs b/Duckling/Time/EN/Corpus.hs index fe4e2854..3b0749c5 100644 --- a/Duckling/Time/EN/Corpus.hs +++ b/Duckling/Time/EN/Corpus.hs @@ -131,9 +131,9 @@ latentCorpus = (testContext, testOptions {withLatent = True}, xs) , examples (datetime (2013, 2, 12, 10, 30, 0) Minute) [ "ten thirty" ] - --, examples (datetime (1954, 1, 1, 0, 0, 0) Year) - -- [ "1954" - -- ] + , examples (datetime (1974, 1, 1, 0, 0, 0) Year) + [ "1974" + ] , examples (datetime (2013, 5, 1, 0, 0, 0) Month) [ "May" ] @@ -162,6 +162,9 @@ latentCorpus = (testContext, testOptions {withLatent = True}, xs) , "twelve ou three" , "twelve oh three" ] + , examples (datetimeInterval ((1960, 1, 1, 0, 0, 0), (1962, 1, 1, 0, 0, 0)) Year) + [ "1960 - 1961" + ] ] diffContext :: Context diff --git a/Duckling/Time/EN/Rules.hs b/Duckling/Time/EN/Rules.hs index ec8153d0..26420284 100644 --- a/Duckling/Time/EN/Rules.hs +++ b/Duckling/Time/EN/Rules.hs @@ -14,6 +14,7 @@ module Duckling.Time.EN.Rules where import Control.Applicative ((<|>)) +import Control.Monad (guard) import Data.Maybe import Prelude import qualified Data.Text as Text @@ -388,10 +389,10 @@ ruleYearLatent = Rule [ Predicate $ or . sequence [isIntegerBetween (- 10000) 0, isIntegerBetween 25 10000] ] - , prod = \tokens -> case tokens of + , prod = \case (token:_) -> do n <- getIntValue token - tt . mkLatent $ year n + tt $ mkLatent $ year n _ -> Nothing } @@ -1290,7 +1291,45 @@ ruleIntervalFromDDDDOfMonth = Rule _ -> Nothing } --- Blocked for :latent time. May need to accept certain latents only, like hours +-- In order to support latent year ranges, e.g. "1960 - 1961", we impose +-- the following constraints: +-- 1. Neither year can be negative. +-- 2. The first year must be less than the second year. +-- 3. The years must be within the interval [1000,10000]. +-- (1): We could try to allow negative years, but years in natural language are +-- almost never written as negative, so it's unnecessary complication. +-- (2): Year ranges in natural language are written +-- - . No need to derive the ordering from +-- something which is not likely a year range. +-- (3): Four+ digits prevents phone numbers (e.g. "333-444-5555") from +-- registering false positives. +-- In everyday language, people are more likely to mention years closer +-- to the present than very far in the past or future (closer times are +-- more relevant). +-- Of course, this means we do not have the ability to parse something like +-- "300 - 600" as a year interval. But, prior to implementing this +-- rule, we already did not have that ability. +-- +-- These guidelines are not perfect, but they work. They can be iterated on and +-- improved going forward. +ruleIntervalYearLatent :: Rule +ruleIntervalYearLatent = Rule + { name = " (latent) - (latent) (interval)" + , pattern = + [ Predicate $ isIntegerBetween 1000 10000 + , regex "\\-|to|th?ru|through|(un)?til(l)?" + , Predicate $ isIntegerBetween 1000 10000 + ] + , prod = \case + (t1:_:t2:_) -> do + y1 <- getIntValue t1 + y2 <- getIntValue t2 + guard (y1 < y2) + Token Time <$> interval TTime.Closed (year y1) (year y2) + _ -> Nothing + } + +-- Blocked for :latent time. ruleIntervalDash :: Rule ruleIntervalDash = Rule { name = " - (interval)" @@ -1299,7 +1338,7 @@ ruleIntervalDash = Rule , regex "\\-|to|th?ru|through|(un)?til(l)?" , Predicate isNotLatent ] - , prod = \tokens -> case tokens of + , prod = \case (Token Time td1:_:Token Time td2:_) -> Token Time <$> interval TTime.Closed td1 td2 _ -> Nothing @@ -1358,7 +1397,7 @@ ruleIntervalTODDash = Rule , regex "\\-|:|to|th?ru|through|(un)?til(l)?" , Predicate isATimeOfDay ] - , prod = \tokens -> case tokens of + , prod = \case (Token Time td1:_:Token Time td2:_) -> Token Time <$> interval TTime.Closed td1 td2 _ -> Nothing @@ -1415,7 +1454,7 @@ ruleIntervalTODBetween = Rule , regex "and" , Predicate isATimeOfDay ] - , prod = \tokens -> case tokens of + , prod = \case (_:Token Time td1:_:Token Time td2:_) -> Token Time <$> interval TTime.Closed td1 td2 _ -> Nothing @@ -1428,7 +1467,7 @@ ruleIntervalBy = Rule [ regex "by" , dimension Time ] - , prod = \tokens -> case tokens of + , prod = \case (_:Token Time td:_) -> Token Time <$> interval TTime.Open now td _ -> Nothing } @@ -2693,6 +2732,7 @@ rules = , ruleIntervalFromDDDDOfMonth , ruleIntervalMonthDDDD , ruleIntervalDDDDMonth + , ruleIntervalYearLatent , ruleIntervalDash , ruleIntervalSlash , ruleIntervalFrom diff --git a/Duckling/Time/EN/TT/Corpus.hs b/Duckling/Time/EN/TT/Corpus.hs index e79c03e5..438fcb5f 100644 --- a/Duckling/Time/EN/TT/Corpus.hs +++ b/Duckling/Time/EN/TT/Corpus.hs @@ -15,7 +15,6 @@ import Prelude import Duckling.Testing.Types hiding (examples) import Duckling.Time.Corpus -import Duckling.Time.Types hiding (Month) import Duckling.TimeGrain.Types hiding (add) allExamples :: [Example] diff --git a/Duckling/Time/EN/TT/Rules.hs b/Duckling/Time/EN/TT/Rules.hs index 428fb844..28180d8b 100644 --- a/Duckling/Time/EN/TT/Rules.hs +++ b/Duckling/Time/EN/TT/Rules.hs @@ -21,7 +21,6 @@ import Duckling.Numeral.Helpers (parseInt) import Duckling.Regex.Types import Duckling.Time.Computed import Duckling.Time.Helpers -import Duckling.Time.Types (TimeData (..)) import Duckling.Types import qualified Duckling.TimeGrain.Types as TG diff --git a/Duckling/Time/Types.hs b/Duckling/Time/Types.hs index 35923bc5..b56fdbab 100644 --- a/Duckling/Time/Types.hs +++ b/Duckling/Time/Types.hs @@ -111,7 +111,7 @@ instance Resolve TimeData where ahead:nextAhead:_ | notImmediate && isJust (timeIntersect ahead refTime) -> Just nextAhead ahead:_ -> Just ahead - values <- Just . take 3 $ if List.null future then past else future + values <- Just $ take 3 $ if List.null future then past else future Just $ case direction of Nothing -> (TimeValue (timeValue tzSeries value) (map (timeValue tzSeries) values) holiday, latent)