Time/EN: Allow dashes in spelled-out times

Summary:
It's common to use dashes when spelling out times longhand,
e.g. "five-thirty am", but Duckling wasn't handling this at all.

This commit adds rules for times spelled out with dashes. The
rules explicitly forbid the part after the dash (the minutes) from
starting with a digit, via a negative lookahead. This is because
- it wouldn't be at all idiomatic to write five-26 or five-oh-6
- allowing that pattern clashes with time range parsing, e.g.
  "9-10 am" should parse as a time range, not as "9:10 am"

Reviewed By: chessai

Differential Revision: D27848428

fbshipit-source-id: dfe8b98cb38119a16db2a19db47fd3128783e617
This commit is contained in:
Steven Troxler 2021-04-22 11:45:57 -07:00 committed by Facebook GitHub Bot
parent 23ec021b07
commit bf696ba185
3 changed files with 50 additions and 0 deletions

View File

@ -8,6 +8,7 @@
* EN (English)
* Time: Allow latent match for \<part-of-day\> \<latent-time-of-day\>
* Time: Avoid parsing phrases like 'two five' as times
* Time: Add support to parse spelled-out times like 'five-thirty'
* RU (Russian)
* Duration: Diminutives for minutes and hours

View File

@ -132,6 +132,7 @@ latentCorpus = (testContext, testOptions {withLatent = True}, xs)
]
, examples (datetime (2013, 2, 12, 10, 30, 0) Minute)
[ "ten thirty"
, "ten-thirty"
]
, examples (datetime (1974, 1, 1, 0, 0, 0) Year)
[ "1974"
@ -163,6 +164,8 @@ latentCorpus = (testContext, testOptions {withLatent = True}, xs)
, "twelve o three"
, "twelve ou three"
, "twelve oh three"
, "twelve-zero-three"
, "twelve-oh-three"
]
, examples (datetimeInterval ((1960, 1, 1, 0, 0, 0), (1962, 1, 1, 0, 0, 0)) Year)
[ "1960 - 1961"

View File

@ -780,6 +780,26 @@ ruleHONumeral = Rule
_ -> Nothing
}
-- | Spelled-out time whose hour and minutes are joined by a dash,
-- e.g. "ten-thirty".
--
-- The dash regex carries a negative lookahead so it never matches when a
-- digit follows; this keeps inputs such as "9-10 am" available for
-- time-range parsing instead of being read as "9:10 am".
-- Latency of the hour token is carried over to the produced time.
ruleHONumeralDash :: Rule
ruleHONumeralDash = Rule
  { name = "<hour-of-day> - <integer-as-word>"
  , pattern =
    [ Predicate isAnHourOfDay
    , regex "-(?!\\d)"
    , Predicate $ isIntegerBetween 10 59
    ]
  , prod = \case
      (Token Time TimeData
         { TTime.form = Just (TTime.TimeOfDay (Just hourOfDay) twelveHour)
         , TTime.latent = wasLatent
         }
       : _dash
       : minuteToken
       : _) ->
        -- Fails (Nothing) when the minute token carries no integer value.
        getIntValue minuteToken >>= \minutes ->
          let timeData = hourMinute twelveHour hourOfDay minutes
          in tt $ if wasLatent then mkLatent timeData else timeData
      _ -> Nothing
  }
ruleHONumeralAlt :: Rule
ruleHONumeralAlt = Rule
{ name = "<hour-of-day> zero <integer>"
@ -803,6 +823,30 @@ ruleHONumeralAlt = Rule
_ -> Nothing
}
-- | Spelled-out time of the form hour - "zero"/"oh" - single-digit minutes,
-- e.g. "twelve-oh-three" or "twelve-zero-three".
--
-- The second dash uses a negative lookahead so that it is not matched when
-- a digit follows it. Latency of the hour token is carried over to the
-- produced time.
ruleHONumeralAltDash :: Rule
ruleHONumeralAltDash = Rule
  { name = "<hour-of-day> - zero - <integer>"
  , pattern =
    [ Predicate isAnHourOfDay
    , regex "-"
    , regex "(zero|o(h|u)?)"
    , regex "-(?!\\d)"
    , Predicate $ isIntegerBetween 1 9
    ]
  , prod = \case
      (Token Time TimeData
         { TTime.form = Just (TTime.TimeOfDay (Just hourOfDay) twelveHour)
         , TTime.latent = wasLatent
         }
       : _firstDash
       : _zeroWord
       : _secondDash
       : minuteToken
       : _) -> do
        minutes <- getIntValue minuteToken
        -- Re-apply latency only when the hour token itself was latent.
        let markLatency
              | wasLatent = mkLatent
              | otherwise = id
        tt . markLatency $ hourMinute twelveHour hourOfDay minutes
      _ -> Nothing
  }
ruleHODHalf :: Rule
ruleHODHalf = Rule
{ name = "<hour-of-day> half"
@ -2729,7 +2773,9 @@ rules =
, ruleMilitarySpelledOutAMPM2
, ruleTODAMPM
, ruleHONumeral
, ruleHONumeralDash
, ruleHONumeralAlt
, ruleHONumeralAltDash
, ruleHODHalf
, ruleHODQuarter
, ruleNumeralToHOD