Parse more date formats in Norwegian (#395)

Summary:
Time formats (`hhmm`) and date formats (`ddmm`) can clash. For example, depending on context, `22.10` can mean the clock time ten past ten or the twenty-second of October. In general it is correct to interpret this as a clock time, as Duckling currently does.

But there are some cases not currently covered by Duckling where we have more unambiguous dates, e.g. `12.03.2018` and `27.11`. These are included here (in addition to midnight `24:00` which was also missing).

#### Changes:

- Fixed a bug in the `ruleDdmm` regex that prevented dates in the format `dd/mm` with `mm > 9` from being parsed
- `ruleYyyymmdd` now also parses dots and forward slashes, i.e. `2012.05.14` and `2012/05/14`
- New rule `rule2400` parses `24:00` and `24.00` (I elected not to include it in `ruleMidnighteodendOfDay` as it has grain minute rather than day)
- New rule `ruleDmm` parses `1/10`, `9.12` etc
- New rule `ruleDDm` parses `10/3`, `11.1` etc
- New rule `ruleDdDotMm` parses `25.02`, `31.10` etc
- `ruleDdmmyyyy` now also parses dots, i.e. `03.10.1983`
- New tests

Pull Request resolved: https://github.com/facebook/duckling/pull/395

Reviewed By: patapizza

Differential Revision: D26193069

Pulled By: chessai

fbshipit-source-id: cf711807fa1d40be2303f2426d74ded40c2e23b3
This commit is contained in:
jfulse 2021-02-02 23:15:58 -08:00 committed by Facebook GitHub Bot
parent 16708d9572
commit 788f63eeac
2 changed files with 80 additions and 3 deletions

View File

@ -81,6 +81,11 @@ allExamples = concat
, "den tredje mars"
, "den 3. mars"
]
, examples (datetime (2013, 10, 30, 0, 0, 0) Day)
[ "30.10"
, "30/10"
, "30-10"
]
, examples (datetime (2015, 3, 3, 0, 0, 0) Day)
[ "3 mars 2015"
, "tredje mars 2015"
@ -91,6 +96,9 @@ allExamples = concat
, "3/3/15"
, "2015-3-3"
, "2015-03-03"
, "2015.03.03"
, "03/03/2015"
, "03.03.2015"
]
, examples (datetime (2013, 2, 15, 0, 0, 0) Day)
[ "På den 15."
@ -104,6 +112,15 @@ allExamples = concat
, "februar 15"
, "15-02"
, "15/02"
, "15.2"
]
, examples (datetime (2013, 11, 27, 0, 0, 0) Day)
[ "den 27. november"
, "27. november"
, "november 27"
, "27-11"
, "27/11"
, "27.11"
]
, examples (datetime (2013, 8, 8, 0, 0, 0) Day)
[ "8 Aug"
@ -113,8 +130,13 @@ allExamples = concat
]
, examples (datetime (1974, 10, 31, 0, 0, 0) Day)
[ "31/10/1974"
, "31.10.1974"
, "31/10/74"
, "31-10-74"
, "31.10.74"
, "1974/10/31"
, "1974-10-31"
, "1974.10.31"
]
, examples (datetime (2015, 4, 14, 0, 0, 0) Day)
[ "14april 2015"

View File

@ -612,7 +612,7 @@ ruleDdmm :: Rule
ruleDdmm = Rule
{ name = "dd/mm"
, pattern =
[ regex "(3[01]|[12]\\d|0?[1-9])[\\/-](0?[1-9]|1[0-2])"
[ regex "(3[01]|[12]\\d|0?[1-9])[\\/-](0?[1-9]\\b|1[0-2])"
]
, prod = \tokens -> case tokens of
(Token RegexMatch (GroupMatch (dd:mm:_)):_) -> do
@ -1100,7 +1100,7 @@ ruleYyyymmdd :: Rule
ruleYyyymmdd = Rule
{ name = "yyyy-mm-dd"
, pattern =
[ regex "(\\d{2,4})-(0?[1-9]|1[0-2])-(3[01]|[12]\\d|0?[1-9])"
[ regex "(\\d{4})[\\-\\.\\/](0?[1-9]|1[0-2])[\\-\\.\\/](3[01]|[12]\\d|0?[1-9])"
]
, prod = \tokens -> case tokens of
(Token RegexMatch (GroupMatch (yy:mm:dd:_)):_) -> do
@ -1382,6 +1382,57 @@ ruleHhmm = Rule
_ -> Nothing
}
-- | Matches the literal clock notation @24:00@ / @24.00@.
--
-- The regex has no capture groups and the matched tokens are ignored: the
-- production always yields @hourMinute False 24 0@ (first argument 'False',
-- presumably the 12-hour-clock flag -- confirm against 'hourMinute').
rule2400 :: Rule
rule2400 = Rule
  { name = "24:00"
  , pattern = [regex "24[.:]00"]
  , prod = const . tt $ hourMinute False 24 0
  }
-- | Numeric day/month where the day is a single digit, e.g. @1/10@, @9.12@.
--
-- Accepted separators are @/@, @.@ and @-@. The month part is @1@-@9@ or
-- @10@-@12@ and must end on a word boundary, so a longer trailing number
-- is not truncated into a month.
ruleDmm :: Rule
ruleDmm = Rule
  { name = "d/mm"
  , pattern = [regex "([1-9])[\\/\\.\\-]([1-9]\\b|1[0-2]\\b)"]
  , prod = \tokens -> case tokens of
      (Token RegexMatch (GroupMatch (dayTxt:monthTxt:_)):_) ->
        -- First capture group is the day, second is the month.
        parseInt dayTxt >>= \dayNum ->
          parseInt monthTxt >>= \monthNum ->
            tt $ monthDay monthNum dayNum
      _ -> Nothing
  }
-- | Numeric day/month with a two-digit day and a single-digit month,
-- e.g. @10/3@, @11.1@, @15.2@.
--
-- Accepted separators are @/@, @.@ and @-@.
--
-- Two guards in the regex:
--
-- * The day group is @01@-@31@. The previous @[0-2]\\d@ also accepted @00@,
--   which would have produced a day-of-month of 0.
-- * The month digit must end on a word boundary (as in the sibling @d/mm@
--   rule), so inputs such as @10/30@ are not partially matched as @10/3@.
ruleDDm :: Rule
ruleDDm = Rule
  { name = "dd/m"
  , pattern =
    [ regex "(0[1-9]|[12]\\d|3[01])[\\/\\.\\-]([1-9])\\b"
    ]
  , prod = \tokens -> case tokens of
      (Token RegexMatch (GroupMatch (dd:mm:_)):_) -> do
        -- First capture group is the day, second is the month.
        d <- parseInt dd
        m <- parseInt mm
        tt $ monthDay m d
      _ -> Nothing
  }
-- | Dot-separated @dd.mm@ restricted to days @25@-@31@ with a two-digit
-- month @01@-@12@, e.g. @25.02@, @31.10@.
--
-- NOTE(review): the day range presumably starts at 25 so the text cannot
-- also be read as an @hh.mm@ clock time -- confirm against the time rules.
ruleDdDotMm :: Rule
ruleDdDotMm = Rule
  { name = "dd.mm"
  , pattern = [regex "(2[5-9]|3[01])\\.(0[1-9]|1[0-2])"]
  , prod = \tokens -> case tokens of
      (Token RegexMatch (GroupMatch (dayTxt:monthTxt:_)):_) ->
        -- 'monthDay' takes the month first, then the day.
        monthDay <$> parseInt monthTxt <*> parseInt dayTxt >>= tt
      _ -> Nothing
  }
ruleTonight :: Rule
ruleTonight = Rule
{ name = "tonight"
@ -1481,7 +1532,7 @@ ruleDdmmyyyy :: Rule
ruleDdmmyyyy = Rule
{ name = "dd/mm/yyyy"
, pattern =
[ regex "(3[01]|[12]\\d|0?[1-9])[\\/-](0?[1-9]|1[0-2])[\\/-](\\d{2,4})"
[ regex "(3[01]|[12]\\d|0?[1-9])[\\/\\-\\.](0?[1-9]|1[0-2])[\\/\\-\\.](\\d{2,4})"
]
, prod = \tokens -> case tokens of
(Token RegexMatch (GroupMatch (dd:mm:yy:_)):_) -> do
@ -1791,6 +1842,10 @@ rules =
, ruleYesterday
, ruleYyyymmdd
, ruleTimezone
, rule2400
, ruleDmm
, ruleDDm
, ruleDdDotMm
]
++ ruleDaysOfWeek
++ ruleMonths