mirror of
https://github.com/facebook/duckling.git
synced 2025-01-06 04:53:13 +03:00
Parse more date formats in Norwegian (#395)
Summary: In general there are some clashes between the time format `hhmm` and the date format `ddmm`. For example, depending on context, `22.10` can mean the clock time ten past ten or the twenty-second of October. In general it's correct to interpret this as a clock time, as Duckling currently does. But there are some cases not currently covered by Duckling where the input is more clearly a date, e.g. `12.03.2018` and `27.11`. These are included here (in addition to midnight `24:00`, which was also missing).

#### Changes:
- A bug in the `ruleDdmm` regex meant that dates in the format `dd/mm` where `mm > 9` were not parsed
- `ruleYyyymmdd` now also parses dots and forward slashes, i.e. `2012.05.14` and `2012/05/14`
- New rule `rule2400` parses `24:00` and `24.00` (I elected not to include it in `ruleMidnighteodendOfDay` as it has grain minute rather than day)
- New rule `ruleDmm` parses `1/10`, `9.12` etc.
- New rule `ruleDDm` parses `10/3`, `11.1` etc.
- New rule `ruleDdDotMm` parses `25.02`, `31.10` etc.
- `ruleDdmmyyyy` now also parses dots, i.e. `03.10.1983`
- New tests

Pull Request resolved: https://github.com/facebook/duckling/pull/395
Reviewed By: patapizza
Differential Revision: D26193069
Pulled By: chessai
fbshipit-source-id: cf711807fa1d40be2303f2426d74ded40c2e23b3
This commit is contained in:
parent
16708d9572
commit
788f63eeac
@ -81,6 +81,11 @@ allExamples = concat
|
||||
, "den tredje mars"
|
||||
, "den 3. mars"
|
||||
]
|
||||
, examples (datetime (2013, 10, 30, 0, 0, 0) Day)
|
||||
[ "30.10"
|
||||
, "30/10"
|
||||
, "30-10"
|
||||
]
|
||||
, examples (datetime (2015, 3, 3, 0, 0, 0) Day)
|
||||
[ "3 mars 2015"
|
||||
, "tredje mars 2015"
|
||||
@ -91,6 +96,9 @@ allExamples = concat
|
||||
, "3/3/15"
|
||||
, "2015-3-3"
|
||||
, "2015-03-03"
|
||||
, "2015.03.03"
|
||||
, "03/03/2015"
|
||||
, "03.03.2015"
|
||||
]
|
||||
, examples (datetime (2013, 2, 15, 0, 0, 0) Day)
|
||||
[ "På den 15."
|
||||
@ -104,6 +112,15 @@ allExamples = concat
|
||||
, "februar 15"
|
||||
, "15-02"
|
||||
, "15/02"
|
||||
, "15.2"
|
||||
]
|
||||
, examples (datetime (2013, 11, 27, 0, 0, 0) Day)
|
||||
[ "den 27. november"
|
||||
, "27. november"
|
||||
, "november 27"
|
||||
, "27-11"
|
||||
, "27/11"
|
||||
, "27.11"
|
||||
]
|
||||
, examples (datetime (2013, 8, 8, 0, 0, 0) Day)
|
||||
[ "8 Aug"
|
||||
@ -113,8 +130,13 @@ allExamples = concat
|
||||
]
|
||||
, examples (datetime (1974, 10, 31, 0, 0, 0) Day)
|
||||
[ "31/10/1974"
|
||||
, "31.10.1974"
|
||||
, "31/10/74"
|
||||
, "31-10-74"
|
||||
, "31.10.74"
|
||||
, "1974/10/31"
|
||||
, "1974-10-31"
|
||||
, "1974.10.31"
|
||||
]
|
||||
, examples (datetime (2015, 4, 14, 0, 0, 0) Day)
|
||||
[ "14april 2015"
|
||||
|
@ -612,7 +612,7 @@ ruleDdmm :: Rule
|
||||
ruleDdmm = Rule
|
||||
{ name = "dd/mm"
|
||||
, pattern =
|
||||
[ regex "(3[01]|[12]\\d|0?[1-9])[\\/-](0?[1-9]|1[0-2])"
|
||||
[ regex "(3[01]|[12]\\d|0?[1-9])[\\/-](0?[1-9]\\b|1[0-2])"
|
||||
]
|
||||
, prod = \tokens -> case tokens of
|
||||
(Token RegexMatch (GroupMatch (dd:mm:_)):_) -> do
|
||||
@ -1100,7 +1100,7 @@ ruleYyyymmdd :: Rule
|
||||
ruleYyyymmdd = Rule
|
||||
{ name = "yyyy-mm-dd"
|
||||
, pattern =
|
||||
[ regex "(\\d{2,4})-(0?[1-9]|1[0-2])-(3[01]|[12]\\d|0?[1-9])"
|
||||
[ regex "(\\d{4})[\\-\\.\\/](0?[1-9]|1[0-2])[\\-\\.\\/](3[01]|[12]\\d|0?[1-9])"
|
||||
]
|
||||
, prod = \tokens -> case tokens of
|
||||
(Token RegexMatch (GroupMatch (yy:mm:dd:_)):_) -> do
|
||||
@ -1382,6 +1382,57 @@ ruleHhmm = Rule
|
||||
_ -> Nothing
|
||||
}
|
||||
|
||||
-- | Matches the literal text @24@, then either @.@ or @:@, then @00@
-- (i.e. @24:00@ or @24.00@).
--
-- The pattern has no capture groups, so the production ignores the matched
-- tokens and always returns the value built by @hourMinute False 24 0@.
rule2400 :: Rule
rule2400 = Rule
  { name = "24:00"
  , pattern = [regex "24[.:]00"]
  , prod = \_ -> tt (hourMinute False 24 0)
  }
|
||||
|
||||
-- | Single-digit day followed by a month, e.g. @1/10@ or @9.12@.
--
-- Regex groups:
--
--   1. day: one digit @1@-@9@
--   2. month: @1@-@9@ or @10@-@12@, each alternative closed by a word
--      boundary
--
-- The two groups are separated by one of @/@, @.@ or @-@.
-- Both groups are run through 'parseInt'; if either fails, or the token list
-- does not start with a regex match carrying at least two groups, the
-- production yields 'Nothing'.
ruleDmm :: Rule
ruleDmm = Rule
  { name = "d/mm"
  , pattern = [regex "([1-9])[\\/\\.\\-]([1-9]\\b|1[0-2]\\b)"]
  , prod = \tokens -> case tokens of
      (Token RegexMatch (GroupMatch (dayTxt:monthTxt:_)):_) ->
        -- monthDay takes the month first, then the day.
        tt =<< monthDay <$> parseInt monthTxt <*> parseInt dayTxt
      _ -> Nothing
  }
|
||||
|
||||
-- | Two-digit day followed by a single-digit month, e.g. @10/3@ or @11.1@.
--
-- Regex groups:
--
--   1. day: @01@-@31@, always two digits
--   2. month: one digit @1@-@9@
--
-- The two groups are separated by one of @/@, @.@ or @-@.
--
-- The day group deliberately excludes @00@: it is not a day of the month,
-- and the sibling rule @dd/mm@ rejects it as well (its day group is
-- @3[01]|[12]\\d|0?[1-9]@).
--
-- Both groups are run through 'parseInt'; if either fails, or the token list
-- does not start with a regex match carrying at least two groups, the
-- production yields 'Nothing'.
ruleDDm :: Rule
ruleDDm = Rule
  { name = "dd/m"
  , pattern =
    [ regex "(0[1-9]|[12]\\d|3[01])[\\/\\.\\-]([1-9])"
    ]
  , prod = \tokens -> case tokens of
      (Token RegexMatch (GroupMatch (dd:mm:_)):_) -> do
        d <- parseInt dd
        m <- parseInt mm
        -- monthDay takes the month first, then the day.
        tt $ monthDay m d
      _ -> Nothing
  }
|
||||
|
||||
-- | Dot-separated day and two-digit month, e.g. @25.02@ or @31.10@.
--
-- Regex groups:
--
--   1. day: @25@-@31@ only
--   2. month: @01@-@12@, always two digits
--
-- NOTE(review): days below 25 are presumably left out because such inputs
-- (e.g. @22.10@) can also be read as clock times - confirm against the
-- hh:mm rule.
--
-- Both groups are run through 'parseInt'; if either fails, or the token list
-- does not start with a regex match carrying at least two groups, the
-- production yields 'Nothing'.
ruleDdDotMm :: Rule
ruleDdDotMm = Rule
  { name = "dd.mm"
  , pattern = [regex "(2[5-9]|3[01])\\.(0[1-9]|1[0-2])"]
  , prod = \tokens -> case tokens of
      (Token RegexMatch (GroupMatch (dayTxt:monthTxt:_)):_) -> do
        monthNum <- parseInt monthTxt
        dayNum <- parseInt dayTxt
        -- monthDay takes the month first, then the day.
        tt (monthDay monthNum dayNum)
      _ -> Nothing
  }
|
||||
|
||||
ruleTonight :: Rule
|
||||
ruleTonight = Rule
|
||||
{ name = "tonight"
|
||||
@ -1481,7 +1532,7 @@ ruleDdmmyyyy :: Rule
|
||||
ruleDdmmyyyy = Rule
|
||||
{ name = "dd/mm/yyyy"
|
||||
, pattern =
|
||||
[ regex "(3[01]|[12]\\d|0?[1-9])[\\/-](0?[1-9]|1[0-2])[\\/-](\\d{2,4})"
|
||||
[ regex "(3[01]|[12]\\d|0?[1-9])[\\/\\-\\.](0?[1-9]|1[0-2])[\\/\\-\\.](\\d{2,4})"
|
||||
]
|
||||
, prod = \tokens -> case tokens of
|
||||
(Token RegexMatch (GroupMatch (dd:mm:yy:_)):_) -> do
|
||||
@ -1791,6 +1842,10 @@ rules =
|
||||
, ruleYesterday
|
||||
, ruleYyyymmdd
|
||||
, ruleTimezone
|
||||
, rule2400
|
||||
, ruleDmm
|
||||
, ruleDDm
|
||||
, ruleDdDotMm
|
||||
]
|
||||
++ ruleDaysOfWeek
|
||||
++ ruleMonths
|
||||
|
Loading…
Reference in New Issue
Block a user