Time/HU: extend dates (#462)

Summary:
Extend Hungarian dates with new cases

Pull Request resolved: https://github.com/facebook/duckling/pull/462

Differential Revision: D25573636

Pulled By: chessai

fbshipit-source-id: 251698cf9f5126162ad4fbf1489dcbc4c12541ed
This commit is contained in:
Dubovinszky Péter 2021-08-11 13:21:43 -07:00 committed by Facebook GitHub Bot
parent 57dab83ad3
commit 0354f27ef4
3 changed files with 343 additions and 49 deletions

View File

@ -16,7 +16,6 @@ import Prelude
import Duckling.Locale
import Duckling.Resolve
import Duckling.Time.Corpus
import Duckling.Time.Types hiding (Month)
import Duckling.TimeGrain.Types hiding (add)
import Duckling.Testing.Types hiding (examples)
@ -28,62 +27,258 @@ allExamples = concat
[ examples (datetime (2013, 2, 12, 4, 30, 0) Second)
[ "most"
, "épp most"
, "azonnal"
, "mostani"
]
, examples (datetime (2013, 2, 12, 0, 0, 0) Day)
[ "ma"
, "máma"
, "Mai nap"
, "Mai napot"
, "mai napon"
, "mai"
, "mait"
, "mai napi"
, "mai napit"
]
, examples (datetime (2013, 2, 13, 0, 0, 0) Day)
[ "holnap"
, "holnapi"
, "holnapit"
]
, examples (datetime (2013, 2, 14, 0, 0, 0) Day)
[ "holnapután"
, "holnaputáni"
, "holnaputánit"
]
, examples (datetime (2013, 2, 11, 0, 0, 0) Day)
[ "tegnap"
, "tegnapi"
, "tegnapit"
]
, examples (datetime (2013, 2, 10, 0, 0, 0) Day)
[ "tegnapelőtt"
, "tegnapelőtti"
, "tegnapelőttit"
]
, examples (datetime (2013, 3, 1, 0, 0, 0) Month)
[ "hónap vége"
, "a hónap vége"
, "hó vége"
, "hó végi"
, "hó végit"
, "hó végén"
]
, examples (datetime (2014, 1, 1, 0, 0, 0) Year)
[ "év vége"
, "az év vége"
, "év végi"
, "év végit"
, "év végén"
]
-- Monday: base form, inflected/adjectival forms, and the "hét" / "hét."
-- abbreviations. Each surface form is listed exactly once; the second "hét"
-- entry was a verbatim duplicate and exercised nothing new.
, examples (datetime (2013, 2, 18, 0, 0, 0) Day)
[ "hétfő"
, "hét"
, "hétfőn"
, "hétfőt"
, "hétfői"
, "hétfőit"
, "hét."
]
, examples (datetime (2013, 2, 19, 0, 0, 0) Day)
[ "kedd"
, "kedden"
, "keddet"
, "keddi"
, "keddit"
]
, examples (datetime (2013, 2, 13, 0, 0, 0) Day)
[ "szerda"
, "szerdán"
, "szerdát"
, "szerdai"
, "szerdait"
, "szer"
, "szer."
]
, examples (datetime (2013, 2, 14, 0, 0, 0) Day)
[ "csütörtök"
, "csütörtökön"
, "csütörtököt"
, "csütörtöki"
, "csütörtökit"
, "csüt"
, "csüt."
]
, examples (datetime (2013, 2, 15, 0, 0, 0) Day)
[ "péntek"
, "pénteken"
, "pénteket"
, "pénteki"
, "péntekit"
, "pén"
, "pén."
]
, examples (datetime (2013, 2, 16, 0, 0, 0) Day)
[ "szombat"
, "szombaton"
, "szombatot"
, "szombati"
, "szombatit"
, "szom"
, "szom."
]
, examples (datetime (2013, 2, 17, 0, 0, 0) Day)
[ "vasárnap"
, "vasárnapot"
, "vasárnapi"
, "vasárnapit"
, "vas"
, "vas."
]
, examples (datetime (2014, 1, 1, 0, 0, 0) Month)
[ "január"
, "januárban"
, "januári"
, "januárit"
, "jan"
, "jan."
]
, examples (datetime (2013, 2, 1, 0, 0, 0) Month)
[ "február"
, "februárban"
, "februári"
, "februárit"
, "feb"
, "feb."
]
-- March: full name, inflected/adjectival forms, and both abbreviations
-- ("már", "márc"), each with and without a trailing period. The repeated
-- "márc" entry was a verbatim duplicate and has been dropped.
, examples (datetime (2013, 3, 1, 0, 0, 0) Month)
[ "március"
, "márciusban"
, "márciusi"
, "márciusit"
, "már"
, "már."
, "márc"
, "márc."
]
, examples (datetime (2013, 4, 1, 0, 0, 0) Month)
[ "április"
, "áprilisban"
, "áprilisi"
, "áprilisit"
, "ápr"
, "ápr."
]
, examples (datetime (2013, 5, 1, 0, 0, 0) Month)
[ "május"
, "májusban"
, "májusi"
, "májusit"
, "máj"
, "máj."
]
, examples (datetime (2013, 6, 1, 0, 0, 0) Month)
[ "június"
, "júniusban"
, "júniusi"
, "júniusit"
, "jún"
, "jún."
]
, examples (datetime (2013, 7, 1, 0, 0, 0) Month)
[ "július"
, "júliusban"
, "júliusi"
, "júliusit"
, "júl"
, "júl."
]
, examples (datetime (2013, 8, 1, 0, 0, 0) Month)
[ "augusztus"
, "augusztusban"
, "augusztusi"
, "augusztusit"
, "aug"
, "aug."
]
, examples (datetime (2013, 9, 1, 0, 0, 0) Month)
[ "szeptember"
, "szeptemberben"
, "szeptemberi"
, "szeptemberit"
, "szep"
, "szep."
, "szept"
, "szept."
]
, examples (datetime (2013, 10, 1, 0, 0, 0) Month)
[ "október"
, "októberben"
, "októberi"
, "októberit"
, "okt"
, "okt."
]
, examples (datetime (2013, 11, 1, 0, 0, 0) Month)
[ "november"
, "novemberben"
, "novemberi"
, "novemberit"
, "nov"
, "nov."
]
, examples (datetime (2013, 12, 1, 0, 0, 0) Month)
[ "december"
, "decemberben"
, "decemberi"
, "decemberit"
, "dec"
, "dec."
]
, examples (datetime (2013, 3, 15, 0, 0, 0) Day)
[ "március 15"
, "március 15."
, "már 15"
, "már. 15"
, "márc 15"
, "márc. 15"
]
, examples (datetime (2013, 3, 1, 0, 0, 0) Day)
[ "március 1-én"
, "március 1-jén"
, "március 1-je"
, "március 1-jei"
, "március 1-i"
, "március 1én"
, "március 1je"
, "március 1jei"
, "március 1i"
]
, examples (datetime (2013, 3, 2, 0, 0, 0) Day)
[ "március 2-án"
, "március 2-ai"
, "március 2-a"
, "március 2án"
, "március 2ai"
, "március 2a"
]
, examples (datetime (2013, 3, 4, 0, 0, 0) Day)
[ "március 4-e"
, "március 4e"
]
, examples (datetime (2013, 3, 7, 0, 0, 0) Day)
[ "március 7-ei"
, "március 7ei"
]
, examples (datetime (2013, 3, 15, 0, 0, 0) Day)
[ "március 15-én"
, "március 15-ei"
, "március 15-i"
, "március 15én"
, "március 15e"
, "március 15ei"
]
, examples (datetime (2013, 3, 1, 0, 0, 0) Month)
[ "következő hónap"
, "jövő hónap"
@ -150,6 +345,20 @@ allExamples = concat
, examples (datetimeInterval ((2013, 2, 12, 6, 0, 0), (2013, 2, 12, 10, 0, 0)) Hour)
[ "ma reggel"
, "reggel"
, "ma reggeli"
, "reggeli"
, "ma reggelit"
, "reggelit"
]
, examples (datetimeInterval ((2013, 2, 12, 8, 0, 0), (2013, 2, 12, 12, 0, 0)) Hour)
[ "délelőtt"
, "délelőtti"
, "délelőttit"
]
, examples (datetimeInterval ((2013, 2, 12, 12, 0, 0), (2013, 2, 12, 13, 0, 0)) Hour)
[ "délben"
, "déli"
, "délit"
]
, examples (datetimeInterval ((2013, 2, 13, 8, 0, 0), (2013, 2, 13, 12, 0, 0)) Hour)
[ "holnap délelőtt"
@ -160,16 +369,43 @@ allExamples = concat
, examples (datetimeInterval ((2013, 2, 12, 12, 0, 0), (2013, 2, 12, 18, 0, 0)) Hour)
[ "ma délután"
, "délután"
, "délutáni"
, "délutánit"
]
, examples (datetimeInterval ((2013, 2, 12, 16, 0, 0), (2013, 2, 12, 20, 0, 0)) Hour)
[ "ma este"
, "este"
, "esti"
, "estit"
]
, examples (datetimeInterval ((2013, 2, 12, 20, 0, 0), (2013, 2, 12, 23, 0, 0)) Hour)
[ "ma éjszaka"
, "éjszaka"
, "éjszakai"
, "éjszakait"
]
, examples (datetimeInterval ((2013, 6, 21, 0, 0, 0), (2013, 9, 24, 0, 0, 0)) Day)
[ "nyár"
, "nyári"
, "nyárit"
, "nyáron"
]
, examples (datetimeInterval ((2013, 9, 23, 0, 0, 0), (2013, 12, 22, 0, 0, 0)) Day)
[ "ősz"
, "őszi"
, "őszit"
, "ősszel"
]
, examples (datetimeInterval ((2012, 12, 21, 0, 0, 0), (2013, 3, 21, 0, 0, 0)) Day)
[ "tél"
, "téli"
, "télit"
, "télen"
]
, examples (datetimeInterval ((2013, 3, 20, 0, 0, 0), (2013, 6, 22, 0, 0, 0)) Day)
[ "tavasz"
, "tavaszi"
, "tavaszit"
, "tavasszal"
]
]

View File

@ -13,8 +13,6 @@ module Duckling.Time.HU.Rules
( rules ) where
import Data.Maybe
import Data.String
import Data.Text (Text)
import Prelude
import qualified Data.Text as Text
@ -41,41 +39,41 @@ ruleIntersect = Rule
-- Instant expressions ("right now", "today", "tomorrow", ...), built by
-- passing (rule name, grain, offset, regex) tuples to mkRuleInstants.
-- NOTE(review): this span comes from a diff rendering; the first eight
-- entries appear to be the pre-change list and the eight entries after them
-- their replacements (two opening brackets, one closing bracket).
ruleInstants :: [Rule]
ruleInstants = mkRuleInstants
[ ("right now", TG.Second, 0 , "((\x00E9pp )?most)|azonnal" )
, ("today", TG.Day, 0 , "ma" )
, ("tomorrow", TG.Day, 1 , "holnap" )
, ("day after tomorrow", TG.Day, 2 , "holnaput\x00E1n" )
, ("yesterday", TG.Day, -1, "tegnap" )
, ("day before yesterday", TG.Day, -2, "tegnapel\x0151tt" )
, ("end of month", TG.Month, 1 , "(a )?h\x00F3nap v\x00E9ge" )
, ("end of year", TG.Year, 1 , "(az )?\x00E9v v\x00E9ge" )
-- NOTE(review): in the "today" pattern below, m(á|a) combines freely with
-- every optional tail, so besides "ma", "mai ..." and "máma" it also accepts
-- "má", "mái ..." and "mama". Presumably only the first three families were
-- intended -- confirm and tighten if so.
-- NOTE(review): the "today" pattern spells "á" literally, while the other
-- patterns in this list use \x.... escapes for non-ASCII letters.
[ ("right now", TG.Second, 0 , "mostani(t)?|((\x00E9pp )?most)|azonnal" )
, ("today", TG.Day, 0 , "m(á|a)(i( nap(o(t|n)|it?)?|t)?|ma)?" )
, ("tomorrow", TG.Day, 1 , "holnap(i(t)?)?" )
, ("day after tomorrow", TG.Day, 2 , "holnaput\x00E1n(i(t)?)?" )
, ("yesterday", TG.Day, -1, "tegnap(i(t)?)?" )
, ("day before yesterday", TG.Day, -2, "tegnapel\x0151tt(i(t)?)?" )
, ("end of month", TG.Month, 1 , "(a )?h\x00F3nap v\x00E9ge|h\x00F3 v\x00E9g(e|i(t)?|\x00E9n)" )
, ("end of year", TG.Year, 1 , "(az )?\x00E9v v\x00E9g(e|i(t)?|\x00E9n)" )
]
-- Day-of-week rules, built by passing (English day name, regex) pairs to
-- mkRuleDaysOfWeek. Each regex covers the full Hungarian day name and, for
-- every day except Tuesday, a short abbreviation with an optional period.
-- NOTE(review): this span comes from a diff rendering; the first seven
-- entries appear to be the pre-change list and the seven entries after them
-- their replacements, which add optional suffix groups to the full names.
ruleDaysOfWeek :: [Rule]
ruleDaysOfWeek = mkRuleDaysOfWeek
[ ( "Monday" , "h\x00E9tf\x0151|h\x00E9t\\.?" )
, ( "Tuesday" , "kedd" )
, ( "Wednesday", "szerda|szer\\.?" )
, ( "Thursday" , "cs\x00FCt\x00F6rt\x00F6k|cs\x00FCt\\.?" )
, ( "Friday" , "p\x00E9ntek|p\x00E9n\\.?" )
, ( "Saturday" , "szombat|szom\\.?" )
, ( "Sunday" , "vas\x00E1rnap|vas\\.?" )
-- NOTE(review): the Wednesday pattern below spells "á" literally
-- ("szerdá(n|t)"), while the neighbouring patterns use \x.... escapes.
[ ( "Monday" , "h\x00E9tf\x0151(n|t|i(t)?)?|h\x00E9t\\.?" )
, ( "Tuesday" , "kedd(en|et|i(t)?)?" )
, ( "Wednesday", "szerda(i(t)?)?|szerdá(n|t)|szer\\.?" )
, ( "Thursday" , "cs\x00FCt\x00F6rt\x00F6k(\x00F6(n|t)|i(t)?)?|cs\x00FCt\\.?" )
, ( "Friday" , "p\x00E9ntek(e(n|t)|i(t)?)?|p\x00E9n\\.?" )
, ( "Saturday" , "szombat(o(n|t)|i(t)?)?|szom\\.?" )
, ( "Sunday" , "vas\x00E1rnap(ot|i(t)?)?|vas\\.?" )
]
-- Month rules, built by passing (English month name, regex) pairs to
-- mkRuleMonths. Each regex covers the full Hungarian month name and an
-- abbreviation with an optional trailing period.
-- NOTE(review): this span comes from a diff rendering; the first twelve
-- entries appear to be the pre-change list and the twelve entries after them
-- their replacements, which add an optional "(ban|i(t)?)" / "(ben|i(t)?)"
-- suffix group to the full names.
ruleMonths :: [Rule]
ruleMonths = mkRuleMonths
[ ( "January" , "janu\x00E1r|jan\\.?" )
, ( "February" , "febru\x00E1r|febr?\\.?" )
, ( "March" , "m\x00E1rcius|m\x00E1rc?\\.?" )
, ( "April" , "\x00E1prilis|\x00E1pr\\.?" )
, ( "May" , "m\x00E1jus|m\x00E1j\\.?" )
, ( "June" , "j\x00FAnius|j\x00FAn\\.?" )
, ( "July" , "j\x00FAlius|j\x00FAl\\.?" )
, ( "August" , "augusztus|aug\\.?" )
, ( "September", "szeptember|szept?\\.?" )
, ( "October" , "okt\x00F3ber|okt\\.?" )
, ( "November" , "november|nov\\.?" )
, ( "December" , "december|dec\\.?" )
-- NOTE(review): the new October pattern below spells "ó" literally
-- ("október"), whereas the entry it replaces used \x00F3 and every other
-- month pattern here keeps \x.... escapes.
[ ( "January" , "janu\x00E1r(ban|i(t)?)?|jan\\.?" )
, ( "February" , "febru\x00E1r(ban|i(t)?)?|febr?\\.?" )
, ( "March" , "m\x00E1rcius(ban|i(t)?)?|m\x00E1rc?\\.?" )
, ( "April" , "\x00E1prilis(ban|i(t)?)?|\x00E1pr\\.?" )
, ( "May" , "m\x00E1jus(ban|i(t)?)?|m\x00E1j\\.?" )
, ( "June" , "j\x00FAnius(ban|i(t)?)?|j\x00FAn\\.?" )
, ( "July" , "j\x00FAlius(ban|i(t)?)?|j\x00FAl\\.?" )
, ( "August" , "augusztus(ban|i(t)?)?|aug\\.?" )
, ( "September", "szeptember(ben|i(t)?)?|szept?\\.?" )
, ( "October" , "október(ben|i(t)?)?|okt\\.?" )
, ( "November" , "november(ben|i(t)?)?|nov\\.?" )
, ( "December" , "december(ben|i(t)?)?|dec\\.?" )
]
ruleMonthDOMNumeral :: Rule
@ -90,6 +88,31 @@ ruleMonthDOMNumeral = Rule
_ -> Nothing
}
-- | Month token, then an integer day-of-month token, then a suffix matched
-- by the regex (optional hyphen plus one of the listed endings).
--
-- The production intersects the month with the day-of-month through
-- 'intersectDOM'; the suffix token is required by the pattern but is not
-- inspected by the production.
ruleMonthDOMNumeralSuffix :: Rule
ruleMonthDOMNumeralSuffix = Rule
  { name = "<named-month> <day-of-month> (non ordinal)"
  , pattern =
    [ Predicate isAMonth
    , Predicate isDOMInteger
    , regex "(-?(jei|jén|án|én|ai|ei|je|ji|a|e|i))"
    ]
  , prod = combine
  }
  where
    -- Explicit signature keeps the GADT match on 'Token' well-typed.
    combine :: [Token] -> Maybe Token
    combine (Token Time month:dom:_) = Token Time <$> intersectDOM month dom
    combine _ = Nothing
-- | Month token followed by an ordinal day-of-month token.
--
-- The production intersects the two through 'intersectDOM' and yields
-- 'Nothing' when the token list does not start with a 'Time' token followed
-- by at least one more token.
ruleMonthDOMOrdinal :: Rule
ruleMonthDOMOrdinal = Rule
  { name = "<named-month> <day-of-month> (ordinal)"
  , pattern =
    [ Predicate isAMonth
    , Predicate isDOMOrdinal
    ]
  , prod = combine
  }
  where
    -- Explicit signature keeps the GADT match on 'Token' well-typed.
    combine :: [Token] -> Maybe Token
    combine (Token Time month:dom:_) = Token Time <$> intersectDOM month dom
    combine _ = Nothing
ruleCycleThisLastNext :: Rule
ruleCycleThisLastNext = Rule
{ name = "this|last|next <cycle>"
@ -220,17 +243,27 @@ rulePartOfDays :: Rule
rulePartOfDays = Rule
{ name = "part of days"
, pattern =
[ regex "(reggel|d\x00E9lel\x0151tt|d\x00E9lben|d\x00E9lut\x00E1n|este|\x00E9jszaka)"
[ regex "(reggel(i(t)?)?|d\x00E9lel\x0151tt(i(t)?)?|d\x00E9lben|d\x00E9li(t)?|d\x00E9lut\x00E1n(i(t)?)?|est(e|i(t)?)|\x00E9jszaka(i(t)?)?)"
]
, prod = \tokens -> case tokens of
(Token RegexMatch (GroupMatch (match:_)):_) -> do
let (start, end) = case Text.toLower match of
"reggel" -> (hour False 6, hour False 10)
"d\x00E9lel\x0151tt" -> (hour False 08, hour False 12)
"d\x00E9lben" -> (hour False 12, hour False 13)
"d\x00E9lut\x00E1n" -> (hour False 12, hour False 18)
"este" -> (hour False 16, hour False 20)
_ -> (hour False 20, hour False 23)
"reggel" -> (hour False 6, hour False 10)
"reggeli" -> (hour False 6, hour False 10)
"reggelit" -> (hour False 6, hour False 10)
"d\x00E9lel\x0151tt" -> (hour False 08, hour False 12)
"d\x00E9lel\x0151tti" -> (hour False 08, hour False 12)
"d\x00E9lel\x0151ttit" -> (hour False 08, hour False 12)
"d\x00E9lben" -> (hour False 12, hour False 13)
"d\x00E9li" -> (hour False 12, hour False 13)
"d\x00E9lit" -> (hour False 12, hour False 13)
"d\x00E9lut\x00E1n" -> (hour False 12, hour False 18)
"d\x00E9lut\x00E1ni" -> (hour False 12, hour False 18)
"d\x00E9lut\x00E1nit" -> (hour False 12, hour False 18)
"este" -> (hour False 16, hour False 20)
"esti" -> (hour False 16, hour False 20)
"estit" -> (hour False 16, hour False 20)
_ -> (hour False 20, hour False 23)
td <- interval TTime.Open start end
tt . partOfDay $ td
_ -> Nothing
@ -253,21 +286,45 @@ ruleSeasons :: Rule
ruleSeasons = Rule
{ name = "seasons"
, pattern =
[ regex "(ny\x00E1r|\x0151sz|t\x00E9l|tavasz)"
[ regex "(ny\x00E1r(i(t)?|on)?|\x0151sz(i(t)?)?|\x0151sszel|t\x00E9l(i(t)?|en)?|tavasz(i(t)?)?|tavasszal)"
]
, prod = \tokens -> case tokens of
(Token RegexMatch (GroupMatch (match:_)):_) -> do
start <- case Text.toLower match of
"ny\x00E1r" -> Just $ monthDay 6 21
"\x0151sz" -> Just $ monthDay 9 23
"t\x00E9l" -> Just $ monthDay 12 21
"tavasz" -> Just $ monthDay 3 20
"ny\x00E1r" -> Just $ monthDay 6 21
"ny\x00E1ri" -> Just $ monthDay 6 21
"ny\x00E1rit" -> Just $ monthDay 6 21
"ny\x00E1ron" -> Just $ monthDay 6 21
"\x0151sz" -> Just $ monthDay 9 23
"\x0151szi" -> Just $ monthDay 9 23
"\x0151szit" -> Just $ monthDay 9 23
"\x0151sszel" -> Just $ monthDay 9 23
"t\x00E9l" -> Just $ monthDay 12 21
"t\x00E9li" -> Just $ monthDay 12 21
"t\x00E9lit" -> Just $ monthDay 12 21
"t\x00E9len" -> Just $ monthDay 12 21
"tavasz" -> Just $ monthDay 3 20
"tavaszi" -> Just $ monthDay 3 20
"tavaszit" -> Just $ monthDay 3 20
"tavasszal" -> Just $ monthDay 3 20
_ -> Nothing
end <- case Text.toLower match of
"ny\x00E1r" -> Just $ monthDay 9 23
"\x0151sz" -> Just $ monthDay 12 21
"t\x00E9l" -> Just $ monthDay 3 20
"tavasz" -> Just $ monthDay 6 21
"ny\x00E1r" -> Just $ monthDay 9 23
"ny\x00E1ri" -> Just $ monthDay 9 23
"ny\x00E1rit" -> Just $ monthDay 9 23
"ny\x00E1ron" -> Just $ monthDay 9 23
"\x0151sz" -> Just $ monthDay 12 21
"\x0151szi" -> Just $ monthDay 12 21
"\x0151szit" -> Just $ monthDay 12 21
"\x0151sszel" -> Just $ monthDay 12 21
"t\x00E9l" -> Just $ monthDay 3 20
"t\x00E9li" -> Just $ monthDay 3 20
"t\x00E9lit" -> Just $ monthDay 3 20
"t\x00E9len" -> Just $ monthDay 3 20
"tavasz" -> Just $ monthDay 6 21
"tavaszi" -> Just $ monthDay 6 21
"tavaszit" -> Just $ monthDay 6 21
"tavasszal" -> Just $ monthDay 6 21
_ -> Nothing
Token Time <$> interval TTime.Open start end
_ -> Nothing
@ -278,6 +335,8 @@ rules :: [Rule]
rules =
[ ruleIntersect
, ruleMonthDOMNumeral
, ruleMonthDOMNumeralSuffix
, ruleMonthDOMOrdinal
, ruleCycleThisLastNext
, ruleNextDOW
, ruleHHMM

View File

@ -8,7 +8,6 @@ module Duckling.Time.HU.Tests
( tests ) where
import Data.String
import Prelude
import Test.Tasty
import Duckling.Dimensions.Types