# Patch summary: Hungarian (HU) time dimension, three files.
#
# Duckling/Time/HU/Corpus.hs
#   - Drops the `Duckling.Time.Types hiding (Month)` import.
#   - Adds examples for suffixed word forms (-i, -it, -t, -n/-on/-en/-ön,
#     -ban/-ben) to the existing instant, weekday, month, part-of-day and
#     season groups; replaces the duplicated "márc" entry with "márc.".
#   - Adds new example groups: Wednesday through Saturday, February,
#     April through December, "március N" followed by a day suffix
#     (with and without a hyphen), "délelőtt", "délben", and the seasons
#     ősz / tél / tavasz.
#
# Duckling/Time/HU/Rules.hs
#   - Drops the `Data.String` and `Data.Text (Text)` imports.
#   - Widens the regexes of ruleInstants, ruleDaysOfWeek, ruleMonths,
#     rulePartOfDays and ruleSeasons so they accept the suffixed forms.
#     rulePartOfDays and ruleSeasons list every suffixed spelling as its own
#     case alternative; in rulePartOfDays the "éjszaka" spellings still fall
#     through to the `_` alternative (20h-23h).
#   - Adds ruleMonthDOMNumeralSuffix (month, day-of-month integer, suffix
#     regex) and ruleMonthDOMOrdinal (month, day-of-month ordinal). Both
#     produce their token via `intersectDOM td token`, and both are added to
#     the `rules` list after ruleMonthDOMNumeral.
#
# tests/Duckling/Time/HU/Tests.hs
#   - Drops `import Prelude`.
#
# Review notes:
#   - NOTE(review): this copy of the patch has lost its line breaks, so hunk
#     line counts and context indentation cannot be checked from here.
#   - NOTE(review): the "today" pattern `m(á|a)(i(...)?|ma)?` accepts every
#     Corpus spelling, but it also accepts "mama" and "mái". Confirm that the
#     accent-insensitive first vowel is intended.
#   - NOTE(review): several new patterns spell accented letters literally
#     ("m(á|a)", "szerdá(n|t)", "október", "jén|án|én") while neighbouring
#     patterns use `\x` escapes. For October the literal is significant:
#     a Haskell `\x` escape consumes every following hex digit, so the removed
#     `"okt\x00F3ber"` is lexed as `\x00F3BE` followed by "r". Do not convert
#     it back to an escape without a `\&` separator after the code point.
#   - NOTE(review): "end of month" accepts végi / végit / végén only after the
#     short form "hó"; after "hónap" it still accepts "vége" only, whereas
#     "end of year" accepts all of them. Confirm whether that is deliberate.
#   - NOTE(review): the suffix regex in ruleMonthDOMNumeralSuffix contains the
#     alternative "ji"; no Corpus example in this patch exercises it.
#   - NOTE(review): ruleSeasons repeats the same 16 match keys in two separate
#     case tables (start and end); they have to be kept in sync by hand.
#   - NOTE(review): the names of the two new rules, " (non ordinal)" and
#     " (ordinal)", begin with a space. Presumably a placeholder prefix was
#     stripped from this copy; verify against the repository.
#
diff --git a/Duckling/Time/HU/Corpus.hs b/Duckling/Time/HU/Corpus.hs index 23173197..52a43752 100644 --- a/Duckling/Time/HU/Corpus.hs +++ b/Duckling/Time/HU/Corpus.hs @@ -16,7 +16,6 @@ import Prelude import Duckling.Locale import Duckling.Resolve import Duckling.Time.Corpus -import Duckling.Time.Types hiding (Month) import Duckling.TimeGrain.Types hiding (add) import Duckling.Testing.Types hiding (examples) @@ -28,62 +27,258 @@ allExamples = concat [ examples (datetime (2013, 2, 12, 4, 30, 0) Second) [ "most" , "épp most" + , "azonnal" + , "mostani" ] , examples (datetime (2013, 2, 12, 0, 0, 0) Day) [ "ma" + , "máma" + , "Mai nap" + , "Mai napot" + , "mai napon" + , "mai" + , "mait" + , "mai napi" + , "mai napit" ] , examples (datetime (2013, 2, 13, 0, 0, 0) Day) [ "holnap" + , "holnapi" + , "holnapit" ] , examples (datetime (2013, 2, 14, 0, 0, 0) Day) [ "holnapután" + , "holnaputáni" + , "holnaputánit" ] , examples (datetime (2013, 2, 11, 0, 0, 0) Day) [ "tegnap" + , "tegnapi" + , "tegnapit" ] , examples (datetime (2013, 2, 10, 0, 0, 0) Day) [ "tegnapelőtt" + , "tegnapelőtti" + , "tegnapelőttit" ] , examples (datetime (2013, 3, 1, 0, 0, 0) Month) [ "hónap vége" , "a hónap vége" + , "hó vége" + , "hó végi" + , "hó végit" + , "hó végén" ] , examples (datetime (2014, 1, 1, 0, 0, 0) Year) [ "év vége" , "az év vége" + , "év végi" + , "év végit" + , "év végén" ] , examples (datetime (2013, 2, 18, 0, 0, 0) Day) [ "hétfő" - , "hét" + , "hétfőn" + , "hétfőt" + , "hétfői" + , "hétfőit" , "hét." + , "hét" ] , examples (datetime (2013, 2, 19, 0, 0, 0) Day) [ "kedd" + , "kedden" + , "keddet" + , "keddi" + , "keddit" + ] + , examples (datetime (2013, 2, 13, 0, 0, 0) Day) + [ "szerda" + , "szerdán" + , "szerdát" + , "szerdai" + , "szerdait" + , "szer" + , "szer." + ] + , examples (datetime (2013, 2, 14, 0, 0, 0) Day) + [ "csütörtök" + , "csütörtökön" + , "csütörtököt" + , "csütörtöki" + , "csütörtökit" + , "csüt" + , "csüt." 
+ ] + , examples (datetime (2013, 2, 15, 0, 0, 0) Day) + [ "péntek" + , "pénteken" + , "pénteket" + , "pénteki" + , "péntekit" + , "pén" + , "pén." + ] + , examples (datetime (2013, 2, 16, 0, 0, 0) Day) + [ "szombat" + , "szombaton" + , "szombatot" + , "szombati" + , "szombatit" + , "szom" + , "szom." ] , examples (datetime (2013, 2, 17, 0, 0, 0) Day) [ "vasárnap" + , "vasárnapot" + , "vasárnapi" + , "vasárnapit" , "vas" , "vas." ] , examples (datetime (2014, 1, 1, 0, 0, 0) Month) [ "január" + , "januárban" + , "januári" + , "januárit" , "jan" , "jan." ] + , examples (datetime (2013, 2, 1, 0, 0, 0) Month) + [ "február" + , "februárban" + , "februári" + , "februárit" + , "feb" + , "feb." + ] , examples (datetime (2013, 3, 1, 0, 0, 0) Month) [ "március" + , "márciusban" + , "márciusi" + , "márciusit" , "már" , "már." , "márc" - , "márc" + , "márc." + ] + , examples (datetime (2013, 4, 1, 0, 0, 0) Month) + [ "április" + , "áprilisban" + , "áprilisi" + , "áprilisit" + , "ápr" + , "ápr." + ] + , examples (datetime (2013, 5, 1, 0, 0, 0) Month) + [ "május" + , "májusban" + , "májusi" + , "májusit" + , "máj" + , "máj." + ] + , examples (datetime (2013, 6, 1, 0, 0, 0) Month) + [ "június" + , "júniusban" + , "júniusi" + , "júniusit" + , "jún" + , "jún." + ] + , examples (datetime (2013, 7, 1, 0, 0, 0) Month) + [ "július" + , "júliusban" + , "júliusi" + , "júliusit" + , "júl" + , "júl." + ] + , examples (datetime (2013, 8, 1, 0, 0, 0) Month) + [ "augusztus" + , "augusztusban" + , "augusztusi" + , "augusztusit" + , "aug" + , "aug." + ] + , examples (datetime (2013, 9, 1, 0, 0, 0) Month) + [ "szeptember" + , "szeptemberben" + , "szeptemberi" + , "szeptemberit" + , "szep" + , "szep." + , "szept" + , "szept." + ] + , examples (datetime (2013, 10, 1, 0, 0, 0) Month) + [ "október" + , "októberben" + , "októberi" + , "októberit" + , "okt" + , "okt." 
+ ] + , examples (datetime (2013, 11, 1, 0, 0, 0) Month) + [ "november" + , "novemberben" + , "novemberi" + , "novemberit" + , "nov" + , "nov." + ] + , examples (datetime (2013, 12, 1, 0, 0, 0) Month) + [ "december" + , "decemberben" + , "decemberi" + , "decemberit" + , "dec" + , "dec." ] , examples (datetime (2013, 3, 15, 0, 0, 0) Day) [ "március 15" + , "március 15." , "már 15" , "már. 15" , "márc 15" , "márc. 15" ] + , examples (datetime (2013, 3, 1, 0, 0, 0) Day) + [ "március 1-én" + , "március 1-jén" + , "március 1-je" + , "március 1-jei" + , "március 1-i" + , "március 1én" + , "március 1je" + , "március 1jei" + , "március 1i" + ] + , examples (datetime (2013, 3, 2, 0, 0, 0) Day) + [ "március 2-án" + , "március 2-ai" + , "március 2-a" + , "március 2án" + , "március 2ai" + , "március 2a" + ] + , examples (datetime (2013, 3, 4, 0, 0, 0) Day) + [ "március 4-e" + , "március 4e" + ] + , examples (datetime (2013, 3, 7, 0, 0, 0) Day) + [ "március 7-ei" + , "március 7ei" + ] + , examples (datetime (2013, 3, 15, 0, 0, 0) Day) + [ "március 15-én" + , "március 15-ei" + , "március 15-i" + , "március 15én" + , "március 15e" + , "március 15ei" + ] , examples (datetime (2013, 3, 1, 0, 0, 0) Month) [ "következő hónap" , "jövő hónap" @@ -150,6 +345,20 @@ allExamples = concat , examples (datetimeInterval ((2013, 2, 12, 6, 0, 0), (2013, 2, 12, 10, 0, 0)) Hour) [ "ma reggel" , "reggel" + , "ma reggeli" + , "reggeli" + , "ma reggelit" + , "reggelit" + ] + , examples (datetimeInterval ((2013, 2, 12, 8, 0, 0), (2013, 2, 12, 12, 0, 0)) Hour) + [ "délelőtt" + , "délelőtti" + , "délelőttit" + ] + , examples (datetimeInterval ((2013, 2, 12, 12, 0, 0), (2013, 2, 12, 13, 0, 0)) Hour) + [ "délben" + , "déli" + , "délit" ] , examples (datetimeInterval ((2013, 2, 13, 8, 0, 0), (2013, 2, 13, 12, 0, 0)) Hour) [ "holnap délelőtt" @@ -160,16 +369,43 @@ allExamples = concat , examples (datetimeInterval ((2013, 2, 12, 12, 0, 0), (2013, 2, 12, 18, 0, 0)) Hour) [ "ma délután" , "délután" + , 
"délutáni" + , "délutánit" ] , examples (datetimeInterval ((2013, 2, 12, 16, 0, 0), (2013, 2, 12, 20, 0, 0)) Hour) [ "ma este" , "este" + , "esti" + , "estit" ] , examples (datetimeInterval ((2013, 2, 12, 20, 0, 0), (2013, 2, 12, 23, 0, 0)) Hour) [ "ma éjszaka" , "éjszaka" + , "éjszakai" + , "éjszakait" ] , examples (datetimeInterval ((2013, 6, 21, 0, 0, 0), (2013, 9, 24, 0, 0, 0)) Day) [ "nyár" + , "nyári" + , "nyárit" + , "nyáron" + ] + , examples (datetimeInterval ((2013, 9, 23, 0, 0, 0), (2013, 12, 22, 0, 0, 0)) Day) + [ "ősz" + , "őszi" + , "őszit" + , "ősszel" + ] + , examples (datetimeInterval ((2012, 12, 21, 0, 0, 0), (2013, 3, 21, 0, 0, 0)) Day) + [ "tél" + , "téli" + , "télit" + , "télen" + ] + , examples (datetimeInterval ((2013, 3, 20, 0, 0, 0), (2013, 6, 22, 0, 0, 0)) Day) + [ "tavasz" + , "tavaszi" + , "tavaszit" + , "tavasszal" ] ] diff --git a/Duckling/Time/HU/Rules.hs b/Duckling/Time/HU/Rules.hs index e2889f52..e1775c1e 100644 --- a/Duckling/Time/HU/Rules.hs +++ b/Duckling/Time/HU/Rules.hs @@ -13,8 +13,6 @@ module Duckling.Time.HU.Rules ( rules ) where import Data.Maybe -import Data.String -import Data.Text (Text) import Prelude import qualified Data.Text as Text @@ -41,41 +39,41 @@ ruleIntersect = Rule ruleInstants :: [Rule] ruleInstants = mkRuleInstants - [ ("right now", TG.Second, 0 , "((\x00E9pp )?most)|azonnal" ) - , ("today", TG.Day, 0 , "ma" ) - , ("tomorrow", TG.Day, 1 , "holnap" ) - , ("day after tomorrow", TG.Day, 2 , "holnaput\x00E1n" ) - , ("yesterday", TG.Day, -1, "tegnap" ) - , ("day before yesterday", TG.Day, -2, "tegnapel\x0151tt" ) - , ("end of month", TG.Month, 1 , "(a )?h\x00F3nap v\x00E9ge" ) - , ("end of year", TG.Year, 1 , "(az )?\x00E9v v\x00E9ge" ) + [ ("right now", TG.Second, 0 , "mostani(t)?|((\x00E9pp )?most)|azonnal" ) + , ("today", TG.Day, 0 , "m(á|a)(i( nap(o(t|n)|it?)?|t)?|ma)?" ) + , ("tomorrow", TG.Day, 1 , "holnap(i(t)?)?" ) + , ("day after tomorrow", TG.Day, 2 , "holnaput\x00E1n(i(t)?)?" 
) + , ("yesterday", TG.Day, -1, "tegnap(i(t)?)?" ) + , ("day before yesterday", TG.Day, -2, "tegnapel\x0151tt(i(t)?)?" ) + , ("end of month", TG.Month, 1 , "(a )?h\x00F3nap v\x00E9ge|h\x00F3 v\x00E9g(e|i(t)?|\x00E9n)" ) + , ("end of year", TG.Year, 1 , "(az )?\x00E9v v\x00E9g(e|i(t)?|\x00E9n)" ) ] ruleDaysOfWeek :: [Rule] ruleDaysOfWeek = mkRuleDaysOfWeek - [ ( "Monday" , "h\x00E9tf\x0151|h\x00E9t\\.?" ) - , ( "Tuesday" , "kedd" ) - , ( "Wednesday", "szerda|szer\\.?" ) - , ( "Thursday" , "cs\x00FCt\x00F6rt\x00F6k|cs\x00FCt\\.?" ) - , ( "Friday" , "p\x00E9ntek|p\x00E9n\\.?" ) - , ( "Saturday" , "szombat|szom\\.?" ) - , ( "Sunday" , "vas\x00E1rnap|vas\\.?" ) + [ ( "Monday" , "h\x00E9tf\x0151(n|t|i(t)?)?|h\x00E9t\\.?" ) + , ( "Tuesday" , "kedd(en|et|i(t)?)?" ) + , ( "Wednesday", "szerda(i(t)?)?|szerdá(n|t)|szer\\.?" ) + , ( "Thursday" , "cs\x00FCt\x00F6rt\x00F6k(\x00F6(n|t)|i(t)?)?|cs\x00FCt\\.?" ) + , ( "Friday" , "p\x00E9ntek(e(n|t)|i(t)?)?|p\x00E9n\\.?" ) + , ( "Saturday" , "szombat(o(n|t)|i(t)?)?|szom\\.?" ) + , ( "Sunday" , "vas\x00E1rnap(ot|i(t)?)?|vas\\.?" ) ] ruleMonths :: [Rule] ruleMonths = mkRuleMonths - [ ( "January" , "janu\x00E1r|jan\\.?" ) - , ( "February" , "febru\x00E1r|febr?\\.?" ) - , ( "March" , "m\x00E1rcius|m\x00E1rc?\\.?" ) - , ( "April" , "\x00E1prilis|\x00E1pr\\.?" ) - , ( "May" , "m\x00E1jus|m\x00E1j\\.?" ) - , ( "June" , "j\x00FAnius|j\x00FAn\\.?" ) - , ( "July" , "j\x00FAlius|j\x00FAl\\.?" ) - , ( "August" , "augusztus|aug\\.?" ) - , ( "September", "szeptember|szept?\\.?" ) - , ( "October" , "okt\x00F3ber|okt\\.?" ) - , ( "November" , "november|nov\\.?" ) - , ( "December" , "december|dec\\.?" ) + [ ( "January" , "janu\x00E1r(ban|i(t)?)?|jan\\.?" ) + , ( "February" , "febru\x00E1r(ban|i(t)?)?|febr?\\.?" ) + , ( "March" , "m\x00E1rcius(ban|i(t)?)?|m\x00E1rc?\\.?" ) + , ( "April" , "\x00E1prilis(ban|i(t)?)?|\x00E1pr\\.?" ) + , ( "May" , "m\x00E1jus(ban|i(t)?)?|m\x00E1j\\.?" ) + , ( "June" , "j\x00FAnius(ban|i(t)?)?|j\x00FAn\\.?" 
) + , ( "July" , "j\x00FAlius(ban|i(t)?)?|j\x00FAl\\.?" ) + , ( "August" , "augusztus(ban|i(t)?)?|aug\\.?" ) + , ( "September", "szeptember(ben|i(t)?)?|szept?\\.?" ) + , ( "October" , "október(ben|i(t)?)?|okt\\.?" ) + , ( "November" , "november(ben|i(t)?)?|nov\\.?" ) + , ( "December" , "december(ben|i(t)?)?|dec\\.?" ) ] ruleMonthDOMNumeral :: Rule @@ -90,6 +88,31 @@ ruleMonthDOMNumeral = Rule _ -> Nothing } +ruleMonthDOMNumeralSuffix :: Rule +ruleMonthDOMNumeralSuffix = Rule + { name = " (non ordinal)" + , pattern = + [ Predicate isAMonth + , Predicate isDOMInteger + , regex "(-?(jei|jén|án|én|ai|ei|je|ji|a|e|i))" + ] + , prod = \tokens -> case tokens of + (Token Time td:token:_) -> Token Time <$> intersectDOM td token + _ -> Nothing + } + +ruleMonthDOMOrdinal :: Rule +ruleMonthDOMOrdinal = Rule + { name = " (ordinal)" + , pattern = + [ Predicate isAMonth + , Predicate isDOMOrdinal + ] + , prod = \tokens -> case tokens of + (Token Time td:token:_) -> Token Time <$> intersectDOM td token + _ -> Nothing + } + ruleCycleThisLastNext :: Rule ruleCycleThisLastNext = Rule { name = "this|last|next " @@ -220,17 +243,27 @@ rulePartOfDays :: Rule rulePartOfDays = Rule { name = "part of days" , pattern = - [ regex "(reggel|d\x00E9lel\x0151tt|d\x00E9lben|d\x00E9lut\x00E1n|este|\x00E9jszaka)" + [ regex "(reggel(i(t)?)?|d\x00E9lel\x0151tt(i(t)?)?|d\x00E9lben|d\x00E9li(t)?|d\x00E9lut\x00E1n(i(t)?)?|est(e|i(t)?)|\x00E9jszaka(i(t)?)?)" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (match:_)):_) -> do let (start, end) = case Text.toLower match of - "reggel" -> (hour False 6, hour False 10) - "d\x00E9lel\x0151tt" -> (hour False 08, hour False 12) - "d\x00E9lben" -> (hour False 12, hour False 13) - "d\x00E9lut\x00E1n" -> (hour False 12, hour False 18) - "este" -> (hour False 16, hour False 20) - _ -> (hour False 20, hour False 23) + "reggel" -> (hour False 6, hour False 10) + "reggeli" -> (hour False 6, hour False 10) + "reggelit" -> (hour False 6, hour False 10) + 
"d\x00E9lel\x0151tt" -> (hour False 08, hour False 12) + "d\x00E9lel\x0151tti" -> (hour False 08, hour False 12) + "d\x00E9lel\x0151ttit" -> (hour False 08, hour False 12) + "d\x00E9lben" -> (hour False 12, hour False 13) + "d\x00E9li" -> (hour False 12, hour False 13) + "d\x00E9lit" -> (hour False 12, hour False 13) + "d\x00E9lut\x00E1n" -> (hour False 12, hour False 18) + "d\x00E9lut\x00E1ni" -> (hour False 12, hour False 18) + "d\x00E9lut\x00E1nit" -> (hour False 12, hour False 18) + "este" -> (hour False 16, hour False 20) + "esti" -> (hour False 16, hour False 20) + "estit" -> (hour False 16, hour False 20) + _ -> (hour False 20, hour False 23) td <- interval TTime.Open start end tt . partOfDay $ td _ -> Nothing @@ -253,21 +286,45 @@ ruleSeasons :: Rule ruleSeasons = Rule { name = "seasons" , pattern = - [ regex "(ny\x00E1r|\x0151sz|t\x00E9l|tavasz)" + [ regex "(ny\x00E1r(i(t)?|on)?|\x0151sz(i(t)?)?|\x0151sszel|t\x00E9l(i(t)?|en)?|tavasz(i(t)?)?|tavasszal)" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (match:_)):_) -> do start <- case Text.toLower match of - "ny\x00E1r" -> Just $ monthDay 6 21 - "\x0151sz" -> Just $ monthDay 9 23 - "t\x00E9l" -> Just $ monthDay 12 21 - "tavasz" -> Just $ monthDay 3 20 + "ny\x00E1r" -> Just $ monthDay 6 21 + "ny\x00E1ri" -> Just $ monthDay 6 21 + "ny\x00E1rit" -> Just $ monthDay 6 21 + "ny\x00E1ron" -> Just $ monthDay 6 21 + "\x0151sz" -> Just $ monthDay 9 23 + "\x0151szi" -> Just $ monthDay 9 23 + "\x0151szit" -> Just $ monthDay 9 23 + "\x0151sszel" -> Just $ monthDay 9 23 + "t\x00E9l" -> Just $ monthDay 12 21 + "t\x00E9li" -> Just $ monthDay 12 21 + "t\x00E9lit" -> Just $ monthDay 12 21 + "t\x00E9len" -> Just $ monthDay 12 21 + "tavasz" -> Just $ monthDay 3 20 + "tavaszi" -> Just $ monthDay 3 20 + "tavaszit" -> Just $ monthDay 3 20 + "tavasszal" -> Just $ monthDay 3 20 _ -> Nothing end <- case Text.toLower match of - "ny\x00E1r" -> Just $ monthDay 9 23 - "\x0151sz" -> Just $ monthDay 12 21 - "t\x00E9l" 
-> Just $ monthDay 3 20 - "tavasz" -> Just $ monthDay 6 21 + "ny\x00E1r" -> Just $ monthDay 9 23 + "ny\x00E1ri" -> Just $ monthDay 9 23 + "ny\x00E1rit" -> Just $ monthDay 9 23 + "ny\x00E1ron" -> Just $ monthDay 9 23 + "\x0151sz" -> Just $ monthDay 12 21 + "\x0151szi" -> Just $ monthDay 12 21 + "\x0151szit" -> Just $ monthDay 12 21 + "\x0151sszel" -> Just $ monthDay 12 21 + "t\x00E9l" -> Just $ monthDay 3 20 + "t\x00E9li" -> Just $ monthDay 3 20 + "t\x00E9lit" -> Just $ monthDay 3 20 + "t\x00E9len" -> Just $ monthDay 3 20 + "tavasz" -> Just $ monthDay 6 21 + "tavaszi" -> Just $ monthDay 6 21 + "tavaszit" -> Just $ monthDay 6 21 + "tavasszal" -> Just $ monthDay 6 21 _ -> Nothing Token Time <$> interval TTime.Open start end _ -> Nothing @@ -278,6 +335,8 @@ rules :: [Rule] rules = [ ruleIntersect , ruleMonthDOMNumeral + , ruleMonthDOMNumeralSuffix + , ruleMonthDOMOrdinal , ruleCycleThisLastNext , ruleNextDOW , ruleHHMM diff --git a/tests/Duckling/Time/HU/Tests.hs b/tests/Duckling/Time/HU/Tests.hs index 966c1089..49b97e7a 100644 --- a/tests/Duckling/Time/HU/Tests.hs +++ b/tests/Duckling/Time/HU/Tests.hs @@ -8,7 +8,6 @@ module Duckling.Time.HU.Tests ( tests ) where import Data.String -import Prelude import Test.Tasty import Duckling.Dimensions.Types