From 4878820294a687f960d36f211fc7936aa4c1e992 Mon Sep 17 00:00:00 2001
From: evjava
Date: Fri, 4 Jun 2021 12:16:57 -0700
Subject: [PATCH] Russian(RU) numeral and ordinal improvements (#374)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Summary:
- added non-typo variant for 11 (одиннадцать)
- added variants for grammatical cases

Pull Request resolved: https://github.com/facebook/duckling/pull/374

Test Plan:
```
:test Duckling.Numeral.RU.Tests
:test Duckling.Ordinal.RU.Tests
```

Reviewed By: stroxler

Differential Revision: D20332223

Pulled By: chessai

fbshipit-source-id: be1c6f6477af56418b69da21f5219ba27b50d0a1
---
 Duckling/Numeral/RU/Corpus.hs |  1 +
 Duckling/Numeral/RU/Rules.hs  | 34 ++++++++++++++++++++++++++++++++++
 Duckling/Ordinal/RU/Rules.hs  |  4 ++--
 3 files changed, 37 insertions(+), 2 deletions(-)

diff --git a/Duckling/Numeral/RU/Corpus.hs b/Duckling/Numeral/RU/Corpus.hs
index c567536d..ebe69656 100644
--- a/Duckling/Numeral/RU/Corpus.hs
+++ b/Duckling/Numeral/RU/Corpus.hs
@@ -41,6 +41,7 @@ allExamples = concat
              [ "3"
              , "три"
              , "03"
+             , "трех"
              ]
   , examples (NumeralValue 4)
              [ "4"
diff --git a/Duckling/Numeral/RU/Rules.hs b/Duckling/Numeral/RU/Rules.hs
index 26943a04..d196f835 100644
--- a/Duckling/Numeral/RU/Rules.hs
+++ b/Duckling/Numeral/RU/Rules.hs
@@ -232,6 +232,27 @@ threeToNineteenMap = HashMap.fromList
   , ( "девятнадцать", 19)
   ]
 
+threeToNineteenMapGenitive :: HashMap Text Integer
+threeToNineteenMapGenitive = HashMap.fromList
+  [ ( "трех", 3)
+  , ( "четырех", 4)
+  , ( "пяти", 5)
+  , ( "шести", 6)
+  , ( "семи", 7)
+  , ( "восьми", 8)
+  , ( "девяти", 9)
+  , ( "десяти", 10)
+  , ( "одиннадцати", 11)
+  , ( "двенадцати", 12)
+  , ( "тринадцати", 13)
+  , ( "четырнадцати", 14)
+  , ( "пятнадцати", 15)
+  , ( "шестнадцати", 16)
+  , ( "семнадцати", 17)
+  , ( "восемнадцати", 18)
+  , ( "девятнадцати", 19)
+  ]
+
 ruleInteger4 :: Rule
 ruleInteger4 = Rule
   { name = "integer (3..19)"
@@ -244,6 +265,18 @@ ruleInteger4 = Rule
       _ -> Nothing
   }
 
+ruleInteger4Genitive :: Rule
+ruleInteger4Genitive = Rule
+  { name = "integer (3..19)"
+  , pattern =
+    [ regex "(трех|четырнадцати|четырех|пятнадцати|пяти|шестнадцати|шести|семнадцати|семи|восемнадцати|восьми|девятнадцати|девяти|десяти|одиннадцати|двенадцати|тринадцати)"
+    ]
+  , prod = \tokens -> case tokens of
+      (Token RegexMatch (GroupMatch (match:_)):_) ->
+        HashMap.lookup (Text.toLower match) threeToNineteenMapGenitive >>= integer
+      _ -> Nothing
+  }
+
 ruleInteger2 :: Rule
 ruleInteger2 = Rule
   { name = "integer 1"
@@ -287,6 +320,7 @@ rules =
   , ruleInteger2
   , ruleInteger3
   , ruleInteger4
+  , ruleInteger4Genitive
   , ruleInteger5
   , ruleInteger6
   , ruleInteger7
diff --git a/Duckling/Ordinal/RU/Rules.hs b/Duckling/Ordinal/RU/Rules.hs
index ac429dbb..3a4f846d 100644
--- a/Duckling/Ordinal/RU/Rules.hs
+++ b/Duckling/Ordinal/RU/Rules.hs
@@ -37,7 +37,7 @@ ordinalsFirstthMap = HashMap.fromList
   , ( "восьм", 8 )
   , ( "девят", 9 )
   , ( "десят", 10 )
-  , ( "одинадцат", 11 )
+  , ( "одиннадцат", 11 )
   , ( "двенадцат", 12 )
   , ( "тринадцат", 13 )
   , ( "четырнадцат", 14 )
@@ -65,7 +65,7 @@ ruleOrdinalsFirstth :: Rule
 ruleOrdinalsFirstth = Rule
   { name = "ordinals (first..19th)"
   , pattern =
-    [ regex "(перв|втор|трет|четверт|пят|шест|седьм|восьм|девят|десят|одинадцат|двенадцат|тринадцат|четырнадцат|пятнадцат|шестнадцат|семнадцат|восемнадцат|девятнадцат|двадцат)(ье(го|й)?|ого|ый|ой|ий|ая|ое|ья)"
+    [ regex "(перв|втор|трет|четверт|пят|шест|седьм|восьм|девят|десят|одиннадцат|двенадцат|тринадцат|четырнадцат|пятнадцат|шестнадцат|семнадцат|восемнадцат|девятнадцат|двадцат)(ье(го|й)?|ого|ый|ой|ий|ая|ое|ья)"
     ]
   , prod = \tokens -> case tokens of
       (Token RegexMatch (GroupMatch (match:_)):_) ->