From 888da76215f6e05a9bdef3ad06887b0b1ed14276 Mon Sep 17 00:00:00 2001 From: Steven Troxler Date: Fri, 23 Apr 2021 09:35:59 -0700 Subject: [PATCH] Numeral/ES: Add support for 1M, and multiples of 1K/1M Summary: This commit adds two things to Spanish numeral support: - support for millions - support, via hooking into the `isMultipliable` logic used by EN, for composing counts of 2-999 with either "mil" or "millones", which is the standard way to say things like "tres mil" = 3000 Reviewed By: chessai Differential Revision: D27858135 fbshipit-source-id: 980e95bd989f818c5ceaa2bb6c87fe81d3e08366 --- CHANGELOG.md | 1 + Duckling/Numeral/ES/Corpus.hs | 11 +++++--- Duckling/Numeral/ES/Rules.hs | 36 ++++++++++++++++++++++++++- Duckling/Ranking/Classifiers/ES_AR.hs | 17 ++++++++----- Duckling/Ranking/Classifiers/ES_CL.hs | 17 ++++++++----- Duckling/Ranking/Classifiers/ES_CO.hs | 17 ++++++++----- Duckling/Ranking/Classifiers/ES_ES.hs | 17 ++++++++----- Duckling/Ranking/Classifiers/ES_MX.hs | 17 ++++++++----- Duckling/Ranking/Classifiers/ES_PE.hs | 17 ++++++++----- Duckling/Ranking/Classifiers/ES_VE.hs | 17 ++++++++----- Duckling/Ranking/Classifiers/ES_XX.hs | 17 ++++++++----- 11 files changed, 131 insertions(+), 53 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3dcfb2cf..ffb272ab 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ * ES (Spanish) * Numeral: Fix parsing for hundreds phrases like `doscientos tres` + * Numeral: Add basic support for thousands and millions * RU (Russian) * Duration: Diminutives for minutes and hours diff --git a/Duckling/Numeral/ES/Corpus.hs b/Duckling/Numeral/ES/Corpus.hs index 7b8d7dc3..fa47b1d5 100644 --- a/Duckling/Numeral/ES/Corpus.hs +++ b/Duckling/Numeral/ES/Corpus.hs @@ -6,6 +6,7 @@ {-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE NumericUnderscores #-} module Duckling.Numeral.ES.Corpus (corpus) where import Data.String @@ -35,12 +36,12 @@ allExamples = , examples (NumeralValue 78) ["Setenta y ocho"] , examples 
(NumeralValue 80) ["ochenta"] , examples (NumeralValue 33) ["33", "treinta y tres", "treinta y 3"] - , examples (NumeralValue 100000) ["100000", "100K", "100k"] + , examples (NumeralValue 100_000) ["100000", "100K", "100k"] , examples (NumeralValue 300) ["trescientos"] , examples (NumeralValue 243) ["243"] - , examples (NumeralValue 3000000) ["3M", "3000K", "3000000"] - , examples (NumeralValue 1200000) ["1200000", "1200K"] - , examples (NumeralValue (-1200000)) ["-1200000", "-1200K"] + , examples (NumeralValue 3_000_000) ["3M", "3000K", "3000000"] + , examples (NumeralValue 1_200_000) ["1200000", "1200K"] + , examples (NumeralValue (-1_200_000)) ["-1200000", "-1200K"] , examples (NumeralValue 1.5) ["1 punto cinco", "una punto cinco"] , examples (NumeralValue 1) ["cero uno", "zero uno"] , examples (NumeralValue 2) ["cero dos", "zero dos"] @@ -64,4 +65,6 @@ allExamples = , examples (NumeralValue 8) ["cero ocho", "zero ocho"] , examples (NumeralValue 9) ["cero nueve", "zero nueve"] , examples (NumeralValue 203) ["dos cientos tres", "doscientos tres"] + , examples (NumeralValue 203_000) ["doscientos tres mil", "203 mil"] + , examples (NumeralValue 4_000_000) ["cuatro millones", "4 millones"] ] diff --git a/Duckling/Numeral/ES/Rules.hs b/Duckling/Numeral/ES/Rules.hs index 36f6a74a..901c5155 100644 --- a/Duckling/Numeral/ES/Rules.hs +++ b/Duckling/Numeral/ES/Rules.hs @@ -183,6 +183,12 @@ bigNumbersMap = , ("ochocientos", 800) , ("novecientos", 900) , ("mil", 1000) + , ("millon", 1000000) + , ("millón", 1000000) + , ("un millon", 1000000) + , ("un millón", 1000000) + , ("millones", 1000000) + -- Note: billion and larger is ambiguous because of long vs short scale ] ruleBigNumeral :: Rule @@ -190,7 +196,7 @@ ruleBigNumeral = Rule { name = "big number 100 to 1K" , pattern = [ regex - "(cien(to|tos)?|doscientos|trescientos|cuatrocientos|quinientos|seiscientos|setecientos|ochocientos|novecientos|mil)" +
"(cien(to|tos)?|doscientos|trescientos|cuatrocientos|quinientos|seiscientos|setecientos|ochocientos|novecientos|(un )?mill(o|ó)n)" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (match : _)) : _) -> @@ -198,6 +204,19 @@ ruleBigNumeral = Rule _ -> Nothing } +ruleBigNumeralMultipliable :: Rule +ruleBigNumeralMultipliable = Rule + { name = "1K or 1M in multipliable form" + , pattern = + [ regex + "(mil(lones)?)" + ] + , prod = \tokens -> case tokens of + (Token RegexMatch (GroupMatch (match : _)) : _) -> + HashMap.lookup (Text.toLower match) bigNumbersMap >>= integer >>= withMultipliable + _ -> Nothing + } + ruleTwoPartHundreds :: Rule ruleTwoPartHundreds = Rule { name = "2..9 cientos" @@ -224,6 +243,19 @@ ruleNumeralHundredsAndSmaller = Rule _ -> Nothing } +ruleNumeralMultiply :: Rule +ruleNumeralMultiply = Rule + { name = "2..999 " + , pattern = + [ numberBetween 2 1000 + , Predicate isMultipliable + ] + , prod = \tokens -> case tokens of + (Token Numeral NumeralData { TNumeral.value = v1 } : Token Numeral NumeralData { TNumeral.value = v2 } : _) -> + double $ v1 * v2 + _ -> Nothing + } + ruleNumeralDotNumeral :: Rule ruleNumeralDotNumeral = Rule { name = "number dot number" @@ -268,6 +300,8 @@ rules = , ruleNumeralTwentyToNinetyTens , ruleNumeralTwentyOneToNinetyNine , ruleBigNumeral + , ruleBigNumeralMultipliable + , ruleNumeralMultiply , ruleTwoPartHundreds , ruleNumeralHundredsAndSmaller , ruleNumeralDotNumeral diff --git a/Duckling/Ranking/Classifiers/ES_AR.hs b/Duckling/Ranking/Classifiers/ES_AR.hs index fec87c20..2c53902a 100644 --- a/Duckling/Ranking/Classifiers/ES_AR.hs +++ b/Duckling/Ranking/Classifiers/ES_AR.hs @@ -242,13 +242,11 @@ classifiers n = 1}}), ("big number 100 to 1K", Classifier{okData = - ClassData{prior = -0.6931471805599453, - unseen = -1.3862943611198906, + ClassData{prior = 0.0, unseen = -1.3862943611198906, likelihoods = HashMap.fromList [("", 0.0)], n = 2}, koData = - ClassData{prior = -0.6931471805599453, - unseen = 
-1.3862943611198906, - likelihoods = HashMap.fromList [("", 0.0)], n = 2}}), + ClassData{prior = -infinity, unseen = -0.6931471805599453, + likelihoods = HashMap.fromList [], n = 0}}), ("del mediod\237a", Classifier{okData = ClassData{prior = 0.0, unseen = -1.0986122886681098, @@ -786,6 +784,13 @@ classifiers koData = ClassData{prior = 0.0, unseen = -2.0794415416798357, likelihoods = HashMap.fromList [("", 0.0)], n = 6}}), + ("1K or 1M in multipliable form", + Classifier{okData = + ClassData{prior = -infinity, unseen = -0.6931471805599453, + likelihoods = HashMap.fromList [], n = 0}, + koData = + ClassData{prior = 0.0, unseen = -1.3862943611198906, + likelihoods = HashMap.fromList [("", 0.0)], n = 2}}), ("afternoon", Classifier{okData = ClassData{prior = -0.2876820724517809, @@ -848,7 +853,7 @@ classifiers likelihoods = HashMap.fromList [("integer (numeric)", -0.40546510810816444), - ("big number 100 to 1K", -1.0986122886681098)], + ("1K or 1M in multipliable form", -1.0986122886681098)], n = 7}}), ("en ", Classifier{okData = diff --git a/Duckling/Ranking/Classifiers/ES_CL.hs b/Duckling/Ranking/Classifiers/ES_CL.hs index d2ab2718..b5f85fbf 100644 --- a/Duckling/Ranking/Classifiers/ES_CL.hs +++ b/Duckling/Ranking/Classifiers/ES_CL.hs @@ -242,13 +242,11 @@ classifiers n = 1}}), ("big number 100 to 1K", Classifier{okData = - ClassData{prior = -0.6931471805599453, - unseen = -1.3862943611198906, + ClassData{prior = 0.0, unseen = -1.3862943611198906, likelihoods = HashMap.fromList [("", 0.0)], n = 2}, koData = - ClassData{prior = -0.6931471805599453, - unseen = -1.3862943611198906, - likelihoods = HashMap.fromList [("", 0.0)], n = 2}}), + ClassData{prior = -infinity, unseen = -0.6931471805599453, + likelihoods = HashMap.fromList [], n = 0}}), ("del mediod\237a", Classifier{okData = ClassData{prior = 0.0, unseen = -1.0986122886681098, @@ -786,6 +784,13 @@ classifiers koData = ClassData{prior = 0.0, unseen = -2.0794415416798357, likelihoods = HashMap.fromList [("", 0.0)], 
n = 6}}), + ("1K or 1M in multipliable form", + Classifier{okData = + ClassData{prior = -infinity, unseen = -0.6931471805599453, + likelihoods = HashMap.fromList [], n = 0}, + koData = + ClassData{prior = 0.0, unseen = -1.3862943611198906, + likelihoods = HashMap.fromList [("", 0.0)], n = 2}}), ("afternoon", Classifier{okData = ClassData{prior = -0.2876820724517809, @@ -848,7 +853,7 @@ classifiers likelihoods = HashMap.fromList [("integer (numeric)", -0.40546510810816444), - ("big number 100 to 1K", -1.0986122886681098)], + ("1K or 1M in multipliable form", -1.0986122886681098)], n = 7}}), ("en ", Classifier{okData = diff --git a/Duckling/Ranking/Classifiers/ES_CO.hs b/Duckling/Ranking/Classifiers/ES_CO.hs index d99e6512..5b748799 100644 --- a/Duckling/Ranking/Classifiers/ES_CO.hs +++ b/Duckling/Ranking/Classifiers/ES_CO.hs @@ -242,13 +242,11 @@ classifiers n = 1}}), ("big number 100 to 1K", Classifier{okData = - ClassData{prior = -0.6931471805599453, - unseen = -1.3862943611198906, + ClassData{prior = 0.0, unseen = -1.3862943611198906, likelihoods = HashMap.fromList [("", 0.0)], n = 2}, koData = - ClassData{prior = -0.6931471805599453, - unseen = -1.3862943611198906, - likelihoods = HashMap.fromList [("", 0.0)], n = 2}}), + ClassData{prior = -infinity, unseen = -0.6931471805599453, + likelihoods = HashMap.fromList [], n = 0}}), ("del mediod\237a", Classifier{okData = ClassData{prior = 0.0, unseen = -1.0986122886681098, @@ -786,6 +784,13 @@ classifiers koData = ClassData{prior = 0.0, unseen = -2.0794415416798357, likelihoods = HashMap.fromList [("", 0.0)], n = 6}}), + ("1K or 1M in multipliable form", + Classifier{okData = + ClassData{prior = -infinity, unseen = -0.6931471805599453, + likelihoods = HashMap.fromList [], n = 0}, + koData = + ClassData{prior = 0.0, unseen = -1.3862943611198906, + likelihoods = HashMap.fromList [("", 0.0)], n = 2}}), ("afternoon", Classifier{okData = ClassData{prior = -0.2876820724517809, @@ -848,7 +853,7 @@ classifiers likelihoods = 
HashMap.fromList [("integer (numeric)", -0.40546510810816444), - ("big number 100 to 1K", -1.0986122886681098)], + ("1K or 1M in multipliable form", -1.0986122886681098)], n = 7}}), ("en ", Classifier{okData = diff --git a/Duckling/Ranking/Classifiers/ES_ES.hs b/Duckling/Ranking/Classifiers/ES_ES.hs index 39b601d0..631b629a 100644 --- a/Duckling/Ranking/Classifiers/ES_ES.hs +++ b/Duckling/Ranking/Classifiers/ES_ES.hs @@ -242,13 +242,11 @@ classifiers n = 1}}), ("big number 100 to 1K", Classifier{okData = - ClassData{prior = -0.6931471805599453, - unseen = -1.3862943611198906, + ClassData{prior = 0.0, unseen = -1.3862943611198906, likelihoods = HashMap.fromList [("", 0.0)], n = 2}, koData = - ClassData{prior = -0.6931471805599453, - unseen = -1.3862943611198906, - likelihoods = HashMap.fromList [("", 0.0)], n = 2}}), + ClassData{prior = -infinity, unseen = -0.6931471805599453, + likelihoods = HashMap.fromList [], n = 0}}), ("del mediod\237a", Classifier{okData = ClassData{prior = 0.0, unseen = -1.0986122886681098, @@ -786,6 +784,13 @@ classifiers koData = ClassData{prior = 0.0, unseen = -2.0794415416798357, likelihoods = HashMap.fromList [("", 0.0)], n = 6}}), + ("1K or 1M in multipliable form", + Classifier{okData = + ClassData{prior = -infinity, unseen = -0.6931471805599453, + likelihoods = HashMap.fromList [], n = 0}, + koData = + ClassData{prior = 0.0, unseen = -1.3862943611198906, + likelihoods = HashMap.fromList [("", 0.0)], n = 2}}), ("afternoon", Classifier{okData = ClassData{prior = -0.2876820724517809, @@ -848,7 +853,7 @@ classifiers likelihoods = HashMap.fromList [("integer (numeric)", -0.40546510810816444), - ("big number 100 to 1K", -1.0986122886681098)], + ("1K or 1M in multipliable form", -1.0986122886681098)], n = 7}}), ("en ", Classifier{okData = diff --git a/Duckling/Ranking/Classifiers/ES_MX.hs b/Duckling/Ranking/Classifiers/ES_MX.hs index d3ce3632..57b380a7 100644 --- a/Duckling/Ranking/Classifiers/ES_MX.hs +++ 
b/Duckling/Ranking/Classifiers/ES_MX.hs @@ -242,13 +242,11 @@ classifiers n = 1}}), ("big number 100 to 1K", Classifier{okData = - ClassData{prior = -0.6931471805599453, - unseen = -1.3862943611198906, + ClassData{prior = 0.0, unseen = -1.3862943611198906, likelihoods = HashMap.fromList [("", 0.0)], n = 2}, koData = - ClassData{prior = -0.6931471805599453, - unseen = -1.3862943611198906, - likelihoods = HashMap.fromList [("", 0.0)], n = 2}}), + ClassData{prior = -infinity, unseen = -0.6931471805599453, + likelihoods = HashMap.fromList [], n = 0}}), ("del mediod\237a", Classifier{okData = ClassData{prior = 0.0, unseen = -1.0986122886681098, @@ -786,6 +784,13 @@ classifiers koData = ClassData{prior = 0.0, unseen = -2.0794415416798357, likelihoods = HashMap.fromList [("", 0.0)], n = 6}}), + ("1K or 1M in multipliable form", + Classifier{okData = + ClassData{prior = -infinity, unseen = -0.6931471805599453, + likelihoods = HashMap.fromList [], n = 0}, + koData = + ClassData{prior = 0.0, unseen = -1.3862943611198906, + likelihoods = HashMap.fromList [("", 0.0)], n = 2}}), ("afternoon", Classifier{okData = ClassData{prior = -0.2876820724517809, @@ -848,7 +853,7 @@ classifiers likelihoods = HashMap.fromList [("integer (numeric)", -0.40546510810816444), - ("big number 100 to 1K", -1.0986122886681098)], + ("1K or 1M in multipliable form", -1.0986122886681098)], n = 7}}), ("en ", Classifier{okData = diff --git a/Duckling/Ranking/Classifiers/ES_PE.hs b/Duckling/Ranking/Classifiers/ES_PE.hs index b56583ae..a4f32d95 100644 --- a/Duckling/Ranking/Classifiers/ES_PE.hs +++ b/Duckling/Ranking/Classifiers/ES_PE.hs @@ -242,13 +242,11 @@ classifiers n = 1}}), ("big number 100 to 1K", Classifier{okData = - ClassData{prior = -0.6931471805599453, - unseen = -1.3862943611198906, + ClassData{prior = 0.0, unseen = -1.3862943611198906, likelihoods = HashMap.fromList [("", 0.0)], n = 2}, koData = - ClassData{prior = -0.6931471805599453, - unseen = -1.3862943611198906, - likelihoods = 
HashMap.fromList [("", 0.0)], n = 2}}), + ClassData{prior = -infinity, unseen = -0.6931471805599453, + likelihoods = HashMap.fromList [], n = 0}}), ("del mediod\237a", Classifier{okData = ClassData{prior = 0.0, unseen = -1.0986122886681098, @@ -786,6 +784,13 @@ classifiers koData = ClassData{prior = 0.0, unseen = -2.0794415416798357, likelihoods = HashMap.fromList [("", 0.0)], n = 6}}), + ("1K or 1M in multipliable form", + Classifier{okData = + ClassData{prior = -infinity, unseen = -0.6931471805599453, + likelihoods = HashMap.fromList [], n = 0}, + koData = + ClassData{prior = 0.0, unseen = -1.3862943611198906, + likelihoods = HashMap.fromList [("", 0.0)], n = 2}}), ("afternoon", Classifier{okData = ClassData{prior = -0.2876820724517809, @@ -848,7 +853,7 @@ classifiers likelihoods = HashMap.fromList [("integer (numeric)", -0.40546510810816444), - ("big number 100 to 1K", -1.0986122886681098)], + ("1K or 1M in multipliable form", -1.0986122886681098)], n = 7}}), ("en ", Classifier{okData = diff --git a/Duckling/Ranking/Classifiers/ES_VE.hs b/Duckling/Ranking/Classifiers/ES_VE.hs index 97ea8ea3..a7e5929d 100644 --- a/Duckling/Ranking/Classifiers/ES_VE.hs +++ b/Duckling/Ranking/Classifiers/ES_VE.hs @@ -242,13 +242,11 @@ classifiers n = 1}}), ("big number 100 to 1K", Classifier{okData = - ClassData{prior = -0.6931471805599453, - unseen = -1.3862943611198906, + ClassData{prior = 0.0, unseen = -1.3862943611198906, likelihoods = HashMap.fromList [("", 0.0)], n = 2}, koData = - ClassData{prior = -0.6931471805599453, - unseen = -1.3862943611198906, - likelihoods = HashMap.fromList [("", 0.0)], n = 2}}), + ClassData{prior = -infinity, unseen = -0.6931471805599453, + likelihoods = HashMap.fromList [], n = 0}}), ("del mediod\237a", Classifier{okData = ClassData{prior = 0.0, unseen = -1.0986122886681098, @@ -786,6 +784,13 @@ classifiers koData = ClassData{prior = 0.0, unseen = -2.0794415416798357, likelihoods = HashMap.fromList [("", 0.0)], n = 6}}), + ("1K or 1M in 
multipliable form", + Classifier{okData = + ClassData{prior = -infinity, unseen = -0.6931471805599453, + likelihoods = HashMap.fromList [], n = 0}, + koData = + ClassData{prior = 0.0, unseen = -1.3862943611198906, + likelihoods = HashMap.fromList [("", 0.0)], n = 2}}), ("afternoon", Classifier{okData = ClassData{prior = -0.2876820724517809, @@ -848,7 +853,7 @@ classifiers likelihoods = HashMap.fromList [("integer (numeric)", -0.40546510810816444), - ("big number 100 to 1K", -1.0986122886681098)], + ("1K or 1M in multipliable form", -1.0986122886681098)], n = 7}}), ("en ", Classifier{okData = diff --git a/Duckling/Ranking/Classifiers/ES_XX.hs b/Duckling/Ranking/Classifiers/ES_XX.hs index b61b14c2..10e20591 100644 --- a/Duckling/Ranking/Classifiers/ES_XX.hs +++ b/Duckling/Ranking/Classifiers/ES_XX.hs @@ -242,13 +242,11 @@ classifiers n = 1}}), ("big number 100 to 1K", Classifier{okData = - ClassData{prior = -0.6931471805599453, - unseen = -1.3862943611198906, + ClassData{prior = 0.0, unseen = -1.3862943611198906, likelihoods = HashMap.fromList [("", 0.0)], n = 2}, koData = - ClassData{prior = -0.6931471805599453, - unseen = -1.3862943611198906, - likelihoods = HashMap.fromList [("", 0.0)], n = 2}}), + ClassData{prior = -infinity, unseen = -0.6931471805599453, + likelihoods = HashMap.fromList [], n = 0}}), ("del mediod\237a", Classifier{okData = ClassData{prior = 0.0, unseen = -1.0986122886681098, @@ -786,6 +784,13 @@ classifiers koData = ClassData{prior = 0.0, unseen = -2.0794415416798357, likelihoods = HashMap.fromList [("", 0.0)], n = 6}}), + ("1K or 1M in multipliable form", + Classifier{okData = + ClassData{prior = -infinity, unseen = -0.6931471805599453, + likelihoods = HashMap.fromList [], n = 0}, + koData = + ClassData{prior = 0.0, unseen = -1.3862943611198906, + likelihoods = HashMap.fromList [("", 0.0)], n = 2}}), ("afternoon", Classifier{okData = ClassData{prior = -0.2876820724517809, @@ -848,7 +853,7 @@ classifiers likelihoods = HashMap.fromList [("integer 
(numeric)", -0.40546510810816444), - ("big number 100 to 1K", -1.0986122886681098)], + ("1K or 1M in multipliable form", -1.0986122886681098)], n = 7}}), ("en ", Classifier{okData =