From 9509e042dc68aeff7b5f80e1a38a4398bdbe4aaa Mon Sep 17 00:00:00 2001 From: andhai Date: Thu, 14 Jul 2022 16:47:23 -0700 Subject: [PATCH] DE-Numeral-complex-German-numerals (#699) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Summary: The commit adds a rule and an underlying parser for German numeral expressions representing (integer) numbers smaller than 1 million. Unlike in English, those numbers are represented by single words, e.g. "neunhundertsiebenundachtzigtausendsechshundertvierundfünfzig" (987654). Other rules are simplified or removed to eliminate redundancies. Pull Request resolved: https://github.com/facebook/duckling/pull/699 Reviewed By: patapizza Differential Revision: D37716120 Pulled By: stroxler fbshipit-source-id: 90b26e253259c5bc1aaa76f3972537c2361f6bb3 --- Duckling/Numeral/DE/Corpus.hs | 32 ++++-- Duckling/Numeral/DE/NumParser.hs | 160 ++++++++++++++++++++++++++ Duckling/Numeral/DE/Rules.hs | 137 +++------------------- Duckling/Ranking/Classifiers/DE_XX.hs | 132 ++++++++++++--------- Duckling/Ranking/Classifiers/IT_XX.hs | 4 +- duckling.cabal | 1 + 6 files changed, 283 insertions(+), 183 deletions(-) create mode 100644 Duckling/Numeral/DE/NumParser.hs diff --git a/Duckling/Numeral/DE/Corpus.hs b/Duckling/Numeral/DE/Corpus.hs index 4149ccd5..69b03132 100644 --- a/Duckling/Numeral/DE/Corpus.hs +++ b/Duckling/Numeral/DE/Corpus.hs @@ -33,6 +33,8 @@ allExamples = concat , examples (NumeralValue 1) [ "1" , "eins" + , "Eine" + , "einen" ] , examples (NumeralValue 3) [ "3" @@ -46,11 +48,12 @@ allExamples = concat ] , examples (NumeralValue 30) [ "30" + , "dreißig" , "dreissig" ] , examples (NumeralValue 33) [ "33" - , "drei Und dreissig" + , "dreiunddreißig" , "dreiunddreissig" , "0033" ] @@ -76,11 +79,11 @@ allExamples = concat ] , examples (NumeralValue 200) [ "200" - , "zwei hundert" + , "zweihundert" ] , examples (NumeralValue 102) [ "102" - , "Hundert zwei" + , "Hundertzwei" ] , examples
(NumeralValue 1.1) [ "1,1" @@ -97,12 +100,15 @@ allExamples = concat , "100000" , "100K" , "100k" + , "einhunderttausend" + , "hunderttausend" ] , examples (NumeralValue 3000000) [ "3M" , "3000K" , "3000000" , "3.000.000" + , "drei Millionen" ] , examples (NumeralValue 1200000) [ "1.200.000" @@ -120,30 +126,34 @@ allExamples = concat , "-1200K" , "-,0012G" ] + , examples (NumeralValue 1852) + [ "eintausendachthundertzweiundfünfzig" + , "tausendachthundertzweiundfünfzig" + , "achtzehnhundertzweiundfünfzig"] , examples (NumeralValue 5000) [ "5 tausend" - , "fünf tausend" + , "fünftausend" ] , examples (NumeralValue 200000) - [ "zwei hundert tausend" + [ "zweihunderttausend" ] , examples (NumeralValue 721012) - [ "sieben hundert einundzwanzig tausend zwölf" + [ "siebenhunderteinundzwanzigtausendzwölf" + , "siebenhunderteinundzwanzigtausendundzwölf" ] , examples (NumeralValue 31256721) - [ "ein und dreissig millionen zwei hundert sechs und fünfzig tausend sieben hundert ein und zwanzig" + [ "einunddreissig millionen zweihundertsechsundfünfzigtausendsiebenhunderteinundzwanzig" + , "einunddreißig Millionen zweihundertsechsundfünfzigtausendundsiebenhunderteinundzwanzig" ] , examples (NumeralValue 1416.15) [ "1416,15" - ] - , examples (NumeralValue 1416.15) - [ "1.416,15" + , "tausendvierhundertsechzehn Komma fünfzehn" ] , examples (NumeralValue 1000000.0) [ "1.000.000,00", "eine million" ] , examples (NumeralValue 2771090092000000.0) - [ "zwei billiarden sieben hundert ein und siebzig billionen neunzig milliarden zwei und neunzig millionen" + [ "zwei billiarden siebenhunderteinundsiebzig billionen neunzig milliarden zweiundneunzig millionen" ] ] diff --git a/Duckling/Numeral/DE/NumParser.hs b/Duckling/Numeral/DE/NumParser.hs new file mode 100644 index 00000000..0c4141cf --- /dev/null +++ b/Duckling/Numeral/DE/NumParser.hs @@ -0,0 +1,160 @@ +{-# LANGUAGE DeriveFunctor #-} + +module Duckling.Numeral.DE.NumParser (parseNumeral) where + +import Prelude +import 
Control.Applicative +import Data.Char +import Data.List +import Data.Foldable +import Data.String + +newtype Parser a + = Parser { runParser :: String -> Maybe (a, String) } + deriving Functor + +char :: Char -> Parser Char +char c = Parser p + where + p [] = Nothing + p (x:xs) + | x == c = Just (x, xs) + | otherwise = Nothing + +instance Applicative Parser where + pure a = Parser (\s -> Just (a, s)) + (Parser fp) <*> xp = Parser $ \s -> + case fp s of + Nothing -> Nothing + Just (f,s') -> runParser (f <$> xp) s' + +instance Alternative Parser where + empty = Parser (const Nothing) + Parser p1 <|> Parser p2 = Parser $ liftA2 (<|>) p1 p2 + +type NumParser = Parser Integer + +(.+.) :: NumParser -> NumParser -> NumParser +p .+. p' = (+) <$> p <*> p' + +(.*.) :: NumParser -> NumParser -> NumParser +p .*. p' = (*) <$> p <*> p' + +infixl 6 .+. +infixl 7 .*. + +opt :: NumParser -> NumParser +opt p = p <|> Parser p' + where + p' s = Just (0, s) + +data NumItem = NumItem { base :: NumParser + , plus10 :: NumParser + , times10 :: [NumParser] + } + +defaultNumItem :: Integer -> String -> NumItem +defaultNumItem value form = NumItem { base = p + , plus10 = p .+. ten + , times10 = [p .*. ty] + } where p = assign value form + +type Assignment = Integer -> String -> NumParser + +assign :: Assignment +assign value = foldr (\c p -> (1 <$ char c) .*. 
p) (pure value) + +ten :: NumParser +ten = assign 10 "zehn" + +ty :: NumParser +ty = assign 10 "zig" + +hundred :: NumParser +hundred = assign 100 "hundert" + +thousand :: NumParser +thousand = assign 1000 "tausend" + +und :: NumParser +und = assign 0 "und" + +one :: NumItem +one = (defaultNumItem 1 "ein") { plus10 = assign 11 "elf" + , times10 = [ ten ] } + +two :: NumItem +two = (defaultNumItem 2 "zwei") { plus10 = assign 12 "zwölf" + , times10 = [ assign 20 "zwanzig" ] } +three :: NumItem +three = (defaultNumItem 3 "drei") { times10 = [ assign 30 "dreißig" + , assign 30 "dreissig" ] } + +four :: NumItem +four = defaultNumItem 4 "vier" + +five :: NumItem +five = defaultNumItem 5 "fünf" + +six :: NumItem +six = (defaultNumItem 6 "sechs") { plus10 = assign 16 "sechzehn" + , times10 = [ assign 60 "sechzig" ] } + +seven :: NumItem +seven = (defaultNumItem 7 "sieben") { plus10 = assign 17 "siebzehn" + , times10 = [ assign 70 "siebzig" ] } + +eight :: NumItem +eight = defaultNumItem 8 "acht" + +nine :: NumItem +nine = defaultNumItem 9 "neun" + +digitLexicon :: [NumItem] +digitLexicon = [one, two, three, four, five, six, seven, eight, nine] + +from1to9 :: NumParser +from1to9 = foldr ((<|>) . base) empty digitLexicon + +tensFrom20 :: NumParser +tensFrom20 = asum (concatMap times10 (tail digitLexicon)) + +from1to99 :: NumParser +from1to99 = opt (from1to9 .+. und) .+. tensFrom20 + <|> foldr ((<|>) . plus10) empty digitLexicon + <|> ten + <|> from1to9 + +from1to999 :: NumParser +from1to999 = opt (from1to9 .*. hundred .+. opt und) .+. opt from1to99 + +from1to999999 :: NumParser +from1to999999 = opt (from1to999 .*. thousand .+. opt und) .+. opt from1to999 + +from1to999999' :: NumParser +from1to999999' = Parser p + where + p s + | isPrefixOf "hundert" s || isPrefixOf "tausend" s + = runParser from1to999999 ("ein" ++ s) + | otherwise + = runParser from1to999999 s + +fromYear1100to1999 :: NumParser +fromYear1100to1999 = asum ((\n -> plus10 n .*. hundred) <$> digitLexicon) + .+. 
opt (opt und .+. from1to99) + +allNumerals :: NumParser +allNumerals = fromYear1100to1999 + <|> from1to999999' + +removeInflection :: (Integer, String) -> Maybe Integer +removeInflection (n, suffix) + | n `mod` 10 == 1 && suffix `elem` inflection = Just n + where + inflection = ["s", "e", "em", "en", "er", "es"] +removeInflection (n, "") = Just n +removeInflection _ = Nothing + +parseNumeral :: String -> Maybe Integer +parseNumeral s = removeInflection =<< runParser allNumerals s diff --git a/Duckling/Numeral/DE/Rules.hs b/Duckling/Numeral/DE/Rules.hs index 93581e76..79553595 100644 --- a/Duckling/Numeral/DE/Rules.hs +++ b/Duckling/Numeral/DE/Rules.hs @@ -27,6 +27,7 @@ import Duckling.Numeral.Types (NumeralData (..)) import Duckling.Regex.Types import Duckling.Types import qualified Duckling.Numeral.Types as TNumeral +import Duckling.Numeral.DE.NumParser ruleNumeralsPrefixWithNegativeOrMinus :: Rule ruleNumeralsPrefixWithNegativeOrMinus = Rule @@ -50,15 +51,6 @@ ruleFew = Rule , prod = \_ -> integer 3 } -ruleTen :: Rule -ruleTen = Rule - { name = "ten" - , pattern = - [ regex "zehn" - ] - , prod = \_ -> integer 10 >>= withGrain 1 - } - ruleDecimalWithThousandsSeparator :: Rule ruleDecimalWithThousandsSeparator = Rule { name = "decimal with thousands separator" @@ -84,41 +76,6 @@ ruleDecimalNumeral = Rule _ -> Nothing } --- TODO: Single-word composition (#110) -ruleInteger3 :: Rule -ruleInteger3 = Rule - { name = "integer ([2-9][1-9])" - , pattern = - [ regex "(ein|zwei|drei|vier|fünf|sechs|sieben|acht|neun)und(zwanzig|dreissig|dreißig|vierzig|fünfzig|sechzig|siebzig|achtzig|neunzig)" - ] - , prod = \tokens -> case tokens of - (Token RegexMatch (GroupMatch (m1:m2:_)):_) -> do - v1 <- case Text.toLower m1 of - "ein" -> Just 1 - "zwei" -> Just 2 - "drei" -> Just 3 - "vier" -> Just 4 - "fünf" -> Just 5 - "sechs" -> Just 6 - "sieben" -> Just 7 - "acht" -> Just 8 - "neun" -> Just 9 - _ -> Nothing - v2 <- case Text.toLower m2 of - "zwanzig" -> Just 20 - "dreissig" -> Just 
30 - "dreißig" -> Just 30 - "vierzig" -> Just 40 - "fünfzig" -> Just 50 - "sechzig" -> Just 60 - "siebzig" -> Just 70 - "achtzig" -> Just 80 - "neunzig" -> Just 90 - _ -> Nothing - integer $ v1 + v2 - _ -> Nothing - } - ruleNumeralsUnd :: Rule ruleNumeralsUnd = Rule { name = "numbers und" @@ -221,78 +178,13 @@ rulePowersOfTen = Rule _ -> Nothing } -zeroNineteenMap :: HashMap Text Integer -zeroNineteenMap = HashMap.fromList - [ ("keine", 0) - , ("null", 0) - , ("nichts", 0) - , ("keiner", 0) - , ("kein", 0) - , ("keins", 0) - , ("keinen", 0) - , ("keines", 0) - , ("einer", 1) - , ("eins", 1) - , ("ein", 1) - , ("eine", 1) - , ("einser", 1) - , ("zwei", 2) - , ("drei", 3) - , ("vier", 4) - , ("fünf", 5) - , ("sechs", 6) - , ("sieben", 7) - , ("acht", 8) - , ("neun", 9) - , ("zehn", 10) - , ("elf", 11) - , ("zwölf", 12) - , ("dreizehn", 13) - , ("vierzehn", 14) - , ("fünfzehn", 15) - , ("sechzehn", 16) - , ("siebzehn", 17) - , ("achtzehn", 18) - , ("neunzehn", 19) - ] - --- TODO: Single-word composition (#110) -ruleZeroToNineteen :: Rule -ruleZeroToNineteen = Rule - { name = "integer (0..19)" +ruleZero :: Rule +ruleZero = Rule + { name = "integer 0" , pattern = - [ regex "(keine[rn]|keine?s?|null|nichts|eins?(er?)?|zwei|dreizehn|drei|vierzehn|vier|fünfzehn|fünf|sechzehn|sechs|siebzehn|sieben|achtzehn|acht|neunzehn|neun|elf|zwölf)" + [ regex "(keine(m|n|r|s)?|keins?|null|nichts)" ] - , prod = \tokens -> case tokens of - (Token RegexMatch (GroupMatch (match:_)):_) -> - HashMap.lookup (Text.toLower match) zeroNineteenMap >>= integer - _ -> Nothing - } - -tensMap :: HashMap Text Integer -tensMap = HashMap.fromList - [ ( "zwanzig" , 20 ) - , ( "dreissig", 30 ) - , ( "dreißig" , 30 ) - , ( "vierzig" , 40 ) - , ( "fünfzig" , 50 ) - , ( "sechzig" , 60 ) - , ( "siebzig" , 70 ) - , ( "achtzig" , 80 ) - , ( "neunzig" , 90 ) - ] - --- TODO: Single-word composition (#110) -ruleInteger2 :: Rule -ruleInteger2 = Rule - { name = "integer (20..90)" - , pattern = - [ regex 
"(zwanzig|dreissig|dreißig|vierzig|fünfzig|sechzig|siebzig|achtzig|neunzig)" - ] - , prod = \tokens -> case tokens of - (Token RegexMatch (GroupMatch (match:_)):_) -> - HashMap.lookup (Text.toLower match) tensMap >>= integer - _ -> Nothing + , prod = \_ -> integer 0 } ruleNumeralDotNumeral :: Rule @@ -323,6 +215,17 @@ ruleIntegerWithThousandsSeparator = Rule _ -> Nothing } +ruleAllNumeralWords :: Rule +ruleAllNumeralWords = Rule + { name = "simple and complex numerals written as one word" + , pattern = [regex "(ein|zwei|drei|vier|fünf|sech|sieb|acht|neun|zehn|elf|zwölf|hundert|tausend)?([^\\s]+)?(eine[m|n|r|s]?|eins?|zwei|drei|vier|fünf|sechs|sieben|acht|neun|zehn|elf|zwölf|[s|ß|z]ig|hundert|tausend)"] + , prod = \tokens -> case tokens of + (Token RegexMatch (GroupMatch matches) : _) -> + (parseNumeral $ concat $ Text.unpack . Text.toLower <$> matches) + >>= integer + _ -> Nothing + } + rules :: [Rule] rules = [ ruleCouple @@ -330,8 +233,6 @@ rules = , ruleDecimalWithThousandsSeparator , ruleDozen , ruleFew - , ruleInteger2 - , ruleInteger3 , ruleIntegerWithThousandsSeparator , ruleIntersect , ruleMultiply @@ -340,6 +241,6 @@ rules = , ruleNumeralsSuffixesKMG , ruleNumeralsUnd , rulePowersOfTen - , ruleTen - , ruleZeroToNineteen + , ruleZero + , ruleAllNumeralWords ] diff --git a/Duckling/Ranking/Classifiers/DE_XX.hs b/Duckling/Ranking/Classifiers/DE_XX.hs index 89c9bfcb..f5997806 100644 --- a/Duckling/Ranking/Classifiers/DE_XX.hs +++ b/Duckling/Ranking/Classifiers/DE_XX.hs @@ -99,13 +99,6 @@ classifiers koData = ClassData{prior = -infinity, unseen = -0.6931471805599453, likelihoods = HashMap.fromList [], n = 0}}), - ("integer (20..90)", - Classifier{okData = - ClassData{prior = 0.0, unseen = -1.0986122886681098, - likelihoods = HashMap.fromList [("", 0.0)], n = 1}, - koData = - ClassData{prior = -infinity, unseen = -0.6931471805599453, - likelihoods = HashMap.fromList [], n = 0}}), ("Maha Shivaratri", Classifier{okData = ClassData{prior = 0.0, unseen = 
-1.0986122886681098, @@ -346,14 +339,6 @@ classifiers (" (ordinal)", -2.8622008809294686), ("hour", -2.8622008809294686), ("minute", -2.169053700369523)], n = 8}}), - ("integer (0..19)", - Classifier{okData = - ClassData{prior = -0.10008345855698253, - unseen = -3.6888794541139363, - likelihoods = HashMap.fromList [("", 0.0)], n = 38}, - koData = - ClassData{prior = -2.3513752571634776, unseen = -1.791759469228055, - likelihoods = HashMap.fromList [("", 0.0)], n = 4}}), ("between and (interval)", Classifier{okData = ClassData{prior = 0.0, unseen = -2.1972245773362196, @@ -566,7 +551,8 @@ classifiers unseen = -1.6094379124341003, likelihoods = HashMap.fromList - [("integer (0..19)hour (grain)", -0.6931471805599453), + [("simple and complex numerals written as one wordhour (grain)", + -0.6931471805599453), ("hour", -0.6931471805599453)], n = 1}, koData = @@ -574,9 +560,18 @@ classifiers unseen = -2.1972245773362196, likelihoods = HashMap.fromList - [("integer (0..19)hour (grain)", -0.6931471805599453), + [("simple and complex numerals written as one wordhour (grain)", + -0.6931471805599453), ("hour", -0.6931471805599453)], n = 3}}), + ("simple and complex numerals written as one word", + Classifier{okData = + ClassData{prior = -9.763846956391606e-2, + unseen = -3.713572066704308, + likelihoods = HashMap.fromList [("", 0.0)], n = 39}, + koData = + ClassData{prior = -2.374905754573672, unseen = -1.791759469228055, + likelihoods = HashMap.fromList [("", 0.0)], n = 4}}), ("intersect", Classifier{okData = ClassData{prior = -0.12020308869575518, @@ -1617,7 +1612,8 @@ classifiers likelihoods = HashMap.fromList [("integer (numeric)", -6.995858860691034e-2), - ("integer (0..19)", -2.6946271807700692)], + ("simple and complex numerals written as one word", + -2.6946271807700692)], n = 72}, koData = ClassData{prior = -1.5664205273504095, @@ -1625,7 +1621,8 @@ classifiers likelihoods = HashMap.fromList [("integer (numeric)", -0.2113090936672069), - ("integer (0..19)", 
-1.6582280766035324)], + ("simple and complex numerals written as one word", + -1.6582280766035324)], n = 19}}), ("year", Classifier{okData = @@ -1656,47 +1653,62 @@ classifiers likelihoods = HashMap.fromList [("week", -1.9339339580085977), - ("integer (0..19)year (grain)", -3.3202283191284883), + ("simple and complex numerals written as one wordhour (grain)", + -3.3202283191284883), ("integer (numeric)day (grain)", -2.8094026953624978), ("couplehour (grain)", -3.7256934272366524), - ("integer (0..19)hour (grain)", -3.3202283191284883), + ("simple and complex numerals written as one wordyear (grain)", + -3.3202283191284883), ("second", -3.7256934272366524), ("integer (numeric)year (grain)", -3.7256934272366524), - ("day", -2.8094026953624978), ("year", -3.0325462466767075), + ("day", -2.8094026953624978), + ("simple and complex numerals written as one wordmonth (grain)", + -3.7256934272366524), + ("year", -3.0325462466767075), + ("simple and complex numerals written as one wordsecond (grain)", + -3.7256934272366524), ("integer (numeric)week (grain)", -2.627081138568543), - ("integer (0..19)month (grain)", -3.7256934272366524), - ("integer (0..19)second (grain)", -3.7256934272366524), ("hour", -2.8094026953624978), ("month", -3.7256934272366524), ("integer (numeric)minute (grain)", -2.627081138568543), - ("integer (0..19)minute (grain)", -3.7256934272366524), + ("simple and complex numerals written as one wordweek (grain)", + -2.472930458741285), ("minute", -2.472930458741285), ("integer (numeric)hour (grain)", -3.7256934272366524), - ("integer (0..19)week (grain)", -2.472930458741285)], + ("simple and complex numerals written as one wordminute (grain)", + -3.7256934272366524)], n = 30}, koData = ClassData{prior = -0.5937747067467416, unseen = -4.584967478670572, likelihoods = HashMap.fromList [("week", -2.62880082944807), - ("integer (0..19)year (grain)", -2.9652730660692823), + ("simple and complex numerals written as one wordhour (grain)", + -3.8815637979434374), 
("integer (numeric)day (grain)", -3.4760986898352733), - ("integer (0..19)hour (grain)", -3.8815637979434374), + ("simple and complex numerals written as one wordyear (grain)", + -2.9652730660692823), ("second", -2.9652730660692823), ("integer (numeric)second (grain)", -3.4760986898352733), ("integer (numeric)year (grain)", -3.4760986898352733), - ("day", -2.3774864011671633), ("year", -2.62880082944807), + ("day", -2.3774864011671633), + ("simple and complex numerals written as one wordmonth (grain)", + -2.9652730660692823), + ("year", -2.62880082944807), + ("simple and complex numerals written as one wordsecond (grain)", + -3.4760986898352733), ("integer (numeric)week (grain)", -3.188416617383492), - ("integer (0..19)month (grain)", -2.9652730660692823), - ("integer (0..19)second (grain)", -3.4760986898352733), ("hour", -3.188416617383492), ("month", -2.62880082944807), ("integer (numeric)minute (grain)", -3.4760986898352733), - ("integer (0..19)minute (grain)", -3.4760986898352733), + ("simple and complex numerals written as one wordday (grain)", + -2.9652730660692823), + ("simple and complex numerals written as one wordweek (grain)", + -3.188416617383492), ("integer (numeric)month (grain)", -3.4760986898352733), ("minute", -2.9652730660692823), ("coupleday (grain)", -3.4760986898352733), ("integer (numeric)hour (grain)", -3.4760986898352733), - ("integer (0..19)day (grain)", -2.9652730660692823), - ("integer (0..19)week (grain)", -3.188416617383492)], + ("simple and complex numerals written as one wordminute (grain)", + -3.4760986898352733)], n = 37}}), ("Dienstag", Classifier{okData = @@ -1754,9 +1766,10 @@ classifiers ClassData{prior = 0.0, unseen = -2.0794415416798357, likelihoods = HashMap.fromList - [("hour", -0.8472978603872037), - ("integer (numeric)time-of-day (latent)", -1.252762968495368), - ("integer (20..90)time-of-day (latent)", -1.252762968495368)], + [("simple and complex numerals written as one wordtime-of-day (latent)", + -1.252762968495368), + 
("hour", -0.8472978603872037), + ("integer (numeric)time-of-day (latent)", -1.252762968495368)], n = 2}, koData = ClassData{prior = -infinity, unseen = -1.3862943611198906, @@ -2242,25 +2255,33 @@ classifiers likelihoods = HashMap.fromList [("week", -2.740840023925201), - ("integer (0..19)year (grain)", -3.028522096376982), + ("simple and complex numerals written as one wordhour (grain)", + -3.4339872044851463), ("integer (numeric)day (grain)", -3.4339872044851463), - ("integer (0..19)hour (grain)", -3.4339872044851463), + ("simple and complex numerals written as one wordyear (grain)", + -3.028522096376982), ("second", -3.028522096376982), ("integer (numeric)second (grain)", -3.4339872044851463), ("integer (numeric)year (grain)", -3.4339872044851463), - ("day", -2.3353749158170367), ("year", -2.740840023925201), + ("day", -2.3353749158170367), + ("simple and complex numerals written as one wordmonth (grain)", + -3.028522096376982), + ("year", -2.740840023925201), + ("simple and complex numerals written as one wordsecond (grain)", + -3.4339872044851463), ("integer (numeric)week (grain)", -3.4339872044851463), - ("integer (0..19)month (grain)", -3.028522096376982), - ("integer (0..19)second (grain)", -3.4339872044851463), ("hour", -3.028522096376982), ("month", -2.740840023925201), ("integer (numeric)minute (grain)", -3.4339872044851463), - ("integer (0..19)minute (grain)", -3.4339872044851463), + ("simple and complex numerals written as one wordday (grain)", + -3.028522096376982), + ("simple and complex numerals written as one wordweek (grain)", + -3.028522096376982), ("integer (numeric)month (grain)", -3.4339872044851463), ("minute", -3.028522096376982), ("coupleday (grain)", -3.028522096376982), ("integer (numeric)hour (grain)", -3.4339872044851463), - ("integer (0..19)day (grain)", -3.028522096376982), - ("integer (0..19)week (grain)", -3.028522096376982)], + ("simple and complex numerals written as one wordminute (grain)", + -3.4339872044851463)], n = 20}, 
koData = ClassData{prior = -infinity, unseen = -3.1354942159291497, @@ -2437,22 +2458,29 @@ classifiers likelihoods = HashMap.fromList [("week", -2.5257286443082556), - ("integer (0..19)year (grain)", -2.8134107167600364), ("integer (numeric)day (grain)", -3.2188758248682006), + ("simple and complex numerals written as one wordyear (grain)", + -2.8134107167600364), ("second", -2.8134107167600364), ("integer (numeric)second (grain)", -3.2188758248682006), ("integer (numeric)year (grain)", -3.2188758248682006), - ("day", -2.5257286443082556), ("year", -2.5257286443082556), + ("day", -2.5257286443082556), + ("simple and complex numerals written as one wordmonth (grain)", + -2.8134107167600364), + ("year", -2.5257286443082556), + ("simple and complex numerals written as one wordsecond (grain)", + -3.2188758248682006), ("integer (numeric)week (grain)", -2.8134107167600364), - ("integer (0..19)month (grain)", -2.8134107167600364), - ("integer (0..19)second (grain)", -3.2188758248682006), ("month", -2.5257286443082556), ("integer (numeric)minute (grain)", -3.2188758248682006), - ("integer (0..19)minute (grain)", -3.2188758248682006), + ("simple and complex numerals written as one wordday (grain)", + -2.8134107167600364), + ("simple and complex numerals written as one wordweek (grain)", + -3.2188758248682006), ("integer (numeric)month (grain)", -3.2188758248682006), ("minute", -2.8134107167600364), - ("integer (0..19)day (grain)", -2.8134107167600364), - ("integer (0..19)week (grain)", -3.2188758248682006)], + ("simple and complex numerals written as one wordminute (grain)", + -3.2188758248682006)], n = 16}, koData = ClassData{prior = -infinity, unseen = -2.9444389791664407, diff --git a/Duckling/Ranking/Classifiers/IT_XX.hs b/Duckling/Ranking/Classifiers/IT_XX.hs index b9981cbb..a3408b08 100644 --- a/Duckling/Ranking/Classifiers/IT_XX.hs +++ b/Duckling/Ranking/Classifiers/IT_XX.hs @@ -327,10 +327,10 @@ classifiers unseen = -4.31748811353631, likelihoods = HashMap.fromList 
- [(" (latent time-of-day)", -0.9718605830289658), + [(" (latent time-of-day)", -0.9718605830289657), ("intersect by \"di\", \"della\", \"del\"", -3.20545280453606), ("day", -2.3581549441488563), ("Lunedi", -3.6109179126442243), - ("hour", -0.9718605830289658), + ("hour", -0.9718605830289657), ("two time tokens separated by `di`", -3.20545280453606), ("Domenica", -3.6109179126442243)], n = 33}}), diff --git a/duckling.cabal b/duckling.cabal index 06e9e690..ca1f25b3 100644 --- a/duckling.cabal +++ b/duckling.cabal @@ -434,6 +434,7 @@ library , Duckling.Numeral.DA.Rules , Duckling.Numeral.DE.Corpus , Duckling.Numeral.DE.Rules + , Duckling.Numeral.DE.NumParser , Duckling.Numeral.EL.Corpus , Duckling.Numeral.EL.Rules , Duckling.Numeral.EN.Corpus