DE-Numeral-complex-German-numerals (#699)

Summary:
This commit adds a rule and an underlying parser for German numeral expressions representing (integer) numbers smaller than 1 million. Unlike in English, those numbers are written as single words, e.g. "neunhundertsiebenundachtzigtausendsechshundertvierundfünfzig" (987654). Other rules are simplified or removed to eliminate redundancies.

Pull Request resolved: https://github.com/facebook/duckling/pull/699

Reviewed By: patapizza

Differential Revision: D37716120

Pulled By: stroxler

fbshipit-source-id: 90b26e253259c5bc1aaa76f3972537c2361f6bb3
This commit is contained in:
andhai 2022-07-14 16:47:23 -07:00 committed by Facebook GitHub Bot
parent 1faab00741
commit 9509e042dc
6 changed files with 283 additions and 183 deletions

View File

@ -33,6 +33,8 @@ allExamples = concat
, examples (NumeralValue 1) , examples (NumeralValue 1)
[ "1" [ "1"
, "eins" , "eins"
, "Eine"
, "einen"
] ]
, examples (NumeralValue 3) , examples (NumeralValue 3)
[ "3" [ "3"
@ -46,11 +48,12 @@ allExamples = concat
] ]
, examples (NumeralValue 30) , examples (NumeralValue 30)
[ "30" [ "30"
, "dreißig"
, "dreissig" , "dreissig"
] ]
, examples (NumeralValue 33) , examples (NumeralValue 33)
[ "33" [ "33"
, "drei Und dreissig" , "dreiunddreißig"
, "dreiunddreissig" , "dreiunddreissig"
, "0033" , "0033"
] ]
@ -76,11 +79,11 @@ allExamples = concat
] ]
, examples (NumeralValue 200) , examples (NumeralValue 200)
[ "200" [ "200"
, "zwei hundert" , "zweihundert"
] ]
, examples (NumeralValue 102) , examples (NumeralValue 102)
[ "102" [ "102"
, "Hundert zwei" , "Hundertzwei"
] ]
, examples (NumeralValue 1.1) , examples (NumeralValue 1.1)
[ "1,1" [ "1,1"
@ -97,12 +100,15 @@ allExamples = concat
, "100000" , "100000"
, "100K" , "100K"
, "100k" , "100k"
, "einhunderttausend"
, "hunderttausend"
] ]
, examples (NumeralValue 3000000) , examples (NumeralValue 3000000)
[ "3M" [ "3M"
, "3000K" , "3000K"
, "3000000" , "3000000"
, "3.000.000" , "3.000.000"
, "drei Millionen"
] ]
, examples (NumeralValue 1200000) , examples (NumeralValue 1200000)
[ "1.200.000" [ "1.200.000"
@ -120,30 +126,34 @@ allExamples = concat
, "-1200K" , "-1200K"
, "-,0012G" , "-,0012G"
] ]
, examples (NumeralValue 1852)
[ "eintausendachthundertzweiundfünfzig"
, "tausendachthundertzweiundfünfzig"
, "achtzehnhundertzweiundfünfzig"]
, examples (NumeralValue 5000) , examples (NumeralValue 5000)
[ "5 tausend" [ "5 tausend"
, "fünf tausend" , "fünftausend"
] ]
, examples (NumeralValue 200000) , examples (NumeralValue 200000)
[ "zwei hundert tausend" [ "zweihunderttausend"
] ]
, examples (NumeralValue 721012) , examples (NumeralValue 721012)
[ "sieben hundert einundzwanzig tausend zwölf" [ "siebenhunderteinundzwanzigtausendzwölf"
, "siebenhunderteinundzwanzigtausendundzwölf"
] ]
, examples (NumeralValue 31256721) , examples (NumeralValue 31256721)
[ "ein und dreissig millionen zwei hundert sechs und fünfzig tausend sieben hundert ein und zwanzig" [ "einunddreissig millionen zweihundertsechsundfünfzigtausendsiebenhunderteinundzwanzig"
, "einunddreißig Millionen zweihundertsechsundfünfzigtausendundsiebenhunderteinundzwanzig"
] ]
, examples (NumeralValue 1416.15) , examples (NumeralValue 1416.15)
[ "1416,15" [ "1416,15"
] , "tausendvierhundertsechzehn Komma fünfzehn"
, examples (NumeralValue 1416.15)
[ "1.416,15"
] ]
, examples (NumeralValue 1000000.0) , examples (NumeralValue 1000000.0)
[ "1.000.000,00", [ "1.000.000,00",
"eine million" "eine million"
] ]
, examples (NumeralValue 2771090092000000.0) , examples (NumeralValue 2771090092000000.0)
[ "zwei billiarden sieben hundert ein und siebzig billionen neunzig milliarden zwei und neunzig millionen" [ "zwei billiarden siebenhunderteinundsiebzig billionen neunzig milliarden zweiundneunzig millionen"
] ]
] ]

View File

@ -0,0 +1,160 @@
{-# LANGUAGE DeriveFunctor #-}
module Duckling.Numeral.DE.NumParser (parseNumeral) where
import Prelude
import Control.Applicative
import Data.Char
import Data.List
import Data.Foldable
import Data.String
-- | A minimal, non-backtracking parser over 'String'.
--
-- Running a parser either fails with 'Nothing' or yields the parsed value
-- together with the not-yet-consumed remainder of the input.
newtype Parser a = Parser
  { runParser :: String -> Maybe (a, String)
    -- ^ Apply the parser to an input string.
  }
  deriving (Functor)
-- | Consume a single character equal to @c@.
--
-- Fails on empty input and on any other leading character; on success the
-- matched character and the rest of the input are returned.
char :: Char -> Parser Char
char c = Parser $ \input -> case input of
  (x : rest) | x == c -> Just (x, rest)
  _ -> Nothing
-- | Sequential composition: the left parser runs first, the right parser
-- continues on whatever input the left one did not consume. If either side
-- fails, the whole parser fails.
instance Applicative Parser where
  pure a = Parser $ \s -> Just (a, s)
  Parser fp <*> xp = Parser $ \s -> do
    (f, rest) <- fp s
    (x, rest') <- runParser xp rest
    Just (f x, rest')
-- | Left-biased choice: the second parser is only tried, on the same input,
-- when the first one fails. 'empty' always fails.
instance Alternative Parser where
  empty = Parser $ \_ -> Nothing
  first <|> second = Parser $ \s -> case runParser first s of
    Nothing -> runParser second s
    success -> success
-- | A parser that yields the numeric value of the text it consumed.
type NumParser = Parser Integer

infixl 6 .+.
infixl 7 .*.

-- | Run both parsers in sequence and add their values.
(.+.) :: NumParser -> NumParser -> NumParser
(.+.) = liftA2 (+)

-- | Run both parsers in sequence and multiply their values.
-- Binds tighter than '.+.'.
(.*.) :: NumParser -> NumParser -> NumParser
(.*.) = liftA2 (*)
-- | Make a numeral parser optional: if @p@ fails, succeed with value 0
-- without consuming any input.
opt :: NumParser -> NumParser
opt p = p <|> pure 0
-- | Lexicon entry for one of the digits 1-9, bundling the parsers for the
-- word forms derived from that digit.
data NumItem = NumItem
  { base :: NumParser
    -- ^ The digit itself.
  , plus10 :: NumParser
    -- ^ The digit plus ten (11-19).
  , times10 :: [NumParser]
    -- ^ The digit times ten; a list because a form may have several spellings.
  }
-- | Build the lexicon entry of a regularly formed digit.
--
-- The teen form is the digit word followed by "zehn" and the tens form is the
-- digit word followed by "zig"; irregular digits override these fields.
defaultNumItem :: Integer -> String -> NumItem
defaultNumItem value form =
  let digit = assign value form
   in NumItem
        { base = digit
        , plus10 = digit .+. ten
        , times10 = [digit .*. ty]
        }
-- | Shape of a function that ties a numeric value to a literal word form.
type Assignment = Integer -> String -> NumParser

-- | @assign value form@ consumes exactly the characters of @form@, in order,
-- and yields @value@. It fails as soon as one character does not match.
assign :: Assignment
assign value form = value <$ traverse_ char form
-- | Fixed building blocks of German numeral words.
ten, ty, hundred, thousand, und :: NumParser

-- | "zehn" on its own or as the suffix of a teen.
ten = assign 10 "zehn"

-- | The tens suffix "zig"; valued 10 because it is used as a multiplier.
ty = assign 10 "zig"

-- | "hundert", used as a multiplier.
hundred = assign 100 "hundert"

-- | "tausend", used as a multiplier.
thousand = assign 1000 "tausend"

-- | The connective "und"; valued 0 so that adding it changes nothing.
und = assign 0 "und"
-- | 1: "ein"; irregular teen "elf"; its tens form is plain "zehn".
one :: NumItem
one = regular {plus10 = assign 11 "elf", times10 = [ten]}
  where
    regular = defaultNumItem 1 "ein"

-- | 2: "zwei"; irregular "zwölf" and "zwanzig".
two :: NumItem
two = regular {plus10 = assign 12 "zwölf", times10 = [assign 20 "zwanzig"]}
  where
    regular = defaultNumItem 2 "zwei"

-- | 3: "drei"; regular teen, tens form spelled "dreißig" or "dreissig".
three :: NumItem
three = regular {times10 = [assign 30 "dreißig", assign 30 "dreissig"]}
  where
    regular = defaultNumItem 3 "drei"

-- | 4: "vier", fully regular.
four :: NumItem
four = defaultNumItem 4 "vier"

-- | 5: "fünf", fully regular.
five :: NumItem
five = defaultNumItem 5 "fünf"

-- | 6: "sechs"; the derived forms "sechzehn" and "sechzig" drop the "s".
six :: NumItem
six = regular {plus10 = assign 16 "sechzehn", times10 = [assign 60 "sechzig"]}
  where
    regular = defaultNumItem 6 "sechs"

-- | 7: "sieben"; the derived forms "siebzehn" and "siebzig" drop the "en".
seven :: NumItem
seven = regular {plus10 = assign 17 "siebzehn", times10 = [assign 70 "siebzig"]}
  where
    regular = defaultNumItem 7 "sieben"

-- | 8: "acht", fully regular.
eight :: NumItem
eight = defaultNumItem 8 "acht"

-- | 9: "neun", fully regular.
nine :: NumItem
nine = defaultNumItem 9 "neun"
-- | All digit entries in ascending order, 1 through 9. The order matters:
-- alternatives built from this list are tried front to back.
digitLexicon :: [NumItem]
digitLexicon =
  [ one
  , two
  , three
  , four
  , five
  , six
  , seven
  , eight
  , nine
  ]
-- | Any single digit word (1-9); the entries of 'digitLexicon' are tried in
-- order and the first match wins.
from1to9 :: NumParser
from1to9 = asum (map base digitLexicon)
-- | Any multiple of ten from 20 to 90, in every spelling the lexicon knows.
--
-- The first lexicon entry (1) is skipped because its tens form is plain
-- "zehn", which must not be combined with a preceding "<digit>und".
-- 'drop' is used instead of the partial 'tail' so the definition stays total
-- regardless of how the lexicon is defined.
tensFrom20 :: NumParser
tensFrom20 = asum (concatMap times10 (drop 1 digitLexicon))
-- | Numbers from 1 to 99. The alternatives are tried in this order:
--
-- 1. a multiple of ten from 20 on, optionally preceded by "<digit>und"
-- 2. a teen (11-19)
-- 3. "zehn"
-- 4. a single digit
from1to99 :: NumParser
from1to99 =
  asum
    [ opt (from1to9 .+. und) .+. tensFrom20
    , asum (map plus10 digitLexicon)
    , ten
    , from1to9
    ]
-- | Numbers below 1000: an optional "<digit>hundert" part (which may be
-- followed by "und") plus an optional part below 100.
--
-- Both parts are optional, so this parser also succeeds with 0 without
-- consuming anything.
from1to999 :: NumParser
from1to999 = opt hundreds .+. opt from1to99
  where
    hundreds = from1to9 .*. hundred .+. opt und
-- | Numbers below one million: an optional "<below 1000>tausend" part (which
-- may be followed by "und") plus an optional part below 1000.
--
-- Both parts are optional, so this parser also succeeds with 0 without
-- consuming anything.
from1to999999 :: NumParser
from1to999999 = opt thousands .+. opt from1to999
  where
    thousands = from1to999 .*. thousand .+. opt und
-- | Like 'from1to999999', but also accepts words that start with a bare
-- "hundert" or "tausend" by reading them as "einhundert" / "eintausend".
from1to999999' :: NumParser
from1to999999' = Parser (runParser from1to999999 . withLeadingEin)
  where
    -- Supply the implicit leading "ein" so the regular grammar applies.
    withLeadingEin s
      | any (`isPrefixOf` s) ["hundert", "tausend"] = "ein" ++ s
      | otherwise = s
-- | Numbers counted in hundreds, as is common for years: a teen (11-19)
-- times "hundert", optionally followed by a part below 100 that may itself be
-- introduced by "und" (e.g. "achtzehnhundertzweiundfünfzig").
fromYear1100to1999 :: NumParser
fromYear1100to1999 = centuries .+. opt (opt und .+. from1to99)
  where
    centuries = asum [plus10 digit .*. hundred | digit <- digitLexicon]
-- | Top-level grammar: the hundreds-based (year style) reading is tried first,
-- the regular reading below one million second.
allNumerals :: NumParser
allNumerals = asum [fromYear1100to1999, from1to999999']
-- | Validate the unparsed remainder of a numeral word.
--
-- Takes the parsed value together with the leftover input and returns the
-- value when the leftover is acceptable, 'Nothing' otherwise:
--
-- * an empty leftover is always accepted;
-- * an inflection ending ("s", "e", "em", "en", "er", "es") is accepted only
--   when the numeral ends in the word "ein", as in "eins" or "einen".
--
-- A numeral ends in "ein" exactly when its value ends in the digits 01
-- (1, 101, 1001, ...). Checking only the last digit would also accept endings
-- after 11 ("elf") or 21 ("einundzwanzig"), whose words do not end in "ein".
removeInflection :: (Integer, String) -> Maybe Integer
removeInflection (n, suffix)
  | null suffix = Just n
  | endsInEin && suffix `elem` inflection = Just n
  | otherwise = Nothing
  where
    endsInEin = n `mod` 100 == 1
    inflection = ["s", "e", "em", "en", "er", "es"]
-- | Parse a lower-case German numeral word into its integer value.
--
-- Returns 'Nothing' when the word is not a numeral known to 'allNumerals' or
-- when input is left over that is not an accepted inflection ending (see
-- 'removeInflection').
--
-- Empty input is rejected explicitly: every part of the grammar is optional,
-- so the parser would otherwise succeed on it with the value 0.
parseNumeral :: String -> Maybe Integer
parseNumeral "" = Nothing
parseNumeral s = removeInflection =<< runParser allNumerals s

View File

@ -27,6 +27,7 @@ import Duckling.Numeral.Types (NumeralData (..))
import Duckling.Regex.Types import Duckling.Regex.Types
import Duckling.Types import Duckling.Types
import qualified Duckling.Numeral.Types as TNumeral import qualified Duckling.Numeral.Types as TNumeral
import Duckling.Numeral.DE.NumParser
ruleNumeralsPrefixWithNegativeOrMinus :: Rule ruleNumeralsPrefixWithNegativeOrMinus :: Rule
ruleNumeralsPrefixWithNegativeOrMinus = Rule ruleNumeralsPrefixWithNegativeOrMinus = Rule
@ -50,15 +51,6 @@ ruleFew = Rule
, prod = \_ -> integer 3 , prod = \_ -> integer 3
} }
ruleTen :: Rule
ruleTen = Rule
{ name = "ten"
, pattern =
[ regex "zehn"
]
, prod = \_ -> integer 10 >>= withGrain 1
}
ruleDecimalWithThousandsSeparator :: Rule ruleDecimalWithThousandsSeparator :: Rule
ruleDecimalWithThousandsSeparator = Rule ruleDecimalWithThousandsSeparator = Rule
{ name = "decimal with thousands separator" { name = "decimal with thousands separator"
@ -84,41 +76,6 @@ ruleDecimalNumeral = Rule
_ -> Nothing _ -> Nothing
} }
-- TODO: Single-word composition (#110)
ruleInteger3 :: Rule
ruleInteger3 = Rule
{ name = "integer ([2-9][1-9])"
, pattern =
[ regex "(ein|zwei|drei|vier|fünf|sechs|sieben|acht|neun)und(zwanzig|dreissig|dreißig|vierzig|fünfzig|sechzig|siebzig|achtzig|neunzig)"
]
, prod = \tokens -> case tokens of
(Token RegexMatch (GroupMatch (m1:m2:_)):_) -> do
v1 <- case Text.toLower m1 of
"ein" -> Just 1
"zwei" -> Just 2
"drei" -> Just 3
"vier" -> Just 4
"fünf" -> Just 5
"sechs" -> Just 6
"sieben" -> Just 7
"acht" -> Just 8
"neun" -> Just 9
_ -> Nothing
v2 <- case Text.toLower m2 of
"zwanzig" -> Just 20
"dreissig" -> Just 30
"dreißig" -> Just 30
"vierzig" -> Just 40
"fünfzig" -> Just 50
"sechzig" -> Just 60
"siebzig" -> Just 70
"achtzig" -> Just 80
"neunzig" -> Just 90
_ -> Nothing
integer $ v1 + v2
_ -> Nothing
}
ruleNumeralsUnd :: Rule ruleNumeralsUnd :: Rule
ruleNumeralsUnd = Rule ruleNumeralsUnd = Rule
{ name = "numbers und" { name = "numbers und"
@ -221,78 +178,13 @@ rulePowersOfTen = Rule
_ -> Nothing _ -> Nothing
} }
zeroNineteenMap :: HashMap Text Integer ruleZero :: Rule
zeroNineteenMap = HashMap.fromList ruleZero = Rule
[ ("keine", 0) { name = "integer 0"
, ("null", 0)
, ("nichts", 0)
, ("keiner", 0)
, ("kein", 0)
, ("keins", 0)
, ("keinen", 0)
, ("keines", 0)
, ("einer", 1)
, ("eins", 1)
, ("ein", 1)
, ("eine", 1)
, ("einser", 1)
, ("zwei", 2)
, ("drei", 3)
, ("vier", 4)
, ("fünf", 5)
, ("sechs", 6)
, ("sieben", 7)
, ("acht", 8)
, ("neun", 9)
, ("zehn", 10)
, ("elf", 11)
, ("zwölf", 12)
, ("dreizehn", 13)
, ("vierzehn", 14)
, ("fünfzehn", 15)
, ("sechzehn", 16)
, ("siebzehn", 17)
, ("achtzehn", 18)
, ("neunzehn", 19)
]
-- TODO: Single-word composition (#110)
ruleZeroToNineteen :: Rule
ruleZeroToNineteen = Rule
{ name = "integer (0..19)"
, pattern = , pattern =
[ regex "(keine[rn]|keine?s?|null|nichts|eins?(er?)?|zwei|dreizehn|drei|vierzehn|vier|fünfzehn|fünf|sechzehn|sechs|siebzehn|sieben|achtzehn|acht|neunzehn|neun|elf|zwölf)" [ regex "(keine(m|n|r|s)?|keins?|null|nichts)"
] ]
, prod = \tokens -> case tokens of , prod = \_ -> integer 0
(Token RegexMatch (GroupMatch (match:_)):_) ->
HashMap.lookup (Text.toLower match) zeroNineteenMap >>= integer
_ -> Nothing
}
tensMap :: HashMap Text Integer
tensMap = HashMap.fromList
[ ( "zwanzig" , 20 )
, ( "dreissig", 30 )
, ( "dreißig" , 30 )
, ( "vierzig" , 40 )
, ( "fünfzig" , 50 )
, ( "sechzig" , 60 )
, ( "siebzig" , 70 )
, ( "achtzig" , 80 )
, ( "neunzig" , 90 )
]
-- TODO: Single-word composition (#110)
ruleInteger2 :: Rule
ruleInteger2 = Rule
{ name = "integer (20..90)"
, pattern =
[ regex "(zwanzig|dreissig|dreißig|vierzig|fünfzig|sechzig|siebzig|achtzig|neunzig)"
]
, prod = \tokens -> case tokens of
(Token RegexMatch (GroupMatch (match:_)):_) ->
HashMap.lookup (Text.toLower match) tensMap >>= integer
_ -> Nothing
} }
ruleNumeralDotNumeral :: Rule ruleNumeralDotNumeral :: Rule
@ -323,6 +215,17 @@ ruleIntegerWithThousandsSeparator = Rule
_ -> Nothing _ -> Nothing
} }
-- | Numerals written as a single word, from simple ones ("zwei") to compounds
-- such as "siebenhunderteinundzwanzigtausendzwölf".
--
-- The regex only pre-selects candidates: a word that may start with a numeral
-- stem and must end with one. The decision whether the word is a numeral, and
-- its value, is left to 'parseNumeral'.
--
-- The character classes are written without @|@: inside brackets @|@ is a
-- literal character, not an alternation, so @[m|n|r|s]@ would also match a
-- pipe after "eine".
ruleAllNumeralWords :: Rule
ruleAllNumeralWords = Rule
  { name = "simple and complex numerals written as one word"
  , pattern =
    [ regex "(ein|zwei|drei|vier|fünf|sech|sieb|acht|neun|zehn|elf|zwölf|hundert|tausend)?([^\\s]+)?(eine[mnrs]?|eins?|zwei|drei|vier|fünf|sechs|sieben|acht|neun|zehn|elf|zwölf|[sßz]ig|hundert|tausend)"
    ]
  , prod = \tokens -> case tokens of
      (Token RegexMatch (GroupMatch matches) : _) ->
        -- The three groups are adjacent, so joining them rebuilds the matched
        -- word (assumes GroupMatch holds only the capture groups - TODO confirm).
        parseNumeral (concatMap (Text.unpack . Text.toLower) matches)
          >>= integer
      _ -> Nothing
  }
rules :: [Rule] rules :: [Rule]
rules = rules =
[ ruleCouple [ ruleCouple
@ -330,8 +233,6 @@ rules =
, ruleDecimalWithThousandsSeparator , ruleDecimalWithThousandsSeparator
, ruleDozen , ruleDozen
, ruleFew , ruleFew
, ruleInteger2
, ruleInteger3
, ruleIntegerWithThousandsSeparator , ruleIntegerWithThousandsSeparator
, ruleIntersect , ruleIntersect
, ruleMultiply , ruleMultiply
@ -340,6 +241,6 @@ rules =
, ruleNumeralsSuffixesKMG , ruleNumeralsSuffixesKMG
, ruleNumeralsUnd , ruleNumeralsUnd
, rulePowersOfTen , rulePowersOfTen
, ruleTen , ruleZero
, ruleZeroToNineteen , ruleAllNumeralWords
] ]

View File

@ -99,13 +99,6 @@ classifiers
koData = koData =
ClassData{prior = -infinity, unseen = -0.6931471805599453, ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0}}), likelihoods = HashMap.fromList [], n = 0}}),
("integer (20..90)",
Classifier{okData =
ClassData{prior = 0.0, unseen = -1.0986122886681098,
likelihoods = HashMap.fromList [("", 0.0)], n = 1},
koData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0}}),
("Maha Shivaratri", ("Maha Shivaratri",
Classifier{okData = Classifier{okData =
ClassData{prior = 0.0, unseen = -1.0986122886681098, ClassData{prior = 0.0, unseen = -1.0986122886681098,
@ -346,14 +339,6 @@ classifiers
("<day-of-month> (ordinal)", -2.8622008809294686), ("<day-of-month> (ordinal)", -2.8622008809294686),
("hour", -2.8622008809294686), ("minute", -2.169053700369523)], ("hour", -2.8622008809294686), ("minute", -2.169053700369523)],
n = 8}}), n = 8}}),
("integer (0..19)",
Classifier{okData =
ClassData{prior = -0.10008345855698253,
unseen = -3.6888794541139363,
likelihoods = HashMap.fromList [("", 0.0)], n = 38},
koData =
ClassData{prior = -2.3513752571634776, unseen = -1.791759469228055,
likelihoods = HashMap.fromList [("", 0.0)], n = 4}}),
("between <time-of-day> and <time-of-day> (interval)", ("between <time-of-day> and <time-of-day> (interval)",
Classifier{okData = Classifier{okData =
ClassData{prior = 0.0, unseen = -2.1972245773362196, ClassData{prior = 0.0, unseen = -2.1972245773362196,
@ -566,7 +551,8 @@ classifiers
unseen = -1.6094379124341003, unseen = -1.6094379124341003,
likelihoods = likelihoods =
HashMap.fromList HashMap.fromList
[("integer (0..19)hour (grain)", -0.6931471805599453), [("simple and complex numerals written as one wordhour (grain)",
-0.6931471805599453),
("hour", -0.6931471805599453)], ("hour", -0.6931471805599453)],
n = 1}, n = 1},
koData = koData =
@ -574,9 +560,18 @@ classifiers
unseen = -2.1972245773362196, unseen = -2.1972245773362196,
likelihoods = likelihoods =
HashMap.fromList HashMap.fromList
[("integer (0..19)hour (grain)", -0.6931471805599453), [("simple and complex numerals written as one wordhour (grain)",
-0.6931471805599453),
("hour", -0.6931471805599453)], ("hour", -0.6931471805599453)],
n = 3}}), n = 3}}),
("simple and complex numerals written as one word",
Classifier{okData =
ClassData{prior = -9.763846956391606e-2,
unseen = -3.713572066704308,
likelihoods = HashMap.fromList [("", 0.0)], n = 39},
koData =
ClassData{prior = -2.374905754573672, unseen = -1.791759469228055,
likelihoods = HashMap.fromList [("", 0.0)], n = 4}}),
("intersect", ("intersect",
Classifier{okData = Classifier{okData =
ClassData{prior = -0.12020308869575518, ClassData{prior = -0.12020308869575518,
@ -1617,7 +1612,8 @@ classifiers
likelihoods = likelihoods =
HashMap.fromList HashMap.fromList
[("integer (numeric)", -6.995858860691034e-2), [("integer (numeric)", -6.995858860691034e-2),
("integer (0..19)", -2.6946271807700692)], ("simple and complex numerals written as one word",
-2.6946271807700692)],
n = 72}, n = 72},
koData = koData =
ClassData{prior = -1.5664205273504095, ClassData{prior = -1.5664205273504095,
@ -1625,7 +1621,8 @@ classifiers
likelihoods = likelihoods =
HashMap.fromList HashMap.fromList
[("integer (numeric)", -0.2113090936672069), [("integer (numeric)", -0.2113090936672069),
("integer (0..19)", -1.6582280766035324)], ("simple and complex numerals written as one word",
-1.6582280766035324)],
n = 19}}), n = 19}}),
("year", ("year",
Classifier{okData = Classifier{okData =
@ -1656,47 +1653,62 @@ classifiers
likelihoods = likelihoods =
HashMap.fromList HashMap.fromList
[("week", -1.9339339580085977), [("week", -1.9339339580085977),
("integer (0..19)year (grain)", -3.3202283191284883), ("simple and complex numerals written as one wordhour (grain)",
-3.3202283191284883),
("integer (numeric)day (grain)", -2.8094026953624978), ("integer (numeric)day (grain)", -2.8094026953624978),
("couplehour (grain)", -3.7256934272366524), ("couplehour (grain)", -3.7256934272366524),
("integer (0..19)hour (grain)", -3.3202283191284883), ("simple and complex numerals written as one wordyear (grain)",
-3.3202283191284883),
("second", -3.7256934272366524), ("second", -3.7256934272366524),
("integer (numeric)year (grain)", -3.7256934272366524), ("integer (numeric)year (grain)", -3.7256934272366524),
("day", -2.8094026953624978), ("year", -3.0325462466767075), ("day", -2.8094026953624978),
("simple and complex numerals written as one wordmonth (grain)",
-3.7256934272366524),
("year", -3.0325462466767075),
("simple and complex numerals written as one wordsecond (grain)",
-3.7256934272366524),
("integer (numeric)week (grain)", -2.627081138568543), ("integer (numeric)week (grain)", -2.627081138568543),
("integer (0..19)month (grain)", -3.7256934272366524),
("integer (0..19)second (grain)", -3.7256934272366524),
("hour", -2.8094026953624978), ("month", -3.7256934272366524), ("hour", -2.8094026953624978), ("month", -3.7256934272366524),
("integer (numeric)minute (grain)", -2.627081138568543), ("integer (numeric)minute (grain)", -2.627081138568543),
("integer (0..19)minute (grain)", -3.7256934272366524), ("simple and complex numerals written as one wordweek (grain)",
-2.472930458741285),
("minute", -2.472930458741285), ("minute", -2.472930458741285),
("integer (numeric)hour (grain)", -3.7256934272366524), ("integer (numeric)hour (grain)", -3.7256934272366524),
("integer (0..19)week (grain)", -2.472930458741285)], ("simple and complex numerals written as one wordminute (grain)",
-3.7256934272366524)],
n = 30}, n = 30},
koData = koData =
ClassData{prior = -0.5937747067467416, unseen = -4.584967478670572, ClassData{prior = -0.5937747067467416, unseen = -4.584967478670572,
likelihoods = likelihoods =
HashMap.fromList HashMap.fromList
[("week", -2.62880082944807), [("week", -2.62880082944807),
("integer (0..19)year (grain)", -2.9652730660692823), ("simple and complex numerals written as one wordhour (grain)",
-3.8815637979434374),
("integer (numeric)day (grain)", -3.4760986898352733), ("integer (numeric)day (grain)", -3.4760986898352733),
("integer (0..19)hour (grain)", -3.8815637979434374), ("simple and complex numerals written as one wordyear (grain)",
-2.9652730660692823),
("second", -2.9652730660692823), ("second", -2.9652730660692823),
("integer (numeric)second (grain)", -3.4760986898352733), ("integer (numeric)second (grain)", -3.4760986898352733),
("integer (numeric)year (grain)", -3.4760986898352733), ("integer (numeric)year (grain)", -3.4760986898352733),
("day", -2.3774864011671633), ("year", -2.62880082944807), ("day", -2.3774864011671633),
("simple and complex numerals written as one wordmonth (grain)",
-2.9652730660692823),
("year", -2.62880082944807),
("simple and complex numerals written as one wordsecond (grain)",
-3.4760986898352733),
("integer (numeric)week (grain)", -3.188416617383492), ("integer (numeric)week (grain)", -3.188416617383492),
("integer (0..19)month (grain)", -2.9652730660692823),
("integer (0..19)second (grain)", -3.4760986898352733),
("hour", -3.188416617383492), ("month", -2.62880082944807), ("hour", -3.188416617383492), ("month", -2.62880082944807),
("integer (numeric)minute (grain)", -3.4760986898352733), ("integer (numeric)minute (grain)", -3.4760986898352733),
("integer (0..19)minute (grain)", -3.4760986898352733), ("simple and complex numerals written as one wordday (grain)",
-2.9652730660692823),
("simple and complex numerals written as one wordweek (grain)",
-3.188416617383492),
("integer (numeric)month (grain)", -3.4760986898352733), ("integer (numeric)month (grain)", -3.4760986898352733),
("minute", -2.9652730660692823), ("minute", -2.9652730660692823),
("coupleday (grain)", -3.4760986898352733), ("coupleday (grain)", -3.4760986898352733),
("integer (numeric)hour (grain)", -3.4760986898352733), ("integer (numeric)hour (grain)", -3.4760986898352733),
("integer (0..19)day (grain)", -2.9652730660692823), ("simple and complex numerals written as one wordminute (grain)",
("integer (0..19)week (grain)", -3.188416617383492)], -3.4760986898352733)],
n = 37}}), n = 37}}),
("Dienstag", ("Dienstag",
Classifier{okData = Classifier{okData =
@ -1754,9 +1766,10 @@ classifiers
ClassData{prior = 0.0, unseen = -2.0794415416798357, ClassData{prior = 0.0, unseen = -2.0794415416798357,
likelihoods = likelihoods =
HashMap.fromList HashMap.fromList
[("hour", -0.8472978603872037), [("simple and complex numerals written as one wordtime-of-day (latent)",
("integer (numeric)time-of-day (latent)", -1.252762968495368), -1.252762968495368),
("integer (20..90)time-of-day (latent)", -1.252762968495368)], ("hour", -0.8472978603872037),
("integer (numeric)time-of-day (latent)", -1.252762968495368)],
n = 2}, n = 2},
koData = koData =
ClassData{prior = -infinity, unseen = -1.3862943611198906, ClassData{prior = -infinity, unseen = -1.3862943611198906,
@ -2242,25 +2255,33 @@ classifiers
likelihoods = likelihoods =
HashMap.fromList HashMap.fromList
[("week", -2.740840023925201), [("week", -2.740840023925201),
("integer (0..19)year (grain)", -3.028522096376982), ("simple and complex numerals written as one wordhour (grain)",
-3.4339872044851463),
("integer (numeric)day (grain)", -3.4339872044851463), ("integer (numeric)day (grain)", -3.4339872044851463),
("integer (0..19)hour (grain)", -3.4339872044851463), ("simple and complex numerals written as one wordyear (grain)",
-3.028522096376982),
("second", -3.028522096376982), ("second", -3.028522096376982),
("integer (numeric)second (grain)", -3.4339872044851463), ("integer (numeric)second (grain)", -3.4339872044851463),
("integer (numeric)year (grain)", -3.4339872044851463), ("integer (numeric)year (grain)", -3.4339872044851463),
("day", -2.3353749158170367), ("year", -2.740840023925201), ("day", -2.3353749158170367),
("simple and complex numerals written as one wordmonth (grain)",
-3.028522096376982),
("year", -2.740840023925201),
("simple and complex numerals written as one wordsecond (grain)",
-3.4339872044851463),
("integer (numeric)week (grain)", -3.4339872044851463), ("integer (numeric)week (grain)", -3.4339872044851463),
("integer (0..19)month (grain)", -3.028522096376982),
("integer (0..19)second (grain)", -3.4339872044851463),
("hour", -3.028522096376982), ("month", -2.740840023925201), ("hour", -3.028522096376982), ("month", -2.740840023925201),
("integer (numeric)minute (grain)", -3.4339872044851463), ("integer (numeric)minute (grain)", -3.4339872044851463),
("integer (0..19)minute (grain)", -3.4339872044851463), ("simple and complex numerals written as one wordday (grain)",
-3.028522096376982),
("simple and complex numerals written as one wordweek (grain)",
-3.028522096376982),
("integer (numeric)month (grain)", -3.4339872044851463), ("integer (numeric)month (grain)", -3.4339872044851463),
("minute", -3.028522096376982), ("minute", -3.028522096376982),
("coupleday (grain)", -3.028522096376982), ("coupleday (grain)", -3.028522096376982),
("integer (numeric)hour (grain)", -3.4339872044851463), ("integer (numeric)hour (grain)", -3.4339872044851463),
("integer (0..19)day (grain)", -3.028522096376982), ("simple and complex numerals written as one wordminute (grain)",
("integer (0..19)week (grain)", -3.028522096376982)], -3.4339872044851463)],
n = 20}, n = 20},
koData = koData =
ClassData{prior = -infinity, unseen = -3.1354942159291497, ClassData{prior = -infinity, unseen = -3.1354942159291497,
@ -2437,22 +2458,29 @@ classifiers
likelihoods = likelihoods =
HashMap.fromList HashMap.fromList
[("week", -2.5257286443082556), [("week", -2.5257286443082556),
("integer (0..19)year (grain)", -2.8134107167600364),
("integer (numeric)day (grain)", -3.2188758248682006), ("integer (numeric)day (grain)", -3.2188758248682006),
("simple and complex numerals written as one wordyear (grain)",
-2.8134107167600364),
("second", -2.8134107167600364), ("second", -2.8134107167600364),
("integer (numeric)second (grain)", -3.2188758248682006), ("integer (numeric)second (grain)", -3.2188758248682006),
("integer (numeric)year (grain)", -3.2188758248682006), ("integer (numeric)year (grain)", -3.2188758248682006),
("day", -2.5257286443082556), ("year", -2.5257286443082556), ("day", -2.5257286443082556),
("simple and complex numerals written as one wordmonth (grain)",
-2.8134107167600364),
("year", -2.5257286443082556),
("simple and complex numerals written as one wordsecond (grain)",
-3.2188758248682006),
("integer (numeric)week (grain)", -2.8134107167600364), ("integer (numeric)week (grain)", -2.8134107167600364),
("integer (0..19)month (grain)", -2.8134107167600364),
("integer (0..19)second (grain)", -3.2188758248682006),
("month", -2.5257286443082556), ("month", -2.5257286443082556),
("integer (numeric)minute (grain)", -3.2188758248682006), ("integer (numeric)minute (grain)", -3.2188758248682006),
("integer (0..19)minute (grain)", -3.2188758248682006), ("simple and complex numerals written as one wordday (grain)",
-2.8134107167600364),
("simple and complex numerals written as one wordweek (grain)",
-3.2188758248682006),
("integer (numeric)month (grain)", -3.2188758248682006), ("integer (numeric)month (grain)", -3.2188758248682006),
("minute", -2.8134107167600364), ("minute", -2.8134107167600364),
("integer (0..19)day (grain)", -2.8134107167600364), ("simple and complex numerals written as one wordminute (grain)",
("integer (0..19)week (grain)", -3.2188758248682006)], -3.2188758248682006)],
n = 16}, n = 16},
koData = koData =
ClassData{prior = -infinity, unseen = -2.9444389791664407, ClassData{prior = -infinity, unseen = -2.9444389791664407,

View File

@ -327,10 +327,10 @@ classifiers
unseen = -4.31748811353631, unseen = -4.31748811353631,
likelihoods = likelihoods =
HashMap.fromList HashMap.fromList
[("<integer> (latent time-of-day)", -0.9718605830289658), [("<integer> (latent time-of-day)", -0.9718605830289657),
("intersect by \"di\", \"della\", \"del\"", -3.20545280453606), ("intersect by \"di\", \"della\", \"del\"", -3.20545280453606),
("day", -2.3581549441488563), ("Lunedi", -3.6109179126442243), ("day", -2.3581549441488563), ("Lunedi", -3.6109179126442243),
("hour", -0.9718605830289658), ("hour", -0.9718605830289657),
("two time tokens separated by `di`", -3.20545280453606), ("two time tokens separated by `di`", -3.20545280453606),
("Domenica", -3.6109179126442243)], ("Domenica", -3.6109179126442243)],
n = 33}}), n = 33}}),

View File

@ -434,6 +434,7 @@ library
, Duckling.Numeral.DA.Rules , Duckling.Numeral.DA.Rules
, Duckling.Numeral.DE.Corpus , Duckling.Numeral.DE.Corpus
, Duckling.Numeral.DE.Rules , Duckling.Numeral.DE.Rules
, Duckling.Numeral.DE.NumParser
, Duckling.Numeral.EL.Corpus , Duckling.Numeral.EL.Corpus
, Duckling.Numeral.EL.Rules , Duckling.Numeral.EL.Rules
, Duckling.Numeral.EN.Corpus , Duckling.Numeral.EN.Corpus