mirror of
https://github.com/facebook/duckling.git
synced 2024-11-24 07:23:03 +03:00
DE-Numeral-complex-German-numerals (#699)
Summary: The commit adds a rule and an underlying parser for German numeral expressions representing (integer) numbers smaller than 1 million. Unlike in English, those numbers are written as single words, e.g. "neunhundertsiebenundachtzigtausendsechshundertvierundfünfzig" (987654). Other rules are simplified or removed to eliminate redundancies. Pull Request resolved: https://github.com/facebook/duckling/pull/699 Reviewed By: patapizza Differential Revision: D37716120 Pulled By: stroxler fbshipit-source-id: 90b26e253259c5bc1aaa76f3972537c2361f6bb3
This commit is contained in:
parent
1faab00741
commit
9509e042dc
@ -33,6 +33,8 @@ allExamples = concat
|
||||
, examples (NumeralValue 1)
|
||||
[ "1"
|
||||
, "eins"
|
||||
, "Eine"
|
||||
, "einen"
|
||||
]
|
||||
, examples (NumeralValue 3)
|
||||
[ "3"
|
||||
@ -46,11 +48,12 @@ allExamples = concat
|
||||
]
|
||||
, examples (NumeralValue 30)
|
||||
[ "30"
|
||||
, "dreißig"
|
||||
, "dreissig"
|
||||
]
|
||||
, examples (NumeralValue 33)
|
||||
[ "33"
|
||||
, "drei Und dreissig"
|
||||
, "dreiunddreißig"
|
||||
, "dreiunddreissig"
|
||||
, "0033"
|
||||
]
|
||||
@ -76,11 +79,11 @@ allExamples = concat
|
||||
]
|
||||
, examples (NumeralValue 200)
|
||||
[ "200"
|
||||
, "zwei hundert"
|
||||
, "zweihundert"
|
||||
]
|
||||
, examples (NumeralValue 102)
|
||||
[ "102"
|
||||
, "Hundert zwei"
|
||||
, "Hundertzwei"
|
||||
]
|
||||
, examples (NumeralValue 1.1)
|
||||
[ "1,1"
|
||||
@ -97,12 +100,15 @@ allExamples = concat
|
||||
, "100000"
|
||||
, "100K"
|
||||
, "100k"
|
||||
, "einhunderttausend"
|
||||
, "hunderttausend"
|
||||
]
|
||||
, examples (NumeralValue 3000000)
|
||||
[ "3M"
|
||||
, "3000K"
|
||||
, "3000000"
|
||||
, "3.000.000"
|
||||
, "drei Millionen"
|
||||
]
|
||||
, examples (NumeralValue 1200000)
|
||||
[ "1.200.000"
|
||||
@ -120,30 +126,34 @@ allExamples = concat
|
||||
, "-1200K"
|
||||
, "-,0012G"
|
||||
]
|
||||
, examples (NumeralValue 1852)
|
||||
[ "eintausendachthundertzweiundfünfzig"
|
||||
, "tausendachthundertzweiundfünfzig"
|
||||
, "achtzehnhundertzweiundfünfzig"]
|
||||
, examples (NumeralValue 5000)
|
||||
[ "5 tausend"
|
||||
, "fünf tausend"
|
||||
, "fünftausend"
|
||||
]
|
||||
, examples (NumeralValue 200000)
|
||||
[ "zwei hundert tausend"
|
||||
[ "zweihunderttausend"
|
||||
]
|
||||
, examples (NumeralValue 721012)
|
||||
[ "sieben hundert einundzwanzig tausend zwölf"
|
||||
[ "siebenhunderteinundzwanzigtausendzwölf"
|
||||
, "siebenhunderteinundzwanzigtausendundzwölf"
|
||||
]
|
||||
, examples (NumeralValue 31256721)
|
||||
[ "ein und dreissig millionen zwei hundert sechs und fünfzig tausend sieben hundert ein und zwanzig"
|
||||
[ "einunddreissig millionen zweihundertsechsundfünfzigtausendsiebenhunderteinundzwanzig"
|
||||
, "einunddreißig Millionen zweihundertsechsundfünfzigtausendundsiebenhunderteinundzwanzig"
|
||||
]
|
||||
, examples (NumeralValue 1416.15)
|
||||
[ "1416,15"
|
||||
]
|
||||
, examples (NumeralValue 1416.15)
|
||||
[ "1.416,15"
|
||||
, "tausendvierhundertsechzehn Komma fünfzehn"
|
||||
]
|
||||
, examples (NumeralValue 1000000.0)
|
||||
[ "1.000.000,00",
|
||||
"eine million"
|
||||
]
|
||||
, examples (NumeralValue 2771090092000000.0)
|
||||
[ "zwei billiarden sieben hundert ein und siebzig billionen neunzig milliarden zwei und neunzig millionen"
|
||||
[ "zwei billiarden siebenhunderteinundsiebzig billionen neunzig milliarden zweiundneunzig millionen"
|
||||
]
|
||||
]
|
||||
|
160
Duckling/Numeral/DE/NumParser.hs
Normal file
160
Duckling/Numeral/DE/NumParser.hs
Normal file
@ -0,0 +1,160 @@
|
||||
{-# LANGUAGE DeriveFunctor #-}
|
||||
|
||||
module Duckling.Numeral.DE.NumParser (parseNumeral) where
|
||||
|
||||
import Prelude
|
||||
import Control.Applicative
|
||||
import Data.Char
|
||||
import Data.List
|
||||
import Data.Foldable
|
||||
import Data.String
|
||||
|
||||
-- | A tiny string parser: given the input, it either fails with 'Nothing'
-- or yields a result together with the part of the input it did not consume.
newtype Parser a = Parser
  { runParser :: String -> Maybe (a, String)
  }
  deriving (Functor)
|
||||
|
||||
-- | Consume exactly one character equal to the given one and return it.
-- Fails on empty input or when the next character differs.
char :: Char -> Parser Char
char expected = Parser $ \input -> case input of
  (x : rest) | x == expected -> Just (x, rest)
  _ -> Nothing
|
||||
|
||||
-- | Sequencing: run the function parser first, then the argument parser on
-- whatever input is left; the whole thing fails if either step fails.
instance Applicative Parser where
  pure x = Parser $ \input -> Just (x, input)
  Parser pf <*> px = Parser $ \input -> do
    (f, rest) <- pf input
    (x, rest') <- runParser px rest
    pure (f x, rest')
|
||||
|
||||
-- | Choice: try the left parser; only if it fails, run the right parser on
-- the same, untouched input.
instance Alternative Parser where
  empty = Parser $ \_ -> Nothing
  Parser first <|> Parser second = Parser $ \input ->
    case first input of
      Nothing -> second input
      success -> success
|
||||
|
||||
-- | A parser that produces the integer value of (a fragment of) a numeral.
type NumParser = Parser Integer

infixl 6 .+.
infixl 7 .*.

-- | Run both parsers in sequence and add their values.
(.+.) :: NumParser -> NumParser -> NumParser
(.+.) = liftA2 (+)

-- | Run both parsers in sequence and multiply their values.
(.*.) :: NumParser -> NumParser -> NumParser
(.*.) = liftA2 (*)
|
||||
|
||||
-- | Make a numeral fragment optional: if the parser fails, succeed with the
-- value 0 and leave the input untouched.
opt :: NumParser -> NumParser
opt p = p <|> pure 0
|
||||
|
||||
-- | The parsers associated with one digit word.
data NumItem = NumItem
  { base :: NumParser
    -- ^ parser for the digit word itself
  , plus10 :: NumParser
    -- ^ parser for the word meaning digit + 10 (e.g. built from "...zehn")
  , times10 :: [NumParser]
    -- ^ parsers for the word(s) meaning digit * 10; a list so that several
    -- spellings can be registered
  }
|
||||
|
||||
-- | Build the regular entry for a digit: the word itself, the word followed
-- by "zehn" (value + 10) and the word followed by "zig" (value * 10).
-- Irregular digits override individual fields of the result.
defaultNumItem :: Integer -> String -> NumItem
defaultNumItem value form =
  let digit = assign value form
   in NumItem
        { base = digit
        , plus10 = digit .+. ten
        , times10 = [digit .*. ty]
        }
|
||||
|
||||
-- | Shape of a function that ties an integer value to a literal word.
type Assignment = Integer -> String -> NumParser

-- | @assign value form@ matches the literal string @form@ character by
-- character and, if all of it is present, yields @value@.
assign :: Assignment
assign value form = value <$ traverse char form
|
||||
|
||||
-- | The word "zehn", worth 10.
ten :: NumParser
ten = assign 10 "zehn"

-- | The suffix "zig", worth 10 (used as a multiplier after a digit word).
ty :: NumParser
ty = assign 10 "zig"

-- | The word "hundert", worth 100.
hundred :: NumParser
hundred = assign 100 "hundert"

-- | The word "tausend", worth 1000.
thousand :: NumParser
thousand = assign 1000 "tausend"

-- | The connector "und"; it contributes 0 to a sum.
und :: NumParser
und = assign 0 "und"
|
||||
|
||||
-- | "ein": 11 is the irregular "elf" and 1 * 10 is plain "zehn".
one :: NumItem
one =
  (defaultNumItem 1 "ein")
    { plus10 = assign 11 "elf"
    , times10 = [ten]
    }

-- | "zwei": irregular "zwölf" and "zwanzig".
two :: NumItem
two =
  (defaultNumItem 2 "zwei")
    { plus10 = assign 12 "zwölf"
    , times10 = [assign 20 "zwanzig"]
    }

-- | "drei": 30 is accepted in both spellings, "dreißig" and "dreissig".
three :: NumItem
three =
  (defaultNumItem 3 "drei")
    { times10 =
        [ assign 30 "dreißig"
        , assign 30 "dreissig"
        ]
    }

-- | "vier": fully regular.
four :: NumItem
four = defaultNumItem 4 "vier"

-- | "fünf": fully regular.
five :: NumItem
five = defaultNumItem 5 "fünf"

-- | "sechs": the compounds "sechzehn" and "sechzig" drop the final "s".
six :: NumItem
six =
  (defaultNumItem 6 "sechs")
    { plus10 = assign 16 "sechzehn"
    , times10 = [assign 60 "sechzig"]
    }

-- | "sieben": the compounds "siebzehn" and "siebzig" drop the final "en".
seven :: NumItem
seven =
  (defaultNumItem 7 "sieben")
    { plus10 = assign 17 "siebzehn"
    , times10 = [assign 70 "siebzig"]
    }

-- | "acht": fully regular.
eight :: NumItem
eight = defaultNumItem 8 "acht"

-- | "neun": fully regular.
nine :: NumItem
nine = defaultNumItem 9 "neun"
|
||||
|
||||
-- | All digit entries in ascending order, starting with 'one'.
digitLexicon :: [NumItem]
digitLexicon =
  [ one
  , two
  , three
  , four
  , five
  , six
  , seven
  , eight
  , nine
  ]

-- | Any single digit word, tried in lexicon order.
from1to9 :: NumParser
from1to9 = asum (base <$> digitLexicon)

-- | Any multiple-of-ten word from 20 upwards. The first lexicon entry
-- ('one', whose times-ten form is "zehn") is skipped.
tensFrom20 :: NumParser
tensFrom20 = asum [p | item <- drop 1 digitLexicon, p <- times10 item]
|
||||
|
||||
-- | Numerals below one hundred. Alternatives are tried in this order:
--
--   1. an optional "<digit>und" followed by a tens word (20, 21, ..., 99),
--   2. the digit + 10 forms from the lexicon,
--   3. "zehn",
--   4. a bare digit.
from1to99 :: NumParser
from1to99 =
  asum
    [ opt (from1to9 .+. und) .+. tensFrom20
    , asum (plus10 <$> digitLexicon)
    , ten
    , from1to9
    ]
|
||||
|
||||
-- | Numerals below one thousand: an optional "<digit>hundert" part (with an
-- optional trailing "und") followed by an optional part below one hundred.
-- Both parts are optional, so this parser also succeeds with 0 without
-- consuming anything.
from1to999 :: NumParser
from1to999 = opt hundreds .+. opt from1to99
  where
    hundreds = from1to9 .*. hundred .+. opt und
|
||||
|
||||
-- | Numerals below one million: an optional "<below 1000>tausend" part (with
-- an optional trailing "und") followed by an optional part below one
-- thousand.
from1to999999 :: NumParser
from1to999999 = opt thousands .+. opt from1to999
  where
    thousands = from1to999 .*. thousand .+. opt und
|
||||
|
||||
-- | Like 'from1to999999', but input that starts with a bare "hundert" or
-- "tausend" is first prefixed with "ein", so that it is parsed the same way
-- as "einhundert..." / "eintausend...".
from1to999999' :: NumParser
from1to999999' = Parser (runParser from1to999999 . withImplicitOne)
  where
    withImplicitOne input
      | any (`isPrefixOf` input) ["hundert", "tausend"] = "ein" ++ input
      | otherwise = input
|
||||
|
||||
-- | Numerals counted in hundreds, as is common for years: one of the
-- digit + 10 words from the lexicon followed by "hundert", then an optional
-- remainder below one hundred (itself optionally introduced by "und").
fromYear1100to1999 :: NumParser
fromYear1100to1999 = centuries .+. opt remainder
  where
    centuries = asum [plus10 item .*. hundred | item <- digitLexicon]
    remainder = opt und .+. from1to99
|
||||
|
||||
-- | Top-level numeral parser: the "counted in hundreds" form is tried first,
-- then the general form for numbers below one million.
allNumerals :: NumParser
allNumerals =
  asum
    [ fromYear1100to1999
    , from1to999999'
    ]
|
||||
|
||||
-- | Decide whether a parse result is acceptable given the input left over.
--
-- A result is kept when nothing is left over, or when the value's last
-- decimal digit is 1 and the leftover is one of the endings "s", "e", "em",
-- "en", "er", "es". Any other leftover rejects the parse.
removeInflection :: (Integer, String) -> Maybe Integer
removeInflection (n, suffix)
  | null suffix = Just n
  | endsInOne && suffix `elem` endings = Just n
  | otherwise = Nothing
  where
    endsInOne = n `mod` 10 == 1
    endings = ["s", "e", "em", "en", "er", "es"]
|
||||
|
||||
-- | Parse a German numeral written as one word into its integer value.
--
-- The input is run through 'allNumerals' and the leftover is checked by
-- 'removeInflection'; 'Nothing' is returned when the word is not a numeral
-- this module understands. The parsers only contain lower-case spellings,
-- so callers are expected to lower-case the input first.
--
-- The empty string is rejected explicitly: every component of the
-- underlying parser is optional, so without this guard empty input would
-- "parse" successfully as 0.
parseNumeral :: String -> Maybe Integer
parseNumeral "" = Nothing
parseNumeral s = runParser allNumerals s >>= removeInflection
|
@ -27,6 +27,7 @@ import Duckling.Numeral.Types (NumeralData (..))
|
||||
import Duckling.Regex.Types
|
||||
import Duckling.Types
|
||||
import qualified Duckling.Numeral.Types as TNumeral
|
||||
import Duckling.Numeral.DE.NumParser
|
||||
|
||||
ruleNumeralsPrefixWithNegativeOrMinus :: Rule
|
||||
ruleNumeralsPrefixWithNegativeOrMinus = Rule
|
||||
@ -50,15 +51,6 @@ ruleFew = Rule
|
||||
, prod = \_ -> integer 3
|
||||
}
|
||||
|
||||
ruleTen :: Rule
|
||||
ruleTen = Rule
|
||||
{ name = "ten"
|
||||
, pattern =
|
||||
[ regex "zehn"
|
||||
]
|
||||
, prod = \_ -> integer 10 >>= withGrain 1
|
||||
}
|
||||
|
||||
ruleDecimalWithThousandsSeparator :: Rule
|
||||
ruleDecimalWithThousandsSeparator = Rule
|
||||
{ name = "decimal with thousands separator"
|
||||
@ -84,41 +76,6 @@ ruleDecimalNumeral = Rule
|
||||
_ -> Nothing
|
||||
}
|
||||
|
||||
-- TODO: Single-word composition (#110)
|
||||
ruleInteger3 :: Rule
|
||||
ruleInteger3 = Rule
|
||||
{ name = "integer ([2-9][1-9])"
|
||||
, pattern =
|
||||
[ regex "(ein|zwei|drei|vier|fünf|sechs|sieben|acht|neun)und(zwanzig|dreissig|dreißig|vierzig|fünfzig|sechzig|siebzig|achtzig|neunzig)"
|
||||
]
|
||||
, prod = \tokens -> case tokens of
|
||||
(Token RegexMatch (GroupMatch (m1:m2:_)):_) -> do
|
||||
v1 <- case Text.toLower m1 of
|
||||
"ein" -> Just 1
|
||||
"zwei" -> Just 2
|
||||
"drei" -> Just 3
|
||||
"vier" -> Just 4
|
||||
"fünf" -> Just 5
|
||||
"sechs" -> Just 6
|
||||
"sieben" -> Just 7
|
||||
"acht" -> Just 8
|
||||
"neun" -> Just 9
|
||||
_ -> Nothing
|
||||
v2 <- case Text.toLower m2 of
|
||||
"zwanzig" -> Just 20
|
||||
"dreissig" -> Just 30
|
||||
"dreißig" -> Just 30
|
||||
"vierzig" -> Just 40
|
||||
"fünfzig" -> Just 50
|
||||
"sechzig" -> Just 60
|
||||
"siebzig" -> Just 70
|
||||
"achtzig" -> Just 80
|
||||
"neunzig" -> Just 90
|
||||
_ -> Nothing
|
||||
integer $ v1 + v2
|
||||
_ -> Nothing
|
||||
}
|
||||
|
||||
ruleNumeralsUnd :: Rule
|
||||
ruleNumeralsUnd = Rule
|
||||
{ name = "numbers und"
|
||||
@ -221,78 +178,13 @@ rulePowersOfTen = Rule
|
||||
_ -> Nothing
|
||||
}
|
||||
|
||||
zeroNineteenMap :: HashMap Text Integer
|
||||
zeroNineteenMap = HashMap.fromList
|
||||
[ ("keine", 0)
|
||||
, ("null", 0)
|
||||
, ("nichts", 0)
|
||||
, ("keiner", 0)
|
||||
, ("kein", 0)
|
||||
, ("keins", 0)
|
||||
, ("keinen", 0)
|
||||
, ("keines", 0)
|
||||
, ("einer", 1)
|
||||
, ("eins", 1)
|
||||
, ("ein", 1)
|
||||
, ("eine", 1)
|
||||
, ("einser", 1)
|
||||
, ("zwei", 2)
|
||||
, ("drei", 3)
|
||||
, ("vier", 4)
|
||||
, ("fünf", 5)
|
||||
, ("sechs", 6)
|
||||
, ("sieben", 7)
|
||||
, ("acht", 8)
|
||||
, ("neun", 9)
|
||||
, ("zehn", 10)
|
||||
, ("elf", 11)
|
||||
, ("zwölf", 12)
|
||||
, ("dreizehn", 13)
|
||||
, ("vierzehn", 14)
|
||||
, ("fünfzehn", 15)
|
||||
, ("sechzehn", 16)
|
||||
, ("siebzehn", 17)
|
||||
, ("achtzehn", 18)
|
||||
, ("neunzehn", 19)
|
||||
]
|
||||
|
||||
-- TODO: Single-word composition (#110)
|
||||
ruleZeroToNineteen :: Rule
|
||||
ruleZeroToNineteen = Rule
|
||||
{ name = "integer (0..19)"
|
||||
ruleZero :: Rule
|
||||
ruleZero = Rule
|
||||
{ name = "integer 0"
|
||||
, pattern =
|
||||
[ regex "(keine[rn]|keine?s?|null|nichts|eins?(er?)?|zwei|dreizehn|drei|vierzehn|vier|fünfzehn|fünf|sechzehn|sechs|siebzehn|sieben|achtzehn|acht|neunzehn|neun|elf|zwölf)"
|
||||
[ regex "(keine(m|n|r|s)?|keins?|null|nichts)"
|
||||
]
|
||||
, prod = \tokens -> case tokens of
|
||||
(Token RegexMatch (GroupMatch (match:_)):_) ->
|
||||
HashMap.lookup (Text.toLower match) zeroNineteenMap >>= integer
|
||||
_ -> Nothing
|
||||
}
|
||||
|
||||
tensMap :: HashMap Text Integer
|
||||
tensMap = HashMap.fromList
|
||||
[ ( "zwanzig" , 20 )
|
||||
, ( "dreissig", 30 )
|
||||
, ( "dreißig" , 30 )
|
||||
, ( "vierzig" , 40 )
|
||||
, ( "fünfzig" , 50 )
|
||||
, ( "sechzig" , 60 )
|
||||
, ( "siebzig" , 70 )
|
||||
, ( "achtzig" , 80 )
|
||||
, ( "neunzig" , 90 )
|
||||
]
|
||||
|
||||
-- TODO: Single-word composition (#110)
|
||||
ruleInteger2 :: Rule
|
||||
ruleInteger2 = Rule
|
||||
{ name = "integer (20..90)"
|
||||
, pattern =
|
||||
[ regex "(zwanzig|dreissig|dreißig|vierzig|fünfzig|sechzig|siebzig|achtzig|neunzig)"
|
||||
]
|
||||
, prod = \tokens -> case tokens of
|
||||
(Token RegexMatch (GroupMatch (match:_)):_) ->
|
||||
HashMap.lookup (Text.toLower match) tensMap >>= integer
|
||||
_ -> Nothing
|
||||
, prod = \_ -> integer 0
|
||||
}
|
||||
|
||||
ruleNumeralDotNumeral :: Rule
|
||||
@ -323,6 +215,17 @@ ruleIntegerWithThousandsSeparator = Rule
|
||||
_ -> Nothing
|
||||
}
|
||||
|
||||
-- | Recognise German numerals written as a single word and hand them to
-- 'parseNumeral'.
--
-- The regex is only a coarse filter: an optional numeral-like start, an
-- optional run of non-whitespace characters, and a mandatory numeral-like
-- ending. The captured groups are lower-cased and concatenated, and
-- 'parseNumeral' decides whether the word really is a numeral.
--
-- The character classes are written as @[mnrs]@ and @[sßz]@; a @|@ inside
-- brackets is a literal pipe, not alternation, so it must not appear there.
ruleAllNumeralWords :: Rule
ruleAllNumeralWords = Rule
  { name = "simple and complex numerals written as one word"
  , pattern =
    [ regex "(ein|zwei|drei|vier|fünf|sech|sieb|acht|neun|zehn|elf|zwölf|hundert|tausend)?([^\\s]+)?(eine[mnrs]?|eins?|zwei|drei|vier|fünf|sechs|sieben|acht|neun|zehn|elf|zwölf|[sßz]ig|hundert|tausend)"
    ]
  , prod = \tokens -> case tokens of
      (Token RegexMatch (GroupMatch matches) : _) ->
        parseNumeral (concatMap (Text.unpack . Text.toLower) matches)
          >>= integer
      _ -> Nothing
  }
|
||||
|
||||
rules :: [Rule]
|
||||
rules =
|
||||
[ ruleCouple
|
||||
@ -330,8 +233,6 @@ rules =
|
||||
, ruleDecimalWithThousandsSeparator
|
||||
, ruleDozen
|
||||
, ruleFew
|
||||
, ruleInteger2
|
||||
, ruleInteger3
|
||||
, ruleIntegerWithThousandsSeparator
|
||||
, ruleIntersect
|
||||
, ruleMultiply
|
||||
@ -340,6 +241,6 @@ rules =
|
||||
, ruleNumeralsSuffixesKMG
|
||||
, ruleNumeralsUnd
|
||||
, rulePowersOfTen
|
||||
, ruleTen
|
||||
, ruleZeroToNineteen
|
||||
, ruleZero
|
||||
, ruleAllNumeralWords
|
||||
]
|
||||
|
@ -99,13 +99,6 @@ classifiers
|
||||
koData =
|
||||
ClassData{prior = -infinity, unseen = -0.6931471805599453,
|
||||
likelihoods = HashMap.fromList [], n = 0}}),
|
||||
("integer (20..90)",
|
||||
Classifier{okData =
|
||||
ClassData{prior = 0.0, unseen = -1.0986122886681098,
|
||||
likelihoods = HashMap.fromList [("", 0.0)], n = 1},
|
||||
koData =
|
||||
ClassData{prior = -infinity, unseen = -0.6931471805599453,
|
||||
likelihoods = HashMap.fromList [], n = 0}}),
|
||||
("Maha Shivaratri",
|
||||
Classifier{okData =
|
||||
ClassData{prior = 0.0, unseen = -1.0986122886681098,
|
||||
@ -346,14 +339,6 @@ classifiers
|
||||
("<day-of-month> (ordinal)", -2.8622008809294686),
|
||||
("hour", -2.8622008809294686), ("minute", -2.169053700369523)],
|
||||
n = 8}}),
|
||||
("integer (0..19)",
|
||||
Classifier{okData =
|
||||
ClassData{prior = -0.10008345855698253,
|
||||
unseen = -3.6888794541139363,
|
||||
likelihoods = HashMap.fromList [("", 0.0)], n = 38},
|
||||
koData =
|
||||
ClassData{prior = -2.3513752571634776, unseen = -1.791759469228055,
|
||||
likelihoods = HashMap.fromList [("", 0.0)], n = 4}}),
|
||||
("between <time-of-day> and <time-of-day> (interval)",
|
||||
Classifier{okData =
|
||||
ClassData{prior = 0.0, unseen = -2.1972245773362196,
|
||||
@ -566,7 +551,8 @@ classifiers
|
||||
unseen = -1.6094379124341003,
|
||||
likelihoods =
|
||||
HashMap.fromList
|
||||
[("integer (0..19)hour (grain)", -0.6931471805599453),
|
||||
[("simple and complex numerals written as one wordhour (grain)",
|
||||
-0.6931471805599453),
|
||||
("hour", -0.6931471805599453)],
|
||||
n = 1},
|
||||
koData =
|
||||
@ -574,9 +560,18 @@ classifiers
|
||||
unseen = -2.1972245773362196,
|
||||
likelihoods =
|
||||
HashMap.fromList
|
||||
[("integer (0..19)hour (grain)", -0.6931471805599453),
|
||||
[("simple and complex numerals written as one wordhour (grain)",
|
||||
-0.6931471805599453),
|
||||
("hour", -0.6931471805599453)],
|
||||
n = 3}}),
|
||||
("simple and complex numerals written as one word",
|
||||
Classifier{okData =
|
||||
ClassData{prior = -9.763846956391606e-2,
|
||||
unseen = -3.713572066704308,
|
||||
likelihoods = HashMap.fromList [("", 0.0)], n = 39},
|
||||
koData =
|
||||
ClassData{prior = -2.374905754573672, unseen = -1.791759469228055,
|
||||
likelihoods = HashMap.fromList [("", 0.0)], n = 4}}),
|
||||
("intersect",
|
||||
Classifier{okData =
|
||||
ClassData{prior = -0.12020308869575518,
|
||||
@ -1617,7 +1612,8 @@ classifiers
|
||||
likelihoods =
|
||||
HashMap.fromList
|
||||
[("integer (numeric)", -6.995858860691034e-2),
|
||||
("integer (0..19)", -2.6946271807700692)],
|
||||
("simple and complex numerals written as one word",
|
||||
-2.6946271807700692)],
|
||||
n = 72},
|
||||
koData =
|
||||
ClassData{prior = -1.5664205273504095,
|
||||
@ -1625,7 +1621,8 @@ classifiers
|
||||
likelihoods =
|
||||
HashMap.fromList
|
||||
[("integer (numeric)", -0.2113090936672069),
|
||||
("integer (0..19)", -1.6582280766035324)],
|
||||
("simple and complex numerals written as one word",
|
||||
-1.6582280766035324)],
|
||||
n = 19}}),
|
||||
("year",
|
||||
Classifier{okData =
|
||||
@ -1656,47 +1653,62 @@ classifiers
|
||||
likelihoods =
|
||||
HashMap.fromList
|
||||
[("week", -1.9339339580085977),
|
||||
("integer (0..19)year (grain)", -3.3202283191284883),
|
||||
("simple and complex numerals written as one wordhour (grain)",
|
||||
-3.3202283191284883),
|
||||
("integer (numeric)day (grain)", -2.8094026953624978),
|
||||
("couplehour (grain)", -3.7256934272366524),
|
||||
("integer (0..19)hour (grain)", -3.3202283191284883),
|
||||
("simple and complex numerals written as one wordyear (grain)",
|
||||
-3.3202283191284883),
|
||||
("second", -3.7256934272366524),
|
||||
("integer (numeric)year (grain)", -3.7256934272366524),
|
||||
("day", -2.8094026953624978), ("year", -3.0325462466767075),
|
||||
("day", -2.8094026953624978),
|
||||
("simple and complex numerals written as one wordmonth (grain)",
|
||||
-3.7256934272366524),
|
||||
("year", -3.0325462466767075),
|
||||
("simple and complex numerals written as one wordsecond (grain)",
|
||||
-3.7256934272366524),
|
||||
("integer (numeric)week (grain)", -2.627081138568543),
|
||||
("integer (0..19)month (grain)", -3.7256934272366524),
|
||||
("integer (0..19)second (grain)", -3.7256934272366524),
|
||||
("hour", -2.8094026953624978), ("month", -3.7256934272366524),
|
||||
("integer (numeric)minute (grain)", -2.627081138568543),
|
||||
("integer (0..19)minute (grain)", -3.7256934272366524),
|
||||
("simple and complex numerals written as one wordweek (grain)",
|
||||
-2.472930458741285),
|
||||
("minute", -2.472930458741285),
|
||||
("integer (numeric)hour (grain)", -3.7256934272366524),
|
||||
("integer (0..19)week (grain)", -2.472930458741285)],
|
||||
("simple and complex numerals written as one wordminute (grain)",
|
||||
-3.7256934272366524)],
|
||||
n = 30},
|
||||
koData =
|
||||
ClassData{prior = -0.5937747067467416, unseen = -4.584967478670572,
|
||||
likelihoods =
|
||||
HashMap.fromList
|
||||
[("week", -2.62880082944807),
|
||||
("integer (0..19)year (grain)", -2.9652730660692823),
|
||||
("simple and complex numerals written as one wordhour (grain)",
|
||||
-3.8815637979434374),
|
||||
("integer (numeric)day (grain)", -3.4760986898352733),
|
||||
("integer (0..19)hour (grain)", -3.8815637979434374),
|
||||
("simple and complex numerals written as one wordyear (grain)",
|
||||
-2.9652730660692823),
|
||||
("second", -2.9652730660692823),
|
||||
("integer (numeric)second (grain)", -3.4760986898352733),
|
||||
("integer (numeric)year (grain)", -3.4760986898352733),
|
||||
("day", -2.3774864011671633), ("year", -2.62880082944807),
|
||||
("day", -2.3774864011671633),
|
||||
("simple and complex numerals written as one wordmonth (grain)",
|
||||
-2.9652730660692823),
|
||||
("year", -2.62880082944807),
|
||||
("simple and complex numerals written as one wordsecond (grain)",
|
||||
-3.4760986898352733),
|
||||
("integer (numeric)week (grain)", -3.188416617383492),
|
||||
("integer (0..19)month (grain)", -2.9652730660692823),
|
||||
("integer (0..19)second (grain)", -3.4760986898352733),
|
||||
("hour", -3.188416617383492), ("month", -2.62880082944807),
|
||||
("integer (numeric)minute (grain)", -3.4760986898352733),
|
||||
("integer (0..19)minute (grain)", -3.4760986898352733),
|
||||
("simple and complex numerals written as one wordday (grain)",
|
||||
-2.9652730660692823),
|
||||
("simple and complex numerals written as one wordweek (grain)",
|
||||
-3.188416617383492),
|
||||
("integer (numeric)month (grain)", -3.4760986898352733),
|
||||
("minute", -2.9652730660692823),
|
||||
("coupleday (grain)", -3.4760986898352733),
|
||||
("integer (numeric)hour (grain)", -3.4760986898352733),
|
||||
("integer (0..19)day (grain)", -2.9652730660692823),
|
||||
("integer (0..19)week (grain)", -3.188416617383492)],
|
||||
("simple and complex numerals written as one wordminute (grain)",
|
||||
-3.4760986898352733)],
|
||||
n = 37}}),
|
||||
("Dienstag",
|
||||
Classifier{okData =
|
||||
@ -1754,9 +1766,10 @@ classifiers
|
||||
ClassData{prior = 0.0, unseen = -2.0794415416798357,
|
||||
likelihoods =
|
||||
HashMap.fromList
|
||||
[("hour", -0.8472978603872037),
|
||||
("integer (numeric)time-of-day (latent)", -1.252762968495368),
|
||||
("integer (20..90)time-of-day (latent)", -1.252762968495368)],
|
||||
[("simple and complex numerals written as one wordtime-of-day (latent)",
|
||||
-1.252762968495368),
|
||||
("hour", -0.8472978603872037),
|
||||
("integer (numeric)time-of-day (latent)", -1.252762968495368)],
|
||||
n = 2},
|
||||
koData =
|
||||
ClassData{prior = -infinity, unseen = -1.3862943611198906,
|
||||
@ -2242,25 +2255,33 @@ classifiers
|
||||
likelihoods =
|
||||
HashMap.fromList
|
||||
[("week", -2.740840023925201),
|
||||
("integer (0..19)year (grain)", -3.028522096376982),
|
||||
("simple and complex numerals written as one wordhour (grain)",
|
||||
-3.4339872044851463),
|
||||
("integer (numeric)day (grain)", -3.4339872044851463),
|
||||
("integer (0..19)hour (grain)", -3.4339872044851463),
|
||||
("simple and complex numerals written as one wordyear (grain)",
|
||||
-3.028522096376982),
|
||||
("second", -3.028522096376982),
|
||||
("integer (numeric)second (grain)", -3.4339872044851463),
|
||||
("integer (numeric)year (grain)", -3.4339872044851463),
|
||||
("day", -2.3353749158170367), ("year", -2.740840023925201),
|
||||
("day", -2.3353749158170367),
|
||||
("simple and complex numerals written as one wordmonth (grain)",
|
||||
-3.028522096376982),
|
||||
("year", -2.740840023925201),
|
||||
("simple and complex numerals written as one wordsecond (grain)",
|
||||
-3.4339872044851463),
|
||||
("integer (numeric)week (grain)", -3.4339872044851463),
|
||||
("integer (0..19)month (grain)", -3.028522096376982),
|
||||
("integer (0..19)second (grain)", -3.4339872044851463),
|
||||
("hour", -3.028522096376982), ("month", -2.740840023925201),
|
||||
("integer (numeric)minute (grain)", -3.4339872044851463),
|
||||
("integer (0..19)minute (grain)", -3.4339872044851463),
|
||||
("simple and complex numerals written as one wordday (grain)",
|
||||
-3.028522096376982),
|
||||
("simple and complex numerals written as one wordweek (grain)",
|
||||
-3.028522096376982),
|
||||
("integer (numeric)month (grain)", -3.4339872044851463),
|
||||
("minute", -3.028522096376982),
|
||||
("coupleday (grain)", -3.028522096376982),
|
||||
("integer (numeric)hour (grain)", -3.4339872044851463),
|
||||
("integer (0..19)day (grain)", -3.028522096376982),
|
||||
("integer (0..19)week (grain)", -3.028522096376982)],
|
||||
("simple and complex numerals written as one wordminute (grain)",
|
||||
-3.4339872044851463)],
|
||||
n = 20},
|
||||
koData =
|
||||
ClassData{prior = -infinity, unseen = -3.1354942159291497,
|
||||
@ -2437,22 +2458,29 @@ classifiers
|
||||
likelihoods =
|
||||
HashMap.fromList
|
||||
[("week", -2.5257286443082556),
|
||||
("integer (0..19)year (grain)", -2.8134107167600364),
|
||||
("integer (numeric)day (grain)", -3.2188758248682006),
|
||||
("simple and complex numerals written as one wordyear (grain)",
|
||||
-2.8134107167600364),
|
||||
("second", -2.8134107167600364),
|
||||
("integer (numeric)second (grain)", -3.2188758248682006),
|
||||
("integer (numeric)year (grain)", -3.2188758248682006),
|
||||
("day", -2.5257286443082556), ("year", -2.5257286443082556),
|
||||
("day", -2.5257286443082556),
|
||||
("simple and complex numerals written as one wordmonth (grain)",
|
||||
-2.8134107167600364),
|
||||
("year", -2.5257286443082556),
|
||||
("simple and complex numerals written as one wordsecond (grain)",
|
||||
-3.2188758248682006),
|
||||
("integer (numeric)week (grain)", -2.8134107167600364),
|
||||
("integer (0..19)month (grain)", -2.8134107167600364),
|
||||
("integer (0..19)second (grain)", -3.2188758248682006),
|
||||
("month", -2.5257286443082556),
|
||||
("integer (numeric)minute (grain)", -3.2188758248682006),
|
||||
("integer (0..19)minute (grain)", -3.2188758248682006),
|
||||
("simple and complex numerals written as one wordday (grain)",
|
||||
-2.8134107167600364),
|
||||
("simple and complex numerals written as one wordweek (grain)",
|
||||
-3.2188758248682006),
|
||||
("integer (numeric)month (grain)", -3.2188758248682006),
|
||||
("minute", -2.8134107167600364),
|
||||
("integer (0..19)day (grain)", -2.8134107167600364),
|
||||
("integer (0..19)week (grain)", -3.2188758248682006)],
|
||||
("simple and complex numerals written as one wordminute (grain)",
|
||||
-3.2188758248682006)],
|
||||
n = 16},
|
||||
koData =
|
||||
ClassData{prior = -infinity, unseen = -2.9444389791664407,
|
||||
|
@ -327,10 +327,10 @@ classifiers
|
||||
unseen = -4.31748811353631,
|
||||
likelihoods =
|
||||
HashMap.fromList
|
||||
[("<integer> (latent time-of-day)", -0.9718605830289658),
|
||||
[("<integer> (latent time-of-day)", -0.9718605830289657),
|
||||
("intersect by \"di\", \"della\", \"del\"", -3.20545280453606),
|
||||
("day", -2.3581549441488563), ("Lunedi", -3.6109179126442243),
|
||||
("hour", -0.9718605830289658),
|
||||
("hour", -0.9718605830289657),
|
||||
("two time tokens separated by `di`", -3.20545280453606),
|
||||
("Domenica", -3.6109179126442243)],
|
||||
n = 33}}),
|
||||
|
@ -434,6 +434,7 @@ library
|
||||
, Duckling.Numeral.DA.Rules
|
||||
, Duckling.Numeral.DE.Corpus
|
||||
, Duckling.Numeral.DE.Rules
|
||||
, Duckling.Numeral.DE.NumParser
|
||||
, Duckling.Numeral.EL.Corpus
|
||||
, Duckling.Numeral.EL.Rules
|
||||
, Duckling.Numeral.EN.Corpus
|
||||
|
Loading…
Reference in New Issue
Block a user