mirror of
https://github.com/facebook/duckling.git
synced 2024-11-24 07:23:03 +03:00
DE-Numeral-complex-German-numerals (#699)
Summary: The commit adds a rule and an underlying parser for German numeral expressions representing (integer) numbers smaller than 1 million. Unlike in English, those numbers are written as single words, e.g. "neunhundertsiebenundachtzigtausendsechshundertvierundfünfzig" (987654). Other rules are simplified or removed to eliminate redundancies. Pull Request resolved: https://github.com/facebook/duckling/pull/699 Reviewed By: patapizza Differential Revision: D37716120 Pulled By: stroxler fbshipit-source-id: 90b26e253259c5bc1aaa76f3972537c2361f6bb3
This commit is contained in:
parent
1faab00741
commit
9509e042dc
@ -33,6 +33,8 @@ allExamples = concat
|
|||||||
, examples (NumeralValue 1)
|
, examples (NumeralValue 1)
|
||||||
[ "1"
|
[ "1"
|
||||||
, "eins"
|
, "eins"
|
||||||
|
, "Eine"
|
||||||
|
, "einen"
|
||||||
]
|
]
|
||||||
, examples (NumeralValue 3)
|
, examples (NumeralValue 3)
|
||||||
[ "3"
|
[ "3"
|
||||||
@ -46,11 +48,12 @@ allExamples = concat
|
|||||||
]
|
]
|
||||||
, examples (NumeralValue 30)
|
, examples (NumeralValue 30)
|
||||||
[ "30"
|
[ "30"
|
||||||
|
, "dreißig"
|
||||||
, "dreissig"
|
, "dreissig"
|
||||||
]
|
]
|
||||||
, examples (NumeralValue 33)
|
, examples (NumeralValue 33)
|
||||||
[ "33"
|
[ "33"
|
||||||
, "drei Und dreissig"
|
, "dreiunddreißig"
|
||||||
, "dreiunddreissig"
|
, "dreiunddreissig"
|
||||||
, "0033"
|
, "0033"
|
||||||
]
|
]
|
||||||
@ -76,11 +79,11 @@ allExamples = concat
|
|||||||
]
|
]
|
||||||
, examples (NumeralValue 200)
|
, examples (NumeralValue 200)
|
||||||
[ "200"
|
[ "200"
|
||||||
, "zwei hundert"
|
, "zweihundert"
|
||||||
]
|
]
|
||||||
, examples (NumeralValue 102)
|
, examples (NumeralValue 102)
|
||||||
[ "102"
|
[ "102"
|
||||||
, "Hundert zwei"
|
, "Hundertzwei"
|
||||||
]
|
]
|
||||||
, examples (NumeralValue 1.1)
|
, examples (NumeralValue 1.1)
|
||||||
[ "1,1"
|
[ "1,1"
|
||||||
@ -97,12 +100,15 @@ allExamples = concat
|
|||||||
, "100000"
|
, "100000"
|
||||||
, "100K"
|
, "100K"
|
||||||
, "100k"
|
, "100k"
|
||||||
|
, "einhunderttausend"
|
||||||
|
, "hunderttausend"
|
||||||
]
|
]
|
||||||
, examples (NumeralValue 3000000)
|
, examples (NumeralValue 3000000)
|
||||||
[ "3M"
|
[ "3M"
|
||||||
, "3000K"
|
, "3000K"
|
||||||
, "3000000"
|
, "3000000"
|
||||||
, "3.000.000"
|
, "3.000.000"
|
||||||
|
, "drei Millionen"
|
||||||
]
|
]
|
||||||
, examples (NumeralValue 1200000)
|
, examples (NumeralValue 1200000)
|
||||||
[ "1.200.000"
|
[ "1.200.000"
|
||||||
@ -120,30 +126,34 @@ allExamples = concat
|
|||||||
, "-1200K"
|
, "-1200K"
|
||||||
, "-,0012G"
|
, "-,0012G"
|
||||||
]
|
]
|
||||||
|
, examples (NumeralValue 1852)
|
||||||
|
[ "eintausendachthundertzweiundfünfzig"
|
||||||
|
, "tausendachthundertzweiundfünfzig"
|
||||||
|
, "achtzehnhundertzweiundfünfzig"]
|
||||||
, examples (NumeralValue 5000)
|
, examples (NumeralValue 5000)
|
||||||
[ "5 tausend"
|
[ "5 tausend"
|
||||||
, "fünf tausend"
|
, "fünftausend"
|
||||||
]
|
]
|
||||||
, examples (NumeralValue 200000)
|
, examples (NumeralValue 200000)
|
||||||
[ "zwei hundert tausend"
|
[ "zweihunderttausend"
|
||||||
]
|
]
|
||||||
, examples (NumeralValue 721012)
|
, examples (NumeralValue 721012)
|
||||||
[ "sieben hundert einundzwanzig tausend zwölf"
|
[ "siebenhunderteinundzwanzigtausendzwölf"
|
||||||
|
, "siebenhunderteinundzwanzigtausendundzwölf"
|
||||||
]
|
]
|
||||||
, examples (NumeralValue 31256721)
|
, examples (NumeralValue 31256721)
|
||||||
[ "ein und dreissig millionen zwei hundert sechs und fünfzig tausend sieben hundert ein und zwanzig"
|
[ "einunddreissig millionen zweihundertsechsundfünfzigtausendsiebenhunderteinundzwanzig"
|
||||||
|
, "einunddreißig Millionen zweihundertsechsundfünfzigtausendundsiebenhunderteinundzwanzig"
|
||||||
]
|
]
|
||||||
, examples (NumeralValue 1416.15)
|
, examples (NumeralValue 1416.15)
|
||||||
[ "1416,15"
|
[ "1416,15"
|
||||||
]
|
, "tausendvierhundertsechzehn Komma fünfzehn"
|
||||||
, examples (NumeralValue 1416.15)
|
|
||||||
[ "1.416,15"
|
|
||||||
]
|
]
|
||||||
, examples (NumeralValue 1000000.0)
|
, examples (NumeralValue 1000000.0)
|
||||||
[ "1.000.000,00",
|
[ "1.000.000,00",
|
||||||
"eine million"
|
"eine million"
|
||||||
]
|
]
|
||||||
, examples (NumeralValue 2771090092000000.0)
|
, examples (NumeralValue 2771090092000000.0)
|
||||||
[ "zwei billiarden sieben hundert ein und siebzig billionen neunzig milliarden zwei und neunzig millionen"
|
[ "zwei billiarden siebenhunderteinundsiebzig billionen neunzig milliarden zweiundneunzig millionen"
|
||||||
]
|
]
|
||||||
]
|
]
|
||||||
|
160
Duckling/Numeral/DE/NumParser.hs
Normal file
160
Duckling/Numeral/DE/NumParser.hs
Normal file
@ -0,0 +1,160 @@
|
|||||||
|
{-# LANGUAGE DeriveFunctor #-}
|
||||||
|
|
||||||
|
module Duckling.Numeral.DE.NumParser (parseNumeral) where
|
||||||
|
|
||||||
|
import Prelude
|
||||||
|
import Control.Applicative
|
||||||
|
import Data.Char
|
||||||
|
import Data.List
|
||||||
|
import Data.Foldable
|
||||||
|
import Data.String
|
||||||
|
|
||||||
|
-- | A minimal backtracking-free parser over 'String'.
--
-- Running a parser either fails with 'Nothing' or yields the parsed value
-- together with the input that was not consumed.
newtype Parser a = Parser
  { runParser :: String -> Maybe (a, String)
  }

-- | Maps over the parsed value; the remaining input is left untouched.
instance Functor Parser where
  fmap f (Parser run) = Parser $ \input ->
    case run input of
      Nothing -> Nothing
      Just (value, rest) -> Just (f value, rest)
|
||||||
|
|
||||||
|
-- | Parser that consumes exactly the given character.
--
-- Fails on empty input or when the first character differs from @c@.
char :: Char -> Parser Char
char c = Parser $ \input -> case input of
  (x : rest) | x == c -> Just (x, rest)
  _ -> Nothing
|
||||||
|
|
||||||
|
-- | Sequencing: run the left parser, then the right one on the remaining
-- input, and apply the first result to the second. Fails if either fails.
instance Applicative Parser where
  pure value = Parser $ \input -> Just (value, input)
  Parser runF <*> Parser runX = Parser $ \input -> do
    (f, afterF) <- runF input
    (x, afterX) <- runX afterF
    Just (f x, afterX)
|
||||||
|
|
||||||
|
-- | Choice: try the left parser; only if it fails, run the right parser on
-- the same (unconsumed) input. The first success wins.
instance Alternative Parser where
  empty = Parser $ \_ -> Nothing
  Parser first <|> Parser second = Parser $ \input ->
    case first input of
      Nothing -> second input
      success -> success
|
||||||
|
|
||||||
|
-- | A parser that produces the numeric value of the consumed word (part).
type NumParser = Parser Integer

infixl 6 .+.
infixl 7 .*.

-- | Run both parsers in sequence and add their values.
(.+.) :: NumParser -> NumParser -> NumParser
(.+.) = liftA2 (+)

-- | Run both parsers in sequence and multiply their values.
(.*.) :: NumParser -> NumParser -> NumParser
(.*.) = liftA2 (*)
|
||||||
|
|
||||||
|
-- | Make a numeral parser optional: if @p@ fails, succeed with the value 0
-- without consuming any input.
opt :: NumParser -> NumParser
opt p = p <|> pure 0
|
||||||
|
|
||||||
|
-- | The parsers derived from one digit word.
data NumItem = NumItem
  { base :: NumParser
    -- ^ the digit itself
  , plus10 :: NumParser
    -- ^ the digit plus ten (e.g. built from the digit word followed by "zehn")
  , times10 :: [NumParser]
    -- ^ the digit times ten; a list because several spellings may exist
  }
|
||||||
|
|
||||||
|
-- | Build the regular 'NumItem' for a digit: the word itself, the word
-- followed by "zehn" (added), and the word followed by "zig" (multiplied).
-- Irregular forms are expected to be overridden via record update.
defaultNumItem :: Integer -> String -> NumItem
defaultNumItem value form =
  let word = assign value form
   in NumItem
        { base = word
        , plus10 = word .+. ten
        , times10 = [word .*. ty]
        }
|
||||||
|
|
||||||
|
-- | Shape of a function that ties a numeric value to its written form.
type Assignment = Integer -> String -> NumParser

-- | @assign value form@ consumes the literal string @form@, character by
-- character, and yields @value@. It fails as soon as one character does not
-- match.
assign :: Assignment
assign value form = value <$ traverse_ char form
|
||||||
|
|
||||||
|
-- | Literal building blocks of German numerals.
--
-- Note that 'ty' ("zig") carries the value 10 so it can be multiplied with a
-- digit, and 'und' carries 0 so it can be added without changing a sum.
ten, ty, hundred, thousand, und :: NumParser
ten = assign 10 "zehn"
ty = assign 10 "zig"
hundred = assign 100 "hundert"
thousand = assign 1000 "tausend"
und = assign 0 "und"
|
||||||
|
|
||||||
|
-- | The digits 1..9. Regular digits use 'defaultNumItem' unchanged; the
-- others override the forms whose spelling is not simply
-- @digit ++ "zehn"@ or @digit ++ "zig"@.
one, two, three, four, five, six, seven, eight, nine :: NumItem
one =
  (defaultNumItem 1 "ein")
    { plus10 = assign 11 "elf"
    , times10 = [ten]
    }
two =
  (defaultNumItem 2 "zwei")
    { plus10 = assign 12 "zwölf"
    , times10 = [assign 20 "zwanzig"]
    }
three =
  (defaultNumItem 3 "drei")
    { times10 = [assign 30 "dreißig", assign 30 "dreissig"]
    }
four = defaultNumItem 4 "vier"
five = defaultNumItem 5 "fünf"
six =
  (defaultNumItem 6 "sechs")
    { plus10 = assign 16 "sechzehn"
    , times10 = [assign 60 "sechzig"]
    }
seven =
  (defaultNumItem 7 "sieben")
    { plus10 = assign 17 "siebzehn"
    , times10 = [assign 70 "siebzig"]
    }
eight = defaultNumItem 8 "acht"
nine = defaultNumItem 9 "neun"
|
||||||
|
|
||||||
|
-- | All digit items in ascending order, starting with 'one'.
digitLexicon :: [NumItem]
digitLexicon =
  [ one
  , two
  , three
  , four
  , five
  , six
  , seven
  , eight
  , nine
  ]

-- | Any single digit word; alternatives are tried in lexicon order.
from1to9 :: NumParser
from1to9 = asum (map base digitLexicon)
|
||||||
|
|
||||||
|
-- | Any multiple of ten built from the digits after 'one' (the first lexicon
-- entry is skipped because its 'times10' form is plain "zehn", which is
-- handled separately).
--
-- Uses the total @drop 1@ instead of the partial @tail@ so the definition
-- cannot crash should the lexicon ever be empty.
tensFrom20 :: NumParser
tensFrom20 = asum (concatMap times10 (drop 1 digitLexicon))
|
||||||
|
|
||||||
|
-- | Numbers below one hundred. Alternatives are tried in this order:
-- an optional "<digit>und" prefix followed by a multiple of ten, the
-- digit-plus-ten forms, plain "zehn", and finally a single digit.
from1to99 :: NumParser
from1to99 = asum [compound, teens, ten, from1to9]
  where
    compound = opt (from1to9 .+. und) .+. tensFrom20
    teens = asum (map plus10 digitLexicon)
|
||||||
|
|
||||||
|
-- | Numbers below one thousand: an optional "<digit>hundert" part (with an
-- optional trailing "und") followed by an optional part below one hundred.
-- Because both parts are optional, this also succeeds with 0 on input it
-- cannot consume.
from1to999 :: NumParser
from1to999 = hundreds .+. opt from1to99
  where
    hundreds = opt (from1to9 .*. hundred .+. opt und)
|
||||||
|
|
||||||
|
-- | Numbers below one million: an optional "<below 1000>tausend" part (with
-- an optional trailing "und") followed by an optional part below one
-- thousand. As both parts are optional, unparseable input yields 0 with
-- nothing consumed.
from1to999999 :: NumParser
from1to999999 = thousands .+. opt from1to999
  where
    thousands = opt (from1to999 .*. thousand .+. opt und)
|
||||||
|
|
||||||
|
-- | Like 'from1to999999', but also accepts input that starts directly with
-- "hundert" or "tausend": in that case an implicit leading "ein" is
-- prepended before parsing.
from1to999999' :: NumParser
from1to999999' = Parser (runParser from1to999999 . withImpliedOne)
  where
    withImpliedOne input
      | any (`isPrefixOf` input) ["hundert", "tausend"] = "ein" ++ input
      | otherwise = input
|
||||||
|
|
||||||
|
-- | Numbers spelled as "<11..19>hundert", optionally followed by an optional
-- "und" and a part below one hundred (e.g. the corpus example
-- "achtzehnhundertzweiundfünfzig").
fromYear1100to1999 :: NumParser
fromYear1100to1999 = centuries .+. opt remainder
  where
    centuries = asum [plus10 digit .*. hundred | digit <- digitLexicon]
    remainder = opt und .+. from1to99
|
||||||
|
|
||||||
|
-- | Entry parser: the "<11..19>hundert" spelling is tried first, then the
-- general parser for numbers below one million.
allNumerals :: NumParser
allNumerals = asum [fromYear1100to1999, from1to999999']
|
||||||
|
|
||||||
|
-- | Decide whether a parse result is acceptable given the input left over.
--
-- * No leftover input: the value is accepted.
-- * Leftover input equal to one of the inflection endings of "ein"
--   ("s", "e", "em", "en", "er", "es"): accepted only when the numeral ends
--   in the word "ein", i.e. when @n `mod` 100 == 1@ (1, 101, 1001, ...).
-- * Anything else: rejected.
--
-- The check is deliberately @mod 100@ rather than @mod 10@: 11 ("elf") and
-- 21..91 ("einund...zig") end in the digit 1 but not in the word "ein", so an
-- inflection ending after them is not valid.
removeInflection :: (Integer, String) -> Maybe Integer
removeInflection (n, suffix)
  | null suffix = Just n
  | endsInEin && suffix `elem` inflections = Just n
  | otherwise = Nothing
  where
    endsInEin = n `mod` 100 == 1
    inflections = ["s", "e", "em", "en", "er", "es"]
|
||||||
|
|
||||||
|
-- | Parse a lower-cased German numeral written as a single word.
--
-- Returns the value when the whole input is a numeral, optionally followed
-- by an inflection ending of "ein" (see 'removeInflection'); otherwise
-- 'Nothing'.
--
-- Empty input is rejected explicitly: every component of the underlying
-- parser is optional, so it would otherwise succeed on @""@ with the value 0.
parseNumeral :: String -> Maybe Integer
parseNumeral [] = Nothing
parseNumeral s = runParser allNumerals s >>= removeInflection
|
@ -27,6 +27,7 @@ import Duckling.Numeral.Types (NumeralData (..))
|
|||||||
import Duckling.Regex.Types
|
import Duckling.Regex.Types
|
||||||
import Duckling.Types
|
import Duckling.Types
|
||||||
import qualified Duckling.Numeral.Types as TNumeral
|
import qualified Duckling.Numeral.Types as TNumeral
|
||||||
|
import Duckling.Numeral.DE.NumParser
|
||||||
|
|
||||||
ruleNumeralsPrefixWithNegativeOrMinus :: Rule
|
ruleNumeralsPrefixWithNegativeOrMinus :: Rule
|
||||||
ruleNumeralsPrefixWithNegativeOrMinus = Rule
|
ruleNumeralsPrefixWithNegativeOrMinus = Rule
|
||||||
@ -50,15 +51,6 @@ ruleFew = Rule
|
|||||||
, prod = \_ -> integer 3
|
, prod = \_ -> integer 3
|
||||||
}
|
}
|
||||||
|
|
||||||
ruleTen :: Rule
|
|
||||||
ruleTen = Rule
|
|
||||||
{ name = "ten"
|
|
||||||
, pattern =
|
|
||||||
[ regex "zehn"
|
|
||||||
]
|
|
||||||
, prod = \_ -> integer 10 >>= withGrain 1
|
|
||||||
}
|
|
||||||
|
|
||||||
ruleDecimalWithThousandsSeparator :: Rule
|
ruleDecimalWithThousandsSeparator :: Rule
|
||||||
ruleDecimalWithThousandsSeparator = Rule
|
ruleDecimalWithThousandsSeparator = Rule
|
||||||
{ name = "decimal with thousands separator"
|
{ name = "decimal with thousands separator"
|
||||||
@ -84,41 +76,6 @@ ruleDecimalNumeral = Rule
|
|||||||
_ -> Nothing
|
_ -> Nothing
|
||||||
}
|
}
|
||||||
|
|
||||||
-- TODO: Single-word composition (#110)
|
|
||||||
ruleInteger3 :: Rule
|
|
||||||
ruleInteger3 = Rule
|
|
||||||
{ name = "integer ([2-9][1-9])"
|
|
||||||
, pattern =
|
|
||||||
[ regex "(ein|zwei|drei|vier|fünf|sechs|sieben|acht|neun)und(zwanzig|dreissig|dreißig|vierzig|fünfzig|sechzig|siebzig|achtzig|neunzig)"
|
|
||||||
]
|
|
||||||
, prod = \tokens -> case tokens of
|
|
||||||
(Token RegexMatch (GroupMatch (m1:m2:_)):_) -> do
|
|
||||||
v1 <- case Text.toLower m1 of
|
|
||||||
"ein" -> Just 1
|
|
||||||
"zwei" -> Just 2
|
|
||||||
"drei" -> Just 3
|
|
||||||
"vier" -> Just 4
|
|
||||||
"fünf" -> Just 5
|
|
||||||
"sechs" -> Just 6
|
|
||||||
"sieben" -> Just 7
|
|
||||||
"acht" -> Just 8
|
|
||||||
"neun" -> Just 9
|
|
||||||
_ -> Nothing
|
|
||||||
v2 <- case Text.toLower m2 of
|
|
||||||
"zwanzig" -> Just 20
|
|
||||||
"dreissig" -> Just 30
|
|
||||||
"dreißig" -> Just 30
|
|
||||||
"vierzig" -> Just 40
|
|
||||||
"fünfzig" -> Just 50
|
|
||||||
"sechzig" -> Just 60
|
|
||||||
"siebzig" -> Just 70
|
|
||||||
"achtzig" -> Just 80
|
|
||||||
"neunzig" -> Just 90
|
|
||||||
_ -> Nothing
|
|
||||||
integer $ v1 + v2
|
|
||||||
_ -> Nothing
|
|
||||||
}
|
|
||||||
|
|
||||||
ruleNumeralsUnd :: Rule
|
ruleNumeralsUnd :: Rule
|
||||||
ruleNumeralsUnd = Rule
|
ruleNumeralsUnd = Rule
|
||||||
{ name = "numbers und"
|
{ name = "numbers und"
|
||||||
@ -221,78 +178,13 @@ rulePowersOfTen = Rule
|
|||||||
_ -> Nothing
|
_ -> Nothing
|
||||||
}
|
}
|
||||||
|
|
||||||
zeroNineteenMap :: HashMap Text Integer
|
ruleZero :: Rule
|
||||||
zeroNineteenMap = HashMap.fromList
|
ruleZero = Rule
|
||||||
[ ("keine", 0)
|
{ name = "integer 0"
|
||||||
, ("null", 0)
|
|
||||||
, ("nichts", 0)
|
|
||||||
, ("keiner", 0)
|
|
||||||
, ("kein", 0)
|
|
||||||
, ("keins", 0)
|
|
||||||
, ("keinen", 0)
|
|
||||||
, ("keines", 0)
|
|
||||||
, ("einer", 1)
|
|
||||||
, ("eins", 1)
|
|
||||||
, ("ein", 1)
|
|
||||||
, ("eine", 1)
|
|
||||||
, ("einser", 1)
|
|
||||||
, ("zwei", 2)
|
|
||||||
, ("drei", 3)
|
|
||||||
, ("vier", 4)
|
|
||||||
, ("fünf", 5)
|
|
||||||
, ("sechs", 6)
|
|
||||||
, ("sieben", 7)
|
|
||||||
, ("acht", 8)
|
|
||||||
, ("neun", 9)
|
|
||||||
, ("zehn", 10)
|
|
||||||
, ("elf", 11)
|
|
||||||
, ("zwölf", 12)
|
|
||||||
, ("dreizehn", 13)
|
|
||||||
, ("vierzehn", 14)
|
|
||||||
, ("fünfzehn", 15)
|
|
||||||
, ("sechzehn", 16)
|
|
||||||
, ("siebzehn", 17)
|
|
||||||
, ("achtzehn", 18)
|
|
||||||
, ("neunzehn", 19)
|
|
||||||
]
|
|
||||||
|
|
||||||
-- TODO: Single-word composition (#110)
|
|
||||||
ruleZeroToNineteen :: Rule
|
|
||||||
ruleZeroToNineteen = Rule
|
|
||||||
{ name = "integer (0..19)"
|
|
||||||
, pattern =
|
, pattern =
|
||||||
[ regex "(keine[rn]|keine?s?|null|nichts|eins?(er?)?|zwei|dreizehn|drei|vierzehn|vier|fünfzehn|fünf|sechzehn|sechs|siebzehn|sieben|achtzehn|acht|neunzehn|neun|elf|zwölf)"
|
[ regex "(keine(m|n|r|s)?|keins?|null|nichts)"
|
||||||
]
|
]
|
||||||
, prod = \tokens -> case tokens of
|
, prod = \_ -> integer 0
|
||||||
(Token RegexMatch (GroupMatch (match:_)):_) ->
|
|
||||||
HashMap.lookup (Text.toLower match) zeroNineteenMap >>= integer
|
|
||||||
_ -> Nothing
|
|
||||||
}
|
|
||||||
|
|
||||||
tensMap :: HashMap Text Integer
|
|
||||||
tensMap = HashMap.fromList
|
|
||||||
[ ( "zwanzig" , 20 )
|
|
||||||
, ( "dreissig", 30 )
|
|
||||||
, ( "dreißig" , 30 )
|
|
||||||
, ( "vierzig" , 40 )
|
|
||||||
, ( "fünfzig" , 50 )
|
|
||||||
, ( "sechzig" , 60 )
|
|
||||||
, ( "siebzig" , 70 )
|
|
||||||
, ( "achtzig" , 80 )
|
|
||||||
, ( "neunzig" , 90 )
|
|
||||||
]
|
|
||||||
|
|
||||||
-- TODO: Single-word composition (#110)
|
|
||||||
ruleInteger2 :: Rule
|
|
||||||
ruleInteger2 = Rule
|
|
||||||
{ name = "integer (20..90)"
|
|
||||||
, pattern =
|
|
||||||
[ regex "(zwanzig|dreissig|dreißig|vierzig|fünfzig|sechzig|siebzig|achtzig|neunzig)"
|
|
||||||
]
|
|
||||||
, prod = \tokens -> case tokens of
|
|
||||||
(Token RegexMatch (GroupMatch (match:_)):_) ->
|
|
||||||
HashMap.lookup (Text.toLower match) tensMap >>= integer
|
|
||||||
_ -> Nothing
|
|
||||||
}
|
}
|
||||||
|
|
||||||
ruleNumeralDotNumeral :: Rule
|
ruleNumeralDotNumeral :: Rule
|
||||||
@ -323,6 +215,17 @@ ruleIntegerWithThousandsSeparator = Rule
|
|||||||
_ -> Nothing
|
_ -> Nothing
|
||||||
}
|
}
|
||||||
|
|
||||||
|
-- | Numerals written as one word. The regex only pre-selects candidate
-- words (an optional numeral-like start, anything without whitespace, and a
-- numeral-like ending); the captured groups are lower-cased, concatenated
-- and handed to 'parseNumeral', which decides whether the word really is a
-- numeral.
--
-- The character classes are written as @[mnrs]@ and @[sßz]@: inside a
-- bracket expression @|@ is a literal character, not an alternation, so the
-- former @[m|n|r|s]@ / @[s|ß|z]@ also matched a literal @|@.
ruleAllNumeralWords :: Rule
ruleAllNumeralWords = Rule
  { name = "simple and complex numerals written as one word"
  , pattern =
    [ regex "(ein|zwei|drei|vier|fünf|sech|sieb|acht|neun|zehn|elf|zwölf|hundert|tausend)?([^\\s]+)?(eine[mnrs]?|eins?|zwei|drei|vier|fünf|sechs|sieben|acht|neun|zehn|elf|zwölf|[sßz]ig|hundert|tausend)"
    ]
  , prod = \tokens -> case tokens of
      (Token RegexMatch (GroupMatch matches) : _) ->
        parseNumeral (concatMap (Text.unpack . Text.toLower) matches)
          >>= integer
      _ -> Nothing
  }
|
||||||
|
|
||||||
rules :: [Rule]
|
rules :: [Rule]
|
||||||
rules =
|
rules =
|
||||||
[ ruleCouple
|
[ ruleCouple
|
||||||
@ -330,8 +233,6 @@ rules =
|
|||||||
, ruleDecimalWithThousandsSeparator
|
, ruleDecimalWithThousandsSeparator
|
||||||
, ruleDozen
|
, ruleDozen
|
||||||
, ruleFew
|
, ruleFew
|
||||||
, ruleInteger2
|
|
||||||
, ruleInteger3
|
|
||||||
, ruleIntegerWithThousandsSeparator
|
, ruleIntegerWithThousandsSeparator
|
||||||
, ruleIntersect
|
, ruleIntersect
|
||||||
, ruleMultiply
|
, ruleMultiply
|
||||||
@ -340,6 +241,6 @@ rules =
|
|||||||
, ruleNumeralsSuffixesKMG
|
, ruleNumeralsSuffixesKMG
|
||||||
, ruleNumeralsUnd
|
, ruleNumeralsUnd
|
||||||
, rulePowersOfTen
|
, rulePowersOfTen
|
||||||
, ruleTen
|
, ruleZero
|
||||||
, ruleZeroToNineteen
|
, ruleAllNumeralWords
|
||||||
]
|
]
|
||||||
|
@ -99,13 +99,6 @@ classifiers
|
|||||||
koData =
|
koData =
|
||||||
ClassData{prior = -infinity, unseen = -0.6931471805599453,
|
ClassData{prior = -infinity, unseen = -0.6931471805599453,
|
||||||
likelihoods = HashMap.fromList [], n = 0}}),
|
likelihoods = HashMap.fromList [], n = 0}}),
|
||||||
("integer (20..90)",
|
|
||||||
Classifier{okData =
|
|
||||||
ClassData{prior = 0.0, unseen = -1.0986122886681098,
|
|
||||||
likelihoods = HashMap.fromList [("", 0.0)], n = 1},
|
|
||||||
koData =
|
|
||||||
ClassData{prior = -infinity, unseen = -0.6931471805599453,
|
|
||||||
likelihoods = HashMap.fromList [], n = 0}}),
|
|
||||||
("Maha Shivaratri",
|
("Maha Shivaratri",
|
||||||
Classifier{okData =
|
Classifier{okData =
|
||||||
ClassData{prior = 0.0, unseen = -1.0986122886681098,
|
ClassData{prior = 0.0, unseen = -1.0986122886681098,
|
||||||
@ -346,14 +339,6 @@ classifiers
|
|||||||
("<day-of-month> (ordinal)", -2.8622008809294686),
|
("<day-of-month> (ordinal)", -2.8622008809294686),
|
||||||
("hour", -2.8622008809294686), ("minute", -2.169053700369523)],
|
("hour", -2.8622008809294686), ("minute", -2.169053700369523)],
|
||||||
n = 8}}),
|
n = 8}}),
|
||||||
("integer (0..19)",
|
|
||||||
Classifier{okData =
|
|
||||||
ClassData{prior = -0.10008345855698253,
|
|
||||||
unseen = -3.6888794541139363,
|
|
||||||
likelihoods = HashMap.fromList [("", 0.0)], n = 38},
|
|
||||||
koData =
|
|
||||||
ClassData{prior = -2.3513752571634776, unseen = -1.791759469228055,
|
|
||||||
likelihoods = HashMap.fromList [("", 0.0)], n = 4}}),
|
|
||||||
("between <time-of-day> and <time-of-day> (interval)",
|
("between <time-of-day> and <time-of-day> (interval)",
|
||||||
Classifier{okData =
|
Classifier{okData =
|
||||||
ClassData{prior = 0.0, unseen = -2.1972245773362196,
|
ClassData{prior = 0.0, unseen = -2.1972245773362196,
|
||||||
@ -566,7 +551,8 @@ classifiers
|
|||||||
unseen = -1.6094379124341003,
|
unseen = -1.6094379124341003,
|
||||||
likelihoods =
|
likelihoods =
|
||||||
HashMap.fromList
|
HashMap.fromList
|
||||||
[("integer (0..19)hour (grain)", -0.6931471805599453),
|
[("simple and complex numerals written as one wordhour (grain)",
|
||||||
|
-0.6931471805599453),
|
||||||
("hour", -0.6931471805599453)],
|
("hour", -0.6931471805599453)],
|
||||||
n = 1},
|
n = 1},
|
||||||
koData =
|
koData =
|
||||||
@ -574,9 +560,18 @@ classifiers
|
|||||||
unseen = -2.1972245773362196,
|
unseen = -2.1972245773362196,
|
||||||
likelihoods =
|
likelihoods =
|
||||||
HashMap.fromList
|
HashMap.fromList
|
||||||
[("integer (0..19)hour (grain)", -0.6931471805599453),
|
[("simple and complex numerals written as one wordhour (grain)",
|
||||||
|
-0.6931471805599453),
|
||||||
("hour", -0.6931471805599453)],
|
("hour", -0.6931471805599453)],
|
||||||
n = 3}}),
|
n = 3}}),
|
||||||
|
("simple and complex numerals written as one word",
|
||||||
|
Classifier{okData =
|
||||||
|
ClassData{prior = -9.763846956391606e-2,
|
||||||
|
unseen = -3.713572066704308,
|
||||||
|
likelihoods = HashMap.fromList [("", 0.0)], n = 39},
|
||||||
|
koData =
|
||||||
|
ClassData{prior = -2.374905754573672, unseen = -1.791759469228055,
|
||||||
|
likelihoods = HashMap.fromList [("", 0.0)], n = 4}}),
|
||||||
("intersect",
|
("intersect",
|
||||||
Classifier{okData =
|
Classifier{okData =
|
||||||
ClassData{prior = -0.12020308869575518,
|
ClassData{prior = -0.12020308869575518,
|
||||||
@ -1617,7 +1612,8 @@ classifiers
|
|||||||
likelihoods =
|
likelihoods =
|
||||||
HashMap.fromList
|
HashMap.fromList
|
||||||
[("integer (numeric)", -6.995858860691034e-2),
|
[("integer (numeric)", -6.995858860691034e-2),
|
||||||
("integer (0..19)", -2.6946271807700692)],
|
("simple and complex numerals written as one word",
|
||||||
|
-2.6946271807700692)],
|
||||||
n = 72},
|
n = 72},
|
||||||
koData =
|
koData =
|
||||||
ClassData{prior = -1.5664205273504095,
|
ClassData{prior = -1.5664205273504095,
|
||||||
@ -1625,7 +1621,8 @@ classifiers
|
|||||||
likelihoods =
|
likelihoods =
|
||||||
HashMap.fromList
|
HashMap.fromList
|
||||||
[("integer (numeric)", -0.2113090936672069),
|
[("integer (numeric)", -0.2113090936672069),
|
||||||
("integer (0..19)", -1.6582280766035324)],
|
("simple and complex numerals written as one word",
|
||||||
|
-1.6582280766035324)],
|
||||||
n = 19}}),
|
n = 19}}),
|
||||||
("year",
|
("year",
|
||||||
Classifier{okData =
|
Classifier{okData =
|
||||||
@ -1656,47 +1653,62 @@ classifiers
|
|||||||
likelihoods =
|
likelihoods =
|
||||||
HashMap.fromList
|
HashMap.fromList
|
||||||
[("week", -1.9339339580085977),
|
[("week", -1.9339339580085977),
|
||||||
("integer (0..19)year (grain)", -3.3202283191284883),
|
("simple and complex numerals written as one wordhour (grain)",
|
||||||
|
-3.3202283191284883),
|
||||||
("integer (numeric)day (grain)", -2.8094026953624978),
|
("integer (numeric)day (grain)", -2.8094026953624978),
|
||||||
("couplehour (grain)", -3.7256934272366524),
|
("couplehour (grain)", -3.7256934272366524),
|
||||||
("integer (0..19)hour (grain)", -3.3202283191284883),
|
("simple and complex numerals written as one wordyear (grain)",
|
||||||
|
-3.3202283191284883),
|
||||||
("second", -3.7256934272366524),
|
("second", -3.7256934272366524),
|
||||||
("integer (numeric)year (grain)", -3.7256934272366524),
|
("integer (numeric)year (grain)", -3.7256934272366524),
|
||||||
("day", -2.8094026953624978), ("year", -3.0325462466767075),
|
("day", -2.8094026953624978),
|
||||||
|
("simple and complex numerals written as one wordmonth (grain)",
|
||||||
|
-3.7256934272366524),
|
||||||
|
("year", -3.0325462466767075),
|
||||||
|
("simple and complex numerals written as one wordsecond (grain)",
|
||||||
|
-3.7256934272366524),
|
||||||
("integer (numeric)week (grain)", -2.627081138568543),
|
("integer (numeric)week (grain)", -2.627081138568543),
|
||||||
("integer (0..19)month (grain)", -3.7256934272366524),
|
|
||||||
("integer (0..19)second (grain)", -3.7256934272366524),
|
|
||||||
("hour", -2.8094026953624978), ("month", -3.7256934272366524),
|
("hour", -2.8094026953624978), ("month", -3.7256934272366524),
|
||||||
("integer (numeric)minute (grain)", -2.627081138568543),
|
("integer (numeric)minute (grain)", -2.627081138568543),
|
||||||
("integer (0..19)minute (grain)", -3.7256934272366524),
|
("simple and complex numerals written as one wordweek (grain)",
|
||||||
|
-2.472930458741285),
|
||||||
("minute", -2.472930458741285),
|
("minute", -2.472930458741285),
|
||||||
("integer (numeric)hour (grain)", -3.7256934272366524),
|
("integer (numeric)hour (grain)", -3.7256934272366524),
|
||||||
("integer (0..19)week (grain)", -2.472930458741285)],
|
("simple and complex numerals written as one wordminute (grain)",
|
||||||
|
-3.7256934272366524)],
|
||||||
n = 30},
|
n = 30},
|
||||||
koData =
|
koData =
|
||||||
ClassData{prior = -0.5937747067467416, unseen = -4.584967478670572,
|
ClassData{prior = -0.5937747067467416, unseen = -4.584967478670572,
|
||||||
likelihoods =
|
likelihoods =
|
||||||
HashMap.fromList
|
HashMap.fromList
|
||||||
[("week", -2.62880082944807),
|
[("week", -2.62880082944807),
|
||||||
("integer (0..19)year (grain)", -2.9652730660692823),
|
("simple and complex numerals written as one wordhour (grain)",
|
||||||
|
-3.8815637979434374),
|
||||||
("integer (numeric)day (grain)", -3.4760986898352733),
|
("integer (numeric)day (grain)", -3.4760986898352733),
|
||||||
("integer (0..19)hour (grain)", -3.8815637979434374),
|
("simple and complex numerals written as one wordyear (grain)",
|
||||||
|
-2.9652730660692823),
|
||||||
("second", -2.9652730660692823),
|
("second", -2.9652730660692823),
|
||||||
("integer (numeric)second (grain)", -3.4760986898352733),
|
("integer (numeric)second (grain)", -3.4760986898352733),
|
||||||
("integer (numeric)year (grain)", -3.4760986898352733),
|
("integer (numeric)year (grain)", -3.4760986898352733),
|
||||||
("day", -2.3774864011671633), ("year", -2.62880082944807),
|
("day", -2.3774864011671633),
|
||||||
|
("simple and complex numerals written as one wordmonth (grain)",
|
||||||
|
-2.9652730660692823),
|
||||||
|
("year", -2.62880082944807),
|
||||||
|
("simple and complex numerals written as one wordsecond (grain)",
|
||||||
|
-3.4760986898352733),
|
||||||
("integer (numeric)week (grain)", -3.188416617383492),
|
("integer (numeric)week (grain)", -3.188416617383492),
|
||||||
("integer (0..19)month (grain)", -2.9652730660692823),
|
|
||||||
("integer (0..19)second (grain)", -3.4760986898352733),
|
|
||||||
("hour", -3.188416617383492), ("month", -2.62880082944807),
|
("hour", -3.188416617383492), ("month", -2.62880082944807),
|
||||||
("integer (numeric)minute (grain)", -3.4760986898352733),
|
("integer (numeric)minute (grain)", -3.4760986898352733),
|
||||||
("integer (0..19)minute (grain)", -3.4760986898352733),
|
("simple and complex numerals written as one wordday (grain)",
|
||||||
|
-2.9652730660692823),
|
||||||
|
("simple and complex numerals written as one wordweek (grain)",
|
||||||
|
-3.188416617383492),
|
||||||
("integer (numeric)month (grain)", -3.4760986898352733),
|
("integer (numeric)month (grain)", -3.4760986898352733),
|
||||||
("minute", -2.9652730660692823),
|
("minute", -2.9652730660692823),
|
||||||
("coupleday (grain)", -3.4760986898352733),
|
("coupleday (grain)", -3.4760986898352733),
|
||||||
("integer (numeric)hour (grain)", -3.4760986898352733),
|
("integer (numeric)hour (grain)", -3.4760986898352733),
|
||||||
("integer (0..19)day (grain)", -2.9652730660692823),
|
("simple and complex numerals written as one wordminute (grain)",
|
||||||
("integer (0..19)week (grain)", -3.188416617383492)],
|
-3.4760986898352733)],
|
||||||
n = 37}}),
|
n = 37}}),
|
||||||
("Dienstag",
|
("Dienstag",
|
||||||
Classifier{okData =
|
Classifier{okData =
|
||||||
@ -1754,9 +1766,10 @@ classifiers
|
|||||||
ClassData{prior = 0.0, unseen = -2.0794415416798357,
|
ClassData{prior = 0.0, unseen = -2.0794415416798357,
|
||||||
likelihoods =
|
likelihoods =
|
||||||
HashMap.fromList
|
HashMap.fromList
|
||||||
[("hour", -0.8472978603872037),
|
[("simple and complex numerals written as one wordtime-of-day (latent)",
|
||||||
("integer (numeric)time-of-day (latent)", -1.252762968495368),
|
-1.252762968495368),
|
||||||
("integer (20..90)time-of-day (latent)", -1.252762968495368)],
|
("hour", -0.8472978603872037),
|
||||||
|
("integer (numeric)time-of-day (latent)", -1.252762968495368)],
|
||||||
n = 2},
|
n = 2},
|
||||||
koData =
|
koData =
|
||||||
ClassData{prior = -infinity, unseen = -1.3862943611198906,
|
ClassData{prior = -infinity, unseen = -1.3862943611198906,
|
||||||
@ -2242,25 +2255,33 @@ classifiers
|
|||||||
likelihoods =
|
likelihoods =
|
||||||
HashMap.fromList
|
HashMap.fromList
|
||||||
[("week", -2.740840023925201),
|
[("week", -2.740840023925201),
|
||||||
("integer (0..19)year (grain)", -3.028522096376982),
|
("simple and complex numerals written as one wordhour (grain)",
|
||||||
|
-3.4339872044851463),
|
||||||
("integer (numeric)day (grain)", -3.4339872044851463),
|
("integer (numeric)day (grain)", -3.4339872044851463),
|
||||||
("integer (0..19)hour (grain)", -3.4339872044851463),
|
("simple and complex numerals written as one wordyear (grain)",
|
||||||
|
-3.028522096376982),
|
||||||
("second", -3.028522096376982),
|
("second", -3.028522096376982),
|
||||||
("integer (numeric)second (grain)", -3.4339872044851463),
|
("integer (numeric)second (grain)", -3.4339872044851463),
|
||||||
("integer (numeric)year (grain)", -3.4339872044851463),
|
("integer (numeric)year (grain)", -3.4339872044851463),
|
||||||
("day", -2.3353749158170367), ("year", -2.740840023925201),
|
("day", -2.3353749158170367),
|
||||||
|
("simple and complex numerals written as one wordmonth (grain)",
|
||||||
|
-3.028522096376982),
|
||||||
|
("year", -2.740840023925201),
|
||||||
|
("simple and complex numerals written as one wordsecond (grain)",
|
||||||
|
-3.4339872044851463),
|
||||||
("integer (numeric)week (grain)", -3.4339872044851463),
|
("integer (numeric)week (grain)", -3.4339872044851463),
|
||||||
("integer (0..19)month (grain)", -3.028522096376982),
|
|
||||||
("integer (0..19)second (grain)", -3.4339872044851463),
|
|
||||||
("hour", -3.028522096376982), ("month", -2.740840023925201),
|
("hour", -3.028522096376982), ("month", -2.740840023925201),
|
||||||
("integer (numeric)minute (grain)", -3.4339872044851463),
|
("integer (numeric)minute (grain)", -3.4339872044851463),
|
||||||
("integer (0..19)minute (grain)", -3.4339872044851463),
|
("simple and complex numerals written as one wordday (grain)",
|
||||||
|
-3.028522096376982),
|
||||||
|
("simple and complex numerals written as one wordweek (grain)",
|
||||||
|
-3.028522096376982),
|
||||||
("integer (numeric)month (grain)", -3.4339872044851463),
|
("integer (numeric)month (grain)", -3.4339872044851463),
|
||||||
("minute", -3.028522096376982),
|
("minute", -3.028522096376982),
|
||||||
("coupleday (grain)", -3.028522096376982),
|
("coupleday (grain)", -3.028522096376982),
|
||||||
("integer (numeric)hour (grain)", -3.4339872044851463),
|
("integer (numeric)hour (grain)", -3.4339872044851463),
|
||||||
("integer (0..19)day (grain)", -3.028522096376982),
|
("simple and complex numerals written as one wordminute (grain)",
|
||||||
("integer (0..19)week (grain)", -3.028522096376982)],
|
-3.4339872044851463)],
|
||||||
n = 20},
|
n = 20},
|
||||||
koData =
|
koData =
|
||||||
ClassData{prior = -infinity, unseen = -3.1354942159291497,
|
ClassData{prior = -infinity, unseen = -3.1354942159291497,
|
||||||
@ -2437,22 +2458,29 @@ classifiers
|
|||||||
likelihoods =
|
likelihoods =
|
||||||
HashMap.fromList
|
HashMap.fromList
|
||||||
[("week", -2.5257286443082556),
|
[("week", -2.5257286443082556),
|
||||||
("integer (0..19)year (grain)", -2.8134107167600364),
|
|
||||||
("integer (numeric)day (grain)", -3.2188758248682006),
|
("integer (numeric)day (grain)", -3.2188758248682006),
|
||||||
|
("simple and complex numerals written as one wordyear (grain)",
|
||||||
|
-2.8134107167600364),
|
||||||
("second", -2.8134107167600364),
|
("second", -2.8134107167600364),
|
||||||
("integer (numeric)second (grain)", -3.2188758248682006),
|
("integer (numeric)second (grain)", -3.2188758248682006),
|
||||||
("integer (numeric)year (grain)", -3.2188758248682006),
|
("integer (numeric)year (grain)", -3.2188758248682006),
|
||||||
("day", -2.5257286443082556), ("year", -2.5257286443082556),
|
("day", -2.5257286443082556),
|
||||||
|
("simple and complex numerals written as one wordmonth (grain)",
|
||||||
|
-2.8134107167600364),
|
||||||
|
("year", -2.5257286443082556),
|
||||||
|
("simple and complex numerals written as one wordsecond (grain)",
|
||||||
|
-3.2188758248682006),
|
||||||
("integer (numeric)week (grain)", -2.8134107167600364),
|
("integer (numeric)week (grain)", -2.8134107167600364),
|
||||||
("integer (0..19)month (grain)", -2.8134107167600364),
|
|
||||||
("integer (0..19)second (grain)", -3.2188758248682006),
|
|
||||||
("month", -2.5257286443082556),
|
("month", -2.5257286443082556),
|
||||||
("integer (numeric)minute (grain)", -3.2188758248682006),
|
("integer (numeric)minute (grain)", -3.2188758248682006),
|
||||||
("integer (0..19)minute (grain)", -3.2188758248682006),
|
("simple and complex numerals written as one wordday (grain)",
|
||||||
|
-2.8134107167600364),
|
||||||
|
("simple and complex numerals written as one wordweek (grain)",
|
||||||
|
-3.2188758248682006),
|
||||||
("integer (numeric)month (grain)", -3.2188758248682006),
|
("integer (numeric)month (grain)", -3.2188758248682006),
|
||||||
("minute", -2.8134107167600364),
|
("minute", -2.8134107167600364),
|
||||||
("integer (0..19)day (grain)", -2.8134107167600364),
|
("simple and complex numerals written as one wordminute (grain)",
|
||||||
("integer (0..19)week (grain)", -3.2188758248682006)],
|
-3.2188758248682006)],
|
||||||
n = 16},
|
n = 16},
|
||||||
koData =
|
koData =
|
||||||
ClassData{prior = -infinity, unseen = -2.9444389791664407,
|
ClassData{prior = -infinity, unseen = -2.9444389791664407,
|
||||||
|
@ -327,10 +327,10 @@ classifiers
|
|||||||
unseen = -4.31748811353631,
|
unseen = -4.31748811353631,
|
||||||
likelihoods =
|
likelihoods =
|
||||||
HashMap.fromList
|
HashMap.fromList
|
||||||
[("<integer> (latent time-of-day)", -0.9718605830289658),
|
[("<integer> (latent time-of-day)", -0.9718605830289657),
|
||||||
("intersect by \"di\", \"della\", \"del\"", -3.20545280453606),
|
("intersect by \"di\", \"della\", \"del\"", -3.20545280453606),
|
||||||
("day", -2.3581549441488563), ("Lunedi", -3.6109179126442243),
|
("day", -2.3581549441488563), ("Lunedi", -3.6109179126442243),
|
||||||
("hour", -0.9718605830289658),
|
("hour", -0.9718605830289657),
|
||||||
("two time tokens separated by `di`", -3.20545280453606),
|
("two time tokens separated by `di`", -3.20545280453606),
|
||||||
("Domenica", -3.6109179126442243)],
|
("Domenica", -3.6109179126442243)],
|
||||||
n = 33}}),
|
n = 33}}),
|
||||||
|
@ -434,6 +434,7 @@ library
|
|||||||
, Duckling.Numeral.DA.Rules
|
, Duckling.Numeral.DA.Rules
|
||||||
, Duckling.Numeral.DE.Corpus
|
, Duckling.Numeral.DE.Corpus
|
||||||
, Duckling.Numeral.DE.Rules
|
, Duckling.Numeral.DE.Rules
|
||||||
|
, Duckling.Numeral.DE.NumParser
|
||||||
, Duckling.Numeral.EL.Corpus
|
, Duckling.Numeral.EL.Corpus
|
||||||
, Duckling.Numeral.EL.Rules
|
, Duckling.Numeral.EL.Rules
|
||||||
, Duckling.Numeral.EN.Corpus
|
, Duckling.Numeral.EN.Corpus
|
||||||
|
Loading…
Reference in New Issue
Block a user