fix pt rules for numeral

Summary:
When I write "dois mil e duzentos", the result should be 2200, but Duckling recognizes the numbers separately and gives this result:

`[{"dim":"number","body":"dois","value":{"value":2,"type":"value"},"start":0,"end":4},{"dim":"number","body":"mil","value":{"value":1000,"type":"value"},"start":5,"end":8},{"dim":"time","body":"mil","value":{"values":[],"value":"1000-01-01T00:00:00.000-07:53","grain":"year","type":"value"},"start":5,"end":8},{"dim":"number","body":"duzentos","value":{"value":200,"type":"value"},"start":11,"end":19}]`

Now with this commit, duckling gives the correct result:

`[{"dim":"number","body":"dois mil e duzentos","value":{"value":2200,"type":"value"},"start":0,"end":19}]`
Closes https://github.com/facebook/duckling/pull/117

Reviewed By: blandinw

Differential Revision: D6477925

Pulled By: patapizza

fbshipit-source-id: 26ab503cc8def739c51ceb5bae7546016ba65ad6
This commit is contained in:
Alex Torres 2017-12-04 17:49:07 -08:00 committed by Facebook Github Bot
parent 12a726aee7
commit 498e8b16e6
3 changed files with 405 additions and 279 deletions

View File

@ -9,10 +9,11 @@
{-# LANGUAGE OverloadedStrings #-}
module Duckling.Numeral.PT.Corpus
( corpus ) where
( corpus
) where
import Prelude
import Data.String
import Prelude
import Duckling.Locale
import Duckling.Numeral.Types
@ -33,6 +34,8 @@ allExamples = concat
[ "2"
, "dois"
, "duas"
, "pares de"
, "um par de"
]
, examples (NumeralValue 3)
[ "3"
@ -52,6 +55,7 @@ allExamples = concat
, "doze"
, "uma dúzia"
, "uma duzia"
, "uma duzias de"
]
, examples (NumeralValue 14)
[ "14"
@ -120,6 +124,10 @@ allExamples = concat
, examples (NumeralValue 0.77)
[ "0,77"
, ",77"
, "ponto setenta e sete"
]
, examples (NumeralValue 1000)
[ "mil"
]
, examples (NumeralValue 100000)
[ "100.000"
@ -150,6 +158,9 @@ allExamples = concat
[ "oitocentos e noventa e um"
, "891"
]
, examples (NumeralValue 2200)
[ "dois mil e duzentos"
]
, examples (NumeralValue 3000000)
[ "3M"
, "3000K"
@ -170,6 +181,7 @@ allExamples = concat
, "-1,2M"
, "-1200K"
, "-,0012G"
, "negativo 1,2M"
]
, examples (NumeralValue 1.5)
[ "1 ponto cinco"

View File

@ -8,14 +8,19 @@
{-# LANGUAGE GADTs #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE NoRebindableSyntax #-}
module Duckling.Numeral.PT.Rules
( rules
) where
import Control.Applicative ((<|>))
import Data.HashMap.Strict (HashMap)
import Data.Maybe
import Data.String
import Data.Text (Text)
import Prelude
import qualified Data.HashMap.Strict as HashMap
import qualified Data.Text as Text
import Duckling.Dimensions.Types
@ -25,148 +30,300 @@ import Duckling.Regex.Types
import Duckling.Types
import qualified Duckling.Numeral.Types as TNumeral
-- | Flips the sign of a numeral that directly follows "-" or "menos".
ruleNumeralsPrefixWithNegativeOrMinus :: Rule
ruleNumeralsPrefixWithNegativeOrMinus = Rule
  { name = "numbers prefix with -, negative or minus"
  , pattern = [regex "-|menos", dimension Numeral]
  , prod = negated
  }
  where
    -- The first token is the sign marker; only the numeral after it matters.
    negated :: [Token] -> Maybe Token
    negated (_ : Token Numeral numeral : _) =
      double $ TNumeral.value numeral * (-1)
    negated _ = Nothing
ruleIntegerNumeric :: Rule
ruleIntegerNumeric = Rule
ruleIntegers :: Rule
ruleIntegers = Rule
{ name = "integer (numeric)"
, pattern =
[ regex "(\\d{1,18})"
]
, prod = \tokens -> case tokens of
(Token RegexMatch (GroupMatch (match:_)):_) -> do
v <- parseInt match
integer $ toInteger v
(Token RegexMatch (GroupMatch (match:_)):_) ->
toInteger <$> parseInt match >>= integer
_ -> Nothing
}
ruleDecimalWithThousandsSeparator :: Rule
ruleDecimalWithThousandsSeparator = Rule
{ name = "decimal with thousands separator"
ruleDozen :: Rule
ruleDozen = Rule
{ name = "a dozen of"
, pattern =
[ regex "(\\d+(\\.\\d\\d\\d)+,\\d+)"
[ regex "(uma )?d(u|ú)zias?( de)?"
]
, prod = \_ -> integer 12 >>= withMultipliable >>= notOkForAnyTime
}
-- | Lookup table from the spelled-out Portuguese numerals 0..19 to their
-- integer value. Several values have more than one accepted spelling
-- (gendered forms, unaccented forms, regional variants).
zeroNineteenMap :: HashMap Text Integer
zeroNineteenMap = HashMap.fromList
  [ (spelling, n) | (n, spellings) <- table, spelling <- spellings ]
  where
    -- Each value is listed once, together with every spelling mapped to it.
    table :: [(Integer, [Text])]
    table =
      [ (0, ["zero"])
      , (1, ["um", "uma"])
      , (2, ["dois", "duas"])
      , (3, ["tres", "três"])
      , (4, ["quatro"])
      , (5, ["cinco"])
      , (6, ["seis"])
      , (7, ["sete"])
      , (8, ["oito"])
      , (9, ["nove"])
      , (10, ["dez"])
      , (11, ["onze"])
      , (12, ["doze"])
      , (13, ["treze"])
      , (14, ["catorze", "quatorze"])
      , (15, ["quinze"])
      , (16, ["dezesseis", "dezasseis"])
      , (17, ["dezessete", "dezassete"])
      , (18, ["dezoito"])
      , (19, ["dezenove", "dezanove"])
      ]
-- | Lookup table for informal quantities: the "par" phrases map to 2 and
-- the "pouco" phrases map to 3.
informalMap :: HashMap Text Integer
informalMap = HashMap.fromList $
  [ (phrase, 2) | phrase <- pairPhrases ] ++
  [ (phrase, 3) | phrase <- fewPhrases ]
  where
    pairPhrases, fewPhrases :: [Text]
    pairPhrases =
      [ "um par"
      , "um par de"
      , "par"
      , "pares"
      , "par de"
      , "pares de"
      ]
    fewPhrases =
      [ "um pouco"
      , "pouco"
      ]
ruleToNineteen :: Rule
ruleToNineteen = Rule
{ name = "integer (0..19)"
, pattern =
[ regex "(zero|d(oi|ua)s|(uma? )?par(es)?( de)?|tr(e|ê)s|(um )?pouco|uma?|(c|qu)atorze|quatro|quinze|cinco|dez[ea]sseis|seis|dez[ea]ssete|sete|dezoito|oito|dez[ea]nove|nove|dez|onze|doze|treze)"
]
, prod = \tokens -> case tokens of
(Token RegexMatch (GroupMatch (match:_)):
_) -> let fmt = Text.replace "," "." $ Text.replace "." Text.empty match
in parseDouble fmt >>= double
(Token RegexMatch (GroupMatch (match:_)):_) ->
let x = Text.toLower match in
(HashMap.lookup x zeroNineteenMap >>= integer) <|>
(HashMap.lookup x informalMap >>= integer >>= notOkForAnyTime)
_ -> Nothing
}
ruleDecimalNumeral :: Rule
ruleDecimalNumeral = Rule
{ name = "decimal number"
-- | Lookup table from the spelled-out Portuguese tens (20, 30, .. 90) to
-- their integer value; 50 is accepted in three spellings.
tensMap :: HashMap Text Integer
tensMap = HashMap.fromList $ concatMap spellingsOf table
  where
    -- Pair every spelling of one value with that value.
    spellingsOf :: (Integer, [Text]) -> [(Text, Integer)]
    spellingsOf (n, spellings) = [ (spelling, n) | spelling <- spellings ]

    table :: [(Integer, [Text])]
    table =
      [ (20, ["vinte"])
      , (30, ["trinta"])
      , (40, ["quarenta"])
      , (50, ["cincoenta", "cinquenta", "cinqüenta"])
      , (60, ["sessenta"])
      , (70, ["setenta"])
      , (80, ["oitenta"])
      , (90, ["noventa"])
      ]
ruleTens :: Rule
ruleTens = Rule
{ name = "tens (20..90)"
, pattern =
[ regex "(\\d*,\\d+)"
[ regex "(vinte|trinta|quarenta|cin(co|q[uü])enta|sessenta|setenta|oitenta|noventa)"
]
, prod = \tokens -> case tokens of
(Token RegexMatch (GroupMatch (match:_)):
_) -> parseDecimal False match
(Token RegexMatch (GroupMatch (match:_)):_) ->
HashMap.lookup (Text.toLower match) tensMap >>= integer
_ -> Nothing
}
ruleNumeral2 :: Rule
ruleNumeral2 = Rule
{ name = "number (20..90)"
-- | Lookup table from the spelled-out Portuguese hundreds to their integer
-- value. Both "cem" and "cento" map to 100.
--
-- The keys must stay in sync with the alternatives of the regex in
-- 'ruleCent': a word the regex accepts but that is missing here makes the
-- rule produce 'Nothing'.
centsMap :: HashMap Text Integer
centsMap = HashMap.fromList
  [ ( "cem"          , 100 )
  , ( "cento"        , 100 )
  , ( "duzentos"     , 200 )
  , ( "trezentos"    , 300 )
  , ( "quatrocentos" , 400 )
  , ( "quinhentos"   , 500 )
  , ( "seiscentos"   , 600 )
  , ( "setecentos"   , 700 )
  , ( "oitocentos"   , 800 )
  , ( "novecentos"   , 900 )
  ]

-- | Recognises a single spelled-out hundred (100, 200, .. 900).
--
-- The matched text is lower-cased and looked up in 'centsMap'; a miss
-- yields 'Nothing'. The word for 500 is spelled "quinhentos" (it was
-- previously misspelled "quinhetos" in both the regex and the map, so the
-- correct word was never recognised).
ruleCent :: Rule
ruleCent = Rule
  { name = "hundreds (100..900)"
  , pattern =
    [ regex "(cem|cento|duzentos|trezentos|quatrocentos|quinhentos|seiscentos|setecentos|oitocentos|novecentos)"
    ]
  , prod = \tokens -> case tokens of
      (Token RegexMatch (GroupMatch (match:_)):_) ->
        HashMap.lookup (Text.toLower match) centsMap >>= integer
      _ -> Nothing
  }
rulePowersOfTen :: Rule
rulePowersOfTen = Rule
{ name = "powers of tens"
, pattern =
[ regex "(milhao|milhão|milhões|milhoes|bilhao|bilhão|bilhões|bilhoes|mil)"
]
, prod = \tokens -> case tokens of
(Token RegexMatch (GroupMatch (match:_)):_) -> case Text.toLower match of
"vinte" -> integer 20
"trinta" -> integer 30
"quarenta" -> integer 40
"cinq\252enta" -> integer 50
"cincoenta" -> integer 50
"cinquenta" -> integer 50
"sessenta" -> integer 60
"setenta" -> integer 70
"oitenta" -> integer 80
"noventa" -> integer 90
"mil" -> double 1e3 >>= withGrain 3 >>= withMultipliable
"milhao" -> double 1e6 >>= withGrain 6 >>= withMultipliable
"milhão" -> double 1e6 >>= withGrain 6 >>= withMultipliable
"milhões" -> double 1e6 >>= withGrain 6 >>= withMultipliable
"milhoes" -> double 1e6 >>= withGrain 6 >>= withMultipliable
"bilhao" -> double 1e9 >>= withGrain 9 >>= withMultipliable
"bilhão" -> double 1e9 >>= withGrain 9 >>= withMultipliable
"bilhões" -> double 1e9 >>= withGrain 9 >>= withMultipliable
"bilhoes" -> double 1e9 >>= withGrain 9 >>= withMultipliable
_ -> Nothing
_ -> Nothing
}
ruleNumeral :: Rule
ruleNumeral = Rule
{ name = "number (0..15)"
ruleCompositeTens :: Rule
ruleCompositeTens = Rule
{ name = "integer 21..99"
, pattern =
[ regex "(zero|uma?|d(oi|ua)s|tr(ê|e)s|quatro|cinco|seis|sete|oito|nove|dez|onze|doze|treze|(ca|qua)torze|quinze)"
[ oneOf [20,30..90]
, numberBetween 1 10
]
, prod = \tokens -> case tokens of
(Token RegexMatch (GroupMatch (match:_)):_) -> case Text.toLower match of
"zero" -> integer 0
"uma" -> integer 1
"um" -> integer 1
"dois" -> integer 2
"duas" -> integer 2
"três" -> integer 3
"tres" -> integer 3
"quatro" -> integer 4
"cinco" -> integer 5
"seis" -> integer 6
"sete" -> integer 7
"oito" -> integer 8
"nove" -> integer 9
"dez" -> integer 10
"onze" -> integer 11
"doze" -> integer 12
"treze" -> integer 13
"catorze" -> integer 14
"quatorze" -> integer 14
"quinze" -> integer 15
_ -> Nothing
(Token Numeral (NumeralData {TNumeral.value = tens}):
Token Numeral (NumeralData {TNumeral.value = units}):
_) -> double $ tens + units
_ -> Nothing
}
ruleNumeral5 :: Rule
ruleNumeral5 = Rule
{ name = "number (16..19)"
ruleDecsAnd :: Rule
ruleDecsAnd = Rule
{ name = "number (21..29 31..39 .. 91..99)"
, pattern =
[ regex "(dez[ea]sseis|dez[ea]ssete|dezoito|dez[ea]nove)"
]
, prod = \tokens -> case tokens of
(Token RegexMatch (GroupMatch (match:_)):_) -> case Text.toLower match of
"dezesseis" -> integer 16
"dezasseis" -> integer 16
"dezessete" -> integer 17
"dezassete" -> integer 17
"dezoito" -> integer 18
"dezenove" -> integer 19
"dezanove" -> integer 19
_ -> Nothing
_ -> Nothing
}
ruleNumeral3 :: Rule
ruleNumeral3 = Rule
{ name = "number (16..19)"
, pattern =
[ numberWith TNumeral.value (== 10)
[ oneOf [20, 30..90]
, regex "e"
, numberBetween 6 10
, numberBetween 1 10
]
, prod = \tokens -> case tokens of
(_:Token Numeral (NumeralData {TNumeral.value = v}):_) -> double $ 10 + v
(Token Numeral (NumeralData {TNumeral.value = v1}):
_:
Token Numeral (NumeralData {TNumeral.value = v2}):
_) -> double $ v1 + v2
_ -> Nothing
}
ruleNumeralsSuffixesKMG :: Rule
ruleNumeralsSuffixesKMG = Rule
{ name = "numbers suffixes (K, M, G)"
-- | Joins a hundreds numeral (one of 100, 200, .. 900) with a directly
-- following numeral accepted by @numberBetween 1 100@ by adding their
-- values.
ruleCompositeCents :: Rule
ruleCompositeCents = Rule
  { name = "integer 101..999"
  , pattern = [oneOf [100, 200..900], numberBetween 1 100]
  , prod = sumParts
  }
  where
    sumParts :: [Token] -> Maybe Token
    sumParts (Token Numeral hundredsPart : Token Numeral lowerPart : _) =
      double (TNumeral.value hundredsPart + TNumeral.value lowerPart)
    sumParts _ = Nothing
-- | Same composition as a hundreds numeral followed by a smaller numeral,
-- but with the connective "e" between the two; the result is the sum of
-- both values.
ruleCentsAnd :: Rule
ruleCentsAnd = Rule
  { name = "number (101..199 201..299 .. 901..999)"
  , pattern = [oneOf [100, 200..900], regex "e", numberBetween 1 100]
  , prod = sumAroundAnd
  }
  where
    -- The middle token is the "e" connective and carries no value.
    sumAroundAnd :: [Token] -> Maybe Token
    sumAroundAnd (Token Numeral left : _ : Token Numeral right : _) =
      double (TNumeral.value left + TNumeral.value right)
    sumAroundAnd _ = Nothing
-- | Reads two adjacent numerals as "<hundreds digit> <rest>": the first
-- value is multiplied by 100 and the second is added to it.
ruleSkipHundreds :: Rule
ruleSkipHundreds = Rule
  { name = "one twenty two"
  , pattern = [numberBetween 1 10, numberBetween 10 100]
  , prod = expand
  }
  where
    expand :: [Token] -> Maybe Token
    expand (Token Numeral leading : Token Numeral trailing : _) =
      double (TNumeral.value leading * 100 + TNumeral.value trailing)
    expand _ = Nothing
ruleDotSpelledOut :: Rule
ruleDotSpelledOut = Rule
{ name = "one point 2"
, pattern =
[ dimension Numeral
, regex "([kmg])(?=[\\W\\$€]|$)"
, regex "ponto"
, numberWith TNumeral.grain isNothing
]
, prod = \tokens -> case tokens of
(Token Numeral nd1:_:Token Numeral nd2:_) ->
double $ TNumeral.value nd1 + decimalsToDouble (TNumeral.value nd2)
_ -> Nothing
}
-- | Handles a numeral introduced by the word "ponto" with nothing in front
-- of it: the numeral's value is passed through 'decimalsToDouble' and the
-- result becomes the value of the new token. Only numerals without a grain
-- are accepted after "ponto".
ruleLeadingDotSpelledOut :: Rule
ruleLeadingDotSpelledOut = Rule
  { name = "point 77"
  , pattern = [regex "ponto", numberWith TNumeral.grain isNothing]
  , prod = fractionOnly
  }
  where
    -- The first token is the literal "ponto"; it is ignored.
    fractionOnly :: [Token] -> Maybe Token
    fractionOnly (_ : Token Numeral decimals : _) =
      double (decimalsToDouble (TNumeral.value decimals))
    fractionOnly _ = Nothing
-- | Recognises a numeric literal written with a comma: optional digits, a
-- comma, then at least one digit. The matched text is handed to
-- @parseDecimal False@.
ruleDecimals :: Rule
ruleDecimals = Rule
  { name = "decimal number"
  , pattern = [regex "(\\d*\\,\\d+)"]
  , prod = fromLiteral
  }
  where
    fromLiteral :: [Token] -> Maybe Token
    fromLiteral (Token RegexMatch (GroupMatch (literal : _)) : _) =
      parseDecimal False literal
    fromLiteral _ = Nothing
-- | Recognises @<digits>/<digits>@. Both captured groups are parsed with
-- @parseDecimal False@ and the two resulting tokens are combined with
-- 'divide'; if either parse fails the rule yields 'Nothing'.
ruleFractions :: Rule
ruleFractions = Rule
  { name = "fractional number"
  , pattern = [regex "(\\d+)/(\\d+)"]
  , prod = quotient
  }
  where
    quotient :: [Token] -> Maybe Token
    quotient (Token RegexMatch (GroupMatch (top : bottom : _)) : _) =
      parseDecimal False top >>= \numeratorToken ->
        parseDecimal False bottom >>= \denominatorToken ->
          divide numeratorToken denominatorToken
    quotient _ = Nothing
-- | Recognises numeric literals that use "." as the thousands separator,
-- optionally followed by a comma and more digits, e.g. "100.000".
--
-- The separator is matched as a literal dot (@\\.@). It used to be an
-- unescaped @.@, which matches any character, so plain digit runs and
-- strings such as "1/200" or "5 000" were also picked up by this rule.
--
-- Every "." is stripped from the match before the remaining text is handed
-- to @parseDecimal False@.
ruleCommas :: Rule
ruleCommas = Rule
  { name = "dot-separated numbers"
  , pattern =
    [ regex "(\\d+(\\.\\d\\d\\d)+(\\,\\d+)?)"
    ]
  , prod = \tokens -> case tokens of
      (Token RegexMatch (GroupMatch (match:_)):_) ->
        parseDecimal False $ Text.replace "." Text.empty match
      _ -> Nothing
  }
ruleSuffixes :: Rule
ruleSuffixes = Rule
{ name = "suffixes (K,M,G))"
, pattern =
[ dimension Numeral
, regex "(k|m|g)(?=[\\W$€¢£]|$)"
]
, prod = \tokens -> case tokens of
(Token Numeral (NumeralData {TNumeral.value = v}):
@ -179,128 +336,81 @@ ruleNumeralsSuffixesKMG = Rule
_ -> Nothing
}
ruleNumeral6 :: Rule
ruleNumeral6 = Rule
{ name = "number 100..1000 "
ruleNegative :: Rule
ruleNegative = Rule
{ name = "negative numbers"
, pattern =
[
regex "(cem|cento|duzentos|trezentos|quatrocentos|quinhentos|seiscentos|setecentos|oitocentos|novecentos|mil)"
[ regex "(-|menos|negativo)(?!\\s*-)"
, numberWith TNumeral.value (>0)
]
, prod = \tokens -> case tokens of
(Token RegexMatch (GroupMatch (match:_)):_) -> case Text.toLower match of
"cento" -> integer 100
"cem" -> integer 100
"duzentos" -> integer 200
"trezentos" -> integer 300
"quatrocentos" -> integer 400
"quinhentos" -> integer 500
"seiscentos" -> integer 600
"setecentos" -> integer 700
"oitocentos" -> integer 800
"novecentos" -> integer 900
"mil" -> integer 1000
_ -> Nothing
(_:Token Numeral nd:_) -> double $ TNumeral.value nd * (-1)
_ -> Nothing
}
ruleNumeral4 :: Rule
ruleNumeral4 = Rule
{ name = "number (21..29 31..39 41..49 51..59 61..69 71..79 81..89 91..99)"
ruleSum :: Rule
ruleSum = Rule
{ name = "intersect 2 numbers"
, pattern =
[ oneOf [70, 20, 60, 50, 40, 90, 30, 80]
[ numberWith (fromMaybe 0 . TNumeral.grain) (>1)
, numberWith TNumeral.multipliable not
]
, prod = \tokens -> case tokens of
(Token Numeral (NumeralData {TNumeral.value = val1, TNumeral.grain = Just g}):
Token Numeral (NumeralData {TNumeral.value = val2}):
_) | (10 ** fromIntegral g) > val2 -> double $ val1 + val2
_ -> Nothing
}
ruleSumAnd :: Rule
ruleSumAnd = Rule
{ name = "intersect 2 numbers (with and)"
, pattern =
[ numberWith (fromMaybe 0 . TNumeral.grain) (>1)
, regex "e"
, numberBetween 1 10
, numberWith TNumeral.multipliable not
]
, prod = \tokens -> case tokens of
(Token Numeral (NumeralData {TNumeral.value = v1}):
(Token Numeral (NumeralData {TNumeral.value = val1, TNumeral.grain = Just g}):
_:
Token Numeral (NumeralData {TNumeral.value = v2}):
_) -> double $ v1 + v2
Token Numeral (NumeralData {TNumeral.value = val2}):
_) | (10 ** fromIntegral g) > val2 -> double $ val1 + val2
_ -> Nothing
}
-- | Recognises "dúzia"/"duzia" (singular or plural) as the integer 12; the
-- token is then passed through @withGrain 1@ and 'withMultipliable'.
ruleDozen :: Rule
ruleDozen = Rule
  { name = "dozen"
  , pattern = [regex "d(ú|u)zias?"]
  , prod = const dozen
  }
  where
    -- The matched text is irrelevant: the result is always built from 12.
    dozen :: Maybe Token
    dozen = withMultipliable =<< withGrain 1 =<< integer 12
-- | Multiplies a numeral accepted by @numberBetween 1 11@ with the numeral
-- that follows it. The rule only fires when the second numeral carries a
-- grain, and that grain is re-applied to the product via 'withGrain'.
ruleNumeralDozen :: Rule
ruleNumeralDozen = Rule
  { name = "number dozen"
  , pattern = [numberBetween 1 11, dimension Numeral]
  , prod = scaled
  }
  where
    scaled :: [Token] -> Maybe Token
    scaled (Token Numeral factor : Token Numeral unit : _) =
      -- A unit without a grain short-circuits to Nothing here.
      TNumeral.grain unit >>= \g ->
        double (TNumeral.value factor * TNumeral.value unit) >>= withGrain g
    scaled _ = Nothing
-- | Adds a numeral accepted by @numberBetween 100 1000@ to a numeral
-- accepted by @numberBetween 0 100@ when the two are joined by "e".
ruleNumerals :: Rule
ruleNumerals = Rule
  { name = "numbers (100..999)"
  , pattern = [numberBetween 100 1000, regex "e", numberBetween 0 100]
  , prod = joined
  }
  where
    -- The middle token is the "e" connective and carries no value.
    joined :: [Token] -> Maybe Token
    joined (Token Numeral upper : _ : Token Numeral lower : _) =
      double (TNumeral.value upper + TNumeral.value lower)
    joined _ = Nothing
ruleNumeralDotNumeral :: Rule
ruleNumeralDotNumeral = Rule
{ name = "number dot number"
ruleMultiply :: Rule
ruleMultiply = Rule
{ name = "compose by multiplication"
, pattern =
[ dimension Numeral
, regex "ponto"
, numberWith TNumeral.grain isNothing
, numberWith TNumeral.multipliable id
]
, prod = \tokens -> case tokens of
(Token Numeral nd1:_:Token Numeral nd2:_) ->
double $ TNumeral.value nd1 + decimalsToDouble (TNumeral.value nd2)
_ -> Nothing
}
ruleIntegerWithThousandsSeparator :: Rule
ruleIntegerWithThousandsSeparator = Rule
{ name = "integer with thousands separator ."
, pattern =
[ regex "(\\d{1,3}(\\.\\d\\d\\d){1,5})"
]
, prod = \tokens -> case tokens of
(Token RegexMatch (GroupMatch (match:_)):
_) -> let fmt = Text.replace "." Text.empty match
in parseDouble fmt >>= double
(token1:token2:_) -> multiply token1 token2
_ -> Nothing
}
rules :: [Rule]
rules =
[ ruleDecimalNumeral
, ruleDecimalWithThousandsSeparator
[ ruleIntegers
, ruleToNineteen
, ruleTens
, ruleCent
, rulePowersOfTen
, ruleCompositeTens
, ruleCompositeCents
, ruleSkipHundreds
, ruleDotSpelledOut
, ruleLeadingDotSpelledOut
, ruleDecimals
, ruleFractions
, ruleCommas
, ruleSuffixes
, ruleNegative
, ruleSum
, ruleDecsAnd
, ruleCentsAnd
, ruleSumAnd
, ruleMultiply
, ruleDozen
, ruleIntegerNumeric
, ruleIntegerWithThousandsSeparator
, ruleNumeral
, ruleNumeral2
, ruleNumeral3
, ruleNumeral4
, ruleNumeral5
, ruleNumeral6
, ruleNumeralDotNumeral
, ruleNumeralDozen
, ruleNumerals
, ruleNumeralsPrefixWithNegativeOrMinus
, ruleNumeralsSuffixesKMG
]

View File

@ -287,6 +287,14 @@ classifiers
("named-dayintersect", -2.871679624884012),
("named-dayintersect by `da` or `de`", -1.405342556090585)],
n = 20}}),
("integer (0..19)",
Classifier{okData =
ClassData{prior = -1.9048194970694474e-2,
unseen = -3.9889840465642745,
likelihoods = HashMap.fromList [("", 0.0)], n = 52},
koData =
ClassData{prior = -3.970291913552122, unseen = -1.0986122886681098,
likelihoods = HashMap.fromList [("", 0.0)], n = 1}}),
("ordinals (primeiro..10)",
Classifier{okData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
@ -300,7 +308,7 @@ classifiers
unseen = -4.02535169073515,
likelihoods =
HashMap.fromList
[("number (0..15)named-month", -1.927891643552635),
[("integer (0..19)named-month", -1.927891643552635),
("integer (numeric)named-month", -1.0116009116784799),
("month", -0.7114963192281418)],
n = 26},
@ -368,6 +376,15 @@ classifiers
-2.1972245773362196),
("named-monthintersect by `da` or `de`", -1.791759469228055)],
n = 5}}),
("number (21..29 31..39 .. 91..99)",
Classifier{okData =
ClassData{prior = 0.0, unseen = -1.6094379124341003,
likelihoods =
HashMap.fromList [("tens (20..90)integer (0..19)", 0.0)],
n = 3},
koData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0}}),
("<time-of-day> horas",
Classifier{okData =
ClassData{prior = -0.7731898882334817,
@ -557,10 +574,9 @@ classifiers
likelihoods =
HashMap.fromList
[("integer (numeric)", -0.6061358035703156),
("number (0..15)", -1.0116009116784799),
("numbers prefix with -, negative or minus",
-3.0910424533583156),
("number (20..90)", -3.0910424533583156)],
("integer (0..19)", -1.0116009116784799),
("negative numbers", -3.0910424533583156),
("tens (20..90)", -3.0910424533583156)],
n = 40}}),
("n[ao] <date>",
Classifier{okData =
@ -687,14 +703,6 @@ classifiers
koData =
ClassData{prior = -infinity, unseen = -1.0986122886681098,
likelihoods = HashMap.fromList [], n = 0}}),
("number (0..15)",
Classifier{okData =
ClassData{prior = -1.9048194970694474e-2,
unseen = -3.9889840465642745,
likelihoods = HashMap.fromList [("", 0.0)], n = 52},
koData =
ClassData{prior = -3.970291913552122, unseen = -1.0986122886681098,
likelihoods = HashMap.fromList [("", 0.0)], n = 1}}),
("antes das <time-of-day>",
Classifier{okData =
ClassData{prior = 0.0, unseen = -2.4849066497880004,
@ -706,14 +714,6 @@ classifiers
koData =
ClassData{prior = -infinity, unseen = -1.3862943611198906,
likelihoods = HashMap.fromList [], n = 0}}),
("numbers prefix with -, negative or minus",
Classifier{okData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0},
koData =
ClassData{prior = 0.0, unseen = -1.791759469228055,
likelihoods = HashMap.fromList [("integer (numeric)", 0.0)],
n = 4}}),
("dd-dd <month>(interval)",
Classifier{okData =
ClassData{prior = 0.0, unseen = -1.9459101490553135,
@ -761,19 +761,12 @@ classifiers
likelihoods =
HashMap.fromList
[("integer (numeric)noon", -1.7047480922384253),
("hour", -0.7884573603642702),
("number (0..15)noon", -1.0116009116784799)],
("integer (0..19)noon", -1.0116009116784799),
("hour", -0.7884573603642702)],
n = 4},
koData =
ClassData{prior = -infinity, unseen = -1.3862943611198906,
likelihoods = HashMap.fromList [], n = 0}}),
("number (20..90)",
Classifier{okData =
ClassData{prior = 0.0, unseen = -1.9459101490553135,
likelihoods = HashMap.fromList [("", 0.0)], n = 5},
koData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0}}),
("<dim time> da manha",
Classifier{okData =
ClassData{prior = -0.262364264467491, unseen = -3.4339872044851463,
@ -811,6 +804,13 @@ classifiers
koData =
ClassData{prior = -infinity, unseen = -2.1972245773362196,
likelihoods = HashMap.fromList [], n = 0}}),
("fractional number",
Classifier{okData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0},
koData =
ClassData{prior = 0.0, unseen = -2.5649493574615367,
likelihoods = HashMap.fromList [("", 0.0)], n = 11}}),
("passados n <cycle>",
Classifier{okData =
ClassData{prior = 0.0, unseen = -2.5649493574615367,
@ -850,41 +850,41 @@ classifiers
likelihoods =
HashMap.fromList
[("integer (numeric)", -1.0986122886681098),
("number (0..15)", -0.46262352194811296)],
("integer (0..19)", -0.46262352194811296)],
n = 24},
koData =
ClassData{prior = -0.4999559515290868, unseen = -3.713572066704308,
likelihoods =
HashMap.fromList
[("integer (numeric)", -0.5108256237659907),
("number (0..15)", -1.0498221244986778),
("number (20..90)", -2.995732273553991)],
("integer (0..19)", -1.0498221244986778),
("tens (20..90)", -2.995732273553991)],
n = 37}}),
("<hour-of-day> and <relative minutes>",
Classifier{okData =
ClassData{prior = -0.2719337154836418, unseen = -3.713572066704308,
likelihoods =
HashMap.fromList
[("\224s <time-of-day>number (0..15)", -2.3025850929940455),
("time-of-day (latent)number (21..29 31..39 41..49 51..59 61..69 71..79 81..89 91..99)",
[("\224s <time-of-day>tens (20..90)", -2.5902671654458267),
("time-of-day (latent)integer (0..19)", -2.0794415416798357),
("time-of-day (latent)tens (20..90)", -2.5902671654458267),
("nooninteger (0..19)", -2.995732273553991),
("time-of-day (latent)number (21..29 31..39 .. 91..99)",
-2.5902671654458267),
("\224s <time-of-day>number (20..90)", -2.5902671654458267),
("\224s <time-of-day>number (21..29 31..39 41..49 51..59 61..69 71..79 81..89 91..99)",
-2.5902671654458267),
("time-of-day (latent)number (0..15)", -2.0794415416798357),
("hour", -0.8556661100577202),
("time-of-day (latent)number (20..90)", -2.5902671654458267),
("noonnumber (0..15)", -2.995732273553991)],
("\224s <time-of-day>number (21..29 31..39 .. 91..99)",
-2.5902671654458267),
("\224s <time-of-day>integer (0..19)", -2.3025850929940455)],
n = 16},
koData =
ClassData{prior = -1.4350845252893227,
unseen = -2.9444389791664407,
likelihoods =
HashMap.fromList
[("\224s <time-of-day>number (20..90)", -1.791759469228055),
("time-of-day (latent)number (0..15)", -2.1972245773362196),
("hour", -1.0986122886681098),
("time-of-day (latent)number (20..90)", -1.791759469228055)],
[("\224s <time-of-day>tens (20..90)", -1.791759469228055),
("time-of-day (latent)integer (0..19)", -2.1972245773362196),
("time-of-day (latent)tens (20..90)", -1.791759469228055),
("hour", -1.0986122886681098)],
n = 5}}),
("year",
Classifier{okData =
@ -901,62 +901,61 @@ classifiers
ClassData{prior = -0.6443570163905132, unseen = -4.174387269895637,
likelihoods =
HashMap.fromList
[("week", -2.772588722239781),
("number (0..15)ano (grain)", -3.0602707946915624),
[("number (21..29 31..39 .. 91..99)hora (grain)",
-3.4657359027997265),
("week", -2.772588722239781),
("integer (0..19)segundo (grain)", -3.4657359027997265),
("integer (numeric)hora (grain)", -3.4657359027997265),
("integer (numeric)dia (grain)", -3.4657359027997265),
("number (0..15)segundo (grain)", -3.4657359027997265),
("integer (0..19)ano (grain)", -3.0602707946915624),
("second", -3.4657359027997265),
("integer (0..19)semana (grain)", -2.772588722239781),
("integer (numeric)ano (grain)", -3.4657359027997265),
("integer (0..19)mes (grain)", -3.0602707946915624),
("integer (0..19)hora (grain)", -2.772588722239781),
("integer (numeric)minutos (grain)", -2.772588722239781),
("number (21..29 31..39 41..49 51..59 61..69 71..79 81..89 91..99)hora (grain)",
-3.4657359027997265),
("day", -3.0602707946915624), ("year", -2.772588722239781),
("number (0..15)mes (grain)", -3.0602707946915624),
("number (0..15)hora (grain)", -2.772588722239781),
("integer (0..19)minutos (grain)", -3.0602707946915624),
("hour", -2.367123614131617), ("month", -3.0602707946915624),
("number (0..15)dia (grain)", -3.4657359027997265),
("number (0..15)minutos (grain)", -3.0602707946915624),
("minute", -2.367123614131617),
("number (0..15)semana (grain)", -2.772588722239781)],
("integer (0..19)dia (grain)", -3.4657359027997265)],
n = 21},
koData =
ClassData{prior = -0.7444404749474959, unseen = -4.110873864173311,
likelihoods =
HashMap.fromList
[("week", -2.995732273553991),
("number (0..15)ano (grain)", -3.4011973816621555),
("integer (numeric)hora (grain)", -2.70805020110221),
("integer (numeric)dia (grain)", -2.995732273553991),
("integer (0..19)ano (grain)", -3.4011973816621555),
("integer (numeric)mes (grain)", -3.4011973816621555),
("second", -2.995732273553991),
("integer (0..19)semana (grain)", -3.4011973816621555),
("integer (numeric)semana (grain)", -3.4011973816621555),
("integer (numeric)ano (grain)", -2.995732273553991),
("integer (0..19)mes (grain)", -2.995732273553991),
("integer (0..19)hora (grain)", -2.995732273553991),
("integer (numeric)minutos (grain)", -2.995732273553991),
("day", -2.995732273553991),
("integer (numeric)segundo (grain)", -2.995732273553991),
("year", -2.70805020110221),
("number (0..15)mes (grain)", -2.995732273553991),
("number (0..15)hora (grain)", -2.995732273553991),
("hour", -2.3025850929940455), ("month", -2.70805020110221),
("minute", -2.995732273553991),
("number (0..15)semana (grain)", -3.4011973816621555)],
("year", -2.70805020110221), ("hour", -2.3025850929940455),
("month", -2.70805020110221), ("minute", -2.995732273553991)],
n = 19}}),
("proximas n <cycle>",
Classifier{okData =
ClassData{prior = 0.0, unseen = -3.2188758248682006,
likelihoods =
HashMap.fromList
[("number (0..15)ano (grain)", -2.4849066497880004),
("integer (numeric)hora (grain)", -2.4849066497880004),
[("integer (numeric)hora (grain)", -2.4849066497880004),
("integer (numeric)dia (grain)", -2.4849066497880004),
("integer (0..19)ano (grain)", -2.4849066497880004),
("second", -2.4849066497880004),
("integer (0..19)mes (grain)", -2.4849066497880004),
("integer (numeric)minutos (grain)", -2.4849066497880004),
("day", -2.4849066497880004),
("integer (numeric)segundo (grain)", -2.4849066497880004),
("year", -2.4849066497880004),
("number (0..15)mes (grain)", -2.4849066497880004),
("hour", -2.4849066497880004), ("month", -2.4849066497880004),
("year", -2.4849066497880004), ("hour", -2.4849066497880004),
("month", -2.4849066497880004),
("minute", -2.4849066497880004)],
n = 6},
koData =
@ -1007,15 +1006,6 @@ classifiers
koData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0}}),
("number (21..29 31..39 41..49 51..59 61..69 71..79 81..89 91..99)",
Classifier{okData =
ClassData{prior = 0.0, unseen = -1.6094379124341003,
likelihoods =
HashMap.fromList [("number (20..90)number (0..15)", 0.0)],
n = 3},
koData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0}}),
("day of month (1st)",
Classifier{okData =
ClassData{prior = 0.0, unseen = -1.6094379124341003,
@ -1108,9 +1098,8 @@ classifiers
[("week", -1.791759469228055),
("integer (numeric)semana (grain)", -1.791759469228055),
("integer (numeric)ano (grain)", -1.791759469228055),
("year", -1.791759469228055),
("number (0..15)mes (grain)", -1.791759469228055),
("month", -1.791759469228055)],
("integer (0..19)mes (grain)", -1.791759469228055),
("year", -1.791759469228055), ("month", -1.791759469228055)],
n = 3},
koData =
ClassData{prior = -infinity, unseen = -1.9459101490553135,
@ -1146,6 +1135,14 @@ classifiers
koData =
ClassData{prior = -infinity, unseen = -1.6094379124341003,
likelihoods = HashMap.fromList [], n = 0}}),
("negative numbers",
Classifier{okData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0},
koData =
ClassData{prior = 0.0, unseen = -1.791759469228055,
likelihoods = HashMap.fromList [("integer (numeric)", 0.0)],
n = 4}}),
("<datetime> - <datetime> (interval)",
Classifier{okData =
ClassData{prior = -1.791759469228055, unseen = -3.1354942159291497,
@ -1189,6 +1186,13 @@ classifiers
koData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0}}),
("tens (20..90)",
Classifier{okData =
ClassData{prior = 0.0, unseen = -1.9459101490553135,
likelihoods = HashMap.fromList [("", 0.0)], n = 5},
koData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0}}),
("dia (grain)",
Classifier{okData =
ClassData{prior = -0.5596157879354228, unseen = -1.791759469228055,
@ -1313,10 +1317,10 @@ classifiers
likelihoods =
HashMap.fromList
[("week", -1.791759469228055), ("second", -1.791759469228055),
("integer (0..19)semana (grain)", -1.791759469228055),
("integer (numeric)minutos (grain)", -1.791759469228055),
("integer (numeric)segundo (grain)", -1.791759469228055),
("minute", -1.791759469228055),
("number (0..15)semana (grain)", -1.791759469228055)],
("minute", -1.791759469228055)],
n = 3},
koData =
ClassData{prior = -infinity, unseen = -1.9459101490553135,