From 498e8b16e69ae61d30cf2fd8169500310c0ea89e Mon Sep 17 00:00:00 2001 From: Alex Torres Date: Mon, 4 Dec 2017 17:49:07 -0800 Subject: [PATCH] fix pt rules for numeral Summary: When I write "dois mil e duzentos" the result should be 2200, but Duckling recognizes the numbers separately and gives the result: `[{"dim":"number","body":"dois","value":{"value":2,"type":"value"},"start":0,"end":4},{"dim":"number","body":"mil","value":{"value":1000,"type":"value"},"start":5,"end":8},{"dim":"time","body":"mil","value":{"values":[],"value":"1000-01-01T00:00:00.000-07:53","grain":"year","type":"value"},"start":5,"end":8},{"dim":"number","body":"duzentos","value":{"value":200,"type":"value"},"start":11,"end":19}]` Now with this commit, Duckling gives the correct result: `[{"dim":"number","body":"dois mil e duzentos","value":{"value":2200,"type":"value"},"start":0,"end":19}]` Closes https://github.com/facebook/duckling/pull/117 Reviewed By: blandinw Differential Revision: D6477925 Pulled By: patapizza fbshipit-source-id: 26ab503cc8def739c51ceb5bae7546016ba65ad6 --- Duckling/Numeral/PT/Corpus.hs | 16 +- Duckling/Numeral/PT/Rules.hs | 502 ++++++++++++++++---------- Duckling/Ranking/Classifiers/PT_XX.hs | 166 ++++----- 3 files changed, 405 insertions(+), 279 deletions(-) diff --git a/Duckling/Numeral/PT/Corpus.hs b/Duckling/Numeral/PT/Corpus.hs index ebd41282..e4a4c418 100644 --- a/Duckling/Numeral/PT/Corpus.hs +++ b/Duckling/Numeral/PT/Corpus.hs @@ -9,10 +9,11 @@ {-# LANGUAGE OverloadedStrings #-} module Duckling.Numeral.PT.Corpus - ( corpus ) where + ( corpus + ) where -import Prelude import Data.String +import Prelude import Duckling.Locale import Duckling.Numeral.Types @@ -33,6 +34,8 @@ allExamples = concat [ "2" , "dois" , "duas" + , "pares de" + , "um par de" ] , examples (NumeralValue 3) [ "3" @@ -52,6 +55,7 @@ allExamples = concat , "doze" , "uma dúzia" , "uma duzia" + , "uma duzias de" ] , examples (NumeralValue 14) [ "14" @@ -120,6 +124,10 @@ allExamples = concat , examples
(NumeralValue 0.77) [ "0,77" , ",77" + , "ponto setenta e sete" + ] + , examples (NumeralValue 1000) + [ "mil" ] , examples (NumeralValue 100000) [ "100.000" @@ -150,6 +158,9 @@ allExamples = concat [ "oitocentos e noventa e um" , "891" ] + , examples (NumeralValue 2200) + [ "dois mil e duzentos" + ] , examples (NumeralValue 3000000) [ "3M" , "3000K" @@ -170,6 +181,7 @@ allExamples = concat , "-1,2M" , "-1200K" , "-,0012G" + , "negativo 1,2M" ] , examples (NumeralValue 1.5) [ "1 ponto cinco" diff --git a/Duckling/Numeral/PT/Rules.hs b/Duckling/Numeral/PT/Rules.hs index ed7304ec..a6aee8dd 100644 --- a/Duckling/Numeral/PT/Rules.hs +++ b/Duckling/Numeral/PT/Rules.hs @@ -8,14 +8,19 @@ {-# LANGUAGE GADTs #-} {-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE NoRebindableSyntax #-} module Duckling.Numeral.PT.Rules ( rules ) where +import Control.Applicative ((<|>)) +import Data.HashMap.Strict (HashMap) import Data.Maybe import Data.String +import Data.Text (Text) import Prelude +import qualified Data.HashMap.Strict as HashMap import qualified Data.Text as Text import Duckling.Dimensions.Types @@ -25,189 +30,176 @@ import Duckling.Regex.Types import Duckling.Types import qualified Duckling.Numeral.Types as TNumeral -ruleNumeralsPrefixWithNegativeOrMinus :: Rule -ruleNumeralsPrefixWithNegativeOrMinus = Rule - { name = "numbers prefix with -, negative or minus" - , pattern = - [ regex "-|menos" - , dimension Numeral - ] - , prod = \tokens -> case tokens of - (_:Token Numeral nd:_) -> double (TNumeral.value nd * (-1)) - _ -> Nothing - } - -ruleIntegerNumeric :: Rule -ruleIntegerNumeric = Rule +ruleIntegers :: Rule +ruleIntegers = Rule { name = "integer (numeric)" , pattern = [ regex "(\\d{1,18})" ] , prod = \tokens -> case tokens of - (Token RegexMatch (GroupMatch (match:_)):_) -> do - v <- parseInt match - integer $ toInteger v + (Token RegexMatch (GroupMatch (match:_)):_) -> + toInteger <$> parseInt match >>= integer _ -> Nothing } -ruleDecimalWithThousandsSeparator :: Rule 
-ruleDecimalWithThousandsSeparator = Rule - { name = "decimal with thousands separator" +ruleDozen :: Rule +ruleDozen = Rule + { name = "a dozen of" , pattern = - [ regex "(\\d+(\\.\\d\\d\\d)+,\\d+)" + [ regex "(uma )?d(u|ú)zias?( de)?" + ] + , prod = \_ -> integer 12 >>= withMultipliable >>= notOkForAnyTime + } + +zeroNineteenMap :: HashMap Text Integer +zeroNineteenMap = HashMap.fromList + [ ( "zero" , 0 ) + , ( "um" , 1 ) + , ( "uma" , 1 ) + , ( "dois" , 2 ) + , ( "duas" , 2 ) + , ( "tres" , 3 ) + , ( "três" , 3 ) + , ( "quatro" , 4 ) + , ( "cinco" , 5 ) + , ( "seis" , 6 ) + , ( "sete" , 7 ) + , ( "oito" , 8 ) + , ( "nove" , 9 ) + , ( "dez" , 10 ) + , ( "onze" , 11 ) + , ( "doze" , 12 ) + , ( "treze" , 13 ) + , ( "catorze" , 14 ) + , ( "quatorze" , 14 ) + , ( "quinze" , 15 ) + , ( "dezesseis" , 16 ) + , ( "dezasseis" , 16 ) + , ( "dezessete" , 17 ) + , ( "dezassete" , 17 ) + , ( "dezoito" , 18 ) + , ( "dezenove" , 19 ) + , ( "dezanove" , 19 ) + ] + +informalMap :: HashMap Text Integer +informalMap = HashMap.fromList + [ ( "um par" , 2 ) + , ( "um par de" , 2 ) + , ( "par" , 2 ) + , ( "pares" , 2 ) + , ( "par de" , 2 ) + , ( "pares de" , 2 ) + , ( "um pouco" , 3 ) + , ( "pouco" , 3 ) + ] + +ruleToNineteen :: Rule +ruleToNineteen = Rule + { name = "integer (0..19)" + , pattern = + [ regex "(zero|d(oi|ua)s|(uma? )?par(es)?( de)?|tr(e|ê)s|(um )?pouco|uma?|(c|qu)atorze|quatro|quinze|cinco|dez[ea]sseis|seis|dez[ea]ssete|sete|dezoito|oito|dez[ea]nove|nove|dez|onze|doze|treze)" ] , prod = \tokens -> case tokens of - (Token RegexMatch (GroupMatch (match:_)): - _) -> let fmt = Text.replace "," "." $ Text.replace "." 
Text.empty match - in parseDouble fmt >>= double + (Token RegexMatch (GroupMatch (match:_)):_) -> + let x = Text.toLower match in + (HashMap.lookup x zeroNineteenMap >>= integer) <|> + (HashMap.lookup x informalMap >>= integer >>= notOkForAnyTime) _ -> Nothing } -ruleDecimalNumeral :: Rule -ruleDecimalNumeral = Rule - { name = "decimal number" +tensMap :: HashMap Text Integer +tensMap = HashMap.fromList + [ ( "vinte" , 20 ) + , ( "trinta" , 30 ) + , ( "quarenta" , 40 ) + , ( "cincoenta" , 50 ) + , ( "cinquenta" , 50 ) + , ( "cinqüenta" , 50 ) + , ( "sessenta" , 60 ) + , ( "setenta" , 70 ) + , ( "oitenta" , 80 ) + , ( "noventa" , 90 ) + ] + +ruleTens :: Rule +ruleTens = Rule + { name = "tens (20..90)" , pattern = - [ regex "(\\d*,\\d+)" + [ regex "(vinte|trinta|quarenta|cin(co|q[uü])enta|sessenta|setenta|oitenta|noventa)" ] , prod = \tokens -> case tokens of - (Token RegexMatch (GroupMatch (match:_)): - _) -> parseDecimal False match + (Token RegexMatch (GroupMatch (match:_)):_) -> + HashMap.lookup (Text.toLower match) tensMap >>= integer _ -> Nothing } -ruleNumeral2 :: Rule -ruleNumeral2 = Rule - { name = "number (20..90)" +centsMap :: HashMap Text Integer +centsMap = HashMap.fromList + [ ( "cem" , 100 ) + , ( "cento" , 100 ) + , ( "duzentos" , 200 ) + , ( "trezentos" , 300 ) + , ( "quatrocentos" , 400 ) + , ( "quinhentos" , 500 ) + , ( "seiscentos" , 600 ) + , ( "setecentos" , 700 ) + , ( "oitocentos" , 800 ) + , ( "novecentos" , 900 ) + ] + +ruleCent :: Rule +ruleCent = Rule + { name = "hundreds (100..900)" , pattern = - [ regex "(vinte|trinta|quarenta|cincoenta|cinq(ü)enta|cinquenta|sessenta|setenta|oitenta|noventa)" + [ regex "(cem|cento|duzentos|trezentos|quatrocentos|quinhentos|seiscentos|setecentos|oitocentos|novecentos)" + ] + , prod = \tokens -> case tokens of + (Token RegexMatch (GroupMatch (match:_)):_) -> + HashMap.lookup (Text.toLower match) centsMap >>= integer + _ -> Nothing + } + +rulePowersOfTen :: Rule +rulePowersOfTen = Rule + { name = "powers of
tens" + , pattern = + [ regex "(milhao|milhão|milhões|milhoes|bilhao|bilhão|bilhões|bilhoes|mil)" ] , prod = \tokens -> case tokens of (Token RegexMatch (GroupMatch (match:_)):_) -> case Text.toLower match of - "vinte" -> integer 20 - "trinta" -> integer 30 - "quarenta" -> integer 40 - "cinq\252enta" -> integer 50 - "cincoenta" -> integer 50 - "cinquenta" -> integer 50 - "sessenta" -> integer 60 - "setenta" -> integer 70 - "oitenta" -> integer 80 - "noventa" -> integer 90 - _ -> Nothing + "mil" -> double 1e3 >>= withGrain 3 >>= withMultipliable + "milhao" -> double 1e6 >>= withGrain 6 >>= withMultipliable + "milhão" -> double 1e6 >>= withGrain 6 >>= withMultipliable + "milhões" -> double 1e6 >>= withGrain 6 >>= withMultipliable + "milhoes" -> double 1e6 >>= withGrain 6 >>= withMultipliable + "bilhao" -> double 1e9 >>= withGrain 9 >>= withMultipliable + "bilhão" -> double 1e9 >>= withGrain 9 >>= withMultipliable + "bilhões" -> double 1e9 >>= withGrain 9 >>= withMultipliable + "bilhoes" -> double 1e9 >>= withGrain 9 >>= withMultipliable + _ -> Nothing _ -> Nothing } -ruleNumeral :: Rule -ruleNumeral = Rule - { name = "number (0..15)" +ruleCompositeTens :: Rule +ruleCompositeTens = Rule + { name = "integer 21..99" , pattern = - [ regex "(zero|uma?|d(oi|ua)s|tr(ê|e)s|quatro|cinco|seis|sete|oito|nove|dez|onze|doze|treze|(ca|qua)torze|quinze)" + [ oneOf [20,30..90] + , numberBetween 1 10 ] , prod = \tokens -> case tokens of - (Token RegexMatch (GroupMatch (match:_)):_) -> case Text.toLower match of - "zero" -> integer 0 - "uma" -> integer 1 - "um" -> integer 1 - "dois" -> integer 2 - "duas" -> integer 2 - "três" -> integer 3 - "tres" -> integer 3 - "quatro" -> integer 4 - "cinco" -> integer 5 - "seis" -> integer 6 - "sete" -> integer 7 - "oito" -> integer 8 - "nove" -> integer 9 - "dez" -> integer 10 - "onze" -> integer 11 - "doze" -> integer 12 - "treze" -> integer 13 - "catorze" -> integer 14 - "quatorze" -> integer 14 - "quinze" -> integer 15 - _ -> Nothing + (Token 
Numeral (NumeralData {TNumeral.value = tens}): + Token Numeral (NumeralData {TNumeral.value = units}): + _) -> double $ tens + units _ -> Nothing } -ruleNumeral5 :: Rule -ruleNumeral5 = Rule - { name = "number (16..19)" +ruleDecsAnd :: Rule +ruleDecsAnd = Rule + { name = "number (21..29 31..39 .. 91..99)" , pattern = - [ regex "(dez[ea]sseis|dez[ea]ssete|dezoito|dez[ea]nove)" - ] - , prod = \tokens -> case tokens of - (Token RegexMatch (GroupMatch (match:_)):_) -> case Text.toLower match of - "dezesseis" -> integer 16 - "dezasseis" -> integer 16 - "dezessete" -> integer 17 - "dezassete" -> integer 17 - "dezoito" -> integer 18 - "dezenove" -> integer 19 - "dezanove" -> integer 19 - _ -> Nothing - _ -> Nothing - } - -ruleNumeral3 :: Rule -ruleNumeral3 = Rule - { name = "number (16..19)" - , pattern = - [ numberWith TNumeral.value (== 10) - , regex "e" - , numberBetween 6 10 - ] - , prod = \tokens -> case tokens of - (_:Token Numeral (NumeralData {TNumeral.value = v}):_) -> double $ 10 + v - _ -> Nothing - } - -ruleNumeralsSuffixesKMG :: Rule -ruleNumeralsSuffixesKMG = Rule - { name = "numbers suffixes (K, M, G)" - , pattern = - [ dimension Numeral - , regex "([kmg])(?=[\\W\\$€]|$)" - ] - , prod = \tokens -> case tokens of - (Token Numeral (NumeralData {TNumeral.value = v}): - Token RegexMatch (GroupMatch (match:_)): - _) -> case Text.toLower match of - "k" -> double $ v * 1e3 - "m" -> double $ v * 1e6 - "g" -> double $ v * 1e9 - _ -> Nothing - _ -> Nothing - } - -ruleNumeral6 :: Rule -ruleNumeral6 = Rule - { name = "number 100..1000 " - , pattern = - [ - regex "(cem|cento|duzentos|trezentos|quatrocentos|quinhentos|seiscentos|setecentos|oitocentos|novecentos|mil)" - ] - , prod = \tokens -> case tokens of - (Token RegexMatch (GroupMatch (match:_)):_) -> case Text.toLower match of - "cento" -> integer 100 - "cem" -> integer 100 - "duzentos" -> integer 200 - "trezentos" -> integer 300 - "quatrocentos" -> integer 400 - "quinhentos" -> integer 500 - "seiscentos" -> integer 
600 - "setecentos" -> integer 700 - "oitocentos" -> integer 800 - "novecentos" -> integer 900 - "mil" -> integer 1000 - _ -> Nothing - _ -> Nothing - } - -ruleNumeral4 :: Rule -ruleNumeral4 = Rule - { name = "number (21..29 31..39 41..49 51..59 61..69 71..79 81..89 91..99)" - , pattern = - [ oneOf [70, 20, 60, 50, 40, 90, 30, 80] + [ oneOf [20, 30..90] , regex "e" , numberBetween 1 10 ] @@ -219,36 +211,27 @@ ruleNumeral4 = Rule _ -> Nothing } -ruleDozen :: Rule -ruleDozen = Rule - { name = "dozen" +ruleCompositeCents :: Rule +ruleCompositeCents = Rule + { name = "integer 101..999" , pattern = - [ regex "d(ú|u)zias?" - ] - , prod = \_ -> integer 12 >>= withGrain 1 >>= withMultipliable - } - -ruleNumeralDozen :: Rule -ruleNumeralDozen = Rule - { name = "number dozen" - , pattern = - [ numberBetween 1 11 - , dimension Numeral + [ oneOf [100, 200..900] + , numberBetween 1 100 ] , prod = \tokens -> case tokens of - (Token Numeral (NumeralData {TNumeral.value = v1}): - Token Numeral (NumeralData {TNumeral.value = v2, TNumeral.grain = Just g}): - _) -> double (v1 * v2) >>= withGrain g + (Token Numeral (NumeralData {TNumeral.value = hundreds}): + Token Numeral (NumeralData {TNumeral.value = units}): + _) -> double $ hundreds + units _ -> Nothing } -ruleNumerals :: Rule -ruleNumerals = Rule - { name = "numbers (100..999)" +ruleCentsAnd :: Rule +ruleCentsAnd = Rule + { name = "number (101..199 201..299 .. 
901..999)" , pattern = - [ numberBetween 100 1000 + [ oneOf [100, 200..900] , regex "e" - , numberBetween 0 100 + , numberBetween 1 100 ] , prod = \tokens -> case tokens of (Token Numeral (NumeralData {TNumeral.value = v1}): @@ -258,9 +241,23 @@ ruleNumerals = Rule _ -> Nothing } -ruleNumeralDotNumeral :: Rule -ruleNumeralDotNumeral = Rule - { name = "number dot number" +ruleSkipHundreds :: Rule +ruleSkipHundreds = Rule + { name = "one twenty two" + , pattern = + [ numberBetween 1 10 + , numberBetween 10 100 + ] + , prod = \tokens -> case tokens of + (Token Numeral (NumeralData {TNumeral.value = hundreds}): + Token Numeral (NumeralData {TNumeral.value = rest}): + _) -> double $ hundreds*100 + rest + _ -> Nothing + } + +ruleDotSpelledOut :: Rule +ruleDotSpelledOut = Rule + { name = "one point 2" , pattern = [ dimension Numeral , regex "ponto" @@ -272,35 +269,148 @@ ruleNumeralDotNumeral = Rule _ -> Nothing } -ruleIntegerWithThousandsSeparator :: Rule -ruleIntegerWithThousandsSeparator = Rule - { name = "integer with thousands separator ." +ruleLeadingDotSpelledOut :: Rule +ruleLeadingDotSpelledOut = Rule + { name = "point 77" , pattern = - [ regex "(\\d{1,3}(\\.\\d\\d\\d){1,5})" + [ regex "ponto" + , numberWith TNumeral.grain isNothing ] , prod = \tokens -> case tokens of - (Token RegexMatch (GroupMatch (match:_)): - _) -> let fmt = Text.replace "." Text.empty match - in parseDouble fmt >>= double + (_:Token Numeral nd:_) -> double . 
decimalsToDouble $ TNumeral.value nd + _ -> Nothing + } + +ruleDecimals :: Rule +ruleDecimals = Rule + { name = "decimal number" + , pattern = + [ regex "(\\d*\\,\\d+)" + ] + , prod = \tokens -> case tokens of + (Token RegexMatch (GroupMatch (match:_)):_) -> parseDecimal False match + _ -> Nothing + } + +ruleFractions :: Rule +ruleFractions = Rule + { name = "fractional number" + , pattern = + [ regex "(\\d+)/(\\d+)" + ] + , prod = \tokens -> case tokens of + (Token RegexMatch (GroupMatch (numerator:denominator:_)):_) -> do + n <- parseDecimal False numerator + d <- parseDecimal False denominator + divide n d + _ -> Nothing + } + +ruleCommas :: Rule +ruleCommas = Rule + { name = "dot-separated numbers" + , pattern = + [ regex "(\\d+(\\.\\d\\d\\d)+(\\,\\d+)?)" + ] + , prod = \tokens -> case tokens of + (Token RegexMatch (GroupMatch (match:_)):_) -> + parseDecimal False $ Text.replace "." Text.empty match + _ -> Nothing + } + +ruleSuffixes :: Rule +ruleSuffixes = Rule + { name = "suffixes (K,M,G))" + , pattern = + [ dimension Numeral + , regex "(k|m|g)(?=[\\W$€¢£]|$)" + ] + , prod = \tokens -> case tokens of + (Token Numeral (NumeralData {TNumeral.value = v}): + Token RegexMatch (GroupMatch (match:_)): + _) -> case Text.toLower match of + "k" -> double $ v * 1e3 + "m" -> double $ v * 1e6 + "g" -> double $ v * 1e9 + _ -> Nothing + _ -> Nothing + } + +ruleNegative :: Rule +ruleNegative = Rule + { name = "negative numbers" + , pattern = + [ regex "(-|menos|negativo)(?!\\s*-)" + , numberWith TNumeral.value (>0) + ] + , prod = \tokens -> case tokens of + (_:Token Numeral nd:_) -> double $ TNumeral.value nd * (-1) + _ -> Nothing + } + +ruleSum :: Rule +ruleSum = Rule + { name = "intersect 2 numbers" + , pattern = + [ numberWith (fromMaybe 0 .
TNumeral.grain) (>1) + , numberWith TNumeral.multipliable not + ] + , prod = \tokens -> case tokens of + (Token Numeral (NumeralData {TNumeral.value = val1, TNumeral.grain = Just g}): + Token Numeral (NumeralData {TNumeral.value = val2}): + _) | (10 ** fromIntegral g) > val2 -> double $ val1 + val2 + _ -> Nothing + } + +ruleSumAnd :: Rule +ruleSumAnd = Rule + { name = "intersect 2 numbers (with and)" + , pattern = + [ numberWith (fromMaybe 0 . TNumeral.grain) (>1) + , regex "e" + , numberWith TNumeral.multipliable not + ] + , prod = \tokens -> case tokens of + (Token Numeral (NumeralData {TNumeral.value = val1, TNumeral.grain = Just g}): + _: + Token Numeral (NumeralData {TNumeral.value = val2}): + _) | (10 ** fromIntegral g) > val2 -> double $ val1 + val2 + _ -> Nothing + } + +ruleMultiply :: Rule +ruleMultiply = Rule + { name = "compose by multiplication" + , pattern = + [ dimension Numeral + , numberWith TNumeral.multipliable id + ] + , prod = \tokens -> case tokens of + (token1:token2:_) -> multiply token1 token2 _ -> Nothing } rules :: [Rule] rules = - [ ruleDecimalNumeral - , ruleDecimalWithThousandsSeparator + [ ruleIntegers + , ruleToNineteen + , ruleTens + , ruleCent + , rulePowersOfTen + , ruleCompositeTens + , ruleCompositeCents + , ruleSkipHundreds + , ruleDotSpelledOut + , ruleLeadingDotSpelledOut + , ruleDecimals + , ruleFractions + , ruleCommas + , ruleSuffixes + , ruleNegative + , ruleSum + , ruleDecsAnd + , ruleCentsAnd + , ruleSumAnd + , ruleMultiply , ruleDozen - , ruleIntegerNumeric - , ruleIntegerWithThousandsSeparator - , ruleNumeral - , ruleNumeral2 - , ruleNumeral3 - , ruleNumeral4 - , ruleNumeral5 - , ruleNumeral6 - , ruleNumeralDotNumeral - , ruleNumeralDozen - , ruleNumerals - , ruleNumeralsPrefixWithNegativeOrMinus - , ruleNumeralsSuffixesKMG ] diff --git a/Duckling/Ranking/Classifiers/PT_XX.hs b/Duckling/Ranking/Classifiers/PT_XX.hs index 075ec231..af5fbc49 100644 --- a/Duckling/Ranking/Classifiers/PT_XX.hs +++ 
b/Duckling/Ranking/Classifiers/PT_XX.hs @@ -287,6 +287,14 @@ classifiers ("named-dayintersect", -2.871679624884012), ("named-dayintersect by `da` or `de`", -1.405342556090585)], n = 20}}), + ("integer (0..19)", + Classifier{okData = + ClassData{prior = -1.9048194970694474e-2, + unseen = -3.9889840465642745, + likelihoods = HashMap.fromList [("", 0.0)], n = 52}, + koData = + ClassData{prior = -3.970291913552122, unseen = -1.0986122886681098, + likelihoods = HashMap.fromList [("", 0.0)], n = 1}}), ("ordinals (primeiro..10)", Classifier{okData = ClassData{prior = -infinity, unseen = -0.6931471805599453, @@ -300,7 +308,7 @@ classifiers unseen = -4.02535169073515, likelihoods = HashMap.fromList - [("number (0..15)named-month", -1.927891643552635), + [("integer (0..19)named-month", -1.927891643552635), ("integer (numeric)named-month", -1.0116009116784799), ("month", -0.7114963192281418)], n = 26}, @@ -368,6 +376,15 @@ classifiers -2.1972245773362196), ("named-monthintersect by `da` or `de`", -1.791759469228055)], n = 5}}), + ("number (21..29 31..39 .. 
91..99)", + Classifier{okData = + ClassData{prior = 0.0, unseen = -1.6094379124341003, + likelihoods = + HashMap.fromList [("tens (20..90)integer (0..19)", 0.0)], + n = 3}, + koData = + ClassData{prior = -infinity, unseen = -0.6931471805599453, + likelihoods = HashMap.fromList [], n = 0}}), (" horas", Classifier{okData = ClassData{prior = -0.7731898882334817, @@ -557,10 +574,9 @@ classifiers likelihoods = HashMap.fromList [("integer (numeric)", -0.6061358035703156), - ("number (0..15)", -1.0116009116784799), - ("numbers prefix with -, negative or minus", - -3.0910424533583156), - ("number (20..90)", -3.0910424533583156)], + ("integer (0..19)", -1.0116009116784799), + ("negative numbers", -3.0910424533583156), + ("tens (20..90)", -3.0910424533583156)], n = 40}}), ("n[ao] ", Classifier{okData = @@ -687,14 +703,6 @@ classifiers koData = ClassData{prior = -infinity, unseen = -1.0986122886681098, likelihoods = HashMap.fromList [], n = 0}}), - ("number (0..15)", - Classifier{okData = - ClassData{prior = -1.9048194970694474e-2, - unseen = -3.9889840465642745, - likelihoods = HashMap.fromList [("", 0.0)], n = 52}, - koData = - ClassData{prior = -3.970291913552122, unseen = -1.0986122886681098, - likelihoods = HashMap.fromList [("", 0.0)], n = 1}}), ("antes das ", Classifier{okData = ClassData{prior = 0.0, unseen = -2.4849066497880004, @@ -706,14 +714,6 @@ classifiers koData = ClassData{prior = -infinity, unseen = -1.3862943611198906, likelihoods = HashMap.fromList [], n = 0}}), - ("numbers prefix with -, negative or minus", - Classifier{okData = - ClassData{prior = -infinity, unseen = -0.6931471805599453, - likelihoods = HashMap.fromList [], n = 0}, - koData = - ClassData{prior = 0.0, unseen = -1.791759469228055, - likelihoods = HashMap.fromList [("integer (numeric)", 0.0)], - n = 4}}), ("dd-dd (interval)", Classifier{okData = ClassData{prior = 0.0, unseen = -1.9459101490553135, @@ -761,19 +761,12 @@ classifiers likelihoods = HashMap.fromList [("integer (numeric)noon", 
-1.7047480922384253), - ("hour", -0.7884573603642702), - ("number (0..15)noon", -1.0116009116784799)], + ("integer (0..19)noon", -1.0116009116784799), + ("hour", -0.7884573603642702)], n = 4}, koData = ClassData{prior = -infinity, unseen = -1.3862943611198906, likelihoods = HashMap.fromList [], n = 0}}), - ("number (20..90)", - Classifier{okData = - ClassData{prior = 0.0, unseen = -1.9459101490553135, - likelihoods = HashMap.fromList [("", 0.0)], n = 5}, - koData = - ClassData{prior = -infinity, unseen = -0.6931471805599453, - likelihoods = HashMap.fromList [], n = 0}}), (" da manha", Classifier{okData = ClassData{prior = -0.262364264467491, unseen = -3.4339872044851463, @@ -811,6 +804,13 @@ classifiers koData = ClassData{prior = -infinity, unseen = -2.1972245773362196, likelihoods = HashMap.fromList [], n = 0}}), + ("fractional number", + Classifier{okData = + ClassData{prior = -infinity, unseen = -0.6931471805599453, + likelihoods = HashMap.fromList [], n = 0}, + koData = + ClassData{prior = 0.0, unseen = -2.5649493574615367, + likelihoods = HashMap.fromList [("", 0.0)], n = 11}}), ("passados n ", Classifier{okData = ClassData{prior = 0.0, unseen = -2.5649493574615367, @@ -850,41 +850,41 @@ classifiers likelihoods = HashMap.fromList [("integer (numeric)", -1.0986122886681098), - ("number (0..15)", -0.46262352194811296)], + ("integer (0..19)", -0.46262352194811296)], n = 24}, koData = ClassData{prior = -0.4999559515290868, unseen = -3.713572066704308, likelihoods = HashMap.fromList [("integer (numeric)", -0.5108256237659907), - ("number (0..15)", -1.0498221244986778), - ("number (20..90)", -2.995732273553991)], + ("integer (0..19)", -1.0498221244986778), + ("tens (20..90)", -2.995732273553991)], n = 37}}), (" and ", Classifier{okData = ClassData{prior = -0.2719337154836418, unseen = -3.713572066704308, likelihoods = HashMap.fromList - [("\224s number (0..15)", -2.3025850929940455), - ("time-of-day (latent)number (21..29 31..39 41..49 51..59 61..69 71..79 81..89 
91..99)", + [("\224s tens (20..90)", -2.5902671654458267), + ("time-of-day (latent)integer (0..19)", -2.0794415416798357), + ("time-of-day (latent)tens (20..90)", -2.5902671654458267), + ("nooninteger (0..19)", -2.995732273553991), + ("time-of-day (latent)number (21..29 31..39 .. 91..99)", -2.5902671654458267), - ("\224s number (20..90)", -2.5902671654458267), - ("\224s number (21..29 31..39 41..49 51..59 61..69 71..79 81..89 91..99)", - -2.5902671654458267), - ("time-of-day (latent)number (0..15)", -2.0794415416798357), ("hour", -0.8556661100577202), - ("time-of-day (latent)number (20..90)", -2.5902671654458267), - ("noonnumber (0..15)", -2.995732273553991)], + ("\224s number (21..29 31..39 .. 91..99)", + -2.5902671654458267), + ("\224s integer (0..19)", -2.3025850929940455)], n = 16}, koData = ClassData{prior = -1.4350845252893227, unseen = -2.9444389791664407, likelihoods = HashMap.fromList - [("\224s number (20..90)", -1.791759469228055), - ("time-of-day (latent)number (0..15)", -2.1972245773362196), - ("hour", -1.0986122886681098), - ("time-of-day (latent)number (20..90)", -1.791759469228055)], + [("\224s tens (20..90)", -1.791759469228055), + ("time-of-day (latent)integer (0..19)", -2.1972245773362196), + ("time-of-day (latent)tens (20..90)", -1.791759469228055), + ("hour", -1.0986122886681098)], n = 5}}), ("year", Classifier{okData = @@ -901,62 +901,61 @@ classifiers ClassData{prior = -0.6443570163905132, unseen = -4.174387269895637, likelihoods = HashMap.fromList - [("week", -2.772588722239781), - ("number (0..15)ano (grain)", -3.0602707946915624), + [("number (21..29 31..39 .. 
91..99)hora (grain)", + -3.4657359027997265), + ("week", -2.772588722239781), + ("integer (0..19)segundo (grain)", -3.4657359027997265), ("integer (numeric)hora (grain)", -3.4657359027997265), ("integer (numeric)dia (grain)", -3.4657359027997265), - ("number (0..15)segundo (grain)", -3.4657359027997265), + ("integer (0..19)ano (grain)", -3.0602707946915624), ("second", -3.4657359027997265), + ("integer (0..19)semana (grain)", -2.772588722239781), ("integer (numeric)ano (grain)", -3.4657359027997265), + ("integer (0..19)mes (grain)", -3.0602707946915624), + ("integer (0..19)hora (grain)", -2.772588722239781), ("integer (numeric)minutos (grain)", -2.772588722239781), - ("number (21..29 31..39 41..49 51..59 61..69 71..79 81..89 91..99)hora (grain)", - -3.4657359027997265), ("day", -3.0602707946915624), ("year", -2.772588722239781), - ("number (0..15)mes (grain)", -3.0602707946915624), - ("number (0..15)hora (grain)", -2.772588722239781), + ("integer (0..19)minutos (grain)", -3.0602707946915624), ("hour", -2.367123614131617), ("month", -3.0602707946915624), - ("number (0..15)dia (grain)", -3.4657359027997265), - ("number (0..15)minutos (grain)", -3.0602707946915624), ("minute", -2.367123614131617), - ("number (0..15)semana (grain)", -2.772588722239781)], + ("integer (0..19)dia (grain)", -3.4657359027997265)], n = 21}, koData = ClassData{prior = -0.7444404749474959, unseen = -4.110873864173311, likelihoods = HashMap.fromList [("week", -2.995732273553991), - ("number (0..15)ano (grain)", -3.4011973816621555), ("integer (numeric)hora (grain)", -2.70805020110221), ("integer (numeric)dia (grain)", -2.995732273553991), + ("integer (0..19)ano (grain)", -3.4011973816621555), ("integer (numeric)mes (grain)", -3.4011973816621555), ("second", -2.995732273553991), + ("integer (0..19)semana (grain)", -3.4011973816621555), ("integer (numeric)semana (grain)", -3.4011973816621555), ("integer (numeric)ano (grain)", -2.995732273553991), + ("integer (0..19)mes (grain)", 
-2.995732273553991), + ("integer (0..19)hora (grain)", -2.995732273553991), ("integer (numeric)minutos (grain)", -2.995732273553991), ("day", -2.995732273553991), ("integer (numeric)segundo (grain)", -2.995732273553991), - ("year", -2.70805020110221), - ("number (0..15)mes (grain)", -2.995732273553991), - ("number (0..15)hora (grain)", -2.995732273553991), - ("hour", -2.3025850929940455), ("month", -2.70805020110221), - ("minute", -2.995732273553991), - ("number (0..15)semana (grain)", -3.4011973816621555)], + ("year", -2.70805020110221), ("hour", -2.3025850929940455), + ("month", -2.70805020110221), ("minute", -2.995732273553991)], n = 19}}), ("proximas n ", Classifier{okData = ClassData{prior = 0.0, unseen = -3.2188758248682006, likelihoods = HashMap.fromList - [("number (0..15)ano (grain)", -2.4849066497880004), - ("integer (numeric)hora (grain)", -2.4849066497880004), + [("integer (numeric)hora (grain)", -2.4849066497880004), ("integer (numeric)dia (grain)", -2.4849066497880004), + ("integer (0..19)ano (grain)", -2.4849066497880004), ("second", -2.4849066497880004), + ("integer (0..19)mes (grain)", -2.4849066497880004), ("integer (numeric)minutos (grain)", -2.4849066497880004), ("day", -2.4849066497880004), ("integer (numeric)segundo (grain)", -2.4849066497880004), - ("year", -2.4849066497880004), - ("number (0..15)mes (grain)", -2.4849066497880004), - ("hour", -2.4849066497880004), ("month", -2.4849066497880004), + ("year", -2.4849066497880004), ("hour", -2.4849066497880004), + ("month", -2.4849066497880004), ("minute", -2.4849066497880004)], n = 6}, koData = @@ -1007,15 +1006,6 @@ classifiers koData = ClassData{prior = -infinity, unseen = -0.6931471805599453, likelihoods = HashMap.fromList [], n = 0}}), - ("number (21..29 31..39 41..49 51..59 61..69 71..79 81..89 91..99)", - Classifier{okData = - ClassData{prior = 0.0, unseen = -1.6094379124341003, - likelihoods = - HashMap.fromList [("number (20..90)number (0..15)", 0.0)], - n = 3}, - koData = - 
ClassData{prior = -infinity, unseen = -0.6931471805599453, - likelihoods = HashMap.fromList [], n = 0}}), ("day of month (1st)", Classifier{okData = ClassData{prior = 0.0, unseen = -1.6094379124341003, @@ -1108,9 +1098,8 @@ classifiers [("week", -1.791759469228055), ("integer (numeric)semana (grain)", -1.791759469228055), ("integer (numeric)ano (grain)", -1.791759469228055), - ("year", -1.791759469228055), - ("number (0..15)mes (grain)", -1.791759469228055), - ("month", -1.791759469228055)], + ("integer (0..19)mes (grain)", -1.791759469228055), + ("year", -1.791759469228055), ("month", -1.791759469228055)], n = 3}, koData = ClassData{prior = -infinity, unseen = -1.9459101490553135, @@ -1146,6 +1135,14 @@ classifiers koData = ClassData{prior = -infinity, unseen = -1.6094379124341003, likelihoods = HashMap.fromList [], n = 0}}), + ("negative numbers", + Classifier{okData = + ClassData{prior = -infinity, unseen = -0.6931471805599453, + likelihoods = HashMap.fromList [], n = 0}, + koData = + ClassData{prior = 0.0, unseen = -1.791759469228055, + likelihoods = HashMap.fromList [("integer (numeric)", 0.0)], + n = 4}}), (" - (interval)", Classifier{okData = ClassData{prior = -1.791759469228055, unseen = -3.1354942159291497, @@ -1189,6 +1186,13 @@ classifiers koData = ClassData{prior = -infinity, unseen = -0.6931471805599453, likelihoods = HashMap.fromList [], n = 0}}), + ("tens (20..90)", + Classifier{okData = + ClassData{prior = 0.0, unseen = -1.9459101490553135, + likelihoods = HashMap.fromList [("", 0.0)], n = 5}, + koData = + ClassData{prior = -infinity, unseen = -0.6931471805599453, + likelihoods = HashMap.fromList [], n = 0}}), ("dia (grain)", Classifier{okData = ClassData{prior = -0.5596157879354228, unseen = -1.791759469228055, @@ -1313,10 +1317,10 @@ classifiers likelihoods = HashMap.fromList [("week", -1.791759469228055), ("second", -1.791759469228055), + ("integer (0..19)semana (grain)", -1.791759469228055), ("integer (numeric)minutos (grain)", 
-1.791759469228055), ("integer (numeric)segundo (grain)", -1.791759469228055), - ("minute", -1.791759469228055), - ("number (0..15)semana (grain)", -1.791759469228055)], + ("minute", -1.791759469228055)], n = 3}, koData = ClassData{prior = -infinity, unseen = -1.9459101490553135,