Numeral/ZH: support more common expressions (#516-1) (#522)

Summary:
First set of changes from pull request https://github.com/facebook/duckling/issues/516.

Supports more common numeral expressions in Chinese, such as fractions, halves, and dozens.

Pull Request resolved: https://github.com/facebook/duckling/pull/522

Reviewed By: patapizza

Differential Revision: D23428893

Pulled By: chessai

fbshipit-source-id: 3454ac70a4bfff90dc282560916a0fae9969f521
This commit is contained in:
kckckcng 2021-01-21 21:05:56 -08:00 committed by Facebook GitHub Bot
parent e9e5507820
commit f2798021b6
7 changed files with 356 additions and 116 deletions

View File

@ -35,67 +35,97 @@ allExamples = concat
, ""
, "一个"
, "1个"
, ""
]
, examples (NumeralValue 2)
[ "2"
, "二個"
, "二个"
, ""
, "一對"
, "一雙"
]
, examples (NumeralValue 10)
[ "10"
, ""
, ""
, "五對"
, "五雙"
]
, examples (NumeralValue 11)
[ "11"
, "十一"
, "拾壹"
]
, examples (NumeralValue 20)
[ "20"
, "二十"
, "貳拾"
, "廿"
]
, examples (NumeralValue 60)
[ "60"
, "六十"
, "陸拾"
, "五打"
]
, examples (NumeralValue 33)
[ "33"
, "三十三"
, "參拾參"
, "卅三"
]
, examples (NumeralValue 96)
[ "96"
, "九十六"
, "玖拾陸"
, "八打"
]
, examples (NumeralValue 203)
[ "203"
, "二百零三"
, "貳佰零參"
]
, examples (NumeralValue 534)
[ "534"
, "五百三十四"
, "伍佰參拾肆"
, "五百卅四"
]
, examples (NumeralValue 34567)
[ "34567"
, "34,567"
, "三万四千五百六十七"
, "三萬四千五百六十七"
, "參萬肆仟伍佰陸拾柒"
]
, examples (NumeralValue 10040)
[ "10040"
, "10,040"
, "一万零四十"
, "一萬零四十"
, "壹萬零肆拾"
, "一萬零卌"
]
, examples (NumeralValue 1.1)
[ "1.1"
, "1.10"
, "01.10"
, "一點一"
, "十分之十一"
, "十分十一"
]
, examples (NumeralValue 0.77)
[ "0.77"
, ".77"
, "零點77"
]
, examples (NumeralValue 34507)
[ "34507"
, "34,507"
, "三万四千五百零七"
, "三萬四千五百零七"
, "參萬肆仟伍佰零柒"
]
, examples (NumeralValue 100000)
[ "100,000"
@ -103,18 +133,24 @@ allExamples = concat
, "100K"
, "100k"
, "十万"
, "十萬"
, "拾萬"
]
, examples (NumeralValue 3000000)
[ "3M"
, "3000000"
, "3,000,000"
, "三百万"
, "三百萬"
, "參佰萬"
]
, examples (NumeralValue 1040000)
[ "1,040,000"
, "1040000"
, "1.04M"
, "一百零四万"
, "一百零四萬"
, "壹佰零肆萬"
]
, examples (NumeralValue 1200000)
[ "1,200,000"
@ -122,6 +158,8 @@ allExamples = concat
, "1.2M"
, ".0012G"
, "一百二十万"
, "一百二十萬"
, "壹佰貳拾萬"
]
, examples (NumeralValue (-1200000))
[ "- 1,200,000"
@ -135,5 +173,17 @@ allExamples = concat
, "-1200K"
, "-.0012G"
, "负一百二十万"
, "負一百二十萬"
, "負壹佰貳拾萬"
]
, examples (NumeralValue 0.5)
[ "0.5"
, "一半"
, "1半"
, "半个"
, "半個"
, "零點五"
, "二分之一"
, "二分一"
]
]

View File

@ -31,7 +31,7 @@ ruleInteger :: Rule
ruleInteger = Rule
{ name = "integer (0..10)"
, pattern =
[ regex "(|零|一|二|两|兩|三|四|五|六|七|八|九|十)"
[ regex "(|零|一|二|两|兩|三|四|五|六|七|八|九|十|壹|貳|參|肆|伍|陸|柒|捌|玖|拾)"
]
, prod = \case
(Token RegexMatch (GroupMatch (match:_)):_) ->
@ -44,19 +44,63 @@ integerMap = HashMap.fromList
[ ( "", 0 )
, ( "", 0 )
, ( "", 1 )
, ( "", 1 )
, ( "", 2 )
, ( "", 2 )
, ( "", 2 )
, ( "", 2 )
, ( "", 3 )
, ( "", 3 )
, ( "", 4 )
, ( "", 4 )
, ( "", 5 )
, ( "", 5 )
, ( "", 6 )
, ( "", 6 )
, ( "", 7 )
, ( "", 7 )
, ( "", 8 )
, ( "", 8 )
, ( "", 9 )
, ( "", 9 )
, ( "", 10 )
, ( "", 10 )
]
-- | Single-character shorthands for multiples of ten, keyed by the exact
-- text captured by the regex in 'ruleTens'.
--
-- Every key must be distinct and non-empty: an empty key can never equal a
-- captured character, and repeated keys collapse into a single map entry.
tensMap :: HashMap.HashMap Text Integer
tensMap = HashMap.fromList
  [ ( "廿" , 20 )
  , ( "卅" , 30 )
  , ( "卌" , 40 )
  ]
-- | Recognises one of the shorthand characters for 20, 30 or 40 and resolves
-- the captured text through 'tensMap'. Produces no token when the captured
-- text has no entry in the map.
ruleTens :: Rule
ruleTens = Rule
  { name = "integer (20,30,40)"
  , pattern = [regex "(廿|卅|卌)"]
  , prod = \case
      (Token RegexMatch (GroupMatch (captured:_)):_) ->
        integer =<< HashMap.lookup (Text.toLower captured) tensMap
      _ -> Nothing
  }
-- | Combines a numeral equal to 20, 30 or 40, a run of whitespace and/or
-- hyphens, and a numeral accepted by @numberBetween 1 10@ into a single
-- numeral whose value is the sum of the two.
ruleCompositeTens :: Rule
ruleCompositeTens = Rule
  { name = "integer 21..49"
  , pattern =
      [ oneOf [20,30,40]
      , regex "[\\s\\-]+"
      , numberBetween 1 10
      ]
  , prod = \case
      -- The middle token is the separator; only the two numerals matter.
      (Token Numeral tensData:_:Token Numeral unitsData:_) ->
        double $ TNumeral.value tensData + TNumeral.value unitsData
      _ -> Nothing
  }
ruleNumeralsPrefixWithNegativeOrMinus :: Rule
ruleNumeralsPrefixWithNegativeOrMinus = Rule
@ -93,6 +137,36 @@ ruleDecimalNumeral = Rule
_ -> Nothing
}
-- | Spelled-out decimal point: @\<numeral\> 點 \<numeral\>@, e.g. "一點一" or
-- "零點五".
--
-- The separator must be the literal character 點. An empty pattern would
-- match between any two adjacent numerals and turn every such pair into a
-- decimal.
--
-- The trailing numeral must satisfy @not . hasGrain@. The result is the
-- leading numeral's value plus 'decimalsToDouble' applied to the trailing
-- numeral's value.
ruleDotSpelledOut :: Rule
ruleDotSpelledOut = Rule
  { name = "one point 2"
  , pattern =
      [ dimension Numeral
      , regex "點"
      , Predicate $ not . hasGrain
      ]
  , prod = \case
      (Token Numeral nd1:_:Token Numeral nd2:_) ->
        double $ TNumeral.value nd1 + decimalsToDouble (TNumeral.value nd2)
      _ -> Nothing
  }
-- | Fraction written denominator-first: @\<denominator\> 分之 \<numerator\>@
-- (or the shorter separator 分), e.g. "二分之一" for one half.
--
-- The first numeral is the denominator and the second is the numerator, so
-- the value is @numerator / denominator@. A zero denominator produces no
-- token instead of an infinite or NaN numeral.
ruleFraction :: Rule
ruleFraction = Rule
  { name = "fraction"
  , pattern =
      [ dimension Numeral
      , regex "分之|分"
      , dimension Numeral
      ]
  , prod = \case
      (Token Numeral NumeralData{TNumeral.value = denominator}:
       _:
       Token Numeral NumeralData{TNumeral.value = numerator}:
       _)
        -- Reject division by zero; a failed guard falls through to Nothing.
        | denominator /= 0 -> double $ numerator / denominator
      _ -> Nothing
  }
ruleNumeral :: Rule
ruleNumeral = Rule
{ name = "<number>个/個"
@ -105,16 +179,45 @@ ruleNumeral = Rule
_ -> Nothing
}
-- | "Half": any of the listed spellings yields the numeral 0.5, passed
-- through 'withMultipliable'.
--
-- NOTE(review): the bare "半" alternative is listed first; under ordered
-- alternation the longer "半个" / "半個" alternatives may never be selected.
-- Confirm whether longest-first ordering is wanted.
ruleHalf :: Rule
ruleHalf = Rule
  { name = "half"
  , pattern = [regex "半|1半|一半|半个|半個"]
  , prod = \case
      [] -> Nothing
      _  -> withMultipliable =<< double 0.5
  }
-- | "Dozen": the measure word 打, as in "五打" (five dozen, i.e. 60).
--
-- The pattern must be the literal character; an empty pattern would match at
-- every position and emit a spurious 12 everywhere.
--
-- Yields the integer 12, passed through 'withMultipliable' and then
-- 'notOkForAnyTime'.
ruleDozen :: Rule
ruleDozen = Rule
  { name = "a dozen of"
  , pattern =
      [ regex "打"
      ]
  , prod = \_ -> integer 12 >>= withMultipliable >>= notOkForAnyTime
  }
-- | "Pair": either 雙 or 對 yields the integer 2, passed through
-- 'withMultipliable' and then 'notOkForAnyTime'. The matched tokens are not
-- inspected.
rulePair :: Rule
rulePair = Rule
  { name = "a pair"
  , pattern = [regex "雙|對"]
  , prod = const $ notOkForAnyTime =<< withMultipliable =<< integer 2
  }
-- | Magnitude suffixes, each paired with the token it stands for.
--
-- The first component is the suffix text; the CJK entries list the everyday
-- and the formal (banker's) character as alternatives separated by @|@.
-- The second component is the power of ten as a multipliable numeral with
-- the matching grain.
--
-- Each suffix appears exactly once and no alternative is empty: a repeated
-- entry would be listed twice for whatever consumes this list, and an empty
-- alternative would match the empty string if the text is used as a pattern.
-- NOTE(review): the consumer of this list is not visible here; presumably
-- the text is used as a regex alternation. Confirm.
numeralSuffixList :: [(Text, Maybe Token)]
numeralSuffixList =
  [ ("K", double 1e3 >>= withGrain 3 >>= withMultipliable)
  , ("M", double 1e6 >>= withGrain 6 >>= withMultipliable)
  , ("G", double 1e9 >>= withGrain 9 >>= withMultipliable)
  , ("十|拾", double 1e1 >>= withGrain 1 >>= withMultipliable)
  , ("百|佰", double 1e2 >>= withGrain 2 >>= withMultipliable)
  , ("千|仟", double 1e3 >>= withGrain 3 >>= withMultipliable)
  , ("万|萬", double 1e4 >>= withGrain 4 >>= withMultipliable)
  , ("亿|億", double 1e8 >>= withGrain 8 >>= withMultipliable)
  ]
ruleNumeralSuffixes :: [Rule]
@ -207,5 +310,12 @@ rules =
, ruleNumeralsIntersectNonconsectiveUnit
, ruleNumeralsPrefixWithNegativeOrMinus
, ruleMultiply
, ruleTens
, ruleCompositeTens
, ruleHalf
, ruleDotSpelledOut
, ruleDozen
, rulePair
, ruleFraction
]
++ ruleNumeralSuffixes

View File

@ -177,15 +177,6 @@ classifiers
ClassData{prior = -1.6094379124341003,
unseen = -1.3862943611198906,
likelihoods = HashMap.fromList [("", 0.0)], n = 2}}),
("number suffix: \21313",
Classifier{okData =
ClassData{prior = -0.2231435513142097,
unseen = -3.0910424533583156,
likelihoods = HashMap.fromList [("", 0.0)], n = 20},
koData =
ClassData{prior = -1.6094379124341003,
unseen = -1.9459101490553135,
likelihoods = HashMap.fromList [("", 0.0)], n = 5}}),
("Wednesday",
Classifier{okData =
ClassData{prior = -5.715841383994864e-2,
@ -793,13 +784,14 @@ classifiers
ClassData{prior = 0.0, unseen = -6.12029741895095,
likelihoods =
HashMap.fromList
[("week", -2.8992213731731473),
[("number suffix: \21313|\25342month (grain)",
-4.508659285607248),
("week", -2.8992213731731473),
("integer (0..10)month (grain)", -2.7168998163791924),
("integer (0..10)hour (grain)", -3.1736582188749076),
("<number>\20010/\20491week (grain)", -3.720201925242977),
("second", -3.4790398684260895),
("integer (0..10)day (grain)", -2.982602982112198),
("number suffix: \21313month (grain)", -4.508659285607248),
("integer (0..10)year (grain)", -3.8155121050473024),
("<number>\20010/\20491month (grain)", -3.3455084758015667),
("integer (numeric)year (grain)", -2.246896187133457),
@ -844,13 +836,6 @@ classifiers
[("<integer> (latent time-of-day)", -0.916290731874155),
("hour", -0.916290731874155)],
n = 3}}),
("number suffix: \19975",
Classifier{okData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0},
koData =
ClassData{prior = 0.0, unseen = -1.3862943611198906,
likelihoods = HashMap.fromList [("", 0.0)], n = 2}}),
("relative minutes after|past <integer> (hour-of-day)",
Classifier{okData =
ClassData{prior = -0.6931471805599453,
@ -866,12 +851,26 @@ classifiers
unseen = -3.2188758248682006,
likelihoods =
HashMap.fromList
[("<integer> (latent time-of-day)number suffix: \21313",
[("<integer> (latent time-of-day)number suffix: \21313|\25342",
-1.3862943611198906),
("<integer> (latent time-of-day)integer (0..10)",
-1.3862943611198906),
("hour", -0.7801585575495751)],
n = 10}}),
("one point 2",
Classifier{okData =
ClassData{prior = -infinity, unseen = -1.3862943611198906,
likelihoods = HashMap.fromList [], n = 0},
koData =
ClassData{prior = 0.0, unseen = -2.772588722239781,
likelihoods =
HashMap.fromList
[("integer (0..10)integer (0..10)", -1.3217558399823195),
("integer (0..10)number suffix: \21313|\25342",
-1.3217558399823195),
("integer (0..10)integer with consecutive unit modifiers",
-0.7621400520468967)],
n = 12}}),
("intersect by \",\"",
Classifier{okData =
ClassData{prior = -0.40546510810816444,
@ -908,7 +907,8 @@ classifiers
ClassData{prior = 0.0, unseen = -3.367295829986474,
likelihoods =
HashMap.fromList
[("number suffix: \21313integer (0..10)", -0.6931471805599453),
[("number suffix: \21313|\25342integer (0..10)",
-0.6931471805599453),
("integer (0..10)integer (0..10)", -0.6931471805599453)],
n = 26},
koData =
@ -1152,11 +1152,11 @@ classifiers
("Marchinteger (numeric)", -3.0910424533583156),
("Aprilinteger (numeric)", -3.6018680771243066),
("Februaryinteger (0..10)", -2.6210388241125804),
("Februarynumber suffix: \21313|\25342", -2.6210388241125804),
("month", -0.7492366472109889),
("Februaryinteger (numeric)", -2.503255788456197),
("Februaryinteger with consecutive unit modifiers",
-1.8672670217362002),
("Februarynumber suffix: \21313", -2.6210388241125804)],
-1.8672670217362002)],
n = 51},
koData =
ClassData{prior = -infinity, unseen = -2.1972245773362196,
@ -1177,6 +1177,13 @@ classifiers
ClassData{prior = -0.6931471805599453,
unseen = -1.3862943611198906,
likelihoods = HashMap.fromList [("", 0.0)], n = 2}}),
("number suffix: \19975|\33836",
Classifier{okData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0},
koData =
ClassData{prior = 0.0, unseen = -1.6094379124341003,
likelihoods = HashMap.fromList [("", 0.0)], n = 3}}),
("\22823\25995\39318\26085",
Classifier{okData =
ClassData{prior = 0.0, unseen = -1.9459101490553135,
@ -1272,6 +1279,15 @@ classifiers
koData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0}}),
("number suffix: \21313|\25342",
Classifier{okData =
ClassData{prior = -0.2231435513142097,
unseen = -3.0910424533583156,
likelihoods = HashMap.fromList [("", 0.0)], n = 20},
koData =
ClassData{prior = -1.6094379124341003,
unseen = -1.9459101490553135,
likelihoods = HashMap.fromList [("", 0.0)], n = 5}}),
("\22320\29699\19968\23567\26102",
Classifier{okData =
ClassData{prior = 0.0, unseen = -1.3862943611198906,

View File

@ -177,15 +177,6 @@ classifiers
ClassData{prior = -1.6094379124341003,
unseen = -1.3862943611198906,
likelihoods = HashMap.fromList [("", 0.0)], n = 2}}),
("number suffix: \21313",
Classifier{okData =
ClassData{prior = -0.2231435513142097,
unseen = -3.0910424533583156,
likelihoods = HashMap.fromList [("", 0.0)], n = 20},
koData =
ClassData{prior = -1.6094379124341003,
unseen = -1.9459101490553135,
likelihoods = HashMap.fromList [("", 0.0)], n = 5}}),
("Wednesday",
Classifier{okData =
ClassData{prior = -5.715841383994864e-2,
@ -793,13 +784,14 @@ classifiers
ClassData{prior = 0.0, unseen = -6.12029741895095,
likelihoods =
HashMap.fromList
[("week", -2.8992213731731473),
[("number suffix: \21313|\25342month (grain)",
-4.508659285607248),
("week", -2.8992213731731473),
("integer (0..10)month (grain)", -2.7168998163791924),
("integer (0..10)hour (grain)", -3.1736582188749076),
("<number>\20010/\20491week (grain)", -3.720201925242977),
("second", -3.4790398684260895),
("integer (0..10)day (grain)", -2.982602982112198),
("number suffix: \21313month (grain)", -4.508659285607248),
("integer (0..10)year (grain)", -3.8155121050473024),
("<number>\20010/\20491month (grain)", -3.3455084758015667),
("integer (numeric)year (grain)", -2.246896187133457),
@ -844,13 +836,6 @@ classifiers
[("<integer> (latent time-of-day)", -0.916290731874155),
("hour", -0.916290731874155)],
n = 3}}),
("number suffix: \19975",
Classifier{okData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0},
koData =
ClassData{prior = 0.0, unseen = -1.3862943611198906,
likelihoods = HashMap.fromList [("", 0.0)], n = 2}}),
("relative minutes after|past <integer> (hour-of-day)",
Classifier{okData =
ClassData{prior = -0.6931471805599453,
@ -866,12 +851,26 @@ classifiers
unseen = -3.2188758248682006,
likelihoods =
HashMap.fromList
[("<integer> (latent time-of-day)number suffix: \21313",
[("<integer> (latent time-of-day)number suffix: \21313|\25342",
-1.3862943611198906),
("<integer> (latent time-of-day)integer (0..10)",
-1.3862943611198906),
("hour", -0.7801585575495751)],
n = 10}}),
("one point 2",
Classifier{okData =
ClassData{prior = -infinity, unseen = -1.3862943611198906,
likelihoods = HashMap.fromList [], n = 0},
koData =
ClassData{prior = 0.0, unseen = -2.772588722239781,
likelihoods =
HashMap.fromList
[("integer (0..10)integer (0..10)", -1.3217558399823195),
("integer (0..10)number suffix: \21313|\25342",
-1.3217558399823195),
("integer (0..10)integer with consecutive unit modifiers",
-0.7621400520468967)],
n = 12}}),
("intersect by \",\"",
Classifier{okData =
ClassData{prior = -0.40546510810816444,
@ -908,7 +907,8 @@ classifiers
ClassData{prior = 0.0, unseen = -3.367295829986474,
likelihoods =
HashMap.fromList
[("number suffix: \21313integer (0..10)", -0.6931471805599453),
[("number suffix: \21313|\25342integer (0..10)",
-0.6931471805599453),
("integer (0..10)integer (0..10)", -0.6931471805599453)],
n = 26},
koData =
@ -1152,11 +1152,11 @@ classifiers
("Marchinteger (numeric)", -3.0910424533583156),
("Aprilinteger (numeric)", -3.6018680771243066),
("Februaryinteger (0..10)", -2.6210388241125804),
("Februarynumber suffix: \21313|\25342", -2.6210388241125804),
("month", -0.7492366472109889),
("Februaryinteger (numeric)", -2.503255788456197),
("Februaryinteger with consecutive unit modifiers",
-1.8672670217362002),
("Februarynumber suffix: \21313", -2.6210388241125804)],
-1.8672670217362002)],
n = 51},
koData =
ClassData{prior = -infinity, unseen = -2.1972245773362196,
@ -1177,6 +1177,13 @@ classifiers
ClassData{prior = -0.6931471805599453,
unseen = -1.3862943611198906,
likelihoods = HashMap.fromList [("", 0.0)], n = 2}}),
("number suffix: \19975|\33836",
Classifier{okData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0},
koData =
ClassData{prior = 0.0, unseen = -1.6094379124341003,
likelihoods = HashMap.fromList [("", 0.0)], n = 3}}),
("\22823\25995\39318\26085",
Classifier{okData =
ClassData{prior = 0.0, unseen = -1.9459101490553135,
@ -1272,6 +1279,15 @@ classifiers
koData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0}}),
("number suffix: \21313|\25342",
Classifier{okData =
ClassData{prior = -0.2231435513142097,
unseen = -3.0910424533583156,
likelihoods = HashMap.fromList [("", 0.0)], n = 20},
koData =
ClassData{prior = -1.6094379124341003,
unseen = -1.9459101490553135,
likelihoods = HashMap.fromList [("", 0.0)], n = 5}}),
("\22320\29699\19968\23567\26102",
Classifier{okData =
ClassData{prior = 0.0, unseen = -1.3862943611198906,

View File

@ -177,15 +177,6 @@ classifiers
ClassData{prior = -1.6094379124341003,
unseen = -1.3862943611198906,
likelihoods = HashMap.fromList [("", 0.0)], n = 2}}),
("number suffix: \21313",
Classifier{okData =
ClassData{prior = -0.2231435513142097,
unseen = -3.0910424533583156,
likelihoods = HashMap.fromList [("", 0.0)], n = 20},
koData =
ClassData{prior = -1.6094379124341003,
unseen = -1.9459101490553135,
likelihoods = HashMap.fromList [("", 0.0)], n = 5}}),
("Wednesday",
Classifier{okData =
ClassData{prior = -5.715841383994864e-2,
@ -793,13 +784,14 @@ classifiers
ClassData{prior = 0.0, unseen = -6.12029741895095,
likelihoods =
HashMap.fromList
[("week", -2.8992213731731473),
[("number suffix: \21313|\25342month (grain)",
-4.508659285607248),
("week", -2.8992213731731473),
("integer (0..10)month (grain)", -2.7168998163791924),
("integer (0..10)hour (grain)", -3.1736582188749076),
("<number>\20010/\20491week (grain)", -3.720201925242977),
("second", -3.4790398684260895),
("integer (0..10)day (grain)", -2.982602982112198),
("number suffix: \21313month (grain)", -4.508659285607248),
("integer (0..10)year (grain)", -3.8155121050473024),
("<number>\20010/\20491month (grain)", -3.3455084758015667),
("integer (numeric)year (grain)", -2.246896187133457),
@ -844,13 +836,6 @@ classifiers
[("<integer> (latent time-of-day)", -0.916290731874155),
("hour", -0.916290731874155)],
n = 3}}),
("number suffix: \19975",
Classifier{okData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0},
koData =
ClassData{prior = 0.0, unseen = -1.3862943611198906,
likelihoods = HashMap.fromList [("", 0.0)], n = 2}}),
("relative minutes after|past <integer> (hour-of-day)",
Classifier{okData =
ClassData{prior = -0.6931471805599453,
@ -866,12 +851,26 @@ classifiers
unseen = -3.2188758248682006,
likelihoods =
HashMap.fromList
[("<integer> (latent time-of-day)number suffix: \21313",
[("<integer> (latent time-of-day)number suffix: \21313|\25342",
-1.3862943611198906),
("<integer> (latent time-of-day)integer (0..10)",
-1.3862943611198906),
("hour", -0.7801585575495751)],
n = 10}}),
("one point 2",
Classifier{okData =
ClassData{prior = -infinity, unseen = -1.3862943611198906,
likelihoods = HashMap.fromList [], n = 0},
koData =
ClassData{prior = 0.0, unseen = -2.772588722239781,
likelihoods =
HashMap.fromList
[("integer (0..10)integer (0..10)", -1.3217558399823195),
("integer (0..10)number suffix: \21313|\25342",
-1.3217558399823195),
("integer (0..10)integer with consecutive unit modifiers",
-0.7621400520468967)],
n = 12}}),
("intersect by \",\"",
Classifier{okData =
ClassData{prior = -0.40546510810816444,
@ -908,7 +907,8 @@ classifiers
ClassData{prior = 0.0, unseen = -3.367295829986474,
likelihoods =
HashMap.fromList
[("number suffix: \21313integer (0..10)", -0.6931471805599453),
[("number suffix: \21313|\25342integer (0..10)",
-0.6931471805599453),
("integer (0..10)integer (0..10)", -0.6931471805599453)],
n = 26},
koData =
@ -1152,11 +1152,11 @@ classifiers
("Marchinteger (numeric)", -3.0910424533583156),
("Aprilinteger (numeric)", -3.6018680771243066),
("Februaryinteger (0..10)", -2.6210388241125804),
("Februarynumber suffix: \21313|\25342", -2.6210388241125804),
("month", -0.7492366472109889),
("Februaryinteger (numeric)", -2.503255788456197),
("Februaryinteger with consecutive unit modifiers",
-1.8672670217362002),
("Februarynumber suffix: \21313", -2.6210388241125804)],
-1.8672670217362002)],
n = 51},
koData =
ClassData{prior = -infinity, unseen = -2.1972245773362196,
@ -1177,6 +1177,13 @@ classifiers
ClassData{prior = -0.6931471805599453,
unseen = -1.3862943611198906,
likelihoods = HashMap.fromList [("", 0.0)], n = 2}}),
("number suffix: \19975|\33836",
Classifier{okData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0},
koData =
ClassData{prior = 0.0, unseen = -1.6094379124341003,
likelihoods = HashMap.fromList [("", 0.0)], n = 3}}),
("\22823\25995\39318\26085",
Classifier{okData =
ClassData{prior = 0.0, unseen = -1.9459101490553135,
@ -1272,6 +1279,15 @@ classifiers
koData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0}}),
("number suffix: \21313|\25342",
Classifier{okData =
ClassData{prior = -0.2231435513142097,
unseen = -3.0910424533583156,
likelihoods = HashMap.fromList [("", 0.0)], n = 20},
koData =
ClassData{prior = -1.6094379124341003,
unseen = -1.9459101490553135,
likelihoods = HashMap.fromList [("", 0.0)], n = 5}}),
("\22320\29699\19968\23567\26102",
Classifier{okData =
ClassData{prior = 0.0, unseen = -1.3862943611198906,

View File

@ -177,15 +177,6 @@ classifiers
ClassData{prior = -1.6094379124341003,
unseen = -1.3862943611198906,
likelihoods = HashMap.fromList [("", 0.0)], n = 2}}),
("number suffix: \21313",
Classifier{okData =
ClassData{prior = -0.2231435513142097,
unseen = -3.0910424533583156,
likelihoods = HashMap.fromList [("", 0.0)], n = 20},
koData =
ClassData{prior = -1.6094379124341003,
unseen = -1.9459101490553135,
likelihoods = HashMap.fromList [("", 0.0)], n = 5}}),
("Wednesday",
Classifier{okData =
ClassData{prior = -5.715841383994864e-2,
@ -793,13 +784,14 @@ classifiers
ClassData{prior = 0.0, unseen = -6.12029741895095,
likelihoods =
HashMap.fromList
[("week", -2.8992213731731473),
[("number suffix: \21313|\25342month (grain)",
-4.508659285607248),
("week", -2.8992213731731473),
("integer (0..10)month (grain)", -2.7168998163791924),
("integer (0..10)hour (grain)", -3.1736582188749076),
("<number>\20010/\20491week (grain)", -3.720201925242977),
("second", -3.4790398684260895),
("integer (0..10)day (grain)", -2.982602982112198),
("number suffix: \21313month (grain)", -4.508659285607248),
("integer (0..10)year (grain)", -3.8155121050473024),
("<number>\20010/\20491month (grain)", -3.3455084758015667),
("integer (numeric)year (grain)", -2.246896187133457),
@ -844,13 +836,6 @@ classifiers
[("<integer> (latent time-of-day)", -0.916290731874155),
("hour", -0.916290731874155)],
n = 3}}),
("number suffix: \19975",
Classifier{okData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0},
koData =
ClassData{prior = 0.0, unseen = -1.3862943611198906,
likelihoods = HashMap.fromList [("", 0.0)], n = 2}}),
("relative minutes after|past <integer> (hour-of-day)",
Classifier{okData =
ClassData{prior = -0.6931471805599453,
@ -866,12 +851,26 @@ classifiers
unseen = -3.2188758248682006,
likelihoods =
HashMap.fromList
[("<integer> (latent time-of-day)number suffix: \21313",
[("<integer> (latent time-of-day)number suffix: \21313|\25342",
-1.3862943611198906),
("<integer> (latent time-of-day)integer (0..10)",
-1.3862943611198906),
("hour", -0.7801585575495751)],
n = 10}}),
("one point 2",
Classifier{okData =
ClassData{prior = -infinity, unseen = -1.3862943611198906,
likelihoods = HashMap.fromList [], n = 0},
koData =
ClassData{prior = 0.0, unseen = -2.772588722239781,
likelihoods =
HashMap.fromList
[("integer (0..10)integer (0..10)", -1.3217558399823195),
("integer (0..10)number suffix: \21313|\25342",
-1.3217558399823195),
("integer (0..10)integer with consecutive unit modifiers",
-0.7621400520468967)],
n = 12}}),
("intersect by \",\"",
Classifier{okData =
ClassData{prior = -0.40546510810816444,
@ -908,7 +907,8 @@ classifiers
ClassData{prior = 0.0, unseen = -3.367295829986474,
likelihoods =
HashMap.fromList
[("number suffix: \21313integer (0..10)", -0.6931471805599453),
[("number suffix: \21313|\25342integer (0..10)",
-0.6931471805599453),
("integer (0..10)integer (0..10)", -0.6931471805599453)],
n = 26},
koData =
@ -1152,11 +1152,11 @@ classifiers
("Marchinteger (numeric)", -3.0910424533583156),
("Aprilinteger (numeric)", -3.6018680771243066),
("Februaryinteger (0..10)", -2.6210388241125804),
("Februarynumber suffix: \21313|\25342", -2.6210388241125804),
("month", -0.7492366472109889),
("Februaryinteger (numeric)", -2.503255788456197),
("Februaryinteger with consecutive unit modifiers",
-1.8672670217362002),
("Februarynumber suffix: \21313", -2.6210388241125804)],
-1.8672670217362002)],
n = 51},
koData =
ClassData{prior = -infinity, unseen = -2.1972245773362196,
@ -1177,6 +1177,13 @@ classifiers
ClassData{prior = -0.6931471805599453,
unseen = -1.3862943611198906,
likelihoods = HashMap.fromList [("", 0.0)], n = 2}}),
("number suffix: \19975|\33836",
Classifier{okData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0},
koData =
ClassData{prior = 0.0, unseen = -1.6094379124341003,
likelihoods = HashMap.fromList [("", 0.0)], n = 3}}),
("\22823\25995\39318\26085",
Classifier{okData =
ClassData{prior = 0.0, unseen = -1.9459101490553135,
@ -1272,6 +1279,15 @@ classifiers
koData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0}}),
("number suffix: \21313|\25342",
Classifier{okData =
ClassData{prior = -0.2231435513142097,
unseen = -3.0910424533583156,
likelihoods = HashMap.fromList [("", 0.0)], n = 20},
koData =
ClassData{prior = -1.6094379124341003,
unseen = -1.9459101490553135,
likelihoods = HashMap.fromList [("", 0.0)], n = 5}}),
("\22320\29699\19968\23567\26102",
Classifier{okData =
ClassData{prior = 0.0, unseen = -1.3862943611198906,

View File

@ -168,15 +168,6 @@ classifiers
[("<integer> (latent time-of-day)", -0.6931471805599453),
("hour", -0.6931471805599453)],
n = 5}}),
("number suffix: \21313",
Classifier{okData =
ClassData{prior = -0.2231435513142097,
unseen = -3.0910424533583156,
likelihoods = HashMap.fromList [("", 0.0)], n = 20},
koData =
ClassData{prior = -1.6094379124341003,
unseen = -1.9459101490553135,
likelihoods = HashMap.fromList [("", 0.0)], n = 5}}),
("Wednesday",
Classifier{okData =
ClassData{prior = -5.715841383994864e-2,
@ -783,13 +774,14 @@ classifiers
ClassData{prior = 0.0, unseen = -6.12029741895095,
likelihoods =
HashMap.fromList
[("week", -2.8992213731731473),
[("number suffix: \21313|\25342month (grain)",
-4.508659285607248),
("week", -2.8992213731731473),
("integer (0..10)month (grain)", -2.7168998163791924),
("integer (0..10)hour (grain)", -3.1736582188749076),
("<number>\20010/\20491week (grain)", -3.720201925242977),
("second", -3.4790398684260895),
("integer (0..10)day (grain)", -2.982602982112198),
("number suffix: \21313month (grain)", -4.508659285607248),
("integer (0..10)year (grain)", -3.8155121050473024),
("<number>\20010/\20491month (grain)", -3.3455084758015667),
("integer (numeric)year (grain)", -2.246896187133457),
@ -834,13 +826,6 @@ classifiers
[("<integer> (latent time-of-day)", -0.916290731874155),
("hour", -0.916290731874155)],
n = 3}}),
("number suffix: \19975",
Classifier{okData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0},
koData =
ClassData{prior = 0.0, unseen = -1.3862943611198906,
likelihoods = HashMap.fromList [("", 0.0)], n = 2}}),
("relative minutes after|past <integer> (hour-of-day)",
Classifier{okData =
ClassData{prior = -0.6931471805599453,
@ -856,12 +841,26 @@ classifiers
unseen = -3.2188758248682006,
likelihoods =
HashMap.fromList
[("<integer> (latent time-of-day)number suffix: \21313",
[("<integer> (latent time-of-day)number suffix: \21313|\25342",
-1.3862943611198906),
("<integer> (latent time-of-day)integer (0..10)",
-1.3862943611198906),
("hour", -0.7801585575495751)],
n = 10}}),
("one point 2",
Classifier{okData =
ClassData{prior = -infinity, unseen = -1.3862943611198906,
likelihoods = HashMap.fromList [], n = 0},
koData =
ClassData{prior = 0.0, unseen = -2.772588722239781,
likelihoods =
HashMap.fromList
[("integer (0..10)integer (0..10)", -1.3217558399823195),
("integer (0..10)number suffix: \21313|\25342",
-1.3217558399823195),
("integer (0..10)integer with consecutive unit modifiers",
-0.7621400520468967)],
n = 12}}),
("intersect by \",\"",
Classifier{okData =
ClassData{prior = -0.40546510810816444,
@ -898,7 +897,8 @@ classifiers
ClassData{prior = 0.0, unseen = -3.367295829986474,
likelihoods =
HashMap.fromList
[("number suffix: \21313integer (0..10)", -0.6931471805599453),
[("number suffix: \21313|\25342integer (0..10)",
-0.6931471805599453),
("integer (0..10)integer (0..10)", -0.6931471805599453)],
n = 26},
koData =
@ -1142,11 +1142,11 @@ classifiers
("Marchinteger (numeric)", -3.0910424533583156),
("Aprilinteger (numeric)", -3.6018680771243066),
("Februaryinteger (0..10)", -2.6210388241125804),
("Februarynumber suffix: \21313|\25342", -2.6210388241125804),
("month", -0.7492366472109889),
("Februaryinteger (numeric)", -2.503255788456197),
("Februaryinteger with consecutive unit modifiers",
-1.8672670217362002),
("Februarynumber suffix: \21313", -2.6210388241125804)],
-1.8672670217362002)],
n = 51},
koData =
ClassData{prior = -infinity, unseen = -2.1972245773362196,
@ -1167,6 +1167,13 @@ classifiers
ClassData{prior = -0.6931471805599453,
unseen = -1.3862943611198906,
likelihoods = HashMap.fromList [("", 0.0)], n = 2}}),
("number suffix: \19975|\33836",
Classifier{okData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0},
koData =
ClassData{prior = 0.0, unseen = -1.6094379124341003,
likelihoods = HashMap.fromList [("", 0.0)], n = 3}}),
("\22823\25995\39318\26085",
Classifier{okData =
ClassData{prior = 0.0, unseen = -1.9459101490553135,
@ -1262,6 +1269,15 @@ classifiers
koData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0}}),
("number suffix: \21313|\25342",
Classifier{okData =
ClassData{prior = -0.2231435513142097,
unseen = -3.0910424533583156,
likelihoods = HashMap.fromList [("", 0.0)], n = 20},
koData =
ClassData{prior = -1.6094379124341003,
unseen = -1.9459101490553135,
likelihoods = HashMap.fromList [("", 0.0)], n = 5}}),
("\22320\29699\19968\23567\26102",
Classifier{okData =
ClassData{prior = 0.0, unseen = -1.3862943611198906,