Add isArabic rule (#577)

Summary:
Fixes https://github.com/facebook/duckling/issues/437, fixes https://github.com/facebook/duckling/issues/571

Pull Request resolved: https://github.com/facebook/duckling/pull/577

Reviewed By: stroxler

Differential Revision: D29664126

Pulled By: chessai

fbshipit-source-id: b6365699231527b0869322c798e32a21328f1071
This commit is contained in:
Amr Keleg 2021-07-12 13:27:56 -07:00 committed by Facebook GitHub Bot
parent ed291c2a3a
commit 79ac8f63f9
7 changed files with 647 additions and 630 deletions

View File

@ -110,6 +110,69 @@ ruleInteger21 = Rule
_ -> Nothing _ -> Nothing
} }
-- | Rule named @"integer 300"@: a single regex item whose production
-- ignores the matched tokens and always yields @integer 300@.
--
-- The regex tolerates spelling variants after the stem: an optional alif
-- following the mim, either hamza-on-ya or plain ya, and either ta marbuta
-- or ha as the final letter.
ruleInteger300 :: Rule
ruleInteger300 = Rule
  { name = "integer 300"
  , pattern = [regex "(ثلاث)ما?[ئي][ةه]"]
  , prod = const $ integer 300
  }
-- | Rule named @"integer 400"@: a single regex item whose production
-- ignores the matched tokens and always yields @integer 400@.
--
-- The stem may start with alif written with or without a hamza; the suffix
-- allows an optional alif after the mim, hamza-on-ya or plain ya, and
-- ta marbuta or ha as the final letter.
ruleInteger400 :: Rule
ruleInteger400 = Rule
  { name = "integer 400"
  , pattern = [regex "([أا]ربع)ما?[ئي][ةه]"]
  , prod = const $ integer 400
  }
-- | Rule named @"integer 500"@: a single regex item whose production
-- ignores the matched tokens and always yields @integer 500@.
--
-- The suffix allows an optional alif after the mim, hamza-on-ya or plain
-- ya, and ta marbuta or ha as the final letter.
ruleInteger500 :: Rule
ruleInteger500 = Rule
  { name = "integer 500"
  , pattern = [regex "(خمس)ما?[ئي][ةه]"]
  , prod = const $ integer 500
  }
-- | Rule named @"integer 600"@: a single regex item whose production
-- ignores the matched tokens and always yields @integer 600@.
--
-- The suffix allows an optional alif after the mim, hamza-on-ya or plain
-- ya, and ta marbuta or ha as the final letter.
ruleInteger600 :: Rule
ruleInteger600 = Rule
  { name = "integer 600"
  , pattern = [regex "(ست)ما?[ئي][ةه]"]
  , prod = const $ integer 600
  }
-- | Rule named @"integer 700"@: a single regex item whose production
-- ignores the matched tokens and always yields @integer 700@.
--
-- The suffix allows an optional alif after the mim, hamza-on-ya or plain
-- ya, and ta marbuta or ha as the final letter.
ruleInteger700 :: Rule
ruleInteger700 = Rule
  { name = "integer 700"
  , pattern = [regex "(سبع)ما?[ئي][ةه]"]
  , prod = const $ integer 700
  }
-- | Rule named @"integer 800"@: a single regex item whose production
-- ignores the matched tokens and always yields @integer 800@.
--
-- The stem accepts an optional trailing ya; the suffix allows an optional
-- alif after the mim, hamza-on-ya or plain ya, and ta marbuta or ha as the
-- final letter.
ruleInteger800 :: Rule
ruleInteger800 = Rule
  { name = "integer 800"
  , pattern = [regex "(ثمان[ي]?)ما?[ئي][ةه]"]
  , prod = const $ integer 800
  }
-- | Rule named @"integer 900"@: a single regex item whose production
-- ignores the matched tokens and always yields @integer 900@.
--
-- The suffix allows an optional alif after the mim, hamza-on-ya or plain
-- ya, and ta marbuta or ha as the final letter.
ruleInteger900 :: Rule
ruleInteger900 = Rule
  { name = "integer 900"
  , pattern = [regex "(تسع)ما?[ئي][ةه]"]
  , prod = const $ integer 900
  }
ruleDecimalWithThousandsSeparator :: Rule ruleDecimalWithThousandsSeparator :: Rule
ruleDecimalWithThousandsSeparator = Rule ruleDecimalWithThousandsSeparator = Rule
{ name = "decimal with thousands separator" { name = "decimal with thousands separator"
@ -402,6 +465,13 @@ rules =
, ruleInteger5 , ruleInteger5
, ruleInteger7 , ruleInteger7
, ruleInteger9 , ruleInteger9
, ruleInteger300
, ruleInteger400
, ruleInteger500
, ruleInteger600
, ruleInteger700
, ruleInteger800
, ruleInteger900
, ruleIntegerWithThousandsSeparator , ruleIntegerWithThousandsSeparator
, ruleMultiply , ruleMultiply
, ruleNumeralDotNumeral , ruleNumeralDotNumeral

View File

@ -9,6 +9,7 @@
module Duckling.Quantity.AR.Corpus module Duckling.Quantity.AR.Corpus
( corpus ( corpus
, negativeCorpus
) where ) where
import Data.String import Data.String
@ -19,6 +20,15 @@ import Duckling.Quantity.Types
import Duckling.Resolve import Duckling.Resolve
import Duckling.Testing.Types import Duckling.Testing.Types
-- | Negative corpus for the Arabic Quantity dimension: a context, the test
-- options, and a list of Arabic words.
--
-- The context overrides the locale with @makeLocale AR Nothing@, exactly as
-- 'corpus' does. Plain 'testContext' would evaluate these Arabic examples
-- under whatever locale 'testContext' carries by default, so the examples
-- would not be checked against the Arabic rules.
--
-- NOTE(review): presumably each example is expected to yield no Quantity
-- parse -- confirm against the 'NegativeCorpus' test runner.
negativeCorpus :: NegativeCorpus
negativeCorpus =
  (testContext {locale = makeLocale AR Nothing}, testOptions, examples)
  where
    examples =
      [ "جمبري"
      , "جمهور"
      , "غمامة"
      ]
corpus :: Corpus corpus :: Corpus
corpus = (testContext {locale = makeLocale AR Nothing}, testOptions, allExamples) corpus = (testContext {locale = makeLocale AR Nothing}, testOptions, allExamples)

View File

@ -65,13 +65,6 @@ classifiers
koData = koData =
ClassData{prior = -infinity, unseen = -1.3862943611198906, ClassData{prior = -infinity, unseen = -1.3862943611198906,
likelihoods = HashMap.fromList [], n = 0}}), likelihoods = HashMap.fromList [], n = 0}}),
("noon|midnight|EOD|end of day",
Classifier{okData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0},
koData =
ClassData{prior = 0.0, unseen = -1.3862943611198906,
likelihoods = HashMap.fromList [("", 0.0)], n = 2}}),
("<hour-of-day> third", ("<hour-of-day> third",
Classifier{okData = Classifier{okData =
ClassData{prior = 0.0, unseen = -2.0794415416798357, ClassData{prior = 0.0, unseen = -2.0794415416798357,
@ -119,7 +112,7 @@ classifiers
likelihoods = HashMap.fromList [], n = 0}}), likelihoods = HashMap.fromList [], n = 0}}),
("<day-of-month> (ordinal or number) <named-month>", ("<day-of-month> (ordinal or number) <named-month>",
Classifier{okData = Classifier{okData =
ClassData{prior = -0.3101549283038396, ClassData{prior = -0.2411620568168881,
unseen = -3.4339872044851463, unseen = -3.4339872044851463,
likelihoods = likelihoods =
HashMap.fromList HashMap.fromList
@ -133,14 +126,13 @@ classifiers
("integer (numeric)March", -2.70805020110221)], ("integer (numeric)March", -2.70805020110221)],
n = 11}, n = 11},
koData = koData =
ClassData{prior = -1.3217558399823195, unseen = -2.833213344056216, ClassData{prior = -1.540445040947149, unseen = -2.70805020110221,
likelihoods = likelihoods =
HashMap.fromList HashMap.fromList
[("integer (numeric)in <named-month>", -2.0794415416798357), [("integer (numeric)in <named-month>", -1.9459101490553135),
("integer (numeric)August", -2.0794415416798357), ("month", -1.252762968495368),
("month", -1.1631508098056809), ("integer (numeric)July", -1.540445040947149)],
("integer (numeric)July", -1.6739764335716716)], n = 3}}),
n = 4}}),
("<time> <part-of-day>", ("<time> <part-of-day>",
Classifier{okData = Classifier{okData =
ClassData{prior = -0.41616039722491244, ClassData{prior = -0.41616039722491244,
@ -327,36 +319,37 @@ classifiers
likelihoods = HashMap.fromList [], n = 0}}), likelihoods = HashMap.fromList [], n = 0}}),
("on <date>", ("on <date>",
Classifier{okData = Classifier{okData =
ClassData{prior = -0.6931471805599453, unseen = -3.713572066704308, ClassData{prior = -0.6131044728864089,
unseen = -3.7612001156935624,
likelihoods = likelihoods =
HashMap.fromList HashMap.fromList
[("<day-of-month> (ordinal or number) <named-month>", [("<day-of-month> (ordinal or number) <named-month>",
-2.5902671654458267), -2.639057329615259),
("September", -2.995732273553991), ("September", -3.044522437723423),
("second", -3.044522437723423),
("<day-of-month> (ordinal or number) of <named-month>", ("<day-of-month> (ordinal or number) of <named-month>",
-2.995732273553991), -3.044522437723423),
("day", -1.3862943611198906), ("year", -2.5902671654458267), ("day", -1.4350845252893227), ("year", -2.639057329615259),
("part of <named-month>", -2.0794415416798357), ("part of <named-month>", -2.128231705849268),
("month", -2.995732273553991), ("month", -3.044522437723423),
("year (integer)", -2.5902671654458267), ("year (integer)", -2.639057329615259),
("this|last the <cycle>", -2.995732273553991), ("this|last the <cycle>", -2.639057329615259),
("day of <named-month>", -2.995732273553991)], ("day of <named-month>", -3.044522437723423)],
n = 12}, n = 13},
koData = koData =
ClassData{prior = -0.6931471805599453, unseen = -3.713572066704308, ClassData{prior = -0.7801585575495751,
unseen = -3.6635616461296463,
likelihoods = likelihoods =
HashMap.fromList HashMap.fromList
[("September", -2.995732273553991), [("September", -2.9444389791664407),
("October", -2.995732273553991), ("October", -2.9444389791664407),
("intersect", -2.0794415416798357), ("intersect", -2.0281482472922856),
("Saturday", -2.995732273553991), ("<time> for <duration>", -2.538973871058276),
("<time> for <duration>", -2.5902671654458267), ("month (ordinal)", -2.9444389791664407),
("day", -2.995732273553991),
("month (ordinal)", -2.995732273553991),
("intersect by \",\", \"of\", \"from\", \"'s\"", ("intersect by \",\", \"of\", \"from\", \"'s\"",
-2.5902671654458267), -2.538973871058276),
("month", -1.2039728043259361)], ("month", -1.1526795099383855)],
n = 12}}), n = 11}}),
("<hour-of-day> and integer minutes", ("<hour-of-day> and integer minutes",
Classifier{okData = Classifier{okData =
ClassData{prior = -0.15415067982725836, ClassData{prior = -0.15415067982725836,
@ -459,166 +452,163 @@ classifiers
likelihoods = HashMap.fromList [("", 0.0)], n = 39}}), likelihoods = HashMap.fromList [("", 0.0)], n = 39}}),
("intersect", ("intersect",
Classifier{okData = Classifier{okData =
ClassData{prior = -0.569352963213281, unseen = -5.529429087511423, ClassData{prior = -0.5458224658030868,
unseen = -5.5134287461649825,
likelihoods = likelihoods =
HashMap.fromList HashMap.fromList
[("Tuesdayon <date>", -4.832305758571839), [("Tuesdayon <date>", -4.816241156068032),
("in <named-month>year", -4.832305758571839), ("in <named-month>year", -4.816241156068032),
("<hour-of-day> till thirdafter part of days", ("<hour-of-day> till thirdafter part of days",
-4.426840650463674), -4.410776047959867),
("dayhour", -3.9160150266976834), ("dayhour", -3.899950424193877),
("daymonth", -4.832305758571839), ("daymonth", -4.816241156068032),
("in <named-month>on <date>", -4.832305758571839), ("in <named-month>on <date>", -4.816241156068032),
("monthyear", -3.3282283617955644), ("monthyear", -3.312163759291758),
("Mondayon <date>", -4.832305758571839), ("Mondayon <date>", -4.816241156068032),
("Christmasyear", -4.832305758571839), ("Christmasyear", -4.816241156068032),
("Monday<day-of-month> (ordinal or number) <named-month>", ("Monday<day-of-month> (ordinal or number) <named-month>",
-4.832305758571839), -4.816241156068032),
("month (integer)year (integer)", -4.426840650463674), ("month (integer)year (integer)", -4.410776047959867),
("\1593\1610\1583 \1575\1604\1605\1610\1604\1575\1583year", ("\1593\1610\1583 \1575\1604\1605\1610\1604\1575\1583year",
-4.832305758571839), -4.816241156068032),
("<day-of-week> the last of <time><cycle> this|last|next", ("<day-of-week> the last of <time><cycle> this|last|next",
-4.832305758571839), -4.816241156068032),
("at <time-of-day>before part of days", -4.139158578011894), ("at <time-of-day>before part of days", -4.123093975508087),
("hh:mmbefore part of days", -4.832305758571839), ("hh:mmbefore part of days", -4.816241156068032),
("intersect by \",\", \"of\", \"from\", \"'s\"year", ("intersect by \",\", \"of\", \"from\", \"'s\"year",
-4.832305758571839), -4.816241156068032),
("intersect<time> <part-of-day>", -4.426840650463674), ("intersect<time> <part-of-day>", -4.410776047959867),
("month (ordinal)year (integer)", -4.832305758571839), ("month (ordinal)year (integer)", -4.816241156068032),
("\1593\1610\1583 \1575\1604\1601\1589\1581year", ("\1593\1610\1583 \1575\1604\1601\1589\1581year",
-4.832305758571839), -4.816241156068032),
("dd/mm<time> <part-of-day>", -4.832305758571839), ("dd/mm<time> <part-of-day>", -4.816241156068032),
("Thursday<time> timezone", -4.426840650463674), ("Thursday<time> timezone", -4.410776047959867),
("<hour-of-day> till quarterthis <part-of-day>", ("<hour-of-day> till quarterthis <part-of-day>",
-4.426840650463674), -4.410776047959867),
("dayday", -3.0405462893437836), ("dayday", -3.024481686839977),
("absorption of , after named day<day-of-month> (ordinal or number) <named-month>", ("absorption of , after named day<day-of-month> (ordinal or number) <named-month>",
-4.832305758571839), -4.816241156068032),
("hourhour", -3.733693469903729), ("hourhour", -3.717628867399922),
("month (integer)year", -4.832305758571839), ("month (integer)year", -4.816241156068032),
("\1593\1610\1583 \1575\1604\1601\1591\1585year", ("\1593\1610\1583 \1575\1604\1601\1591\1585year",
-4.832305758571839), -4.816241156068032),
("Thursday<day-of-month> (ordinal or number) <named-month>", ("Thursday<day-of-month> (ordinal or number) <named-month>",
-4.832305758571839), -4.816241156068032),
("dayyear", -2.960503581670247), ("dayyear", -2.9444389791664407),
("Thursday<time> <part-of-day>", -4.832305758571839), ("Thursday<time> <part-of-day>", -4.816241156068032),
("<cycle> last of <time>year (integer)", -4.426840650463674), ("<cycle> last of <time>year (integer)", -4.410776047959867),
("last <cycle> of <time>year (integer)", -4.832305758571839), ("last <cycle> of <time>year (integer)", -4.816241156068032),
("<hour-of-day> quarterafter part of days", -4.832305758571839), ("<hour-of-day> quarterafter part of days", -4.816241156068032),
("minutehour", -2.529720665577793), ("minutehour", -2.513656063073986),
("Thursdayfrom <datetime> - <datetime> (interval)", ("Thursdayfrom <datetime> - <datetime> (interval)",
-4.832305758571839), -4.816241156068032),
("\1585\1571\1587 \1575\1604\1587\1606\1577 \1575\1604\1607\1580\1585\1610\1577year", ("\1585\1571\1587 \1575\1604\1587\1606\1577 \1575\1604\1607\1580\1585\1610\1577year",
-4.832305758571839), -4.816241156068032),
("Monday<cycle> this|last|next", -4.832305758571839), ("Monday<cycle> this|last|next", -4.816241156068032),
("<day-of-month> (ordinal or number) of <named-month>year", ("<day-of-month> (ordinal or number) of <named-month>year",
-4.832305758571839), -4.816241156068032),
("<cycle> <ordinal> day? of <time>year", -4.832305758571839), ("<cycle> <ordinal> day? of <time>year", -4.816241156068032),
("\1593\1610\1583 \1575\1604\1571\1590\1581\1609year", ("\1593\1610\1583 \1575\1604\1571\1590\1581\1609year",
-4.832305758571839), -4.816241156068032),
("on a <named-day>dd/mm", -4.832305758571839), ("on a <named-day>dd/mm", -4.816241156068032),
("on a <named-day>on <date>", -4.832305758571839), ("on a <named-day>on <date>", -4.816241156068032),
("on a <named-day><day-of-month> (ordinal or number) <named-month>", ("on a <named-day><day-of-month> (ordinal or number) <named-month>",
-4.832305758571839), -4.816241156068032),
("Tuesday<cycle> this|last|next", -4.832305758571839), ("Tuesday<cycle> this|last|next", -4.816241156068032),
("dayminute", -3.5795427900764705), ("dayminute", -3.563478187572664),
("dd/mmintersect", -4.832305758571839), ("dd/mmintersect", -4.816241156068032),
("in <named-month><cycle> this|last|next", -4.832305758571839), ("in <named-month><cycle> this|last|next", -4.816241156068032),
("<hour-of-day> and integer minutesafter part of days", ("<hour-of-day> and integer minutesafter part of days",
-4.426840650463674), -4.410776047959867),
("Saturdaydd/mm", -4.832305758571839), ("Saturdaydd/mm", -4.816241156068032),
("\1585\1605\1590\1575\1606year", -4.832305758571839), ("\1585\1605\1590\1575\1606year", -4.816241156068032),
("the <cycle> the <ordinal> of <time>year", -4.832305758571839), ("the <cycle> the <ordinal> of <time>year", -4.816241156068032),
("at <time-of-day>after part of days", -3.733693469903729), ("at <time-of-day>after part of days", -3.717628867399922),
("<ordinal> <cycle> of <time>year", -4.832305758571839), ("<ordinal> <cycle> of <time>year", -4.816241156068032),
("Sunday<cycle> this|last|next", -4.832305758571839), ("Sunday<cycle> this|last|next", -4.816241156068032),
("Tuesday<day-of-month> (ordinal or number) of <named-month>", ("Tuesday<day-of-month> (ordinal or number) of <named-month>",
-4.426840650463674), -4.410776047959867),
("Septemberon <date>", -4.832305758571839), ("Septemberon <date>", -4.816241156068032),
("from <datetime> - <datetime> (interval)after part of days", ("from <datetime> - <datetime> (interval)after part of days",
-4.832305758571839), -4.816241156068032),
("intersectintersect", -4.426840650463674), ("intersectintersect", -4.410776047959867),
("dayweek", -4.139158578011894), ("dayweek", -4.123093975508087),
("weekyear", -3.9160150266976834), ("weekyear", -3.899950424193877),
("<datetime> - <datetime> (interval)after part of days", ("<datetime> - <datetime> (interval)after part of days",
-4.832305758571839), -4.816241156068032),
("Friday<day-of-month> (ordinal or number) <named-month>", ("Friday<day-of-month> (ordinal or number) <named-month>",
-4.832305758571839), -4.816241156068032),
("<hour-of-day> till quarterbefore part of days", ("<hour-of-day> till quarterbefore part of days",
-4.426840650463674), -4.410776047959867),
("<time-of-day> - <time-of-day> (interval)after part of days", ("<time-of-day> - <time-of-day> (interval)after part of days",
-4.832305758571839), -4.816241156068032),
("at <time-of-day>this <part-of-day>", -4.832305758571839), ("at <time-of-day>this <part-of-day>", -4.816241156068032),
("<hour-of-day> till thirdbefore part of days", ("<hour-of-day> till thirdbefore part of days",
-4.426840650463674)], -4.410776047959867)],
n = 73}, n = 73},
koData = koData =
ClassData{prior = -0.8344607136265229, unseen = -5.384495062789089, ClassData{prior = -0.8659899933993561, unseen = -5.337538079701318,
likelihoods = likelihoods =
HashMap.fromList HashMap.fromList
[("Tuesdayon <date>", -4.686750172980514), [("Tuesdayon <date>", -4.639571612705423),
("dayhour", -3.18267277620424), ("dayhour", -3.1354942159291497),
("daymonth", -3.5881378843124043), ("daymonth", -3.5409593240373143),
("monthday", -4.686750172980514), ("monthday", -4.639571612705423),
("monthyear", -2.814947996078923), ("monthyear", -2.7677694358038325),
("hh:mmafter part of days", -4.686750172980514), ("hh:mmafter part of days", -4.639571612705423),
("houryear", -4.28128506487235), ("houryear", -4.639571612705423),
("intersectat <time-of-day>", -3.770459441106359), ("intersectat <time-of-day>", -3.7232808808312687),
("intersect<time> <part-of-day>", -4.28128506487235), ("intersect<time> <part-of-day>", -4.23410650459726),
("Octoberon <date>", -4.686750172980514), ("Octoberon <date>", -4.639571612705423),
("until <time-of-day>after part of days", -4.686750172980514), ("until <time-of-day>after part of days", -4.639571612705423),
("this|last <cycle>on <date>", -4.686750172980514), ("Tuesdayafter <time-of-day>", -4.639571612705423),
("Tuesdayafter <time-of-day>", -4.686750172980514), ("Wednesdayon <date>", -4.23410650459726),
("Wednesdayon <date>", -4.28128506487235), ("dd/mm<time> <part-of-day>", -4.639571612705423),
("dd/mm<time> <part-of-day>", -4.686750172980514), ("yearyear", -4.23410650459726),
("yearyear", -4.28128506487235),
("<hour-of-day> till quarterthis <part-of-day>", ("<hour-of-day> till quarterthis <part-of-day>",
-4.686750172980514), -4.639571612705423),
("dayday", -3.993602992420569), ("dayday", -3.9464244321454784),
("dd/mmat <time-of-day>", -4.28128506487235), ("dd/mmat <time-of-day>", -4.23410650459726),
("hourhour", -4.28128506487235), ("hourhour", -4.23410650459726),
("dayyear", -3.4339872044851463), ("dayyear", -3.386808644210056),
("Thursdayat <time-of-day>", -4.28128506487235), ("Thursdayat <time-of-day>", -4.23410650459726),
("Februaryyear", -4.686750172980514), ("Februaryyear", -4.639571612705423),
("minutemonth", -4.686750172980514), ("minutemonth", -4.639571612705423),
("minutehour", -4.28128506487235), ("minutehour", -4.23410650459726),
("for <duration> from <time>on <date>", -4.686750172980514),
("<day-of-month> (ordinal or number) of <named-month>year", ("<day-of-month> (ordinal or number) of <named-month>year",
-4.28128506487235), -4.23410650459726),
("March<cycle> this|last|next", -4.686750172980514), ("March<cycle> this|last|next", -4.639571612705423),
("nth <time> of <time>year", -4.686750172980514), ("nth <time> of <time>year", -4.639571612705423),
("Wednesday<day-of-month> (ordinal or number) of <named-month>", ("Wednesday<day-of-month> (ordinal or number) of <named-month>",
-4.686750172980514), -4.639571612705423),
("secondyear", -4.28128506487235), ("secondyear", -4.639571612705423),
("Wednesdayintersect by \",\", \"of\", \"from\", \"'s\"", ("Wednesdayintersect by \",\", \"of\", \"from\", \"'s\"",
-4.686750172980514), -4.639571612705423),
("on <date>year (integer)", -4.686750172980514), ("on <date>year (integer)", -4.639571612705423),
("<time> for <duration>year", -3.5881378843124043), ("<time> for <duration>year", -3.5409593240373143),
("<duration> after|before|from <time>on <date>", ("<duration> after|before|from <time>on <date>",
-4.686750172980514), -4.639571612705423),
("dayminute", -3.18267277620424), ("dayminute", -3.1354942159291497),
("Julyuntil <time-of-day>", -4.686750172980514), ("Julyuntil <time-of-day>", -4.639571612705423),
("intersecton <date>", -4.686750172980514), ("intersecton <date>", -4.639571612705423),
("at <time-of-day>after part of days", -4.686750172980514), ("at <time-of-day>after part of days", -4.639571612705423),
("Octoberyear", -4.686750172980514), ("Octoberyear", -4.639571612705423),
("in|within|after <duration>year", -4.28128506487235), ("in|within|after <duration>year", -4.23410650459726),
("Tuesday<day-of-month> (ordinal or number) of <named-month>", ("Tuesday<day-of-month> (ordinal or number) of <named-month>",
-4.686750172980514), -4.639571612705423),
("on a <named-day>intersect", -4.28128506487235), ("on a <named-day>intersect", -4.23410650459726),
("Saturdayintersect", -4.28128506487235), ("Saturdayintersect", -4.23410650459726),
("at <time-of-day>year", -4.686750172980514), ("on <date>year", -4.23410650459726),
("on <date>year", -4.28128506487235), ("on <date>on <date>", -4.639571612705423),
("on <date>on <date>", -4.686750172980514), ("Tuesdaynth <time> of <time>", -4.639571612705423),
("weekday", -4.686750172980514), ("weekyear", -4.639571612705423),
("Tuesdaynth <time> of <time>", -4.686750172980514), ("Tuesdayintersect", -4.639571612705423),
("weekyear", -4.686750172980514), ("after <time-of-day>year", -4.639571612705423),
("Tuesdayintersect", -4.686750172980514), ("in <number> (implicit minutes)February", -4.639571612705423),
("after <time-of-day>year", -4.686750172980514), ("last <cycle> of <time>year", -4.639571612705423),
("in <number> (implicit minutes)February", -4.686750172980514),
("last <cycle> of <time>year", -4.686750172980514),
("<day-of-month> (ordinal or number) of <named-month>on <date>", ("<day-of-month> (ordinal or number) of <named-month>on <date>",
-4.686750172980514)], -4.639571612705423)],
n = 56}}), n = 53}}),
("\1585\1605\1590\1575\1606", ("\1585\1605\1590\1575\1606",
Classifier{okData = Classifier{okData =
ClassData{prior = 0.0, unseen = -1.0986122886681098, ClassData{prior = 0.0, unseen = -1.0986122886681098,
@ -706,13 +696,11 @@ classifiers
likelihoods = HashMap.fromList [], n = 0}}), likelihoods = HashMap.fromList [], n = 0}}),
("Saturday", ("Saturday",
Classifier{okData = Classifier{okData =
ClassData{prior = -0.2876820724517809, ClassData{prior = 0.0, unseen = -1.6094379124341003,
unseen = -1.6094379124341003,
likelihoods = HashMap.fromList [("", 0.0)], n = 3}, likelihoods = HashMap.fromList [("", 0.0)], n = 3},
koData = koData =
ClassData{prior = -1.3862943611198906, ClassData{prior = -infinity, unseen = -0.6931471805599453,
unseen = -1.0986122886681098, likelihoods = HashMap.fromList [], n = 0}}),
likelihoods = HashMap.fromList [("", 0.0)], n = 1}}),
("integer 21..99", ("integer 21..99",
Classifier{okData = Classifier{okData =
ClassData{prior = 0.0, unseen = -1.6094379124341003, ClassData{prior = 0.0, unseen = -1.6094379124341003,
@ -796,12 +784,11 @@ classifiers
likelihoods = HashMap.fromList [], n = 0}}), likelihoods = HashMap.fromList [], n = 0}}),
("yesterday", ("yesterday",
Classifier{okData = Classifier{okData =
ClassData{prior = -0.1823215567939546, ClassData{prior = 0.0, unseen = -1.9459101490553135,
unseen = -1.9459101490553135,
likelihoods = HashMap.fromList [("", 0.0)], n = 5}, likelihoods = HashMap.fromList [("", 0.0)], n = 5},
koData = koData =
ClassData{prior = -1.791759469228055, unseen = -1.0986122886681098, ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [("", 0.0)], n = 1}}), likelihoods = HashMap.fromList [], n = 0}}),
("hh:mm:ss", ("hh:mm:ss",
Classifier{okData = Classifier{okData =
ClassData{prior = 0.0, unseen = -1.0986122886681098, ClassData{prior = 0.0, unseen = -1.0986122886681098,
@ -976,39 +963,37 @@ classifiers
likelihoods = HashMap.fromList [], n = 0}}), likelihoods = HashMap.fromList [], n = 0}}),
("<day-of-month> (ordinal or number) of <named-month>", ("<day-of-month> (ordinal or number) of <named-month>",
Classifier{okData = Classifier{okData =
ClassData{prior = -0.6131044728864089, ClassData{prior = -0.570544858467613, unseen = -3.713572066704308,
unseen = -3.7376696182833684,
likelihoods = likelihoods =
HashMap.fromList HashMap.fromList
[("ordinals (thirtieth..nineteenth)February", [("ordinals (thirtieth..nineteenth)February",
-2.6149597780361984), -2.5902671654458267),
("ordinals (first..tenth)October", -2.6149597780361984), ("ordinals (first..tenth)October", -2.5902671654458267),
("ordinals (first..tenth)April", -2.6149597780361984), ("ordinals (first..tenth)April", -2.5902671654458267),
("ordinals (first..tenth)February", -3.0204248861443626), ("ordinals (first..tenth)February", -2.995732273553991),
("ordinals (first..tenth)month (integer)", -2.6149597780361984), ("ordinals (first..tenth)month (integer)", -2.5902671654458267),
("ordinals (first..tenth)March", -2.6149597780361984), ("ordinals (first..tenth)March", -2.5902671654458267),
("month", -1.074514737089049), ("month", -1.0498221244986778),
("ordinals (first..tenth)in <named-month>", ("ordinals (first..tenth)in <named-month>",
-2.6149597780361984)], -2.5902671654458267)],
n = 13}, n = 13},
koData = koData =
ClassData{prior = -0.7801585575495751, ClassData{prior = -0.832909122935104, unseen = -3.5553480614894135,
unseen = -3.6375861597263857,
likelihoods = likelihoods =
HashMap.fromList HashMap.fromList
[("integer 2in <named-month>", -2.917770732084279), [("integer 2in <named-month>", -2.833213344056216),
("integer 10February", -2.512305623976115), ("integer 10February", -2.4277482359480516),
("integer (numeric)month (ordinal)", -2.917770732084279), ("integer (numeric)month (ordinal)", -2.833213344056216),
("ordinals (first..tenth)month (integer)", -2.917770732084279), ("ordinals (first..tenth)month (integer)", -2.833213344056216),
("integer (20..90)in <named-month>", -2.917770732084279), ("integer (20..90)in <named-month>", -2.833213344056216),
("ordinals (composite, e.g., eighty-seven)in <named-month>", ("ordinals (composite, e.g., eighty-seven)in <named-month>",
-2.917770732084279), -2.833213344056216),
("ordinals (twenty, thirty..ninety)in <named-month>", ("ordinals (twenty, thirty..ninety)in <named-month>",
-2.917770732084279), -2.833213344056216),
("month", -1.126011262856224), ("month", -1.128465251817791),
("ordinals (first..tenth)in <named-month>", -2.512305623976115), ("ordinals (first..tenth)in <named-month>",
("ordinals (first..tenth)August", -2.917770732084279)], -2.4277482359480516)],
n = 11}}), n = 10}}),
("this <part-of-day>", ("this <part-of-day>",
Classifier{okData = Classifier{okData =
ClassData{prior = -1.0986122886681098, ClassData{prior = -1.0986122886681098,
@ -1045,13 +1030,6 @@ classifiers
koData = koData =
ClassData{prior = -infinity, unseen = -1.9459101490553135, ClassData{prior = -infinity, unseen = -1.9459101490553135,
likelihoods = HashMap.fromList [], n = 0}}), likelihoods = HashMap.fromList [], n = 0}}),
("powers of tens",
Classifier{okData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0},
koData =
ClassData{prior = 0.0, unseen = -2.0794415416798357,
likelihoods = HashMap.fromList [("", 0.0)], n = 6}}),
("Friday", ("Friday",
Classifier{okData = Classifier{okData =
ClassData{prior = 0.0, unseen = -1.6094379124341003, ClassData{prior = 0.0, unseen = -1.6094379124341003,
@ -1199,19 +1177,15 @@ classifiers
likelihoods = HashMap.fromList [], n = 0}}), likelihoods = HashMap.fromList [], n = 0}}),
("year", ("year",
Classifier{okData = Classifier{okData =
ClassData{prior = -0.9932517730102834, ClassData{prior = -0.7419373447293773,
unseen = -2.5649493574615367, unseen = -2.4849066497880004,
likelihoods = likelihoods = HashMap.fromList [("integer (numeric)", 0.0)],
HashMap.fromList [("integer (numeric)", -8.701137698962981e-2)],
n = 10}, n = 10},
koData = koData =
ClassData{prior = -0.46262352194811296, ClassData{prior = -0.6466271649250525,
unseen = -2.995732273553991, unseen = -2.5649493574615367,
likelihoods = likelihoods = HashMap.fromList [("integer (numeric)", 0.0)],
HashMap.fromList n = 11}}),
[("integer (numeric)", -0.4595323293784402),
("powers of tens", -0.9985288301111273)],
n = 17}}),
("last <day-of-week> of <time>", ("last <day-of-week> of <time>",
Classifier{okData = Classifier{okData =
ClassData{prior = 0.0, unseen = -2.0794415416798357, ClassData{prior = 0.0, unseen = -2.0794415416798357,
@ -1256,12 +1230,13 @@ classifiers
likelihoods = HashMap.fromList [], n = 0}}), likelihoods = HashMap.fromList [], n = 0}}),
("integer 10", ("integer 10",
Classifier{okData = Classifier{okData =
ClassData{prior = -0.8472978603872037, ClassData{prior = -0.6931471805599453,
unseen = -2.3978952727983707, unseen = -2.3978952727983707,
likelihoods = HashMap.fromList [("", 0.0)], n = 9}, likelihoods = HashMap.fromList [("", 0.0)], n = 9},
koData = koData =
ClassData{prior = -0.5596157879354228, unseen = -2.639057329615259, ClassData{prior = -0.6931471805599453,
likelihoods = HashMap.fromList [("", 0.0)], n = 12}}), unseen = -2.3978952727983707,
likelihoods = HashMap.fromList [("", 0.0)], n = 9}}),
("from <day-of-month> (ordinal or number) to <day-of-month> (ordinal or number) <named-month> (interval)", ("from <day-of-month> (ordinal or number) to <day-of-month> (ordinal or number) <named-month> (interval)",
Classifier{okData = Classifier{okData =
ClassData{prior = 0.0, unseen = -2.0794415416798357, ClassData{prior = 0.0, unseen = -2.0794415416798357,
@ -1297,28 +1272,26 @@ classifiers
likelihoods = HashMap.fromList [("", 0.0)], n = 1}}), likelihoods = HashMap.fromList [("", 0.0)], n = 1}}),
("<hour-of-day> and integer", ("<hour-of-day> and integer",
Classifier{okData = Classifier{okData =
ClassData{prior = -1.3217558399823195, unseen = -2.890371757896165, ClassData{prior = -1.252762968495368, unseen = -2.833213344056216,
likelihoods = likelihoods =
HashMap.fromList HashMap.fromList
[("at <time-of-day>integer (13..19)", -2.1400661634962708), [("at <time-of-day>integer (13..19)", -2.0794415416798357),
("at <time-of-day>integer 21..99", -1.7346010553881064), ("at <time-of-day>integer 21..99", -1.6739764335716716),
("hour", -1.2237754316221157), ("hour", -1.1631508098056809),
("at <time-of-day>integer (numeric)", -2.1400661634962708)], ("at <time-of-day>integer (numeric)", -2.0794415416798357)],
n = 4}, n = 4},
koData = koData =
ClassData{prior = -0.3101549283038396, ClassData{prior = -0.3364722366212129, unseen = -3.367295829986474,
unseen = -3.4657359027997265,
likelihoods = likelihoods =
HashMap.fromList HashMap.fromList
[("at <time-of-day>integer 5", -2.3353749158170367), [("at <time-of-day>integer 5", -2.2335922215070942),
("at <time-of-day>integer (13..19)", -2.3353749158170367), ("at <time-of-day>integer (13..19)", -2.2335922215070942),
("at <time-of-day>integer 3", -2.740840023925201), ("at <time-of-day>integer 3", -2.639057329615259),
("at <time-of-day>integer 4", -2.740840023925201), ("at <time-of-day>integer (20..90)", -2.639057329615259),
("at <time-of-day>integer (20..90)", -2.740840023925201), ("at <time-of-day>integer 8", -2.2335922215070942),
("at <time-of-day>integer 8", -2.3353749158170367), ("hour", -0.9343092373768334),
("hour", -0.9490805546971459), ("at <time-of-day>integer (numeric)", -2.2335922215070942)],
("at <time-of-day>integer (numeric)", -2.3353749158170367)], n = 10}}),
n = 11}}),
("month (ordinal)", ("month (ordinal)",
Classifier{okData = Classifier{okData =
ClassData{prior = 0.0, unseen = -1.3862943611198906, ClassData{prior = 0.0, unseen = -1.3862943611198906,
@ -1327,19 +1300,13 @@ classifiers
koData = koData =
ClassData{prior = -infinity, unseen = -0.6931471805599453, ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0}}), likelihoods = HashMap.fromList [], n = 0}}),
("integer 4",
Classifier{okData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0},
koData =
ClassData{prior = 0.0, unseen = -2.1972245773362196,
likelihoods = HashMap.fromList [("", 0.0)], n = 7}}),
("second (grain) ", ("second (grain) ",
Classifier{okData = Classifier{okData =
ClassData{prior = -infinity, unseen = -0.6931471805599453, ClassData{prior = -1.6094379124341003,
likelihoods = HashMap.fromList [], n = 0}, unseen = -1.0986122886681098,
likelihoods = HashMap.fromList [("", 0.0)], n = 1},
koData = koData =
ClassData{prior = 0.0, unseen = -1.791759469228055, ClassData{prior = -0.2231435513142097, unseen = -1.791759469228055,
likelihoods = HashMap.fromList [("", 0.0)], n = 4}}), likelihoods = HashMap.fromList [("", 0.0)], n = 4}}),
("\1593\1610\1583 \1575\1604\1601\1589\1581", ("\1593\1610\1583 \1575\1604\1601\1589\1581",
Classifier{okData = Classifier{okData =
@ -1459,12 +1426,11 @@ classifiers
n = 17}}), n = 17}}),
("integer 3", ("integer 3",
Classifier{okData = Classifier{okData =
ClassData{prior = -1.0560526742493137, ClassData{prior = 0.0, unseen = -2.3025850929940455,
unseen = -2.3025850929940455,
likelihoods = HashMap.fromList [("", 0.0)], n = 8}, likelihoods = HashMap.fromList [("", 0.0)], n = 8},
koData = koData =
ClassData{prior = -0.4274440148269396, unseen = -2.833213344056216, ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [("", 0.0)], n = 15}}), likelihoods = HashMap.fromList [], n = 0}}),
("last <time>", ("last <time>",
Classifier{okData = Classifier{okData =
ClassData{prior = -infinity, unseen = -1.791759469228055, ClassData{prior = -infinity, unseen = -1.791759469228055,
@ -1681,19 +1647,21 @@ classifiers
likelihoods = HashMap.fromList [("", 0.0)], n = 2}}), likelihoods = HashMap.fromList [("", 0.0)], n = 2}}),
("this|last the <cycle>", ("this|last the <cycle>",
Classifier{okData = Classifier{okData =
ClassData{prior = 0.0, unseen = -2.9444389791664407, ClassData{prior = 0.0, unseen = -3.1354942159291497,
likelihoods = likelihoods =
HashMap.fromList HashMap.fromList
[("week", -1.791759469228055), [("week", -1.9924301646902063),
("month (grain)", -2.1972245773362196), ("month (grain)", -2.3978952727983707),
("year (grain)", -2.1972245773362196), ("year (grain)", -2.3978952727983707),
("week (grain)", -1.791759469228055), ("second", -2.3978952727983707),
("day", -2.1972245773362196), ("year", -2.1972245773362196), ("week (grain)", -1.9924301646902063),
("month", -2.1972245773362196), ("day", -2.3978952727983707), ("year", -2.3978952727983707),
("day (grain)", -2.1972245773362196)], ("second (grain) ", -2.3978952727983707),
n = 5}, ("month", -2.3978952727983707),
("day (grain)", -2.3978952727983707)],
n = 6},
koData = koData =
ClassData{prior = -infinity, unseen = -2.1972245773362196, ClassData{prior = -infinity, unseen = -2.3978952727983707,
likelihoods = HashMap.fromList [], n = 0}}), likelihoods = HashMap.fromList [], n = 0}}),
("in <named-month>", ("in <named-month>",
Classifier{okData = Classifier{okData =
@ -1759,28 +1727,27 @@ classifiers
n = 3}}), n = 3}}),
("last <cycle> of <time>", ("last <cycle> of <time>",
Classifier{okData = Classifier{okData =
ClassData{prior = -0.5596157879354228, ClassData{prior = -0.40546510810816444,
unseen = -2.9444389791664407, unseen = -2.833213344056216,
likelihoods = likelihoods =
HashMap.fromList HashMap.fromList
[("daymonth", -1.791759469228055), [("daymonth", -1.6739764335716716),
("week (grain)intersect by \",\", \"of\", \"from\", \"'s\"", ("week (grain)intersect by \",\", \"of\", \"from\", \"'s\"",
-2.1972245773362196), -2.0794415416798357),
("day (grain)intersect", -2.1972245773362196), ("day (grain)intersect", -2.0794415416798357),
("weekmonth", -1.791759469228055), ("weekmonth", -1.6739764335716716),
("week (grain)September", -2.1972245773362196), ("week (grain)September", -2.0794415416798357),
("day (grain)month (integer)", -2.1972245773362196)], ("day (grain)month (integer)", -2.0794415416798357)],
n = 4}, n = 4},
koData = koData =
ClassData{prior = -0.8472978603872037, unseen = -2.833213344056216, ClassData{prior = -1.0986122886681098,
unseen = -2.5649493574615367,
likelihoods = likelihoods =
HashMap.fromList HashMap.fromList
[("week (grain)<time> for <duration>", -2.0794415416798357), [("week (grain)<time> for <duration>", -1.791759469228055),
("weekmonth", -1.6739764335716716), ("weekmonth", -1.3862943611198906),
("weekday", -2.0794415416798357), ("week (grain)intersect", -1.791759469228055)],
("week (grain)intersect", -2.0794415416798357), n = 2}}),
("week (grain)Saturday", -2.0794415416798357)],
n = 3}}),
("ordinals (composite, e.g., eighty-seven)", ("ordinals (composite, e.g., eighty-seven)",
Classifier{okData = Classifier{okData =
ClassData{prior = 0.0, unseen = -1.3862943611198906, ClassData{prior = 0.0, unseen = -1.3862943611198906,
@ -1832,19 +1799,6 @@ classifiers
koData = koData =
ClassData{prior = -infinity, unseen = -0.6931471805599453, ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0}}), likelihoods = HashMap.fromList [], n = 0}}),
("for <duration> from <time>",
Classifier{okData =
ClassData{prior = -infinity, unseen = -1.3862943611198906,
likelihoods = HashMap.fromList [], n = 0},
koData =
ClassData{prior = 0.0, unseen = -2.0794415416798357,
likelihoods =
HashMap.fromList
[("single <unit-of-duration>intersect", -1.252762968495368),
("single <unit-of-duration>in <named-month>",
-1.252762968495368),
("secondmonth", -0.8472978603872037)],
n = 2}}),
("<day-of-week> the last of <time>", ("<day-of-week> the last of <time>",
Classifier{okData = Classifier{okData =
ClassData{prior = 0.0, unseen = -2.3978952727983707, ClassData{prior = 0.0, unseen = -2.3978952727983707,
@ -1888,15 +1842,6 @@ classifiers
koData = koData =
ClassData{prior = -infinity, unseen = -1.6094379124341003, ClassData{prior = -infinity, unseen = -1.6094379124341003,
likelihoods = HashMap.fromList [], n = 0}}), likelihoods = HashMap.fromList [], n = 0}}),
("compose by multiplication",
Classifier{okData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0},
koData =
ClassData{prior = 0.0, unseen = -1.0986122886681098,
likelihoods =
HashMap.fromList [("integer (numeric)powers of tens", 0.0)],
n = 1}}),
("<month> dd-dd (interval)", ("<month> dd-dd (interval)",
Classifier{okData = Classifier{okData =
ClassData{prior = 0.0, unseen = -2.3025850929940455, ClassData{prior = 0.0, unseen = -2.3025850929940455,
@ -1953,7 +1898,7 @@ classifiers
n = 14}}), n = 14}}),
("single <unit-of-duration>", ("single <unit-of-duration>",
Classifier{okData = Classifier{okData =
ClassData{prior = -3.58351893845611, unseen = -3.1354942159291497, ClassData{prior = -3.590439381300684, unseen = -3.1354942159291497,
likelihoods = likelihoods =
HashMap.fromList HashMap.fromList
[("week", -1.9924301646902063), [("week", -1.9924301646902063),
@ -1962,24 +1907,23 @@ classifiers
("day (grain)", -1.9924301646902063)], ("day (grain)", -1.9924301646902063)],
n = 4}, n = 4},
koData = koData =
ClassData{prior = -2.8170876966696335e-2, ClassData{prior = -2.797385204240618e-2, unseen = -5.6937321388027,
unseen = -5.68697535633982,
likelihoods = likelihoods =
HashMap.fromList HashMap.fromList
[("week", -2.639057329615259), [("week", -2.6458370166006375),
("month (grain)", -2.249592562853535), ("month (grain)", -2.2563722498389143),
("hour (grain)", -1.9947003132247452), ("hour (grain)", -2.001480000210124),
("year (grain)", -2.793208009442517), ("year (grain)", -2.7999876964278956),
("second", -4.074141854904581), ("second", -3.898599985096005),
("week (grain)", -2.639057329615259), ("week (grain)", -2.6458370166006375),
("day", -2.592537313980366), ("day", -2.5993170009657445),
("minute (grain)", -3.380994674344636), ("minute (grain)", -3.3877743613300146),
("year", -2.793208009442517), ("year", -2.7999876964278956),
("second (grain) ", -4.074141854904581), ("second (grain) ", -3.898599985096005),
("hour", -1.9947003132247452), ("month", -2.249592562853535), ("hour", -2.001480000210124), ("month", -2.2563722498389143),
("minute", -3.380994674344636), ("minute", -3.3877743613300146),
("day (grain)", -2.592537313980366)], ("day (grain)", -2.5993170009657445)],
n = 140}}), n = 141}}),
("dd-dd <month> (interval)", ("dd-dd <month> (interval)",
Classifier{okData = Classifier{okData =
ClassData{prior = 0.0, unseen = -2.3025850929940455, ClassData{prior = 0.0, unseen = -2.3025850929940455,
@ -2005,10 +1949,8 @@ classifiers
likelihoods = HashMap.fromList [], n = 0}}), likelihoods = HashMap.fromList [], n = 0}}),
("August", ("August",
Classifier{okData = Classifier{okData =
ClassData{prior = -2.0794415416798357, ClassData{prior = 0.0, unseen = -1.3862943611198906,
unseen = -1.3862943611198906,
likelihoods = HashMap.fromList [("", 0.0)], n = 2}, likelihoods = HashMap.fromList [("", 0.0)], n = 2},
koData = koData =
ClassData{prior = -0.13353139262452263, ClassData{prior = -infinity, unseen = -0.6931471805599453,
unseen = -2.772588722239781, likelihoods = HashMap.fromList [], n = 0}})]
likelihoods = HashMap.fromList [("", 0.0)], n = 14}})]

View File

@ -65,13 +65,6 @@ classifiers
koData = koData =
ClassData{prior = -infinity, unseen = -1.3862943611198906, ClassData{prior = -infinity, unseen = -1.3862943611198906,
likelihoods = HashMap.fromList [], n = 0}}), likelihoods = HashMap.fromList [], n = 0}}),
("noon|midnight|EOD|end of day",
Classifier{okData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0},
koData =
ClassData{prior = 0.0, unseen = -1.3862943611198906,
likelihoods = HashMap.fromList [("", 0.0)], n = 2}}),
("<hour-of-day> third", ("<hour-of-day> third",
Classifier{okData = Classifier{okData =
ClassData{prior = 0.0, unseen = -2.0794415416798357, ClassData{prior = 0.0, unseen = -2.0794415416798357,
@ -119,7 +112,7 @@ classifiers
likelihoods = HashMap.fromList [], n = 0}}), likelihoods = HashMap.fromList [], n = 0}}),
("<day-of-month> (ordinal or number) <named-month>", ("<day-of-month> (ordinal or number) <named-month>",
Classifier{okData = Classifier{okData =
ClassData{prior = -0.3101549283038396, ClassData{prior = -0.2411620568168881,
unseen = -3.4339872044851463, unseen = -3.4339872044851463,
likelihoods = likelihoods =
HashMap.fromList HashMap.fromList
@ -133,14 +126,13 @@ classifiers
("integer (numeric)March", -2.70805020110221)], ("integer (numeric)March", -2.70805020110221)],
n = 11}, n = 11},
koData = koData =
ClassData{prior = -1.3217558399823195, unseen = -2.833213344056216, ClassData{prior = -1.540445040947149, unseen = -2.70805020110221,
likelihoods = likelihoods =
HashMap.fromList HashMap.fromList
[("integer (numeric)in <named-month>", -2.0794415416798357), [("integer (numeric)in <named-month>", -1.9459101490553135),
("integer (numeric)August", -2.0794415416798357), ("month", -1.252762968495368),
("month", -1.1631508098056809), ("integer (numeric)July", -1.540445040947149)],
("integer (numeric)July", -1.6739764335716716)], n = 3}}),
n = 4}}),
("<time> <part-of-day>", ("<time> <part-of-day>",
Classifier{okData = Classifier{okData =
ClassData{prior = -0.41616039722491244, ClassData{prior = -0.41616039722491244,
@ -327,36 +319,37 @@ classifiers
likelihoods = HashMap.fromList [], n = 0}}), likelihoods = HashMap.fromList [], n = 0}}),
("on <date>", ("on <date>",
Classifier{okData = Classifier{okData =
ClassData{prior = -0.6931471805599453, unseen = -3.713572066704308, ClassData{prior = -0.6131044728864089,
unseen = -3.7612001156935624,
likelihoods = likelihoods =
HashMap.fromList HashMap.fromList
[("<day-of-month> (ordinal or number) <named-month>", [("<day-of-month> (ordinal or number) <named-month>",
-2.5902671654458267), -2.639057329615259),
("September", -2.995732273553991), ("September", -3.044522437723423),
("second", -3.044522437723423),
("<day-of-month> (ordinal or number) of <named-month>", ("<day-of-month> (ordinal or number) of <named-month>",
-2.995732273553991), -3.044522437723423),
("day", -1.3862943611198906), ("year", -2.5902671654458267), ("day", -1.4350845252893227), ("year", -2.639057329615259),
("part of <named-month>", -2.0794415416798357), ("part of <named-month>", -2.128231705849268),
("month", -2.995732273553991), ("month", -3.044522437723423),
("year (integer)", -2.5902671654458267), ("year (integer)", -2.639057329615259),
("this|last the <cycle>", -2.995732273553991), ("this|last the <cycle>", -2.639057329615259),
("day of <named-month>", -2.995732273553991)], ("day of <named-month>", -3.044522437723423)],
n = 12}, n = 13},
koData = koData =
ClassData{prior = -0.6931471805599453, unseen = -3.713572066704308, ClassData{prior = -0.7801585575495751,
unseen = -3.6635616461296463,
likelihoods = likelihoods =
HashMap.fromList HashMap.fromList
[("September", -2.995732273553991), [("September", -2.9444389791664407),
("October", -2.995732273553991), ("October", -2.9444389791664407),
("intersect", -2.0794415416798357), ("intersect", -2.0281482472922856),
("Saturday", -2.995732273553991), ("<time> for <duration>", -2.538973871058276),
("<time> for <duration>", -2.5902671654458267), ("month (ordinal)", -2.9444389791664407),
("day", -2.995732273553991),
("month (ordinal)", -2.995732273553991),
("intersect by \",\", \"of\", \"from\", \"'s\"", ("intersect by \",\", \"of\", \"from\", \"'s\"",
-2.5902671654458267), -2.538973871058276),
("month", -1.2039728043259361)], ("month", -1.1526795099383855)],
n = 12}}), n = 11}}),
("<hour-of-day> and integer minutes", ("<hour-of-day> and integer minutes",
Classifier{okData = Classifier{okData =
ClassData{prior = -0.15415067982725836, ClassData{prior = -0.15415067982725836,
@ -459,166 +452,163 @@ classifiers
likelihoods = HashMap.fromList [("", 0.0)], n = 39}}), likelihoods = HashMap.fromList [("", 0.0)], n = 39}}),
("intersect", ("intersect",
Classifier{okData = Classifier{okData =
ClassData{prior = -0.569352963213281, unseen = -5.529429087511423, ClassData{prior = -0.5458224658030868,
unseen = -5.5134287461649825,
likelihoods = likelihoods =
HashMap.fromList HashMap.fromList
[("Tuesdayon <date>", -4.832305758571839), [("Tuesdayon <date>", -4.816241156068032),
("in <named-month>year", -4.832305758571839), ("in <named-month>year", -4.816241156068032),
("<hour-of-day> till thirdafter part of days", ("<hour-of-day> till thirdafter part of days",
-4.426840650463674), -4.410776047959867),
("dayhour", -3.9160150266976834), ("dayhour", -3.899950424193877),
("daymonth", -4.832305758571839), ("daymonth", -4.816241156068032),
("in <named-month>on <date>", -4.832305758571839), ("in <named-month>on <date>", -4.816241156068032),
("monthyear", -3.3282283617955644), ("monthyear", -3.312163759291758),
("Mondayon <date>", -4.832305758571839), ("Mondayon <date>", -4.816241156068032),
("Christmasyear", -4.832305758571839), ("Christmasyear", -4.816241156068032),
("Monday<day-of-month> (ordinal or number) <named-month>", ("Monday<day-of-month> (ordinal or number) <named-month>",
-4.832305758571839), -4.816241156068032),
("month (integer)year (integer)", -4.426840650463674), ("month (integer)year (integer)", -4.410776047959867),
("\1593\1610\1583 \1575\1604\1605\1610\1604\1575\1583year", ("\1593\1610\1583 \1575\1604\1605\1610\1604\1575\1583year",
-4.832305758571839), -4.816241156068032),
("<day-of-week> the last of <time><cycle> this|last|next", ("<day-of-week> the last of <time><cycle> this|last|next",
-4.832305758571839), -4.816241156068032),
("at <time-of-day>before part of days", -4.139158578011894), ("at <time-of-day>before part of days", -4.123093975508087),
("hh:mmbefore part of days", -4.832305758571839), ("hh:mmbefore part of days", -4.816241156068032),
("intersect by \",\", \"of\", \"from\", \"'s\"year", ("intersect by \",\", \"of\", \"from\", \"'s\"year",
-4.832305758571839), -4.816241156068032),
("intersect<time> <part-of-day>", -4.426840650463674), ("intersect<time> <part-of-day>", -4.410776047959867),
("month (ordinal)year (integer)", -4.832305758571839), ("month (ordinal)year (integer)", -4.816241156068032),
("\1593\1610\1583 \1575\1604\1601\1589\1581year", ("\1593\1610\1583 \1575\1604\1601\1589\1581year",
-4.832305758571839), -4.816241156068032),
("dd/mm<time> <part-of-day>", -4.832305758571839), ("dd/mm<time> <part-of-day>", -4.816241156068032),
("Thursday<time> timezone", -4.426840650463674), ("Thursday<time> timezone", -4.410776047959867),
("<hour-of-day> till quarterthis <part-of-day>", ("<hour-of-day> till quarterthis <part-of-day>",
-4.426840650463674), -4.410776047959867),
("dayday", -3.0405462893437836), ("dayday", -3.024481686839977),
("absorption of , after named day<day-of-month> (ordinal or number) <named-month>", ("absorption of , after named day<day-of-month> (ordinal or number) <named-month>",
-4.832305758571839), -4.816241156068032),
("hourhour", -3.733693469903729), ("hourhour", -3.717628867399922),
("month (integer)year", -4.832305758571839), ("month (integer)year", -4.816241156068032),
("\1593\1610\1583 \1575\1604\1601\1591\1585year", ("\1593\1610\1583 \1575\1604\1601\1591\1585year",
-4.832305758571839), -4.816241156068032),
("Thursday<day-of-month> (ordinal or number) <named-month>", ("Thursday<day-of-month> (ordinal or number) <named-month>",
-4.832305758571839), -4.816241156068032),
("dayyear", -2.960503581670247), ("dayyear", -2.9444389791664407),
("Thursday<time> <part-of-day>", -4.832305758571839), ("Thursday<time> <part-of-day>", -4.816241156068032),
("<cycle> last of <time>year (integer)", -4.426840650463674), ("<cycle> last of <time>year (integer)", -4.410776047959867),
("last <cycle> of <time>year (integer)", -4.832305758571839), ("last <cycle> of <time>year (integer)", -4.816241156068032),
("<hour-of-day> quarterafter part of days", -4.832305758571839), ("<hour-of-day> quarterafter part of days", -4.816241156068032),
("minutehour", -2.529720665577793), ("minutehour", -2.513656063073986),
("Thursdayfrom <datetime> - <datetime> (interval)", ("Thursdayfrom <datetime> - <datetime> (interval)",
-4.832305758571839), -4.816241156068032),
("\1585\1571\1587 \1575\1604\1587\1606\1577 \1575\1604\1607\1580\1585\1610\1577year", ("\1585\1571\1587 \1575\1604\1587\1606\1577 \1575\1604\1607\1580\1585\1610\1577year",
-4.832305758571839), -4.816241156068032),
("Monday<cycle> this|last|next", -4.832305758571839), ("Monday<cycle> this|last|next", -4.816241156068032),
("<day-of-month> (ordinal or number) of <named-month>year", ("<day-of-month> (ordinal or number) of <named-month>year",
-4.832305758571839), -4.816241156068032),
("<cycle> <ordinal> day? of <time>year", -4.832305758571839), ("<cycle> <ordinal> day? of <time>year", -4.816241156068032),
("\1593\1610\1583 \1575\1604\1571\1590\1581\1609year", ("\1593\1610\1583 \1575\1604\1571\1590\1581\1609year",
-4.832305758571839), -4.816241156068032),
("on a <named-day>dd/mm", -4.832305758571839), ("on a <named-day>dd/mm", -4.816241156068032),
("on a <named-day>on <date>", -4.832305758571839), ("on a <named-day>on <date>", -4.816241156068032),
("on a <named-day><day-of-month> (ordinal or number) <named-month>", ("on a <named-day><day-of-month> (ordinal or number) <named-month>",
-4.832305758571839), -4.816241156068032),
("Tuesday<cycle> this|last|next", -4.832305758571839), ("Tuesday<cycle> this|last|next", -4.816241156068032),
("dayminute", -3.5795427900764705), ("dayminute", -3.563478187572664),
("dd/mmintersect", -4.832305758571839), ("dd/mmintersect", -4.816241156068032),
("in <named-month><cycle> this|last|next", -4.832305758571839), ("in <named-month><cycle> this|last|next", -4.816241156068032),
("<hour-of-day> and integer minutesafter part of days", ("<hour-of-day> and integer minutesafter part of days",
-4.426840650463674), -4.410776047959867),
("Saturdaydd/mm", -4.832305758571839), ("Saturdaydd/mm", -4.816241156068032),
("\1585\1605\1590\1575\1606year", -4.832305758571839), ("\1585\1605\1590\1575\1606year", -4.816241156068032),
("the <cycle> the <ordinal> of <time>year", -4.832305758571839), ("the <cycle> the <ordinal> of <time>year", -4.816241156068032),
("at <time-of-day>after part of days", -3.733693469903729), ("at <time-of-day>after part of days", -3.717628867399922),
("<ordinal> <cycle> of <time>year", -4.832305758571839), ("<ordinal> <cycle> of <time>year", -4.816241156068032),
("Sunday<cycle> this|last|next", -4.832305758571839), ("Sunday<cycle> this|last|next", -4.816241156068032),
("Tuesday<day-of-month> (ordinal or number) of <named-month>", ("Tuesday<day-of-month> (ordinal or number) of <named-month>",
-4.426840650463674), -4.410776047959867),
("Septemberon <date>", -4.832305758571839), ("Septemberon <date>", -4.816241156068032),
("from <datetime> - <datetime> (interval)after part of days", ("from <datetime> - <datetime> (interval)after part of days",
-4.832305758571839), -4.816241156068032),
("intersectintersect", -4.426840650463674), ("intersectintersect", -4.410776047959867),
("dayweek", -4.139158578011894), ("dayweek", -4.123093975508087),
("weekyear", -3.9160150266976834), ("weekyear", -3.899950424193877),
("<datetime> - <datetime> (interval)after part of days", ("<datetime> - <datetime> (interval)after part of days",
-4.832305758571839), -4.816241156068032),
("Friday<day-of-month> (ordinal or number) <named-month>", ("Friday<day-of-month> (ordinal or number) <named-month>",
-4.832305758571839), -4.816241156068032),
("<hour-of-day> till quarterbefore part of days", ("<hour-of-day> till quarterbefore part of days",
-4.426840650463674), -4.410776047959867),
("<time-of-day> - <time-of-day> (interval)after part of days", ("<time-of-day> - <time-of-day> (interval)after part of days",
-4.832305758571839), -4.816241156068032),
("at <time-of-day>this <part-of-day>", -4.832305758571839), ("at <time-of-day>this <part-of-day>", -4.816241156068032),
("<hour-of-day> till thirdbefore part of days", ("<hour-of-day> till thirdbefore part of days",
-4.426840650463674)], -4.410776047959867)],
n = 73}, n = 73},
koData = koData =
ClassData{prior = -0.8344607136265229, unseen = -5.384495062789089, ClassData{prior = -0.8659899933993561, unseen = -5.337538079701318,
likelihoods = likelihoods =
HashMap.fromList HashMap.fromList
[("Tuesdayon <date>", -4.686750172980514), [("Tuesdayon <date>", -4.639571612705423),
("dayhour", -3.18267277620424), ("dayhour", -3.1354942159291497),
("daymonth", -3.5881378843124043), ("daymonth", -3.5409593240373143),
("monthday", -4.686750172980514), ("monthday", -4.639571612705423),
("monthyear", -2.814947996078923), ("monthyear", -2.7677694358038325),
("hh:mmafter part of days", -4.686750172980514), ("hh:mmafter part of days", -4.639571612705423),
("houryear", -4.28128506487235), ("houryear", -4.639571612705423),
("intersectat <time-of-day>", -3.770459441106359), ("intersectat <time-of-day>", -3.7232808808312687),
("intersect<time> <part-of-day>", -4.28128506487235), ("intersect<time> <part-of-day>", -4.23410650459726),
("Octoberon <date>", -4.686750172980514), ("Octoberon <date>", -4.639571612705423),
("until <time-of-day>after part of days", -4.686750172980514), ("until <time-of-day>after part of days", -4.639571612705423),
("this|last <cycle>on <date>", -4.686750172980514), ("Tuesdayafter <time-of-day>", -4.639571612705423),
("Tuesdayafter <time-of-day>", -4.686750172980514), ("Wednesdayon <date>", -4.23410650459726),
("Wednesdayon <date>", -4.28128506487235), ("dd/mm<time> <part-of-day>", -4.639571612705423),
("dd/mm<time> <part-of-day>", -4.686750172980514), ("yearyear", -4.23410650459726),
("yearyear", -4.28128506487235),
("<hour-of-day> till quarterthis <part-of-day>", ("<hour-of-day> till quarterthis <part-of-day>",
-4.686750172980514), -4.639571612705423),
("dayday", -3.993602992420569), ("dayday", -3.9464244321454784),
("dd/mmat <time-of-day>", -4.28128506487235), ("dd/mmat <time-of-day>", -4.23410650459726),
("hourhour", -4.28128506487235), ("hourhour", -4.23410650459726),
("dayyear", -3.4339872044851463), ("dayyear", -3.386808644210056),
("Thursdayat <time-of-day>", -4.28128506487235), ("Thursdayat <time-of-day>", -4.23410650459726),
("Februaryyear", -4.686750172980514), ("Februaryyear", -4.639571612705423),
("minutemonth", -4.686750172980514), ("minutemonth", -4.639571612705423),
("minutehour", -4.28128506487235), ("minutehour", -4.23410650459726),
("for <duration> from <time>on <date>", -4.686750172980514),
("<day-of-month> (ordinal or number) of <named-month>year", ("<day-of-month> (ordinal or number) of <named-month>year",
-4.28128506487235), -4.23410650459726),
("March<cycle> this|last|next", -4.686750172980514), ("March<cycle> this|last|next", -4.639571612705423),
("nth <time> of <time>year", -4.686750172980514), ("nth <time> of <time>year", -4.639571612705423),
("Wednesday<day-of-month> (ordinal or number) of <named-month>", ("Wednesday<day-of-month> (ordinal or number) of <named-month>",
-4.686750172980514), -4.639571612705423),
("secondyear", -4.28128506487235), ("secondyear", -4.639571612705423),
("Wednesdayintersect by \",\", \"of\", \"from\", \"'s\"", ("Wednesdayintersect by \",\", \"of\", \"from\", \"'s\"",
-4.686750172980514), -4.639571612705423),
("on <date>year (integer)", -4.686750172980514), ("on <date>year (integer)", -4.639571612705423),
("<time> for <duration>year", -3.5881378843124043), ("<time> for <duration>year", -3.5409593240373143),
("<duration> after|before|from <time>on <date>", ("<duration> after|before|from <time>on <date>",
-4.686750172980514), -4.639571612705423),
("dayminute", -3.18267277620424), ("dayminute", -3.1354942159291497),
("Julyuntil <time-of-day>", -4.686750172980514), ("Julyuntil <time-of-day>", -4.639571612705423),
("intersecton <date>", -4.686750172980514), ("intersecton <date>", -4.639571612705423),
("at <time-of-day>after part of days", -4.686750172980514), ("at <time-of-day>after part of days", -4.639571612705423),
("Octoberyear", -4.686750172980514), ("Octoberyear", -4.639571612705423),
("in|within|after <duration>year", -4.28128506487235), ("in|within|after <duration>year", -4.23410650459726),
("Tuesday<day-of-month> (ordinal or number) of <named-month>", ("Tuesday<day-of-month> (ordinal or number) of <named-month>",
-4.686750172980514), -4.639571612705423),
("on a <named-day>intersect", -4.28128506487235), ("on a <named-day>intersect", -4.23410650459726),
("Saturdayintersect", -4.28128506487235), ("Saturdayintersect", -4.23410650459726),
("at <time-of-day>year", -4.686750172980514), ("on <date>year", -4.23410650459726),
("on <date>year", -4.28128506487235), ("on <date>on <date>", -4.639571612705423),
("on <date>on <date>", -4.686750172980514), ("Tuesdaynth <time> of <time>", -4.639571612705423),
("weekday", -4.686750172980514), ("weekyear", -4.639571612705423),
("Tuesdaynth <time> of <time>", -4.686750172980514), ("Tuesdayintersect", -4.639571612705423),
("weekyear", -4.686750172980514), ("after <time-of-day>year", -4.639571612705423),
("Tuesdayintersect", -4.686750172980514), ("in <number> (implicit minutes)February", -4.639571612705423),
("after <time-of-day>year", -4.686750172980514), ("last <cycle> of <time>year", -4.639571612705423),
("in <number> (implicit minutes)February", -4.686750172980514),
("last <cycle> of <time>year", -4.686750172980514),
("<day-of-month> (ordinal or number) of <named-month>on <date>", ("<day-of-month> (ordinal or number) of <named-month>on <date>",
-4.686750172980514)], -4.639571612705423)],
n = 56}}), n = 53}}),
("\1585\1605\1590\1575\1606", ("\1585\1605\1590\1575\1606",
Classifier{okData = Classifier{okData =
ClassData{prior = 0.0, unseen = -1.0986122886681098, ClassData{prior = 0.0, unseen = -1.0986122886681098,
@ -706,13 +696,11 @@ classifiers
likelihoods = HashMap.fromList [], n = 0}}), likelihoods = HashMap.fromList [], n = 0}}),
("Saturday", ("Saturday",
Classifier{okData = Classifier{okData =
ClassData{prior = -0.2876820724517809, ClassData{prior = 0.0, unseen = -1.6094379124341003,
unseen = -1.6094379124341003,
likelihoods = HashMap.fromList [("", 0.0)], n = 3}, likelihoods = HashMap.fromList [("", 0.0)], n = 3},
koData = koData =
ClassData{prior = -1.3862943611198906, ClassData{prior = -infinity, unseen = -0.6931471805599453,
unseen = -1.0986122886681098, likelihoods = HashMap.fromList [], n = 0}}),
likelihoods = HashMap.fromList [("", 0.0)], n = 1}}),
("integer 21..99", ("integer 21..99",
Classifier{okData = Classifier{okData =
ClassData{prior = 0.0, unseen = -1.6094379124341003, ClassData{prior = 0.0, unseen = -1.6094379124341003,
@ -796,12 +784,11 @@ classifiers
likelihoods = HashMap.fromList [], n = 0}}), likelihoods = HashMap.fromList [], n = 0}}),
("yesterday", ("yesterday",
Classifier{okData = Classifier{okData =
ClassData{prior = -0.1823215567939546, ClassData{prior = 0.0, unseen = -1.9459101490553135,
unseen = -1.9459101490553135,
likelihoods = HashMap.fromList [("", 0.0)], n = 5}, likelihoods = HashMap.fromList [("", 0.0)], n = 5},
koData = koData =
ClassData{prior = -1.791759469228055, unseen = -1.0986122886681098, ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [("", 0.0)], n = 1}}), likelihoods = HashMap.fromList [], n = 0}}),
("hh:mm:ss", ("hh:mm:ss",
Classifier{okData = Classifier{okData =
ClassData{prior = 0.0, unseen = -1.0986122886681098, ClassData{prior = 0.0, unseen = -1.0986122886681098,
@ -976,39 +963,37 @@ classifiers
likelihoods = HashMap.fromList [], n = 0}}), likelihoods = HashMap.fromList [], n = 0}}),
("<day-of-month> (ordinal or number) of <named-month>", ("<day-of-month> (ordinal or number) of <named-month>",
Classifier{okData = Classifier{okData =
ClassData{prior = -0.6131044728864089, ClassData{prior = -0.570544858467613, unseen = -3.713572066704308,
unseen = -3.7376696182833684,
likelihoods = likelihoods =
HashMap.fromList HashMap.fromList
[("ordinals (thirtieth..nineteenth)February", [("ordinals (thirtieth..nineteenth)February",
-2.6149597780361984), -2.5902671654458267),
("ordinals (first..tenth)October", -2.6149597780361984), ("ordinals (first..tenth)October", -2.5902671654458267),
("ordinals (first..tenth)April", -2.6149597780361984), ("ordinals (first..tenth)April", -2.5902671654458267),
("ordinals (first..tenth)February", -3.0204248861443626), ("ordinals (first..tenth)February", -2.995732273553991),
("ordinals (first..tenth)month (integer)", -2.6149597780361984), ("ordinals (first..tenth)month (integer)", -2.5902671654458267),
("ordinals (first..tenth)March", -2.6149597780361984), ("ordinals (first..tenth)March", -2.5902671654458267),
("month", -1.074514737089049), ("month", -1.0498221244986778),
("ordinals (first..tenth)in <named-month>", ("ordinals (first..tenth)in <named-month>",
-2.6149597780361984)], -2.5902671654458267)],
n = 13}, n = 13},
koData = koData =
ClassData{prior = -0.7801585575495751, ClassData{prior = -0.832909122935104, unseen = -3.5553480614894135,
unseen = -3.6375861597263857,
likelihoods = likelihoods =
HashMap.fromList HashMap.fromList
[("integer 2in <named-month>", -2.917770732084279), [("integer 2in <named-month>", -2.833213344056216),
("integer 10February", -2.512305623976115), ("integer 10February", -2.4277482359480516),
("integer (numeric)month (ordinal)", -2.917770732084279), ("integer (numeric)month (ordinal)", -2.833213344056216),
("ordinals (first..tenth)month (integer)", -2.917770732084279), ("ordinals (first..tenth)month (integer)", -2.833213344056216),
("integer (20..90)in <named-month>", -2.917770732084279), ("integer (20..90)in <named-month>", -2.833213344056216),
("ordinals (composite, e.g., eighty-seven)in <named-month>", ("ordinals (composite, e.g., eighty-seven)in <named-month>",
-2.917770732084279), -2.833213344056216),
("ordinals (twenty, thirty..ninety)in <named-month>", ("ordinals (twenty, thirty..ninety)in <named-month>",
-2.917770732084279), -2.833213344056216),
("month", -1.126011262856224), ("month", -1.128465251817791),
("ordinals (first..tenth)in <named-month>", -2.512305623976115), ("ordinals (first..tenth)in <named-month>",
("ordinals (first..tenth)August", -2.917770732084279)], -2.4277482359480516)],
n = 11}}), n = 10}}),
("this <part-of-day>", ("this <part-of-day>",
Classifier{okData = Classifier{okData =
ClassData{prior = -1.0986122886681098, ClassData{prior = -1.0986122886681098,
@ -1045,13 +1030,6 @@ classifiers
koData = koData =
ClassData{prior = -infinity, unseen = -1.9459101490553135, ClassData{prior = -infinity, unseen = -1.9459101490553135,
likelihoods = HashMap.fromList [], n = 0}}), likelihoods = HashMap.fromList [], n = 0}}),
("powers of tens",
Classifier{okData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0},
koData =
ClassData{prior = 0.0, unseen = -2.0794415416798357,
likelihoods = HashMap.fromList [("", 0.0)], n = 6}}),
("Friday", ("Friday",
Classifier{okData = Classifier{okData =
ClassData{prior = 0.0, unseen = -1.6094379124341003, ClassData{prior = 0.0, unseen = -1.6094379124341003,
@ -1199,19 +1177,15 @@ classifiers
likelihoods = HashMap.fromList [], n = 0}}), likelihoods = HashMap.fromList [], n = 0}}),
("year", ("year",
Classifier{okData = Classifier{okData =
ClassData{prior = -0.9932517730102834, ClassData{prior = -0.7419373447293773,
unseen = -2.5649493574615367, unseen = -2.4849066497880004,
likelihoods = likelihoods = HashMap.fromList [("integer (numeric)", 0.0)],
HashMap.fromList [("integer (numeric)", -8.701137698962981e-2)],
n = 10}, n = 10},
koData = koData =
ClassData{prior = -0.46262352194811296, ClassData{prior = -0.6466271649250525,
unseen = -2.995732273553991, unseen = -2.5649493574615367,
likelihoods = likelihoods = HashMap.fromList [("integer (numeric)", 0.0)],
HashMap.fromList n = 11}}),
[("integer (numeric)", -0.4595323293784402),
("powers of tens", -0.9985288301111273)],
n = 17}}),
("last <day-of-week> of <time>", ("last <day-of-week> of <time>",
Classifier{okData = Classifier{okData =
ClassData{prior = 0.0, unseen = -2.0794415416798357, ClassData{prior = 0.0, unseen = -2.0794415416798357,
@ -1256,12 +1230,13 @@ classifiers
likelihoods = HashMap.fromList [], n = 0}}), likelihoods = HashMap.fromList [], n = 0}}),
("integer 10", ("integer 10",
Classifier{okData = Classifier{okData =
ClassData{prior = -0.8472978603872037, ClassData{prior = -0.6931471805599453,
unseen = -2.3978952727983707, unseen = -2.3978952727983707,
likelihoods = HashMap.fromList [("", 0.0)], n = 9}, likelihoods = HashMap.fromList [("", 0.0)], n = 9},
koData = koData =
ClassData{prior = -0.5596157879354228, unseen = -2.639057329615259, ClassData{prior = -0.6931471805599453,
likelihoods = HashMap.fromList [("", 0.0)], n = 12}}), unseen = -2.3978952727983707,
likelihoods = HashMap.fromList [("", 0.0)], n = 9}}),
("from <day-of-month> (ordinal or number) to <day-of-month> (ordinal or number) <named-month> (interval)", ("from <day-of-month> (ordinal or number) to <day-of-month> (ordinal or number) <named-month> (interval)",
Classifier{okData = Classifier{okData =
ClassData{prior = 0.0, unseen = -2.0794415416798357, ClassData{prior = 0.0, unseen = -2.0794415416798357,
@ -1297,28 +1272,26 @@ classifiers
likelihoods = HashMap.fromList [("", 0.0)], n = 1}}), likelihoods = HashMap.fromList [("", 0.0)], n = 1}}),
("<hour-of-day> and integer", ("<hour-of-day> and integer",
Classifier{okData = Classifier{okData =
ClassData{prior = -1.3217558399823195, unseen = -2.890371757896165, ClassData{prior = -1.252762968495368, unseen = -2.833213344056216,
likelihoods = likelihoods =
HashMap.fromList HashMap.fromList
[("at <time-of-day>integer (13..19)", -2.1400661634962708), [("at <time-of-day>integer (13..19)", -2.0794415416798357),
("at <time-of-day>integer 21..99", -1.7346010553881064), ("at <time-of-day>integer 21..99", -1.6739764335716716),
("hour", -1.2237754316221157), ("hour", -1.1631508098056809),
("at <time-of-day>integer (numeric)", -2.1400661634962708)], ("at <time-of-day>integer (numeric)", -2.0794415416798357)],
n = 4}, n = 4},
koData = koData =
ClassData{prior = -0.3101549283038396, ClassData{prior = -0.3364722366212129, unseen = -3.367295829986474,
unseen = -3.4657359027997265,
likelihoods = likelihoods =
HashMap.fromList HashMap.fromList
[("at <time-of-day>integer 5", -2.3353749158170367), [("at <time-of-day>integer 5", -2.2335922215070942),
("at <time-of-day>integer (13..19)", -2.3353749158170367), ("at <time-of-day>integer (13..19)", -2.2335922215070942),
("at <time-of-day>integer 3", -2.740840023925201), ("at <time-of-day>integer 3", -2.639057329615259),
("at <time-of-day>integer 4", -2.740840023925201), ("at <time-of-day>integer (20..90)", -2.639057329615259),
("at <time-of-day>integer (20..90)", -2.740840023925201), ("at <time-of-day>integer 8", -2.2335922215070942),
("at <time-of-day>integer 8", -2.3353749158170367), ("hour", -0.9343092373768334),
("hour", -0.9490805546971459), ("at <time-of-day>integer (numeric)", -2.2335922215070942)],
("at <time-of-day>integer (numeric)", -2.3353749158170367)], n = 10}}),
n = 11}}),
("month (ordinal)", ("month (ordinal)",
Classifier{okData = Classifier{okData =
ClassData{prior = 0.0, unseen = -1.3862943611198906, ClassData{prior = 0.0, unseen = -1.3862943611198906,
@ -1327,19 +1300,13 @@ classifiers
koData = koData =
ClassData{prior = -infinity, unseen = -0.6931471805599453, ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0}}), likelihoods = HashMap.fromList [], n = 0}}),
("integer 4",
Classifier{okData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0},
koData =
ClassData{prior = 0.0, unseen = -2.1972245773362196,
likelihoods = HashMap.fromList [("", 0.0)], n = 7}}),
("second (grain) ", ("second (grain) ",
Classifier{okData = Classifier{okData =
ClassData{prior = -infinity, unseen = -0.6931471805599453, ClassData{prior = -1.6094379124341003,
likelihoods = HashMap.fromList [], n = 0}, unseen = -1.0986122886681098,
likelihoods = HashMap.fromList [("", 0.0)], n = 1},
koData = koData =
ClassData{prior = 0.0, unseen = -1.791759469228055, ClassData{prior = -0.2231435513142097, unseen = -1.791759469228055,
likelihoods = HashMap.fromList [("", 0.0)], n = 4}}), likelihoods = HashMap.fromList [("", 0.0)], n = 4}}),
("\1593\1610\1583 \1575\1604\1601\1589\1581", ("\1593\1610\1583 \1575\1604\1601\1589\1581",
Classifier{okData = Classifier{okData =
@ -1459,12 +1426,11 @@ classifiers
n = 17}}), n = 17}}),
("integer 3", ("integer 3",
Classifier{okData = Classifier{okData =
ClassData{prior = -1.0560526742493137, ClassData{prior = 0.0, unseen = -2.3025850929940455,
unseen = -2.3025850929940455,
likelihoods = HashMap.fromList [("", 0.0)], n = 8}, likelihoods = HashMap.fromList [("", 0.0)], n = 8},
koData = koData =
ClassData{prior = -0.4274440148269396, unseen = -2.833213344056216, ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [("", 0.0)], n = 15}}), likelihoods = HashMap.fromList [], n = 0}}),
("last <time>", ("last <time>",
Classifier{okData = Classifier{okData =
ClassData{prior = -infinity, unseen = -1.791759469228055, ClassData{prior = -infinity, unseen = -1.791759469228055,
@ -1681,19 +1647,21 @@ classifiers
likelihoods = HashMap.fromList [("", 0.0)], n = 2}}), likelihoods = HashMap.fromList [("", 0.0)], n = 2}}),
("this|last the <cycle>", ("this|last the <cycle>",
Classifier{okData = Classifier{okData =
ClassData{prior = 0.0, unseen = -2.9444389791664407, ClassData{prior = 0.0, unseen = -3.1354942159291497,
likelihoods = likelihoods =
HashMap.fromList HashMap.fromList
[("week", -1.791759469228055), [("week", -1.9924301646902063),
("month (grain)", -2.1972245773362196), ("month (grain)", -2.3978952727983707),
("year (grain)", -2.1972245773362196), ("year (grain)", -2.3978952727983707),
("week (grain)", -1.791759469228055), ("second", -2.3978952727983707),
("day", -2.1972245773362196), ("year", -2.1972245773362196), ("week (grain)", -1.9924301646902063),
("month", -2.1972245773362196), ("day", -2.3978952727983707), ("year", -2.3978952727983707),
("day (grain)", -2.1972245773362196)], ("second (grain) ", -2.3978952727983707),
n = 5}, ("month", -2.3978952727983707),
("day (grain)", -2.3978952727983707)],
n = 6},
koData = koData =
ClassData{prior = -infinity, unseen = -2.1972245773362196, ClassData{prior = -infinity, unseen = -2.3978952727983707,
likelihoods = HashMap.fromList [], n = 0}}), likelihoods = HashMap.fromList [], n = 0}}),
("in <named-month>", ("in <named-month>",
Classifier{okData = Classifier{okData =
@ -1759,28 +1727,27 @@ classifiers
n = 3}}), n = 3}}),
("last <cycle> of <time>", ("last <cycle> of <time>",
Classifier{okData = Classifier{okData =
ClassData{prior = -0.5596157879354228, ClassData{prior = -0.40546510810816444,
unseen = -2.9444389791664407, unseen = -2.833213344056216,
likelihoods = likelihoods =
HashMap.fromList HashMap.fromList
[("daymonth", -1.791759469228055), [("daymonth", -1.6739764335716716),
("week (grain)intersect by \",\", \"of\", \"from\", \"'s\"", ("week (grain)intersect by \",\", \"of\", \"from\", \"'s\"",
-2.1972245773362196), -2.0794415416798357),
("day (grain)intersect", -2.1972245773362196), ("day (grain)intersect", -2.0794415416798357),
("weekmonth", -1.791759469228055), ("weekmonth", -1.6739764335716716),
("week (grain)September", -2.1972245773362196), ("week (grain)September", -2.0794415416798357),
("day (grain)month (integer)", -2.1972245773362196)], ("day (grain)month (integer)", -2.0794415416798357)],
n = 4}, n = 4},
koData = koData =
ClassData{prior = -0.8472978603872037, unseen = -2.833213344056216, ClassData{prior = -1.0986122886681098,
unseen = -2.5649493574615367,
likelihoods = likelihoods =
HashMap.fromList HashMap.fromList
[("week (grain)<time> for <duration>", -2.0794415416798357), [("week (grain)<time> for <duration>", -1.791759469228055),
("weekmonth", -1.6739764335716716), ("weekmonth", -1.3862943611198906),
("weekday", -2.0794415416798357), ("week (grain)intersect", -1.791759469228055)],
("week (grain)intersect", -2.0794415416798357), n = 2}}),
("week (grain)Saturday", -2.0794415416798357)],
n = 3}}),
("ordinals (composite, e.g., eighty-seven)", ("ordinals (composite, e.g., eighty-seven)",
Classifier{okData = Classifier{okData =
ClassData{prior = 0.0, unseen = -1.3862943611198906, ClassData{prior = 0.0, unseen = -1.3862943611198906,
@ -1832,19 +1799,6 @@ classifiers
koData = koData =
ClassData{prior = -infinity, unseen = -0.6931471805599453, ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0}}), likelihoods = HashMap.fromList [], n = 0}}),
("for <duration> from <time>",
Classifier{okData =
ClassData{prior = -infinity, unseen = -1.3862943611198906,
likelihoods = HashMap.fromList [], n = 0},
koData =
ClassData{prior = 0.0, unseen = -2.0794415416798357,
likelihoods =
HashMap.fromList
[("single <unit-of-duration>intersect", -1.252762968495368),
("single <unit-of-duration>in <named-month>",
-1.252762968495368),
("secondmonth", -0.8472978603872037)],
n = 2}}),
("<day-of-week> the last of <time>", ("<day-of-week> the last of <time>",
Classifier{okData = Classifier{okData =
ClassData{prior = 0.0, unseen = -2.3978952727983707, ClassData{prior = 0.0, unseen = -2.3978952727983707,
@ -1888,15 +1842,6 @@ classifiers
koData = koData =
ClassData{prior = -infinity, unseen = -1.6094379124341003, ClassData{prior = -infinity, unseen = -1.6094379124341003,
likelihoods = HashMap.fromList [], n = 0}}), likelihoods = HashMap.fromList [], n = 0}}),
("compose by multiplication",
Classifier{okData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0},
koData =
ClassData{prior = 0.0, unseen = -1.0986122886681098,
likelihoods =
HashMap.fromList [("integer (numeric)powers of tens", 0.0)],
n = 1}}),
("<month> dd-dd (interval)", ("<month> dd-dd (interval)",
Classifier{okData = Classifier{okData =
ClassData{prior = 0.0, unseen = -2.3025850929940455, ClassData{prior = 0.0, unseen = -2.3025850929940455,
@ -1953,7 +1898,7 @@ classifiers
n = 14}}), n = 14}}),
("single <unit-of-duration>", ("single <unit-of-duration>",
Classifier{okData = Classifier{okData =
ClassData{prior = -3.58351893845611, unseen = -3.1354942159291497, ClassData{prior = -3.590439381300684, unseen = -3.1354942159291497,
likelihoods = likelihoods =
HashMap.fromList HashMap.fromList
[("week", -1.9924301646902063), [("week", -1.9924301646902063),
@ -1962,24 +1907,23 @@ classifiers
("day (grain)", -1.9924301646902063)], ("day (grain)", -1.9924301646902063)],
n = 4}, n = 4},
koData = koData =
ClassData{prior = -2.8170876966696335e-2, ClassData{prior = -2.797385204240618e-2, unseen = -5.6937321388027,
unseen = -5.68697535633982,
likelihoods = likelihoods =
HashMap.fromList HashMap.fromList
[("week", -2.639057329615259), [("week", -2.6458370166006375),
("month (grain)", -2.249592562853535), ("month (grain)", -2.2563722498389143),
("hour (grain)", -1.9947003132247452), ("hour (grain)", -2.001480000210124),
("year (grain)", -2.793208009442517), ("year (grain)", -2.7999876964278956),
("second", -4.074141854904581), ("second", -3.898599985096005),
("week (grain)", -2.639057329615259), ("week (grain)", -2.6458370166006375),
("day", -2.592537313980366), ("day", -2.5993170009657445),
("minute (grain)", -3.380994674344636), ("minute (grain)", -3.3877743613300146),
("year", -2.793208009442517), ("year", -2.7999876964278956),
("second (grain) ", -4.074141854904581), ("second (grain) ", -3.898599985096005),
("hour", -1.9947003132247452), ("month", -2.249592562853535), ("hour", -2.001480000210124), ("month", -2.2563722498389143),
("minute", -3.380994674344636), ("minute", -3.3877743613300146),
("day (grain)", -2.592537313980366)], ("day (grain)", -2.5993170009657445)],
n = 140}}), n = 141}}),
("dd-dd <month> (interval)", ("dd-dd <month> (interval)",
Classifier{okData = Classifier{okData =
ClassData{prior = 0.0, unseen = -2.3025850929940455, ClassData{prior = 0.0, unseen = -2.3025850929940455,
@ -2005,10 +1949,8 @@ classifiers
likelihoods = HashMap.fromList [], n = 0}}), likelihoods = HashMap.fromList [], n = 0}}),
("August", ("August",
Classifier{okData = Classifier{okData =
ClassData{prior = -2.0794415416798357, ClassData{prior = 0.0, unseen = -1.3862943611198906,
unseen = -1.3862943611198906,
likelihoods = HashMap.fromList [("", 0.0)], n = 2}, likelihoods = HashMap.fromList [("", 0.0)], n = 2},
koData = koData =
ClassData{prior = -0.13353139262452263, ClassData{prior = -infinity, unseen = -0.6931471805599453,
unseen = -2.772588722239781, likelihoods = HashMap.fromList [], n = 0}})]
likelihoods = HashMap.fromList [("", 0.0)], n = 14}})]

View File

@ -22,19 +22,21 @@ import Duckling.Time.Types hiding (Month)
import Duckling.TimeGrain.Types hiding (add) import Duckling.TimeGrain.Types hiding (add)
import Duckling.Testing.Types hiding (examples) import Duckling.Testing.Types hiding (examples)
context :: Context
context = testContext {locale = makeLocale AR Nothing}
corpus :: Corpus
corpus = (context, testOptions, allExamples)
negativeCorpus :: NegativeCorpus negativeCorpus :: NegativeCorpus
negativeCorpus = (context, testOptions, examples) negativeCorpus = (context, testOptions, examples)
where where
examples = examples =
[ "حب" [ "حب"
, "اياب"
, "ابحث"
] ]
context :: Context
context = testContext {locale = makeLocale AR Nothing}
corpus :: Corpus
corpus = (context, testOptions, allExamples)
allExamples :: [Example] allExamples :: [Example]
allExamples = concat allExamples = concat
[ examples (datetime (2013, 2, 12, 4, 30, 0) Second) [ examples (datetime (2013, 2, 12, 4, 30, 0) Second)

View File

@ -125,9 +125,59 @@ data CharClass
-- the reasonability of the match to actually be a word. -- the reasonability of the match to actually be a word.
isRangeValid :: Lang -> Document -> Int -> Int -> Bool isRangeValid :: Lang -> Document -> Int -> Int -> Bool
isRangeValid = \case isRangeValid = \case
AR -> arIsRangeValid
ZH -> zhIsRangeValid ZH -> zhIsRangeValid
_ -> defaultIsRangeValid _ -> defaultIsRangeValid
where where
-- | Decide whether the half-open range @[start, end)@ of an Arabic document
-- is a plausible token, i.e. whether a match on it may be kept.
--
-- A range is accepted when any of the following holds:
--
-- * it starts and ends on a character-class boundary (same test as for
--   other languages, but with Arabic letters counted as 'Alpha');
-- * it is itself a one- or two-character proclitic at the start of a word;
-- * it is immediately preceded by a one-character proclitic and ends on a
--   boundary;
-- * it is itself a two-character enclitic at the end of a word;
-- * it starts on a boundary and is immediately followed by a two-character
--   enclitic.
arIsRangeValid :: Document -> Int -> Int -> Bool
arIsRangeValid doc start end =
  (startsOnBoundary && endsOnBoundary) ||
  -- Is Arabic proclitic?
  (start == end - 1 &&
   isArabicProclitic (doc ! start) &&
   startsOnBoundary) ||
  (start == end - 2 &&
   isArabicProclitic2 (doc ! start) (doc ! (start + 1)) &&
   startsOnBoundary) ||
  -- Is preceded by proclitic
  (start /= 0 && isArabicProclitic (doc ! (start - 1)) &&
   endsOnBoundary) ||
  -- Is Arabic enclitic?
  -- The boundary that matters is the one right after the match, i.e.
  -- between positions (end - 1) and end. Comparing end with (end + 1)
  -- would test the wrong pair and index past the document when the
  -- match stops one character before its end.
  (start == end - 2 &&
   isArabicEnclitic (doc ! start) (doc ! (end - 1)) &&
   endsOnBoundary) ||
  -- Is followed by enclitic
  (startsOnBoundary &&
   end <= length doc - 2 &&
   isArabicEnclitic (doc ! end) (doc ! (end + 1)))
  where
    -- The range begins the document or follows a character of another class.
    startsOnBoundary :: Bool
    startsOnBoundary =
      start == 0 || isDifferent (doc ! (start - 1)) (doc ! start)

    -- The range closes the document or precedes a character of another class.
    endsOnBoundary :: Bool
    endsOnBoundary =
      end == length doc || isDifferent (doc ! (end - 1)) (doc ! end)

    -- This list isn't exhaustive, since Arabic has some diacritics and
    -- rarely used characters in Unicode.
    isArabic :: Char -> Bool
    isArabic c = elem c
      ['ا', 'ب', 'ت', 'ة', 'ث', 'ج', 'ح', 'خ', 'د', 'ذ', 'ر', 'ز'
      , 'س', 'ش', 'ص', 'ض', 'ط', 'ظ', 'ع', 'غ', 'ف', 'ق', 'ك', 'ل'
      , 'م', 'ن', 'ه', 'ي', 'ء', 'آ', 'أ', 'إ', 'ؤ', 'و', 'ئ', 'ى'
      ]

    -- TODO: Add all Arabic proclitics
    -- Single-character proclitics.
    isArabicProclitic :: Char -> Bool
    isArabicProclitic c = elem c ['و', 'ف', 'ل', 'ب', 'ك']

    -- Two-character proclitics, given as first and second character.
    isArabicProclitic2 :: Char -> Char -> Bool
    isArabicProclitic2 c1 c2 = elem c1 ['ا', 'ل'] && elem c2 ['ل']

    -- TODO: Add all Arabic enclitics
    -- Two-character enclitics, given as first and second character.
    isArabicEnclitic :: Char -> Char -> Bool
    isArabicEnclitic c1 c2 = elem c1 ['ا', 'ي'] && elem c2 ['ن']

    -- Arabic letters share the 'Alpha' class with cased letters; any
    -- character that is neither a letter nor a digit is a class of its own.
    charClass :: Char -> CharClass
    charClass c
      | Char.isLower c || Char.isUpper c || isArabic c = Alpha
      | Char.isDigit c = Digit
      | otherwise = Self c

    -- Two characters are "different" when their classes differ.
    isDifferent :: Char -> Char -> Bool
    isDifferent a b = charClass a /= charClass b
zhIsRangeValid :: Document -> Int -> Int -> Bool zhIsRangeValid :: Document -> Int -> Int -> Bool
zhIsRangeValid doc start end = zhIsRangeValid doc start end =
(start == 0 || (start == 0 ||

View File

@ -20,4 +20,5 @@ import Duckling.Testing.Asserts
tests :: TestTree tests :: TestTree
tests = testGroup "AR Tests" tests = testGroup "AR Tests"
[ makeCorpusTest [Seal Quantity] corpus [ makeCorpusTest [Seal Quantity] corpus
, makeNegativeCorpusTest [Seal Time] negativeCorpus
] ]