Add isArabic rule (#577)

Summary:
Fixes https://github.com/facebook/duckling/issues/437, fixes https://github.com/facebook/duckling/issues/571

Pull Request resolved: https://github.com/facebook/duckling/pull/577

Reviewed By: stroxler

Differential Revision: D29664126

Pulled By: chessai

fbshipit-source-id: b6365699231527b0869322c798e32a21328f1071
This commit is contained in:
Amr Keleg 2021-07-12 13:27:56 -07:00 committed by Facebook GitHub Bot
parent ed291c2a3a
commit 79ac8f63f9
7 changed files with 647 additions and 630 deletions

View File

@ -110,6 +110,69 @@ ruleInteger21 = Rule
_ -> Nothing
}
-- | Spelled-out Arabic "three hundred", resolved to the integer 300.
--
-- The regex is the stem group followed by the "hundred" suffix; the suffix
-- tolerates an optional alef and accepts either of two letters in each of
-- its last two positions, so several spellings are recognised.
ruleInteger300 :: Rule
ruleInteger300 = Rule
  { name = "integer 300"
  , pattern = [regex "(ثلاث)ما?[ئي][ةه]"]
    -- The matched tokens are not inspected: the value is fixed.
  , prod = const (integer 300)
  }
-- | Spelled-out Arabic "four hundred", resolved to the integer 400.
--
-- The stem may begin with either alef form listed in the character class;
-- the "hundred" suffix tolerates an optional alef and accepts either of two
-- letters in each of its last two positions.
ruleInteger400 :: Rule
ruleInteger400 = Rule
  { name = "integer 400"
  , pattern = [regex "([أا]ربع)ما?[ئي][ةه]"]
    -- The matched tokens are not inspected: the value is fixed.
  , prod = const (integer 400)
  }
-- | Spelled-out Arabic "five hundred", resolved to the integer 500.
--
-- The regex is the stem group followed by the "hundred" suffix; the suffix
-- tolerates an optional alef and accepts either of two letters in each of
-- its last two positions.
ruleInteger500 :: Rule
ruleInteger500 = Rule
  { name = "integer 500"
  , pattern = [regex "(خمس)ما?[ئي][ةه]"]
    -- The matched tokens are not inspected: the value is fixed.
  , prod = const (integer 500)
  }
-- | Spelled-out Arabic "six hundred", resolved to the integer 600.
--
-- The regex is the stem group followed by the "hundred" suffix; the suffix
-- tolerates an optional alef and accepts either of two letters in each of
-- its last two positions.
ruleInteger600 :: Rule
ruleInteger600 = Rule
  { name = "integer 600"
  , pattern = [regex "(ست)ما?[ئي][ةه]"]
    -- The matched tokens are not inspected: the value is fixed.
  , prod = const (integer 600)
  }
-- | Spelled-out Arabic "seven hundred", resolved to the integer 700.
--
-- The regex is the stem group followed by the "hundred" suffix; the suffix
-- tolerates an optional alef and accepts either of two letters in each of
-- its last two positions.
ruleInteger700 :: Rule
ruleInteger700 = Rule
  { name = "integer 700"
  , pattern = [regex "(سبع)ما?[ئي][ةه]"]
    -- The matched tokens are not inspected: the value is fixed.
  , prod = const (integer 700)
  }
-- | Spelled-out Arabic "eight hundred", resolved to the integer 800.
--
-- The stem accepts an optional trailing yeh; the "hundred" suffix tolerates
-- an optional alef and accepts either of two letters in each of its last
-- two positions.
ruleInteger800 :: Rule
ruleInteger800 = Rule
  { name = "integer 800"
  , pattern = [regex "(ثمان[ي]?)ما?[ئي][ةه]"]
    -- The matched tokens are not inspected: the value is fixed.
  , prod = const (integer 800)
  }
-- | Spelled-out Arabic "nine hundred", resolved to the integer 900.
--
-- The regex is the stem group followed by the "hundred" suffix; the suffix
-- tolerates an optional alef and accepts either of two letters in each of
-- its last two positions.
ruleInteger900 :: Rule
ruleInteger900 = Rule
  { name = "integer 900"
  , pattern = [regex "(تسع)ما?[ئي][ةه]"]
    -- The matched tokens are not inspected: the value is fixed.
  , prod = const (integer 900)
  }
ruleDecimalWithThousandsSeparator :: Rule
ruleDecimalWithThousandsSeparator = Rule
{ name = "decimal with thousands separator"
@ -402,6 +465,13 @@ rules =
, ruleInteger5
, ruleInteger7
, ruleInteger9
, ruleInteger300
, ruleInteger400
, ruleInteger500
, ruleInteger600
, ruleInteger700
, ruleInteger800
, ruleInteger900
, ruleIntegerWithThousandsSeparator
, ruleMultiply
, ruleNumeralDotNumeral

View File

@ -9,6 +9,7 @@
module Duckling.Quantity.AR.Corpus
( corpus
, negativeCorpus
) where
import Data.String
@ -19,6 +20,15 @@ import Duckling.Quantity.Types
import Duckling.Resolve
import Duckling.Testing.Types
-- | Arabic inputs that must not be recognised as a Quantity.
--
-- The context overrides the locale to Arabic, exactly as 'corpus' does.
-- With a bare 'testContext' the examples would be evaluated under whatever
-- locale 'testContext' defaults to (presumably not AR -- confirm in
-- "Duckling.Testing.Types"), so the Arabic rules would not be the ones
-- under test.
negativeCorpus :: NegativeCorpus
negativeCorpus =
  (testContext {locale = makeLocale AR Nothing}, testOptions, examples)
  where
    -- Ordinary words listed as negatives; none of them is a quantity.
    examples =
      [ "جمبري"
      , "جمهور"
      , "غمامة"
      ]
corpus :: Corpus
corpus = (testContext {locale = makeLocale AR Nothing}, testOptions, allExamples)

View File

@ -65,13 +65,6 @@ classifiers
koData =
ClassData{prior = -infinity, unseen = -1.3862943611198906,
likelihoods = HashMap.fromList [], n = 0}}),
("noon|midnight|EOD|end of day",
Classifier{okData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0},
koData =
ClassData{prior = 0.0, unseen = -1.3862943611198906,
likelihoods = HashMap.fromList [("", 0.0)], n = 2}}),
("<hour-of-day> third",
Classifier{okData =
ClassData{prior = 0.0, unseen = -2.0794415416798357,
@ -119,7 +112,7 @@ classifiers
likelihoods = HashMap.fromList [], n = 0}}),
("<day-of-month> (ordinal or number) <named-month>",
Classifier{okData =
ClassData{prior = -0.3101549283038396,
ClassData{prior = -0.2411620568168881,
unseen = -3.4339872044851463,
likelihoods =
HashMap.fromList
@ -133,14 +126,13 @@ classifiers
("integer (numeric)March", -2.70805020110221)],
n = 11},
koData =
ClassData{prior = -1.3217558399823195, unseen = -2.833213344056216,
ClassData{prior = -1.540445040947149, unseen = -2.70805020110221,
likelihoods =
HashMap.fromList
[("integer (numeric)in <named-month>", -2.0794415416798357),
("integer (numeric)August", -2.0794415416798357),
("month", -1.1631508098056809),
("integer (numeric)July", -1.6739764335716716)],
n = 4}}),
[("integer (numeric)in <named-month>", -1.9459101490553135),
("month", -1.252762968495368),
("integer (numeric)July", -1.540445040947149)],
n = 3}}),
("<time> <part-of-day>",
Classifier{okData =
ClassData{prior = -0.41616039722491244,
@ -327,36 +319,37 @@ classifiers
likelihoods = HashMap.fromList [], n = 0}}),
("on <date>",
Classifier{okData =
ClassData{prior = -0.6931471805599453, unseen = -3.713572066704308,
ClassData{prior = -0.6131044728864089,
unseen = -3.7612001156935624,
likelihoods =
HashMap.fromList
[("<day-of-month> (ordinal or number) <named-month>",
-2.5902671654458267),
("September", -2.995732273553991),
-2.639057329615259),
("September", -3.044522437723423),
("second", -3.044522437723423),
("<day-of-month> (ordinal or number) of <named-month>",
-2.995732273553991),
("day", -1.3862943611198906), ("year", -2.5902671654458267),
("part of <named-month>", -2.0794415416798357),
("month", -2.995732273553991),
("year (integer)", -2.5902671654458267),
("this|last the <cycle>", -2.995732273553991),
("day of <named-month>", -2.995732273553991)],
n = 12},
-3.044522437723423),
("day", -1.4350845252893227), ("year", -2.639057329615259),
("part of <named-month>", -2.128231705849268),
("month", -3.044522437723423),
("year (integer)", -2.639057329615259),
("this|last the <cycle>", -2.639057329615259),
("day of <named-month>", -3.044522437723423)],
n = 13},
koData =
ClassData{prior = -0.6931471805599453, unseen = -3.713572066704308,
ClassData{prior = -0.7801585575495751,
unseen = -3.6635616461296463,
likelihoods =
HashMap.fromList
[("September", -2.995732273553991),
("October", -2.995732273553991),
("intersect", -2.0794415416798357),
("Saturday", -2.995732273553991),
("<time> for <duration>", -2.5902671654458267),
("day", -2.995732273553991),
("month (ordinal)", -2.995732273553991),
[("September", -2.9444389791664407),
("October", -2.9444389791664407),
("intersect", -2.0281482472922856),
("<time> for <duration>", -2.538973871058276),
("month (ordinal)", -2.9444389791664407),
("intersect by \",\", \"of\", \"from\", \"'s\"",
-2.5902671654458267),
("month", -1.2039728043259361)],
n = 12}}),
-2.538973871058276),
("month", -1.1526795099383855)],
n = 11}}),
("<hour-of-day> and integer minutes",
Classifier{okData =
ClassData{prior = -0.15415067982725836,
@ -459,166 +452,163 @@ classifiers
likelihoods = HashMap.fromList [("", 0.0)], n = 39}}),
("intersect",
Classifier{okData =
ClassData{prior = -0.569352963213281, unseen = -5.529429087511423,
ClassData{prior = -0.5458224658030868,
unseen = -5.5134287461649825,
likelihoods =
HashMap.fromList
[("Tuesdayon <date>", -4.832305758571839),
("in <named-month>year", -4.832305758571839),
[("Tuesdayon <date>", -4.816241156068032),
("in <named-month>year", -4.816241156068032),
("<hour-of-day> till thirdafter part of days",
-4.426840650463674),
("dayhour", -3.9160150266976834),
("daymonth", -4.832305758571839),
("in <named-month>on <date>", -4.832305758571839),
("monthyear", -3.3282283617955644),
("Mondayon <date>", -4.832305758571839),
("Christmasyear", -4.832305758571839),
-4.410776047959867),
("dayhour", -3.899950424193877),
("daymonth", -4.816241156068032),
("in <named-month>on <date>", -4.816241156068032),
("monthyear", -3.312163759291758),
("Mondayon <date>", -4.816241156068032),
("Christmasyear", -4.816241156068032),
("Monday<day-of-month> (ordinal or number) <named-month>",
-4.832305758571839),
("month (integer)year (integer)", -4.426840650463674),
-4.816241156068032),
("month (integer)year (integer)", -4.410776047959867),
("\1593\1610\1583 \1575\1604\1605\1610\1604\1575\1583year",
-4.832305758571839),
-4.816241156068032),
("<day-of-week> the last of <time><cycle> this|last|next",
-4.832305758571839),
("at <time-of-day>before part of days", -4.139158578011894),
("hh:mmbefore part of days", -4.832305758571839),
-4.816241156068032),
("at <time-of-day>before part of days", -4.123093975508087),
("hh:mmbefore part of days", -4.816241156068032),
("intersect by \",\", \"of\", \"from\", \"'s\"year",
-4.832305758571839),
("intersect<time> <part-of-day>", -4.426840650463674),
("month (ordinal)year (integer)", -4.832305758571839),
-4.816241156068032),
("intersect<time> <part-of-day>", -4.410776047959867),
("month (ordinal)year (integer)", -4.816241156068032),
("\1593\1610\1583 \1575\1604\1601\1589\1581year",
-4.832305758571839),
("dd/mm<time> <part-of-day>", -4.832305758571839),
("Thursday<time> timezone", -4.426840650463674),
-4.816241156068032),
("dd/mm<time> <part-of-day>", -4.816241156068032),
("Thursday<time> timezone", -4.410776047959867),
("<hour-of-day> till quarterthis <part-of-day>",
-4.426840650463674),
("dayday", -3.0405462893437836),
-4.410776047959867),
("dayday", -3.024481686839977),
("absorption of , after named day<day-of-month> (ordinal or number) <named-month>",
-4.832305758571839),
("hourhour", -3.733693469903729),
("month (integer)year", -4.832305758571839),
-4.816241156068032),
("hourhour", -3.717628867399922),
("month (integer)year", -4.816241156068032),
("\1593\1610\1583 \1575\1604\1601\1591\1585year",
-4.832305758571839),
-4.816241156068032),
("Thursday<day-of-month> (ordinal or number) <named-month>",
-4.832305758571839),
("dayyear", -2.960503581670247),
("Thursday<time> <part-of-day>", -4.832305758571839),
("<cycle> last of <time>year (integer)", -4.426840650463674),
("last <cycle> of <time>year (integer)", -4.832305758571839),
("<hour-of-day> quarterafter part of days", -4.832305758571839),
("minutehour", -2.529720665577793),
-4.816241156068032),
("dayyear", -2.9444389791664407),
("Thursday<time> <part-of-day>", -4.816241156068032),
("<cycle> last of <time>year (integer)", -4.410776047959867),
("last <cycle> of <time>year (integer)", -4.816241156068032),
("<hour-of-day> quarterafter part of days", -4.816241156068032),
("minutehour", -2.513656063073986),
("Thursdayfrom <datetime> - <datetime> (interval)",
-4.832305758571839),
-4.816241156068032),
("\1585\1571\1587 \1575\1604\1587\1606\1577 \1575\1604\1607\1580\1585\1610\1577year",
-4.832305758571839),
("Monday<cycle> this|last|next", -4.832305758571839),
-4.816241156068032),
("Monday<cycle> this|last|next", -4.816241156068032),
("<day-of-month> (ordinal or number) of <named-month>year",
-4.832305758571839),
("<cycle> <ordinal> day? of <time>year", -4.832305758571839),
-4.816241156068032),
("<cycle> <ordinal> day? of <time>year", -4.816241156068032),
("\1593\1610\1583 \1575\1604\1571\1590\1581\1609year",
-4.832305758571839),
("on a <named-day>dd/mm", -4.832305758571839),
("on a <named-day>on <date>", -4.832305758571839),
-4.816241156068032),
("on a <named-day>dd/mm", -4.816241156068032),
("on a <named-day>on <date>", -4.816241156068032),
("on a <named-day><day-of-month> (ordinal or number) <named-month>",
-4.832305758571839),
("Tuesday<cycle> this|last|next", -4.832305758571839),
("dayminute", -3.5795427900764705),
("dd/mmintersect", -4.832305758571839),
("in <named-month><cycle> this|last|next", -4.832305758571839),
-4.816241156068032),
("Tuesday<cycle> this|last|next", -4.816241156068032),
("dayminute", -3.563478187572664),
("dd/mmintersect", -4.816241156068032),
("in <named-month><cycle> this|last|next", -4.816241156068032),
("<hour-of-day> and integer minutesafter part of days",
-4.426840650463674),
("Saturdaydd/mm", -4.832305758571839),
("\1585\1605\1590\1575\1606year", -4.832305758571839),
("the <cycle> the <ordinal> of <time>year", -4.832305758571839),
("at <time-of-day>after part of days", -3.733693469903729),
("<ordinal> <cycle> of <time>year", -4.832305758571839),
("Sunday<cycle> this|last|next", -4.832305758571839),
-4.410776047959867),
("Saturdaydd/mm", -4.816241156068032),
("\1585\1605\1590\1575\1606year", -4.816241156068032),
("the <cycle> the <ordinal> of <time>year", -4.816241156068032),
("at <time-of-day>after part of days", -3.717628867399922),
("<ordinal> <cycle> of <time>year", -4.816241156068032),
("Sunday<cycle> this|last|next", -4.816241156068032),
("Tuesday<day-of-month> (ordinal or number) of <named-month>",
-4.426840650463674),
("Septemberon <date>", -4.832305758571839),
-4.410776047959867),
("Septemberon <date>", -4.816241156068032),
("from <datetime> - <datetime> (interval)after part of days",
-4.832305758571839),
("intersectintersect", -4.426840650463674),
("dayweek", -4.139158578011894),
("weekyear", -3.9160150266976834),
-4.816241156068032),
("intersectintersect", -4.410776047959867),
("dayweek", -4.123093975508087),
("weekyear", -3.899950424193877),
("<datetime> - <datetime> (interval)after part of days",
-4.832305758571839),
-4.816241156068032),
("Friday<day-of-month> (ordinal or number) <named-month>",
-4.832305758571839),
-4.816241156068032),
("<hour-of-day> till quarterbefore part of days",
-4.426840650463674),
-4.410776047959867),
("<time-of-day> - <time-of-day> (interval)after part of days",
-4.832305758571839),
("at <time-of-day>this <part-of-day>", -4.832305758571839),
-4.816241156068032),
("at <time-of-day>this <part-of-day>", -4.816241156068032),
("<hour-of-day> till thirdbefore part of days",
-4.426840650463674)],
-4.410776047959867)],
n = 73},
koData =
ClassData{prior = -0.8344607136265229, unseen = -5.384495062789089,
ClassData{prior = -0.8659899933993561, unseen = -5.337538079701318,
likelihoods =
HashMap.fromList
[("Tuesdayon <date>", -4.686750172980514),
("dayhour", -3.18267277620424),
("daymonth", -3.5881378843124043),
("monthday", -4.686750172980514),
("monthyear", -2.814947996078923),
("hh:mmafter part of days", -4.686750172980514),
("houryear", -4.28128506487235),
("intersectat <time-of-day>", -3.770459441106359),
("intersect<time> <part-of-day>", -4.28128506487235),
("Octoberon <date>", -4.686750172980514),
("until <time-of-day>after part of days", -4.686750172980514),
("this|last <cycle>on <date>", -4.686750172980514),
("Tuesdayafter <time-of-day>", -4.686750172980514),
("Wednesdayon <date>", -4.28128506487235),
("dd/mm<time> <part-of-day>", -4.686750172980514),
("yearyear", -4.28128506487235),
[("Tuesdayon <date>", -4.639571612705423),
("dayhour", -3.1354942159291497),
("daymonth", -3.5409593240373143),
("monthday", -4.639571612705423),
("monthyear", -2.7677694358038325),
("hh:mmafter part of days", -4.639571612705423),
("houryear", -4.639571612705423),
("intersectat <time-of-day>", -3.7232808808312687),
("intersect<time> <part-of-day>", -4.23410650459726),
("Octoberon <date>", -4.639571612705423),
("until <time-of-day>after part of days", -4.639571612705423),
("Tuesdayafter <time-of-day>", -4.639571612705423),
("Wednesdayon <date>", -4.23410650459726),
("dd/mm<time> <part-of-day>", -4.639571612705423),
("yearyear", -4.23410650459726),
("<hour-of-day> till quarterthis <part-of-day>",
-4.686750172980514),
("dayday", -3.993602992420569),
("dd/mmat <time-of-day>", -4.28128506487235),
("hourhour", -4.28128506487235),
("dayyear", -3.4339872044851463),
("Thursdayat <time-of-day>", -4.28128506487235),
("Februaryyear", -4.686750172980514),
("minutemonth", -4.686750172980514),
("minutehour", -4.28128506487235),
("for <duration> from <time>on <date>", -4.686750172980514),
-4.639571612705423),
("dayday", -3.9464244321454784),
("dd/mmat <time-of-day>", -4.23410650459726),
("hourhour", -4.23410650459726),
("dayyear", -3.386808644210056),
("Thursdayat <time-of-day>", -4.23410650459726),
("Februaryyear", -4.639571612705423),
("minutemonth", -4.639571612705423),
("minutehour", -4.23410650459726),
("<day-of-month> (ordinal or number) of <named-month>year",
-4.28128506487235),
("March<cycle> this|last|next", -4.686750172980514),
("nth <time> of <time>year", -4.686750172980514),
-4.23410650459726),
("March<cycle> this|last|next", -4.639571612705423),
("nth <time> of <time>year", -4.639571612705423),
("Wednesday<day-of-month> (ordinal or number) of <named-month>",
-4.686750172980514),
("secondyear", -4.28128506487235),
-4.639571612705423),
("secondyear", -4.639571612705423),
("Wednesdayintersect by \",\", \"of\", \"from\", \"'s\"",
-4.686750172980514),
("on <date>year (integer)", -4.686750172980514),
("<time> for <duration>year", -3.5881378843124043),
-4.639571612705423),
("on <date>year (integer)", -4.639571612705423),
("<time> for <duration>year", -3.5409593240373143),
("<duration> after|before|from <time>on <date>",
-4.686750172980514),
("dayminute", -3.18267277620424),
("Julyuntil <time-of-day>", -4.686750172980514),
("intersecton <date>", -4.686750172980514),
("at <time-of-day>after part of days", -4.686750172980514),
("Octoberyear", -4.686750172980514),
("in|within|after <duration>year", -4.28128506487235),
-4.639571612705423),
("dayminute", -3.1354942159291497),
("Julyuntil <time-of-day>", -4.639571612705423),
("intersecton <date>", -4.639571612705423),
("at <time-of-day>after part of days", -4.639571612705423),
("Octoberyear", -4.639571612705423),
("in|within|after <duration>year", -4.23410650459726),
("Tuesday<day-of-month> (ordinal or number) of <named-month>",
-4.686750172980514),
("on a <named-day>intersect", -4.28128506487235),
("Saturdayintersect", -4.28128506487235),
("at <time-of-day>year", -4.686750172980514),
("on <date>year", -4.28128506487235),
("on <date>on <date>", -4.686750172980514),
("weekday", -4.686750172980514),
("Tuesdaynth <time> of <time>", -4.686750172980514),
("weekyear", -4.686750172980514),
("Tuesdayintersect", -4.686750172980514),
("after <time-of-day>year", -4.686750172980514),
("in <number> (implicit minutes)February", -4.686750172980514),
("last <cycle> of <time>year", -4.686750172980514),
-4.639571612705423),
("on a <named-day>intersect", -4.23410650459726),
("Saturdayintersect", -4.23410650459726),
("on <date>year", -4.23410650459726),
("on <date>on <date>", -4.639571612705423),
("Tuesdaynth <time> of <time>", -4.639571612705423),
("weekyear", -4.639571612705423),
("Tuesdayintersect", -4.639571612705423),
("after <time-of-day>year", -4.639571612705423),
("in <number> (implicit minutes)February", -4.639571612705423),
("last <cycle> of <time>year", -4.639571612705423),
("<day-of-month> (ordinal or number) of <named-month>on <date>",
-4.686750172980514)],
n = 56}}),
-4.639571612705423)],
n = 53}}),
("\1585\1605\1590\1575\1606",
Classifier{okData =
ClassData{prior = 0.0, unseen = -1.0986122886681098,
@ -706,13 +696,11 @@ classifiers
likelihoods = HashMap.fromList [], n = 0}}),
("Saturday",
Classifier{okData =
ClassData{prior = -0.2876820724517809,
unseen = -1.6094379124341003,
ClassData{prior = 0.0, unseen = -1.6094379124341003,
likelihoods = HashMap.fromList [("", 0.0)], n = 3},
koData =
ClassData{prior = -1.3862943611198906,
unseen = -1.0986122886681098,
likelihoods = HashMap.fromList [("", 0.0)], n = 1}}),
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0}}),
("integer 21..99",
Classifier{okData =
ClassData{prior = 0.0, unseen = -1.6094379124341003,
@ -796,12 +784,11 @@ classifiers
likelihoods = HashMap.fromList [], n = 0}}),
("yesterday",
Classifier{okData =
ClassData{prior = -0.1823215567939546,
unseen = -1.9459101490553135,
ClassData{prior = 0.0, unseen = -1.9459101490553135,
likelihoods = HashMap.fromList [("", 0.0)], n = 5},
koData =
ClassData{prior = -1.791759469228055, unseen = -1.0986122886681098,
likelihoods = HashMap.fromList [("", 0.0)], n = 1}}),
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0}}),
("hh:mm:ss",
Classifier{okData =
ClassData{prior = 0.0, unseen = -1.0986122886681098,
@ -976,39 +963,37 @@ classifiers
likelihoods = HashMap.fromList [], n = 0}}),
("<day-of-month> (ordinal or number) of <named-month>",
Classifier{okData =
ClassData{prior = -0.6131044728864089,
unseen = -3.7376696182833684,
ClassData{prior = -0.570544858467613, unseen = -3.713572066704308,
likelihoods =
HashMap.fromList
[("ordinals (thirtieth..nineteenth)February",
-2.6149597780361984),
("ordinals (first..tenth)October", -2.6149597780361984),
("ordinals (first..tenth)April", -2.6149597780361984),
("ordinals (first..tenth)February", -3.0204248861443626),
("ordinals (first..tenth)month (integer)", -2.6149597780361984),
("ordinals (first..tenth)March", -2.6149597780361984),
("month", -1.074514737089049),
-2.5902671654458267),
("ordinals (first..tenth)October", -2.5902671654458267),
("ordinals (first..tenth)April", -2.5902671654458267),
("ordinals (first..tenth)February", -2.995732273553991),
("ordinals (first..tenth)month (integer)", -2.5902671654458267),
("ordinals (first..tenth)March", -2.5902671654458267),
("month", -1.0498221244986778),
("ordinals (first..tenth)in <named-month>",
-2.6149597780361984)],
-2.5902671654458267)],
n = 13},
koData =
ClassData{prior = -0.7801585575495751,
unseen = -3.6375861597263857,
ClassData{prior = -0.832909122935104, unseen = -3.5553480614894135,
likelihoods =
HashMap.fromList
[("integer 2in <named-month>", -2.917770732084279),
("integer 10February", -2.512305623976115),
("integer (numeric)month (ordinal)", -2.917770732084279),
("ordinals (first..tenth)month (integer)", -2.917770732084279),
("integer (20..90)in <named-month>", -2.917770732084279),
[("integer 2in <named-month>", -2.833213344056216),
("integer 10February", -2.4277482359480516),
("integer (numeric)month (ordinal)", -2.833213344056216),
("ordinals (first..tenth)month (integer)", -2.833213344056216),
("integer (20..90)in <named-month>", -2.833213344056216),
("ordinals (composite, e.g., eighty-seven)in <named-month>",
-2.917770732084279),
-2.833213344056216),
("ordinals (twenty, thirty..ninety)in <named-month>",
-2.917770732084279),
("month", -1.126011262856224),
("ordinals (first..tenth)in <named-month>", -2.512305623976115),
("ordinals (first..tenth)August", -2.917770732084279)],
n = 11}}),
-2.833213344056216),
("month", -1.128465251817791),
("ordinals (first..tenth)in <named-month>",
-2.4277482359480516)],
n = 10}}),
("this <part-of-day>",
Classifier{okData =
ClassData{prior = -1.0986122886681098,
@ -1045,13 +1030,6 @@ classifiers
koData =
ClassData{prior = -infinity, unseen = -1.9459101490553135,
likelihoods = HashMap.fromList [], n = 0}}),
("powers of tens",
Classifier{okData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0},
koData =
ClassData{prior = 0.0, unseen = -2.0794415416798357,
likelihoods = HashMap.fromList [("", 0.0)], n = 6}}),
("Friday",
Classifier{okData =
ClassData{prior = 0.0, unseen = -1.6094379124341003,
@ -1199,19 +1177,15 @@ classifiers
likelihoods = HashMap.fromList [], n = 0}}),
("year",
Classifier{okData =
ClassData{prior = -0.9932517730102834,
unseen = -2.5649493574615367,
likelihoods =
HashMap.fromList [("integer (numeric)", -8.701137698962981e-2)],
ClassData{prior = -0.7419373447293773,
unseen = -2.4849066497880004,
likelihoods = HashMap.fromList [("integer (numeric)", 0.0)],
n = 10},
koData =
ClassData{prior = -0.46262352194811296,
unseen = -2.995732273553991,
likelihoods =
HashMap.fromList
[("integer (numeric)", -0.4595323293784402),
("powers of tens", -0.9985288301111273)],
n = 17}}),
ClassData{prior = -0.6466271649250525,
unseen = -2.5649493574615367,
likelihoods = HashMap.fromList [("integer (numeric)", 0.0)],
n = 11}}),
("last <day-of-week> of <time>",
Classifier{okData =
ClassData{prior = 0.0, unseen = -2.0794415416798357,
@ -1256,12 +1230,13 @@ classifiers
likelihoods = HashMap.fromList [], n = 0}}),
("integer 10",
Classifier{okData =
ClassData{prior = -0.8472978603872037,
ClassData{prior = -0.6931471805599453,
unseen = -2.3978952727983707,
likelihoods = HashMap.fromList [("", 0.0)], n = 9},
koData =
ClassData{prior = -0.5596157879354228, unseen = -2.639057329615259,
likelihoods = HashMap.fromList [("", 0.0)], n = 12}}),
ClassData{prior = -0.6931471805599453,
unseen = -2.3978952727983707,
likelihoods = HashMap.fromList [("", 0.0)], n = 9}}),
("from <day-of-month> (ordinal or number) to <day-of-month> (ordinal or number) <named-month> (interval)",
Classifier{okData =
ClassData{prior = 0.0, unseen = -2.0794415416798357,
@ -1297,28 +1272,26 @@ classifiers
likelihoods = HashMap.fromList [("", 0.0)], n = 1}}),
("<hour-of-day> and integer",
Classifier{okData =
ClassData{prior = -1.3217558399823195, unseen = -2.890371757896165,
ClassData{prior = -1.252762968495368, unseen = -2.833213344056216,
likelihoods =
HashMap.fromList
[("at <time-of-day>integer (13..19)", -2.1400661634962708),
("at <time-of-day>integer 21..99", -1.7346010553881064),
("hour", -1.2237754316221157),
("at <time-of-day>integer (numeric)", -2.1400661634962708)],
[("at <time-of-day>integer (13..19)", -2.0794415416798357),
("at <time-of-day>integer 21..99", -1.6739764335716716),
("hour", -1.1631508098056809),
("at <time-of-day>integer (numeric)", -2.0794415416798357)],
n = 4},
koData =
ClassData{prior = -0.3101549283038396,
unseen = -3.4657359027997265,
ClassData{prior = -0.3364722366212129, unseen = -3.367295829986474,
likelihoods =
HashMap.fromList
[("at <time-of-day>integer 5", -2.3353749158170367),
("at <time-of-day>integer (13..19)", -2.3353749158170367),
("at <time-of-day>integer 3", -2.740840023925201),
("at <time-of-day>integer 4", -2.740840023925201),
("at <time-of-day>integer (20..90)", -2.740840023925201),
("at <time-of-day>integer 8", -2.3353749158170367),
("hour", -0.9490805546971459),
("at <time-of-day>integer (numeric)", -2.3353749158170367)],
n = 11}}),
[("at <time-of-day>integer 5", -2.2335922215070942),
("at <time-of-day>integer (13..19)", -2.2335922215070942),
("at <time-of-day>integer 3", -2.639057329615259),
("at <time-of-day>integer (20..90)", -2.639057329615259),
("at <time-of-day>integer 8", -2.2335922215070942),
("hour", -0.9343092373768334),
("at <time-of-day>integer (numeric)", -2.2335922215070942)],
n = 10}}),
("month (ordinal)",
Classifier{okData =
ClassData{prior = 0.0, unseen = -1.3862943611198906,
@ -1327,19 +1300,13 @@ classifiers
koData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0}}),
("integer 4",
Classifier{okData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0},
koData =
ClassData{prior = 0.0, unseen = -2.1972245773362196,
likelihoods = HashMap.fromList [("", 0.0)], n = 7}}),
("second (grain) ",
Classifier{okData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0},
ClassData{prior = -1.6094379124341003,
unseen = -1.0986122886681098,
likelihoods = HashMap.fromList [("", 0.0)], n = 1},
koData =
ClassData{prior = 0.0, unseen = -1.791759469228055,
ClassData{prior = -0.2231435513142097, unseen = -1.791759469228055,
likelihoods = HashMap.fromList [("", 0.0)], n = 4}}),
("\1593\1610\1583 \1575\1604\1601\1589\1581",
Classifier{okData =
@ -1459,12 +1426,11 @@ classifiers
n = 17}}),
("integer 3",
Classifier{okData =
ClassData{prior = -1.0560526742493137,
unseen = -2.3025850929940455,
ClassData{prior = 0.0, unseen = -2.3025850929940455,
likelihoods = HashMap.fromList [("", 0.0)], n = 8},
koData =
ClassData{prior = -0.4274440148269396, unseen = -2.833213344056216,
likelihoods = HashMap.fromList [("", 0.0)], n = 15}}),
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0}}),
("last <time>",
Classifier{okData =
ClassData{prior = -infinity, unseen = -1.791759469228055,
@ -1681,19 +1647,21 @@ classifiers
likelihoods = HashMap.fromList [("", 0.0)], n = 2}}),
("this|last the <cycle>",
Classifier{okData =
ClassData{prior = 0.0, unseen = -2.9444389791664407,
ClassData{prior = 0.0, unseen = -3.1354942159291497,
likelihoods =
HashMap.fromList
[("week", -1.791759469228055),
("month (grain)", -2.1972245773362196),
("year (grain)", -2.1972245773362196),
("week (grain)", -1.791759469228055),
("day", -2.1972245773362196), ("year", -2.1972245773362196),
("month", -2.1972245773362196),
("day (grain)", -2.1972245773362196)],
n = 5},
[("week", -1.9924301646902063),
("month (grain)", -2.3978952727983707),
("year (grain)", -2.3978952727983707),
("second", -2.3978952727983707),
("week (grain)", -1.9924301646902063),
("day", -2.3978952727983707), ("year", -2.3978952727983707),
("second (grain) ", -2.3978952727983707),
("month", -2.3978952727983707),
("day (grain)", -2.3978952727983707)],
n = 6},
koData =
ClassData{prior = -infinity, unseen = -2.1972245773362196,
ClassData{prior = -infinity, unseen = -2.3978952727983707,
likelihoods = HashMap.fromList [], n = 0}}),
("in <named-month>",
Classifier{okData =
@ -1759,28 +1727,27 @@ classifiers
n = 3}}),
("last <cycle> of <time>",
Classifier{okData =
ClassData{prior = -0.5596157879354228,
unseen = -2.9444389791664407,
ClassData{prior = -0.40546510810816444,
unseen = -2.833213344056216,
likelihoods =
HashMap.fromList
[("daymonth", -1.791759469228055),
[("daymonth", -1.6739764335716716),
("week (grain)intersect by \",\", \"of\", \"from\", \"'s\"",
-2.1972245773362196),
("day (grain)intersect", -2.1972245773362196),
("weekmonth", -1.791759469228055),
("week (grain)September", -2.1972245773362196),
("day (grain)month (integer)", -2.1972245773362196)],
-2.0794415416798357),
("day (grain)intersect", -2.0794415416798357),
("weekmonth", -1.6739764335716716),
("week (grain)September", -2.0794415416798357),
("day (grain)month (integer)", -2.0794415416798357)],
n = 4},
koData =
ClassData{prior = -0.8472978603872037, unseen = -2.833213344056216,
ClassData{prior = -1.0986122886681098,
unseen = -2.5649493574615367,
likelihoods =
HashMap.fromList
[("week (grain)<time> for <duration>", -2.0794415416798357),
("weekmonth", -1.6739764335716716),
("weekday", -2.0794415416798357),
("week (grain)intersect", -2.0794415416798357),
("week (grain)Saturday", -2.0794415416798357)],
n = 3}}),
[("week (grain)<time> for <duration>", -1.791759469228055),
("weekmonth", -1.3862943611198906),
("week (grain)intersect", -1.791759469228055)],
n = 2}}),
("ordinals (composite, e.g., eighty-seven)",
Classifier{okData =
ClassData{prior = 0.0, unseen = -1.3862943611198906,
@ -1832,19 +1799,6 @@ classifiers
koData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0}}),
("for <duration> from <time>",
Classifier{okData =
ClassData{prior = -infinity, unseen = -1.3862943611198906,
likelihoods = HashMap.fromList [], n = 0},
koData =
ClassData{prior = 0.0, unseen = -2.0794415416798357,
likelihoods =
HashMap.fromList
[("single <unit-of-duration>intersect", -1.252762968495368),
("single <unit-of-duration>in <named-month>",
-1.252762968495368),
("secondmonth", -0.8472978603872037)],
n = 2}}),
("<day-of-week> the last of <time>",
Classifier{okData =
ClassData{prior = 0.0, unseen = -2.3978952727983707,
@ -1888,15 +1842,6 @@ classifiers
koData =
ClassData{prior = -infinity, unseen = -1.6094379124341003,
likelihoods = HashMap.fromList [], n = 0}}),
("compose by multiplication",
Classifier{okData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0},
koData =
ClassData{prior = 0.0, unseen = -1.0986122886681098,
likelihoods =
HashMap.fromList [("integer (numeric)powers of tens", 0.0)],
n = 1}}),
("<month> dd-dd (interval)",
Classifier{okData =
ClassData{prior = 0.0, unseen = -2.3025850929940455,
@ -1953,7 +1898,7 @@ classifiers
n = 14}}),
("single <unit-of-duration>",
Classifier{okData =
ClassData{prior = -3.58351893845611, unseen = -3.1354942159291497,
ClassData{prior = -3.590439381300684, unseen = -3.1354942159291497,
likelihoods =
HashMap.fromList
[("week", -1.9924301646902063),
@ -1962,24 +1907,23 @@ classifiers
("day (grain)", -1.9924301646902063)],
n = 4},
koData =
ClassData{prior = -2.8170876966696335e-2,
unseen = -5.68697535633982,
ClassData{prior = -2.797385204240618e-2, unseen = -5.6937321388027,
likelihoods =
HashMap.fromList
[("week", -2.639057329615259),
("month (grain)", -2.249592562853535),
("hour (grain)", -1.9947003132247452),
("year (grain)", -2.793208009442517),
("second", -4.074141854904581),
("week (grain)", -2.639057329615259),
("day", -2.592537313980366),
("minute (grain)", -3.380994674344636),
("year", -2.793208009442517),
("second (grain) ", -4.074141854904581),
("hour", -1.9947003132247452), ("month", -2.249592562853535),
("minute", -3.380994674344636),
("day (grain)", -2.592537313980366)],
n = 140}}),
[("week", -2.6458370166006375),
("month (grain)", -2.2563722498389143),
("hour (grain)", -2.001480000210124),
("year (grain)", -2.7999876964278956),
("second", -3.898599985096005),
("week (grain)", -2.6458370166006375),
("day", -2.5993170009657445),
("minute (grain)", -3.3877743613300146),
("year", -2.7999876964278956),
("second (grain) ", -3.898599985096005),
("hour", -2.001480000210124), ("month", -2.2563722498389143),
("minute", -3.3877743613300146),
("day (grain)", -2.5993170009657445)],
n = 141}}),
("dd-dd <month> (interval)",
Classifier{okData =
ClassData{prior = 0.0, unseen = -2.3025850929940455,
@ -2005,10 +1949,8 @@ classifiers
likelihoods = HashMap.fromList [], n = 0}}),
("August",
Classifier{okData =
ClassData{prior = -2.0794415416798357,
unseen = -1.3862943611198906,
ClassData{prior = 0.0, unseen = -1.3862943611198906,
likelihoods = HashMap.fromList [("", 0.0)], n = 2},
koData =
ClassData{prior = -0.13353139262452263,
unseen = -2.772588722239781,
likelihoods = HashMap.fromList [("", 0.0)], n = 14}})]
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0}})]

View File

@ -65,13 +65,6 @@ classifiers
koData =
ClassData{prior = -infinity, unseen = -1.3862943611198906,
likelihoods = HashMap.fromList [], n = 0}}),
("noon|midnight|EOD|end of day",
Classifier{okData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0},
koData =
ClassData{prior = 0.0, unseen = -1.3862943611198906,
likelihoods = HashMap.fromList [("", 0.0)], n = 2}}),
("<hour-of-day> third",
Classifier{okData =
ClassData{prior = 0.0, unseen = -2.0794415416798357,
@ -119,7 +112,7 @@ classifiers
likelihoods = HashMap.fromList [], n = 0}}),
("<day-of-month> (ordinal or number) <named-month>",
Classifier{okData =
ClassData{prior = -0.3101549283038396,
ClassData{prior = -0.2411620568168881,
unseen = -3.4339872044851463,
likelihoods =
HashMap.fromList
@ -133,14 +126,13 @@ classifiers
("integer (numeric)March", -2.70805020110221)],
n = 11},
koData =
ClassData{prior = -1.3217558399823195, unseen = -2.833213344056216,
ClassData{prior = -1.540445040947149, unseen = -2.70805020110221,
likelihoods =
HashMap.fromList
[("integer (numeric)in <named-month>", -2.0794415416798357),
("integer (numeric)August", -2.0794415416798357),
("month", -1.1631508098056809),
("integer (numeric)July", -1.6739764335716716)],
n = 4}}),
[("integer (numeric)in <named-month>", -1.9459101490553135),
("month", -1.252762968495368),
("integer (numeric)July", -1.540445040947149)],
n = 3}}),
("<time> <part-of-day>",
Classifier{okData =
ClassData{prior = -0.41616039722491244,
@ -327,36 +319,37 @@ classifiers
likelihoods = HashMap.fromList [], n = 0}}),
("on <date>",
Classifier{okData =
ClassData{prior = -0.6931471805599453, unseen = -3.713572066704308,
ClassData{prior = -0.6131044728864089,
unseen = -3.7612001156935624,
likelihoods =
HashMap.fromList
[("<day-of-month> (ordinal or number) <named-month>",
-2.5902671654458267),
("September", -2.995732273553991),
-2.639057329615259),
("September", -3.044522437723423),
("second", -3.044522437723423),
("<day-of-month> (ordinal or number) of <named-month>",
-2.995732273553991),
("day", -1.3862943611198906), ("year", -2.5902671654458267),
("part of <named-month>", -2.0794415416798357),
("month", -2.995732273553991),
("year (integer)", -2.5902671654458267),
("this|last the <cycle>", -2.995732273553991),
("day of <named-month>", -2.995732273553991)],
n = 12},
-3.044522437723423),
("day", -1.4350845252893227), ("year", -2.639057329615259),
("part of <named-month>", -2.128231705849268),
("month", -3.044522437723423),
("year (integer)", -2.639057329615259),
("this|last the <cycle>", -2.639057329615259),
("day of <named-month>", -3.044522437723423)],
n = 13},
koData =
ClassData{prior = -0.6931471805599453, unseen = -3.713572066704308,
ClassData{prior = -0.7801585575495751,
unseen = -3.6635616461296463,
likelihoods =
HashMap.fromList
[("September", -2.995732273553991),
("October", -2.995732273553991),
("intersect", -2.0794415416798357),
("Saturday", -2.995732273553991),
("<time> for <duration>", -2.5902671654458267),
("day", -2.995732273553991),
("month (ordinal)", -2.995732273553991),
[("September", -2.9444389791664407),
("October", -2.9444389791664407),
("intersect", -2.0281482472922856),
("<time> for <duration>", -2.538973871058276),
("month (ordinal)", -2.9444389791664407),
("intersect by \",\", \"of\", \"from\", \"'s\"",
-2.5902671654458267),
("month", -1.2039728043259361)],
n = 12}}),
-2.538973871058276),
("month", -1.1526795099383855)],
n = 11}}),
("<hour-of-day> and integer minutes",
Classifier{okData =
ClassData{prior = -0.15415067982725836,
@ -459,166 +452,163 @@ classifiers
likelihoods = HashMap.fromList [("", 0.0)], n = 39}}),
("intersect",
Classifier{okData =
ClassData{prior = -0.569352963213281, unseen = -5.529429087511423,
ClassData{prior = -0.5458224658030868,
unseen = -5.5134287461649825,
likelihoods =
HashMap.fromList
[("Tuesdayon <date>", -4.832305758571839),
("in <named-month>year", -4.832305758571839),
[("Tuesdayon <date>", -4.816241156068032),
("in <named-month>year", -4.816241156068032),
("<hour-of-day> till thirdafter part of days",
-4.426840650463674),
("dayhour", -3.9160150266976834),
("daymonth", -4.832305758571839),
("in <named-month>on <date>", -4.832305758571839),
("monthyear", -3.3282283617955644),
("Mondayon <date>", -4.832305758571839),
("Christmasyear", -4.832305758571839),
-4.410776047959867),
("dayhour", -3.899950424193877),
("daymonth", -4.816241156068032),
("in <named-month>on <date>", -4.816241156068032),
("monthyear", -3.312163759291758),
("Mondayon <date>", -4.816241156068032),
("Christmasyear", -4.816241156068032),
("Monday<day-of-month> (ordinal or number) <named-month>",
-4.832305758571839),
("month (integer)year (integer)", -4.426840650463674),
-4.816241156068032),
("month (integer)year (integer)", -4.410776047959867),
("\1593\1610\1583 \1575\1604\1605\1610\1604\1575\1583year",
-4.832305758571839),
-4.816241156068032),
("<day-of-week> the last of <time><cycle> this|last|next",
-4.832305758571839),
("at <time-of-day>before part of days", -4.139158578011894),
("hh:mmbefore part of days", -4.832305758571839),
-4.816241156068032),
("at <time-of-day>before part of days", -4.123093975508087),
("hh:mmbefore part of days", -4.816241156068032),
("intersect by \",\", \"of\", \"from\", \"'s\"year",
-4.832305758571839),
("intersect<time> <part-of-day>", -4.426840650463674),
("month (ordinal)year (integer)", -4.832305758571839),
-4.816241156068032),
("intersect<time> <part-of-day>", -4.410776047959867),
("month (ordinal)year (integer)", -4.816241156068032),
("\1593\1610\1583 \1575\1604\1601\1589\1581year",
-4.832305758571839),
("dd/mm<time> <part-of-day>", -4.832305758571839),
("Thursday<time> timezone", -4.426840650463674),
-4.816241156068032),
("dd/mm<time> <part-of-day>", -4.816241156068032),
("Thursday<time> timezone", -4.410776047959867),
("<hour-of-day> till quarterthis <part-of-day>",
-4.426840650463674),
("dayday", -3.0405462893437836),
-4.410776047959867),
("dayday", -3.024481686839977),
("absorption of , after named day<day-of-month> (ordinal or number) <named-month>",
-4.832305758571839),
("hourhour", -3.733693469903729),
("month (integer)year", -4.832305758571839),
-4.816241156068032),
("hourhour", -3.717628867399922),
("month (integer)year", -4.816241156068032),
("\1593\1610\1583 \1575\1604\1601\1591\1585year",
-4.832305758571839),
-4.816241156068032),
("Thursday<day-of-month> (ordinal or number) <named-month>",
-4.832305758571839),
("dayyear", -2.960503581670247),
("Thursday<time> <part-of-day>", -4.832305758571839),
("<cycle> last of <time>year (integer)", -4.426840650463674),
("last <cycle> of <time>year (integer)", -4.832305758571839),
("<hour-of-day> quarterafter part of days", -4.832305758571839),
("minutehour", -2.529720665577793),
-4.816241156068032),
("dayyear", -2.9444389791664407),
("Thursday<time> <part-of-day>", -4.816241156068032),
("<cycle> last of <time>year (integer)", -4.410776047959867),
("last <cycle> of <time>year (integer)", -4.816241156068032),
("<hour-of-day> quarterafter part of days", -4.816241156068032),
("minutehour", -2.513656063073986),
("Thursdayfrom <datetime> - <datetime> (interval)",
-4.832305758571839),
-4.816241156068032),
("\1585\1571\1587 \1575\1604\1587\1606\1577 \1575\1604\1607\1580\1585\1610\1577year",
-4.832305758571839),
("Monday<cycle> this|last|next", -4.832305758571839),
-4.816241156068032),
("Monday<cycle> this|last|next", -4.816241156068032),
("<day-of-month> (ordinal or number) of <named-month>year",
-4.832305758571839),
("<cycle> <ordinal> day? of <time>year", -4.832305758571839),
-4.816241156068032),
("<cycle> <ordinal> day? of <time>year", -4.816241156068032),
("\1593\1610\1583 \1575\1604\1571\1590\1581\1609year",
-4.832305758571839),
("on a <named-day>dd/mm", -4.832305758571839),
("on a <named-day>on <date>", -4.832305758571839),
-4.816241156068032),
("on a <named-day>dd/mm", -4.816241156068032),
("on a <named-day>on <date>", -4.816241156068032),
("on a <named-day><day-of-month> (ordinal or number) <named-month>",
-4.832305758571839),
("Tuesday<cycle> this|last|next", -4.832305758571839),
("dayminute", -3.5795427900764705),
("dd/mmintersect", -4.832305758571839),
("in <named-month><cycle> this|last|next", -4.832305758571839),
-4.816241156068032),
("Tuesday<cycle> this|last|next", -4.816241156068032),
("dayminute", -3.563478187572664),
("dd/mmintersect", -4.816241156068032),
("in <named-month><cycle> this|last|next", -4.816241156068032),
("<hour-of-day> and integer minutesafter part of days",
-4.426840650463674),
("Saturdaydd/mm", -4.832305758571839),
("\1585\1605\1590\1575\1606year", -4.832305758571839),
("the <cycle> the <ordinal> of <time>year", -4.832305758571839),
("at <time-of-day>after part of days", -3.733693469903729),
("<ordinal> <cycle> of <time>year", -4.832305758571839),
("Sunday<cycle> this|last|next", -4.832305758571839),
-4.410776047959867),
("Saturdaydd/mm", -4.816241156068032),
("\1585\1605\1590\1575\1606year", -4.816241156068032),
("the <cycle> the <ordinal> of <time>year", -4.816241156068032),
("at <time-of-day>after part of days", -3.717628867399922),
("<ordinal> <cycle> of <time>year", -4.816241156068032),
("Sunday<cycle> this|last|next", -4.816241156068032),
("Tuesday<day-of-month> (ordinal or number) of <named-month>",
-4.426840650463674),
("Septemberon <date>", -4.832305758571839),
-4.410776047959867),
("Septemberon <date>", -4.816241156068032),
("from <datetime> - <datetime> (interval)after part of days",
-4.832305758571839),
("intersectintersect", -4.426840650463674),
("dayweek", -4.139158578011894),
("weekyear", -3.9160150266976834),
-4.816241156068032),
("intersectintersect", -4.410776047959867),
("dayweek", -4.123093975508087),
("weekyear", -3.899950424193877),
("<datetime> - <datetime> (interval)after part of days",
-4.832305758571839),
-4.816241156068032),
("Friday<day-of-month> (ordinal or number) <named-month>",
-4.832305758571839),
-4.816241156068032),
("<hour-of-day> till quarterbefore part of days",
-4.426840650463674),
-4.410776047959867),
("<time-of-day> - <time-of-day> (interval)after part of days",
-4.832305758571839),
("at <time-of-day>this <part-of-day>", -4.832305758571839),
-4.816241156068032),
("at <time-of-day>this <part-of-day>", -4.816241156068032),
("<hour-of-day> till thirdbefore part of days",
-4.426840650463674)],
-4.410776047959867)],
n = 73},
koData =
ClassData{prior = -0.8344607136265229, unseen = -5.384495062789089,
ClassData{prior = -0.8659899933993561, unseen = -5.337538079701318,
likelihoods =
HashMap.fromList
[("Tuesdayon <date>", -4.686750172980514),
("dayhour", -3.18267277620424),
("daymonth", -3.5881378843124043),
("monthday", -4.686750172980514),
("monthyear", -2.814947996078923),
("hh:mmafter part of days", -4.686750172980514),
("houryear", -4.28128506487235),
("intersectat <time-of-day>", -3.770459441106359),
("intersect<time> <part-of-day>", -4.28128506487235),
("Octoberon <date>", -4.686750172980514),
("until <time-of-day>after part of days", -4.686750172980514),
("this|last <cycle>on <date>", -4.686750172980514),
("Tuesdayafter <time-of-day>", -4.686750172980514),
("Wednesdayon <date>", -4.28128506487235),
("dd/mm<time> <part-of-day>", -4.686750172980514),
("yearyear", -4.28128506487235),
[("Tuesdayon <date>", -4.639571612705423),
("dayhour", -3.1354942159291497),
("daymonth", -3.5409593240373143),
("monthday", -4.639571612705423),
("monthyear", -2.7677694358038325),
("hh:mmafter part of days", -4.639571612705423),
("houryear", -4.639571612705423),
("intersectat <time-of-day>", -3.7232808808312687),
("intersect<time> <part-of-day>", -4.23410650459726),
("Octoberon <date>", -4.639571612705423),
("until <time-of-day>after part of days", -4.639571612705423),
("Tuesdayafter <time-of-day>", -4.639571612705423),
("Wednesdayon <date>", -4.23410650459726),
("dd/mm<time> <part-of-day>", -4.639571612705423),
("yearyear", -4.23410650459726),
("<hour-of-day> till quarterthis <part-of-day>",
-4.686750172980514),
("dayday", -3.993602992420569),
("dd/mmat <time-of-day>", -4.28128506487235),
("hourhour", -4.28128506487235),
("dayyear", -3.4339872044851463),
("Thursdayat <time-of-day>", -4.28128506487235),
("Februaryyear", -4.686750172980514),
("minutemonth", -4.686750172980514),
("minutehour", -4.28128506487235),
("for <duration> from <time>on <date>", -4.686750172980514),
-4.639571612705423),
("dayday", -3.9464244321454784),
("dd/mmat <time-of-day>", -4.23410650459726),
("hourhour", -4.23410650459726),
("dayyear", -3.386808644210056),
("Thursdayat <time-of-day>", -4.23410650459726),
("Februaryyear", -4.639571612705423),
("minutemonth", -4.639571612705423),
("minutehour", -4.23410650459726),
("<day-of-month> (ordinal or number) of <named-month>year",
-4.28128506487235),
("March<cycle> this|last|next", -4.686750172980514),
("nth <time> of <time>year", -4.686750172980514),
-4.23410650459726),
("March<cycle> this|last|next", -4.639571612705423),
("nth <time> of <time>year", -4.639571612705423),
("Wednesday<day-of-month> (ordinal or number) of <named-month>",
-4.686750172980514),
("secondyear", -4.28128506487235),
-4.639571612705423),
("secondyear", -4.639571612705423),
("Wednesdayintersect by \",\", \"of\", \"from\", \"'s\"",
-4.686750172980514),
("on <date>year (integer)", -4.686750172980514),
("<time> for <duration>year", -3.5881378843124043),
-4.639571612705423),
("on <date>year (integer)", -4.639571612705423),
("<time> for <duration>year", -3.5409593240373143),
("<duration> after|before|from <time>on <date>",
-4.686750172980514),
("dayminute", -3.18267277620424),
("Julyuntil <time-of-day>", -4.686750172980514),
("intersecton <date>", -4.686750172980514),
("at <time-of-day>after part of days", -4.686750172980514),
("Octoberyear", -4.686750172980514),
("in|within|after <duration>year", -4.28128506487235),
-4.639571612705423),
("dayminute", -3.1354942159291497),
("Julyuntil <time-of-day>", -4.639571612705423),
("intersecton <date>", -4.639571612705423),
("at <time-of-day>after part of days", -4.639571612705423),
("Octoberyear", -4.639571612705423),
("in|within|after <duration>year", -4.23410650459726),
("Tuesday<day-of-month> (ordinal or number) of <named-month>",
-4.686750172980514),
("on a <named-day>intersect", -4.28128506487235),
("Saturdayintersect", -4.28128506487235),
("at <time-of-day>year", -4.686750172980514),
("on <date>year", -4.28128506487235),
("on <date>on <date>", -4.686750172980514),
("weekday", -4.686750172980514),
("Tuesdaynth <time> of <time>", -4.686750172980514),
("weekyear", -4.686750172980514),
("Tuesdayintersect", -4.686750172980514),
("after <time-of-day>year", -4.686750172980514),
("in <number> (implicit minutes)February", -4.686750172980514),
("last <cycle> of <time>year", -4.686750172980514),
-4.639571612705423),
("on a <named-day>intersect", -4.23410650459726),
("Saturdayintersect", -4.23410650459726),
("on <date>year", -4.23410650459726),
("on <date>on <date>", -4.639571612705423),
("Tuesdaynth <time> of <time>", -4.639571612705423),
("weekyear", -4.639571612705423),
("Tuesdayintersect", -4.639571612705423),
("after <time-of-day>year", -4.639571612705423),
("in <number> (implicit minutes)February", -4.639571612705423),
("last <cycle> of <time>year", -4.639571612705423),
("<day-of-month> (ordinal or number) of <named-month>on <date>",
-4.686750172980514)],
n = 56}}),
-4.639571612705423)],
n = 53}}),
("\1585\1605\1590\1575\1606",
Classifier{okData =
ClassData{prior = 0.0, unseen = -1.0986122886681098,
@ -706,13 +696,11 @@ classifiers
likelihoods = HashMap.fromList [], n = 0}}),
("Saturday",
Classifier{okData =
ClassData{prior = -0.2876820724517809,
unseen = -1.6094379124341003,
ClassData{prior = 0.0, unseen = -1.6094379124341003,
likelihoods = HashMap.fromList [("", 0.0)], n = 3},
koData =
ClassData{prior = -1.3862943611198906,
unseen = -1.0986122886681098,
likelihoods = HashMap.fromList [("", 0.0)], n = 1}}),
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0}}),
("integer 21..99",
Classifier{okData =
ClassData{prior = 0.0, unseen = -1.6094379124341003,
@ -796,12 +784,11 @@ classifiers
likelihoods = HashMap.fromList [], n = 0}}),
("yesterday",
Classifier{okData =
ClassData{prior = -0.1823215567939546,
unseen = -1.9459101490553135,
ClassData{prior = 0.0, unseen = -1.9459101490553135,
likelihoods = HashMap.fromList [("", 0.0)], n = 5},
koData =
ClassData{prior = -1.791759469228055, unseen = -1.0986122886681098,
likelihoods = HashMap.fromList [("", 0.0)], n = 1}}),
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0}}),
("hh:mm:ss",
Classifier{okData =
ClassData{prior = 0.0, unseen = -1.0986122886681098,
@ -976,39 +963,37 @@ classifiers
likelihoods = HashMap.fromList [], n = 0}}),
("<day-of-month> (ordinal or number) of <named-month>",
Classifier{okData =
ClassData{prior = -0.6131044728864089,
unseen = -3.7376696182833684,
ClassData{prior = -0.570544858467613, unseen = -3.713572066704308,
likelihoods =
HashMap.fromList
[("ordinals (thirtieth..nineteenth)February",
-2.6149597780361984),
("ordinals (first..tenth)October", -2.6149597780361984),
("ordinals (first..tenth)April", -2.6149597780361984),
("ordinals (first..tenth)February", -3.0204248861443626),
("ordinals (first..tenth)month (integer)", -2.6149597780361984),
("ordinals (first..tenth)March", -2.6149597780361984),
("month", -1.074514737089049),
-2.5902671654458267),
("ordinals (first..tenth)October", -2.5902671654458267),
("ordinals (first..tenth)April", -2.5902671654458267),
("ordinals (first..tenth)February", -2.995732273553991),
("ordinals (first..tenth)month (integer)", -2.5902671654458267),
("ordinals (first..tenth)March", -2.5902671654458267),
("month", -1.0498221244986778),
("ordinals (first..tenth)in <named-month>",
-2.6149597780361984)],
-2.5902671654458267)],
n = 13},
koData =
ClassData{prior = -0.7801585575495751,
unseen = -3.6375861597263857,
ClassData{prior = -0.832909122935104, unseen = -3.5553480614894135,
likelihoods =
HashMap.fromList
[("integer 2in <named-month>", -2.917770732084279),
("integer 10February", -2.512305623976115),
("integer (numeric)month (ordinal)", -2.917770732084279),
("ordinals (first..tenth)month (integer)", -2.917770732084279),
("integer (20..90)in <named-month>", -2.917770732084279),
[("integer 2in <named-month>", -2.833213344056216),
("integer 10February", -2.4277482359480516),
("integer (numeric)month (ordinal)", -2.833213344056216),
("ordinals (first..tenth)month (integer)", -2.833213344056216),
("integer (20..90)in <named-month>", -2.833213344056216),
("ordinals (composite, e.g., eighty-seven)in <named-month>",
-2.917770732084279),
-2.833213344056216),
("ordinals (twenty, thirty..ninety)in <named-month>",
-2.917770732084279),
("month", -1.126011262856224),
("ordinals (first..tenth)in <named-month>", -2.512305623976115),
("ordinals (first..tenth)August", -2.917770732084279)],
n = 11}}),
-2.833213344056216),
("month", -1.128465251817791),
("ordinals (first..tenth)in <named-month>",
-2.4277482359480516)],
n = 10}}),
("this <part-of-day>",
Classifier{okData =
ClassData{prior = -1.0986122886681098,
@ -1045,13 +1030,6 @@ classifiers
koData =
ClassData{prior = -infinity, unseen = -1.9459101490553135,
likelihoods = HashMap.fromList [], n = 0}}),
("powers of tens",
Classifier{okData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0},
koData =
ClassData{prior = 0.0, unseen = -2.0794415416798357,
likelihoods = HashMap.fromList [("", 0.0)], n = 6}}),
("Friday",
Classifier{okData =
ClassData{prior = 0.0, unseen = -1.6094379124341003,
@ -1199,19 +1177,15 @@ classifiers
likelihoods = HashMap.fromList [], n = 0}}),
("year",
Classifier{okData =
ClassData{prior = -0.9932517730102834,
unseen = -2.5649493574615367,
likelihoods =
HashMap.fromList [("integer (numeric)", -8.701137698962981e-2)],
ClassData{prior = -0.7419373447293773,
unseen = -2.4849066497880004,
likelihoods = HashMap.fromList [("integer (numeric)", 0.0)],
n = 10},
koData =
ClassData{prior = -0.46262352194811296,
unseen = -2.995732273553991,
likelihoods =
HashMap.fromList
[("integer (numeric)", -0.4595323293784402),
("powers of tens", -0.9985288301111273)],
n = 17}}),
ClassData{prior = -0.6466271649250525,
unseen = -2.5649493574615367,
likelihoods = HashMap.fromList [("integer (numeric)", 0.0)],
n = 11}}),
("last <day-of-week> of <time>",
Classifier{okData =
ClassData{prior = 0.0, unseen = -2.0794415416798357,
@ -1256,12 +1230,13 @@ classifiers
likelihoods = HashMap.fromList [], n = 0}}),
("integer 10",
Classifier{okData =
ClassData{prior = -0.8472978603872037,
ClassData{prior = -0.6931471805599453,
unseen = -2.3978952727983707,
likelihoods = HashMap.fromList [("", 0.0)], n = 9},
koData =
ClassData{prior = -0.5596157879354228, unseen = -2.639057329615259,
likelihoods = HashMap.fromList [("", 0.0)], n = 12}}),
ClassData{prior = -0.6931471805599453,
unseen = -2.3978952727983707,
likelihoods = HashMap.fromList [("", 0.0)], n = 9}}),
("from <day-of-month> (ordinal or number) to <day-of-month> (ordinal or number) <named-month> (interval)",
Classifier{okData =
ClassData{prior = 0.0, unseen = -2.0794415416798357,
@ -1297,28 +1272,26 @@ classifiers
likelihoods = HashMap.fromList [("", 0.0)], n = 1}}),
("<hour-of-day> and integer",
Classifier{okData =
ClassData{prior = -1.3217558399823195, unseen = -2.890371757896165,
ClassData{prior = -1.252762968495368, unseen = -2.833213344056216,
likelihoods =
HashMap.fromList
[("at <time-of-day>integer (13..19)", -2.1400661634962708),
("at <time-of-day>integer 21..99", -1.7346010553881064),
("hour", -1.2237754316221157),
("at <time-of-day>integer (numeric)", -2.1400661634962708)],
[("at <time-of-day>integer (13..19)", -2.0794415416798357),
("at <time-of-day>integer 21..99", -1.6739764335716716),
("hour", -1.1631508098056809),
("at <time-of-day>integer (numeric)", -2.0794415416798357)],
n = 4},
koData =
ClassData{prior = -0.3101549283038396,
unseen = -3.4657359027997265,
ClassData{prior = -0.3364722366212129, unseen = -3.367295829986474,
likelihoods =
HashMap.fromList
[("at <time-of-day>integer 5", -2.3353749158170367),
("at <time-of-day>integer (13..19)", -2.3353749158170367),
("at <time-of-day>integer 3", -2.740840023925201),
("at <time-of-day>integer 4", -2.740840023925201),
("at <time-of-day>integer (20..90)", -2.740840023925201),
("at <time-of-day>integer 8", -2.3353749158170367),
("hour", -0.9490805546971459),
("at <time-of-day>integer (numeric)", -2.3353749158170367)],
n = 11}}),
[("at <time-of-day>integer 5", -2.2335922215070942),
("at <time-of-day>integer (13..19)", -2.2335922215070942),
("at <time-of-day>integer 3", -2.639057329615259),
("at <time-of-day>integer (20..90)", -2.639057329615259),
("at <time-of-day>integer 8", -2.2335922215070942),
("hour", -0.9343092373768334),
("at <time-of-day>integer (numeric)", -2.2335922215070942)],
n = 10}}),
("month (ordinal)",
Classifier{okData =
ClassData{prior = 0.0, unseen = -1.3862943611198906,
@ -1327,19 +1300,13 @@ classifiers
koData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0}}),
("integer 4",
Classifier{okData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0},
koData =
ClassData{prior = 0.0, unseen = -2.1972245773362196,
likelihoods = HashMap.fromList [("", 0.0)], n = 7}}),
("second (grain) ",
Classifier{okData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0},
ClassData{prior = -1.6094379124341003,
unseen = -1.0986122886681098,
likelihoods = HashMap.fromList [("", 0.0)], n = 1},
koData =
ClassData{prior = 0.0, unseen = -1.791759469228055,
ClassData{prior = -0.2231435513142097, unseen = -1.791759469228055,
likelihoods = HashMap.fromList [("", 0.0)], n = 4}}),
("\1593\1610\1583 \1575\1604\1601\1589\1581",
Classifier{okData =
@ -1459,12 +1426,11 @@ classifiers
n = 17}}),
("integer 3",
Classifier{okData =
ClassData{prior = -1.0560526742493137,
unseen = -2.3025850929940455,
ClassData{prior = 0.0, unseen = -2.3025850929940455,
likelihoods = HashMap.fromList [("", 0.0)], n = 8},
koData =
ClassData{prior = -0.4274440148269396, unseen = -2.833213344056216,
likelihoods = HashMap.fromList [("", 0.0)], n = 15}}),
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0}}),
("last <time>",
Classifier{okData =
ClassData{prior = -infinity, unseen = -1.791759469228055,
@ -1681,19 +1647,21 @@ classifiers
likelihoods = HashMap.fromList [("", 0.0)], n = 2}}),
("this|last the <cycle>",
Classifier{okData =
ClassData{prior = 0.0, unseen = -2.9444389791664407,
ClassData{prior = 0.0, unseen = -3.1354942159291497,
likelihoods =
HashMap.fromList
[("week", -1.791759469228055),
("month (grain)", -2.1972245773362196),
("year (grain)", -2.1972245773362196),
("week (grain)", -1.791759469228055),
("day", -2.1972245773362196), ("year", -2.1972245773362196),
("month", -2.1972245773362196),
("day (grain)", -2.1972245773362196)],
n = 5},
[("week", -1.9924301646902063),
("month (grain)", -2.3978952727983707),
("year (grain)", -2.3978952727983707),
("second", -2.3978952727983707),
("week (grain)", -1.9924301646902063),
("day", -2.3978952727983707), ("year", -2.3978952727983707),
("second (grain) ", -2.3978952727983707),
("month", -2.3978952727983707),
("day (grain)", -2.3978952727983707)],
n = 6},
koData =
ClassData{prior = -infinity, unseen = -2.1972245773362196,
ClassData{prior = -infinity, unseen = -2.3978952727983707,
likelihoods = HashMap.fromList [], n = 0}}),
("in <named-month>",
Classifier{okData =
@ -1759,28 +1727,27 @@ classifiers
n = 3}}),
("last <cycle> of <time>",
Classifier{okData =
ClassData{prior = -0.5596157879354228,
unseen = -2.9444389791664407,
ClassData{prior = -0.40546510810816444,
unseen = -2.833213344056216,
likelihoods =
HashMap.fromList
[("daymonth", -1.791759469228055),
[("daymonth", -1.6739764335716716),
("week (grain)intersect by \",\", \"of\", \"from\", \"'s\"",
-2.1972245773362196),
("day (grain)intersect", -2.1972245773362196),
("weekmonth", -1.791759469228055),
("week (grain)September", -2.1972245773362196),
("day (grain)month (integer)", -2.1972245773362196)],
-2.0794415416798357),
("day (grain)intersect", -2.0794415416798357),
("weekmonth", -1.6739764335716716),
("week (grain)September", -2.0794415416798357),
("day (grain)month (integer)", -2.0794415416798357)],
n = 4},
koData =
ClassData{prior = -0.8472978603872037, unseen = -2.833213344056216,
ClassData{prior = -1.0986122886681098,
unseen = -2.5649493574615367,
likelihoods =
HashMap.fromList
[("week (grain)<time> for <duration>", -2.0794415416798357),
("weekmonth", -1.6739764335716716),
("weekday", -2.0794415416798357),
("week (grain)intersect", -2.0794415416798357),
("week (grain)Saturday", -2.0794415416798357)],
n = 3}}),
[("week (grain)<time> for <duration>", -1.791759469228055),
("weekmonth", -1.3862943611198906),
("week (grain)intersect", -1.791759469228055)],
n = 2}}),
("ordinals (composite, e.g., eighty-seven)",
Classifier{okData =
ClassData{prior = 0.0, unseen = -1.3862943611198906,
@ -1832,19 +1799,6 @@ classifiers
koData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0}}),
("for <duration> from <time>",
Classifier{okData =
ClassData{prior = -infinity, unseen = -1.3862943611198906,
likelihoods = HashMap.fromList [], n = 0},
koData =
ClassData{prior = 0.0, unseen = -2.0794415416798357,
likelihoods =
HashMap.fromList
[("single <unit-of-duration>intersect", -1.252762968495368),
("single <unit-of-duration>in <named-month>",
-1.252762968495368),
("secondmonth", -0.8472978603872037)],
n = 2}}),
("<day-of-week> the last of <time>",
Classifier{okData =
ClassData{prior = 0.0, unseen = -2.3978952727983707,
@ -1888,15 +1842,6 @@ classifiers
koData =
ClassData{prior = -infinity, unseen = -1.6094379124341003,
likelihoods = HashMap.fromList [], n = 0}}),
("compose by multiplication",
Classifier{okData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0},
koData =
ClassData{prior = 0.0, unseen = -1.0986122886681098,
likelihoods =
HashMap.fromList [("integer (numeric)powers of tens", 0.0)],
n = 1}}),
("<month> dd-dd (interval)",
Classifier{okData =
ClassData{prior = 0.0, unseen = -2.3025850929940455,
@ -1953,7 +1898,7 @@ classifiers
n = 14}}),
("single <unit-of-duration>",
Classifier{okData =
ClassData{prior = -3.58351893845611, unseen = -3.1354942159291497,
ClassData{prior = -3.590439381300684, unseen = -3.1354942159291497,
likelihoods =
HashMap.fromList
[("week", -1.9924301646902063),
@ -1962,24 +1907,23 @@ classifiers
("day (grain)", -1.9924301646902063)],
n = 4},
koData =
ClassData{prior = -2.8170876966696335e-2,
unseen = -5.68697535633982,
ClassData{prior = -2.797385204240618e-2, unseen = -5.6937321388027,
likelihoods =
HashMap.fromList
[("week", -2.639057329615259),
("month (grain)", -2.249592562853535),
("hour (grain)", -1.9947003132247452),
("year (grain)", -2.793208009442517),
("second", -4.074141854904581),
("week (grain)", -2.639057329615259),
("day", -2.592537313980366),
("minute (grain)", -3.380994674344636),
("year", -2.793208009442517),
("second (grain) ", -4.074141854904581),
("hour", -1.9947003132247452), ("month", -2.249592562853535),
("minute", -3.380994674344636),
("day (grain)", -2.592537313980366)],
n = 140}}),
[("week", -2.6458370166006375),
("month (grain)", -2.2563722498389143),
("hour (grain)", -2.001480000210124),
("year (grain)", -2.7999876964278956),
("second", -3.898599985096005),
("week (grain)", -2.6458370166006375),
("day", -2.5993170009657445),
("minute (grain)", -3.3877743613300146),
("year", -2.7999876964278956),
("second (grain) ", -3.898599985096005),
("hour", -2.001480000210124), ("month", -2.2563722498389143),
("minute", -3.3877743613300146),
("day (grain)", -2.5993170009657445)],
n = 141}}),
("dd-dd <month> (interval)",
Classifier{okData =
ClassData{prior = 0.0, unseen = -2.3025850929940455,
@ -2005,10 +1949,8 @@ classifiers
likelihoods = HashMap.fromList [], n = 0}}),
("August",
Classifier{okData =
ClassData{prior = -2.0794415416798357,
unseen = -1.3862943611198906,
ClassData{prior = 0.0, unseen = -1.3862943611198906,
likelihoods = HashMap.fromList [("", 0.0)], n = 2},
koData =
ClassData{prior = -0.13353139262452263,
unseen = -2.772588722239781,
likelihoods = HashMap.fromList [("", 0.0)], n = 14}})]
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0}})]

View File

@ -22,19 +22,21 @@ import Duckling.Time.Types hiding (Month)
import Duckling.TimeGrain.Types hiding (add)
import Duckling.Testing.Types hiding (examples)
-- | Test context for this corpus: 'testContext' with its locale replaced by
-- @makeLocale AR Nothing@.
context :: Context
context = testContext {locale = makeLocale AR Nothing}
-- | Corpus triple: the context above, 'testOptions', and 'allExamples'.
corpus :: Corpus
corpus = (context, testOptions, allExamples)
-- | Negative corpus triple: the shared 'context' and 'testOptions' together
-- with the inputs listed as negative examples.
negativeCorpus :: NegativeCorpus
negativeCorpus =
  ( context
  , testOptions
  , [ "حب"
    , "اياب"
    , "ابحث"
    ]
  )
-- | Test context for this corpus: 'testContext' with its locale replaced by
-- @makeLocale AR Nothing@.
context :: Context
context = testContext {locale = makeLocale AR Nothing}
-- | Corpus triple: the context above, 'testOptions', and 'allExamples'.
corpus :: Corpus
corpus = (context, testOptions, allExamples)
allExamples :: [Example]
allExamples = concat
[ examples (datetime (2013, 2, 12, 4, 30, 0) Second)

View File

@ -125,9 +125,59 @@ data CharClass
-- the reasonability of the match to actually be a word.
isRangeValid :: Lang -> Document -> Int -> Int -> Bool
isRangeValid = \case
AR -> arIsRangeValid
ZH -> zhIsRangeValid
_ -> defaultIsRangeValid
where
-- | Range validity check for Arabic ('AR') documents.
--
-- The half-open character range @[start, end)@ of @doc@ is accepted when any
-- of the following holds:
--
-- * it starts and ends on a character-class boundary;
-- * it is itself a one- or two-letter proclitic that starts on a boundary;
-- * it is immediately preceded by a one-letter proclitic and ends on a
--   boundary;
-- * it is itself a two-letter enclitic that ends on a boundary;
-- * it starts on a boundary and is immediately followed by a two-letter
--   enclitic.
arIsRangeValid :: Document -> Int -> Int -> Bool
arIsRangeValid doc start end =
  (startsOnBoundary && endsOnBoundary) ||
  -- Is Arabic proclitic?
  (start == end - 1 &&
   isArabicProclitic (doc ! start) &&
   startsOnBoundary) ||
  (start == end - 2 &&
   isArabicProclitic2 (doc ! start) (doc ! (start + 1)) &&
   startsOnBoundary) ||
  -- Is preceded by proclitic
  (start /= 0 && isArabicProclitic (doc ! (start - 1)) &&
   endsOnBoundary) ||
  -- Is Arabic enclitic?
  -- The end boundary is tested between the last character of the range
  -- (end - 1) and the character after it (end), like every other clause.
  -- Comparing end with end + 1 would look one character too far and would
  -- index past the document when end == length doc - 1.
  (start == end - 2 &&
   isArabicEnclitic (doc ! start) (doc ! (end - 1)) &&
   endsOnBoundary) ||
  -- Is followed by enclitic
  (startsOnBoundary &&
   end <= length doc - 2 &&
   isArabicEnclitic (doc ! end) (doc ! (end + 1)))
  where
    -- The range begins the document, or the character before it belongs to
    -- a different class than its first character.
    startsOnBoundary :: Bool
    startsOnBoundary =
      start == 0 || isDifferent (doc ! (start - 1)) (doc ! start)
    -- The range finishes the document, or its last character belongs to a
    -- different class than the character right after it.
    endsOnBoundary :: Bool
    endsOnBoundary =
      end == length doc || isDifferent (doc ! (end - 1)) (doc ! end)
    -- This list isn't exhaustive since Arabic has some diacritics and rarely
    -- used characters in Unicode.
    isArabic :: Char -> Bool
    isArabic c = elem c ['ا', 'ب', 'ت', 'ة', 'ث', 'ج', 'ح', 'خ', 'د', 'ذ', 'ر', 'ز', 'س', 'ش', 'ص', 'ض', 'ط', 'ظ', 'ع', 'غ', 'ف', 'ق', 'ك', 'ل', 'م', 'ن', 'ه', 'ي', 'ء', 'آ', 'أ', 'إ', 'ؤ', 'و', 'ئ', 'ى']
    -- TODO: Add all Arabic proclitics
    -- One-letter proclitics.
    isArabicProclitic :: Char -> Bool
    isArabicProclitic c = elem c ['و', 'ف', 'ل', 'ب', 'ك']
    -- Two-letter proclitics, given as first and second character.
    isArabicProclitic2 :: Char -> Char -> Bool
    isArabicProclitic2 c1 c2 = elem c1 ['ا', 'ل'] && elem c2 ['ل']
    -- TODO: Add all Arabic enclitics
    -- Two-letter enclitics, given as first and second character.
    isArabicEnclitic :: Char -> Char -> Bool
    isArabicEnclitic c1 c2 = elem c1 ['ا', 'ي'] && elem c2 ['ن']
    -- Letters (including the Arabic letters listed in 'isArabic') share one
    -- class, digits another; any other character is a class of its own.
    charClass :: Char -> CharClass
    charClass c
      | Char.isLower c || Char.isUpper c || isArabic c = Alpha
      | Char.isDigit c = Digit
      | otherwise = Self c
    -- Two characters are "different" when they fall in different classes.
    isDifferent :: Char -> Char -> Bool
    isDifferent a b = charClass a /= charClass b
zhIsRangeValid :: Document -> Int -> Int -> Bool
zhIsRangeValid doc start end =
(start == 0 ||

View File

@ -20,4 +20,5 @@ import Duckling.Testing.Asserts
-- | Test group "AR Tests": one tree built from 'corpus' and one built from
-- 'negativeCorpus'.
-- NOTE(review): the corpus test is sealed with @Quantity@ while the negative
-- corpus test is sealed with @Time@ — confirm this mismatch is intended.
tests :: TestTree
tests = testGroup "AR Tests" [corpusTree, negativeTree]
  where
    corpusTree = makeCorpusTest [Seal Quantity] corpus
    negativeTree = makeNegativeCorpusTest [Seal Time] negativeCorpus