Add support for parsing day intervals beginning with "from", e.g. "from 10 to 16 August"

Summary: Added EN rule "ruleIntervalFromDDDDMonth" to support "from 10 to 16 August". Used "isDOMValue" helper rather than regex.

Reviewed By: patapizza

Differential Revision: D5610623

fbshipit-source-id: 00a5208
This commit is contained in:
Hiten Parmar 2017-08-12 01:52:48 -07:00 committed by Facebook Github Bot
parent 4c348b1b9d
commit be113689ac
3 changed files with 286 additions and 223 deletions

View File

@ -23,7 +23,7 @@ classifiers
= HashMap.fromList
[("<integer> to|till|before <hour-of-day>",
Classifier{okData =
ClassData{prior = -1.791759469228055, unseen = -1.791759469228055,
ClassData{prior = -1.9459101490553135, unseen = -1.791759469228055,
likelihoods =
HashMap.fromList
[("integer (numeric)noon|midnight|EOD|end of day",
@ -31,12 +31,13 @@ classifiers
("hour", -0.916290731874155)],
n = 1},
koData =
ClassData{prior = -0.1823215567939546, unseen = -2.639057329615259,
ClassData{prior = -0.15415067982725836,
unseen = -2.772588722239781,
likelihoods =
HashMap.fromList
[("hour", -0.7731898882334817),
("integer (numeric)time-of-day (latent)", -0.7731898882334817)],
n = 5}}),
[("hour", -0.7621400520468967),
("integer (numeric)time-of-day (latent)", -0.7621400520468967)],
n = 6}}),
("<time> timezone",
Classifier{okData =
ClassData{prior = 0.0, unseen = -2.4849066497880004,
@ -59,11 +60,12 @@ classifiers
likelihoods = HashMap.fromList [], n = 0}}),
("integer (numeric)",
Classifier{okData =
ClassData{prior = -0.845487905141964, unseen = -5.075173815233827,
likelihoods = HashMap.fromList [("", 0.0)], n = 158},
ClassData{prior = -0.8437200390393196,
unseen = -5.0875963352323845,
likelihoods = HashMap.fromList [("", 0.0)], n = 160},
koData =
ClassData{prior = -0.5609754074514622, unseen = -5.356586274672012,
likelihoods = HashMap.fromList [("", 0.0)], n = 210}}),
ClassData{prior = -0.562307579601134, unseen = -5.365976015021851,
likelihoods = HashMap.fromList [("", 0.0)], n = 212}}),
("<duration> hence|ago",
Classifier{okData =
ClassData{prior = 0.0, unseen = -3.784189633918261,
@ -159,23 +161,25 @@ classifiers
likelihoods = HashMap.fromList [], n = 0}}),
("<day-of-month> (ordinal or number) <named-month>",
Classifier{okData =
ClassData{prior = -0.6931471805599453, unseen = -2.639057329615259,
ClassData{prior = -0.9808292530117262, unseen = -2.70805020110221,
likelihoods =
HashMap.fromList
[("ordinal (digits)December", -1.8718021769015913),
("ordinal (digits)February", -1.8718021769015913),
("integer (numeric)April", -1.8718021769015913),
("month", -1.1786549963416462)],
[("ordinal (digits)December", -1.9459101490553135),
("ordinal (digits)February", -1.9459101490553135),
("integer (numeric)April", -1.9459101490553135),
("month", -1.252762968495368)],
n = 3},
koData =
ClassData{prior = -0.6931471805599453, unseen = -2.639057329615259,
ClassData{prior = -0.4700036292457356,
unseen = -2.9444389791664407,
likelihoods =
HashMap.fromList
[("integer (numeric)August", -1.8718021769015913),
("ordinal (digits)April", -1.8718021769015913),
("month", -1.1786549963416462),
("integer (numeric)July", -1.8718021769015913)],
n = 3}}),
[("ordinal (digits)July", -2.1972245773362196),
("integer (numeric)August", -2.1972245773362196),
("ordinal (digits)April", -2.1972245773362196),
("month", -1.0986122886681098),
("integer (numeric)July", -1.791759469228055)],
n = 5}}),
("<time> <part-of-day>",
Classifier{okData =
ClassData{prior = -9.844007281325252e-2,
@ -487,8 +491,8 @@ classifiers
likelihoods = HashMap.fromList [], n = 0}}),
("July",
Classifier{okData =
ClassData{prior = 0.0, unseen = -2.70805020110221,
likelihoods = HashMap.fromList [("", 0.0)], n = 13},
ClassData{prior = 0.0, unseen = -2.833213344056216,
likelihoods = HashMap.fromList [("", 0.0)], n = 15},
koData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0}}),
@ -529,224 +533,232 @@ classifiers
n = 3}}),
("intersect",
Classifier{okData =
ClassData{prior = -0.4546243440662479, unseen = -6.159095388491933,
ClassData{prior = -0.4634795031687923, unseen = -6.169610732491456,
likelihoods =
HashMap.fromList
[("<datetime> - <datetime> (interval)on <date>",
-4.547541073151455),
-4.558078578454241),
("<time-of-day> - <time-of-day> (interval)on <date>",
-4.547541073151455),
("hourday", -3.759083712787185),
("dayhour", -3.212540006419115),
("daymonth", -5.058366696917446),
("monthday", -5.058366696917446),
("monthyear", -3.8543938925915096),
("Tuesdaythe <day-of-month> (ordinal)", -5.4638318050256105),
("Christmasyear", -5.4638318050256105),
("houryear", -5.4638318050256105),
-4.558078578454241),
("hourday", -3.769621218089971),
("dayhour", -3.223077511721901),
("daymonth", -4.781222129768451),
("monthday", -5.0689042022202315),
("monthyear", -3.8649313978942956),
("Tuesdaythe <day-of-month> (ordinal)", -5.474369310328396),
("Christmasyear", -5.474369310328396),
("from <datetime> - <datetime> (interval)July",
-5.474369310328396),
("houryear", -5.474369310328396),
("this|next <day-of-week>hh(:mm) - <time-of-day> am|pm",
-5.4638318050256105),
-5.474369310328396),
("<time-of-day> am|pmintersect by \",\", \"of\", \"from\", \"'s\"",
-4.770684624465665),
("<time-of-day> am|pmintersect", -4.211068836530242),
-4.781222129768451),
("<time-of-day> am|pmintersect", -4.221606341833028),
("intersect by \",\", \"of\", \"from\", \"'s\"year",
-5.4638318050256105),
("Marchyear", -5.4638318050256105),
-5.474369310328396),
("Marchyear", -5.474369310328396),
("<named-month>|<named-day> <day-of-month> (ordinal)year",
-5.058366696917446),
("intersect<time-of-day> am|pm", -5.4638318050256105),
("Thursdayhh(:mm) - <time-of-day> am|pm", -5.4638318050256105),
("monthhour", -5.058366696917446),
("last <day-of-week> of <time>year", -5.4638318050256105),
("todayat <time-of-day>", -5.4638318050256105),
("Thursday<time> timezone", -5.058366696917446),
-5.0689042022202315),
("intersect<time-of-day> am|pm", -5.474369310328396),
("Thursdayhh(:mm) - <time-of-day> am|pm", -5.474369310328396),
("monthhour", -5.0689042022202315),
("last <day-of-week> of <time>year", -5.474369310328396),
("todayat <time-of-day>", -5.474369310328396),
("Thursday<time> timezone", -5.0689042022202315),
("this <time>hh(:mm) - <time-of-day> am|pm",
-5.4638318050256105),
("dayday", -3.4489287844833454),
("Thanksgiving Dayyear", -4.3652195163575005),
("<time> <part-of-day>at <time-of-day>", -5.4638318050256105),
("Tuesdayin <named-month>", -5.4638318050256105),
("mm/ddat <time-of-day>", -5.4638318050256105),
("tonightat <time-of-day>", -5.4638318050256105),
-5.474369310328396),
("dayday", -3.459466289786131),
("Thanksgiving Dayyear", -4.375757021660286),
("<time> <part-of-day>at <time-of-day>", -5.474369310328396),
("Tuesdayin <named-month>", -5.474369310328396),
("mm/ddat <time-of-day>", -5.474369310328396),
("tonightat <time-of-day>", -5.474369310328396),
("<time-of-day> am|pmabsorption of , after named day",
-4.770684624465665),
("today<time-of-day> am|pm", -5.4638318050256105),
("Februarythe <day-of-month> (ordinal)", -5.058366696917446),
("at <time-of-day><time> <part-of-day>", -5.4638318050256105),
("mm/dd<time-of-day> am|pm", -5.4638318050256105),
("hourhour", -4.211068836530242),
("<time-of-day> am|pmon <date>", -3.4489287844833454),
("Wednesdaythis|last|next <cycle>", -5.4638318050256105),
-4.781222129768451),
("today<time-of-day> am|pm", -5.474369310328396),
("Februarythe <day-of-month> (ordinal)", -5.0689042022202315),
("at <time-of-day><time> <part-of-day>", -5.474369310328396),
("mm/dd<time-of-day> am|pm", -5.474369310328396),
("hourhour", -4.221606341833028),
("<time-of-day> am|pmon <date>", -3.459466289786131),
("Wednesdaythis|last|next <cycle>", -5.474369310328396),
("intersect<named-month> <day-of-month> (non ordinal)",
-3.959754408249336),
("dayyear", -3.0214847696564058),
("last weekend of <named-month>year", -5.4638318050256105),
-3.970291913552122),
("dayyear", -3.0320222749591914),
("last weekend of <named-month>year", -5.474369310328396),
("<time-of-day> o'clockin|during the <part-of-day>",
-5.4638318050256105),
("<time-of-day> am|pmtomorrow", -4.770684624465665),
("minutehour", -4.547541073151455),
("Mother's Dayyear", -5.4638318050256105),
-5.474369310328396),
("<time-of-day> am|pmtomorrow", -4.781222129768451),
("minutehour", -4.558078578454241),
("Mother's Dayyear", -5.474369310328396),
("at <time-of-day>in|during the <part-of-day>",
-5.058366696917446),
-5.0689042022202315),
("absorption of , after named day<named-month> <day-of-month> (non ordinal)",
-3.759083712787185),
("for <duration> from <time>December", -5.4638318050256105),
("tomorrow<time-of-day> sharp|exactly", -5.4638318050256105),
-3.769621218089971),
("for <duration> from <time>December", -5.474369310328396),
("tomorrow<time-of-day> sharp|exactly", -5.474369310328396),
("Thursdayfrom <datetime> - <datetime> (interval)",
-4.547541073151455),
-4.558078578454241),
("on <date><named-month> <day-of-month> (non ordinal)",
-5.058366696917446),
-5.0689042022202315),
("Thursdayfrom <time-of-day> - <time-of-day> (interval)",
-4.547541073151455),
("Mondayin|during the <part-of-day>", -5.4638318050256105),
-4.558078578454241),
("Mondayin|during the <part-of-day>", -5.474369310328396),
("tomorrowfrom <time-of-day> - <time-of-day> (interval)",
-5.058366696917446),
("intersectin|during the <part-of-day>", -5.4638318050256105),
-5.0689042022202315),
("intersectin|during the <part-of-day>", -5.474369310328396),
("<day-of-month> (ordinal or number) of <named-month>in|during the <part-of-day>",
-5.4638318050256105),
-5.474369310328396),
("from <time-of-day> - <time-of-day> (interval)on <date>",
-4.770684624465665),
-4.781222129768451),
("intersect by \",\", \"of\", \"from\", \"'s\"<time-of-day> am|pm",
-4.547541073151455),
("at <time-of-day>intersect", -5.058366696917446),
-4.558078578454241),
("at <time-of-day>intersect", -5.0689042022202315),
("<time-of-day> - <time-of-day> (interval)tomorrow",
-5.4638318050256105),
-5.474369310328396),
("at <time-of-day>intersect by \",\", \"of\", \"from\", \"'s\"",
-5.4638318050256105),
("dayminute", -3.1612467120315646),
-5.474369310328396),
("dayminute", -3.1717842173343502),
("from <datetime> - <datetime> (interval)on <date>",
-5.058366696917446),
-5.0689042022202315),
("<datetime> - <datetime> (interval)tomorrow",
-5.4638318050256105),
-5.474369310328396),
("absorption of , after named dayintersect by \",\", \"of\", \"from\", \"'s\"",
-5.058366696917446),
("<ordinal> <cycle> of <time>year", -5.4638318050256105),
("minuteday", -2.0626344233634546),
-5.0689042022202315),
("<ordinal> <cycle> of <time>year", -5.474369310328396),
("minuteday", -2.0731719286662407),
("absorption of , after named dayintersect",
-5.4638318050256105),
("Octoberyear", -4.211068836530242),
-5.474369310328396),
("Octoberyear", -4.221606341833028),
("the <day-of-month> (ordinal)in|during the <part-of-day>",
-5.4638318050256105),
-5.474369310328396),
("at <time-of-day>absorption of , after named day",
-5.4638318050256105),
-5.474369310328396),
("<day-of-month> (ordinal or number) <named-month>year",
-5.4638318050256105),
("year<time-of-day> am|pm", -5.4638318050256105),
("Septemberyear", -5.058366696917446),
("at <time-of-day>on <date>", -4.3652195163575005),
-5.474369310328396),
("year<time-of-day> am|pm", -5.474369310328396),
("Septemberyear", -5.0689042022202315),
("at <time-of-day>on <date>", -4.375757021660286),
("between <time-of-day> and <time-of-day> (interval)on <date>",
-4.770684624465665),
("Halloweenyear", -5.4638318050256105),
("dayweek", -5.4638318050256105),
("weekyear", -5.058366696917446),
("hh:mmin|during the <part-of-day>", -4.770684624465665),
("Father's Dayyear", -5.4638318050256105),
-4.781222129768451),
("Halloweenyear", -5.474369310328396),
("dayweek", -5.474369310328396),
("weekyear", -5.0689042022202315),
("hh:mmin|during the <part-of-day>", -4.781222129768451),
("Father's Dayyear", -5.474369310328396),
("<cycle> after|before <time><time-of-day> am|pm",
-5.058366696917446),
("February<time> <part-of-day>", -5.4638318050256105),
("Martin Luther King's Dayyear", -5.058366696917446),
("tomorrowat <time-of-day>", -4.770684624465665),
("between <time> and <time>on <date>", -4.770684624465665),
("at <time-of-day>tomorrow", -5.058366696917446),
("tomorrow<time-of-day> am|pm", -5.4638318050256105),
-5.0689042022202315),
("February<time> <part-of-day>", -5.474369310328396),
("Martin Luther King's Dayyear", -5.0689042022202315),
("tomorrowat <time-of-day>", -4.781222129768451),
("between <time> and <time>on <date>", -4.781222129768451),
("at <time-of-day>tomorrow", -5.0689042022202315),
("tomorrow<time-of-day> am|pm", -5.474369310328396),
("in|during the <part-of-day>at <time-of-day>",
-5.4638318050256105),
("black fridayyear", -5.058366696917446),
("Labor Dayyear", -5.4638318050256105),
("Februaryintersect", -5.4638318050256105),
("last <cycle> of <time>year", -4.770684624465665),
-5.474369310328396),
("black fridayyear", -5.0689042022202315),
("Labor Dayyear", -5.474369310328396),
("Februaryintersect", -5.474369310328396),
("last <cycle> of <time>year", -4.781222129768451),
("<named-month> <day-of-month> (non ordinal)year",
-5.4638318050256105),
("yearminute", -5.4638318050256105)],
n = 172},
-5.474369310328396),
("yearminute", -5.474369310328396)],
n = 173},
koData =
ClassData{prior = -1.006998970745111, unseen = -5.7899601708972535,
ClassData{prior = -0.9917982843823002, unseen = -5.817111159963204,
likelihoods =
HashMap.fromList
[("in <named-month>year", -5.093750200806762),
[("in <named-month>year", -5.120983351265121),
("<time-of-day> - <time-of-day> (interval)on <date>",
-5.093750200806762),
("hourday", -5.093750200806762),
-5.120983351265121),
("hourday", -5.120983351265121),
("<named-month> <day-of-month> (non ordinal)July",
-5.093750200806762),
("dayhour", -3.3019907315787074),
("daymonth", -3.0143086591269266),
("monthday", -4.688285092698598),
("monthyear", -4.400603020246817),
("intersecthh:mm", -5.093750200806762),
("houryear", -5.093750200806762),
("until <time-of-day><time-of-day> am|pm", -5.093750200806762),
-5.120983351265121),
("dayhour", -3.329223882037066),
("daymonth", -3.041541809585285),
("monthday", -4.715518243156957),
("monthyear", -4.427836170705175),
("intersecthh:mm", -5.120983351265121),
("from <datetime> - <datetime> (interval)July",
-5.120983351265121),
("houryear", -5.120983351265121),
("from <time-of-day> - <time-of-day> (interval)July",
-5.120983351265121),
("until <time-of-day><time-of-day> am|pm", -5.120983351265121),
("<time-of-day> am|pmintersect by \",\", \"of\", \"from\", \"'s\"",
-4.400603020246817),
("<time-of-day> am|pmintersect", -3.8409872323113943),
-4.427836170705175),
("<time-of-day> am|pmintersect", -3.868220382769753),
("intersect by \",\", \"of\", \"from\", \"'s\"year",
-4.177459468932607),
("Tuesdayafter <time-of-day>", -5.093750200806762),
-4.204692619390966),
("Tuesdayafter <time-of-day>", -5.120983351265121),
("July<day-of-month> (ordinal or number) <named-month>",
-5.093750200806762),
("absorption of , after named dayJuly", -4.688285092698598),
("monthhour", -4.688285092698598),
("hourmonth", -5.093750200806762),
("todayat <time-of-day>", -5.093750200806762),
("dayday", -4.688285092698598),
("mm/ddat <time-of-day>", -4.688285092698598),
("<time-of-day> am|pmon <date>", -3.8409872323113943),
("dayyear", -3.8409872323113943),
("Thursdaymm/dd", -5.093750200806762),
("Thursdayat <time-of-day>", -5.093750200806762),
("August<time-of-day> am|pm", -5.093750200806762),
("monthminute", -5.093750200806762),
("<time-of-day> am|pmtomorrow", -5.093750200806762),
("Thursdayhh:mm", -5.093750200806762),
-5.120983351265121),
("absorption of , after named dayJuly", -4.715518243156957),
("monthhour", -4.715518243156957),
("hourmonth", -4.427836170705175),
("todayat <time-of-day>", -5.120983351265121),
("dayday", -4.715518243156957),
("mm/ddat <time-of-day>", -4.715518243156957),
("<time-of-day> am|pmon <date>", -3.868220382769753),
("dayyear", -3.868220382769753),
("Thursdaymm/dd", -5.120983351265121),
("Thursdayat <time-of-day>", -5.120983351265121),
("August<time-of-day> am|pm", -5.120983351265121),
("monthminute", -5.120983351265121),
("<time-of-day> am|pmtomorrow", -5.120983351265121),
("Thursdayhh:mm", -5.120983351265121),
("August<day-of-month> (ordinal or number) <named-month>",
-5.093750200806762),
("Fridayyear", -4.688285092698598),
("minutemonth", -3.589672804030488),
-5.120983351265121),
("Fridayyear", -4.715518243156957),
("minutemonth", -3.5115454388310208),
("Thursdayfrom <datetime> - <datetime> (interval)",
-4.688285092698598),
-4.715518243156957),
("Thursdayfrom <time-of-day> - <time-of-day> (interval)",
-4.688285092698598),
("Aprilyear", -5.093750200806762),
-4.715518243156957),
("Aprilyear", -5.120983351265121),
("mm/dd<time-of-day> - <time-of-day> (interval)",
-4.688285092698598),
-4.715518243156957),
("tomorrowfrom <time-of-day> - <time-of-day> (interval)",
-5.093750200806762),
-5.120983351265121),
("<duration> after|before|from <time>December",
-5.093750200806762),
("yesterday<time-of-day> am|pm", -5.093750200806762),
-5.120983351265121),
("yesterday<time-of-day> am|pm", -5.120983351265121),
("intersect by \",\", \"of\", \"from\", \"'s\"hh:mm",
-4.177459468932607),
-4.204692619390966),
("<named-month> <day-of-month> (non ordinal)August",
-5.093750200806762),
("until <time-of-day>on <date>", -4.400603020246817),
("at <time-of-day>intersect", -4.688285092698598),
-5.120983351265121),
("until <time-of-day>on <date>", -4.427836170705175),
("at <time-of-day>intersect", -4.715518243156957),
("at <time-of-day>intersect by \",\", \"of\", \"from\", \"'s\"",
-5.093750200806762),
("dayminute", -3.147840051751449),
("intersectSeptember", -3.589672804030488),
-5.120983351265121),
("dayminute", -3.175073202209808),
("intersectSeptember", -3.616905954488847),
("absorption of , after named dayintersect by \",\", \"of\", \"from\", \"'s\"",
-5.093750200806762),
("minuteday", -2.2903898199002275),
-5.120983351265121),
("minuteday", -2.317622970358586),
("absorption of , after named dayintersect",
-5.093750200806762),
("Februaryin|during the <part-of-day>", -5.093750200806762),
("week-endin <named-month>", -5.093750200806762),
("Octoberyear", -5.093750200806762),
("yearhh:mm", -5.093750200806762),
("hh:mmon <date>", -3.589672804030488),
-5.120983351265121),
("Februaryin|during the <part-of-day>", -5.120983351265121),
("week-endin <named-month>", -5.120983351265121),
("Octoberyear", -5.120983351265121),
("yearhh:mm", -5.120983351265121),
("hh:mmon <date>", -3.616905954488847),
("absorption of , after named daySeptember",
-4.177459468932607),
("on <date>September", -4.688285092698598),
("at <time-of-day>on <date>", -4.688285092698598),
("absorption of , after named dayFebruary", -4.177459468932607),
-4.204692619390966),
("on <date>September", -4.715518243156957),
("at <time-of-day>on <date>", -4.715518243156957),
("absorption of , after named dayFebruary", -4.204692619390966),
("July<integer> to|till|before <hour-of-day>",
-5.093750200806762),
("tomorrowat <time-of-day>", -5.093750200806762),
("tomorrow<time-of-day> am|pm", -5.093750200806762),
("after <time-of-day><time-of-day> am|pm", -5.093750200806762),
("after <time-of-day>year", -5.093750200806762),
("yearminute", -5.093750200806762)],
n = 99}}),
-5.120983351265121),
("tomorrowat <time-of-day>", -5.120983351265121),
("<integer> to|till|before <hour-of-day>July",
-5.120983351265121),
("tomorrow<time-of-day> am|pm", -5.120983351265121),
("after <time-of-day><time-of-day> am|pm", -5.120983351265121),
("after <time-of-day>year", -5.120983351265121),
("yearminute", -5.120983351265121)],
n = 102}}),
("after lunch/work/school",
Classifier{okData =
ClassData{prior = 0.0, unseen = -1.3862943611198906,
@ -805,33 +817,45 @@ classifiers
likelihoods = HashMap.fromList [], n = 0}}),
("from <datetime> - <datetime> (interval)",
Classifier{okData =
ClassData{prior = -0.916290731874155, unseen = -3.2188758248682006,
ClassData{prior = -1.0498221244986778,
unseen = -3.4965075614664802,
likelihoods =
HashMap.fromList
[("minuteminute", -1.5686159179138452),
("hh:mmhh:mm", -1.5686159179138452),
("<time-of-day> am|pmtime-of-day (latent)",
-2.4849066497880004),
("hourhour", -2.0794415416798357),
("<time-of-day> am|pm<time-of-day> am|pm",
-2.4849066497880004)],
n = 6},
[("minuteminute", -1.8562979903656263),
("<day-of-month> (ordinal)<day-of-month> (ordinal)",
-2.772588722239781),
("hh:mmhh:mm", -1.8562979903656263),
("dayday", -2.772588722239781),
("<time-of-day> am|pmtime-of-day (latent)", -2.772588722239781),
("hourhour", -2.367123614131617),
("<time-of-day> am|pm<time-of-day> am|pm", -2.772588722239781)],
n = 7},
koData =
ClassData{prior = -0.5108256237659907,
unseen = -3.4339872044851463,
ClassData{prior = -0.4307829160924542,
unseen = -3.8066624897703196,
likelihoods =
HashMap.fromList
[("dayhour", -2.70805020110221),
("hh:mmtime-of-day (latent)", -1.791759469228055),
("minuteminute", -2.3025850929940455),
("time-of-day (latent)time-of-day (latent)", -2.70805020110221),
("hourhour", -2.3025850929940455),
("minutehour", -1.791759469228055),
("hh:mmintersect", -2.3025850929940455),
("time-of-day (latent)<time-of-day> am|pm", -2.70805020110221),
[("hourday", -3.0910424533583156),
("dayhour", -2.6855773452501515),
("<day-of-month> (ordinal)time-of-day (latent)",
-3.0910424533583156),
("hh:mmtime-of-day (latent)", -2.174751721484161),
("minuteminute", -2.6855773452501515),
("time-of-day (latent)time-of-day (latent)",
-2.6855773452501515),
("dayday", -3.0910424533583156),
("hourhour", -2.3978952727983707),
("minutehour", -2.174751721484161),
("hh:mmintersect", -2.6855773452501515),
("<day-of-month> (ordinal)<day-of-month> (ordinal or number) <named-month>",
-3.0910424533583156),
("time-of-day (latent)<time-of-day> am|pm",
-3.0910424533583156),
("time-of-day (latent)<day-of-month> (ordinal or number) <named-month>",
-3.0910424533583156),
("<named-month> <day-of-month> (non ordinal)time-of-day (latent)",
-2.70805020110221)],
n = 9}}),
-3.0910424533583156)],
n = 13}}),
("Saturday",
Classifier{okData =
ClassData{prior = 0.0, unseen = -2.0794415416798357,
@ -864,7 +888,7 @@ classifiers
likelihoods = HashMap.fromList [], n = 0}}),
("from <time-of-day> - <time-of-day> (interval)",
Classifier{okData =
ClassData{prior = -0.6359887667199967,
ClassData{prior = -0.6931471805599453,
unseen = -3.4339872044851463,
likelihoods =
HashMap.fromList
@ -879,17 +903,17 @@ classifiers
("<time-of-day> am|pm<time-of-day> am|pm", -2.70805020110221)],
n = 9},
koData =
ClassData{prior = -0.7537718023763802, unseen = -3.367295829986474,
ClassData{prior = -0.6931471805599453,
unseen = -3.4339872044851463,
likelihoods =
HashMap.fromList
[("hh:mmtime-of-day (latent)", -1.540445040947149),
[("hh:mmtime-of-day (latent)", -1.6094379124341003),
("time-of-day (latent)time-of-day (latent)",
-2.2335922215070942),
("hourhour", -1.9459101490553135),
("minutehour", -1.540445040947149),
("time-of-day (latent)<time-of-day> am|pm",
-2.639057329615259)],
n = 8}}),
-2.0149030205422647),
("hourhour", -1.791759469228055),
("minutehour", -1.6094379124341003),
("time-of-day (latent)<time-of-day> am|pm", -2.70805020110221)],
n = 9}}),
("integer 21..99",
Classifier{okData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
@ -1310,17 +1334,18 @@ classifiers
n = 1}}),
("time-of-day (latent)",
Classifier{okData =
ClassData{prior = -0.5207759546191588, unseen = -4.143134726391533,
ClassData{prior = -0.5500463369192721, unseen = -4.143134726391533,
likelihoods =
HashMap.fromList
[("integer (numeric)", -0.10178269430994236),
("integer (0..19)", -2.3353749158170367)],
n = 60},
koData =
ClassData{prior = -0.9015484501369516, unseen = -3.784189633918261,
ClassData{prior = -0.8602012652231115,
unseen = -3.8501476017100584,
likelihoods =
HashMap.fromList [("integer (numeric)", -2.353049741019416e-2)],
n = 41}}),
HashMap.fromList [("integer (numeric)", -2.197890671877523e-2)],
n = 44}}),
("year",
Classifier{okData =
ClassData{prior = -0.2113090936672069, unseen = -3.58351893845611,
@ -1430,6 +1455,18 @@ classifiers
("hour", -1.0082282271998406), ("minute", -2.917770732084279),
("after <time-of-day>", -3.6109179126442243)],
n = 29}}),
("from <day-of-month> (ordinal or number) to <day-of-month> (ordinal or number) <named-month> (interval)",
Classifier{okData =
ClassData{prior = 0.0, unseen = -2.0794415416798357,
likelihoods =
HashMap.fromList
[("ordinal (digits)ordinal (digits)July", -1.252762968495368),
("integer (numeric)integer (numeric)July", -1.252762968495368),
("month", -0.8472978603872037)],
n = 2},
koData =
ClassData{prior = -infinity, unseen = -1.3862943611198906,
likelihoods = HashMap.fromList [], n = 0}}),
("Thanksgiving Day",
Classifier{okData =
ClassData{prior = 0.0, unseen = -2.5649493574615367,
@ -1721,8 +1758,8 @@ classifiers
likelihoods = HashMap.fromList [], n = 0}}),
("<day-of-month> (ordinal)",
Classifier{okData =
ClassData{prior = 0.0, unseen = -1.3862943611198906,
likelihoods = HashMap.fromList [("ordinal (digits)", 0.0)], n = 2},
ClassData{prior = 0.0, unseen = -1.791759469228055,
likelihoods = HashMap.fromList [("ordinal (digits)", 0.0)], n = 4},
koData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0}}),
@ -2063,8 +2100,8 @@ classifiers
likelihoods = HashMap.fromList [], n = 0}}),
("ordinal (digits)",
Classifier{okData =
ClassData{prior = 0.0, unseen = -3.2188758248682006,
likelihoods = HashMap.fromList [("", 0.0)], n = 23},
ClassData{prior = 0.0, unseen = -3.295836866004329,
likelihoods = HashMap.fromList [("", 0.0)], n = 25},
koData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0}}),

View File

@ -707,6 +707,8 @@ allExamples = concat
, "July 13 through 15"
, "July 13 - July 15"
, "from July 13-15"
, "from 13 to 15 July"
, "from 13th to 15th July"
]
, examples (datetimeInterval ((2013, 8, 8, 0, 0, 0), (2013, 8, 13, 0, 0, 0)) Day)
[ "Aug 8 - Aug 12"

View File

@ -971,6 +971,29 @@ ruleIntervalFromMonthDDDD = Rule
_ -> Nothing
}
-- | Day-of-month range that opens with "from" and names the month only once,
-- at the end, e.g. "from 10 to 16 August".
--
-- Each of the two day tokens is combined with the trailing month token via
-- 'intersectDOM'; the two results are then joined by 'interval' using
-- 'TTime.Closed'. If any of those steps yields 'Nothing', or the token list
-- does not have the expected shape, the production yields 'Nothing'.
ruleIntervalFromDDDDMonth :: Rule
ruleIntervalFromDDDDMonth = Rule
  { name = "from <day-of-month> (ordinal or number) to <day-of-month> (ordinal or number) <named-month> (interval)"
  , pattern =
    [ regex "from"
    , Predicate isDOMValue
    , regex "\\-|to|th?ru|through|(un)?til(l)?"
    , Predicate isDOMValue
    , Predicate isAMonth
    ]
  , prod = \tokens -> case tokens of
      -- Positions: "from", start day, separator, end day, month.
      (_ : startTok : _ : endTok : Token Time monthData : _) ->
        intersectDOM monthData startTok >>= \startDay ->
        intersectDOM monthData endTok >>= \endDay ->
        Token Time <$> interval TTime.Closed startDay endDay
      _ -> Nothing
  }
-- Blocked for :latent time. May need to accept certain latents only, like hours
ruleIntervalDash :: Rule
ruleIntervalDash = Rule
@ -1667,6 +1690,7 @@ rules =
, ruleTODPrecision
, rulePrecisionTOD
, ruleIntervalFromMonthDDDD
, ruleIntervalFromDDDDMonth
, ruleIntervalMonthDDDD
, ruleIntervalDash
, ruleIntervalFrom