2017-03-08 21:33:55 +03:00
|
|
|
-- Copyright (c) 2016-present, Facebook, Inc.
|
|
|
|
-- All rights reserved.
|
|
|
|
--
|
|
|
|
-- This source code is licensed under the BSD-style license found in the
|
|
|
|
-- LICENSE file in the root directory of this source tree. An additional grant
|
|
|
|
-- of patent rights can be found in the PATENTS file in the same directory.
|
|
|
|
|
|
|
|
-----------------------------------------------------------------
|
|
|
|
-- Auto-generated by regenClassifiers
|
|
|
|
--
|
|
|
|
-- DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
|
|
|
|
-- @generated
|
|
|
|
-----------------------------------------------------------------
|
|
|
|
{-# LANGUAGE OverloadedStrings #-}
|
|
|
|
module Duckling.Ranking.Classifiers.SV (classifiers) where
|
|
|
|
import Prelude
|
|
|
|
import Duckling.Ranking.Types
|
|
|
|
import qualified Data.HashMap.Strict as HashMap
|
|
|
|
import Data.String
|
|
|
|
|
|
|
|
classifiers :: Classifiers
|
|
|
|
classifiers
|
|
|
|
= HashMap.fromList
|
|
|
|
[("<time> timezone",
|
|
|
|
Classifier{okData =
|
|
|
|
ClassData{prior = 0.0, unseen = -3.044522437723423,
|
|
|
|
likelihoods =
|
|
|
|
HashMap.fromList
|
|
|
|
[("at <time-of-day>", -1.2039728043259361),
|
|
|
|
("hh:mm", -1.6094379124341003), ("hour", -1.6094379124341003),
|
|
|
|
("minute", -1.2039728043259361)],
|
|
|
|
n = 8},
|
|
|
|
koData =
|
|
|
|
ClassData{prior = -infinity, unseen = -1.6094379124341003,
|
|
|
|
likelihoods = HashMap.fromList [], n = 0}}),
|
|
|
|
("integer (numeric)",
|
|
|
|
Classifier{okData =
|
|
|
|
ClassData{prior = -0.7672551527136672, unseen = -4.882801922586371,
|
|
|
|
likelihoods = HashMap.fromList [("", 0.0)], n = 130},
|
|
|
|
koData =
|
|
|
|
ClassData{prior = -0.6241543090729939,
|
|
|
|
unseen = -5.0238805208462765,
|
|
|
|
likelihoods = HashMap.fromList [("", 0.0)], n = 150}}),
|
|
|
|
("the day before yesterday",
|
|
|
|
Classifier{okData =
|
|
|
|
ClassData{prior = 0.0, unseen = -1.0986122886681098,
|
|
|
|
likelihoods = HashMap.fromList [("", 0.0)], n = 1},
|
|
|
|
koData =
|
|
|
|
ClassData{prior = -infinity, unseen = -0.6931471805599453,
|
|
|
|
likelihoods = HashMap.fromList [], n = 0}}),
|
|
|
|
("lunch",
|
|
|
|
Classifier{okData =
|
|
|
|
ClassData{prior = -0.6931471805599453,
|
|
|
|
unseen = -1.0986122886681098,
|
|
|
|
likelihoods = HashMap.fromList [("", 0.0)], n = 1},
|
|
|
|
koData =
|
|
|
|
ClassData{prior = -0.6931471805599453,
|
|
|
|
unseen = -1.0986122886681098,
|
|
|
|
likelihoods = HashMap.fromList [("", 0.0)], n = 1}}),
|
|
|
|
("integer (20..90)",
|
|
|
|
Classifier{okData =
|
|
|
|
ClassData{prior = 0.0, unseen = -1.0986122886681098,
|
|
|
|
likelihoods = HashMap.fromList [("", 0.0)], n = 1},
|
|
|
|
koData =
|
|
|
|
ClassData{prior = -infinity, unseen = -0.6931471805599453,
|
|
|
|
likelihoods = HashMap.fromList [], n = 0}}),
|
|
|
|
("<time> <part-of-day>",
|
|
|
|
Classifier{okData =
|
|
|
|
ClassData{prior = 0.0, unseen = -2.639057329615259,
|
|
|
|
likelihoods =
|
|
|
|
HashMap.fromList
|
|
|
|
[("dayhour", -0.9555114450274363),
|
|
|
|
("tomorrowevening", -1.8718021769015913),
|
|
|
|
("named-daymorning", -1.8718021769015913),
|
|
|
|
("tomorrowlunch", -1.8718021769015913),
|
|
|
|
("yesterdayevening", -1.8718021769015913)],
|
|
|
|
n = 4},
|
|
|
|
koData =
|
|
|
|
ClassData{prior = -infinity, unseen = -1.791759469228055,
|
|
|
|
likelihoods = HashMap.fromList [], n = 0}}),
|
|
|
|
("couple, a pair",
|
|
|
|
Classifier{okData =
|
|
|
|
ClassData{prior = 0.0, unseen = -1.0986122886681098,
|
|
|
|
likelihoods = HashMap.fromList [("", 0.0)], n = 1},
|
|
|
|
koData =
|
|
|
|
ClassData{prior = -infinity, unseen = -0.6931471805599453,
|
|
|
|
likelihoods = HashMap.fromList [], n = 0}}),
|
|
|
|
("dd/mm",
|
|
|
|
Classifier{okData =
|
|
|
|
ClassData{prior = -0.2231435513142097,
|
|
|
|
unseen = -2.3025850929940455,
|
|
|
|
likelihoods = HashMap.fromList [("", 0.0)], n = 8},
|
|
|
|
koData =
|
|
|
|
ClassData{prior = -1.6094379124341003,
|
|
|
|
unseen = -1.3862943611198906,
|
|
|
|
likelihoods = HashMap.fromList [("", 0.0)], n = 2}}),
|
|
|
|
("today",
|
|
|
|
Classifier{okData =
|
|
|
|
ClassData{prior = 0.0, unseen = -1.6094379124341003,
|
|
|
|
likelihoods = HashMap.fromList [("", 0.0)], n = 3},
|
|
|
|
koData =
|
|
|
|
ClassData{prior = -infinity, unseen = -0.6931471805599453,
|
|
|
|
likelihoods = HashMap.fromList [], n = 0}}),
|
|
|
|
("at <time-of-day>",
|
|
|
|
Classifier{okData =
|
|
|
|
ClassData{prior = -0.2607262624632527, unseen = -4.624972813284271,
|
|
|
|
likelihoods =
|
|
|
|
HashMap.fromList
|
|
|
|
[("<time> timezone", -3.5165082281731497),
|
|
|
|
("time-of-day (latent)", -1.2139231351791042),
|
|
|
|
("relative minutes after|past <integer> (hour-of-day)",
|
|
|
|
-3.5165082281731497),
|
|
|
|
("hh:mm", -2.0501711593797225),
|
|
|
|
("<time-of-day> sharp", -3.5165082281731497),
|
|
|
|
("hour", -1.149384614041533), ("minute", -1.7819071727850433)],
|
|
|
|
n = 47},
|
|
|
|
koData =
|
|
|
|
ClassData{prior = -1.4718165345580525, unseen = -3.58351893845611,
|
|
|
|
likelihoods =
|
|
|
|
HashMap.fromList
|
|
|
|
[("time-of-day (latent)", -0.8472978603872037),
|
|
|
|
("hour", -0.8472978603872037)],
|
|
|
|
n = 14}}),
|
|
|
|
("absorption of , after named day",
|
|
|
|
Classifier{okData =
|
|
|
|
ClassData{prior = 0.0, unseen = -2.9444389791664407,
|
|
|
|
likelihoods =
|
|
|
|
HashMap.fromList
|
|
|
|
[("day", -0.6931471805599453),
|
|
|
|
("named-day", -0.6931471805599453)],
|
|
|
|
n = 8},
|
|
|
|
koData =
|
|
|
|
ClassData{prior = -infinity, unseen = -1.0986122886681098,
|
|
|
|
likelihoods = HashMap.fromList [], n = 0}}),
|
|
|
|
("tonight",
|
|
|
|
Classifier{okData =
|
|
|
|
ClassData{prior = 0.0, unseen = -1.0986122886681098,
|
|
|
|
likelihoods = HashMap.fromList [("", 0.0)], n = 1},
|
|
|
|
koData =
|
|
|
|
ClassData{prior = -infinity, unseen = -0.6931471805599453,
|
|
|
|
likelihoods = HashMap.fromList [], n = 0}}),
|
|
|
|
("on <date>",
|
|
|
|
Classifier{okData =
|
|
|
|
ClassData{prior = -0.45198512374305727,
|
|
|
|
unseen = -4.430816798843313,
|
|
|
|
likelihoods =
|
|
|
|
HashMap.fromList
|
|
|
|
[("on <date>", -3.3202283191284883),
|
|
|
|
("the <day-of-month> (non ordinal)", -3.3202283191284883),
|
|
|
|
("<day-of-month>(ordinal) <named-month>", -2.339399066116762),
|
|
|
|
("day", -0.8634925463071842),
|
|
|
|
("the <day-of-month> (ordinal)", -3.3202283191284883),
|
|
|
|
("afternoon", -3.7256934272366524),
|
|
|
|
("named-day", -2.1162555148025524),
|
|
|
|
("<day-of-month> (ordinal)", -1.9339339580085977),
|
|
|
|
("hour", -3.7256934272366524),
|
|
|
|
("<day-of-month> (non ordinal) <named-month>",
|
|
|
|
-3.7256934272366524)],
|
|
|
|
n = 35},
|
|
|
|
koData =
|
|
|
|
ClassData{prior = -1.0116009116784799,
|
|
|
|
unseen = -3.9889840465642745,
|
|
|
|
likelihoods =
|
|
|
|
HashMap.fromList
|
|
|
|
[("on <date>", -2.3608540011180215),
|
|
|
|
("year (latent)", -1.7730673362159024),
|
|
|
|
("time-of-day (latent)", -1.7730673362159024),
|
|
|
|
("year", -1.5723966407537513), ("hour", -1.5723966407537513)],
|
|
|
|
n = 20}}),
|
|
|
|
("integer (0..19)",
|
|
|
|
Classifier{okData =
|
|
|
|
ClassData{prior = -0.2803019651541584, unseen = -3.58351893845611,
|
|
|
|
likelihoods = HashMap.fromList [("", 0.0)], n = 34},
|
|
|
|
koData =
|
|
|
|
ClassData{prior = -1.4087672169719492,
|
|
|
|
unseen = -2.5649493574615367,
|
|
|
|
likelihoods = HashMap.fromList [("", 0.0)], n = 11}}),
|
|
|
|
("between <time-of-day> and <time-of-day> (interval)",
|
|
|
|
Classifier{okData =
|
|
|
|
ClassData{prior = -0.6931471805599453,
|
|
|
|
unseen = -2.1972245773362196,
|
|
|
|
likelihoods =
|
|
|
|
HashMap.fromList
|
|
|
|
[("minuteminute", -0.9808292530117262),
|
|
|
|
("hh:mmhh:mm", -0.9808292530117262)],
|
|
|
|
n = 2},
|
|
|
|
koData =
|
|
|
|
ClassData{prior = -0.6931471805599453,
|
|
|
|
unseen = -2.1972245773362196,
|
|
|
|
likelihoods =
|
|
|
|
HashMap.fromList
|
|
|
|
[("hh:mmtime-of-day (latent)", -0.9808292530117262),
|
|
|
|
("minutehour", -0.9808292530117262)],
|
|
|
|
n = 2}}),
|
|
|
|
("between <datetime> and <datetime> (interval)",
|
|
|
|
Classifier{okData =
|
2017-03-14 14:50:10 +03:00
|
|
|
ClassData{prior = -1.252762968495368, unseen = -2.4849066497880004,
|
2017-03-08 21:33:55 +03:00
|
|
|
likelihoods =
|
|
|
|
HashMap.fromList
|
2017-03-14 14:50:10 +03:00
|
|
|
[("minuteminute", -1.2992829841302609),
|
|
|
|
("hh:mmhh:mm", -1.2992829841302609)],
|
2017-03-08 21:33:55 +03:00
|
|
|
n = 2},
|
|
|
|
koData =
|
2017-03-14 14:50:10 +03:00
|
|
|
ClassData{prior = -0.3364722366212129, unseen = -2.890371757896165,
|
2017-03-08 21:33:55 +03:00
|
|
|
likelihoods =
|
|
|
|
HashMap.fromList
|
2017-03-14 14:50:10 +03:00
|
|
|
[("hh:mm<day-of-month> (ordinal)", -1.7346010553881064),
|
|
|
|
("hh:mmtime-of-day (latent)", -1.7346010553881064),
|
|
|
|
("minuteminute", -2.1400661634962708),
|
|
|
|
("minutehour", -1.7346010553881064),
|
|
|
|
("hh:mmintersect", -2.1400661634962708),
|
|
|
|
("minuteday", -1.7346010553881064)],
|
|
|
|
n = 5}}),
|
2017-03-08 21:33:55 +03:00
|
|
|
("month (grain)",
|
|
|
|
Classifier{okData =
|
|
|
|
ClassData{prior = 0.0, unseen = -2.3978952727983707,
|
|
|
|
likelihoods = HashMap.fromList [("", 0.0)], n = 9},
|
|
|
|
koData =
|
|
|
|
ClassData{prior = -infinity, unseen = -0.6931471805599453,
|
|
|
|
likelihoods = HashMap.fromList [], n = 0}}),
|
|
|
|
("<integer> more <unit-of-duration>",
|
|
|
|
Classifier{okData =
|
|
|
|
ClassData{prior = 0.0, unseen = -2.0794415416798357,
|
|
|
|
likelihoods =
|
|
|
|
HashMap.fromList
|
|
|
|
[("integer (numeric)minute (grain)", -1.252762968495368),
|
|
|
|
("integer (0..19)minute (grain)", -1.252762968495368),
|
|
|
|
("minute", -0.8472978603872037)],
|
|
|
|
n = 2},
|
|
|
|
koData =
|
|
|
|
ClassData{prior = -infinity, unseen = -1.3862943611198906,
|
|
|
|
likelihoods = HashMap.fromList [], n = 0}}),
|
|
|
|
("<time-of-day> o'clock",
|
|
|
|
Classifier{okData =
|
|
|
|
ClassData{prior = 0.0, unseen = -1.6094379124341003,
|
|
|
|
likelihoods =
|
|
|
|
HashMap.fromList
|
|
|
|
[("time-of-day (latent)", -0.6931471805599453),
|
|
|
|
("hour", -0.6931471805599453)],
|
|
|
|
n = 1},
|
|
|
|
koData =
|
|
|
|
ClassData{prior = -infinity, unseen = -1.0986122886681098,
|
|
|
|
likelihoods = HashMap.fromList [], n = 0}}),
|
|
|
|
("hour (grain)",
|
|
|
|
Classifier{okData =
|
|
|
|
ClassData{prior = -0.11778303565638351,
|
|
|
|
unseen = -2.3025850929940455,
|
|
|
|
likelihoods = HashMap.fromList [("", 0.0)], n = 8},
|
|
|
|
koData =
|
|
|
|
ClassData{prior = -2.1972245773362196,
|
|
|
|
unseen = -1.0986122886681098,
|
|
|
|
likelihoods = HashMap.fromList [("", 0.0)], n = 1}}),
|
|
|
|
("<ordinal> quarter",
|
|
|
|
Classifier{okData =
|
2017-03-13 21:38:34 +03:00
|
|
|
ClassData{prior = -0.5108256237659907,
|
|
|
|
unseen = -2.3025850929940455,
|
2017-03-08 21:33:55 +03:00
|
|
|
likelihoods =
|
|
|
|
HashMap.fromList
|
2017-03-13 21:38:34 +03:00
|
|
|
[("ordinal (digits)quarter (grain)", -1.0986122886681098),
|
|
|
|
("quarter", -0.8109302162163288),
|
|
|
|
("ordinals (first..31st)quarter (grain)", -1.5040773967762742)],
|
|
|
|
n = 3},
|
2017-03-08 21:33:55 +03:00
|
|
|
koData =
|
2017-03-13 21:38:34 +03:00
|
|
|
ClassData{prior = -0.916290731874155, unseen = -2.0794415416798357,
|
|
|
|
likelihoods =
|
|
|
|
HashMap.fromList
|
|
|
|
[("ordinal (digits)quarter (grain)", -1.252762968495368),
|
|
|
|
("quarter", -0.8472978603872037),
|
|
|
|
("ordinals (first..31st)quarter (grain)", -1.252762968495368)],
|
|
|
|
n = 2}}),
|
2017-03-08 21:33:55 +03:00
|
|
|
("Last year",
|
|
|
|
Classifier{okData =
|
|
|
|
ClassData{prior = 0.0, unseen = -1.0986122886681098,
|
|
|
|
likelihoods = HashMap.fromList [("", 0.0)], n = 1},
|
|
|
|
koData =
|
|
|
|
ClassData{prior = -infinity, unseen = -0.6931471805599453,
|
|
|
|
likelihoods = HashMap.fromList [], n = 0}}),
|
|
|
|
("intersect",
|
|
|
|
Classifier{okData =
|
2017-03-14 14:50:10 +03:00
|
|
|
ClassData{prior = -0.4382549309311553, unseen = -6.003887067106539,
|
2017-03-08 21:33:55 +03:00
|
|
|
likelihoods =
|
|
|
|
HashMap.fromList
|
|
|
|
[("<datetime> - <datetime> (interval)on <date>",
|
2017-03-14 14:50:10 +03:00
|
|
|
-4.209655408733095),
|
2017-03-08 21:33:55 +03:00
|
|
|
("<time-of-day> - <time-of-day> (interval)on <date>",
|
2017-03-14 14:50:10 +03:00
|
|
|
-4.209655408733095),
|
|
|
|
("hourday", -5.308267697401205),
|
|
|
|
("dayhour", -3.0569758987947098),
|
|
|
|
("daymonth", -2.956892440237727),
|
|
|
|
("monthyear", -3.29336467685894),
|
|
|
|
("intersecthh:mm", -4.9028025892930405),
|
|
|
|
("named-daylast <cycle>", -5.308267697401205),
|
Optimize simple time predicates
Summary:
This is the next step for:
https://fb.facebook.com/groups/527352907463243/permalink/600056483526218/
This:
* changes the time language to be able to track contradictions (`EmptyPredicate`)
* changes the time language to be able to collect non-contradicting pieces, like month and hour and unify them
* provides an efficient way to convert those pieces into (past,future) time series
* adds AMPM predicate runner - there's a bit of overlap with is12H, but it basically works
* changes a test case that was wrong before
* regenerates classifiers, I'm not sure why they changed exactly
Before:
```
res <- H.io $ let sentence = "10am thurs 4.30 thurs 12pm sat" in (debugTokens sentence $ analyze sentence (testContext {lang = EN}) HashSet.empty)
(15.50 secs, 6,171,188,928 bytes)
res <- H.io $ let sentence = "I have 9 am 12 pm 1 pm 2pm 4 pm 3 pm on Saturday" in (debugTokens sentence $ analyze sentence (testContext {lang = EN}) HashSet.empty)
(110.82 secs, 44,031,569,512 bytes)
```
After:
```
res <- H.io $ let sentence = "10am thurs 4.30 thurs 12pm sat" in (debugTokens sentence $ analyze sentence (testContext {lang = EN}) HashSet.empty)
(1.24 secs, 703,020,912 bytes)
res <- H.io $ let sentence = "I have 9 am 12 pm 1 pm 2pm 4 pm 3 pm on Saturday" in (debugTokens sentence $ analyze sentence (testContext {lang = EN}) HashSet.empty)
(9.51 secs, 5,891,109,592 bytes)
```
Reviewed By: JonCoens
Differential Revision: D4676812
fbshipit-source-id: 9810203
2017-03-14 02:49:47 +03:00
|
|
|
("the <day-of-month> (ordinal)named-month",
|
2017-03-14 14:50:10 +03:00
|
|
|
-3.9219733362813143),
|
2017-03-08 21:33:55 +03:00
|
|
|
("intersect by \"of\", \"from\", \"'s\"year",
|
2017-03-14 14:50:10 +03:00
|
|
|
-4.9028025892930405),
|
|
|
|
("last <day-of-week> of <time>year", -5.308267697401205),
|
|
|
|
("todayat <time-of-day>", -4.9028025892930405),
|
|
|
|
("dayday", -2.6341190479746763),
|
|
|
|
("dd/mmat <time-of-day>", -4.39197696552705),
|
|
|
|
("intersect by \",\"hh:mm", -3.804190300624931),
|
2017-03-08 21:33:55 +03:00
|
|
|
("named-day<named-month> <day-of-month> (ordinal)",
|
2017-03-14 14:50:10 +03:00
|
|
|
-5.308267697401205),
|
|
|
|
("intersectnamed-month", -4.9028025892930405),
|
|
|
|
("dayyear", -3.362357548345891),
|
|
|
|
("named-daythis <time>", -4.209655408733095),
|
|
|
|
("tomorrow<time-of-day> sharp", -4.9028025892930405),
|
2017-03-08 21:33:55 +03:00
|
|
|
("<day-of-month>(ordinal) <named-month>year",
|
2017-03-14 14:50:10 +03:00
|
|
|
-4.39197696552705),
|
2017-03-08 21:33:55 +03:00
|
|
|
("absorption of , after named day<day-of-month>(ordinal) <named-month>",
|
2017-03-14 14:50:10 +03:00
|
|
|
-4.61512051684126),
|
2017-03-08 21:33:55 +03:00
|
|
|
("absorption of , after named day<named-month> <day-of-month> (ordinal)",
|
2017-03-14 14:50:10 +03:00
|
|
|
-4.209655408733095),
|
|
|
|
("named-day<time> timezone", -4.61512051684126),
|
|
|
|
("named-monthyear", -3.29336467685894),
|
2017-03-08 21:33:55 +03:00
|
|
|
("absorption of , after named day<named-month> <day-of-month> (non ordinal)",
|
2017-03-14 14:50:10 +03:00
|
|
|
-4.209655408733095),
|
|
|
|
("on <date>named-month", -3.9219733362813143),
|
|
|
|
("tomorrowuntil <time-of-day>", -4.9028025892930405),
|
2017-03-08 21:33:55 +03:00
|
|
|
("absorption of , after named day<day-of-month> (non ordinal) <named-month>",
|
2017-03-14 14:50:10 +03:00
|
|
|
-4.61512051684126),
|
|
|
|
("after <time-of-day>at <time-of-day>", -4.9028025892930405),
|
2017-03-08 21:33:55 +03:00
|
|
|
("intersect by \",\"<day-of-month> (non ordinal) <named-month>",
|
2017-03-14 14:50:10 +03:00
|
|
|
-4.9028025892930405),
|
2017-03-08 21:33:55 +03:00
|
|
|
("named-day<day-of-month> (non ordinal) <named-month>",
|
2017-03-14 14:50:10 +03:00
|
|
|
-5.308267697401205),
|
2017-03-08 21:33:55 +03:00
|
|
|
("named-day<named-month> <day-of-month> (non ordinal)",
|
2017-03-14 14:50:10 +03:00
|
|
|
-5.308267697401205),
|
2017-03-08 21:33:55 +03:00
|
|
|
("named-dayfrom <datetime> - <datetime> (interval)",
|
2017-03-14 14:50:10 +03:00
|
|
|
-4.9028025892930405),
|
|
|
|
("named-daynext <cycle>", -5.308267697401205),
|
|
|
|
("named-dayintersect", -4.9028025892930405),
|
2017-03-08 21:33:55 +03:00
|
|
|
("named-dayfrom <time-of-day> - <time-of-day> (interval)",
|
2017-03-14 14:50:10 +03:00
|
|
|
-4.9028025892930405),
|
|
|
|
("tomorrowafter <time-of-day>", -4.9028025892930405),
|
2017-03-08 21:33:55 +03:00
|
|
|
("from <time-of-day> - <time-of-day> (interval)on <date>",
|
2017-03-14 14:50:10 +03:00
|
|
|
-4.39197696552705),
|
|
|
|
("dayminute", -2.6692103677859462),
|
2017-03-08 21:33:55 +03:00
|
|
|
("from <datetime> - <datetime> (interval)on <date>",
|
2017-03-14 14:50:10 +03:00
|
|
|
-4.61512051684126),
|
|
|
|
("<ordinal> <cycle> of <time>year", -4.9028025892930405),
|
|
|
|
("minuteday", -2.6341190479746763),
|
Optimize simple time predicates
Summary:
This is the next step for:
https://fb.facebook.com/groups/527352907463243/permalink/600056483526218/
This:
* changes the time language to be able to track contradictions (`EmptyPredicate`)
* changes the time language to be able to collect non-contradicting pieces, like month and hour and unify them
* provides an efficient way to convert those pieces into (past,future) time series
* adds AMPM predicate runner - there's a bit of overlap with is12H, but it basically works
* changes a test case that was wrong before
* regenerates classifiers, I'm not sure why they changed exactly
Before:
```
res <- H.io $ let sentence = "10am thurs 4.30 thurs 12pm sat" in (debugTokens sentence $ analyze sentence (testContext {lang = EN}) HashSet.empty)
(15.50 secs, 6,171,188,928 bytes)
res <- H.io $ let sentence = "I have 9 am 12 pm 1 pm 2pm 4 pm 3 pm on Saturday" in (debugTokens sentence $ analyze sentence (testContext {lang = EN}) HashSet.empty)
(110.82 secs, 44,031,569,512 bytes)
```
After:
```
res <- H.io $ let sentence = "10am thurs 4.30 thurs 12pm sat" in (debugTokens sentence $ analyze sentence (testContext {lang = EN}) HashSet.empty)
(1.24 secs, 703,020,912 bytes)
res <- H.io $ let sentence = "I have 9 am 12 pm 1 pm 2pm 4 pm 3 pm on Saturday" in (debugTokens sentence $ analyze sentence (testContext {lang = EN}) HashSet.empty)
(9.51 secs, 5,891,109,592 bytes)
```
Reviewed By: JonCoens
Differential Revision: D4676812
fbshipit-source-id: 9810203
2017-03-14 02:49:47 +03:00
|
|
|
("absorption of , after named dayintersect",
|
2017-03-14 14:50:10 +03:00
|
|
|
-4.9028025892930405),
|
2017-03-08 21:33:55 +03:00
|
|
|
("named-daybetween <time-of-day> and <time-of-day> (interval)",
|
2017-03-14 14:50:10 +03:00
|
|
|
-5.308267697401205),
|
|
|
|
("named-dayon <date>", -4.61512051684126),
|
|
|
|
("named-dayat <time-of-day>", -4.9028025892930405),
|
|
|
|
("yearhh:mm", -5.308267697401205),
|
2017-03-08 21:33:55 +03:00
|
|
|
("named-daybetween <datetime> and <datetime> (interval)",
|
2017-03-14 14:50:10 +03:00
|
|
|
-5.308267697401205),
|
Optimize simple time predicates
Summary:
This is the next step for:
https://fb.facebook.com/groups/527352907463243/permalink/600056483526218/
This:
* changes the time language to be able to track contradictions (`EmptyPredicate`)
* changes the time language to be able to collect non-contradicting pieces, like month and hour and unify them
* provides an efficient way to convert those pieces into (past,future) time series
* adds AMPM predicate runner - there's a bit of overlap with is12H, but it basically works
* changes a test case that was wrong before
* regenerates classifiers, I'm not sure why they changed exactly
Before:
```
res <- H.io $ let sentence = "10am thurs 4.30 thurs 12pm sat" in (debugTokens sentence $ analyze sentence (testContext {lang = EN}) HashSet.empty)
(15.50 secs, 6,171,188,928 bytes)
res <- H.io $ let sentence = "I have 9 am 12 pm 1 pm 2pm 4 pm 3 pm on Saturday" in (debugTokens sentence $ analyze sentence (testContext {lang = EN}) HashSet.empty)
(110.82 secs, 44,031,569,512 bytes)
```
After:
```
res <- H.io $ let sentence = "10am thurs 4.30 thurs 12pm sat" in (debugTokens sentence $ analyze sentence (testContext {lang = EN}) HashSet.empty)
(1.24 secs, 703,020,912 bytes)
res <- H.io $ let sentence = "I have 9 am 12 pm 1 pm 2pm 4 pm 3 pm on Saturday" in (debugTokens sentence $ analyze sentence (testContext {lang = EN}) HashSet.empty)
(9.51 secs, 5,891,109,592 bytes)
```
Reviewed By: JonCoens
Differential Revision: D4676812
fbshipit-source-id: 9810203
2017-03-14 02:49:47 +03:00
|
|
|
("absorption of , after named dayintersect by \",\"",
|
2017-03-14 14:50:10 +03:00
|
|
|
-4.39197696552705),
|
|
|
|
("dd/mmyear", -4.9028025892930405),
|
|
|
|
("at <time-of-day>on <date>", -5.308267697401205),
|
2017-03-08 21:33:55 +03:00
|
|
|
("between <time-of-day> and <time-of-day> (interval)on <date>",
|
2017-03-14 14:50:10 +03:00
|
|
|
-5.308267697401205),
|
2017-03-08 21:33:55 +03:00
|
|
|
("between <datetime> and <datetime> (interval)on <date>",
|
2017-03-14 14:50:10 +03:00
|
|
|
-5.308267697401205),
|
|
|
|
("dayweek", -4.055504728905837),
|
2017-03-08 21:33:55 +03:00
|
|
|
("intersect by \",\"<day-of-month>(ordinal) <named-month>",
|
2017-03-14 14:50:10 +03:00
|
|
|
-4.9028025892930405),
|
|
|
|
("weekyear", -4.39197696552705),
|
|
|
|
("hh:mmtomorrow", -4.61512051684126),
|
2017-03-08 21:33:55 +03:00
|
|
|
("named-day<day-of-month>(ordinal) <named-month>",
|
2017-03-14 14:50:10 +03:00
|
|
|
-5.308267697401205),
|
|
|
|
("tomorrowat <time-of-day>", -4.055504728905837),
|
|
|
|
("named-daythis <cycle>", -4.61512051684126),
|
|
|
|
("named-daythe <day-of-month> (ordinal)", -5.308267697401205),
|
|
|
|
("at <time-of-day>tomorrow", -4.9028025892930405),
|
|
|
|
("last <cycle> of <time>year", -4.39197696552705),
|
2017-03-08 21:33:55 +03:00
|
|
|
("<day-of-month> (non ordinal) <named-month>year",
|
2017-03-14 14:50:10 +03:00
|
|
|
-4.9028025892930405),
|
|
|
|
("yearminute", -5.308267697401205)],
|
|
|
|
n = 160},
|
2017-03-08 21:33:55 +03:00
|
|
|
koData =
|
2017-03-14 14:50:10 +03:00
|
|
|
ClassData{prior = -1.0360919316867756, unseen = -5.564520407322694,
|
2017-03-08 21:33:55 +03:00
|
|
|
likelihoods =
|
|
|
|
HashMap.fromList
|
2017-03-14 14:50:10 +03:00
|
|
|
[("named-daynamed-month", -4.867534450455582),
|
|
|
|
("dayhour", -3.258096538021482),
|
|
|
|
("daymonth", -2.0343211063993665),
|
|
|
|
("monthday", -3.3634570536793085),
|
|
|
|
("monthyear", -3.951243718581427),
|
2017-03-08 21:33:55 +03:00
|
|
|
("intersect by \"of\", \"from\", \"'s\"year",
|
2017-03-14 14:50:10 +03:00
|
|
|
-3.7689221617874726),
|
|
|
|
("hourmonth", -4.462069342347418),
|
|
|
|
("named-dayhh:mm", -4.867534450455582),
|
|
|
|
("dayday", -4.867534450455582),
|
|
|
|
("dd/mmat <time-of-day>", -3.951243718581427),
|
|
|
|
("intersectnamed-month", -4.462069342347418),
|
|
|
|
("dayyear", -3.481240089335692),
|
|
|
|
("named-daythis <time>", -2.921624301400269),
|
2017-03-08 21:33:55 +03:00
|
|
|
("year<hour-of-day> <integer> (as relative minutes)",
|
2017-03-14 14:50:10 +03:00
|
|
|
-4.462069342347418),
|
|
|
|
("minutemonth", -4.462069342347418),
|
|
|
|
("named-monthyear", -3.951243718581427),
|
|
|
|
("on <date>named-month", -4.462069342347418),
|
2017-03-08 21:33:55 +03:00
|
|
|
("absorption of , after named daynamed-month",
|
2017-03-14 14:50:10 +03:00
|
|
|
-3.7689221617874726),
|
2017-03-08 21:33:55 +03:00
|
|
|
("named-dayfrom <datetime> - <datetime> (interval)",
|
2017-03-14 14:50:10 +03:00
|
|
|
-3.7689221617874726),
|
|
|
|
("named-dayintersect", -4.462069342347418),
|
2017-03-08 21:33:55 +03:00
|
|
|
("named-month<day-of-month>(ordinal) <named-month>",
|
2017-03-14 14:50:10 +03:00
|
|
|
-3.951243718581427),
|
2017-03-08 21:33:55 +03:00
|
|
|
("<hour-of-day> <integer> (as relative minutes)named-month",
|
2017-03-14 14:50:10 +03:00
|
|
|
-4.462069342347418),
|
2017-03-08 21:33:55 +03:00
|
|
|
("named-dayfrom <time-of-day> - <time-of-day> (interval)",
|
2017-03-14 14:50:10 +03:00
|
|
|
-4.867534450455582),
|
|
|
|
("yearmonth", -4.867534450455582),
|
|
|
|
("until <time-of-day>on <date>", -4.867534450455582),
|
2017-03-08 21:33:55 +03:00
|
|
|
("<named-month> <day-of-month> (ordinal)named-month",
|
2017-03-14 14:50:10 +03:00
|
|
|
-3.951243718581427),
|
|
|
|
("dayminute", -3.258096538021482),
|
|
|
|
("minuteday", -3.3634570536793085),
|
2017-03-08 21:33:55 +03:00
|
|
|
("named-daybetween <time-of-day> and <time-of-day> (interval)",
|
2017-03-14 14:50:10 +03:00
|
|
|
-4.867534450455582),
|
|
|
|
("named-dayon <date>", -4.462069342347418),
|
|
|
|
("named-dayat <time-of-day>", -3.951243718581427),
|
|
|
|
("hh:mmon <date>", -3.481240089335692),
|
2017-03-08 21:33:55 +03:00
|
|
|
("named-daybetween <datetime> and <datetime> (interval)",
|
2017-03-14 14:50:10 +03:00
|
|
|
-4.462069342347418),
|
2017-03-08 21:33:55 +03:00
|
|
|
("<day-of-month>(ordinal) <named-month> yearnamed-month",
|
2017-03-14 14:50:10 +03:00
|
|
|
-3.951243718581427),
|
2017-03-08 21:33:55 +03:00
|
|
|
("named-month<day-of-month> (non ordinal) <named-month>",
|
2017-03-14 14:50:10 +03:00
|
|
|
-3.951243718581427),
|
2017-03-08 21:33:55 +03:00
|
|
|
("<named-month> <day-of-month> (non ordinal)named-month",
|
2017-03-14 14:50:10 +03:00
|
|
|
-3.951243718581427),
|
2017-03-08 21:33:55 +03:00
|
|
|
("<hour-of-day> <integer> (as relative minutes)year",
|
2017-03-14 14:50:10 +03:00
|
|
|
-4.462069342347418),
|
|
|
|
("minuteyear", -4.462069342347418),
|
|
|
|
("yearminute", -4.462069342347418)],
|
|
|
|
n = 88}}),
|
2017-03-08 21:33:55 +03:00
|
|
|
("<ordinal> <cycle> of <time>",
|
|
|
|
Classifier{okData =
|
2017-03-14 14:50:10 +03:00
|
|
|
ClassData{prior = 0.0, unseen = -2.890371757896165,
|
2017-03-08 21:33:55 +03:00
|
|
|
likelihoods =
|
|
|
|
HashMap.fromList
|
2017-03-14 14:50:10 +03:00
|
|
|
[("daymonth", -1.7346010553881064),
|
2017-03-08 21:33:55 +03:00
|
|
|
("ordinals (first..31st)week (grain)intersect",
|
2017-03-14 14:50:10 +03:00
|
|
|
-1.7346010553881064),
|
2017-03-08 21:33:55 +03:00
|
|
|
("ordinals (first..31st)week (grain)named-month",
|
2017-03-14 14:50:10 +03:00
|
|
|
-1.7346010553881064),
|
|
|
|
("weekmonth", -1.2237754316221157),
|
2017-03-08 21:33:55 +03:00
|
|
|
("ordinals (first..31st)day (grain)named-month",
|
2017-03-14 14:50:10 +03:00
|
|
|
-1.7346010553881064)],
|
|
|
|
n = 6},
|
2017-03-08 21:33:55 +03:00
|
|
|
koData =
|
2017-03-14 14:50:10 +03:00
|
|
|
ClassData{prior = -infinity, unseen = -1.791759469228055,
|
2017-03-08 21:33:55 +03:00
|
|
|
likelihoods = HashMap.fromList [], n = 0}}),
|
|
|
|
("season",
|
|
|
|
Classifier{okData =
|
|
|
|
ClassData{prior = 0.0, unseen = -1.791759469228055,
|
|
|
|
likelihoods = HashMap.fromList [("", 0.0)], n = 4},
|
|
|
|
koData =
|
|
|
|
ClassData{prior = -infinity, unseen = -0.6931471805599453,
|
|
|
|
likelihoods = HashMap.fromList [], n = 0}}),
|
|
|
|
("year (grain)",
|
|
|
|
Classifier{okData =
|
|
|
|
ClassData{prior = 0.0, unseen = -2.5649493574615367,
|
|
|
|
likelihoods = HashMap.fromList [("", 0.0)], n = 11},
|
|
|
|
koData =
|
|
|
|
ClassData{prior = -infinity, unseen = -0.6931471805599453,
|
|
|
|
likelihoods = HashMap.fromList [], n = 0}}),
|
|
|
|
("from <datetime> - <datetime> (interval)",
|
|
|
|
Classifier{okData =
|
2017-03-14 14:50:10 +03:00
|
|
|
ClassData{prior = -1.4816045409242156,
|
|
|
|
unseen = -3.4657359027997265,
|
2017-03-08 21:33:55 +03:00
|
|
|
likelihoods =
|
|
|
|
HashMap.fromList
|
2017-03-14 14:50:10 +03:00
|
|
|
[("minuteminute", -1.824549292051046),
|
2017-03-08 21:33:55 +03:00
|
|
|
("time-of-day (latent)time-of-day (latent)",
|
2017-03-14 14:50:10 +03:00
|
|
|
-2.740840023925201),
|
|
|
|
("hh:mmhh:mm", -1.824549292051046),
|
|
|
|
("hourhour", -2.740840023925201)],
|
2017-03-08 21:33:55 +03:00
|
|
|
n = 5},
|
|
|
|
koData =
|
2017-03-14 14:50:10 +03:00
|
|
|
ClassData{prior = -0.2578291093020998, unseen = -4.02535169073515,
|
2017-03-08 21:33:55 +03:00
|
|
|
likelihoods =
|
|
|
|
HashMap.fromList
|
2017-03-14 14:50:10 +03:00
|
|
|
[("hourday", -3.3141860046725258),
|
|
|
|
("dayhour", -3.3141860046725258),
|
|
|
|
("yearhour", -3.3141860046725258),
|
|
|
|
("hh:mm<day-of-month> (ordinal)", -2.3978952727983707),
|
2017-03-08 21:33:55 +03:00
|
|
|
("time-of-day (latent)<day-of-month> (ordinal)",
|
2017-03-14 14:50:10 +03:00
|
|
|
-3.3141860046725258),
|
2017-03-08 21:33:55 +03:00
|
|
|
("<day-of-month> (ordinal)time-of-day (latent)",
|
2017-03-14 14:50:10 +03:00
|
|
|
-3.3141860046725258),
|
|
|
|
("hh:mmtime-of-day (latent)", -2.3978952727983707),
|
|
|
|
("minuteminute", -2.6210388241125804),
|
|
|
|
("yearyear", -3.3141860046725258),
|
2017-03-08 21:33:55 +03:00
|
|
|
("<day-of-month> (ordinal)<day-of-month> (ordinal)",
|
2017-03-14 14:50:10 +03:00
|
|
|
-3.3141860046725258),
|
|
|
|
("dayday", -3.3141860046725258),
|
|
|
|
("year (latent)year (latent)", -3.3141860046725258),
|
|
|
|
("minutehour", -2.3978952727983707),
|
|
|
|
("hh:mmintersect", -2.6210388241125804),
|
|
|
|
("year (latent)time-of-day (latent)", -3.3141860046725258),
|
|
|
|
("minuteday", -2.3978952727983707),
|
|
|
|
("year (latent)<day-of-month> (ordinal)", -3.3141860046725258),
|
|
|
|
("yearday", -3.3141860046725258)],
|
|
|
|
n = 17}}),
|
2017-03-08 21:33:55 +03:00
|
|
|
("next <cycle>",
|
|
|
|
Classifier{okData =
|
|
|
|
ClassData{prior = 0.0, unseen = -3.044522437723423,
|
|
|
|
likelihoods =
|
|
|
|
HashMap.fromList
|
|
|
|
[("week", -1.6094379124341003),
|
|
|
|
("month (grain)", -2.3025850929940455),
|
|
|
|
("year (grain)", -2.3025850929940455),
|
|
|
|
("week (grain)", -1.6094379124341003),
|
|
|
|
("quarter", -2.3025850929940455), ("year", -2.3025850929940455),
|
|
|
|
("month", -2.3025850929940455),
|
|
|
|
("quarter (grain)", -2.3025850929940455)],
|
|
|
|
n = 6},
|
|
|
|
koData =
|
|
|
|
ClassData{prior = -infinity, unseen = -2.1972245773362196,
|
|
|
|
likelihoods = HashMap.fromList [], n = 0}}),
|
|
|
|
("number.number hours",
|
|
|
|
Classifier{okData =
|
|
|
|
ClassData{prior = 0.0, unseen = -1.0986122886681098,
|
|
|
|
likelihoods = HashMap.fromList [("", 0.0)], n = 1},
|
|
|
|
koData =
|
|
|
|
ClassData{prior = -infinity, unseen = -0.6931471805599453,
|
|
|
|
likelihoods = HashMap.fromList [], n = 0}}),
|
|
|
|
("from <time-of-day> - <time-of-day> (interval)",
|
|
|
|
Classifier{okData =
|
|
|
|
ClassData{prior = -0.6061358035703156,
|
|
|
|
unseen = -2.9444389791664407,
|
|
|
|
likelihoods =
|
|
|
|
HashMap.fromList
|
|
|
|
[("minuteminute", -1.0986122886681098),
|
|
|
|
("time-of-day (latent)time-of-day (latent)",
|
|
|
|
-2.1972245773362196),
|
|
|
|
("hh:mmhh:mm", -1.0986122886681098),
|
|
|
|
("hourhour", -2.1972245773362196)],
|
|
|
|
n = 6},
|
|
|
|
koData =
|
|
|
|
ClassData{prior = -0.7884573603642702, unseen = -2.833213344056216,
|
|
|
|
likelihoods =
|
|
|
|
HashMap.fromList
|
|
|
|
[("hh:mmtime-of-day (latent)", -0.9808292530117262),
|
|
|
|
("minutehour", -0.9808292530117262)],
|
|
|
|
n = 5}}),
|
|
|
|
("yyyy-mm-dd",
|
|
|
|
Classifier{okData =
|
|
|
|
ClassData{prior = 0.0, unseen = -1.3862943611198906,
|
|
|
|
likelihoods = HashMap.fromList [("", 0.0)], n = 2},
|
|
|
|
koData =
|
|
|
|
ClassData{prior = -infinity, unseen = -0.6931471805599453,
|
|
|
|
likelihoods = HashMap.fromList [], n = 0}}),
|
|
|
|
("year (latent)",
|
|
|
|
Classifier{okData =
|
|
|
|
ClassData{prior = -infinity, unseen = -1.0986122886681098,
|
|
|
|
likelihoods = HashMap.fromList [], n = 0},
|
|
|
|
koData =
|
|
|
|
ClassData{prior = 0.0, unseen = -3.4965075614664802,
|
|
|
|
likelihoods =
|
|
|
|
HashMap.fromList
|
|
|
|
[("integer (numeric)", -0.3746934494414107),
|
|
|
|
("integer (0..19)", -1.1631508098056809)],
|
|
|
|
n = 30}}),
|
|
|
|
("dd/mm/yyyy",
|
|
|
|
Classifier{okData =
|
|
|
|
ClassData{prior = 0.0, unseen = -2.1972245773362196,
|
|
|
|
likelihoods = HashMap.fromList [("", 0.0)], n = 7},
|
|
|
|
koData =
|
|
|
|
ClassData{prior = -infinity, unseen = -0.6931471805599453,
|
|
|
|
likelihoods = HashMap.fromList [], n = 0}}),
|
|
|
|
("yesterday",
|
|
|
|
Classifier{okData =
|
|
|
|
ClassData{prior = 0.0, unseen = -1.3862943611198906,
|
|
|
|
likelihoods = HashMap.fromList [("", 0.0)], n = 2},
|
|
|
|
koData =
|
|
|
|
ClassData{prior = -infinity, unseen = -0.6931471805599453,
|
|
|
|
likelihoods = HashMap.fromList [], n = 0}}),
|
|
|
|
("<ordinal> quarter <year>",
|
|
|
|
Classifier{okData =
|
|
|
|
ClassData{prior = 0.0, unseen = -2.0794415416798357,
|
|
|
|
likelihoods =
|
|
|
|
HashMap.fromList
|
|
|
|
[("ordinals (first..31st)quarter (grain)year",
|
|
|
|
-1.252762968495368),
|
|
|
|
("quarteryear", -0.8472978603872037),
|
|
|
|
("ordinal (digits)quarter (grain)year", -1.252762968495368)],
|
|
|
|
n = 2},
|
|
|
|
koData =
|
|
|
|
ClassData{prior = -infinity, unseen = -1.3862943611198906,
|
|
|
|
likelihoods = HashMap.fromList [], n = 0}}),
|
|
|
|
("after lunch",
|
|
|
|
Classifier{okData =
|
|
|
|
ClassData{prior = 0.0, unseen = -1.0986122886681098,
|
|
|
|
likelihoods = HashMap.fromList [("", 0.0)], n = 1},
|
|
|
|
koData =
|
|
|
|
ClassData{prior = -infinity, unseen = -0.6931471805599453,
|
|
|
|
likelihoods = HashMap.fromList [], n = 0}}),
|
|
|
|
("hh:mm:ss",
|
|
|
|
Classifier{okData =
|
|
|
|
ClassData{prior = 0.0, unseen = -1.0986122886681098,
|
|
|
|
likelihoods = HashMap.fromList [("", 0.0)], n = 1},
|
|
|
|
koData =
|
|
|
|
ClassData{prior = -infinity, unseen = -0.6931471805599453,
|
|
|
|
likelihoods = HashMap.fromList [], n = 0}}),
|
|
|
|
("quarter to|till|before <integer> (hour-of-day)",
|
|
|
|
Classifier{okData =
|
|
|
|
ClassData{prior = 0.0, unseen = -2.3025850929940455,
|
|
|
|
likelihoods =
|
|
|
|
HashMap.fromList
|
|
|
|
[("time-of-day (latent)", -1.0986122886681098),
|
|
|
|
("noon", -1.5040773967762742), ("hour", -0.8109302162163288)],
|
|
|
|
n = 3},
|
|
|
|
koData =
|
|
|
|
ClassData{prior = -infinity, unseen = -1.3862943611198906,
|
|
|
|
likelihoods = HashMap.fromList [], n = 0}}),
|
|
|
|
("nth <time> of <time>",
|
|
|
|
Classifier{okData =
|
2017-03-14 14:50:10 +03:00
|
|
|
ClassData{prior = -0.5596157879354228, unseen = -2.995732273553991,
|
2017-03-08 21:33:55 +03:00
|
|
|
likelihoods =
|
|
|
|
HashMap.fromList
|
2017-03-14 14:50:10 +03:00
|
|
|
[("daymonth", -0.7472144018302211),
|
2017-03-08 21:33:55 +03:00
|
|
|
("ordinals (first..31st)named-dayintersect",
|
2017-03-14 14:50:10 +03:00
|
|
|
-0.9985288301111273),
|
2017-03-08 21:33:55 +03:00
|
|
|
("ordinals (first..31st)named-daynamed-month",
|
2017-03-14 14:50:10 +03:00
|
|
|
-1.845826690498331)],
|
|
|
|
n = 8},
|
2017-03-08 21:33:55 +03:00
|
|
|
koData =
|
2017-03-14 14:50:10 +03:00
|
|
|
ClassData{prior = -0.8472978603872037, unseen = -2.772588722239781,
|
2017-03-13 21:38:34 +03:00
|
|
|
likelihoods =
|
|
|
|
HashMap.fromList
|
2017-03-14 14:50:10 +03:00
|
|
|
[("daymonth", -0.7621400520468967),
|
2017-03-13 21:38:34 +03:00
|
|
|
("ordinals (first..31st)named-daynamed-month",
|
2017-03-14 14:50:10 +03:00
|
|
|
-0.7621400520468967)],
|
2017-03-13 21:38:34 +03:00
|
|
|
n = 6}}),
|
2017-03-08 21:33:55 +03:00
|
|
|
("the <day-of-month> (non ordinal)",
|
|
|
|
Classifier{okData =
|
|
|
|
ClassData{prior = 0.0, unseen = -1.3862943611198906,
|
|
|
|
likelihoods = HashMap.fromList [("integer (numeric)", 0.0)],
|
|
|
|
n = 2},
|
|
|
|
koData =
|
|
|
|
ClassData{prior = -infinity, unseen = -0.6931471805599453,
|
|
|
|
likelihoods = HashMap.fromList [], n = 0}}),
|
|
|
|
("ordinals (first..31st)",
|
|
|
|
Classifier{okData =
|
|
|
|
ClassData{prior = 0.0, unseen = -3.044522437723423,
|
|
|
|
likelihoods = HashMap.fromList [("", 0.0)], n = 19},
|
|
|
|
koData =
|
|
|
|
ClassData{prior = -infinity, unseen = -0.6931471805599453,
|
|
|
|
likelihoods = HashMap.fromList [], n = 0}}),
|
|
|
|
("named-month",
|
|
|
|
Classifier{okData =
|
|
|
|
ClassData{prior = 0.0, unseen = -4.174387269895637,
|
|
|
|
likelihoods = HashMap.fromList [("", 0.0)], n = 63},
|
|
|
|
koData =
|
|
|
|
ClassData{prior = -infinity, unseen = -0.6931471805599453,
|
|
|
|
likelihoods = HashMap.fromList [], n = 0}}),
|
|
|
|
("week (grain)",
|
|
|
|
Classifier{okData =
|
|
|
|
ClassData{prior = 0.0, unseen = -3.295836866004329,
|
|
|
|
likelihoods = HashMap.fromList [("", 0.0)], n = 25},
|
|
|
|
koData =
|
|
|
|
ClassData{prior = -infinity, unseen = -0.6931471805599453,
|
|
|
|
likelihoods = HashMap.fromList [], n = 0}}),
|
|
|
|
("now",
|
|
|
|
Classifier{okData =
|
|
|
|
ClassData{prior = -0.6931471805599453,
|
|
|
|
unseen = -1.3862943611198906,
|
|
|
|
likelihoods = HashMap.fromList [("", 0.0)], n = 2},
|
|
|
|
koData =
|
|
|
|
ClassData{prior = -0.6931471805599453,
|
|
|
|
unseen = -1.3862943611198906,
|
|
|
|
likelihoods = HashMap.fromList [("", 0.0)], n = 2}}),
|
|
|
|
("this <part-of-day>",
|
|
|
|
Classifier{okData =
|
|
|
|
ClassData{prior = -infinity, unseen = -1.0986122886681098,
|
|
|
|
likelihoods = HashMap.fromList [], n = 0},
|
|
|
|
koData =
|
|
|
|
ClassData{prior = 0.0, unseen = -1.6094379124341003,
|
|
|
|
likelihoods =
|
|
|
|
HashMap.fromList
|
|
|
|
[("hour", -0.6931471805599453),
|
|
|
|
("morning", -0.6931471805599453)],
|
|
|
|
n = 1}}),
|
|
|
|
("<day-of-month>(ordinal) <named-month>",
|
|
|
|
Classifier{okData =
|
|
|
|
ClassData{prior = -0.12921173148000623,
|
|
|
|
unseen = -4.127134385045092,
|
|
|
|
likelihoods =
|
|
|
|
HashMap.fromList
|
|
|
|
[("ordinals (first..31st)named-month", -2.501435951739211),
|
|
|
|
("ordinal (digits)named-month", -0.8527773261518292),
|
|
|
|
("month", -0.7096764825111559)],
|
|
|
|
n = 29},
|
|
|
|
koData =
|
|
|
|
ClassData{prior = -2.1102132003465894,
|
|
|
|
unseen = -2.4849066497880004,
|
|
|
|
likelihoods =
|
|
|
|
HashMap.fromList
|
|
|
|
[("ordinals (first..31st)named-month", -1.7047480922384253),
|
|
|
|
("ordinal (digits)named-month", -1.0116009116784799),
|
|
|
|
("month", -0.7884573603642702)],
|
|
|
|
n = 4}}),
|
|
|
|
("numbers prefix with -, negative or minus",
|
|
|
|
Classifier{okData =
|
|
|
|
ClassData{prior = -infinity, unseen = -0.6931471805599453,
|
|
|
|
likelihoods = HashMap.fromList [], n = 0},
|
|
|
|
koData =
|
|
|
|
ClassData{prior = 0.0, unseen = -3.2188758248682006,
|
|
|
|
likelihoods = HashMap.fromList [("integer (numeric)", 0.0)],
|
|
|
|
n = 23}}),
|
|
|
|
("in|during the <part-of-day>",
|
|
|
|
Classifier{okData =
|
|
|
|
ClassData{prior = -infinity, unseen = -1.0986122886681098,
|
|
|
|
likelihoods = HashMap.fromList [], n = 0},
|
|
|
|
koData =
|
|
|
|
ClassData{prior = 0.0, unseen = -1.6094379124341003,
|
|
|
|
likelihoods =
|
|
|
|
HashMap.fromList
|
|
|
|
[("hour", -0.6931471805599453),
|
|
|
|
("morning", -0.6931471805599453)],
|
|
|
|
n = 1}}),
|
|
|
|
("new year's eve",
|
|
|
|
Classifier{okData =
|
|
|
|
ClassData{prior = 0.0, unseen = -1.0986122886681098,
|
|
|
|
likelihoods = HashMap.fromList [("", 0.0)], n = 1},
|
|
|
|
koData =
|
|
|
|
ClassData{prior = -infinity, unseen = -0.6931471805599453,
|
|
|
|
likelihoods = HashMap.fromList [], n = 0}}),
|
|
|
|
("tomorrow",
|
|
|
|
Classifier{okData =
|
|
|
|
ClassData{prior = 0.0, unseen = -2.833213344056216,
|
|
|
|
likelihoods = HashMap.fromList [("", 0.0)], n = 15},
|
|
|
|
koData =
|
|
|
|
ClassData{prior = -infinity, unseen = -0.6931471805599453,
|
|
|
|
likelihoods = HashMap.fromList [], n = 0}}),
|
|
|
|
("<time> after next",
|
|
|
|
Classifier{okData =
|
|
|
|
ClassData{prior = 0.0, unseen = -2.5649493574615367,
|
|
|
|
likelihoods =
|
|
|
|
HashMap.fromList
|
|
|
|
[("named-month", -1.791759469228055),
|
|
|
|
("day", -1.0986122886681098),
|
|
|
|
("named-day", -1.0986122886681098),
|
|
|
|
("month", -1.791759469228055)],
|
|
|
|
n = 4},
|
|
|
|
koData =
|
|
|
|
ClassData{prior = -infinity, unseen = -1.6094379124341003,
|
|
|
|
likelihoods = HashMap.fromList [], n = 0}}),
|
|
|
|
("half an hour",
|
|
|
|
Classifier{okData =
|
|
|
|
ClassData{prior = -0.5108256237659907,
|
|
|
|
unseen = -1.6094379124341003,
|
|
|
|
likelihoods = HashMap.fromList [("", 0.0)], n = 3},
|
|
|
|
koData =
|
|
|
|
ClassData{prior = -0.916290731874155, unseen = -1.3862943611198906,
|
|
|
|
likelihoods = HashMap.fromList [("", 0.0)], n = 2}}),
|
|
|
|
("the <day-of-month> (ordinal)",
|
|
|
|
Classifier{okData =
|
|
|
|
ClassData{prior = 0.0, unseen = -2.639057329615259,
|
|
|
|
likelihoods =
|
|
|
|
HashMap.fromList
|
|
|
|
[("ordinals (first..31st)", -1.1786549963416462),
|
|
|
|
("ordinal (digits)", -0.3677247801253174)],
|
|
|
|
n = 11},
|
|
|
|
koData =
|
|
|
|
ClassData{prior = -infinity, unseen = -1.0986122886681098,
|
|
|
|
likelihoods = HashMap.fromList [], n = 0}}),
|
|
|
|
("afternoon",
|
|
|
|
Classifier{okData =
|
|
|
|
ClassData{prior = 0.0, unseen = -1.0986122886681098,
|
|
|
|
likelihoods = HashMap.fromList [("", 0.0)], n = 1},
|
|
|
|
koData =
|
|
|
|
ClassData{prior = -infinity, unseen = -0.6931471805599453,
|
|
|
|
likelihoods = HashMap.fromList [], n = 0}}),
|
|
|
|
("<duration> from now",
|
|
|
|
Classifier{okData =
|
|
|
|
ClassData{prior = 0.0, unseen = -2.3978952727983707,
|
|
|
|
likelihoods =
|
|
|
|
HashMap.fromList
|
|
|
|
[("second", -1.2039728043259361),
|
|
|
|
("<integer> <unit-of-duration>", -1.2039728043259361),
|
|
|
|
("a <unit-of-duration>", -1.6094379124341003),
|
|
|
|
("minute", -1.6094379124341003)],
|
|
|
|
n = 3},
|
|
|
|
koData =
|
|
|
|
ClassData{prior = -infinity, unseen = -1.6094379124341003,
|
|
|
|
likelihoods = HashMap.fromList [], n = 0}}),
|
|
|
|
("this <cycle>",
|
|
|
|
Classifier{okData =
|
|
|
|
ClassData{prior = 0.0, unseen = -3.044522437723423,
|
|
|
|
likelihoods =
|
|
|
|
HashMap.fromList
|
|
|
|
[("week", -1.3862943611198906),
|
|
|
|
("year (grain)", -1.8971199848858813),
|
|
|
|
("week (grain)", -1.3862943611198906),
|
|
|
|
("quarter", -2.3025850929940455), ("year", -1.8971199848858813),
|
|
|
|
("quarter (grain)", -2.3025850929940455)],
|
|
|
|
n = 7},
|
|
|
|
koData =
|
|
|
|
ClassData{prior = -infinity, unseen = -1.9459101490553135,
|
|
|
|
likelihoods = HashMap.fromList [], n = 0}}),
|
|
|
|
("minute (grain)",
|
|
|
|
Classifier{okData =
|
|
|
|
ClassData{prior = 0.0, unseen = -2.70805020110221,
|
|
|
|
likelihoods = HashMap.fromList [("", 0.0)], n = 13},
|
|
|
|
koData =
|
|
|
|
ClassData{prior = -infinity, unseen = -0.6931471805599453,
|
|
|
|
likelihoods = HashMap.fromList [], n = 0}}),
|
|
|
|
("about <time-of-day>",
|
|
|
|
Classifier{okData =
|
|
|
|
ClassData{prior = -0.5108256237659907,
|
|
|
|
unseen = -2.1972245773362196,
|
|
|
|
likelihoods =
|
|
|
|
HashMap.fromList
|
|
|
|
[("time-of-day (latent)", -0.6931471805599453),
|
|
|
|
("hour", -0.6931471805599453)],
|
|
|
|
n = 3},
|
|
|
|
koData =
|
|
|
|
ClassData{prior = -0.916290731874155, unseen = -1.9459101490553135,
|
|
|
|
likelihoods =
|
|
|
|
HashMap.fromList
|
|
|
|
[("time-of-day (latent)", -0.6931471805599453),
|
|
|
|
("hour", -0.6931471805599453)],
|
|
|
|
n = 2}}),
|
|
|
|
("time-of-day (latent)",
|
|
|
|
Classifier{okData =
|
|
|
|
ClassData{prior = -1.006804739414987, unseen = -3.713572066704308,
|
|
|
|
likelihoods =
|
|
|
|
HashMap.fromList
|
|
|
|
[("integer (numeric)", -5.129329438755058e-2),
|
|
|
|
("integer (0..19)", -2.995732273553991)],
|
|
|
|
n = 38},
|
|
|
|
koData =
|
|
|
|
ClassData{prior = -0.4547361571149472, unseen = -4.23410650459726,
|
|
|
|
likelihoods =
|
|
|
|
HashMap.fromList
|
|
|
|
[("integer (numeric)", -0.24921579162398486),
|
|
|
|
("integer (0..19)", -1.5114575040738967)],
|
|
|
|
n = 66}}),
|
|
|
|
("year",
|
|
|
|
Classifier{okData =
|
|
|
|
ClassData{prior = -0.14842000511827333,
|
|
|
|
unseen = -3.295836866004329,
|
|
|
|
likelihoods = HashMap.fromList [("integer (numeric)", 0.0)],
|
|
|
|
n = 25},
|
|
|
|
koData =
|
|
|
|
ClassData{prior = -1.9810014688665833, unseen = -1.791759469228055,
|
|
|
|
likelihoods = HashMap.fromList [("integer (numeric)", 0.0)],
|
|
|
|
n = 4}}),
|
|
|
|
("last <day-of-week> of <time>",
|
|
|
|
Classifier{okData =
|
2017-03-14 14:50:10 +03:00
|
|
|
ClassData{prior = 0.0, unseen = -2.3025850929940455,
|
2017-03-08 21:33:55 +03:00
|
|
|
likelihoods =
|
|
|
|
HashMap.fromList
|
2017-03-14 14:50:10 +03:00
|
|
|
[("named-daynamed-month", -1.0986122886681098),
|
|
|
|
("daymonth", -0.8109302162163288),
|
|
|
|
("named-dayintersect", -1.5040773967762742)],
|
|
|
|
n = 3},
|
2017-03-08 21:33:55 +03:00
|
|
|
koData =
|
2017-03-14 14:50:10 +03:00
|
|
|
ClassData{prior = -infinity, unseen = -1.3862943611198906,
|
2017-03-08 21:33:55 +03:00
|
|
|
likelihoods = HashMap.fromList [], n = 0}}),
|
|
|
|
("<integer> <unit-of-duration>",
|
|
|
|
Classifier{okData =
|
|
|
|
ClassData{prior = -0.6375773294051346, unseen = -4.59511985013459,
|
|
|
|
likelihoods =
|
|
|
|
HashMap.fromList
|
|
|
|
[("week", -2.639057329615259),
|
|
|
|
("integer (0..19)year (grain)", -3.891820298110627),
|
|
|
|
("integer (numeric)day (grain)", -2.793208009442517),
|
|
|
|
("couple, a pairhour (grain)", -3.891820298110627),
|
|
|
|
("integer (0..19)hour (grain)", -3.891820298110627),
|
|
|
|
("second", -3.1986731175506815),
|
|
|
|
("integer (numeric)second (grain)", -3.891820298110627),
|
|
|
|
("integer (numeric)year (grain)", -3.891820298110627),
|
|
|
|
("day", -2.187072205872201), ("year", -3.4863551900024623),
|
|
|
|
("integer (numeric)week (grain)", -3.1986731175506815),
|
|
|
|
("integer (0..19)month (grain)", -3.891820298110627),
|
|
|
|
("integer (0..19)second (grain)", -3.4863551900024623),
|
|
|
|
("hour", -2.793208009442517), ("month", -3.4863551900024623),
|
|
|
|
("integer (numeric)minute (grain)", -2.793208009442517),
|
|
|
|
("integer (0..19)minute (grain)", -2.9755295662364714),
|
|
|
|
("integer (numeric)month (grain)", -3.891820298110627),
|
|
|
|
("minute", -2.2823823856765264),
|
|
|
|
("integer (numeric)hour (grain)", -3.1986731175506815),
|
|
|
|
("integer (0..19)day (grain)", -2.793208009442517),
|
|
|
|
("integer (0..19)week (grain)", -3.1986731175506815)],
|
|
|
|
n = 37},
|
|
|
|
koData =
|
|
|
|
ClassData{prior = -0.7519876805828788, unseen = -4.51085950651685,
|
|
|
|
likelihoods =
|
|
|
|
HashMap.fromList
|
|
|
|
[("week", -2.70805020110221),
|
|
|
|
("integer (0..19)year (grain)", -3.4011973816621555),
|
|
|
|
("integer (numeric)day (grain)", -3.1135153092103742),
|
|
|
|
("integer (numeric)quarter (grain)", -3.8066624897703196),
|
|
|
|
("integer (0..19)hour (grain)", -3.8066624897703196),
|
|
|
|
("second", -2.890371757896165),
|
|
|
|
("integer (numeric)second (grain)", -3.4011973816621555),
|
|
|
|
("integer (numeric)year (grain)", -3.1135153092103742),
|
|
|
|
("day", -2.70805020110221), ("quarter", -3.8066624897703196),
|
|
|
|
("year", -2.70805020110221),
|
|
|
|
("integer (numeric)week (grain)", -3.4011973816621555),
|
|
|
|
("integer (0..19)month (grain)", -3.1135153092103742),
|
|
|
|
("integer (0..19)second (grain)", -3.4011973816621555),
|
|
|
|
("hour", -2.890371757896165), ("month", -2.70805020110221),
|
|
|
|
("integer (numeric)minute (grain)", -3.4011973816621555),
|
|
|
|
("integer (0..19)minute (grain)", -3.4011973816621555),
|
|
|
|
("integer (numeric)month (grain)", -3.4011973816621555),
|
|
|
|
("minute", -2.890371757896165),
|
|
|
|
("integer (numeric)hour (grain)", -3.1135153092103742),
|
|
|
|
("integer (0..19)day (grain)", -3.4011973816621555),
|
|
|
|
("integer (0..19)week (grain)", -3.1135153092103742)],
|
|
|
|
n = 33}}),
|
|
|
|
("relative minutes after|past <integer> (hour-of-day)",
|
|
|
|
Classifier{okData =
|
|
|
|
ClassData{prior = 0.0, unseen = -2.3025850929940455,
|
|
|
|
likelihoods =
|
|
|
|
HashMap.fromList
|
|
|
|
[("hour", -0.8109302162163288),
|
|
|
|
("integer (numeric)time-of-day (latent)", -1.0986122886681098),
|
|
|
|
("integer (20..90)time-of-day (latent)", -1.5040773967762742)],
|
|
|
|
n = 3},
|
|
|
|
koData =
|
|
|
|
ClassData{prior = -infinity, unseen = -1.3862943611198906,
|
|
|
|
likelihoods = HashMap.fromList [], n = 0}}),
|
|
|
|
("a <unit-of-duration>",
|
|
|
|
Classifier{okData =
|
|
|
|
ClassData{prior = 0.0, unseen = -3.2188758248682006,
|
|
|
|
likelihoods =
|
|
|
|
HashMap.fromList
|
|
|
|
[("week", -2.0794415416798357),
|
|
|
|
("hour (grain)", -2.4849066497880004),
|
|
|
|
("second", -2.0794415416798357),
|
|
|
|
("week (grain)", -2.0794415416798357),
|
|
|
|
("day", -2.4849066497880004),
|
|
|
|
("minute (grain)", -2.4849066497880004),
|
|
|
|
("second (grain)", -2.0794415416798357),
|
|
|
|
("hour", -2.4849066497880004), ("minute", -2.4849066497880004),
|
|
|
|
("day (grain)", -2.4849066497880004)],
|
|
|
|
n = 7},
|
|
|
|
koData =
|
|
|
|
ClassData{prior = -infinity, unseen = -2.3978952727983707,
|
|
|
|
likelihoods = HashMap.fromList [], n = 0}}),
|
|
|
|
("intersect by \",\"",
|
|
|
|
Classifier{okData =
|
2017-03-14 14:50:10 +03:00
|
|
|
ClassData{prior = -8.855339734144506e-2,
|
|
|
|
unseen = -4.948759890378168,
|
2017-03-08 21:33:55 +03:00
|
|
|
likelihoods =
|
|
|
|
HashMap.fromList
|
Optimize simple time predicates
Summary:
This is the next step for:
https://fb.facebook.com/groups/527352907463243/permalink/600056483526218/
This:
* changes the time language to be able to track contradictions (`EmptyPredicate`)
* changes the time language to be able to collect non-contradicting pieces, like month and hour and unify them
* provides an efficient way to convert those pieces into (past,future) time series
* adds AMPM predicate runner - there's a bit of overlap with is12H, but it basically works
* changes a test case that was wrong before
* regenerates classifiers, I'm not sure why they changed exactly
Before:
```
res <- H.io $ let sentence = "10am thurs 4.30 thurs 12pm sat" in (debugTokens sentence $ analyze sentence (testContext {lang = EN}) HashSet.empty)
(15.50 secs, 6,171,188,928 bytes)
res <- H.io $ let sentence = "I have 9 am 12 pm 1 pm 2pm 4 pm 3 pm on Saturday" in (debugTokens sentence $ analyze sentence (testContext {lang = EN}) HashSet.empty)
(110.82 secs, 44,031,569,512 bytes)
```
After:
```
res <- H.io $ let sentence = "10am thurs 4.30 thurs 12pm sat" in (debugTokens sentence $ analyze sentence (testContext {lang = EN}) HashSet.empty)
(1.24 secs, 703,020,912 bytes)
res <- H.io $ let sentence = "I have 9 am 12 pm 1 pm 2pm 4 pm 3 pm on Saturday" in (debugTokens sentence $ analyze sentence (testContext {lang = EN}) HashSet.empty)
(9.51 secs, 5,891,109,592 bytes)
```
Reviewed By: JonCoens
Differential Revision: D4676812
fbshipit-source-id: 9810203
2017-03-14 02:49:47 +03:00
|
|
|
[("<named-month> <day-of-month> (ordinal)intersect",
|
2017-03-14 14:50:10 +03:00
|
|
|
-4.248495242049359),
|
|
|
|
("at <time-of-day>named-day", -4.248495242049359),
|
|
|
|
("intersect by \",\"year", -3.8430301339411947),
|
|
|
|
("hh:mmintersect by \",\"", -3.8430301339411947),
|
|
|
|
("dayday", -1.8971199848858813),
|
|
|
|
("hh:mmnamed-day", -4.248495242049359),
|
|
|
|
("named-dayintersect by \",\"", -3.332204510175204),
|
2017-03-08 21:33:55 +03:00
|
|
|
("named-day<named-month> <day-of-month> (ordinal)",
|
2017-03-14 14:50:10 +03:00
|
|
|
-3.1498829533812494),
|
|
|
|
("dayyear", -2.744417845273085),
|
Optimize simple time predicates
Summary:
This is the next step for:
https://fb.facebook.com/groups/527352907463243/permalink/600056483526218/
This:
* changes the time language to be able to track contradictions (`EmptyPredicate`)
* changes the time language to be able to collect non-contradicting pieces, like month and hour and unify them
* provides an efficient way to convert those pieces into (past,future) time series
* adds AMPM predicate runner - there's a bit of overlap with is12H, but it basically works
* changes a test case that was wrong before
* regenerates classifiers, I'm not sure why they changed exactly
Before:
```
res <- H.io $ let sentence = "10am thurs 4.30 thurs 12pm sat" in (debugTokens sentence $ analyze sentence (testContext {lang = EN}) HashSet.empty)
(15.50 secs, 6,171,188,928 bytes)
res <- H.io $ let sentence = "I have 9 am 12 pm 1 pm 2pm 4 pm 3 pm on Saturday" in (debugTokens sentence $ analyze sentence (testContext {lang = EN}) HashSet.empty)
(110.82 secs, 44,031,569,512 bytes)
```
After:
```
res <- H.io $ let sentence = "10am thurs 4.30 thurs 12pm sat" in (debugTokens sentence $ analyze sentence (testContext {lang = EN}) HashSet.empty)
(1.24 secs, 703,020,912 bytes)
res <- H.io $ let sentence = "I have 9 am 12 pm 1 pm 2pm 4 pm 3 pm on Saturday" in (debugTokens sentence $ analyze sentence (testContext {lang = EN}) HashSet.empty)
(9.51 secs, 5,891,109,592 bytes)
```
Reviewed By: JonCoens
Differential Revision: D4676812
fbshipit-source-id: 9810203
2017-03-14 02:49:47 +03:00
|
|
|
("<named-month> <day-of-month> (non ordinal)intersect",
|
2017-03-14 14:50:10 +03:00
|
|
|
-4.248495242049359),
|
2017-03-08 21:33:55 +03:00
|
|
|
("intersect by \",\"<day-of-month> (non ordinal) <named-month>",
|
2017-03-14 14:50:10 +03:00
|
|
|
-3.8430301339411947),
|
2017-03-08 21:33:55 +03:00
|
|
|
("<named-month> <day-of-month> (non ordinal)named-day",
|
2017-03-14 14:50:10 +03:00
|
|
|
-4.248495242049359),
|
2017-03-08 21:33:55 +03:00
|
|
|
("named-day<day-of-month> (non ordinal) <named-month>",
|
2017-03-14 14:50:10 +03:00
|
|
|
-3.5553480614894135),
|
2017-03-08 21:33:55 +03:00
|
|
|
("named-day<named-month> <day-of-month> (non ordinal)",
|
2017-03-14 14:50:10 +03:00
|
|
|
-3.1498829533812494),
|
|
|
|
("hh:mmintersect", -3.8430301339411947),
|
|
|
|
("intersect by \",\"intersect", -3.8430301339411947),
|
|
|
|
("named-dayintersect", -3.8430301339411947),
|
|
|
|
("at <time-of-day>intersect", -3.8430301339411947),
|
|
|
|
("dayminute", -2.5437471498109336),
|
|
|
|
("intersectyear", -3.8430301339411947),
|
|
|
|
("minuteday", -2.108429078553088),
|
|
|
|
("hh:mmabsorption of , after named day", -4.248495242049359),
|
|
|
|
("at <time-of-day>intersect by \",\"", -3.8430301339411947),
|
2017-03-08 21:33:55 +03:00
|
|
|
("at <time-of-day>absorption of , after named day",
|
2017-03-14 14:50:10 +03:00
|
|
|
-4.248495242049359),
|
|
|
|
("intersectintersect", -3.8430301339411947),
|
2017-03-08 21:33:55 +03:00
|
|
|
("intersect by \",\"<day-of-month>(ordinal) <named-month>",
|
2017-03-14 14:50:10 +03:00
|
|
|
-3.8430301339411947),
|
2017-03-08 21:33:55 +03:00
|
|
|
("named-day<day-of-month>(ordinal) <named-month>",
|
2017-03-14 14:50:10 +03:00
|
|
|
-3.5553480614894135),
|
2017-03-08 21:33:55 +03:00
|
|
|
("<named-month> <day-of-month> (ordinal)named-day",
|
2017-03-14 14:50:10 +03:00
|
|
|
-4.248495242049359),
|
2017-03-08 21:33:55 +03:00
|
|
|
("<named-month> <day-of-month> (ordinal)year",
|
2017-03-14 14:50:10 +03:00
|
|
|
-3.8430301339411947),
|
2017-03-08 21:33:55 +03:00
|
|
|
("<named-month> <day-of-month> (non ordinal)year",
|
2017-03-14 14:50:10 +03:00
|
|
|
-3.8430301339411947)],
|
|
|
|
n = 54},
|
2017-03-08 21:33:55 +03:00
|
|
|
koData =
|
2017-03-14 14:50:10 +03:00
|
|
|
ClassData{prior = -2.468099531471619, unseen = -3.7612001156935624,
|
2017-03-08 21:33:55 +03:00
|
|
|
likelihoods =
|
|
|
|
HashMap.fromList
|
2017-03-14 14:50:10 +03:00
|
|
|
[("named-daynamed-month", -1.9459101490553135),
|
|
|
|
("daymonth", -1.9459101490553135)],
|
2017-03-08 21:33:55 +03:00
|
|
|
n = 5}}),
|
|
|
|
("hh:mm",
|
|
|
|
Classifier{okData =
|
|
|
|
ClassData{prior = -2.197890671877523e-2,
|
|
|
|
unseen = -3.8501476017100584,
|
|
|
|
likelihoods = HashMap.fromList [("", 0.0)], n = 45},
|
|
|
|
koData =
|
|
|
|
ClassData{prior = -3.828641396489095, unseen = -1.0986122886681098,
|
|
|
|
likelihoods = HashMap.fromList [("", 0.0)], n = 1}}),
|
|
|
|
("quarter after|past <integer> (hour-of-day)",
|
|
|
|
Classifier{okData =
|
|
|
|
ClassData{prior = 0.0, unseen = -1.6094379124341003,
|
|
|
|
likelihoods =
|
|
|
|
HashMap.fromList
|
|
|
|
[("time-of-day (latent)", -0.6931471805599453),
|
|
|
|
("hour", -0.6931471805599453)],
|
|
|
|
n = 1},
|
|
|
|
koData =
|
|
|
|
ClassData{prior = -infinity, unseen = -1.0986122886681098,
|
|
|
|
likelihoods = HashMap.fromList [], n = 0}}),
|
|
|
|
("named-day",
|
|
|
|
Classifier{okData =
|
|
|
|
ClassData{prior = 0.0, unseen = -4.248495242049359,
|
|
|
|
likelihoods = HashMap.fromList [("", 0.0)], n = 68},
|
|
|
|
koData =
|
|
|
|
ClassData{prior = -infinity, unseen = -0.6931471805599453,
|
|
|
|
likelihoods = HashMap.fromList [], n = 0}}),
|
|
|
|
("second (grain)",
|
|
|
|
Classifier{okData =
|
|
|
|
ClassData{prior = 0.0, unseen = -2.1972245773362196,
|
|
|
|
likelihoods = HashMap.fromList [("", 0.0)], n = 7},
|
|
|
|
koData =
|
|
|
|
ClassData{prior = -infinity, unseen = -0.6931471805599453,
|
|
|
|
likelihoods = HashMap.fromList [], n = 0}}),
|
|
|
|
("<time-of-day> sharp",
|
|
|
|
Classifier{okData =
|
|
|
|
ClassData{prior = 0.0, unseen = -2.4849066497880004,
|
|
|
|
likelihoods =
|
|
|
|
HashMap.fromList
|
|
|
|
[("at <time-of-day>", -1.2992829841302609),
|
|
|
|
("time-of-day (latent)", -1.2992829841302609),
|
|
|
|
("hour", -0.7884573603642702)],
|
|
|
|
n = 4},
|
|
|
|
koData =
|
|
|
|
ClassData{prior = -infinity, unseen = -1.3862943611198906,
|
|
|
|
likelihoods = HashMap.fromList [], n = 0}}),
|
|
|
|
("intersect by \"of\", \"from\", \"'s\"",
|
|
|
|
Classifier{okData =
|
2017-03-14 14:50:10 +03:00
|
|
|
ClassData{prior = -0.8649974374866046,
|
|
|
|
unseen = -3.1354942159291497,
|
2017-03-08 21:33:55 +03:00
|
|
|
likelihoods =
|
|
|
|
HashMap.fromList
|
2017-03-14 14:50:10 +03:00
|
|
|
[("named-daynamed-month", -1.4816045409242156),
|
|
|
|
("daymonth", -1.1451323043030026),
|
|
|
|
("named-daylast <cycle>", -2.3978952727983707),
|
|
|
|
("named-daynext <cycle>", -2.3978952727983707),
|
|
|
|
("named-dayintersect", -1.9924301646902063),
|
|
|
|
("dayweek", -1.9924301646902063)],
|
|
|
|
n = 8},
|
2017-03-08 21:33:55 +03:00
|
|
|
koData =
|
2017-03-14 14:50:10 +03:00
|
|
|
ClassData{prior = -0.5465437063680699, unseen = -3.367295829986474,
|
2017-03-08 21:33:55 +03:00
|
|
|
likelihoods =
|
|
|
|
HashMap.fromList
|
2017-03-14 14:50:10 +03:00
|
|
|
[("named-daynamed-month", -1.3862943611198906),
|
|
|
|
("daymonth", -0.8472978603872037),
|
|
|
|
("named-dayintersect", -1.540445040947149)],
|
|
|
|
n = 11}}),
|
2017-03-08 21:33:55 +03:00
|
|
|
("<duration> ago",
|
|
|
|
Classifier{okData =
|
|
|
|
ClassData{prior = 0.0, unseen = -3.4965075614664802,
|
|
|
|
likelihoods =
|
|
|
|
HashMap.fromList
|
|
|
|
[("week", -1.6739764335716716), ("day", -1.8562979903656263),
|
|
|
|
("year", -2.367123614131617),
|
|
|
|
("<integer> <unit-of-duration>", -0.9007865453381898),
|
|
|
|
("a <unit-of-duration>", -2.772588722239781),
|
|
|
|
("month", -2.367123614131617)],
|
|
|
|
n = 13},
|
|
|
|
koData =
|
|
|
|
ClassData{prior = -infinity, unseen = -1.9459101490553135,
|
|
|
|
likelihoods = HashMap.fromList [], n = 0}}),
|
|
|
|
("last <time>",
|
|
|
|
Classifier{okData =
|
|
|
|
ClassData{prior = -2.8134107167600364, unseen = -2.772588722239781,
|
|
|
|
likelihoods =
|
|
|
|
HashMap.fromList
|
|
|
|
[("day", -1.6094379124341003),
|
|
|
|
("named-day", -1.6094379124341003),
|
|
|
|
("hour", -2.0149030205422647),
|
|
|
|
("week-end", -2.0149030205422647)],
|
|
|
|
n = 3},
|
|
|
|
koData =
|
|
|
|
ClassData{prior = -6.187540371808753e-2,
|
|
|
|
unseen = -4.6443908991413725,
|
|
|
|
likelihoods =
|
|
|
|
HashMap.fromList
|
|
|
|
[("year (latent)", -1.7443572303334711),
|
|
|
|
("day", -1.9956716586143772),
|
|
|
|
("time-of-day (latent)", -1.7443572303334711),
|
|
|
|
("year", -1.7443572303334711),
|
|
|
|
("named-day", -3.536116699561526),
|
|
|
|
("intersect by \"of\", \"from\", \"'s\"", -3.536116699561526),
|
|
|
|
("<day-of-month> (ordinal)", -2.33214389523559),
|
|
|
|
("hour", -1.7443572303334711)],
|
|
|
|
n = 47}}),
|
|
|
|
("<day-of-month> (ordinal)",
|
|
|
|
Classifier{okData =
|
|
|
|
ClassData{prior = -1.0360919316867756, unseen = -2.639057329615259,
|
|
|
|
likelihoods =
|
|
|
|
HashMap.fromList
|
|
|
|
[("ordinals (first..31st)", -1.1786549963416462),
|
|
|
|
("ordinal (digits)", -0.3677247801253174)],
|
|
|
|
n = 11},
|
|
|
|
koData =
|
|
|
|
ClassData{prior = -0.4382549309311553,
|
|
|
|
unseen = -3.1354942159291497,
|
|
|
|
likelihoods =
|
|
|
|
HashMap.fromList [("ordinal (digits)", -4.652001563489282e-2)],
|
|
|
|
n = 20}}),
|
|
|
|
("the day after tomorrow",
|
|
|
|
Classifier{okData =
|
|
|
|
ClassData{prior = 0.0, unseen = -1.0986122886681098,
|
|
|
|
likelihoods = HashMap.fromList [("", 0.0)], n = 1},
|
|
|
|
koData =
|
|
|
|
ClassData{prior = -infinity, unseen = -0.6931471805599453,
|
|
|
|
likelihoods = HashMap.fromList [], n = 0}}),
|
|
|
|
("noon",
|
|
|
|
Classifier{okData =
|
|
|
|
ClassData{prior = 0.0, unseen = -1.0986122886681098,
|
|
|
|
likelihoods = HashMap.fromList [("", 0.0)], n = 1},
|
|
|
|
koData =
|
|
|
|
ClassData{prior = -infinity, unseen = -0.6931471805599453,
|
|
|
|
likelihoods = HashMap.fromList [], n = 0}}),
|
|
|
|
("until <time-of-day>",
|
|
|
|
Classifier{okData =
|
2017-03-14 14:50:10 +03:00
|
|
|
ClassData{prior = -0.2876820724517809, unseen = -2.890371757896165,
|
2017-03-08 21:33:55 +03:00
|
|
|
likelihoods =
|
|
|
|
HashMap.fromList
|
2017-03-14 14:50:10 +03:00
|
|
|
[("at <time-of-day>", -0.8873031950009028),
|
|
|
|
("hour", -0.8873031950009028)],
|
2017-03-08 21:33:55 +03:00
|
|
|
n = 6},
|
|
|
|
koData =
|
2017-03-14 14:50:10 +03:00
|
|
|
ClassData{prior = -1.3862943611198906,
|
|
|
|
unseen = -2.3025850929940455,
|
2017-03-08 21:33:55 +03:00
|
|
|
likelihoods =
|
|
|
|
HashMap.fromList
|
2017-03-14 14:50:10 +03:00
|
|
|
[("intersect", -1.5040773967762742),
|
|
|
|
("hh:mm", -1.5040773967762742),
|
|
|
|
("minute", -1.0986122886681098)],
|
|
|
|
n = 2}}),
|
2017-03-08 21:33:55 +03:00
|
|
|
("<integer> and an half hours",
|
|
|
|
Classifier{okData =
|
|
|
|
ClassData{prior = 0.0, unseen = -1.6094379124341003,
|
|
|
|
likelihoods =
|
|
|
|
HashMap.fromList
|
|
|
|
[("integer (numeric)", -0.6931471805599453),
|
|
|
|
("integer (0..19)", -0.6931471805599453)],
|
|
|
|
n = 2},
|
|
|
|
koData =
|
|
|
|
ClassData{prior = -infinity, unseen = -1.0986122886681098,
|
|
|
|
likelihoods = HashMap.fromList [], n = 0}}),
|
|
|
|
("after <duration>",
|
|
|
|
Classifier{okData =
|
|
|
|
ClassData{prior = 0.0, unseen = -1.9459101490553135,
|
|
|
|
likelihoods =
|
|
|
|
HashMap.fromList
|
|
|
|
[("day", -0.6931471805599453),
|
|
|
|
("<integer> <unit-of-duration>", -0.6931471805599453)],
|
|
|
|
n = 2},
|
|
|
|
koData =
|
|
|
|
ClassData{prior = -infinity, unseen = -1.0986122886681098,
|
|
|
|
likelihoods = HashMap.fromList [], n = 0}}),
|
|
|
|
("evening",
|
|
|
|
Classifier{okData =
|
|
|
|
ClassData{prior = 0.0, unseen = -1.3862943611198906,
|
|
|
|
likelihoods = HashMap.fromList [("", 0.0)], n = 2},
|
|
|
|
koData =
|
|
|
|
ClassData{prior = -infinity, unseen = -0.6931471805599453,
|
|
|
|
likelihoods = HashMap.fromList [], n = 0}}),
|
|
|
|
("decimal number",
|
|
|
|
Classifier{okData =
|
|
|
|
ClassData{prior = -infinity, unseen = -0.6931471805599453,
|
|
|
|
likelihoods = HashMap.fromList [], n = 0},
|
|
|
|
koData =
|
|
|
|
ClassData{prior = 0.0, unseen = -1.0986122886681098,
|
|
|
|
likelihoods = HashMap.fromList [("", 0.0)], n = 1}}),
|
|
|
|
("next <time>",
|
|
|
|
Classifier{okData =
|
|
|
|
ClassData{prior = -0.6931471805599453,
|
|
|
|
unseen = -2.5649493574615367,
|
|
|
|
likelihoods =
|
|
|
|
HashMap.fromList
|
|
|
|
[("named-month", -1.791759469228055),
|
|
|
|
("day", -1.0986122886681098),
|
|
|
|
("named-day", -1.0986122886681098),
|
|
|
|
("month", -1.791759469228055)],
|
|
|
|
n = 4},
|
|
|
|
koData =
|
|
|
|
ClassData{prior = -0.6931471805599453,
|
|
|
|
unseen = -2.5649493574615367,
|
|
|
|
likelihoods =
|
|
|
|
HashMap.fromList
|
|
|
|
[("named-month", -1.791759469228055),
|
|
|
|
("day", -1.0986122886681098),
|
|
|
|
("named-day", -1.0986122886681098),
|
|
|
|
("month", -1.791759469228055)],
|
|
|
|
n = 4}}),
|
|
|
|
("last <cycle>",
|
|
|
|
Classifier{okData =
|
|
|
|
ClassData{prior = -0.587786664902119, unseen = -2.9444389791664407,
|
|
|
|
likelihoods =
|
|
|
|
HashMap.fromList
|
|
|
|
[("week", -1.5040773967762742),
|
|
|
|
("month (grain)", -2.1972245773362196),
|
|
|
|
("year (grain)", -2.1972245773362196),
|
|
|
|
("week (grain)", -1.5040773967762742),
|
|
|
|
("year", -2.1972245773362196), ("month", -2.1972245773362196)],
|
|
|
|
n = 5},
|
|
|
|
koData =
|
|
|
|
ClassData{prior = -0.8109302162163288, unseen = -2.833213344056216,
|
|
|
|
likelihoods =
|
|
|
|
HashMap.fromList
|
|
|
|
[("week", -1.6739764335716716),
|
|
|
|
("week (grain)", -1.6739764335716716),
|
|
|
|
("day", -1.6739764335716716),
|
|
|
|
("day (grain)", -1.6739764335716716)],
|
|
|
|
n = 4}}),
|
|
|
|
("christmas",
|
|
|
|
Classifier{okData =
|
|
|
|
ClassData{prior = 0.0, unseen = -1.0986122886681098,
|
|
|
|
likelihoods = HashMap.fromList [("", 0.0)], n = 1},
|
|
|
|
koData =
|
|
|
|
ClassData{prior = -infinity, unseen = -0.6931471805599453,
|
|
|
|
likelihoods = HashMap.fromList [], n = 0}}),
|
|
|
|
("new year's day",
|
|
|
|
Classifier{okData =
|
|
|
|
ClassData{prior = 0.0, unseen = -1.3862943611198906,
|
|
|
|
likelihoods = HashMap.fromList [("", 0.0)], n = 2},
|
|
|
|
koData =
|
|
|
|
ClassData{prior = -infinity, unseen = -0.6931471805599453,
|
|
|
|
likelihoods = HashMap.fromList [], n = 0}}),
|
|
|
|
("next n <cycle>",
|
|
|
|
Classifier{okData =
|
|
|
|
ClassData{prior = 0.0, unseen = -3.912023005428146,
|
|
|
|
likelihoods =
|
|
|
|
HashMap.fromList
|
|
|
|
[("week", -2.793208009442517),
|
|
|
|
("integer (0..19)year (grain)", -3.1986731175506815),
|
|
|
|
("integer (numeric)day (grain)", -3.1986731175506815),
|
|
|
|
("integer (0..19)hour (grain)", -3.1986731175506815),
|
|
|
|
("second", -2.793208009442517),
|
|
|
|
("integer (numeric)second (grain)", -3.1986731175506815),
|
|
|
|
("integer (numeric)year (grain)", -3.1986731175506815),
|
|
|
|
("day", -2.793208009442517), ("year", -2.793208009442517),
|
|
|
|
("integer (numeric)week (grain)", -3.1986731175506815),
|
|
|
|
("integer (0..19)month (grain)", -3.1986731175506815),
|
|
|
|
("integer (0..19)second (grain)", -3.1986731175506815),
|
|
|
|
("hour", -2.793208009442517), ("month", -2.793208009442517),
|
|
|
|
("integer (numeric)minute (grain)", -3.1986731175506815),
|
|
|
|
("integer (0..19)minute (grain)", -3.1986731175506815),
|
|
|
|
("integer (numeric)month (grain)", -3.1986731175506815),
|
|
|
|
("minute", -2.793208009442517),
|
|
|
|
("integer (numeric)hour (grain)", -3.1986731175506815),
|
|
|
|
("integer (0..19)day (grain)", -3.1986731175506815),
|
|
|
|
("integer (0..19)week (grain)", -3.1986731175506815)],
|
|
|
|
n = 14},
|
|
|
|
koData =
|
|
|
|
ClassData{prior = -infinity, unseen = -3.0910424533583156,
|
|
|
|
likelihoods = HashMap.fromList [], n = 0}}),
|
|
|
|
("in <duration>",
|
|
|
|
Classifier{okData =
|
|
|
|
ClassData{prior = 0.0, unseen = -4.3694478524670215,
|
|
|
|
likelihoods =
|
|
|
|
HashMap.fromList
|
|
|
|
[("week", -3.258096538021482),
|
|
|
|
("<integer> more <unit-of-duration>", -3.258096538021482),
|
|
|
|
("number.number hours", -3.6635616461296463),
|
|
|
|
("second", -2.9704144655697013), ("day", -2.5649493574615367),
|
|
|
|
("half an hour", -3.6635616461296463),
|
|
|
|
("<integer> <unit-of-duration>", -1.3121863889661687),
|
|
|
|
("a <unit-of-duration>", -2.5649493574615367),
|
|
|
|
("<integer> and an half hours", -3.258096538021482),
|
|
|
|
("hour", -2.4107986776342782), ("minute", -1.466337068793427),
|
|
|
|
("about <duration>", -3.258096538021482)],
|
|
|
|
n = 33},
|
|
|
|
koData =
|
|
|
|
ClassData{prior = -infinity, unseen = -2.5649493574615367,
|
|
|
|
likelihoods = HashMap.fromList [], n = 0}}),
|
|
|
|
("<datetime> - <datetime> (interval)",
|
|
|
|
Classifier{okData =
|
2017-03-14 14:50:10 +03:00
|
|
|
ClassData{prior = -0.579818495252942, unseen = -4.304065093204169,
|
2017-03-08 21:33:55 +03:00
|
|
|
likelihoods =
|
|
|
|
HashMap.fromList
|
|
|
|
[("<day-of-month>(ordinal) <named-month><day-of-month> (non ordinal) <named-month>",
|
2017-03-14 14:50:10 +03:00
|
|
|
-2.498699971920336),
|
2017-03-08 21:33:55 +03:00
|
|
|
("<day-of-month> (non ordinal) <named-month><day-of-month>(ordinal) <named-month>",
|
2017-03-14 14:50:10 +03:00
|
|
|
-2.498699971920336),
|
|
|
|
("minuteminute", -2.093234863812172),
|
|
|
|
("hh:mmhh:mm", -2.093234863812172),
|
|
|
|
("dayday", -1.2459370034249682),
|
2017-03-08 21:33:55 +03:00
|
|
|
("<day-of-month> (non ordinal) <named-month><day-of-month> (non ordinal) <named-month>",
|
2017-03-14 14:50:10 +03:00
|
|
|
-2.498699971920336),
|
2017-03-08 21:33:55 +03:00
|
|
|
("<day-of-month>(ordinal) <named-month><day-of-month>(ordinal) <named-month>",
|
2017-03-14 14:50:10 +03:00
|
|
|
-2.498699971920336)],
|
2017-03-08 21:33:55 +03:00
|
|
|
n = 28},
|
|
|
|
koData =
|
2017-03-14 14:50:10 +03:00
|
|
|
ClassData{prior = -0.8209805520698302, unseen = -4.127134385045092,
|
2017-03-08 21:33:55 +03:00
|
|
|
likelihoods =
|
|
|
|
HashMap.fromList
|
2017-03-14 14:50:10 +03:00
|
|
|
[("monthday", -1.7129785913749407),
|
|
|
|
("minuteminute", -2.164963715117998),
|
|
|
|
("hh:mmhh:mm", -3.417726683613366),
|
|
|
|
("dayyear", -3.012261575505202),
|
2017-03-08 21:33:55 +03:00
|
|
|
("year<hour-of-day> <integer> (as relative minutes)",
|
2017-03-14 14:50:10 +03:00
|
|
|
-3.012261575505202),
|
|
|
|
("hh:mmintersect", -2.3191143949452564),
|
2017-03-08 21:33:55 +03:00
|
|
|
("named-month<day-of-month>(ordinal) <named-month>",
|
2017-03-14 14:50:10 +03:00
|
|
|
-2.3191143949452564),
|
|
|
|
("dd/mmyear", -3.012261575505202),
|
2017-03-08 21:33:55 +03:00
|
|
|
("named-month<day-of-month> (non ordinal) <named-month>",
|
2017-03-14 14:50:10 +03:00
|
|
|
-2.3191143949452564),
|
2017-03-08 21:33:55 +03:00
|
|
|
("<hour-of-day> <integer> (as relative minutes)year",
|
2017-03-14 14:50:10 +03:00
|
|
|
-3.012261575505202),
|
|
|
|
("minuteyear", -3.012261575505202),
|
|
|
|
("yearminute", -3.012261575505202)],
|
|
|
|
n = 22}}),
|
2017-03-08 21:33:55 +03:00
|
|
|
("<time-of-day> - <time-of-day> (interval)",
|
|
|
|
Classifier{okData =
|
|
|
|
ClassData{prior = -0.8109302162163288, unseen = -3.044522437723423,
|
|
|
|
likelihoods =
|
|
|
|
HashMap.fromList
|
|
|
|
[("minuteminute", -0.7985076962177716),
|
|
|
|
("hh:mmhh:mm", -0.7985076962177716)],
|
|
|
|
n = 8},
|
|
|
|
koData =
|
|
|
|
ClassData{prior = -0.587786664902119, unseen = -3.2188758248682006,
|
|
|
|
likelihoods =
|
|
|
|
HashMap.fromList
|
|
|
|
[("hh:mmtime-of-day (latent)", -0.8754687373538999),
|
|
|
|
("minuteminute", -2.4849066497880004),
|
|
|
|
("hh:mmhh:mm", -2.4849066497880004),
|
|
|
|
("minutehour", -0.8754687373538999)],
|
|
|
|
n = 10}}),
|
|
|
|
("last n <cycle>",
|
|
|
|
Classifier{okData =
|
|
|
|
ClassData{prior = 0.0, unseen = -4.007333185232471,
|
|
|
|
likelihoods =
|
|
|
|
HashMap.fromList
|
|
|
|
[("week", -2.6026896854443837),
|
|
|
|
("integer (0..19)year (grain)", -3.295836866004329),
|
|
|
|
("integer (numeric)day (grain)", -2.890371757896165),
|
|
|
|
("second", -2.890371757896165),
|
|
|
|
("integer (numeric)second (grain)", -3.295836866004329),
|
|
|
|
("integer (numeric)year (grain)", -2.890371757896165),
|
|
|
|
("day", -2.6026896854443837), ("year", -2.6026896854443837),
|
|
|
|
("integer (numeric)week (grain)", -3.295836866004329),
|
|
|
|
("integer (0..19)month (grain)", -2.890371757896165),
|
|
|
|
("integer (0..19)second (grain)", -3.295836866004329),
|
|
|
|
("hour", -3.295836866004329), ("month", -2.6026896854443837),
|
|
|
|
("integer (numeric)minute (grain)", -3.295836866004329),
|
|
|
|
("integer (0..19)minute (grain)", -3.295836866004329),
|
|
|
|
("integer (numeric)month (grain)", -3.295836866004329),
|
|
|
|
("minute", -2.890371757896165),
|
|
|
|
("integer (numeric)hour (grain)", -3.295836866004329),
|
|
|
|
("integer (0..19)day (grain)", -3.295836866004329),
|
|
|
|
("integer (0..19)week (grain)", -2.890371757896165)],
|
|
|
|
n = 17},
|
|
|
|
koData =
|
|
|
|
ClassData{prior = -infinity, unseen = -3.044522437723423,
|
|
|
|
likelihoods = HashMap.fromList [], n = 0}}),
|
|
|
|
("<named-month> <day-of-month> (non ordinal)",
|
|
|
|
Classifier{okData =
|
|
|
|
ClassData{prior = -0.4418327522790392, unseen = -3.044522437723423,
|
|
|
|
likelihoods =
|
|
|
|
HashMap.fromList
|
|
|
|
[("named-monthinteger (numeric)", -0.6931471805599453),
|
|
|
|
("month", -0.6931471805599453)],
|
|
|
|
n = 9},
|
|
|
|
koData =
|
|
|
|
ClassData{prior = -1.0296194171811581,
|
|
|
|
unseen = -2.5649493574615367,
|
|
|
|
likelihoods =
|
|
|
|
HashMap.fromList
|
|
|
|
[("named-monthinteger (numeric)", -0.6931471805599453),
|
|
|
|
("month", -0.6931471805599453)],
|
|
|
|
n = 5}}),
|
|
|
|
("<day-of-month> (non ordinal) <named-month>",
|
|
|
|
Classifier{okData =
|
|
|
|
ClassData{prior = -0.1466034741918754, unseen = -3.713572066704308,
|
|
|
|
likelihoods =
|
|
|
|
HashMap.fromList
|
|
|
|
[("integer (numeric)named-month", -0.6931471805599453),
|
|
|
|
("month", -0.6931471805599453)],
|
|
|
|
n = 19},
|
|
|
|
koData =
|
|
|
|
ClassData{prior = -1.9924301646902063,
|
|
|
|
unseen = -2.1972245773362196,
|
|
|
|
likelihoods =
|
|
|
|
HashMap.fromList
|
|
|
|
[("integer (numeric)named-month", -0.6931471805599453),
|
|
|
|
("month", -0.6931471805599453)],
|
|
|
|
n = 3}}),
|
|
|
|
("this|next <day-of-week>",
|
|
|
|
Classifier{okData =
|
|
|
|
ClassData{prior = -0.6931471805599453,
|
|
|
|
unseen = -2.1972245773362196,
|
|
|
|
likelihoods =
|
|
|
|
HashMap.fromList
|
|
|
|
[("day", -0.6931471805599453),
|
|
|
|
("named-day", -0.6931471805599453)],
|
|
|
|
n = 3},
|
|
|
|
koData =
|
|
|
|
ClassData{prior = -0.6931471805599453,
|
|
|
|
unseen = -2.1972245773362196,
|
|
|
|
likelihoods =
|
|
|
|
HashMap.fromList
|
|
|
|
[("day", -0.6931471805599453),
|
|
|
|
("named-day", -0.6931471805599453)],
|
|
|
|
n = 3}}),
|
|
|
|
("ordinal (digits)",
|
|
|
|
Classifier{okData =
|
|
|
|
ClassData{prior = -1.9212175364649418,
|
|
|
|
unseen = -3.7612001156935624,
|
|
|
|
likelihoods = HashMap.fromList [("", 0.0)], n = 41},
|
|
|
|
koData =
|
|
|
|
ClassData{prior = -0.158326051237739, unseen = -5.484796933490655,
|
|
|
|
likelihoods = HashMap.fromList [("", 0.0)], n = 239}}),
|
|
|
|
("quarter (grain)",
|
|
|
|
Classifier{okData =
|
|
|
|
ClassData{prior = 0.0, unseen = -2.1972245773362196,
|
|
|
|
likelihoods = HashMap.fromList [("", 0.0)], n = 7},
|
|
|
|
koData =
|
|
|
|
ClassData{prior = -infinity, unseen = -0.6931471805599453,
|
|
|
|
likelihoods = HashMap.fromList [], n = 0}}),
|
|
|
|
("last <cycle> of <time>",
|
|
|
|
Classifier{okData =
|
2017-03-14 14:50:10 +03:00
|
|
|
ClassData{prior = 0.0, unseen = -3.1354942159291497,
|
2017-03-08 21:33:55 +03:00
|
|
|
likelihoods =
|
|
|
|
HashMap.fromList
|
2017-03-14 14:50:10 +03:00
|
|
|
[("daymonth", -1.4816045409242156),
|
|
|
|
("week (grain)named-month", -1.9924301646902063),
|
|
|
|
("day (grain)intersect", -1.9924301646902063),
|
|
|
|
("weekmonth", -1.4816045409242156),
|
|
|
|
("day (grain)named-month", -1.9924301646902063),
|
|
|
|
("week (grain)intersect", -1.9924301646902063)],
|
|
|
|
n = 8},
|
2017-03-08 21:33:55 +03:00
|
|
|
koData =
|
2017-03-14 14:50:10 +03:00
|
|
|
ClassData{prior = -infinity, unseen = -1.9459101490553135,
|
2017-03-08 21:33:55 +03:00
|
|
|
likelihoods = HashMap.fromList [], n = 0}}),
|
|
|
|
("<day-of-month>(ordinal) <named-month> year",
|
|
|
|
Classifier{okData =
|
|
|
|
ClassData{prior = -0.587786664902119, unseen = -2.639057329615259,
|
|
|
|
likelihoods =
|
|
|
|
HashMap.fromList
|
|
|
|
[("ordinals (first..31st)named-month", -1.466337068793427),
|
|
|
|
("ordinal (digits)named-month", -1.1786549963416462),
|
|
|
|
("month", -0.7731898882334817)],
|
|
|
|
n = 5},
|
|
|
|
koData =
|
|
|
|
ClassData{prior = -0.8109302162163288,
|
|
|
|
unseen = -2.4849066497880004,
|
|
|
|
likelihoods =
|
|
|
|
HashMap.fromList
|
|
|
|
[("ordinal (digits)named-month", -0.7884573603642702),
|
|
|
|
("month", -0.7884573603642702)],
|
|
|
|
n = 4}}),
|
|
|
|
("morning",
|
|
|
|
Classifier{okData =
|
|
|
|
ClassData{prior = -0.6931471805599453,
|
|
|
|
unseen = -1.0986122886681098,
|
|
|
|
likelihoods = HashMap.fromList [("", 0.0)], n = 1},
|
|
|
|
koData =
|
|
|
|
ClassData{prior = -0.6931471805599453,
|
|
|
|
unseen = -1.0986122886681098,
|
|
|
|
likelihoods = HashMap.fromList [("", 0.0)], n = 1}}),
|
|
|
|
("week-end",
|
|
|
|
Classifier{okData =
|
|
|
|
ClassData{prior = 0.0, unseen = -1.791759469228055,
|
|
|
|
likelihoods = HashMap.fromList [("", 0.0)], n = 4},
|
|
|
|
koData =
|
|
|
|
ClassData{prior = -infinity, unseen = -0.6931471805599453,
|
|
|
|
likelihoods = HashMap.fromList [], n = 0}}),
|
|
|
|
("after <time-of-day>",
|
|
|
|
Classifier{okData =
|
2017-03-14 14:50:10 +03:00
|
|
|
ClassData{prior = -0.8109302162163288, unseen = -3.367295829986474,
|
2017-03-08 21:33:55 +03:00
|
|
|
likelihoods =
|
|
|
|
HashMap.fromList
|
2017-03-14 14:50:10 +03:00
|
|
|
[("at <time-of-day>", -1.7227665977411035),
|
|
|
|
("intersect", -2.2335922215070942),
|
|
|
|
("tomorrow", -2.2335922215070942), ("day", -2.2335922215070942),
|
|
|
|
("hour", -1.3862943611198906)],
|
|
|
|
n = 8},
|
2017-03-08 21:33:55 +03:00
|
|
|
koData =
|
2017-03-14 14:50:10 +03:00
|
|
|
ClassData{prior = -0.587786664902119, unseen = -3.4965075614664802,
|
2017-03-08 21:33:55 +03:00
|
|
|
likelihoods =
|
|
|
|
HashMap.fromList
|
2017-03-14 14:50:10 +03:00
|
|
|
[("lunch", -2.772588722239781),
|
|
|
|
("year (latent)", -2.0794415416798357),
|
|
|
|
("day", -2.367123614131617),
|
|
|
|
("time-of-day (latent)", -2.0794415416798357),
|
|
|
|
("year", -2.0794415416798357), ("hh:mm", -2.772588722239781),
|
|
|
|
("<day-of-month> (ordinal)", -2.367123614131617),
|
|
|
|
("hour", -1.8562979903656263), ("minute", -2.772588722239781)],
|
2017-03-08 21:33:55 +03:00
|
|
|
n = 10}}),
|
|
|
|
("day (grain)",
|
|
|
|
Classifier{okData =
|
|
|
|
ClassData{prior = 0.0, unseen = -3.044522437723423,
|
|
|
|
likelihoods = HashMap.fromList [("", 0.0)], n = 19},
|
|
|
|
koData =
|
|
|
|
ClassData{prior = -infinity, unseen = -0.6931471805599453,
|
|
|
|
likelihoods = HashMap.fromList [], n = 0}}),
|
|
|
|
("<month> dd-dd (interval)",
|
|
|
|
Classifier{okData =
|
|
|
|
ClassData{prior = 0.0, unseen = -2.1972245773362196,
|
|
|
|
likelihoods =
|
|
|
|
HashMap.fromList
|
|
|
|
[("named-month", -0.6931471805599453),
|
|
|
|
("month", -0.6931471805599453)],
|
|
|
|
n = 3},
|
|
|
|
koData =
|
|
|
|
ClassData{prior = -infinity, unseen = -1.0986122886681098,
|
|
|
|
likelihoods = HashMap.fromList [], n = 0}}),
|
|
|
|
("about <duration>",
|
|
|
|
Classifier{okData =
|
|
|
|
ClassData{prior = 0.0, unseen = -1.9459101490553135,
|
|
|
|
likelihoods =
|
|
|
|
HashMap.fromList
|
|
|
|
[("half an hour", -0.6931471805599453),
|
|
|
|
("minute", -0.6931471805599453)],
|
|
|
|
n = 2},
|
|
|
|
koData =
|
|
|
|
ClassData{prior = -infinity, unseen = -1.0986122886681098,
|
|
|
|
likelihoods = HashMap.fromList [], n = 0}}),
|
|
|
|
("<hour-of-day> <integer> (as relative minutes)",
|
|
|
|
Classifier{okData =
|
|
|
|
ClassData{prior = -infinity, unseen = -1.0986122886681098,
|
|
|
|
likelihoods = HashMap.fromList [], n = 0},
|
|
|
|
koData =
|
|
|
|
ClassData{prior = 0.0, unseen = -3.1354942159291497,
|
|
|
|
likelihoods =
|
|
|
|
HashMap.fromList
|
|
|
|
[("time-of-day (latent)integer (numeric)", -0.6931471805599453),
|
|
|
|
("hour", -0.6931471805599453)],
|
|
|
|
n = 10}}),
|
|
|
|
("this <time>",
|
|
|
|
Classifier{okData =
|
2017-03-14 14:50:10 +03:00
|
|
|
ClassData{prior = -1.2992829841302609,
|
|
|
|
unseen = -3.6375861597263857,
|
2017-03-13 21:38:34 +03:00
|
|
|
likelihoods =
|
|
|
|
HashMap.fromList
|
2017-03-14 14:50:10 +03:00
|
|
|
[("week", -2.917770732084279), ("intersect", -2.512305623976115),
|
|
|
|
("season", -2.001480000210124),
|
|
|
|
("next <cycle>", -2.917770732084279),
|
|
|
|
("named-month", -2.512305623976115),
|
|
|
|
("day", -2.001480000210124), ("hour", -2.2246235515243336),
|
|
|
|
("month", -2.001480000210124),
|
|
|
|
("week-end", -2.2246235515243336)],
|
|
|
|
n = 12},
|
2017-03-13 21:38:34 +03:00
|
|
|
koData =
|
2017-03-14 14:50:10 +03:00
|
|
|
ClassData{prior = -0.3184537311185346, unseen = -4.356708826689592,
|
2017-03-13 21:38:34 +03:00
|
|
|
likelihoods =
|
|
|
|
HashMap.fromList
|
2017-03-14 14:50:10 +03:00
|
|
|
[("intersect", -1.8588987720656835),
|
|
|
|
("named-month", -1.5105920777974677),
|
|
|
|
("day", -3.6506582412937383),
|
|
|
|
("time-of-day (latent)", -3.245193133185574),
|
|
|
|
("<day-of-month> (ordinal)", -3.6506582412937383),
|
|
|
|
("noon", -3.6506582412937383), ("hour", -2.7343675094195836),
|
|
|
|
("month", -1.0116009116784799),
|
|
|
|
("morning", -3.6506582412937383)],
|
|
|
|
n = 32}}),
|
2017-03-08 21:33:55 +03:00
|
|
|
("<named-month> <day-of-month> (ordinal)",
|
|
|
|
Classifier{okData =
|
|
|
|
ClassData{prior = -0.4418327522790392, unseen = -3.044522437723423,
|
|
|
|
likelihoods =
|
|
|
|
HashMap.fromList
|
|
|
|
[("month", -0.6931471805599453),
|
|
|
|
("named-monthordinal (digits)", -0.6931471805599453)],
|
|
|
|
n = 9},
|
|
|
|
koData =
|
|
|
|
ClassData{prior = -1.0296194171811581,
|
|
|
|
unseen = -2.5649493574615367,
|
|
|
|
likelihoods =
|
|
|
|
HashMap.fromList
|
|
|
|
[("month", -0.6931471805599453),
|
|
|
|
("named-monthordinal (digits)", -0.6931471805599453)],
|
|
|
|
n = 5}}),
|
|
|
|
("within <duration>",
|
|
|
|
Classifier{okData =
|
|
|
|
ClassData{prior = 0.0, unseen = -1.6094379124341003,
|
|
|
|
likelihoods =
|
|
|
|
HashMap.fromList
|
|
|
|
[("week", -0.6931471805599453),
|
|
|
|
("<integer> <unit-of-duration>", -0.6931471805599453)],
|
|
|
|
n = 1},
|
|
|
|
koData =
|
|
|
|
ClassData{prior = -infinity, unseen = -1.0986122886681098,
|
|
|
|
likelihoods = HashMap.fromList [], n = 0}})]
|