Fix minor issues in German. (#362)

Summary:
1. Fix a bug for "fünfter"
2. Support "am ersten Dezember" through "am einunddreißigsten Dezember"
3. Duckling now recognizes "der fünfte Dezember"

- [x] ``README.md`` is up to date with docker
- [x] ``docker-compose.yml`` is included
- [x] all ``apt-get`` commands are in a single RUN command to avoid multiple stages
- [x] Dockerfile now uses the files from the directory
- [ ] more tests should be included
Pull Request resolved: https://github.com/facebook/duckling/pull/362

Reviewed By: patapizza

Differential Revision: D15100843

Pulled By: haoxuany

fbshipit-source-id: 407c8bb2a05a1d0cee81c407e81d882fca63a2be
This commit is contained in:
Josua Blejeru 2019-05-01 13:52:50 -07:00 committed by Facebook Github Bot
parent 0b551597ce
commit 54cd64d0ec
3 changed files with 330 additions and 298 deletions

View File

@ -7,14 +7,18 @@
{-# LANGUAGE GADTs #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedStrings #-}
module Duckling.Ordinal.DE.Rules
( rules ) where
import qualified Data.HashMap.Strict as HashMap
import qualified Data.List as List
import qualified Data.Text as Text
import Prelude
import Data.String
import Data.Text (Text)
import Duckling.Dimensions.Types
import Duckling.Numeral.Helpers (parseInt)
@ -22,56 +26,63 @@ import Duckling.Ordinal.Helpers
import Duckling.Regex.Types
import Duckling.Types
-- | Spelled-out German ordinals for 1 through 31, paired with their values.
--
-- Every entry is the stem ending in @-e@ ("erste", "zweite", ...). The
-- inflected forms are not listed here; the rule that consumes this list
-- appends an optional suffix letter in its regex. 30 and 31 appear twice so
-- that both the @ss@ and the @ß@ spelling are accepted.
--
-- 13 and 16 are spelled "dreizehnte" / "sechzehnte" (with the @h@); the
-- earlier "dreizente" / "sechzente" entries were typos that prevented the
-- correctly spelled words from matching.
ordinalList :: [(Text, Int)]
ordinalList =
  [ ("erste", 1)
  , ("zweite", 2)
  , ("dritte", 3)
  , ("vierte", 4)
  , ("fünfte", 5)
  , ("sechste", 6)
  , ("siebte", 7)
  , ("achte", 8)
  , ("neunte", 9)
  , ("zehnte", 10)
  , ("elfte", 11)
  , ("zwölfte", 12)
  , ("dreizehnte", 13)
  , ("vierzehnte", 14)
  , ("fünfzehnte", 15)
  , ("sechzehnte", 16)
  , ("siebzehnte", 17)
  , ("achtzehnte", 18)
  , ("neunzehnte", 19)
  , ("zwanzigste", 20)
  , ("einundzwanzigste", 21)
  , ("zweiundzwanzigste", 22)
  , ("dreiundzwanzigste", 23)
  , ("vierundzwanzigste", 24)
  , ("fünfundzwanzigste", 25)
  , ("sechsundzwanzigste", 26)
  , ("siebenundzwanzigste", 27)
  , ("achtundzwanzigste", 28)
  , ("neunundzwanzigste", 29)
  , ("dreissigste", 30)
  , ("dreißigste", 30)
  , ("einunddreissigste", 31)
  , ("einunddreißigste", 31)
  ]
-- | Rule for German ordinals written out as words.
--
-- The pattern is a single regex assembled by @construction@: the
-- alternation of every stem in 'ordinalList', wrapped in one capture group,
-- followed by an optional inflection letter @r@, @s@ or @n@. The production
-- lower-cases the first matched group and looks it up in @ordinalMap@ (a
-- 'HashMap.HashMap' built from 'ordinalList'); an unknown key or any other
-- token shape yields 'Nothing'.
--
-- NOTE(review): as rendered here the superseded lines (the old @name@, the
-- hard-coded regex and the long @case@ table) sit next to their
-- replacements; only one @name@ / @pattern@ entry / @prod@ can be live.
-- NOTE(review): presumably the first group holds only the stem, since
-- @[rsn]?@ is outside the parentheses — confirm against 'GroupMatch'.
ruleOrdinalsFirstth :: Rule
ruleOrdinalsFirstth = Rule
{ name = "ordinals (first..19th)"
{ name = "ordinal (1..31)"
, pattern =
[ regex "(erste(r|s)?|zweite(r|s)|dritte(r|s)|vierte(r|s)|fuenfte(r|s)|sechste(r|s)|siebte(r|s)|achte(r|s)|neunte(r|s)|zehnte(r|s)|elfter|zwölfter|dreizenter|vierzehnter|fünfzehnter|sechzenter|siebzehnter|achtzehnter|neunzehnter)"
[ regex $ Text.unpack construction
]
, prod = \tokens -> case tokens of
(Token RegexMatch (GroupMatch (match:_)):_) -> case Text.toLower match of
"erstes" -> Just $ ordinal 1
"erster" -> Just $ ordinal 1
"erste" -> Just $ ordinal 1
"zweiter" -> Just $ ordinal 2
"zweite" -> Just $ ordinal 2
"zweites" -> Just $ ordinal 2
"drittes" -> Just $ ordinal 3
"dritte" -> Just $ ordinal 3
"dritter" -> Just $ ordinal 3
"viertes" -> Just $ ordinal 4
"vierte" -> Just $ ordinal 4
"vierter" -> Just $ ordinal 4
"fünftes" -> Just $ ordinal 5
"fünfter" -> Just $ ordinal 5
"fünfte" -> Just $ ordinal 5
"sechste" -> Just $ ordinal 6
"sechstes" -> Just $ ordinal 6
"sechster" -> Just $ ordinal 6
"siebtes" -> Just $ ordinal 7
"siebte" -> Just $ ordinal 7
"siebter" -> Just $ ordinal 7
"achtes" -> Just $ ordinal 8
"achte" -> Just $ ordinal 8
"achter" -> Just $ ordinal 8
"neuntes" -> Just $ ordinal 9
"neunter" -> Just $ ordinal 9
"neunte" -> Just $ ordinal 9
"zehnte" -> Just $ ordinal 10
"zehnter" -> Just $ ordinal 10
"zehntes" -> Just $ ordinal 10
"elfter" -> Just $ ordinal 11
"zwölfter" -> Just $ ordinal 12
"dreizehnter" -> Just $ ordinal 13
"vierzehnter" -> Just $ ordinal 14
"fünfzehnter" -> Just $ ordinal 15
"sechzehnter" -> Just $ ordinal 16
"siebzehnter" -> Just $ ordinal 17
"achtzehnter" -> Just $ ordinal 18
"neunzehnter" -> Just $ ordinal 19
_ -> Nothing
, prod = \case
(Token RegexMatch (GroupMatch (match:_)):_) ->
ordinal <$> HashMap.lookup (Text.toLower match) ordinalMap
_ -> Nothing
}
where
-- Lookup table from ordinal stem to its numeric value.
ordinalMap :: HashMap.HashMap Text Int
ordinalMap = HashMap.fromList ordinalList
-- Regex source of the form "(stem1|stem2|...)[rsn]?", with the stems taken
-- from 'ordinalList' in list order.
construction :: Text
construction =
"("
<> mconcat (List.intersperse "|" (fst <$> ordinalList))
<> ")[rsn]?"
ruleOrdinalDigits :: Rule
ruleOrdinalDigits = Rule
@ -79,7 +90,7 @@ ruleOrdinalDigits = Rule
, pattern =
[ regex "(?<!\\d|\\.)0*(\\d+)(\\.(?!\\d)| ?(te(n|r|s)?)|(ste(n|r|s)?))"
]
, prod = \tokens -> case tokens of
, prod = \case
(Token RegexMatch (GroupMatch (match:_)):_) -> do
v <- parseInt match
Just $ ordinal v
@ -88,6 +99,6 @@ ruleOrdinalDigits = Rule
-- | The ordinal rules exported by this module: the spelled-out rule
-- ('ruleOrdinalsFirstth') and the digit-based rule ('ruleOrdinalDigits').
--
-- NOTE(review): both the previous ordering and the new ordering of the list
-- are shown below; whether list order affects matching is not visible
-- here — confirm before relying on it.
rules :: [Rule]
rules =
[ ruleOrdinalDigits
, ruleOrdinalsFirstth
[ ruleOrdinalsFirstth
, ruleOrdinalDigits
]

View File

@ -242,28 +242,29 @@ classifiers
likelihoods = HashMap.fromList [], n = 0}}),
("on <date>",
Classifier{okData =
ClassData{prior = -0.13133600206108698,
unseen = -4.897839799950911,
ClassData{prior = -0.12516314295400605,
unseen = -4.941642422609305,
likelihoods =
HashMap.fromList
[("absorption of , after named day", -3.791736839553644),
("Freitag", -4.197201947661808),
("intersect", -1.7122952978738082),
("after lunch", -2.9444389791664407),
("day", -1.6322525902002718),
("afternoon", -2.9444389791664407),
("intersect by ','", -2.9444389791664407),
("intersect by 'of', 'from', 's", -3.2809112157876537),
("Donnerstag", -3.5040547671018634),
("<day-of-month> (ordinal)", -4.197201947661808),
("hour", -1.9999773703255892), ("evening", -4.197201947661808),
("<datetime> - <datetime> (interval)", -4.197201947661808),
("minute", -2.1177604059819726),
("Samstag", -3.791736839553644),
("morning", -4.197201947661808)],
n = 57},
[("absorption of , after named day", -3.835861644462582),
("Freitag", -4.241326752570746),
("intersect", -1.7155981082624912),
("after lunch", -2.9885637840753785),
("<day-of-month>(ordinal) <named-month>", -4.241326752570746),
("day", -1.5671781031442178),
("afternoon", -2.9885637840753785),
("intersect by ','", -2.9885637840753785),
("intersect by 'of', 'from', 's", -3.3250360206965914),
("Donnerstag", -3.548179572010801),
("<day-of-month> (ordinal)", -3.835861644462582),
("hour", -2.044102175234527), ("evening", -4.241326752570746),
("<datetime> - <datetime> (interval)", -4.241326752570746),
("minute", -2.1618852108909103),
("Samstag", -3.835861644462582),
("morning", -4.241326752570746)],
n = 60},
koData =
ClassData{prior = -2.094945728215801, unseen = -3.58351893845611,
ClassData{prior = -2.1400661634962708, unseen = -3.58351893845611,
likelihoods =
HashMap.fromList
[("intersect", -2.456735772821304),
@ -383,8 +384,8 @@ classifiers
ClassData{prior = -0.6931471805599453, unseen = -1.791759469228055,
likelihoods =
HashMap.fromList
[("ordinals (first..19th)quarter (grain)", -0.916290731874155),
("quarter", -0.916290731874155)],
[("quarter", -0.916290731874155),
("ordinal (1..31)quarter (grain)", -0.916290731874155)],
n = 1},
koData =
ClassData{prior = -0.6931471805599453, unseen = -1.791759469228055,
@ -409,236 +410,239 @@ classifiers
likelihoods = HashMap.fromList [], n = 0}}),
("intersect",
Classifier{okData =
ClassData{prior = -0.11700209067331212,
unseen = -6.917705609835305,
ClassData{prior = -0.11521958898218493,
unseen = -6.934397209928558,
likelihoods =
HashMap.fromList
[("Samstag<time-of-day> o'clock", -6.223567839793664),
("hourday", -4.970804871298296),
("<datetime> - <datetime> (interval)year", -4.970804871298296),
("dayhour", -2.5600061936640173),
("daymonth", -3.132525386435348),
[("Samstag<time-of-day> o'clock", -6.240275845170769),
("hourday", -4.987512876675401),
("<datetime> - <datetime> (interval)year", -4.987512876675401),
("dayhour", -2.576714199041123),
("daymonth", -3.001597393006389),
("<day-of-month>(ordinal) <named-month> year<time-of-day> o'clock",
-6.223567839793664),
("monthyear", -4.431808370565609),
("yearhour", -6.223567839793664),
("Juliintersect", -6.223567839793664),
("intersect<time-of-day> o'clock", -4.083501676297393),
("after lunchat <time-of-day>", -6.223567839793664),
-6.240275845170769),
("monthyear", -4.448516375942715),
("yearhour", -6.240275845170769),
("Juliintersect", -6.240275845170769),
("intersect<time-of-day> o'clock", -4.100209681674499),
("after lunchat <time-of-day>", -6.240275845170769),
("absorption of , after named daymm/dd/yyyy",
-6.223567839793664),
("intersectJuli", -5.530420659233719),
("<day-of-month> (ordinal)Dienstag", -6.223567839793664),
("intersect by 'of', 'from', 'syear", -5.818102731685499),
("<day-of-month> (ordinal)September", -5.818102731685499),
("intersect<time> <part-of-day>", -4.144126298113828),
("<time-of-day> o'clockafter lunch", -5.530420659233719),
("<day-of-month> (ordinal)Mittwoch", -6.223567839793664),
("Montagthis <cycle>", -6.223567839793664),
-6.240275845170769),
("intersectJuli", -5.547128664610824),
("<day-of-month> (ordinal)Dienstag", -6.240275845170769),
("intersect by 'of', 'from', 'syear", -5.834810737062605),
("<day-of-month> (ordinal)September", -5.834810737062605),
("intersect<time> <part-of-day>", -4.160834303490934),
("<time-of-day> o'clockafter lunch", -5.547128664610824),
("<day-of-month> (ordinal)Mittwoch", -6.240275845170769),
("Montagthis <cycle>", -6.240275845170769),
("<time> <part-of-day><time-of-day> o'clock",
-6.223567839793664),
("Oktoberyear", -5.124955551125554),
("today<time-of-day> o'clock", -6.223567839793664),
("<time-of-day> o'clockon <date>", -5.818102731685499),
("intersect by ','year", -5.124955551125554),
("on <date><time-of-day> o'clock", -6.223567839793664),
("exactly <time-of-day>tomorrow", -5.818102731685499),
("mm/dd<time-of-day> o'clock", -6.223567839793664),
("monthhour", -5.818102731685499),
-6.240275845170769),
("Oktoberyear", -5.14166355650266),
("today<time-of-day> o'clock", -6.240275845170769),
("<time-of-day> o'clockon <date>", -5.834810737062605),
("intersect by ','year", -5.14166355650266),
("on <date><time-of-day> o'clock", -6.240275845170769),
("exactly <time-of-day>tomorrow", -5.834810737062605),
("mm/dd<time-of-day> o'clock", -6.240275845170769),
("monthhour", -5.834810737062605),
("on <date>between <datetime> and <datetime> (interval)",
-5.818102731685499),
("last <day-of-week> of <time>year", -6.223567839793664),
("hourmonth", -5.818102731685499),
("todayat <time-of-day>", -5.530420659233719),
("mm/ddabout <time-of-day>", -5.818102731685499),
-5.834810737062605),
("last <day-of-week> of <time>year", -6.240275845170769),
("hourmonth", -5.834810737062605),
("todayat <time-of-day>", -5.547128664610824),
("mm/ddabout <time-of-day>", -5.834810737062605),
("Donnerstagfrom <datetime> - <datetime> (interval)",
-5.307277107919509),
("<day-of-month> (ordinal)Mai", -6.223567839793664),
-5.323985113296614),
("<day-of-month> (ordinal)Mai", -6.240275845170769),
("on <date>between <time-of-day> and <time-of-day> (interval)",
-5.818102731685499),
("on <date>at <time-of-day>", -5.818102731685499),
("dayday", -3.0455140094457183),
("<time> <part-of-day>at <time-of-day>", -5.818102731685499),
("about <time-of-day>on <date>", -6.223567839793664),
-5.834810737062605),
("on <date>at <time-of-day>", -5.834810737062605),
("dayday", -3.062222014822824),
("<time> <part-of-day>at <time-of-day>", -5.834810737062605),
("about <time-of-day>on <date>", -6.240275845170769),
("<hour-of-day> <integer> (as relative minutes)in|during the <part-of-day>",
-5.818102731685499),
("<day-of-month> (ordinal)intersect", -4.970804871298296),
("hourhour", -3.6978391954854084),
("hh:mmintersect by ','", -4.970804871298296),
-5.834810737062605),
("<day-of-month> (ordinal)intersect", -4.987512876675401),
("hourhour", -3.714547200862514),
("hh:mmintersect by ','", -4.987512876675401),
("Donnerstagfrom <time-of-day> - <time-of-day> (interval)",
-4.970804871298296),
("Dienstagthis <cycle>", -6.223567839793664),
("<part-of-day> of <time>Februar", -5.818102731685499),
("dayyear", -3.2531533742239627),
("Mittwochthis <cycle>", -6.223567839793664),
-4.987512876675401),
("Dienstagthis <cycle>", -6.240275845170769),
("<part-of-day> of <time>Februar", -5.834810737062605),
("dayyear", -3.2698613796010685),
("Mittwochthis <cycle>", -6.240275845170769),
("<time-of-day> o'clockin|during the <part-of-day>",
-5.124955551125554),
("tomorrow<time-of-day> o'clock", -6.223567839793664),
("<time-of-day> o'clocktomorrow", -5.530420659233719),
-5.14166355650266),
("tomorrow<time-of-day> o'clock", -6.240275845170769),
("<time-of-day> o'clocktomorrow", -5.547128664610824),
("<day-of-month>(ordinal) <named-month>year",
-5.530420659233719),
-5.547128664610824),
("half <integer> (german style hour-of-day)after lunch",
-6.223567839793664),
-6.240275845170769),
("absorption of , after named day<day-of-month>(ordinal) <named-month>",
-4.970804871298296),
("hourminute", -5.530420659233719),
("the <day-of-month> (ordinal)Juli", -6.223567839793664),
-4.987512876675401),
("hourminute", -5.547128664610824),
("the <day-of-month> (ordinal)Juli", -6.240275845170769),
("on <date><day-of-month>(ordinal) <named-month>",
-5.818102731685499),
("minutemonth", -3.6978391954854084),
("minutehour", -3.9722760411871687),
-5.834810737062605),
("minutemonth", -3.714547200862514),
("minutehour", -3.9889840465642745),
("at <time-of-day>in|during the <part-of-day>",
-5.307277107919509),
-5.323985113296614),
("absorption of , after named day<day-of-month>(ordinal) <named-month> year",
-5.818102731685499),
-5.834810737062605),
("absorption of , after named day<named-month> <day-of-month> (non ordinal)",
-5.124955551125554),
-5.14166355650266),
("<day-of-month>(ordinal) <named-month> year<time> <part-of-day>",
-6.223567839793664),
-6.240275845170769),
("<hour-of-day> <integer> (as relative minutes)after lunch",
-6.223567839793664),
("Donnerstag<time> timezone", -5.818102731685499),
("Samstagat <time-of-day>", -5.818102731685499),
-6.240275845170769),
("Donnerstag<time> timezone", -5.834810737062605),
("on <date>Dezember", -6.240275845170769),
("Samstagat <time-of-day>", -5.834810737062605),
("intersect<day-of-month>(ordinal) <named-month>",
-4.970804871298296),
-4.987512876675401),
("this <part-of-day><time-of-day> o'clock",
-6.223567839793664),
-6.240275845170769),
("<day-of-month>(ordinal) <named-month>intersect",
-6.223567839793664),
("hh:mmintersect", -3.7812208044244593),
("Donnerstaghh:mm", -6.223567839793664),
("Weihnachtenyear", -6.223567839793664),
("<day-of-month> (ordinal)Juli", -5.530420659233719),
("intersect by ','intersect", -4.970804871298296),
("intersect by 'of', 'from', 'sJuli", -6.223567839793664),
-6.240275845170769),
("hh:mmintersect", -3.797928809801565),
("Donnerstaghh:mm", -6.240275845170769),
("Weihnachtenyear", -6.240275845170769),
("<day-of-month> (ordinal)Juli", -5.547128664610824),
("intersect by ','intersect", -4.987512876675401),
("intersect by 'of', 'from', 'sJuli", -6.240275845170769),
("half <integer> (german style hour-of-day)in|during the <part-of-day>",
-5.818102731685499),
-5.834810737062605),
("from <datetime> - <datetime> (interval)year",
-5.530420659233719),
("at <time-of-day>intersect", -4.431808370565609),
-5.547128664610824),
("at <time-of-day>intersect", -4.448516375942715),
("on <date>from <time-of-day> - <time-of-day> (interval)",
-5.307277107919509),
-5.323985113296614),
("<time> <part-of-day>from <time-of-day> - <time-of-day> (interval)",
-6.223567839793664),
-6.240275845170769),
("absorption of , after named day<day-of-month> (ordinal)",
-4.970804871298296),
-4.987512876675401),
("Donnerstagbetween <time-of-day> and <time-of-day> (interval)",
-5.818102731685499),
("dayminute", -3.4509791175538824),
-5.834810737062605),
("the <day-of-month> (ordinal)Dezember", -6.240275845170769),
("dayminute", -3.4676871229309882),
("on <date>from <datetime> - <datetime> (interval)",
-5.818102731685499),
("Maiyear", -6.223567839793664),
("Mittwochnext <cycle>", -6.223567839793664),
-5.834810737062605),
("Maiyear", -6.240275845170769),
("Mittwochnext <cycle>", -6.240275845170769),
("Donnerstagbetween <datetime> and <datetime> (interval)",
-5.818102731685499),
-5.834810737062605),
("<time> <part-of-day>from <datetime> - <datetime> (interval)",
-6.223567839793664),
("intersectyear", -4.351765662892072),
("on <date>intersect", -5.818102731685499),
("on <date><day-of-month> (ordinal)", -5.818102731685499),
("intersectSeptember", -3.584510510178405),
("<ordinal> <cycle> of <time>year", -6.223567839793664),
("minuteday", -2.373420238083605),
-6.240275845170769),
("intersectyear", -4.3684736682691785),
("on <date>intersect", -5.834810737062605),
("on <date><day-of-month> (ordinal)", -5.834810737062605),
("intersectSeptember", -3.601218515555511),
("<ordinal> <cycle> of <time>year", -6.240275845170769),
("minuteday", -2.390128243460711),
("absorption of , after named dayintersect",
-3.8256725669952933),
("intersect by ','September", -4.837273478673773),
("year<time-of-day> o'clock", -6.223567839793664),
("Juliyear", -5.818102731685499),
("at <time-of-day>intersect by ','", -5.530420659233719),
("hh:mmabsorption of , after named day", -5.818102731685499),
("intersect by ','<time> <part-of-day>", -5.307277107919509),
("hh:mmon <date>", -3.7812208044244593),
-3.842380572372399),
("intersect by ','September", -4.853981484050879),
("year<time-of-day> o'clock", -6.240275845170769),
("Juliyear", -5.834810737062605),
("at <time-of-day>intersect by ','", -5.547128664610824),
("hh:mmabsorption of , after named day", -5.834810737062605),
("intersect by ','<time> <part-of-day>", -5.323985113296614),
("hh:mmon <date>", -3.797928809801565),
("at <time-of-day>absorption of , after named day",
-6.223567839793664),
("until <time-of-day>after lunch", -6.223567839793664),
("mm/ddyear", -5.818102731685499),
("intersect by ','<time-of-day> o'clock", -5.307277107919509),
("intersect<day-of-month> (ordinal)", -4.970804871298296),
("absorption of , after named daymm/dd", -6.223567839793664),
("on <date>September", -5.307277107919509),
("Septemberyear", -5.818102731685499),
-6.240275845170769),
("until <time-of-day>after lunch", -6.240275845170769),
("mm/ddyear", -5.834810737062605),
("intersect by ','<time-of-day> o'clock", -5.323985113296614),
("intersect<day-of-month> (ordinal)", -4.987512876675401),
("absorption of , after named daymm/dd", -6.240275845170769),
("on <date>September", -5.323985113296614),
("Septemberyear", -5.834810737062605),
("<day-of-month> (ordinal)intersect by 'of', 'from', 's",
-5.530420659233719),
("at <time-of-day>on <date>", -4.351765662892072),
("intersectintersect", -4.144126298113828),
("<day-of-month> (ordinal)Februar", -5.124955551125554),
("dayweek", -5.124955551125554),
("intersect by ','Juli", -5.530420659233719),
-5.547128664610824),
("at <time-of-day>on <date>", -4.3684736682691785),
("intersectintersect", -4.160834303490934),
("<day-of-month> (ordinal)Februar", -5.14166355650266),
("dayweek", -5.14166355650266),
("intersect by ','Juli", -5.547128664610824),
("absorption of , after named daythe <day-of-month> (ordinal)",
-6.223567839793664),
("weekyear", -5.818102731685499),
("hh:mmin|during the <part-of-day>", -5.307277107919509),
("Marzyear", -6.223567839793664),
("tomorrowat <time-of-day>", -5.818102731685499),
-6.240275845170769),
("weekyear", -5.834810737062605),
("<day-of-month> (ordinal)Dezember", -5.14166355650266),
("hh:mmin|during the <part-of-day>", -5.323985113296614),
("Marzyear", -6.240275845170769),
("tomorrowat <time-of-day>", -5.834810737062605),
("<hour-of-day> <integer> (as relative minutes)on <date>",
-6.223567839793664),
-6.240275845170769),
("Donnerstag<time-of-day> - <time-of-day> (interval)",
-5.818102731685499),
-5.834810737062605),
("Donnerstag<datetime> - <datetime> (interval)",
-6.223567839793664),
("Sonntaglast <cycle>", -6.223567839793664),
("at <time-of-day>tomorrow", -6.223567839793664),
-6.240275845170769),
("Sonntaglast <cycle>", -6.240275845170769),
("at <time-of-day>tomorrow", -6.240275845170769),
("about <time-of-day>in|during the <part-of-day>",
-5.818102731685499),
-5.834810737062605),
("half <integer> (german style hour-of-day)on <date>",
-6.223567839793664),
("this <part-of-day>at <time-of-day>", -5.818102731685499),
("<datetime> - <datetime> (interval)Juli", -5.818102731685499),
-6.240275845170769),
("this <part-of-day>at <time-of-day>", -5.834810737062605),
("<datetime> - <datetime> (interval)Juli", -5.834810737062605),
("after lunch<hour-of-day> <integer> (as relative minutes)",
-5.818102731685499),
("<day-of-month> (ordinal)Marz", -6.223567839793664),
("last <cycle> of <time>year", -5.818102731685499),
-5.834810737062605),
("<day-of-month> (ordinal)Marz", -6.240275845170769),
("last <cycle> of <time>year", -5.834810737062605),
("<named-month> <day-of-month> (non ordinal)year",
-6.223567839793664),
-6.240275845170769),
("<day-of-month> (non ordinal) <named-month>year",
-6.223567839793664)],
n = 427},
-6.240275845170769)],
n = 434},
koData =
ClassData{prior = -2.2034941903498146, unseen = -5.568344503761097,
ClassData{prior = -2.217972209530468, unseen = -5.579729825986222,
likelihoods =
HashMap.fromList
[("absorption of , after named daythe <day-of-month> (non ordinal)",
-4.871373226762748),
("<datetime> - <datetime> (interval)year", -4.871373226762748),
("dayhour", -4.178226046202803),
("daymonth", -3.166625134524323),
("monthyear", -4.871373226762748),
("yearhour", -4.871373226762748),
("after lunchat <time-of-day>", -4.465908118654584),
("mm/dduntil <time-of-day>", -4.465908118654584),
("until <time-of-day>year", -4.465908118654584),
("<day-of-month> (ordinal)Dienstag", -4.465908118654584),
("absorption of , after named dayFebruar", -3.7727609380946383),
("on <date>Februar", -4.871373226762748),
("intersect by 'of', 'from', 'syear", -4.178226046202803),
("intersect<time> <part-of-day>", -4.871373226762748),
("<time-of-day> o'clockafter lunch", -4.871373226762748),
("<day-of-month> (ordinal)Mittwoch", -4.871373226762748),
("after lunch<time-of-day> o'clock", -4.871373226762748),
("absorption of , after named dayhh:mm", -4.871373226762748),
-4.882801922586371),
("<datetime> - <datetime> (interval)year", -4.882801922586371),
("dayhour", -4.189654742026425),
("daymonth", -3.1780538303479458),
("monthyear", -4.882801922586371),
("yearhour", -4.882801922586371),
("after lunchat <time-of-day>", -4.477336814478207),
("mm/dduntil <time-of-day>", -4.477336814478207),
("until <time-of-day>year", -4.477336814478207),
("<day-of-month> (ordinal)Dienstag", -4.477336814478207),
("absorption of , after named dayFebruar", -3.784189633918261),
("on <date>Februar", -4.882801922586371),
("intersect by 'of', 'from', 'syear", -4.189654742026425),
("intersect<time> <part-of-day>", -4.882801922586371),
("<time-of-day> o'clockafter lunch", -4.882801922586371),
("<day-of-month> (ordinal)Mittwoch", -4.882801922586371),
("after lunch<time-of-day> o'clock", -4.882801922586371),
("absorption of , after named dayhh:mm", -4.882801922586371),
("<time-of-day> o'clock<time> <part-of-day>",
-3.955082494888593),
("monthhour", -4.871373226762748),
("todayat <time-of-day>", -4.871373226762748),
("dayday", -3.4850788656428575),
("hourhour", -3.367295829986474),
("Donnerstagafter <time-of-day>", -4.871373226762748),
("dayyear", -2.791931685082912),
("Aprilyear", -4.871373226762748),
("Dienstagafter <time-of-day>", -4.871373226762748),
("Donnerstaghh:mm", -4.465908118654584),
("<day-of-month> (ordinal)Juli", -4.465908118654584),
("dayminute", -3.4850788656428575),
("until <time-of-day>Juli", -4.871373226762748),
("year<time> <part-of-day>", -4.871373226762748),
("mm/ddyear", -3.367295829986474),
-3.966511190712216),
("monthhour", -4.882801922586371),
("todayat <time-of-day>", -4.882801922586371),
("dayday", -3.4965075614664802),
("hourhour", -3.378724525810097),
("Donnerstagafter <time-of-day>", -4.882801922586371),
("dayyear", -2.803360380906535),
("Aprilyear", -4.882801922586371),
("Dienstagafter <time-of-day>", -4.882801922586371),
("Donnerstaghh:mm", -4.477336814478207),
("<day-of-month> (ordinal)Juli", -4.477336814478207),
("dayminute", -3.4965075614664802),
("until <time-of-day>Juli", -4.882801922586371),
("year<time> <part-of-day>", -4.882801922586371),
("mm/ddyear", -3.378724525810097),
("<day-of-month> (ordinal)intersect by 'of', 'from', 's",
-4.465908118654584),
("Donnerstag<part-of-day> of <time>", -4.871373226762748),
("<day-of-month> (ordinal)April", -4.871373226762748),
("after <time-of-day>year", -4.871373226762748),
("on <date>after <time-of-day>", -4.871373226762748),
("tomorrownoon", -4.871373226762748)],
-4.477336814478207),
("Donnerstag<part-of-day> of <time>", -4.882801922586371),
("<day-of-month> (ordinal)April", -4.882801922586371),
("after <time-of-day>year", -4.882801922586371),
("on <date>after <time-of-day>", -4.882801922586371),
("tomorrownoon", -4.882801922586371)],
n = 53}}),
("<ordinal> <cycle> of <time>",
Classifier{okData =
@ -646,13 +650,10 @@ classifiers
likelihoods =
HashMap.fromList
[("daymonth", -1.7047480922384253),
("ordinals (first..19th)week (grain)Oktober",
-1.7047480922384253),
("ordinals (first..19th)day (grain)Oktober",
-1.7047480922384253),
("ordinals (first..19th)week (grain)intersect",
-1.7047480922384253),
("weekmonth", -1.2992829841302609)],
("ordinal (1..31)week (grain)intersect", -1.7047480922384253),
("weekmonth", -1.2992829841302609),
("ordinal (1..31)day (grain)Oktober", -1.7047480922384253),
("ordinal (1..31)week (grain)Oktober", -1.7047480922384253)],
n = 3},
koData =
ClassData{prior = -infinity, unseen = -1.791759469228055,
@ -844,20 +845,19 @@ classifiers
ClassData{prior = -0.5596157879354228, unseen = -2.70805020110221,
likelihoods =
HashMap.fromList
[("daymonth", -1.0296194171811581),
("ordinals (first..19th)Dienstagintersect",
-1.9459101490553135),
("ordinals (first..19th)Mittwochintersect", -1.540445040947149),
("ordinals (first..19th)DienstagOktober", -1.9459101490553135)],
[("ordinal (1..31)DienstagOktober", -1.9459101490553135),
("daymonth", -1.0296194171811581),
("ordinal (1..31)Dienstagintersect", -1.9459101490553135),
("ordinal (1..31)Mittwochintersect", -1.540445040947149)],
n = 4},
koData =
ClassData{prior = -0.8472978603872037,
unseen = -2.5649493574615367,
likelihoods =
HashMap.fromList
[("daymonth", -1.0986122886681098),
("ordinals (first..19th)DienstagSeptember", -1.791759469228055),
("ordinals (first..19th)MittwochOktober", -1.3862943611198906)],
[("ordinal (1..31)MittwochOktober", -1.3862943611198906),
("daymonth", -1.0986122886681098),
("ordinal (1..31)DienstagSeptember", -1.791759469228055)],
n = 3}}),
("the <day-of-month> (non ordinal)",
Classifier{okData =
@ -954,25 +954,33 @@ classifiers
likelihoods = HashMap.fromList [], n = 0}}),
("<day-of-month>(ordinal) <named-month>",
Classifier{okData =
ClassData{prior = -0.2231435513142097,
unseen = -3.4657359027997265,
ClassData{prior = -0.16251892949777494,
unseen = -3.7612001156935624,
likelihoods =
HashMap.fromList
[("ordinal (digits)Juli", -2.0476928433652555),
("ordinal (digits)Mai", -2.740840023925201),
("ordinal (digits)September", -2.3353749158170367),
("month", -0.8690378470236094),
("ordinal (digits)Februar", -1.6422277352570913),
("ordinals (first..19th)Marz", -2.740840023925201)],
n = 12},
[("ordinal (digits)Juli", -2.3513752571634776),
("ordinal (1..31)Dezember", -1.9459101490553135),
("ordinal (1..31)Marz", -3.044522437723423),
("ordinal (digits)Mai", -3.044522437723423),
("ordinal (digits)September", -2.639057329615259),
("month", -0.8472978603872037),
("ordinal (digits)Februar", -1.9459101490553135)],
n = 17},
koData =
ClassData{prior = -1.6094379124341003, unseen = -2.639057329615259,
ClassData{prior = -1.8971199848858813, unseen = -2.70805020110221,
likelihoods =
HashMap.fromList
[("ordinal (digits)Juli", -1.466337068793427),
("ordinal (digits)April", -1.8718021769015913),
("month", -1.1786549963416462)],
[("ordinal (digits)Juli", -1.540445040947149),
("ordinal (digits)April", -1.9459101490553135),
("month", -1.252762968495368)],
n = 3}}),
("ordinal (1..31)",
Classifier{okData =
ClassData{prior = 0.0, unseen = -2.772588722239781,
likelihoods = HashMap.fromList [("", 0.0)], n = 14},
koData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0}}),
("numbers prefix with -, negative or minus",
Classifier{okData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
@ -1039,10 +1047,14 @@ classifiers
likelihoods = HashMap.fromList [], n = 0}}),
("the <day-of-month> (ordinal)",
Classifier{okData =
ClassData{prior = 0.0, unseen = -1.0986122886681098,
likelihoods = HashMap.fromList [("ordinal (digits)", 0.0)], n = 1},
ClassData{prior = 0.0, unseen = -1.6094379124341003,
likelihoods =
HashMap.fromList
[("ordinal (1..31)", -0.6931471805599453),
("ordinal (digits)", -0.6931471805599453)],
n = 2},
koData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
ClassData{prior = -infinity, unseen = -1.0986122886681098,
likelihoods = HashMap.fromList [], n = 0}}),
("afternoon",
Classifier{okData =
@ -1183,13 +1195,6 @@ classifiers
koData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0}}),
("ordinals (first..19th)",
Classifier{okData =
ClassData{prior = 0.0, unseen = -2.3978952727983707,
likelihoods = HashMap.fromList [("", 0.0)], n = 9},
koData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0}}),
("<duration> after <time>",
Classifier{okData =
ClassData{prior = 0.0, unseen = -2.0794415416798357,
@ -1445,18 +1450,17 @@ classifiers
likelihoods = HashMap.fromList [], n = 0}}),
("<day-of-month> (ordinal)",
Classifier{okData =
ClassData{prior = -0.5520685823000397,
unseen = -3.0910424533583156,
ClassData{prior = -0.4595323293784402, unseen = -3.295836866004329,
likelihoods =
HashMap.fromList
[("ordinals (first..19th)", -1.6582280766035324),
("ordinal (digits)", -0.2113090936672069)],
n = 19},
[("ordinal (1..31)", -1.0608719606852628),
("ordinal (digits)", -0.42488319396526597)],
n = 24},
koData =
ClassData{prior = -0.8574502318512216, unseen = -2.833213344056216,
ClassData{prior = -0.9985288301111273, unseen = -2.833213344056216,
likelihoods =
HashMap.fromList
[("ordinals (first..19th)", -0.8266785731844679),
[("ordinal (1..31)", -0.8266785731844679),
("ordinal (digits)", -0.5753641449035618)],
n = 14}}),
("noon",
@ -1737,16 +1741,15 @@ classifiers
ClassData{prior = -0.6931471805599453, unseen = -1.791759469228055,
likelihoods =
HashMap.fromList
[("ordinals (first..19th)Dienstagintersect", -0.916290731874155),
("dayday", -0.916290731874155)],
[("dayday", -0.916290731874155),
("ordinal (1..31)Dienstagintersect", -0.916290731874155)],
n = 1},
koData =
ClassData{prior = -0.6931471805599453, unseen = -1.791759469228055,
likelihoods =
HashMap.fromList
[("ordinals (first..19th)DienstagWeihnachten",
-0.916290731874155),
("dayday", -0.916290731874155)],
[("dayday", -0.916290731874155),
("ordinal (1..31)DienstagWeihnachten", -0.916290731874155)],
n = 1}}),
("<named-month> <day-of-month> (non ordinal)",
Classifier{okData =
@ -1903,6 +1906,13 @@ classifiers
koData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0}}),
("Dezember",
Classifier{okData =
ClassData{prior = 0.0, unseen = -1.9459101490553135,
likelihoods = HashMap.fromList [("", 0.0)], n = 5},
koData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0}}),
("<month> dd-dd (interval)",
Classifier{okData =
ClassData{prior = 0.0, unseen = -2.3978952727983707,

View File

@ -679,4 +679,15 @@ allExamples = concat
, examples (datetime (2013, 2, 9, 0, 0, 0) Day)
[ "vorvorgestern"
]
, examples (datetime (2013, 12, 5, 0, 0, 0) Day)
[ "fünfter Dezember"
]
, examples (datetime (2013, 12, 30, 0, 0, 0) Day)
[ "dreißigster Dezember"
, "dreissigster Dezember"
]
, examples (datetime (2013, 12, 4, 0, 0, 0) Day)
[ "am vierten Dezember"
, "der vierte Dezember"
]
]