duckling/Duckling/Ranking/Classifiers/HU_XX.hs
Julien Odent ab0ad0256e Locales support
Summary:
* Locales support for the library, following `<Lang>_<Region>` with ISO 639-1 code for `<Lang>` and ISO 3166-1 alpha-2 code for `<Region>` (#33)
* `Locale` opaque type (composite of `Lang` and `Region`) with `makeLocale` smart constructor to only allow valid `(Lang, Region)` combinations
* API: `Context`'s `lang` parameter has been replaced by `locale`, with optional `Region` and backward compatibility.
*  `Rules/<Lang>.hs` exposes
  - `langRules`: cross-locale rules for `<Lang>`, from `<Dimension>/<Lang>/Rules.hs`
  - `localeRules`: locale-specific rules, from `<Dimension>/<Lang>/<Region>/Rules.hs`
  - `defaultRules`: `langRules` + specific rules from select locales to ensure backward-compatibility
* Corpus, tests & classifiers
  - 1 classifier per locale, with default classifier (`<Lang>_XX`) when no locale provided (backward-compatible)
  - Default classifiers are built on existing corpus
  - Locale classifiers are built on
  - `<Dimension>/<Lang>/Corpus.hs` exposes a common `corpus` to all locales of `<Lang>`
  - `<Dimension>/<Lang>/<Region>/Corpus.hs` exposes `allExamples`: a list of examples specific to the locale (following `<Dimension>/<Lang>/<Region>/Rules.hs`).
  - Locale classifiers use the language corpus extended with the locale examples as training set.
  - Locale examples need to use the same `Context` (i.e. reference time) as the language corpus.
  - For backward compatibility, `<Dimension>/<Lang>/Corpus.hs` can expose also `defaultCorpus`, which is `corpus` augmented with specific examples. This is controlled by `getDefaultCorpusForLang` in `Duckling.Ranking.Generate`.
  - Tests run against each classifier to make sure runtime works as expected.
* MM/DD (en_US) vs DD/MM (en_GB) example to illustrate

Reviewed By: JonCoens, blandinw

Differential Revision: D6038096

fbshipit-source-id: f29c28d
2017-10-13 08:34:21 -07:00

318 lines
17 KiB
Haskell

-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree. An additional grant
-- of patent rights can be found in the PATENTS file in the same directory.
-----------------------------------------------------------------
-- Auto-generated by regenClassifiers
--
-- DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
-- @generated
-----------------------------------------------------------------
{-# LANGUAGE OverloadedStrings #-}
module Duckling.Ranking.Classifiers.HU_XX (classifiers) where
import Data.String
import Prelude
import qualified Data.HashMap.Strict as HashMap
import Duckling.Ranking.Types
classifiers :: Classifiers
classifiers
= HashMap.fromList
[("Thursday",
Classifier{okData =
ClassData{prior = 0.0, unseen = -1.0986122886681098,
likelihoods = HashMap.fromList [("", 0.0)], n = 1},
koData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0}}),
("integer (numeric)",
Classifier{okData =
ClassData{prior = -1.2205021062771466, unseen = -2.995732273553991,
likelihoods = HashMap.fromList [("", 0.0)], n = 18},
koData =
ClassData{prior = -0.3496737484797488,
unseen = -3.8066624897703196,
likelihoods = HashMap.fromList [("", 0.0)], n = 43}}),
("pm <time-of-day>",
Classifier{okData =
ClassData{prior = 0.0, unseen = -3.1354942159291497,
likelihoods =
HashMap.fromList
[("time-of-day (latent)", -0.6931471805599453),
("hour", -0.6931471805599453)],
n = 10},
koData =
ClassData{prior = -infinity, unseen = -1.0986122886681098,
likelihoods = HashMap.fromList [], n = 0}}),
("<time> <part-of-day>",
Classifier{okData =
ClassData{prior = -0.5108256237659907, unseen = -2.995732273553991,
likelihoods =
HashMap.fromList
[("dayhour", -0.9985288301111273),
("todaypart of days", -1.3350010667323402),
("tomorrowpart of days", -2.2512917986064953),
("yesterdaypart of days", -2.2512917986064953)],
n = 6},
koData =
ClassData{prior = -0.916290731874155, unseen = -2.772588722239781,
likelihoods =
HashMap.fromList
[("dayhour", -1.0986122886681098),
("next <day-of-week>part of days", -2.0149030205422647),
("yyyy.mm.ddpart of days", -2.0149030205422647),
("Wednesdaypart of days", -1.6094379124341003)],
n = 4}}),
("today",
Classifier{okData =
ClassData{prior = 0.0, unseen = -1.9459101490553135,
likelihoods = HashMap.fromList [("", 0.0)], n = 5},
koData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0}}),
("month (grain)",
Classifier{okData =
ClassData{prior = 0.0, unseen = -1.3862943611198906,
likelihoods = HashMap.fromList [("", 0.0)], n = 2},
koData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0}}),
("January",
Classifier{okData =
ClassData{prior = 0.0, unseen = -1.6094379124341003,
likelihoods = HashMap.fromList [("", 0.0)], n = 3},
koData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0}}),
("Wednesday",
Classifier{okData =
ClassData{prior = 0.0, unseen = -2.0794415416798357,
likelihoods = HashMap.fromList [("", 0.0)], n = 6},
koData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0}}),
("intersect",
Classifier{okData =
ClassData{prior = -0.40546510810816444,
unseen = -3.713572066704308,
likelihoods =
HashMap.fromList
[("dayhour", -1.0498221244986778),
("yyyy.mm.ddhh:mm", -2.995732273553991),
("Wednesdaypm <time-of-day>", -2.3025850929940455),
("todaypart of days", -2.0794415416798357),
("yyyy.mm.ddpm <time-of-day>", -2.995732273553991),
("dayminute", -2.995732273553991),
("tomorrowpart of days", -2.995732273553991),
("next <day-of-week>pm <time-of-day>", -2.3025850929940455),
("yesterdaypart of days", -2.995732273553991)],
n = 14},
koData =
ClassData{prior = -1.0986122886681098, unseen = -3.295836866004329,
likelihoods =
HashMap.fromList
[("dayhour", -1.1786549963416462),
("next <day-of-week>part of days", -2.5649493574615367),
("Wednesdaypm <time-of-day>", -1.8718021769015913),
("yyyy.mm.ddpart of days", -2.5649493574615367),
("Wednesdaypart of days", -2.159484249353372)],
n = 7}}),
("year (grain)",
Classifier{okData =
ClassData{prior = 0.0, unseen = -1.3862943611198906,
likelihoods = HashMap.fromList [("", 0.0)], n = 2},
koData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0}}),
("day before yesterday",
Classifier{okData =
ClassData{prior = 0.0, unseen = -1.0986122886681098,
likelihoods = HashMap.fromList [("", 0.0)], n = 1},
koData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0}}),
("Monday",
Classifier{okData =
ClassData{prior = 0.0, unseen = -1.6094379124341003,
likelihoods = HashMap.fromList [("", 0.0)], n = 3},
koData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0}}),
("yesterday",
Classifier{okData =
ClassData{prior = 0.0, unseen = -1.3862943611198906,
likelihoods = HashMap.fromList [("", 0.0)], n = 2},
koData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0}}),
("hh:mm:ss",
Classifier{okData =
ClassData{prior = 0.0, unseen = -1.6094379124341003,
likelihoods = HashMap.fromList [("", 0.0)], n = 3},
koData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0}}),
("am <time-of-day>",
Classifier{okData =
ClassData{prior = 0.0, unseen = -2.1972245773362196,
likelihoods =
HashMap.fromList
[("time-of-day (latent)", -0.6931471805599453),
("hour", -0.6931471805599453)],
n = 3},
koData =
ClassData{prior = -infinity, unseen = -1.0986122886681098,
likelihoods = HashMap.fromList [], n = 0}}),
("end of month",
Classifier{okData =
ClassData{prior = 0.0, unseen = -1.3862943611198906,
likelihoods = HashMap.fromList [("", 0.0)], n = 2},
koData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0}}),
("tomorrow",
Classifier{okData =
ClassData{prior = 0.0, unseen = -1.3862943611198906,
likelihoods = HashMap.fromList [("", 0.0)], n = 2},
koData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0}}),
("this|last|next <cycle>",
Classifier{okData =
ClassData{prior = 0.0, unseen = -2.5649493574615367,
likelihoods =
HashMap.fromList
[("month (grain)", -1.3862943611198906),
("year (grain)", -1.3862943611198906),
("year", -1.3862943611198906), ("month", -1.3862943611198906)],
n = 4},
koData =
ClassData{prior = -infinity, unseen = -1.6094379124341003,
likelihoods = HashMap.fromList [], n = 0}}),
("next <day-of-week>",
Classifier{okData =
ClassData{prior = 0.0, unseen = -2.4849066497880004,
likelihoods =
HashMap.fromList
[("Thursday", -1.7047480922384253),
("Wednesday", -1.0116009116784799),
("day", -0.7884573603642702)],
n = 4},
koData =
ClassData{prior = -infinity, unseen = -1.3862943611198906,
likelihoods = HashMap.fromList [], n = 0}}),
("Sunday",
Classifier{okData =
ClassData{prior = 0.0, unseen = -1.6094379124341003,
likelihoods = HashMap.fromList [("", 0.0)], n = 3},
koData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0}}),
("time-of-day (latent)",
Classifier{okData =
ClassData{prior = 0.0, unseen = -2.70805020110221,
likelihoods = HashMap.fromList [("integer (numeric)", 0.0)],
n = 13},
koData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0}}),
("hh:mm",
Classifier{okData =
ClassData{prior = -0.6931471805599453, unseen = -1.791759469228055,
likelihoods = HashMap.fromList [("", 0.0)], n = 4},
koData =
ClassData{prior = -0.6931471805599453, unseen = -1.791759469228055,
likelihoods = HashMap.fromList [("", 0.0)], n = 4}}),
("March",
Classifier{okData =
ClassData{prior = 0.0, unseen = -2.4849066497880004,
likelihoods = HashMap.fromList [("", 0.0)], n = 10},
koData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0}}),
("yyyy.mm.dd",
Classifier{okData =
ClassData{prior = 0.0, unseen = -2.0794415416798357,
likelihoods = HashMap.fromList [("", 0.0)], n = 6},
koData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0}}),
("number (0..10)",
Classifier{okData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0},
koData =
ClassData{prior = 0.0, unseen = -1.3862943611198906,
likelihoods = HashMap.fromList [("", 0.0)], n = 2}}),
("Tuesday",
Classifier{okData =
ClassData{prior = 0.0, unseen = -1.0986122886681098,
likelihoods = HashMap.fromList [("", 0.0)], n = 1},
koData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0}}),
("<named-month> <day-of-month> (non ordinal)",
Classifier{okData =
ClassData{prior = 0.0, unseen = -2.5649493574615367,
likelihoods =
HashMap.fromList
[("Marchinteger (numeric)", -0.6931471805599453),
("month", -0.6931471805599453)],
n = 5},
koData =
ClassData{prior = -infinity, unseen = -1.0986122886681098,
likelihoods = HashMap.fromList [], n = 0}}),
("ordinal (digits)",
Classifier{okData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0},
koData =
ClassData{prior = 0.0, unseen = -2.1972245773362196,
likelihoods = HashMap.fromList [("", 0.0)], n = 7}}),
("seasons",
Classifier{okData =
ClassData{prior = 0.0, unseen = -1.0986122886681098,
likelihoods = HashMap.fromList [("", 0.0)], n = 1},
koData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0}}),
("mm.dd",
Classifier{okData =
ClassData{prior = 0.0, unseen = -1.791759469228055,
likelihoods = HashMap.fromList [("", 0.0)], n = 4},
koData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0}}),
("right now",
Classifier{okData =
ClassData{prior = 0.0, unseen = -1.3862943611198906,
likelihoods = HashMap.fromList [("", 0.0)], n = 2},
koData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0}}),
("end of year",
Classifier{okData =
ClassData{prior = 0.0, unseen = -1.3862943611198906,
likelihoods = HashMap.fromList [("", 0.0)], n = 2},
koData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0}}),
("day after tomorrow",
Classifier{okData =
ClassData{prior = 0.0, unseen = -1.0986122886681098,
likelihoods = HashMap.fromList [("", 0.0)], n = 1},
koData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0}}),
("part of days",
Classifier{okData =
ClassData{prior = -0.40546510810816444,
unseen = -2.4849066497880004,
likelihoods = HashMap.fromList [("", 0.0)], n = 10},
koData =
ClassData{prior = -1.0986122886681098,
unseen = -1.9459101490553135,
likelihoods = HashMap.fromList [("", 0.0)], n = 5}})]