From dd70d80dc1c9b47a8b68ce36ba12c0da4e376d3e Mon Sep 17 00:00:00 2001 From: Ovidiu Nistor Date: Tue, 21 Sep 2021 12:21:05 -0700 Subject: [PATCH] Add Japanese time dimension (#646) Summary: Add the most common rules for Japanese time dimension. Pull Request resolved: https://github.com/facebook/duckling/pull/646 Reviewed By: stroxler Differential Revision: D30675005 Pulled By: chessai fbshipit-source-id: 917aa98b5cfe0c73d207b1f51b80d8e17a1c7e6a --- Duckling/Dimensions/JA.hs | 1 + Duckling/Duration/JA/Corpus.hs | 11 + Duckling/Duration/JA/Rules.hs | 43 ++ Duckling/Ranking/Classifiers/JA_XX.hs | 968 +++++++++++++++++++++++++- Duckling/Rules/JA.hs | 6 +- Duckling/Time/Helpers.hs | 10 +- Duckling/Time/JA/Corpus.hs | 604 ++++++++++++++++ Duckling/Time/JA/Rules.hs | 639 +++++++++++++++++ Duckling/TimeGrain/JA/Rules.hs | 15 +- duckling.cabal | 4 + exe/Duckling/Ranking/Generate.hs | 3 +- tests/Duckling/Duration/JA/Tests.hs | 1 + tests/Duckling/Time/JA/Tests.hs | 23 + tests/Duckling/Time/Tests.hs | 2 + 14 files changed, 2318 insertions(+), 12 deletions(-) create mode 100644 Duckling/Duration/JA/Rules.hs create mode 100644 Duckling/Time/JA/Corpus.hs create mode 100644 Duckling/Time/JA/Rules.hs create mode 100644 tests/Duckling/Time/JA/Tests.hs diff --git a/Duckling/Dimensions/JA.hs b/Duckling/Dimensions/JA.hs index e9e03885..460ddae1 100644 --- a/Duckling/Dimensions/JA.hs +++ b/Duckling/Dimensions/JA.hs @@ -17,4 +17,5 @@ allDimensions = , Seal Numeral , Seal Ordinal , Seal Temperature + , Seal Time ] diff --git a/Duckling/Duration/JA/Corpus.hs b/Duckling/Duration/JA/Corpus.hs index 76e2d140..32fa0d37 100644 --- a/Duckling/Duration/JA/Corpus.hs +++ b/Duckling/Duration/JA/Corpus.hs @@ -9,6 +9,7 @@ module Duckling.Duration.JA.Corpus ( corpus + , negativeCorpus ) where import Prelude @@ -23,6 +24,13 @@ import Duckling.TimeGrain.Types (Grain(..)) corpus :: Corpus corpus = (testContext {locale = makeLocale JA Nothing}, testOptions, allExamples) +negativeCorpus :: NegativeCorpus 
+negativeCorpus = (testContext {locale = makeLocale JA Nothing}, testOptions, examples) + where + examples = + [ "月面" + ] + allExamples :: [Example] allExamples = concat [ examples (DurationData 1 Second) @@ -36,4 +44,7 @@ allExamples = concat [ "百 日" , "百 日間" ] + , examples (DurationData 2 Month) + [ "2ヶ月" + ] ]
diff --git a/Duckling/Duration/JA/Rules.hs b/Duckling/Duration/JA/Rules.hs
new file mode 100644
index 00000000..1931639f
--- /dev/null
+++ b/Duckling/Duration/JA/Rules.hs
@@ -0,0 +1,50 @@
+-- Copyright (c) 2016-present, Facebook, Inc.
+-- All rights reserved.
+--
+-- This source code is licensed under the BSD-style license found in the
+-- LICENSE file in the root directory of this source tree.
+
+
+{-# LANGUAGE GADTs #-}
+{-# LANGUAGE LambdaCase #-}
+{-# LANGUAGE OverloadedStrings #-}
+
+module Duckling.Duration.JA.Rules
+  ( rules
+  ) where
+
+import Data.String
+import Prelude
+
+import Duckling.Dimensions.Types
+import Duckling.Duration.Helpers
+import Duckling.Numeral.Helpers (numberWith)
+import Duckling.Numeral.Types (NumeralData(..))
+import Duckling.Types
+import qualified Duckling.Numeral.Types as TNumeral
+import qualified Duckling.TimeGrain.Types as TG
+
+-- | Matches a numeral followed by a counter (ケ, ヶ, カ, ヵ, か or 箇) and
+-- 月, げつ or つき -- e.g. @2ヶ月@ -- and yields a @Duration@ token built
+-- with @duration TG.Month@.
+-- NOTE(review): @numberWith TNumeral.value (>= 1)@ presumably restricts the
+-- match to numerals of at least 1 -- confirm in "Duckling.Numeral.Helpers".
+-- The matched value goes through 'floor', so a fractional part is dropped.
+ruleDurationMonthWithCounter :: Rule
+ruleDurationMonthWithCounter = Rule
+  { name = "<integer> counter months"
+  , pattern =
+    [ numberWith TNumeral.value (>= 1)
+    , regex "(ケ|ヶ|カ|ヵ|か|箇)(月|げつ|つき)"
+    ]
+  , prod = \case
+      (Token Numeral TNumeral.NumeralData{TNumeral.value = v}:
+       _) -> Just $ Token Duration $ duration TG.Month $ floor v
+      _ -> Nothing
+  }
+
+-- | Rules exported by this module; currently only the counter-month rule.
+rules :: [Rule]
+rules =
+  [ ruleDurationMonthWithCounter
+  ]
diff --git a/Duckling/Ranking/Classifiers/JA_XX.hs b/Duckling/Ranking/Classifiers/JA_XX.hs index 7cfb1e9e..a6992fae 100644 --- a/Duckling/Ranking/Classifiers/JA_XX.hs +++ b/Duckling/Ranking/Classifiers/JA_XX.hs @@ -19,4 +19,970 @@ import qualified Data.HashMap.Strict as HashMap import Duckling.Ranking.Types classifiers :: Classifiers -classifiers = HashMap.fromList [] \ No newline at end 
of file +classifiers + = HashMap.fromList + [("\21320\24460|\21320\21069 ", + Classifier{okData = + ClassData{prior = -0.35667494393873245, + unseen = -3.5553480614894135, + likelihoods = + HashMap.fromList + [(" o'clock", -2.1400661634962708), + ("time-of-day (latent)", -2.1400661634962708), + ("hh:mm", -2.4277482359480516), ("hour", -1.580450375560848), + ("hh\26178mm\20998", -1.580450375560848), + ("minute", -1.329135947279942)], + n = 14}, + koData = + ClassData{prior = -1.2039728043259361, + unseen = -2.9444389791664407, + likelihoods = + HashMap.fromList + [("time-of-day (latent)", -0.9444616088408514), + ("hour", -0.9444616088408514)], + n = 6}}), + ("Thursday", + Classifier{okData = + ClassData{prior = 0.0, unseen = -1.791759469228055, + likelihoods = HashMap.fromList [("", 0.0)], n = 4}, + koData = + ClassData{prior = -infinity, unseen = -0.6931471805599453, + likelihoods = HashMap.fromList [], n = 0}}), + ("integer (numeric)", + Classifier{okData = + ClassData{prior = -0.9572987556015321, unseen = -4.875197323201151, + likelihoods = HashMap.fromList [("", 0.0)], n = 129}, + koData = + ClassData{prior = -0.4843923666978351, unseen = -5.342334251964811, + likelihoods = HashMap.fromList [("", 0.0)], n = 207}}), + ("integer (20..90)", + Classifier{okData = + ClassData{prior = 0.0, unseen = -1.3862943611198906, + likelihoods = HashMap.fromList [("integer (0..10)", 0.0)], n = 2}, + koData = + ClassData{prior = -infinity, unseen = -0.6931471805599453, + likelihoods = HashMap.fromList [], n = 0}}), + ("yyyy/mm", + Classifier{okData = + ClassData{prior = -1.9459101490553135, unseen = -1.791759469228055, + likelihoods = HashMap.fromList [("", 0.0)], n = 4}, + koData = + ClassData{prior = -0.15415067982725836, + unseen = -3.258096538021482, + likelihoods = HashMap.fromList [("", 0.0)], n = 24}}), + ("today", + Classifier{okData = + ClassData{prior = 0.0, unseen = -2.3978952727983707, + likelihoods = HashMap.fromList [("", 0.0)], n = 9}, + koData = + ClassData{prior 
= -infinity, unseen = -0.6931471805599453, + likelihoods = HashMap.fromList [], n = 0}}), + ("mm/dd", + Classifier{okData = + ClassData{prior = 0.0, unseen = -1.6094379124341003, + likelihoods = HashMap.fromList [("", 0.0)], n = 3}, + koData = + ClassData{prior = -infinity, unseen = -0.6931471805599453, + likelihoods = HashMap.fromList [], n = 0}}), + ("at ", + Classifier{okData = + ClassData{prior = -0.5108256237659907, unseen = -2.833213344056216, + likelihoods = + HashMap.fromList + [("hh\26178mm\20998", -0.8266785731844679), + ("minute", -0.8266785731844679)], + n = 6}, + koData = + ClassData{prior = -0.916290731874155, unseen = -2.5649493574615367, + likelihoods = + HashMap.fromList + [("time-of-day (latent)", -0.8754687373538999), + ("hour", -0.8754687373538999)], + n = 4}}), + ("Imperial year (latent)", + Classifier{okData = + ClassData{prior = 0.0, unseen = -1.9459101490553135, + likelihoods = HashMap.fromList [("integer (numeric)", 0.0)], + n = 5}, + koData = + ClassData{prior = -infinity, unseen = -0.6931471805599453, + likelihoods = HashMap.fromList [], n = 0}}), + ("December", + Classifier{okData = + ClassData{prior = 0.0, unseen = -1.791759469228055, + likelihoods = HashMap.fromList [("", 0.0)], n = 4}, + koData = + ClassData{prior = -infinity, unseen = -0.6931471805599453, + likelihoods = HashMap.fromList [], n = 0}}), + ("September", + Classifier{okData = + ClassData{prior = -0.40546510810816444, + unseen = -1.791759469228055, + likelihoods = HashMap.fromList [("", 0.0)], n = 4}, + koData = + ClassData{prior = -1.0986122886681098, + unseen = -1.3862943611198906, + likelihoods = HashMap.fromList [("", 0.0)], n = 2}}), + ("last|past|next ", + Classifier{okData = + ClassData{prior = 0.0, unseen = -2.3025850929940455, + likelihoods = + HashMap.fromList + [("day", -1.0986122886681098), + (" ", -0.8109302162163288), + ("hour", -1.5040773967762742)], + n = 3}, + koData = + ClassData{prior = -infinity, unseen = -1.3862943611198906, + likelihoods = 
HashMap.fromList [], n = 0}}), + ("October", + Classifier{okData = + ClassData{prior = 0.0, unseen = -1.791759469228055, + likelihoods = HashMap.fromList [("", 0.0)], n = 4}, + koData = + ClassData{prior = -infinity, unseen = -0.6931471805599453, + likelihoods = HashMap.fromList [], n = 0}}), + ("month (grain)", + Classifier{okData = + ClassData{prior = -2.6790626642289577, + unseen = -2.1972245773362196, + likelihoods = HashMap.fromList [("", 0.0)], n = 7}, + koData = + ClassData{prior = -7.109592168373022e-2, + unseen = -4.574710978503383, + likelihoods = HashMap.fromList [("", 0.0)], n = 95}}), + (" o'clock", + Classifier{okData = + ClassData{prior = 0.0, unseen = -3.0910424533583156, + likelihoods = + HashMap.fromList + [("\21320\24460|\21320\21069 ", + -1.6582280766035324), + ("time-of-day (latent)", -1.0986122886681098), + ("hour", -0.7419373447293773)], + n = 9}, + koData = + ClassData{prior = -infinity, unseen = -1.3862943611198906, + likelihoods = HashMap.fromList [], n = 0}}), + ("January", + Classifier{okData = + ClassData{prior = -0.40546510810816444, + unseen = -1.791759469228055, + likelihoods = HashMap.fromList [("", 0.0)], n = 4}, + koData = + ClassData{prior = -1.0986122886681098, + unseen = -1.3862943611198906, + likelihoods = HashMap.fromList [("", 0.0)], n = 2}}), + ("this quarter", + Classifier{okData = + ClassData{prior = 0.0, unseen = -1.6094379124341003, + likelihoods = HashMap.fromList [("", 0.0)], n = 3}, + koData = + ClassData{prior = -infinity, unseen = -0.6931471805599453, + likelihoods = HashMap.fromList [], n = 0}}), + ("Wednesday", + Classifier{okData = + ClassData{prior = 0.0, unseen = -1.791759469228055, + likelihoods = HashMap.fromList [("", 0.0)], n = 4}, + koData = + ClassData{prior = -infinity, unseen = -0.6931471805599453, + likelihoods = HashMap.fromList [], n = 0}}), + ("November", + Classifier{okData = + ClassData{prior = 0.0, unseen = -1.791759469228055, + likelihoods = HashMap.fromList [("", 0.0)], n = 4}, + koData = + 
ClassData{prior = -infinity, unseen = -0.6931471805599453, + likelihoods = HashMap.fromList [], n = 0}}), + ("July", + Classifier{okData = + ClassData{prior = 0.0, unseen = -3.1780538303479458, + likelihoods = HashMap.fromList [("", 0.0)], n = 22}, + koData = + ClassData{prior = -infinity, unseen = -0.6931471805599453, + likelihoods = HashMap.fromList [], n = 0}}), + ("hour (grain)", + Classifier{okData = + ClassData{prior = -0.8209805520698302, + unseen = -2.5649493574615367, + likelihoods = HashMap.fromList [("", 0.0)], n = 11}, + koData = + ClassData{prior = -0.579818495252942, unseen = -2.772588722239781, + likelihoods = HashMap.fromList [("", 0.0)], n = 14}}), + (" - ", + Classifier{okData = + ClassData{prior = 0.0, unseen = -1.791759469228055, + likelihoods = + HashMap.fromList [("integer (numeric)integer (numeric)", 0.0)], + n = 4}, + koData = + ClassData{prior = -infinity, unseen = -0.6931471805599453, + likelihoods = HashMap.fromList [], n = 0}}), + ("from