From 6f774abe38eb8c5469094fc6461ac93739d9d764 Mon Sep 17 00:00:00 2001
From: Stepan Parunashvili
Date: Tue, 5 Sep 2017 11:58:00 -0700
Subject: [PATCH] georgian numeral support

Summary: Introducing Georgian (KA), and the very beginnings of numeral support

Reviewed By: patapizza

Differential Revision: D5757952

fbshipit-source-id: 89d05f8
---
 Duckling/Dimensions.hs             |  2 +
 Duckling/Dimensions/KA.hs          | 17 +++++++
 Duckling/Lang.hs                   |  1 +
 Duckling/Numeral/KA/Corpus.hs      | 43 +++++++++++++++++
 Duckling/Numeral/KA/Rules.hs       | 75 ++++++++++++++++++++++++++++++
 Duckling/Ranking/Classifiers.hs    |  2 +
 Duckling/Ranking/Classifiers/KA.hs | 22 +++++++++
 Duckling/Rules.hs                  |  2 +
 Duckling/Rules/KA.hs               | 34 ++++++++++++++
 duckling.cabal                     |  6 +++
 exe/Duckling/Ranking/Generate.hs   |  1 +
 tests/Duckling/Numeral/KA/Tests.hs | 22 +++++++++
 tests/Duckling/Numeral/Tests.hs    |  2 +
 13 files changed, 229 insertions(+)
 create mode 100644 Duckling/Dimensions/KA.hs
 create mode 100644 Duckling/Numeral/KA/Corpus.hs
 create mode 100644 Duckling/Numeral/KA/Rules.hs
 create mode 100644 Duckling/Ranking/Classifiers/KA.hs
 create mode 100644 Duckling/Rules/KA.hs
 create mode 100644 tests/Duckling/Numeral/KA/Tests.hs

diff --git a/Duckling/Dimensions.hs b/Duckling/Dimensions.hs
index cef8d7da..7692e2f2 100644
--- a/Duckling/Dimensions.hs
+++ b/Duckling/Dimensions.hs
@@ -36,6 +36,7 @@ import qualified Duckling.Dimensions.HU as HUDimensions
 import qualified Duckling.Dimensions.ID as IDDimensions
 import qualified Duckling.Dimensions.IT as ITDimensions
 import qualified Duckling.Dimensions.JA as JADimensions
+import qualified Duckling.Dimensions.KA as KADimensions
 import qualified Duckling.Dimensions.KO as KODimensions
 import qualified Duckling.Dimensions.MY as MYDimensions
 import qualified Duckling.Dimensions.NB as NBDimensions
@@ -95,6 +96,7 @@ langDimensions HU = HUDimensions.allDimensions
 langDimensions ID = IDDimensions.allDimensions
 langDimensions IT = ITDimensions.allDimensions
 langDimensions JA = JADimensions.allDimensions
+langDimensions KA = KADimensions.allDimensions
 langDimensions KO = KODimensions.allDimensions
 langDimensions MY = MYDimensions.allDimensions
 langDimensions NB = NBDimensions.allDimensions
diff --git a/Duckling/Dimensions/KA.hs b/Duckling/Dimensions/KA.hs
new file mode 100644
index 00000000..d27ca232
--- /dev/null
+++ b/Duckling/Dimensions/KA.hs
@@ -0,0 +1,17 @@
+-- Copyright (c) 2016-present, Facebook, Inc.
+-- All rights reserved.
+--
+-- This source code is licensed under the BSD-style license found in the
+-- LICENSE file in the root directory of this source tree. An additional grant
+-- of patent rights can be found in the PATENTS file in the same directory.
+
+
+module Duckling.Dimensions.KA
+  ( allDimensions
+  ) where
+
+import Duckling.Dimensions.Types
+
+allDimensions :: [Some Dimension]
+allDimensions =
+  [ This Numeral ]
diff --git a/Duckling/Lang.hs b/Duckling/Lang.hs
index 4d1e8955..7f48cd28 100644
--- a/Duckling/Lang.hs
+++ b/Duckling/Lang.hs
@@ -38,6 +38,7 @@ data Lang
   | ID
   | IT
   | JA
+  | KA
   | KO
   | MY
   | NB
diff --git a/Duckling/Numeral/KA/Corpus.hs b/Duckling/Numeral/KA/Corpus.hs
new file mode 100644
index 00000000..9c98ede8
--- /dev/null
+++ b/Duckling/Numeral/KA/Corpus.hs
@@ -0,0 +1,43 @@
+-- Copyright (c) 2016-present, Facebook, Inc.
+-- All rights reserved.
+--
+-- This source code is licensed under the BSD-style license found in the
+-- LICENSE file in the root directory of this source tree. An additional grant
+-- of patent rights can be found in the PATENTS file in the same directory.
+
+
+{-# LANGUAGE OverloadedStrings #-}
+
+module Duckling.Numeral.KA.Corpus
+  ( corpus ) where
+
+import Data.String
+import Prelude
+
+import Duckling.Lang
+import Duckling.Numeral.Types
+import Duckling.Resolve
+import Duckling.Testing.Types
+
+corpus :: Corpus
+corpus = (testContext {lang = KA}, allExamples)
+
+allExamples :: [Example]
+allExamples = concat
+  [ examples (NumeralValue 0)
+             [ "0"
+             , "ნული"
+             ]
+  , examples (NumeralValue 1)
+             [ "1"
+             , "ერთი"
+             ]
+  , examples (NumeralValue 2)
+             [ "2"
+             , "ორი"
+             ]
+  , examples (NumeralValue 3)
+             [ "3"
+             , "სამი"
+             ]
+  ]
diff --git a/Duckling/Numeral/KA/Rules.hs b/Duckling/Numeral/KA/Rules.hs
new file mode 100644
index 00000000..d4552fd9
--- /dev/null
+++ b/Duckling/Numeral/KA/Rules.hs
@@ -0,0 +1,75 @@
+-- Copyright (c) 2016-present, Facebook, Inc.
+-- All rights reserved.
+--
+-- This source code is licensed under the BSD-style license found in the
+-- LICENSE file in the root directory of this source tree. An additional grant
+-- of patent rights can be found in the PATENTS file in the same directory.
+
+
+{-# LANGUAGE GADTs #-}
+{-# LANGUAGE OverloadedStrings #-}
+
+module Duckling.Numeral.KA.Rules
+  ( rules ) where
+
+import Data.HashMap.Strict (HashMap)
+import Data.Maybe
+import Data.String
+import Data.Text (Text)
+import Prelude
+import qualified Data.HashMap.Strict as HashMap
+import qualified Data.Text as Text
+
+import Duckling.Dimensions.Types
+import Duckling.Numeral.Helpers
+import Duckling.Numeral.Types (NumeralData (..))
+import Duckling.Regex.Types
+import Duckling.Types
+import qualified Duckling.Numeral.Types as TNumeral
+
+ruleIntegerNumeric :: Rule
+ruleIntegerNumeric = Rule
+  { name = "integer (numeric)"
+  , pattern =
+    [ regex "(\\d{1,18})"
+    ]
+  , prod = \tokens -> case tokens of
+      (Token RegexMatch (GroupMatch (match:_)):
+       _) -> do
+        v <- parseInt match
+        integer $ toInteger v
+      _ -> Nothing
+  }
+
+ruleNumeralMap :: HashMap Text Integer
+ruleNumeralMap = HashMap.fromList
+  [ ( "ნული", 0 )
+  , ( "ერთი", 1 )
+  , ( "ორი", 2 )
+  , ( "სამი", 3 )
+  , ( "ოთხი", 4 )
+  , ( "ხუთი", 5)
+  , ( "ექვსი", 6)
+  , ( "შვიდი", 7)
+  , ( "რვა", 8)
+  , ( "ცხრა", 9)
+  , ( "ათი", 10)
+  ]
+
+ruleNumeral :: Rule
+ruleNumeral = Rule
+  { name = "number (0..10)"
+  , pattern =
+    [ regex "(ნული|ერთი|ორი|სამი|ოთხი|ხუთი|ექვსი|შვიდი|რვა|ცხრა|ათი)"
+    ]
+  , prod = \tokens -> case tokens of
+      (Token RegexMatch (GroupMatch (match:_)):_) ->
+        HashMap.lookup (Text.toLower match) ruleNumeralMap >>= integer
+      _ -> Nothing
+  }
+
+rules :: [Rule]
+rules =
+  [ ruleIntegerNumeric
+  , ruleNumeral
+  ]
diff --git a/Duckling/Ranking/Classifiers.hs b/Duckling/Ranking/Classifiers.hs
index 9b648e7b..3f3f5905 100644
--- a/Duckling/Ranking/Classifiers.hs
+++ b/Duckling/Ranking/Classifiers.hs
@@ -27,6 +27,7 @@ import qualified Duckling.Ranking.Classifiers.HU as HUClassifiers
 import qualified Duckling.Ranking.Classifiers.ID as IDClassifiers
 import qualified Duckling.Ranking.Classifiers.IT as ITClassifiers
 import qualified Duckling.Ranking.Classifiers.JA as JAClassifiers
+import qualified Duckling.Ranking.Classifiers.KA as KAClassifiers
 import qualified Duckling.Ranking.Classifiers.KO as KOClassifiers
 import qualified Duckling.Ranking.Classifiers.MY as MYClassifiers
 import qualified Duckling.Ranking.Classifiers.NB as NBClassifiers
@@ -59,6 +60,7 @@ classifiers HU = HUClassifiers.classifiers
 classifiers ID = IDClassifiers.classifiers
 classifiers IT = ITClassifiers.classifiers
 classifiers JA = JAClassifiers.classifiers
+classifiers KA = KAClassifiers.classifiers
 classifiers KO = KOClassifiers.classifiers
 classifiers MY = MYClassifiers.classifiers
 classifiers NB = NBClassifiers.classifiers
diff --git a/Duckling/Ranking/Classifiers/KA.hs b/Duckling/Ranking/Classifiers/KA.hs
new file mode 100644
index 00000000..11813bdd
--- /dev/null
+++ b/Duckling/Ranking/Classifiers/KA.hs
@@ -0,0 +1,22 @@
+-- Copyright (c) 2016-present, Facebook, Inc.
+-- All rights reserved.
+--
+-- This source code is licensed under the BSD-style license found in the
+-- LICENSE file in the root directory of this source tree. An additional grant
+-- of patent rights can be found in the PATENTS file in the same directory.
+
+-----------------------------------------------------------------
+-- Auto-generated by regenClassifiers
+--
+-- DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
+-- @generated
+-----------------------------------------------------------------
+{-# LANGUAGE OverloadedStrings #-}
+module Duckling.Ranking.Classifiers.KA (classifiers) where
+import Prelude
+import Duckling.Ranking.Types
+import qualified Data.HashMap.Strict as HashMap
+import Data.String
+
+classifiers :: Classifiers
+classifiers = HashMap.fromList []
diff --git a/Duckling/Rules.hs b/Duckling/Rules.hs
index d9239b40..6ab9d6ab 100644
--- a/Duckling/Rules.hs
+++ b/Duckling/Rules.hs
@@ -38,6 +38,7 @@ import qualified Duckling.Rules.HU as HURules
 import qualified Duckling.Rules.ID as IDRules
 import qualified Duckling.Rules.IT as ITRules
 import qualified Duckling.Rules.JA as JARules
+import qualified Duckling.Rules.KA as KARules
 import qualified Duckling.Rules.KO as KORules
 import qualified Duckling.Rules.MY as MYRules
 import qualified Duckling.Rules.NB as NBRules
@@ -86,6 +87,7 @@ langRules HU = HURules.rules
 langRules ID = IDRules.rules
 langRules IT = ITRules.rules
 langRules JA = JARules.rules
+langRules KA = KARules.rules
 langRules KO = KORules.rules
 langRules MY = MYRules.rules
 langRules NB = NBRules.rules
diff --git a/Duckling/Rules/KA.hs b/Duckling/Rules/KA.hs
new file mode 100644
index 00000000..217f0762
--- /dev/null
+++ b/Duckling/Rules/KA.hs
@@ -0,0 +1,34 @@
+-- Copyright (c) 2016-present, Facebook, Inc.
+-- All rights reserved.
+--
+-- This source code is licensed under the BSD-style license found in the
+-- LICENSE file in the root directory of this source tree. An additional grant
+-- of patent rights can be found in the PATENTS file in the same directory.
+
+
+{-# LANGUAGE GADTs #-}
+{-# LANGUAGE OverloadedStrings #-}
+
+module Duckling.Rules.KA
+  ( rules
+  ) where
+
+import Duckling.Dimensions.Types
+import Duckling.Types
+import qualified Duckling.Numeral.KA.Rules as Numeral
+
+rules :: Some Dimension -> [Rule]
+rules (This Distance) = []
+rules (This Duration) = []
+rules (This Numeral) = Numeral.rules
+rules (This Email) = []
+rules (This AmountOfMoney) = []
+rules (This Ordinal) = []
+rules (This PhoneNumber) = []
+rules (This Quantity) = []
+rules (This RegexMatch) = []
+rules (This Temperature) = []
+rules (This Time) = []
+rules (This TimeGrain) = []
+rules (This Url) = []
+rules (This Volume) = []
diff --git a/duckling.cabal b/duckling.cabal
index e826bfb6..0179c16a 100644
--- a/duckling.cabal
+++ b/duckling.cabal
@@ -56,6 +56,7 @@ library
                      , Duckling.Rules.ID
                      , Duckling.Rules.IT
                      , Duckling.Rules.JA
+                     , Duckling.Rules.KA
                      , Duckling.Rules.KO
                      , Duckling.Rules.MY
                      , Duckling.Rules.NB
@@ -92,6 +93,7 @@ library
                      , Duckling.Ranking.Classifiers.ID
                      , Duckling.Ranking.Classifiers.IT
                      , Duckling.Ranking.Classifiers.JA
+                     , Duckling.Ranking.Classifiers.KA
                      , Duckling.Ranking.Classifiers.KO
                      , Duckling.Ranking.Classifiers.MY
                      , Duckling.Ranking.Classifiers.NB
@@ -127,6 +129,7 @@ library
                      , Duckling.Dimensions.ID
                      , Duckling.Dimensions.IT
                      , Duckling.Dimensions.JA
+                     , Duckling.Dimensions.KA
                      , Duckling.Dimensions.KO
                      , Duckling.Dimensions.MY
                      , Duckling.Dimensions.NB
@@ -276,6 +279,8 @@ library
                      , Duckling.Numeral.IT.Rules
                      , Duckling.Numeral.JA.Corpus
                      , Duckling.Numeral.JA.Rules
+                     , Duckling.Numeral.KA.Corpus
+                     , Duckling.Numeral.KA.Rules
                      , Duckling.Numeral.KO.Corpus
                      , Duckling.Numeral.KO.Rules
                      , Duckling.Numeral.MY.Corpus
@@ -624,6 +629,7 @@ test-suite duckling-test
                      , Duckling.Numeral.ID.Tests
                      , Duckling.Numeral.IT.Tests
                      , Duckling.Numeral.JA.Tests
+                     , Duckling.Numeral.KA.Tests
                      , Duckling.Numeral.KO.Tests
                      , Duckling.Numeral.MY.Tests
                      , Duckling.Numeral.NB.Tests
diff --git a/exe/Duckling/Ranking/Generate.hs b/exe/Duckling/Ranking/Generate.hs
index dc4df705..6102c447 100644
--- a/exe/Duckling/Ranking/Generate.hs
+++ b/exe/Duckling/Ranking/Generate.hs
@@ -85,6 +85,7 @@ regenClassifiers lang = do
       ID -> (testContext, [])
       IT -> ITTime.corpus
       JA -> (testContext, [])
+      KA -> (testContext, [])
       KO -> KOTime.corpus
       MY -> (testContext, [])
       NB -> NBTime.corpus
diff --git a/tests/Duckling/Numeral/KA/Tests.hs b/tests/Duckling/Numeral/KA/Tests.hs
new file mode 100644
index 00000000..14b43168
--- /dev/null
+++ b/tests/Duckling/Numeral/KA/Tests.hs
@@ -0,0 +1,22 @@
+-- Copyright (c) 2016-present, Facebook, Inc.
+-- All rights reserved.
+--
+-- This source code is licensed under the BSD-style license found in the
+-- LICENSE file in the root directory of this source tree. An additional grant
+-- of patent rights can be found in the PATENTS file in the same directory.
+
+module Duckling.Numeral.KA.Tests
+  ( tests ) where
+
+import Data.String
+import Prelude
+import Test.Tasty
+
+import Duckling.Dimensions.Types
+import Duckling.Numeral.KA.Corpus
+import Duckling.Testing.Asserts
+
+tests :: TestTree
+tests = testGroup "KA Tests"
+  [ makeCorpusTest [This Numeral] corpus
+  ]
diff --git a/tests/Duckling/Numeral/Tests.hs b/tests/Duckling/Numeral/Tests.hs
index 192b804b..e1ba12a8 100644
--- a/tests/Duckling/Numeral/Tests.hs
+++ b/tests/Duckling/Numeral/Tests.hs
@@ -28,6 +28,7 @@ import qualified Duckling.Numeral.HU.Tests as HU
 import qualified Duckling.Numeral.ID.Tests as ID
 import qualified Duckling.Numeral.IT.Tests as IT
 import qualified Duckling.Numeral.JA.Tests as JA
+import qualified Duckling.Numeral.KA.Tests as KA
 import qualified Duckling.Numeral.KO.Tests as KO
 import qualified Duckling.Numeral.MY.Tests as MY
 import qualified Duckling.Numeral.NB.Tests as NB
@@ -60,6 +61,7 @@ tests = testGroup "Numeral Tests"
   , ID.tests
   , IT.tests
   , JA.tests
+  , KA.tests
   , KO.tests
   , MY.tests
   , NB.tests