Support Malayalam numerals

Summary: Add support for Malayalam numerals in Duckling.

Reviewed By: patapizza

Differential Revision: D10075358

fbshipit-source-id: ce7fee2a71d862391980481b018c513981249f3d
This commit is contained in:
Akhil Ravidas 2018-09-28 10:12:24 -07:00 committed by Facebook Github Bot
parent ee24a4196d
commit c46dbb0d92
13 changed files with 366 additions and 0 deletions

View File

@ -47,6 +47,7 @@ import qualified Duckling.Dimensions.KA as KADimensions
import qualified Duckling.Dimensions.KM as KMDimensions
import qualified Duckling.Dimensions.KO as KODimensions
import qualified Duckling.Dimensions.LO as LODimensions
import qualified Duckling.Dimensions.ML as MLDimensions
import qualified Duckling.Dimensions.MY as MYDimensions
import qualified Duckling.Dimensions.NB as NBDimensions
import qualified Duckling.Dimensions.NE as NEDimensions
@ -116,6 +117,7 @@ langDimensions KA = KADimensions.allDimensions
langDimensions KM = KMDimensions.allDimensions
langDimensions KO = KODimensions.allDimensions
langDimensions LO = LODimensions.allDimensions
langDimensions ML = MLDimensions.allDimensions
langDimensions MY = MYDimensions.allDimensions
langDimensions NB = NBDimensions.allDimensions
langDimensions NE = NEDimensions.allDimensions

18
Duckling/Dimensions/ML.hs Normal file
View File

@ -0,0 +1,18 @@
-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree. An additional grant
-- of patent rights can be found in the PATENTS file in the same directory.
module Duckling.Dimensions.ML
( allDimensions
) where
import Duckling.Dimensions.Types
-- | Dimensions listed for Malayalam (ML). 'Numeral' is the only entry.
allDimensions :: [Some Dimension]
allDimensions = [This Numeral]

View File

@ -59,6 +59,7 @@ data Lang
| KM
| KO
| LO
| ML
| MY
| NB
| NE

View File

@ -0,0 +1,101 @@
-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree. An additional grant
-- of patent rights can be found in the PATENTS file in the same directory.
{-# LANGUAGE OverloadedStrings #-}
module Duckling.Numeral.ML.Corpus
( corpus
) where
import Data.String
import Prelude
import Duckling.Locale
import Duckling.Numeral.Types
import Duckling.Resolve
import Duckling.Testing.Types
-- | Context used by the Malayalam corpus: the shared 'testContext' with its
-- 'locale' field replaced by the result of @makeLocale ML Nothing@.
context :: Context
context = testContext { locale = malayalam }
  where
    malayalam = makeLocale ML Nothing
-- | The Malayalam numeral corpus: the ML 'context', the shared
-- 'testOptions', and every example in 'allExamples'.
corpus :: Corpus
corpus =
  ( context
  , testOptions
  , allExamples
  )
-- | Malayalam phrases paired with the numeral value each one is expected to
-- produce. Every table row is expanded with 'examples', in table order.
allExamples :: [Example]
allExamples = concatMap toExamples
  [ (0, ["പൂജ്യം"])
  , (1, ["ഒന്ന്"])
  , (2, ["രണ്ട്"])
  , (3, ["മുന്ന്"])
  , (4, ["നാല്"])
  , (5, ["അഞ്ച്"])
  , (6, ["ആറ്"])
  , (7, ["ഏഴ്"])
  , (8, ["എട്ട്"])
  , (9, ["ഒൻപത്"])
  , (10, ["പത്ത്"])
  , (11, ["പതിനൊന്ന്"])
  , (12, ["പന്ത്രണ്ട്"])
  , (13, ["പതിമൂന്ന്"])
  , (19, ["പത്തൊമ്പത്"])
  , (20, ["ഇരുപത്"])
  , (21, ["ഇരുപത്തിഒന്ന്"])
  , (22, ["ഇരുപത്തിരണ്ട്"])
  , (26, ["ഇരുപത്തിആറ്"])
  , (30, ["മുപ്പത്"])
  , (33, ["മുപ്പത്തിമുന്ന്"])
  , (50, ["അമ്പത്"])
  , (51, ["അമ്പത്തിഒന്ന്"])
  ]
  where
    -- Wrap the raw number in 'NumeralValue' and attach its phrases.
    toExamples (value, phrases) = examples (NumeralValue value) phrases

View File

@ -0,0 +1,139 @@
-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree. An additional grant
-- of patent rights can be found in the PATENTS file in the same directory.
{-# LANGUAGE GADTs #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE NoRebindableSyntax #-}
{-# LANGUAGE LambdaCase #-}
module Duckling.Numeral.ML.Rules
( rules
) where
import Data.HashMap.Strict (HashMap)
import Data.Maybe
import Data.String
import Data.Text (Text)
import Prelude
import qualified Data.HashMap.Strict as HashMap
import qualified Data.Text as Text
import Duckling.Dimensions.Types
import Duckling.Numeral.Helpers
import Duckling.Numeral.Types (NumeralData (..))
import Duckling.Regex.Types
import Duckling.Types
import qualified Duckling.Numeral.Types as TNumeral
-- | Malayalam words for the integers 0 through 9.
--
-- Three is listed under two spellings: the long-vowel "മൂന്ന്" (the form that
-- also appears inside "പതിമൂന്ന്", 13) and the short-vowel "മുന്ന്", which was
-- the only spelling accepted before and is kept so existing inputs still
-- parse.
zeroToNineMap :: HashMap Text Integer
zeroToNineMap = HashMap.fromList
  [ ( "പൂജ്യം", 0 )
  , ( "ഒന്ന്", 1 )
  , ( "രണ്ട്", 2 )
  , ( "മൂന്ന്", 3 )
  , ( "മുന്ന്", 3 )
  , ( "നാല്", 4 )
  , ( "അഞ്ച്", 5 )
  , ( "ആറ്", 6 )
  , ( "ഏഴ്", 7 )
  , ( "എട്ട്", 8 )
  , ( "ഒൻപത്", 9 )
  ]

-- | Turns a single Malayalam digit word (0..9) into an integer token.
--
-- The regex alternatives must stay in sync with the keys of
-- 'zeroToNineMap': a word that matches the regex but is missing from the map
-- makes the production return 'Nothing'.
ruleZeroToNine :: Rule
ruleZeroToNine = Rule
  { name = "integer (0..9)"
  , pattern =
    [ regex "(പൂജ്യം|ഒന്ന്|രണ്ട്|മൂന്ന്|മുന്ന്|നാല്|അഞ്ച്|ആറ്|ഏഴ്|എട്ട്|ഒൻപത്)"
    ]
  , prod = \case
      (Token RegexMatch (GroupMatch (match:_)):_) ->
        HashMap.lookup (Text.toLower match) zeroToNineMap >>= integer
      _ -> Nothing
  }
-- | Malayalam words for the integers 10 through 19. The words are listed in
-- ascending order and paired positionally with the values starting at 10.
tenToNineteenMap :: HashMap Text Integer
tenToNineteenMap = HashMap.fromList (zip teenWords [10 ..])
  where
    teenWords =
      [ "പത്ത്"
      , "പതിനൊന്ന്"
      , "പന്ത്രണ്ട്"
      , "പതിമൂന്ന്"
      , "പതിനാല്"
      , "പതിനഞ്ച്"
      , "പതിനാറ്"
      , "പതിനേഴ്"
      , "പതിനെട്ട്"
      , "പത്തൊമ്പത്"
      ]
-- | Turns a Malayalam word for 10..19 into an integer token by looking the
-- matched text up in 'tenToNineteenMap'.
ruleTenToNineteen :: Rule
ruleTenToNineteen = Rule
  { name = "integer (10..19)"
  , pattern = [ regex "(പത്ത്|പതിനൊന്ന്|പന്ത്രണ്ട്|പതിമൂന്ന്|പതിനാല്|പതിനഞ്ച്|പതിനാറ്|പതിനേഴ്|പതിനെട്ട്|പത്തൊമ്പത്)" ]
  , prod = \case
      (Token RegexMatch (GroupMatch (word:_)):_) ->
        integer =<< HashMap.lookup (Text.toLower word) tenToNineteenMap
      _ -> Nothing
  }
-- | Malayalam words for the multiples of ten from 20 to 90.
--
-- Each value is reachable through two keys: the bare word matched by
-- 'ruleTens' and the longer form that 'ruleCompositeTens' matches in front
-- of a unit word.
tensMap :: HashMap Text Integer
tensMap = HashMap.fromList $ concat
  [ [ (bare, value), (prefixForm, value) ]
  | (bare, prefixForm, value) <-
      [ ( "ഇരുപത്", "ഇരുപത്തി", 20 )
      , ( "മുപ്പത്", "മുപ്പത്തി", 30 )
      , ( "നാല്പത്", "നാല്പത്തി", 40 )
      , ( "അമ്പത്", "അമ്പത്തി", 50 )
      , ( "അറുപത്", "അറുപത്തി", 60 )
      , ( "എഴുപത്", "എഴുപത്തി", 70 )
      , ( "എൺപത്", "എൺപത്തി", 80 )
      , ( "തൊണ്ണൂറ്", "തൊണ്ണൂറ്റി", 90 )
      ]
  ]
-- | Turns a bare Malayalam tens word (20, 30, .. 90) into an integer token
-- by looking the matched text up in 'tensMap'.
ruleTens :: Rule
ruleTens = Rule
  { name = "integer (20..90)"
  , pattern = [ regex "(ഇരുപത്|മുപ്പത്|നാല്പത്|അമ്പത്|അറുപത്|എഴുപത്|എൺപത്|തൊണ്ണൂറ്)" ]
  , prod = \case
      (Token RegexMatch (GroupMatch (word:_)):_) ->
        integer =<< HashMap.lookup (Text.toLower word) tensMap
      _ -> Nothing
  }
-- | Parses a tens prefix immediately followed by a unit word (1..9), for
-- example "ഇരുപത്തിരണ്ട്" (22), and produces the sum of the two parts.
--
-- The unit group accepts both spellings of three: the long-vowel "മൂന്ന്"
-- (the form that also appears inside "പതിമൂന്ന്", 13) and the short-vowel
-- "മുന്ന്" that was the only one accepted before.
ruleCompositeTens :: Rule
ruleCompositeTens = Rule
  { name = "integer ([2-9][1-9])"
  , pattern =
    [ regex "(ഇരുപത്തി|മുപ്പത്തി|നാല്പത്തി|അമ്പത്തി|അറുപത്തി|എഴുപത്തി|എൺപത്തി|തൊണ്ണൂറ്റി)(ഒന്ന്|രണ്ട്|മൂന്ന്|മുന്ന്|നാല്|അഞ്ച്|ആറ്|ഏഴ്|എട്ട്|ഒൻപത്)"
    ]
  , prod = \case
      (Token RegexMatch (GroupMatch (m1:m2:_)):_) -> do
        v1 <- HashMap.lookup (Text.toLower m1) tensMap
        v2 <- unitValue (Text.toLower m2)
        integer $ v1 + v2
      _ -> Nothing
  }
  where
    -- Resolve the unit word. The long-vowel spelling of three is handled
    -- here directly so this rule does not depend on that spelling being a
    -- key of 'zeroToNineMap'; every other word goes through the map.
    unitValue :: Text -> Maybe Integer
    unitValue "മൂന്ന്" = Just 3
    unitValue unit = HashMap.lookup unit zeroToNineMap
-- | Every Malayalam numeral rule defined in this module.
rules :: [Rule]
rules = [ruleZeroToNine, ruleTenToNineteen, ruleCompositeTens, ruleTens]

View File

@ -41,6 +41,7 @@ import qualified Duckling.Ranking.Classifiers.KA_XX as KA_XXClassifiers
import qualified Duckling.Ranking.Classifiers.KM_XX as KM_XXClassifiers
import qualified Duckling.Ranking.Classifiers.KO_XX as KO_XXClassifiers
import qualified Duckling.Ranking.Classifiers.LO_XX as LO_XXClassifiers
import qualified Duckling.Ranking.Classifiers.ML_XX as ML_XXClassifiers
import qualified Duckling.Ranking.Classifiers.MY_XX as MY_XXClassifiers
import qualified Duckling.Ranking.Classifiers.NB_XX as NB_XXClassifiers
import qualified Duckling.Ranking.Classifiers.NE_XX as NE_XXClassifiers
@ -84,6 +85,7 @@ classifiers (Locale KA _) = KA_XXClassifiers.classifiers
classifiers (Locale KM _) = KM_XXClassifiers.classifiers
classifiers (Locale KO _) = KO_XXClassifiers.classifiers
classifiers (Locale LO _) = LO_XXClassifiers.classifiers
classifiers (Locale ML _) = ML_XXClassifiers.classifiers
classifiers (Locale MY _) = MY_XXClassifiers.classifiers
classifiers (Locale NB _) = NB_XXClassifiers.classifiers
classifiers (Locale NE _) = NE_XXClassifiers.classifiers

View File

@ -0,0 +1,22 @@
-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree. An additional grant
-- of patent rights can be found in the PATENTS file in the same directory.
-----------------------------------------------------------------
-- Auto-generated by regenClassifiers
--
-- DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
-- @generated
-----------------------------------------------------------------
{-# LANGUAGE OverloadedStrings #-}
module Duckling.Ranking.Classifiers.ML_XX (classifiers) where
import Data.String
import Prelude
import qualified Data.HashMap.Strict as HashMap
import Duckling.Ranking.Types
-- | Classifier table for ML. It is built from an empty list, so it holds no
-- entries.
classifiers :: Classifiers
classifiers = HashMap.fromList []

View File

@ -48,6 +48,7 @@ import qualified Duckling.Rules.KA as KARules
import qualified Duckling.Rules.KM as KMRules
import qualified Duckling.Rules.KO as KORules
import qualified Duckling.Rules.LO as LORules
import qualified Duckling.Rules.ML as MLRules
import qualified Duckling.Rules.MY as MYRules
import qualified Duckling.Rules.NB as NBRules
import qualified Duckling.Rules.NE as NERules
@ -109,6 +110,7 @@ defaultRules KA = KARules.defaultRules
defaultRules KM = KMRules.defaultRules
defaultRules KO = KORules.defaultRules
defaultRules LO = LORules.defaultRules
defaultRules ML = MLRules.defaultRules
defaultRules MY = MYRules.defaultRules
defaultRules NB = NBRules.defaultRules
defaultRules NE = NERules.defaultRules
@ -150,6 +152,7 @@ localeRules KA = KARules.localeRules
localeRules KM = KMRules.localeRules
localeRules KO = KORules.localeRules
localeRules LO = LORules.localeRules
localeRules ML = MLRules.localeRules
localeRules MY = MYRules.localeRules
localeRules NB = NBRules.localeRules
localeRules NE = NERules.localeRules
@ -191,6 +194,7 @@ langRules KA = KARules.langRules
langRules KM = KMRules.langRules
langRules KO = KORules.langRules
langRules LO = LORules.langRules
langRules ML = MLRules.langRules
langRules MY = MYRules.langRules
langRules NB = NBRules.langRules
langRules NE = NERules.langRules

45
Duckling/Rules/ML.hs Normal file
View File

@ -0,0 +1,45 @@
-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree. An additional grant
-- of patent rights can be found in the PATENTS file in the same directory.
{-# LANGUAGE GADTs #-}
module Duckling.Rules.ML
( defaultRules
, langRules
, localeRules
) where
import Duckling.Dimensions.Types
import Duckling.Locale
import Duckling.Types
import qualified Duckling.Numeral.ML.Rules as Numeral
-- | Default Malayalam rules for a dimension; identical to 'langRules'.
defaultRules :: Some Dimension -> [Rule]
defaultRules dimension = langRules dimension
-- | Region-specific Malayalam rules. Only custom dimensions contribute
-- rules (via 'dimLocaleRules'); every other dimension yields an empty list.
localeRules :: Region -> Some Dimension -> [Rule]
localeRules region dimension = case dimension of
  This (CustomDimension dim) -> dimLocaleRules region dim
  _ -> []
-- | Language-level Malayalam rules per dimension. 'Numeral' is the only
-- built-in dimension with rules; custom dimensions delegate to
-- 'dimLangRules'. Each dimension is spelled out explicitly rather than
-- hidden behind a wildcard.
langRules :: Some Dimension -> [Rule]
langRules dimension = case dimension of
  This AmountOfMoney -> []
  This Distance -> []
  This Duration -> []
  This Email -> []
  This Numeral -> Numeral.rules
  This Ordinal -> []
  This PhoneNumber -> []
  This Quantity -> []
  This RegexMatch -> []
  This Temperature -> []
  This Time -> []
  This TimeGrain -> []
  This Url -> []
  This Volume -> []
  This (CustomDimension dim) -> dimLangRules ML dim

View File

@ -66,6 +66,7 @@ library
, Duckling.Rules.KM
, Duckling.Rules.KO
, Duckling.Rules.LO
, Duckling.Rules.ML
, Duckling.Rules.MY
, Duckling.Rules.NB
, Duckling.Rules.NE
@ -115,6 +116,7 @@ library
, Duckling.Ranking.Classifiers.KM_XX
, Duckling.Ranking.Classifiers.KO_XX
, Duckling.Ranking.Classifiers.LO_XX
, Duckling.Ranking.Classifiers.ML_XX
, Duckling.Ranking.Classifiers.MY_XX
, Duckling.Ranking.Classifiers.NB_XX
, Duckling.Ranking.Classifiers.NE_XX
@ -166,6 +168,7 @@ library
, Duckling.Dimensions.KM
, Duckling.Dimensions.KO
, Duckling.Dimensions.LO
, Duckling.Dimensions.ML
, Duckling.Dimensions.MY
, Duckling.Dimensions.NB
, Duckling.Dimensions.NE
@ -390,6 +393,8 @@ library
, Duckling.Numeral.KO.Rules
, Duckling.Numeral.LO.Corpus
, Duckling.Numeral.LO.Rules
, Duckling.Numeral.ML.Corpus
, Duckling.Numeral.ML.Rules
, Duckling.Numeral.MY.Corpus
, Duckling.Numeral.MY.Rules
, Duckling.Numeral.NB.Corpus
@ -849,6 +854,7 @@ test-suite duckling-test
, Duckling.Numeral.KM.Tests
, Duckling.Numeral.KO.Tests
, Duckling.Numeral.LO.Tests
, Duckling.Numeral.ML.Tests
, Duckling.Numeral.MY.Tests
, Duckling.Numeral.NB.Tests
, Duckling.Numeral.NE.Tests

View File

@ -194,6 +194,7 @@ getCorpusForLang KA = (testContext, testOptions, [])
getCorpusForLang KM = (testContext, testOptions, [])
getCorpusForLang KO = KOTime.corpus
getCorpusForLang LO = (testContext, testOptions, [])
getCorpusForLang ML = (testContext, testOptions, [])
getCorpusForLang MY = (testContext, testOptions, [])
getCorpusForLang NB = NBTime.corpus
getCorpusForLang NE = (testContext, testOptions, [])

View File

@ -0,0 +1,23 @@
-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree. An additional grant
-- of patent rights can be found in the PATENTS file in the same directory.
module Duckling.Numeral.ML.Tests
( tests ) where
import Prelude
import Data.String
import Test.Tasty
import Duckling.Dimensions.Types
import Duckling.Numeral.ML.Corpus
import Duckling.Testing.Asserts
-- | Test group for Malayalam numerals: runs the ML corpus against the
-- 'Numeral' dimension.
tests :: TestTree
tests = testGroup "ML Tests" [numeralCorpusTest]
  where
    numeralCorpusTest = makeCorpusTest [This Numeral] corpus

View File

@ -37,6 +37,7 @@ import qualified Duckling.Numeral.KA.Tests as KA
import qualified Duckling.Numeral.KM.Tests as KM
import qualified Duckling.Numeral.KO.Tests as KO
import qualified Duckling.Numeral.LO.Tests as LO
import qualified Duckling.Numeral.ML.Tests as ML
import qualified Duckling.Numeral.MY.Tests as MY
import qualified Duckling.Numeral.NB.Tests as NB
import qualified Duckling.Numeral.NE.Tests as NE
@ -79,6 +80,7 @@ tests = testGroup "Numeral Tests"
, KM.tests
, KO.tests
, LO.tests
, ML.tests
, MY.tests
, NB.tests
, NE.tests