Numeral/BN: Adding Bengali numeral support to Duckling

Summary: Added support for Bengali numerals

Reviewed By: patapizza

Differential Revision: D8730468

fbshipit-source-id: dc36017e24d796f35abc477a0b8b317218c64a6a
This commit is contained in:
Arunavha Chanda 2018-07-09 12:22:11 -07:00 committed by Facebook Github Bot
parent 4e11613d39
commit d5555d0149
13 changed files with 397 additions and 0 deletions

View File

@ -24,6 +24,7 @@ import Duckling.Types
import qualified Duckling.Dimensions.Common as CommonDimensions
import qualified Duckling.Dimensions.AR as ARDimensions
import qualified Duckling.Dimensions.BG as BGDimensions
import qualified Duckling.Dimensions.BN as BNDimensions
import qualified Duckling.Dimensions.CS as CSDimensions
import qualified Duckling.Dimensions.DA as DADimensions
import qualified Duckling.Dimensions.DE as DEDimensions
@ -89,6 +90,7 @@ dependents (This (CustomDimension dim)) = dimDependents dim
langDimensions :: Lang -> [Some Dimension]
langDimensions AR = ARDimensions.allDimensions
langDimensions BG = BGDimensions.allDimensions
langDimensions BN = BNDimensions.allDimensions
langDimensions CS = CSDimensions.allDimensions
langDimensions DA = DADimensions.allDimensions
langDimensions DE = DEDimensions.allDimensions

18
Duckling/Dimensions/BN.hs Normal file
View File

@ -0,0 +1,18 @@
-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree. An additional grant
-- of patent rights can be found in the PATENTS file in the same directory.
module Duckling.Dimensions.BN
( allDimensions
) where
import Duckling.Dimensions.Types
-- | Dimensions available for Bengali (BN); only 'Numeral' is listed.
allDimensions :: [Some Dimension]
allDimensions = [This Numeral]

View File

@ -36,6 +36,7 @@ import qualified Duckling.Region as R (Region(NL))
data Lang
= AR
| BG
| BN
| CS
| DA
| DE

View File

@ -0,0 +1,102 @@
-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree. An additional grant
-- of patent rights can be found in the PATENTS file in the same directory.
{-# LANGUAGE OverloadedStrings #-}
module Duckling.Numeral.BN.Corpus
( corpus
) where
import Data.String
import Prelude
import Duckling.Locale
import Duckling.Numeral.Types
import Duckling.Resolve
import Duckling.Testing.Types
-- | Numeral corpus for Bengali: the shared test context with its locale
-- switched to BN (no region), the shared test options, and 'allExamples'.
corpus :: Corpus
corpus = (bnContext, testOptions, allExamples)
  where
    bnContext = testContext {locale = makeLocale BN Nothing}
-- | Every (expected value, input phrases) pair of the Bengali numeral corpus.
--
-- Each 'examples' group lists the phrases that must resolve to the given
-- 'NumeralValue'.
allExamples :: [Example]
allExamples = concat
  [ examples (NumeralValue 0)
      [ "শূন্য"
        -- Bengali digit zero (U+09E6). This entry must not be an empty
        -- string: an empty phrase can never resolve to a numeral.
      , "০"
      ]
  , examples (NumeralValue 1)
      [ "এক"
      ]
  , examples (NumeralValue 2)
      [ "দুই"
      ]
  , examples (NumeralValue 3)
      [ "তিন"
      ]
  , examples (NumeralValue 4)
      [ "চার"
      ]
  , examples (NumeralValue 5)
      [ "পাঁচ"
      ]
  , examples (NumeralValue 6)
      [ "ছয়"
      ]
  , examples (NumeralValue 7)
      [ "সাত"
      ]
  , examples (NumeralValue 8)
      [ "আট"
      ]
  , examples (NumeralValue 9)
      [ "নয়"
      ]
  , examples (NumeralValue 10)
      [ "দশ"
      ]
  , examples (NumeralValue 11)
      [ "এগারো"
      ]
  , examples (NumeralValue 15)
      [ "পনেরো"
      ]
  , examples (NumeralValue 17)
      [ "সতেরো"
      ]
  , examples (NumeralValue 20)
      [ "কুড়ি"
      ]
  , examples (NumeralValue 22)
      [ "বাইশ"
      ]
  , examples (NumeralValue 24)
      [ "চব্বিশ"
      ]
  , examples (NumeralValue 25)
      [ "পঁচিশ"
      ]
  , examples (NumeralValue 26)
      [ "ছাব্বিশ"
      ]
  , examples (NumeralValue 28)
      [ "আঠাশ"
      ]
  , examples (NumeralValue 30)
      [ "তিরিশ"
      ]
  , examples (NumeralValue 40)
      [ "চল্লিশ"
      ]
  , examples (NumeralValue 50)
      [ "পঞ্চাশ"
      ]
  , examples (NumeralValue 70)
      [ "সত্তর"
      ]
  ]

View File

@ -0,0 +1,170 @@
-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree. An additional grant
-- of patent rights can be found in the PATENTS file in the same directory.
{-# LANGUAGE GADTs #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE NoRebindableSyntax #-}
{-# LANGUAGE LambdaCase #-}
module Duckling.Numeral.BN.Rules
( rules
) where
import Data.HashMap.Strict (HashMap)
import Data.Maybe
import Data.String
import Data.Text (Text)
import Prelude
import qualified Data.HashMap.Strict as HashMap
import qualified Data.Text as Text
import Duckling.Dimensions.Types
import Duckling.Numeral.Helpers
import Duckling.Numeral.Types (NumeralData (..))
import Duckling.Regex.Types
import Duckling.Types
import qualified Duckling.Numeral.Types as TNumeral
-- | Translation table from the ten Bengali digits (U+09E6 .. U+09EF) to the
-- corresponding ASCII digits.
--
-- Every digit accepted by the character class in 'ruleBengali' needs an
-- entry here; a digit without one would be passed through untranslated by
-- 'bengaliToArab'.
bengaliMap :: HashMap Char Char
bengaliMap = HashMap.fromList
  [ ( '০', '0' )  -- U+09E6
  , ( '১', '1' )  -- U+09E7
  , ( '২', '2' )  -- U+09E8
  , ( '৩', '3' )  -- U+09E9
  , ( '৪', '4' )  -- U+09EA
  , ( '৫', '5' )  -- U+09EB
  , ( '৬', '6' )  -- U+09EC
  , ( '৭', '7' )  -- U+09ED
  , ( '৮', '8' )  -- U+09EE
  , ( '৯', '9' )  -- U+09EF
  ]
-- | Translate one Bengali digit to its ASCII digit using 'bengaliMap'.
-- Characters that have no entry in the table are returned unchanged.
bengaliToArab :: Char -> Char
bengaliToArab ch = fromMaybe ch (HashMap.lookup ch bengaliMap)
-- | Integers written with Bengali digits.
--
-- The pattern captures a run of 1 to 18 Bengali digits; each digit is
-- translated with 'bengaliToArab' and the result is handed to 'parseInt'.
-- NOTE(review): the 18-digit cap presumably keeps the value within the
-- range 'parseInt' can represent; confirm against its definition.
ruleBengali :: Rule
ruleBengali = Rule
  { name = "bengali forms"
  , pattern = [regex "([০১২৩৪৫৬৭৮৯]{1,18})"]
  , prod = \tokens -> case tokens of
      (Token RegexMatch (GroupMatch (digits:_)):_) ->
        parseInt (Text.map bengaliToArab digits) >>= integer . toInteger
      _ -> Nothing
  }
-- | Bengali number words for 0 through 10, keyed by the word.
--
-- The words are listed in ascending order and paired positionally with
-- their values, so the first word maps to 0 and the last to 10.
ruleNumeralMap :: HashMap Text Integer
ruleNumeralMap = HashMap.fromList (zip zeroToTen [0 ..])
  where
    zeroToTen =
      [ "শূন্য"
      , "এক"
      , "দুই"
      , "তিন"
      , "চার"
      , "পাঁচ"
      , "ছয়"
      , "সাত"
      , "আট"
      , "নয়"
      , "দশ"
      ]
-- | Number words 0..10: the matched word is looked up in 'ruleNumeralMap'.
-- Yields no token when the word is missing from the map.
ruleNumeral :: Rule
ruleNumeral = Rule
  { name = "number (0..10)"
  , pattern =
      [ regex "(শূন্য|এক|দুই|তিন|চার|পাঁচ|ছয়|সাত|আট|নয়|দশ)"
      ]
  , prod = \tokens -> case tokens of
      (Token RegexMatch (GroupMatch (word:_)):_) ->
        integer =<< HashMap.lookup word ruleNumeralMap
      _ -> Nothing
  }
-- | Bengali number words for 11 through 19, keyed by the word.
--
-- The words are listed in ascending order and paired positionally with
-- their values, starting at 11.
elevenToNineteenMap :: HashMap Text Integer
elevenToNineteenMap = HashMap.fromList (zip teens [11 ..])
  where
    teens =
      [ "এগারো"
      , "বারো"
      , "তেরো"
      , "চৌদ্দ"
      , "পনেরো"
      , "ষোল"
      , "সতেরো"
      , "আঠারো"
      , "উনিশ"
      ]
-- | Number words 11..19: the matched word is looked up in
-- 'elevenToNineteenMap'. Yields no token when the word is missing.
ruleElevenToNineteen :: Rule
ruleElevenToNineteen = Rule
  { name = "number (11..19)"
  , pattern =
      [ regex "(এগারো|বারো|তেরো|চৌদ্দ|পনেরো|ষোল|সতেরো|আঠারো|উনিশ)"
      ]
  , prod = \tokens -> case tokens of
      (Token RegexMatch (GroupMatch (word:_)):_) ->
        integer =<< HashMap.lookup word elevenToNineteenMap
      _ -> Nothing
  }
-- | Bengali number words for 21 through 29, keyed by the word.
--
-- The words are listed in ascending order and paired positionally with
-- their values, starting at 21.
twentyoneToTwentynineMap :: HashMap Text Integer
twentyoneToTwentynineMap = HashMap.fromList (zip twenties [21 ..])
  where
    twenties =
      [ "একুশ"
      , "বাইশ"
      , "তেইশ"
      , "চব্বিশ"
      , "পঁচিশ"
      , "ছাব্বিশ"
      , "সাতাশ"
      , "আঠাশ"
      , "ঊনত্রিশ"
      ]
-- | Number words 21..29: the matched word is looked up in
-- 'twentyoneToTwentynineMap'. Yields no token when the word is missing.
ruleTwentyoneToTwentynine :: Rule
ruleTwentyoneToTwentynine = Rule
  { name = "number (21..29)"
  , pattern =
      [ regex "(একুশ|বাইশ|তেইশ|চব্বিশ|পঁচিশ|ছাব্বিশ|সাতাশ|আঠাশ|ঊনত্রিশ)"
      ]
  , prod = \tokens -> case tokens of
      (Token RegexMatch (GroupMatch (word:_)):_) ->
        integer =<< HashMap.lookup word twentyoneToTwentynineMap
      _ -> Nothing
  }
-- | Bengali words for the multiples of ten from 20 through 90, keyed by
-- the word.
--
-- The words are listed in ascending order and paired positionally with
-- 20, 30, .. 90.
tensMap :: HashMap Text Integer
tensMap = HashMap.fromList (zip tensWords [20, 30 .. 90])
  where
    tensWords =
      [ "কুড়ি"
      , "তিরিশ"
      , "চল্লিশ"
      , "পঞ্চাশ"
      , "ষাট"
      , "সত্তর"
      , "আশি"
      , "নব্বই"
      ]
-- | Multiples of ten (20..90): the matched word is looked up in 'tensMap'.
-- Yields no token when the word is missing from the map.
ruleTens :: Rule
ruleTens = Rule
  { name = "integer (20..90)"
  , pattern =
      [ regex "(কুড়ি|তিরিশ|চল্লিশ|পঞ্চাশ|ষাট|সত্তর|আশি|নব্বই)"
      ]
  , prod = \tokens -> case tokens of
      (Token RegexMatch (GroupMatch (word:_)):_) ->
        integer =<< HashMap.lookup word tensMap
      _ -> Nothing
  }
-- | All Bengali numeral rules exported by this module: the digit rule
-- first, then the number-word rules.
rules :: [Rule]
rules = digitRules ++ wordRules
  where
    digitRules = [ruleBengali]
    wordRules =
      [ ruleNumeral
      , ruleElevenToNineteen
      , ruleTwentyoneToTwentynine
      , ruleTens
      ]

View File

@ -16,6 +16,7 @@ import Duckling.Locale
import Duckling.Ranking.Types
import qualified Duckling.Ranking.Classifiers.AR_XX as AR_XXClassifiers
import qualified Duckling.Ranking.Classifiers.BG_XX as BG_XXClassifiers
import qualified Duckling.Ranking.Classifiers.BN_XX as BN_XXClassifiers
import qualified Duckling.Ranking.Classifiers.CS_XX as CS_XXClassifiers
import qualified Duckling.Ranking.Classifiers.DA_XX as DA_XXClassifiers
import qualified Duckling.Ranking.Classifiers.DE_XX as DE_XXClassifiers
@ -55,6 +56,7 @@ import qualified Duckling.Ranking.Classifiers.ZH_XX as ZH_XXClassifiers
classifiers :: Locale -> Classifiers
classifiers (Locale AR _) = AR_XXClassifiers.classifiers
classifiers (Locale BG _) = BG_XXClassifiers.classifiers
classifiers (Locale BN _) = BN_XXClassifiers.classifiers
classifiers (Locale CS _) = CS_XXClassifiers.classifiers
classifiers (Locale DA _) = DA_XXClassifiers.classifiers
classifiers (Locale DE _) = DE_XXClassifiers.classifiers

View File

@ -0,0 +1,22 @@
-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree. An additional grant
-- of patent rights can be found in the PATENTS file in the same directory.
-----------------------------------------------------------------
-- Auto-generated by regenClassifiers
--
-- DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
-- @generated
-----------------------------------------------------------------
{-# LANGUAGE OverloadedStrings #-}
module Duckling.Ranking.Classifiers.BN_XX (classifiers) where
import Data.String
import Prelude
import qualified Data.HashMap.Strict as HashMap
import Duckling.Ranking.Types
-- | Ranking classifiers for the BN locale. The map is built from an empty
-- list, so it contains no entries. The file header marks this module as
-- generated by regenClassifiers, so regenerate rather than hand-edit.
classifiers :: Classifiers
classifiers = HashMap.fromList []

View File

@ -25,6 +25,7 @@ import Duckling.Types
import qualified Duckling.Rules.AR as ARRules
import qualified Duckling.Rules.Common as CommonRules
import qualified Duckling.Rules.BG as BGRules
import qualified Duckling.Rules.BN as BNRules
import qualified Duckling.Rules.CS as CSRules
import qualified Duckling.Rules.DA as DARules
import qualified Duckling.Rules.DE as DERules
@ -82,6 +83,7 @@ rulesFor' (Locale lang Nothing) dim =
defaultRules :: Lang -> Some Dimension -> [Rule]
defaultRules AR = ARRules.defaultRules
defaultRules BG = BGRules.defaultRules
defaultRules BN = BNRules.defaultRules
defaultRules CS = CSRules.defaultRules
defaultRules DA = DARules.defaultRules
defaultRules DE = DERules.defaultRules
@ -119,6 +121,7 @@ defaultRules ZH = ZHRules.defaultRules
localeRules :: Lang -> Region -> Some Dimension -> [Rule]
localeRules AR = ARRules.localeRules
localeRules BG = BGRules.localeRules
localeRules BN = BNRules.localeRules
localeRules CS = CSRules.localeRules
localeRules DA = DARules.localeRules
localeRules DE = DERules.localeRules
@ -156,6 +159,7 @@ localeRules ZH = ZHRules.localeRules
langRules :: Lang -> Some Dimension -> [Rule]
langRules AR = ARRules.langRules
langRules BG = BGRules.langRules
langRules BN = BNRules.langRules
langRules CS = CSRules.langRules
langRules DA = DARules.langRules
langRules DE = DERules.langRules

44
Duckling/Rules/BN.hs Normal file
View File

@ -0,0 +1,44 @@
-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree. An additional grant
-- of patent rights can be found in the PATENTS file in the same directory.
{-# LANGUAGE GADTs #-}
module Duckling.Rules.BN
( defaultRules
, langRules
, localeRules
) where
import Duckling.Dimensions.Types
import Duckling.Locale
import Duckling.Types
import qualified Duckling.Numeral.BN.Rules as Numeral
-- | Rules used for BN when no region is involved; these are exactly the
-- language-level rules.
defaultRules :: Some Dimension -> [Rule]
defaultRules dimension = langRules dimension
-- | Region-specific rules for BN. Only custom dimensions contribute any
-- (through 'dimLocaleRules'); every other dimension has none.
localeRules :: Region -> Some Dimension -> [Rule]
localeRules region dimension = case dimension of
  This (CustomDimension dim) -> dimLocaleRules region dim
  _ -> []
-- | Language-level rules for BN, per dimension.
--
-- 'Numeral' is the only built-in dimension with rules; custom dimensions
-- delegate to 'dimLangRules'. Each dimension is spelled out explicitly
-- rather than covered by a wildcard.
langRules :: Some Dimension -> [Rule]
langRules dimension = case dimension of
  This AmountOfMoney -> []
  This Distance -> []
  This Duration -> []
  This Numeral -> Numeral.rules
  This Email -> []
  This Ordinal -> []
  This PhoneNumber -> []
  This Quantity -> []
  This RegexMatch -> []
  This Temperature -> []
  This Time -> []
  This TimeGrain -> []
  This Url -> []
  This Volume -> []
  This (CustomDimension dim) -> dimLangRules BN dim

View File

@ -43,6 +43,7 @@ library
, Duckling.Rules.Common
, Duckling.Rules.AR
, Duckling.Rules.BG
, Duckling.Rules.BN
, Duckling.Rules.CS
, Duckling.Rules.DA
, Duckling.Rules.DE
@ -85,6 +86,7 @@ library
, Duckling.Ranking.Classifiers
, Duckling.Ranking.Classifiers.AR_XX
, Duckling.Ranking.Classifiers.BG_XX
, Duckling.Ranking.Classifiers.BN_XX
, Duckling.Ranking.Classifiers.CS_XX
, Duckling.Ranking.Classifiers.DA_XX
, Duckling.Ranking.Classifiers.DE_XX
@ -135,6 +137,7 @@ library
, Duckling.Dimensions.Types
, Duckling.Dimensions.AR
, Duckling.Dimensions.BG
, Duckling.Dimensions.BN
, Duckling.Dimensions.CS
, Duckling.Dimensions.DA
, Duckling.Dimensions.DE
@ -328,6 +331,8 @@ library
, Duckling.Numeral.AR.Rules
, Duckling.Numeral.BG.Corpus
, Duckling.Numeral.BG.Rules
, Duckling.Numeral.BN.Corpus
, Duckling.Numeral.BN.Rules
, Duckling.Numeral.CS.Corpus
, Duckling.Numeral.CS.Rules
, Duckling.Numeral.DA.Corpus
@ -787,6 +792,7 @@ test-suite duckling-test
-- Numeral
, Duckling.Numeral.AR.Tests
, Duckling.Numeral.BG.Tests
, Duckling.Numeral.BN.Tests
, Duckling.Numeral.CS.Tests
, Duckling.Numeral.DA.Tests
, Duckling.Numeral.DE.Tests

View File

@ -171,6 +171,7 @@ getDefaultCorpusForLang lang = getCorpusForLang lang
getCorpusForLang :: Lang -> Corpus
getCorpusForLang AR = ARTime.corpus
getCorpusForLang BG = (testContext, testOptions, [])
getCorpusForLang BN = (testContext, testOptions, [])
getCorpusForLang CS = (testContext, testOptions, [])
getCorpusForLang DA = DATime.corpus
getCorpusForLang DE = DETime.corpus

View File

@ -0,0 +1,23 @@
-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree. An additional grant
-- of patent rights can be found in the PATENTS file in the same directory.
module Duckling.Numeral.BN.Tests
( tests
) where
import Data.String
import Prelude
import Test.Tasty
import Duckling.Dimensions.Types
import Duckling.Numeral.BN.Corpus
import Duckling.Testing.Asserts
-- | Test group for Bengali numerals: runs the BN corpus against the
-- 'Numeral' dimension.
tests :: TestTree
tests = testGroup "BN Tests" [corpusTest]
  where
    corpusTest = makeCorpusTest [This Numeral] corpus

View File

@ -14,6 +14,7 @@ import Test.Tasty
import qualified Duckling.Numeral.AR.Tests as AR
import qualified Duckling.Numeral.BG.Tests as BG
import qualified Duckling.Numeral.BN.Tests as BN
import qualified Duckling.Numeral.CS.Tests as CS
import qualified Duckling.Numeral.DA.Tests as DA
import qualified Duckling.Numeral.DE.Tests as DE
@ -52,6 +53,7 @@ tests :: TestTree
tests = testGroup "Numeral Tests"
[ AR.tests
, BG.tests
, BN.tests
, CS.tests
, DA.tests
, DE.tests