Set up Nepali (NE) and add Numeral dimension

Summary:
- Set up Nepali (NE) language
- Add basic Numeral dimension
Closes https://github.com/facebook/duckling/pull/156

Reviewed By: JonCoens

Differential Revision: D6965558

Pulled By: patapizza

fbshipit-source-id: f46c9b104d4345f20bd0cf53f8c9c8754855f314
This commit is contained in:
bidhan-a 2018-02-13 07:42:55 -08:00 committed by Facebook Github Bot
parent c7fb533a67
commit 43079e7113
14 changed files with 336 additions and 2 deletions

View File

@ -42,6 +42,7 @@ import qualified Duckling.Dimensions.KA as KADimensions
import qualified Duckling.Dimensions.KO as KODimensions
import qualified Duckling.Dimensions.MY as MYDimensions
import qualified Duckling.Dimensions.NB as NBDimensions
import qualified Duckling.Dimensions.NE as NEDimensions
import qualified Duckling.Dimensions.NL as NLDimensions
import qualified Duckling.Dimensions.PL as PLDimensions
import qualified Duckling.Dimensions.PT as PTDimensions
@ -104,6 +105,7 @@ langDimensions KA = KADimensions.allDimensions
langDimensions KO = KODimensions.allDimensions
langDimensions MY = MYDimensions.allDimensions
langDimensions NB = NBDimensions.allDimensions
langDimensions NE = NEDimensions.allDimensions
langDimensions NL = NLDimensions.allDimensions
langDimensions PL = PLDimensions.allDimensions
langDimensions PT = PTDimensions.allDimensions

18
Duckling/Dimensions/NE.hs Normal file
View File

@ -0,0 +1,18 @@
-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree. An additional grant
-- of patent rights can be found in the PATENTS file in the same directory.
module Duckling.Dimensions.NE
( allDimensions
) where
import Duckling.Dimensions.Types
-- | Dimensions listed for Nepali (NE); currently only 'Numeral'.
allDimensions :: [Some Dimension]
allDimensions = [This Numeral]

View File

@ -53,6 +53,7 @@ data Lang
| KO
| MY
| NB
| NE
| NL
| PL
| PT

View File

@ -0,0 +1,86 @@
-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree. An additional grant
-- of patent rights can be found in the PATENTS file in the same directory.
{-# LANGUAGE OverloadedStrings #-}
module Duckling.Numeral.NE.Corpus
( corpus
) where
import Data.String
import Prelude
import Duckling.Locale
import Duckling.Numeral.Types
import Duckling.Resolve
import Duckling.Testing.Types
-- | Numeral corpus for Nepali: 'allExamples' paired with the shared test
-- context whose locale is overridden to NE with no region.
corpus :: Corpus
corpus = (context, allExamples)
  where
    context = testContext {locale = makeLocale NE Nothing}
-- | Example inputs for the Nepali numeral rules, grouped by the value each
-- input is expected to resolve to.
allExamples :: [Example]
allExamples = concat
  [ examples (NumeralValue 0)
             [ "शुन्य"
             , "सुन्ना"
             ]
  , examples (NumeralValue 1)
             [ "एक"
             ]
  , examples (NumeralValue 2)
             [ "दुई"
             ]
  , examples (NumeralValue 3)
             [ "तीन"
             ]
  , examples (NumeralValue 4)
             [ "चार"
             ]
  , examples (NumeralValue 5)
             [ "पाँच"
             ]
  -- The example for 6 must be the word the 0..19 rule's regex matches
  -- ("छ"); an empty string can never be recognised as a numeral.
  , examples (NumeralValue 6)
             [ "छ"
             ]
  , examples (NumeralValue 7)
             [ "सात"
             ]
  , examples (NumeralValue 8)
             [ "आठ"
             ]
  , examples (NumeralValue 9)
             [ "नौ"
             ]
  , examples (NumeralValue 10)
             [ "दश"
             ]
  , examples (NumeralValue 11)
             [ "एघार"
             ]
  , examples (NumeralValue 12)
             [ "बाह्र"
             ]
  , examples (NumeralValue 20)
             [ "बिस"
             ]
  , examples (NumeralValue 21)
             [ "एक्काइस"
             ]
  , examples (NumeralValue 22)
             [ "बाइस"
             ]
  , examples (NumeralValue 26)
             [ "छब्बिस"
             ]
  , examples (NumeralValue 30)
             [ "तिस"
             ]
  , examples (NumeralValue 50)
             [ "पचास"
             ]
  ]

View File

@ -0,0 +1,125 @@
-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree. An additional grant
-- of patent rights can be found in the PATENTS file in the same directory.
{-# LANGUAGE GADTs #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE NoRebindableSyntax #-}
module Duckling.Numeral.NE.Rules
( rules
) where
import Control.Applicative ((<|>))
import Data.HashMap.Strict (HashMap)
import Data.Maybe
import Data.String
import Data.Text (Text)
import Prelude
import qualified Data.HashMap.Strict as HashMap
import qualified Data.Text as Text
import Duckling.Dimensions.Types
import Duckling.Numeral.Helpers
import Duckling.Numeral.Types (NumeralData (..))
import Duckling.Regex.Types
import Duckling.Types
import qualified Duckling.Numeral.Types as TNumeral
-- | Lookup table from Nepali number words to their values, covering 0..19.
-- Zero has two spellings. Every key must be one of the alternatives in the
-- regex of 'ruleToNineteen', which looks matched words up in this table.
zeroToNineteenMap :: HashMap Text Integer
zeroToNineteenMap = HashMap.fromList
  [ ( "शुन्य", 0 )
  , ( "सुन्ना", 0 )
  , ( "एक", 1 )
  , ( "दुई", 2 )
  , ( "तीन", 3 )
  , ( "चार", 4 )
  , ( "पाँच", 5 )
  -- The key was previously the empty string, so the word "छ" (which the
  -- regex does match) failed the lookup and the rule produced Nothing.
  , ( "छ", 6 )
  , ( "सात", 7 )
  , ( "आठ", 8 )
  , ( "नौ", 9 )
  , ( "दश", 10 )
  , ( "एघार", 11 )
  , ( "बाह्र", 12 )
  , ( "तेह्र", 13 )
  , ( "चौध", 14 )
  , ( "पन्ध्र", 15 )
  , ( "सोह्र", 16 )
  , ( "सत्र", 17 )
  , ( "अठार", 18 )
  , ( "उन्नाइस", 19 )
  ]
-- | Recognises a Nepali number word for 0 through 19 and resolves it via
-- 'zeroToNineteenMap'. Yields 'Nothing' when the token list has any other
-- shape or the matched word is not in the table.
ruleToNineteen :: Rule
ruleToNineteen = Rule
  { name = "integer (0..19)"
  , pattern =
      [ regex "(शुन्य|सुन्ना|एक|दुई|तीन|चार|पाँच|छ|सात|आठ|नौ|दश|एघार|बाह्र|तेह्र|चौध|पन्ध्र|सोह्र|सत्र|अठार|उन्नाइस)"
      ]
  , prod = \tokens -> case tokens of
      -- Only the first capture group of the first token is consulted.
      (Token RegexMatch (GroupMatch (word:_)):_) ->
        integer =<< HashMap.lookup (Text.toLower word) zeroToNineteenMap
      _ -> Nothing
  }
-- | Lookup table from Nepali number words to their values, covering 21..29.
-- The words are listed in ascending order and paired positionally with the
-- values 21, 22, ... so the order of the list is significant.
twentyoneToTwentynineMap :: HashMap Text Integer
twentyoneToTwentynineMap = HashMap.fromList $ zip
  [ "एक्काइस"
  , "बाइस"
  , "तेइस"
  , "चौबिस"
  , "पच्चिस"
  , "छब्बिस"
  , "सत्ताइस"
  , "अट्ठाइस"
  , "उनन्तिस"
  ]
  [21 ..]
-- | Recognises a Nepali number word for 21 through 29 and resolves it via
-- 'twentyoneToTwentynineMap'. Yields 'Nothing' when the token list has any
-- other shape or the matched word is not in the table.
ruleTwentyoneToTwentynine :: Rule
ruleTwentyoneToTwentynine = Rule
  { name = "number (21..29)"
  , pattern =
      [ regex "(एक्काइस|बाइस|तेइस|चौबिस|पच्चिस|छब्बिस|सत्ताइस|अट्ठाइस|उनन्तिस)"
      ]
  , prod = \tokens -> case tokens of
      -- Only the first capture group of the first token is consulted.
      (Token RegexMatch (GroupMatch (word:_)):_) ->
        integer =<< HashMap.lookup (Text.toLower word) twentyoneToTwentynineMap
      _ -> Nothing
  }
-- | Lookup table from Nepali words for the multiples of ten to their
-- values, covering 20..90. The words are listed in ascending order and
-- paired positionally with 20, 30, ... so the order of the list is
-- significant.
tensMap :: HashMap Text Integer
tensMap = HashMap.fromList $ zip
  [ "बिस"
  , "तिस"
  , "चालिस"
  , "पचास"
  , "साठी"
  , "सत्तरी"
  , "असी"
  , "नब्बे"
  ]
  [20, 30 ..]
-- | Recognises a Nepali word for a multiple of ten (20 through 90) and
-- resolves it via 'tensMap'. Yields 'Nothing' when the token list has any
-- other shape or the matched word is not in the table.
ruleTens :: Rule
ruleTens = Rule
  { name = "integer (20..90)"
  , pattern =
      [ regex "(बिस|तिस|चालिस|पचास|साठी|सत्तरी|असी|नब्बे)"
      ]
  , prod = \tokens -> case tokens of
      -- Only the first capture group of the first token is consulted.
      (Token RegexMatch (GroupMatch (word:_)):_) ->
        integer =<< HashMap.lookup (Text.toLower word) tensMap
      _ -> Nothing
  }
-- | All Nepali numeral rules exported by this module.
rules :: [Rule]
rules = [ruleToNineteen, ruleTwentyoneToTwentynine, ruleTens]

View File

@ -38,6 +38,7 @@ import qualified Duckling.Ranking.Classifiers.KA_XX as KA_XXClassifiers
import qualified Duckling.Ranking.Classifiers.KO_XX as KO_XXClassifiers
import qualified Duckling.Ranking.Classifiers.MY_XX as MY_XXClassifiers
import qualified Duckling.Ranking.Classifiers.NB_XX as NB_XXClassifiers
import qualified Duckling.Ranking.Classifiers.NE_XX as NE_XXClassifiers
import qualified Duckling.Ranking.Classifiers.NL_XX as NL_XXClassifiers
import qualified Duckling.Ranking.Classifiers.PL_XX as PL_XXClassifiers
import qualified Duckling.Ranking.Classifiers.PT_XX as PT_XXClassifiers
@ -74,6 +75,7 @@ classifiers (Locale KA _) = KA_XXClassifiers.classifiers
classifiers (Locale KO _) = KO_XXClassifiers.classifiers
classifiers (Locale MY _) = MY_XXClassifiers.classifiers
classifiers (Locale NB _) = NB_XXClassifiers.classifiers
classifiers (Locale NE _) = NE_XXClassifiers.classifiers
classifiers (Locale NL _) = NL_XXClassifiers.classifiers
classifiers (Locale PL _) = PL_XXClassifiers.classifiers
classifiers (Locale PT _) = PT_XXClassifiers.classifiers

View File

@ -320,10 +320,10 @@ classifiers
unseen = -4.31748811353631,
likelihoods =
HashMap.fromList
[("<integer> (latent time-of-day)", -0.9718605830289657),
[("<integer> (latent time-of-day)", -0.9718605830289658),
("intersect by \"di\", \"della\", \"del\"", -3.20545280453606),
("day", -2.3581549441488563), ("Lunedi", -3.6109179126442243),
("hour", -0.9718605830289657),
("hour", -0.9718605830289658),
("two time tokens separated by `di`", -3.20545280453606),
("Domenica", -3.6109179126442243)],
n = 33}}),

View File

@ -0,0 +1,22 @@
-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree. An additional grant
-- of patent rights can be found in the PATENTS file in the same directory.
-----------------------------------------------------------------
-- Auto-generated by regenClassifiers
--
-- DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
-- @generated
-----------------------------------------------------------------
{-# LANGUAGE OverloadedStrings #-}
module Duckling.Ranking.Classifiers.NE_XX (classifiers) where
import Data.String
import Prelude
import qualified Data.HashMap.Strict as HashMap
import Duckling.Ranking.Types
-- | Ranking classifiers for NE; the map is built from an empty list, so it
-- contains no entries.
classifiers :: Classifiers
classifiers = HashMap.fromList []

View File

@ -45,6 +45,7 @@ import qualified Duckling.Rules.KA as KARules
import qualified Duckling.Rules.KO as KORules
import qualified Duckling.Rules.MY as MYRules
import qualified Duckling.Rules.NB as NBRules
import qualified Duckling.Rules.NE as NERules
import qualified Duckling.Rules.NL as NLRules
import qualified Duckling.Rules.PL as PLRules
import qualified Duckling.Rules.PT as PTRules
@ -99,6 +100,7 @@ defaultRules KA = KARules.defaultRules
defaultRules KO = KORules.defaultRules
defaultRules MY = MYRules.defaultRules
defaultRules NB = NBRules.defaultRules
defaultRules NE = NERules.defaultRules
defaultRules NL = NLRules.defaultRules
defaultRules PL = PLRules.defaultRules
defaultRules PT = PTRules.defaultRules
@ -133,6 +135,7 @@ localeRules KA = KARules.localeRules
localeRules KO = KORules.localeRules
localeRules MY = MYRules.localeRules
localeRules NB = NBRules.localeRules
localeRules NE = NERules.localeRules
localeRules NL = NLRules.localeRules
localeRules PL = PLRules.localeRules
localeRules PT = PTRules.localeRules
@ -167,6 +170,7 @@ langRules KA = KARules.langRules
langRules KO = KORules.langRules
langRules MY = MYRules.langRules
langRules NB = NBRules.langRules
langRules NE = NERules.langRules
langRules NL = NLRules.langRules
langRules PL = PLRules.langRules
langRules PT = PTRules.langRules

42
Duckling/Rules/NE.hs Normal file
View File

@ -0,0 +1,42 @@
-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree. An additional grant
-- of patent rights can be found in the PATENTS file in the same directory.
{-# LANGUAGE GADTs #-}
module Duckling.Rules.NE
( defaultRules
, langRules
, localeRules
) where
import Duckling.Dimensions.Types
import Duckling.Locale
import Duckling.Types
import qualified Duckling.Numeral.NE.Rules as Numeral
-- | Default rules for Nepali: identical to 'langRules', i.e. the
-- language-level rules for the requested dimension.
defaultRules :: Some Dimension -> [Rule]
defaultRules = langRules
-- | Region-specific rules for Nepali. Always empty, whatever the region or
-- dimension.
localeRules :: Region -> Some Dimension -> [Rule]
localeRules _region _dimension = []
-- | Language-level rules for Nepali, per dimension. Only 'Numeral' has
-- rules; every other dimension is spelled out explicitly and maps to the
-- empty list.
langRules :: Some Dimension -> [Rule]
langRules dimension = case dimension of
  This Numeral       -> Numeral.rules
  This AmountOfMoney -> []
  This Distance      -> []
  This Duration      -> []
  This Email         -> []
  This Ordinal       -> []
  This PhoneNumber   -> []
  This Quantity      -> []
  This RegexMatch    -> []
  This Temperature   -> []
  This Time          -> []
  This TimeGrain     -> []
  This Url           -> []
  This Volume        -> []

View File

@ -62,6 +62,7 @@ library
, Duckling.Rules.KO
, Duckling.Rules.MY
, Duckling.Rules.NB
, Duckling.Rules.NE
, Duckling.Rules.NL
, Duckling.Rules.PL
, Duckling.Rules.PT
@ -104,6 +105,7 @@ library
, Duckling.Ranking.Classifiers.KO_XX
, Duckling.Ranking.Classifiers.MY_XX
, Duckling.Ranking.Classifiers.NB_XX
, Duckling.Ranking.Classifiers.NE_XX
, Duckling.Ranking.Classifiers.NL_XX
, Duckling.Ranking.Classifiers.PL_XX
, Duckling.Ranking.Classifiers.PT_XX
@ -146,6 +148,7 @@ library
, Duckling.Dimensions.KO
, Duckling.Dimensions.MY
, Duckling.Dimensions.NB
, Duckling.Dimensions.NE
, Duckling.Dimensions.NL
, Duckling.Dimensions.PL
, Duckling.Dimensions.PT
@ -318,6 +321,8 @@ library
, Duckling.Numeral.MY.Rules
, Duckling.Numeral.NB.Corpus
, Duckling.Numeral.NB.Rules
, Duckling.Numeral.NE.Corpus
, Duckling.Numeral.NE.Rules
, Duckling.Numeral.NL.Corpus
, Duckling.Numeral.NL.Rules
, Duckling.Numeral.PL.Corpus
@ -731,6 +736,7 @@ test-suite duckling-test
, Duckling.Numeral.KO.Tests
, Duckling.Numeral.MY.Tests
, Duckling.Numeral.NB.Tests
, Duckling.Numeral.NE.Tests
, Duckling.Numeral.NL.Tests
, Duckling.Numeral.PL.Tests
, Duckling.Numeral.PT.Tests

View File

@ -182,6 +182,7 @@ getCorpusForLang KA = (testContext, [])
getCorpusForLang KO = KOTime.corpus
getCorpusForLang MY = (testContext, [])
getCorpusForLang NB = NBTime.corpus
getCorpusForLang NE = (testContext, [])
getCorpusForLang NL = NLTime.corpus
getCorpusForLang PL = PLTime.corpus
getCorpusForLang PT = PTTime.corpus

View File

@ -0,0 +1,23 @@
-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree. An additional grant
-- of patent rights can be found in the PATENTS file in the same directory.
module Duckling.Numeral.NE.Tests
( tests
) where
import Data.String
import Prelude
import Test.Tasty
import Duckling.Dimensions.Types
import Duckling.Numeral.NE.Corpus
import Duckling.Testing.Asserts
-- | Test group for Nepali numerals: runs the NE corpus against the
-- 'Numeral' dimension.
tests :: TestTree
tests = testGroup "NE Tests" [numeralCorpusTest]
  where
    numeralCorpusTest = makeCorpusTest [This Numeral] corpus

View File

@ -34,6 +34,7 @@ import qualified Duckling.Numeral.KA.Tests as KA
import qualified Duckling.Numeral.KO.Tests as KO
import qualified Duckling.Numeral.MY.Tests as MY
import qualified Duckling.Numeral.NB.Tests as NB
import qualified Duckling.Numeral.NE.Tests as NE
import qualified Duckling.Numeral.NL.Tests as NL
import qualified Duckling.Numeral.PL.Tests as PL
import qualified Duckling.Numeral.PT.Tests as PT
@ -69,6 +70,7 @@ tests = testGroup "Numeral Tests"
, KO.tests
, MY.tests
, NB.tests
, NE.tests
, NL.tests
, PL.tests
, PT.tests