diff --git a/Duckling/Locale.hs b/Duckling/Locale.hs index 0c265003..c86d5eb4 100644 --- a/Duckling/Locale.hs +++ b/Duckling/Locale.hs @@ -19,6 +19,7 @@ module Duckling.Locale , CL , CN , CO + , EG , GB , HK , IE @@ -138,7 +139,8 @@ makeLocale lang (Just region) allLocales :: HashMap Lang (HashSet Region) allLocales = HashMap.fromList - [ (EN, HashSet.fromList [AU, BZ, CA, GB, IN, IE, JM, NZ, PH, ZA, TT, US]) + [ (AR, HashSet.fromList [EG]) + , (EN, HashSet.fromList [AU, BZ, CA, GB, IN, IE, JM, NZ, PH, ZA, TT, US]) , (ES, HashSet.fromList [R.AR, CL, CO, R.ES, MX, PE, VE]) , (NL, HashSet.fromList [BE, R.NL]) , (ZH, HashSet.fromList [CN, HK, MO, TW]) diff --git a/Duckling/Numeral/AR/EG/Corpus.hs b/Duckling/Numeral/AR/EG/Corpus.hs new file mode 100644 index 00000000..38c1eec7 --- /dev/null +++ b/Duckling/Numeral/AR/EG/Corpus.hs @@ -0,0 +1,257 @@ +-- Copyright (c) 2016-present, Facebook, Inc. +-- All rights reserved. +-- +-- This source code is licensed under the BSD-style license found in the +-- LICENSE file in the root directory of this source tree. + + +{-# LANGUAGE OverloadedStrings #-} + +module Duckling.Numeral.AR.EG.Corpus (allExamples) where + +import Data.String +import Prelude + +import Duckling.Numeral.Types +import Duckling.Testing.Types + +allExamples :: [Example] +allExamples = concat + [ examples (NumeralValue 0) + [ "0" + , "صفر" + , "٠" + , "٠٫٠" + ] + , examples (NumeralValue 1) + [ "1" + , "واحد" + , "١" + , "١٫٠" + ] + , examples (NumeralValue 4) + [ "4" + , "أربع" + , "اربعه" + , "٤" + , "٤٫٠" + ] + , examples (NumeralValue 6) + [ "6" + , "ستة" + , "ست" + , "سته" + , "٦" + , "٦٫٠" + ] + , examples (NumeralValue 10) + [ "10" + , "عشرة" + , "عشره" + , "١٠" + , "١٠٫٠" + ] + , examples (NumeralValue 11) + [ "11" + , "حداشر" + , "١١" + , "١١٫٠" + ] + , examples (NumeralValue 12) + [ "12" + , "إتناشر" + , "١٢" + , "١٢٫٠" + ] + , examples (NumeralValue 14) + [ "14" + , "اربعتاشر" + , "١٤" + , "١٤٫٠" + ] + , examples (NumeralValue 16) + [ "16" + , "ستاشر" + , "١٦" + , "١٦٫٠" + ] + , examples (NumeralValue 17) + [ "17" + , "سبعتاشر" + , "١٧" + , "١٧٫٠" + ] + , examples (NumeralValue 18) + [ "18" + , "تمنتاشر" + , "١٨" + , "١٨٫٠" + ] + , examples (NumeralValue 20) + [ "عشرين" + , "٢٠" + , "٢٠٫٠" + ] + , examples (NumeralValue 21) + [ "واحد و عشرين" + , "21" + , "٢١" + , "٢١٫٠" + ] + , examples (NumeralValue 24) + [ "أربعة و عشرين" + , "24" + , "٢٤" + , "٢٤٫٠" + ] + , examples (NumeralValue 26) + [ "ستة و عشرين" + , "26" + , "٢٦" + , "٢٦٫٠" + ] + , examples (NumeralValue 20) + [ "عشرين" + , "٢٠" + ] + , examples (NumeralValue 30) + [ "ثلاثين" + , "٣٠" + , "٣٠٫٠" + ] + , examples (NumeralValue 33) + [ "33" + , "تلاتة و تلاتين" + , "٣٣" + , "٣٣٫٠" + ] + , examples (NumeralValue 40) + [ "أربعين" + , "٤٠" + , "٤٠٫٠" + ] + , examples (NumeralValue 200) + [ "متين" + , "٢٠٠" + , "٢٠٠٫٠" + ] + , examples (NumeralValue 210) + [ "متين وعشرة" + , "٢١٠" + , "٢١٠٫٠" + ] + , examples (NumeralValue 221) + [ "متين واحد وعشرين" + , "٢٢١" + , "٢٢١٫٠" + ] + , examples (NumeralValue 263) + [ "متين تلاتة وستين" + , "٢٦٣" + , "٢٦٣٫٠" + ] + , examples (NumeralValue 300) + [ "تلتمية" + , "٣٠٠" + , "٣٠٠٫٠" + ] + , examples (NumeralValue 350) + [ "تلتمية وخمسين" + , "٣٥٠" + , "٣٥٠٫٠" + ] + , examples (NumeralValue 500) + [ "خمسمية" + , "٥٠٠" + , "٥٠٠٫٠" + ] + , examples (NumeralValue 525) + [ "خمسمية خمسة و عشرين" + , "525" + , "٥٢٥" + , "٥٢٥٫٠" + ] + , examples (NumeralValue 700) + [ "سبعمية" + , "٧٠٠" + , "٧٠٠٫٠" + ] + , examples (NumeralValue 1.1) + [ "1.1" + , "1.10" + , "01.10" + , "1 فاصلة 1" + , "واحد فاصلة واحد" + , "١١/١٠" + , "١٫١" + , "١٫١٠" + ] + , examples (NumeralValue 0.77) + [ "0.77" + , ".77" + , "٧٧/١٠٠" + , "٫٧٧" + , "٠٫٧٧" + , "٠٫٧٧٠" + ] + , examples (NumeralValue 2000) + [ "2000" + , "٢٠٠٠" + , "الفين" + ] + , examples (NumeralValue 100000) + [ "100000" + , "100 الف" + , "١٠٠٠٠٠" + , "١٠٠٠٠٠٫٠٠" + , "١٠٠٬٠٠٠" + , "١٠٠٬٠٠٠٫٠" + ] + , examples (NumeralValue 10000) + [ "10000" + , "10 آلاف" + , "١٠٠٠٠" + , "١٠٠٠٠٫٠٠" + , "١٠٬٠٠٠" + , "١٠٬٠٠٠٫٠٠" + ] + , examples (NumeralValue 1000000) + [ "1000000" + , "مليون" + , "١٠٠٠٠٠٠" + , "١٠٠٠٠٠٠٫٠٠" + , "١٬٠٠٠٬٠٠٠" + , "١٬٠٠٠٬٠٠٠٫٠٠" + ] + , examples (NumeralValue 2000000) + [ "2000000" + , "2 مليون" + , "اتنين مليون" + , "٢٠٠٠٠٠٠" + , "٢٠٠٠٠٠٠٫٠٠" + , "٢٬٠٠٠٬٠٠٠" + , "٢٬٠٠٠٬٠٠٠٫٠٠" + ] + , examples (NumeralValue 3000000) + [ "3 ملايين" + , "3000000" + , "3 مليون" + , "٣٠٠٠٠٠٠" + , "٣٠٠٠٠٠٠٫٠٠" + , "٣٬٠٠٠٬٠٠٠" + , "٣٬٠٠٠٬٠٠٠٫٠٠" + ] + , examples (NumeralValue (-1200000)) + [ "-1200000" + , "-١٢٠٠٠٠٠" + , "-١٢٠٠٠٠٠٫٠٠" + , "-١٬٢٠٠٬٠٠٠" + , "-١٬٢٠٠٬٠٠٠٫٠٠" + ] + , examples (NumeralValue (-1.2)) + [ "-١٢/١٠" + , "-1.2" + , "-١٫٢" + , "-١٫٢٠" + ] + ] diff --git a/Duckling/Numeral/AR/EG/Helpers.hs b/Duckling/Numeral/AR/EG/Helpers.hs new file mode 100644 index 00000000..66f44a6e --- /dev/null +++ b/Duckling/Numeral/AR/EG/Helpers.hs @@ -0,0 +1,80 @@ +-- Copyright (c) 2016-present, Facebook, Inc. +-- All rights reserved. +-- +-- This source code is licensed under the BSD-style license found in the +-- LICENSE file in the root directory of this source tree. + + +{-# LANGUAGE GADTs #-} +{-# LANGUAGE OverloadedStrings #-} +module Duckling.Numeral.AR.EG.Helpers + ( digitsMap + , numeralToStringMap + , parseArabicDoubleAsText + , parseArabicDoubleFromText + , parseArabicIntAsText + , parseArabicIntegerFromText + ) where + +import Control.Monad (join) +import Data.HashMap.Strict (HashMap) +import Data.Maybe (mapMaybe) +import Data.String +import Data.Text (Text) +import Duckling.Numeral.Helpers + ( parseDouble + , parseInteger + ) +import Prelude + +import qualified Data.HashMap.Strict as HashMap +import qualified Data.Text as Text + +numeralToStringMap :: HashMap Char String +numeralToStringMap = + HashMap.fromList + [ ('٠', "0") + , ('١', "1") + , ('٢', "2") + , ('٣', "3") + , ('٤', "4") + , ('٥', "5") + , ('٦', "6") + , ('٧', "7") + , ('٨', "8") + , ('٩', "9") + ] + +digitsMap :: HashMap Text Integer +digitsMap = + HashMap.fromList + [ ("اتنين", 2) + , ("تلات", 3) + , ("اربع", 4) + , ("أربع", 4) + , ("خمس", 5) + , ("ست", 6) + , ("سبع", 7) + , ("تمان", 8) + , ("تسع", 9) + ] + +parseArabicIntAsText :: Text -> Text +parseArabicIntAsText = + Text.pack + . join + . mapMaybe (`HashMap.lookup` numeralToStringMap) + . Text.unpack + +parseArabicIntegerFromText :: Text -> Maybe Integer +parseArabicIntegerFromText = parseInteger . parseArabicIntAsText + +parseArabicDoubleAsText :: Text -> Text +parseArabicDoubleAsText = + Text.pack + . join + . mapMaybe (`HashMap.lookup` HashMap.insert '٫' "." numeralToStringMap) + . Text.unpack + +parseArabicDoubleFromText :: Text -> Maybe Double +parseArabicDoubleFromText = parseDouble . parseArabicDoubleAsText diff --git a/Duckling/Numeral/AR/EG/Rules.hs b/Duckling/Numeral/AR/EG/Rules.hs new file mode 100644 index 00000000..baeb3893 --- /dev/null +++ b/Duckling/Numeral/AR/EG/Rules.hs @@ -0,0 +1,343 @@ +-- Copyright (c) 2016-present, Facebook, Inc. +-- All rights reserved. +-- +-- This source code is licensed under the BSD-style license found in the +-- LICENSE file in the root directory of this source tree. + + +{-# LANGUAGE GADTs #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE NoRebindableSyntax #-} + +module Duckling.Numeral.AR.EG.Rules (rules) where + +import Data.Maybe +import Prelude +import qualified Data.HashMap.Strict as HashMap + +import Duckling.Dimensions.Types +import Duckling.Numeral.AR.EG.Helpers + ( digitsMap + , parseArabicDoubleFromText + , parseArabicIntegerFromText + ) +import Duckling.Numeral.Helpers +import Duckling.Numeral.Types (NumeralData (..)) +import Duckling.Regex.Types +import Duckling.Types +import qualified Duckling.Numeral.Types as TNumeral + +ruleInteger2 :: Rule +ruleInteger2 = Rule + { name = "integer 2" + , pattern = + [ regex "[إأا]?تني[ي]?ن" + ] + , prod = \_ -> integer 2 + } + +ruleInteger3 :: Rule +ruleInteger3 = Rule + { name = "integer 3" + , pattern = + [ regex "تلات[هة]?" + ] + , prod = \_ -> integer 3 + } + +ruleInteger8 :: Rule +ruleInteger8 = Rule + { name = "integer 8" + , pattern = + [ regex "تمان(ي[هة])?" + ] + , prod = \_ -> integer 8 + } + +ruleInteger11 :: Rule +ruleInteger11 = Rule + { name = "integer 11" + , pattern = + [ regex "(إأا)?حداشر" + ] + , prod = \_ -> integer 11 + } + +ruleInteger12 :: Rule +ruleInteger12 = Rule + { name = "integer 12" + , pattern = + [ regex "[إأا]?[تط]ناشر" + ] + , prod = \_ -> integer 12 + } + +ruleInteger13 :: Rule +ruleInteger13 = Rule + { name = "integer 13" + , pattern = + [ regex "[تط]ل(ا)?[تط]اشر" + ] + , prod = \_ -> integer 13 + } + +ruleInteger14 :: Rule +ruleInteger14 = Rule + { name = "integer 14" + , pattern = + [ regex "[إأا]ربع[تط]اشر" + ] + , prod = \_ -> integer 14 + } + +ruleInteger15 :: Rule +ruleInteger15 = Rule + { name = "integer 15" + , pattern = + [ regex "خمس[تط]اشر" + ] + , prod = \_ -> integer 15 + } + +ruleInteger16 :: Rule +ruleInteger16 = Rule + { name = "integer 16" + , pattern = + [ regex "س[تط]اشر" + ] + , prod = \_ -> integer 16 + } + +ruleInteger17 :: Rule +ruleInteger17 = Rule + { name = "integer 17" + , pattern = + [ regex "سبع[تط]اشر" + ] + , prod = \_ -> integer 17 + } + +ruleInteger18 :: Rule +ruleInteger18 = Rule + { name = "integer 18" + , pattern = + [ regex "[تط]من[تط]اشر" + ] + , prod = \_ -> integer 18 + } + +ruleInteger19 :: Rule +ruleInteger19 = Rule + { name = "integer 19" + , pattern = + [ regex "تسع[تط]اشر" + ] + , prod = \_ -> integer 19 + } + +ruleInteger30_80 :: Rule +ruleInteger30_80 = Rule + { name = "integer (30, 80)" + , pattern = + [ regex "([تط]لا[تط]|[تط]مان)(ين)" + ] + , prod = \tokens -> case tokens of + Token RegexMatch (GroupMatch (match:_)):_ -> + (* 10) <$> HashMap.lookup match digitsMap >>= integer + _ -> Nothing + } + +ruleInteger100 :: Rule +ruleInteger100 = Rule + { name = "integer (100)" + , pattern = + [ regex "مي[هةت]" + ] + , prod = const $ integer 100 + } + +ruleInteger200 :: Rule +ruleInteger200 = Rule + { name = "integer (200)" + , pattern = + [ regex "م(ي)?تين" + ] + , prod = const $ integer 200 + } + +ruleInteger300 :: Rule +ruleInteger300 = Rule + { name = "integer (300)" + , pattern = + [ regex "([تط]و?لا?[تط]و?)مي[هةت]" + ] + , prod = const $ integer 300 + } + +ruleInteger400 :: Rule +ruleInteger400 = Rule + { name = "integer (400)" + , pattern = + [ regex "(رو?بعو?)مي[هةت]" + ] + , prod = const $ integer 400 + } + +ruleInteger500 :: Rule +ruleInteger500 = Rule + { name = "integer (500)" + , pattern = + [ regex "(خو?مسو?)مي[هةت]" + ] + , prod = const $ integer 500 + } + +ruleInteger600 :: Rule +ruleInteger600 = Rule + { name = "integer (600)" + , pattern = + [ regex "(سو?تو?)مي[هةت]" + ] + , prod = const $ integer 600 + } + +ruleInteger700 :: Rule +ruleInteger700 = Rule + { name = "integer (700)" + , pattern = + [ regex "(سو?بعو?)مي[هةت]" + ] + , prod = const $ integer 700 + } + +ruleInteger800 :: Rule +ruleInteger800 = Rule + { name = "integer (800)" + , pattern = + [ regex "([تط]و?منو?)مي[هةت]" + ] + , prod = const $ integer 800 + } + +ruleInteger900 :: Rule +ruleInteger900 = Rule + { name = "integer (900)" + , pattern = + [ regex "([تط]و?سعو?)مي[هةت]" + ] + , prod = const $ integer 900 + } + +ruleInteger101_999 :: Rule +ruleInteger101_999 = Rule + { name = "integer 101..999" + , pattern = + [ oneOf [100, 200 .. 900] + , regex "\\s" + , oneOf $ map (+) [10, 20 .. 90] <*> [1..9] + ] + , prod = \tokens -> case tokens of + (Token Numeral NumeralData{TNumeral.value = v1}: + _: + Token Numeral NumeralData{TNumeral.value = v2}: + _) -> double $ v1 + v2 + _ -> Nothing + } + + +ruleInteger3000 :: Rule +ruleInteger3000 = Rule + { name = "integer (3000)" + , pattern = + [ regex "([تط]لا?[تط])( ?[اآ])?لاف" + ] + , prod = const $ integer 3000 + } + +ruleInteger4000 :: Rule +ruleInteger4000 = Rule + { name = "integer (4000)" + , pattern = + [ regex "[أا]ربع(ت| ?[اآ])لاف" + ] + , prod = const $ integer 4000 + } + +ruleInteger5000 :: Rule +ruleInteger5000 = Rule + { name = "integer (5000)" + , pattern = + [ regex "خمس(ت| ?[اآ])لاف" + ] + , prod = const $ integer 5000 + } + +ruleInteger6000 :: Rule +ruleInteger6000 = Rule + { name = "integer (6000)" + , pattern = + [ regex "س(ت| ?[اآ])لاف" + ] + , prod = const $ integer 6000 + } + +ruleInteger7000 :: Rule +ruleInteger7000 = Rule + { name = "integer (7000)" + , pattern = + [ regex "سبع(ت| ?[اآ])لاف" + ] + , prod = const $ integer 7000 + } + +ruleInteger8000 :: Rule +ruleInteger8000 = Rule + { name = "integer (8000)" + , pattern = + [ regex "[تط]م[ا]?ن(ت| ?[اآ])لاف" + ] + , prod = const $ integer 8000 + } + +ruleInteger9000 :: Rule +ruleInteger9000 = Rule + { name = "integer (9000)" + , pattern = + [ regex "([تط]سع)(ت| ?[اآ])لاف" + ] + , prod = const $ integer 9000 + } + +rules :: [Rule] +rules = + [ ruleInteger2 + , ruleInteger3 + , ruleInteger8 + , ruleInteger11 + , ruleInteger12 + , ruleInteger13 + , ruleInteger14 + , ruleInteger15 + , ruleInteger16 + , ruleInteger17 + , ruleInteger18 + , ruleInteger19 + , ruleInteger30_80 + , ruleInteger100 + , ruleInteger200 + , ruleInteger300 + , ruleInteger400 + , ruleInteger500 + , ruleInteger600 + , ruleInteger700 + , ruleInteger800 + , ruleInteger900 + , ruleInteger101_999 + , ruleInteger3000 + , ruleInteger4000 + , ruleInteger5000 + , ruleInteger6000 + , ruleInteger7000 + , ruleInteger8000 + , ruleInteger9000 + ] diff --git a/Duckling/Region.hs b/Duckling/Region.hs index 87a3cad3..8b21c9dc 100644 --- a/Duckling/Region.hs +++ b/Duckling/Region.hs @@ -30,6 +30,7 @@ data Region | CL | CN | CO + | EG | ES | GB | HK diff --git a/Duckling/Rules/AR.hs b/Duckling/Rules/AR.hs index f11d4c63..7911b54f 100644 --- a/Duckling/Rules/AR.hs +++ b/Duckling/Rules/AR.hs @@ -20,6 +20,7 @@ import Duckling.Types import qualified Duckling.AmountOfMoney.AR.Rules as AmountOfMoney import qualified Duckling.Duration.AR.Rules as Duration import qualified Duckling.Numeral.AR.Rules as Numeral +import qualified Duckling.Numeral.AR.EG.Rules as NumeralEG import qualified Duckling.Ordinal.AR.Rules as Ordinal import qualified Duckling.PhoneNumber.AR.Rules as PhoneNumber import qualified Duckling.Quantity.AR.Rules as Quantity @@ -32,6 +33,7 @@ defaultRules :: Seal Dimension -> [Rule] defaultRules = langRules localeRules :: Region -> Seal Dimension -> [Rule] +localeRules EG (Seal Numeral) = NumeralEG.rules localeRules region (Seal (CustomDimension dim)) = dimLocaleRules region dim localeRules _ _ = [] diff --git a/duckling.cabal b/duckling.cabal index bcc99860..a0ad6bdd 100644 --- a/duckling.cabal +++ b/duckling.cabal @@ -400,6 +400,9 @@ library , Duckling.Numeral.AR.Corpus , Duckling.Numeral.AR.Helpers , Duckling.Numeral.AR.Rules + , Duckling.Numeral.AR.EG.Corpus + , Duckling.Numeral.AR.EG.Helpers + , Duckling.Numeral.AR.EG.Rules , Duckling.Numeral.BG.Corpus , Duckling.Numeral.BG.Rules , Duckling.Numeral.BN.Corpus diff --git a/tests/Duckling/Numeral/AR/Tests.hs b/tests/Duckling/Numeral/AR/Tests.hs index 24f48f7a..be891e33 100644 --- a/tests/Duckling/Numeral/AR/Tests.hs +++ b/tests/Duckling/Numeral/AR/Tests.hs @@ -13,10 +13,23 @@ import Data.String import Test.Tasty import Duckling.Dimensions.Types +import Duckling.Locale import Duckling.Numeral.AR.Corpus import Duckling.Testing.Asserts +import Duckling.Testing.Types hiding (examples) +import qualified Duckling.Numeral.AR.EG.Corpus as EG tests :: TestTree tests = testGroup "AR Tests" [ makeCorpusTest [Seal Numeral] corpus + , localeTests ] + +localeTests :: TestTree +localeTests = testGroup "Locale Tests" + [ testGroup "AR_EG Tests" + [ makeCorpusTest [Seal Numeral] $ withLocale corpus localeEG EG.allExamples + ] + ] + where + localeEG = makeLocale AR $ Just EG