Support Arabic phone numbers

Summary: Support Arabic phone numbers for AR locales.

Reviewed By: patapizza

Differential Revision: D14591780

fbshipit-source-id: 93875c5b407c3fe2a06276c6fa3af2a26c91639e
This commit is contained in:
Nathan Hausman 2019-03-29 09:35:28 -07:00 committed by Facebook Github Bot
parent 830e5e3d8e
commit b90ff9a64e
8 changed files with 238 additions and 18 deletions

View File

@ -0,0 +1,64 @@
-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree. An additional grant
-- of patent rights can be found in the PATENTS file in the same directory.
{-# LANGUAGE GADTs #-}
{-# LANGUAGE OverloadedStrings #-}
module Duckling.Numeral.AR.Helpers
( digitsMap
, numeralToStringMap
, parseArabicIntAsText
, parseArabicIntegerFromText
) where
import Data.HashMap.Strict (HashMap)
import Data.Maybe (mapMaybe)
import Data.String
import Data.Text (Text)
import Duckling.Numeral.Helpers (parseInteger)
import Prelude
import qualified Data.HashMap.Strict as HashMap
import qualified Data.Text as Text
numeralToStringMap :: HashMap Char String
numeralToStringMap =
HashMap.fromList
[ ('٠', "0")
, ('١', "1")
, ('٢', "2")
, ('٣', "3")
, ('٤', "4")
, ('٥', "5")
, ('٦', "6")
, ('٧', "7")
, ('٨', "8")
, ('٩', "9")
]
digitsMap :: HashMap Text Integer
digitsMap =
HashMap.fromList
[ ("عشر", 2)
, ("ثلاث", 3)
, ("اربع", 4)
, ("أربع", 4)
, ("خمس", 5)
, ("ست", 6)
, ("سبع", 7)
, ("ثمان", 8)
, ("تسع", 9)
]
parseArabicIntAsText :: Text -> Text
parseArabicIntAsText intText =
Text.pack
$ concat
$ mapMaybe (`HashMap.lookup` numeralToStringMap) (Text.unpack intText)
parseArabicIntegerFromText :: Text -> Maybe Integer
parseArabicIntegerFromText given = parseInteger $ parseArabicIntAsText given

View File

@ -9,10 +9,7 @@
{-# LANGUAGE GADTs #-} {-# LANGUAGE GADTs #-}
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
module Duckling.Numeral.AR.Rules module Duckling.Numeral.AR.Rules (rules) where
( rules
) where
import Data.HashMap.Strict (HashMap) import Data.HashMap.Strict (HashMap)
import Data.Maybe import Data.Maybe
import Data.String import Data.String
@ -22,6 +19,7 @@ import qualified Data.HashMap.Strict as HashMap
import qualified Data.Text as Text import qualified Data.Text as Text
import Duckling.Dimensions.Types import Duckling.Dimensions.Types
import Duckling.Numeral.AR.Helpers (digitsMap)
import Duckling.Numeral.Helpers import Duckling.Numeral.Helpers
import Duckling.Numeral.Types (NumeralData (..)) import Duckling.Numeral.Types (NumeralData (..))
import Duckling.Regex.Types import Duckling.Regex.Types
@ -62,19 +60,6 @@ ruleInteger18 = Rule
, prod = \_ -> integer 12 , prod = \_ -> integer 12
} }
digitsMap :: HashMap Text Integer
digitsMap = HashMap.fromList
[ ("عشر", 2)
, ("ثلاث", 3)
, ("اربع", 4)
, ("أربع", 4)
, ("خمس", 5)
, ("ست", 6)
, ("سبع", 7)
, ("ثمان", 8)
, ("تسع", 9)
]
ruleInteger19 :: Rule ruleInteger19 :: Rule
ruleInteger19 = Rule ruleInteger19 = Rule
{ name = "integer (20..90)" { name = "integer (20..90)"

View File

@ -0,0 +1,84 @@
-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree. An additional grant
-- of patent rights can be found in the PATENTS file in the same directory.
{-# LANGUAGE OverloadedStrings #-}
module Duckling.PhoneNumber.AR.Corpus
( corpus
, negativeCorpus
) where
import Prelude
import Data.String
import Duckling.Locale
import Duckling.PhoneNumber.Types
import Duckling.Resolve
import Duckling.Testing.Types
corpus :: Corpus
corpus =
(testContext { locale = makeLocale AR Nothing }, testOptions, allExamples)
negativeCorpus :: NegativeCorpus
negativeCorpus = (testContext, testOptions, xs)
where
xs =
[ "١٢٣٤٥"
, "١٢٣٤٥٦٧٨٩٠١٢٣٤٥٦٧٧٧٧٧٧"
, "١٢٣٤٥٦٧٨٩٠١٢٣٤٥٦"
]
-- Tests include both unicode characters and equivalent unicode decimal code
-- representation because the Arabic phone number regex is constructed with
-- unicode decimal form.
allExamples :: [Example]
allExamples =
concat
[ examples (PhoneNumberValue "6507018887")
[ "٦٥٠٧٠١٨٨٨٧"
, "\1638\1637\1632\1639\1632\1633\1640\1640\1640\1639"
, "٦٥٠ ٧٠١ ٨٨٨٧"
, "\1638\1637\1632 \1639\1632\1633 \1640\1640\1640\1639"
, "٦٥٠-٧٠١-٨٨٨٧"
, "\1638\1637\1632-\1639\1632\1633-\1640\1640\1640\1639"
]
, examples (PhoneNumberValue "(+1) 6507018887")
[ "+١ ٦٥٠٧٠١٨٨٨٧"
, "+\1633 \1638\1637\1632\1639\1632\1633\1640\1640\1640\1639"
, "(+١)٦٥٠٧٠١٨٨٨٧"
, "(+\1633)\1638\1637\1632\1639\1632\1633\1640\1640\1640\1639"
, "(+١) ٦٥٠ - ٧٠١ ٨٨٨٧"
, "(+\1633) \1638\1637\1632 - \1639\1632\1633 \1640\1640\1640\1639"
]
, examples (PhoneNumberValue "(+33) 146647998")
[ "+٣٣ ١ ٤٦٦٤٧٩٩٨"
, "+\1635\1635 \1633 \1636\1638\1638\1636\1639\1641\1641\1640"
]
, examples (PhoneNumberValue "0620702220")
[ "٠٦ ٢٠٧٠ ٢٢٢٠"
]
, examples (PhoneNumberValue "6507018887 ext 897")
[ "٦٥٠٧٠١٨٨٨٧ ext ٨٩٧"
, "٦٥٠٧٠١٨٨٨٧ x ٨٩٧"
, "٦٥٠٧٠١٨٨٨٧ ext. ٨٩٧"
]
, examples (PhoneNumberValue "6507018887 ext 897")
[ "٦٥٠٧٠١٨٨٨٧ فرعي ٨٩٧"
]
, examples (PhoneNumberValue "(+1) 2025550121")
[ "+١-٢٠٢-٥٥٥-٠١٢١"
, "+١ ٢٠٢.٥٥٥.٠١٢١"
]
, examples (PhoneNumberValue "4866827")
[ "٤.٨.٦.٦.٨.٢.٧"
]
, examples (PhoneNumberValue "(+55) 19992842606")
[ "(+٥٥) ١٩٩٩٢٨٤٢٦٠٦"
]
]

View File

@ -0,0 +1,56 @@
-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree. An additional grant
-- of patent rights can be found in the PATENTS file in the same directory.
{-# LANGUAGE GADTs #-}
{-# LANGUAGE NoRebindableSyntax #-}
{-# LANGUAGE OverloadedStrings #-}
module Duckling.PhoneNumber.AR.Rules (rules) where
import Duckling.Dimensions.Types
import Duckling.Numeral.AR.Helpers
( parseArabicIntAsText
, parseArabicIntegerFromText
)
import Duckling.PhoneNumber.Types (PhoneNumberData(..))
import Duckling.Regex.Types
import Duckling.Types
import Prelude
import qualified Data.Text as Text
import qualified Duckling.PhoneNumber.Types as TPhoneNumber
rulePhoneNumber :: Rule
rulePhoneNumber = Rule
{ name = "phone number"
, pattern =
-- Arabic is a right to left langauge except for numbers, which are read
-- left to right. This regex uses the unicode range for Arabic numbers
-- [\1632-\1641] to make the code easier to read and maintain. The unicode
-- sequence \1601\1585\1593\1610, corresponding to فرعي, is a popular
-- Arabic equivalent for "extension" and is used in this regex.
[ regex $
"(?:\\(?\\+([\1632-\1641]{1,2})\\)?[\\s-\\.]*)?" ++ -- area code
"((?=[-\1632-\1641()\\s\\.]{6,16}(?:\\s*(?:e?xt?|\1601\1585\1593\1610)?\\.?\\s*(?:[\1632-\1641]{1,20}))?(?:[^\1632-\1641]+|$))(?:[\1632-\1641(]{1,20}(?:[-)\\s\\.]*[\1632-\1641]{1,20}){0,20}){1,20})" ++ -- nums
"(?:\\s*(?:e?xt?|\1601\1585\1593\1610)\\.?\\s*([\1632-\1641]{1,20}))?" -- extension
]
, prod = \xs -> case xs of
(Token RegexMatch (GroupMatch (code:nums:ext:_)):_) ->
let
mnums = parseArabicIntAsText $ cleanup nums
cleanup = Text.filter (not . isWhitespace)
isWhitespace x = elem x ['.', ' ', '-', '\t', '(', ')']
in Just $ Token PhoneNumber $ PhoneNumberData
{ TPhoneNumber.prefix = parseArabicIntegerFromText code
, TPhoneNumber.number = mnums
, TPhoneNumber.extension = parseArabicIntegerFromText ext
}
_ -> Nothing
}
rules :: [Rule]
rules = [rulePhoneNumber]

View File

@ -22,6 +22,7 @@ import qualified Duckling.AmountOfMoney.AR.Rules as AmountOfMoney
import qualified Duckling.Duration.AR.Rules as Duration import qualified Duckling.Duration.AR.Rules as Duration
import qualified Duckling.Numeral.AR.Rules as Numeral import qualified Duckling.Numeral.AR.Rules as Numeral
import qualified Duckling.Ordinal.AR.Rules as Ordinal import qualified Duckling.Ordinal.AR.Rules as Ordinal
import qualified Duckling.PhoneNumber.AR.Rules as PhoneNumber
import qualified Duckling.Quantity.AR.Rules as Quantity import qualified Duckling.Quantity.AR.Rules as Quantity
import qualified Duckling.Temperature.AR.Rules as Temperature import qualified Duckling.Temperature.AR.Rules as Temperature
import qualified Duckling.Time.AR.Rules as Time import qualified Duckling.Time.AR.Rules as Time
@ -43,7 +44,7 @@ langRules (This Duration) = Duration.rules
langRules (This Email) = [] langRules (This Email) = []
langRules (This Numeral) = Numeral.rules langRules (This Numeral) = Numeral.rules
langRules (This Ordinal) = Ordinal.rules langRules (This Ordinal) = Ordinal.rules
langRules (This PhoneNumber) = [] langRules (This PhoneNumber) = PhoneNumber.rules
langRules (This Quantity) = Quantity.rules langRules (This Quantity) = Quantity.rules
langRules (This RegexMatch) = [] langRules (This RegexMatch) = []
langRules (This Temperature) = Temperature.rules langRules (This Temperature) = Temperature.rules

View File

@ -385,6 +385,7 @@ library
-- Numeral -- Numeral
, Duckling.Numeral.AR.Corpus , Duckling.Numeral.AR.Corpus
, Duckling.Numeral.AR.Helpers
, Duckling.Numeral.AR.Rules , Duckling.Numeral.AR.Rules
, Duckling.Numeral.BG.Corpus , Duckling.Numeral.BG.Corpus
, Duckling.Numeral.BG.Rules , Duckling.Numeral.BG.Rules
@ -547,6 +548,8 @@ library
, Duckling.Ordinal.Types , Duckling.Ordinal.Types
-- PhoneNumber -- PhoneNumber
, Duckling.PhoneNumber.AR.Corpus
, Duckling.PhoneNumber.AR.Rules
, Duckling.PhoneNumber.PT.Corpus , Duckling.PhoneNumber.PT.Corpus
, Duckling.PhoneNumber.PT.Rules , Duckling.PhoneNumber.PT.Rules
, Duckling.PhoneNumber.Corpus , Duckling.PhoneNumber.Corpus
@ -996,6 +999,7 @@ test-suite duckling-test
, Duckling.Ordinal.Tests , Duckling.Ordinal.Tests
-- PhoneNumber -- PhoneNumber
, Duckling.PhoneNumber.AR.Tests
, Duckling.PhoneNumber.PT.Tests , Duckling.PhoneNumber.PT.Tests
, Duckling.PhoneNumber.Tests , Duckling.PhoneNumber.Tests

View File

@ -0,0 +1,24 @@
-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree. An additional grant
-- of patent rights can be found in the PATENTS file in the same directory.
module Duckling.PhoneNumber.AR.Tests
( tests
) where
import Data.String
import Test.Tasty
import Duckling.Dimensions.Types
import Duckling.PhoneNumber.AR.Corpus
import Duckling.Testing.Asserts
tests :: TestTree
tests = testGroup "PhoneNumber Tests"
[ makeCorpusTest [This PhoneNumber] corpus
, makeNegativeCorpusTest [This PhoneNumber] negativeCorpus
]

View File

@ -22,6 +22,7 @@ import Duckling.PhoneNumber.Corpus
import Duckling.PhoneNumber.Types import Duckling.PhoneNumber.Types
import Duckling.Testing.Asserts import Duckling.Testing.Asserts
import Duckling.Testing.Types import Duckling.Testing.Types
import qualified Duckling.PhoneNumber.AR.Tests as AR
import qualified Duckling.PhoneNumber.PT.Tests as PT import qualified Duckling.PhoneNumber.PT.Tests as PT
tests :: TestTree tests :: TestTree
@ -30,6 +31,7 @@ tests = testGroup "PhoneNumber Tests"
, makeNegativeCorpusTest [This PhoneNumber] negativeCorpus , makeNegativeCorpusTest [This PhoneNumber] negativeCorpus
, surroundTests , surroundTests
, PT.tests , PT.tests
, AR.tests
] ]
surroundTests :: TestTree surroundTests :: TestTree