Support Arabic phone numbers

Summary: Support Arabic phone numbers for AR locales.

Reviewed By: patapizza

Differential Revision: D14591780

fbshipit-source-id: 93875c5b407c3fe2a06276c6fa3af2a26c91639e
This commit is contained in:
Nathan Hausman 2019-03-29 09:35:28 -07:00 committed by Facebook Github Bot
parent 830e5e3d8e
commit b90ff9a64e
8 changed files with 238 additions and 18 deletions

View File

@ -0,0 +1,64 @@
-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree. An additional grant
-- of patent rights can be found in the PATENTS file in the same directory.
{-# LANGUAGE GADTs #-}
{-# LANGUAGE OverloadedStrings #-}
module Duckling.Numeral.AR.Helpers
( digitsMap
, numeralToStringMap
, parseArabicIntAsText
, parseArabicIntegerFromText
) where
import Data.HashMap.Strict (HashMap)
import Data.Maybe (mapMaybe)
import Data.String
import Data.Text (Text)
import Duckling.Numeral.Helpers (parseInteger)
import Prelude
import qualified Data.HashMap.Strict as HashMap
import qualified Data.Text as Text
numeralToStringMap :: HashMap Char String
numeralToStringMap =
HashMap.fromList
[ ('٠', "0")
, ('١', "1")
, ('٢', "2")
, ('٣', "3")
, ('٤', "4")
, ('٥', "5")
, ('٦', "6")
, ('٧', "7")
, ('٨', "8")
, ('٩', "9")
]
digitsMap :: HashMap Text Integer
digitsMap =
HashMap.fromList
[ ("عشر", 2)
, ("ثلاث", 3)
, ("اربع", 4)
, ("أربع", 4)
, ("خمس", 5)
, ("ست", 6)
, ("سبع", 7)
, ("ثمان", 8)
, ("تسع", 9)
]
parseArabicIntAsText :: Text -> Text
parseArabicIntAsText intText =
Text.pack
$ concat
$ mapMaybe (`HashMap.lookup` numeralToStringMap) (Text.unpack intText)
parseArabicIntegerFromText :: Text -> Maybe Integer
parseArabicIntegerFromText given = parseInteger $ parseArabicIntAsText given

View File

@ -9,10 +9,7 @@
{-# LANGUAGE GADTs #-}
{-# LANGUAGE OverloadedStrings #-}
module Duckling.Numeral.AR.Rules
( rules
) where
module Duckling.Numeral.AR.Rules (rules) where
import Data.HashMap.Strict (HashMap)
import Data.Maybe
import Data.String
@ -22,6 +19,7 @@ import qualified Data.HashMap.Strict as HashMap
import qualified Data.Text as Text
import Duckling.Dimensions.Types
import Duckling.Numeral.AR.Helpers (digitsMap)
import Duckling.Numeral.Helpers
import Duckling.Numeral.Types (NumeralData (..))
import Duckling.Regex.Types
@ -62,19 +60,6 @@ ruleInteger18 = Rule
, prod = \_ -> integer 12
}
digitsMap :: HashMap Text Integer
digitsMap = HashMap.fromList
[ ("عشر", 2)
, ("ثلاث", 3)
, ("اربع", 4)
, ("أربع", 4)
, ("خمس", 5)
, ("ست", 6)
, ("سبع", 7)
, ("ثمان", 8)
, ("تسع", 9)
]
ruleInteger19 :: Rule
ruleInteger19 = Rule
{ name = "integer (20..90)"

View File

@ -0,0 +1,84 @@
-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree. An additional grant
-- of patent rights can be found in the PATENTS file in the same directory.
{-# LANGUAGE OverloadedStrings #-}
module Duckling.PhoneNumber.AR.Corpus
( corpus
, negativeCorpus
) where
import Prelude
import Data.String
import Duckling.Locale
import Duckling.PhoneNumber.Types
import Duckling.Resolve
import Duckling.Testing.Types
corpus :: Corpus
corpus =
(testContext { locale = makeLocale AR Nothing }, testOptions, allExamples)
negativeCorpus :: NegativeCorpus
negativeCorpus = (testContext, testOptions, xs)
where
xs =
[ "١٢٣٤٥"
, "١٢٣٤٥٦٧٨٩٠١٢٣٤٥٦٧٧٧٧٧٧"
, "١٢٣٤٥٦٧٨٩٠١٢٣٤٥٦"
]
-- Tests include both unicode characters and equivalent unicode decimal code
-- representation because the Arabic phone number regex is constructed with
-- unicode decimal form.
allExamples :: [Example]
allExamples =
concat
[ examples (PhoneNumberValue "6507018887")
[ "٦٥٠٧٠١٨٨٨٧"
, "\1638\1637\1632\1639\1632\1633\1640\1640\1640\1639"
, "٦٥٠ ٧٠١ ٨٨٨٧"
, "\1638\1637\1632 \1639\1632\1633 \1640\1640\1640\1639"
, "٦٥٠-٧٠١-٨٨٨٧"
, "\1638\1637\1632-\1639\1632\1633-\1640\1640\1640\1639"
]
, examples (PhoneNumberValue "(+1) 6507018887")
[ "+١ ٦٥٠٧٠١٨٨٨٧"
, "+\1633 \1638\1637\1632\1639\1632\1633\1640\1640\1640\1639"
, "(+١)٦٥٠٧٠١٨٨٨٧"
, "(+\1633)\1638\1637\1632\1639\1632\1633\1640\1640\1640\1639"
, "(+١) ٦٥٠ - ٧٠١ ٨٨٨٧"
, "(+\1633) \1638\1637\1632 - \1639\1632\1633 \1640\1640\1640\1639"
]
, examples (PhoneNumberValue "(+33) 146647998")
[ "+٣٣ ١ ٤٦٦٤٧٩٩٨"
, "+\1635\1635 \1633 \1636\1638\1638\1636\1639\1641\1641\1640"
]
, examples (PhoneNumberValue "0620702220")
[ "٠٦ ٢٠٧٠ ٢٢٢٠"
]
, examples (PhoneNumberValue "6507018887 ext 897")
[ "٦٥٠٧٠١٨٨٨٧ ext ٨٩٧"
, "٦٥٠٧٠١٨٨٨٧ x ٨٩٧"
, "٦٥٠٧٠١٨٨٨٧ ext. ٨٩٧"
]
, examples (PhoneNumberValue "6507018887 ext 897")
[ "٦٥٠٧٠١٨٨٨٧ فرعي ٨٩٧"
]
, examples (PhoneNumberValue "(+1) 2025550121")
[ "+١-٢٠٢-٥٥٥-٠١٢١"
, "+١ ٢٠٢.٥٥٥.٠١٢١"
]
, examples (PhoneNumberValue "4866827")
[ "٤.٨.٦.٦.٨.٢.٧"
]
, examples (PhoneNumberValue "(+55) 19992842606")
[ "(+٥٥) ١٩٩٩٢٨٤٢٦٠٦"
]
]

View File

@ -0,0 +1,56 @@
-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree. An additional grant
-- of patent rights can be found in the PATENTS file in the same directory.
{-# LANGUAGE GADTs #-}
{-# LANGUAGE NoRebindableSyntax #-}
{-# LANGUAGE OverloadedStrings #-}
module Duckling.PhoneNumber.AR.Rules (rules) where
import Duckling.Dimensions.Types
import Duckling.Numeral.AR.Helpers
( parseArabicIntAsText
, parseArabicIntegerFromText
)
import Duckling.PhoneNumber.Types (PhoneNumberData(..))
import Duckling.Regex.Types
import Duckling.Types
import Prelude
import qualified Data.Text as Text
import qualified Duckling.PhoneNumber.Types as TPhoneNumber
rulePhoneNumber :: Rule
rulePhoneNumber = Rule
{ name = "phone number"
, pattern =
-- Arabic is a right to left langauge except for numbers, which are read
-- left to right. This regex uses the unicode range for Arabic numbers
-- [\1632-\1641] to make the code easier to read and maintain. The unicode
-- sequence \1601\1585\1593\1610, corresponding to فرعي, is a popular
-- Arabic equivalent for "extension" and is used in this regex.
[ regex $
"(?:\\(?\\+([\1632-\1641]{1,2})\\)?[\\s-\\.]*)?" ++ -- area code
"((?=[-\1632-\1641()\\s\\.]{6,16}(?:\\s*(?:e?xt?|\1601\1585\1593\1610)?\\.?\\s*(?:[\1632-\1641]{1,20}))?(?:[^\1632-\1641]+|$))(?:[\1632-\1641(]{1,20}(?:[-)\\s\\.]*[\1632-\1641]{1,20}){0,20}){1,20})" ++ -- nums
"(?:\\s*(?:e?xt?|\1601\1585\1593\1610)\\.?\\s*([\1632-\1641]{1,20}))?" -- extension
]
, prod = \xs -> case xs of
(Token RegexMatch (GroupMatch (code:nums:ext:_)):_) ->
let
mnums = parseArabicIntAsText $ cleanup nums
cleanup = Text.filter (not . isWhitespace)
isWhitespace x = elem x ['.', ' ', '-', '\t', '(', ')']
in Just $ Token PhoneNumber $ PhoneNumberData
{ TPhoneNumber.prefix = parseArabicIntegerFromText code
, TPhoneNumber.number = mnums
, TPhoneNumber.extension = parseArabicIntegerFromText ext
}
_ -> Nothing
}
rules :: [Rule]
rules = [rulePhoneNumber]

View File

@ -22,6 +22,7 @@ import qualified Duckling.AmountOfMoney.AR.Rules as AmountOfMoney
import qualified Duckling.Duration.AR.Rules as Duration
import qualified Duckling.Numeral.AR.Rules as Numeral
import qualified Duckling.Ordinal.AR.Rules as Ordinal
import qualified Duckling.PhoneNumber.AR.Rules as PhoneNumber
import qualified Duckling.Quantity.AR.Rules as Quantity
import qualified Duckling.Temperature.AR.Rules as Temperature
import qualified Duckling.Time.AR.Rules as Time
@ -43,7 +44,7 @@ langRules (This Duration) = Duration.rules
langRules (This Email) = []
langRules (This Numeral) = Numeral.rules
langRules (This Ordinal) = Ordinal.rules
langRules (This PhoneNumber) = []
langRules (This PhoneNumber) = PhoneNumber.rules
langRules (This Quantity) = Quantity.rules
langRules (This RegexMatch) = []
langRules (This Temperature) = Temperature.rules

View File

@ -385,6 +385,7 @@ library
-- Numeral
, Duckling.Numeral.AR.Corpus
, Duckling.Numeral.AR.Helpers
, Duckling.Numeral.AR.Rules
, Duckling.Numeral.BG.Corpus
, Duckling.Numeral.BG.Rules
@ -547,6 +548,8 @@ library
, Duckling.Ordinal.Types
-- PhoneNumber
, Duckling.PhoneNumber.AR.Corpus
, Duckling.PhoneNumber.AR.Rules
, Duckling.PhoneNumber.PT.Corpus
, Duckling.PhoneNumber.PT.Rules
, Duckling.PhoneNumber.Corpus
@ -996,6 +999,7 @@ test-suite duckling-test
, Duckling.Ordinal.Tests
-- PhoneNumber
, Duckling.PhoneNumber.AR.Tests
, Duckling.PhoneNumber.PT.Tests
, Duckling.PhoneNumber.Tests

View File

@ -0,0 +1,24 @@
-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree. An additional grant
-- of patent rights can be found in the PATENTS file in the same directory.
module Duckling.PhoneNumber.AR.Tests
( tests
) where
import Data.String
import Test.Tasty
import Duckling.Dimensions.Types
import Duckling.PhoneNumber.AR.Corpus
import Duckling.Testing.Asserts
tests :: TestTree
tests = testGroup "PhoneNumber Tests"
[ makeCorpusTest [This PhoneNumber] corpus
, makeNegativeCorpusTest [This PhoneNumber] negativeCorpus
]

View File

@ -22,6 +22,7 @@ import Duckling.PhoneNumber.Corpus
import Duckling.PhoneNumber.Types
import Duckling.Testing.Asserts
import Duckling.Testing.Types
import qualified Duckling.PhoneNumber.AR.Tests as AR
import qualified Duckling.PhoneNumber.PT.Tests as PT
tests :: TestTree
@ -30,6 +31,7 @@ tests = testGroup "PhoneNumber Tests"
, makeNegativeCorpusTest [This PhoneNumber] negativeCorpus
, surroundTests
, PT.tests
, AR.tests
]
surroundTests :: TestTree