mirror of
https://github.com/facebook/duckling.git
synced 2025-01-07 06:19:10 +03:00
Support Arabic phone numbers
Summary: Support Arabic phone numbers for AR locales. Reviewed By: patapizza Differential Revision: D14591780 fbshipit-source-id: 93875c5b407c3fe2a06276c6fa3af2a26c91639e
This commit is contained in:
parent
830e5e3d8e
commit
b90ff9a64e
64
Duckling/Numeral/AR/Helpers.hs
Normal file
64
Duckling/Numeral/AR/Helpers.hs
Normal file
@ -0,0 +1,64 @@
|
||||
-- Copyright (c) 2016-present, Facebook, Inc.
|
||||
-- All rights reserved.
|
||||
--
|
||||
-- This source code is licensed under the BSD-style license found in the
|
||||
-- LICENSE file in the root directory of this source tree. An additional grant
|
||||
-- of patent rights can be found in the PATENTS file in the same directory.
|
||||
|
||||
|
||||
{-# LANGUAGE GADTs #-}
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
module Duckling.Numeral.AR.Helpers
|
||||
( digitsMap
|
||||
, numeralToStringMap
|
||||
, parseArabicIntAsText
|
||||
, parseArabicIntegerFromText
|
||||
) where
|
||||
|
||||
import Data.HashMap.Strict (HashMap)
|
||||
import Data.Maybe (mapMaybe)
|
||||
import Data.String
|
||||
import Data.Text (Text)
|
||||
import Duckling.Numeral.Helpers (parseInteger)
|
||||
import Prelude
|
||||
|
||||
import qualified Data.HashMap.Strict as HashMap
|
||||
import qualified Data.Text as Text
|
||||
|
||||
numeralToStringMap :: HashMap Char String
|
||||
numeralToStringMap =
|
||||
HashMap.fromList
|
||||
[ ('٠', "0")
|
||||
, ('١', "1")
|
||||
, ('٢', "2")
|
||||
, ('٣', "3")
|
||||
, ('٤', "4")
|
||||
, ('٥', "5")
|
||||
, ('٦', "6")
|
||||
, ('٧', "7")
|
||||
, ('٨', "8")
|
||||
, ('٩', "9")
|
||||
]
|
||||
|
||||
digitsMap :: HashMap Text Integer
|
||||
digitsMap =
|
||||
HashMap.fromList
|
||||
[ ("عشر", 2)
|
||||
, ("ثلاث", 3)
|
||||
, ("اربع", 4)
|
||||
, ("أربع", 4)
|
||||
, ("خمس", 5)
|
||||
, ("ست", 6)
|
||||
, ("سبع", 7)
|
||||
, ("ثمان", 8)
|
||||
, ("تسع", 9)
|
||||
]
|
||||
|
||||
parseArabicIntAsText :: Text -> Text
|
||||
parseArabicIntAsText intText =
|
||||
Text.pack
|
||||
$ concat
|
||||
$ mapMaybe (`HashMap.lookup` numeralToStringMap) (Text.unpack intText)
|
||||
|
||||
parseArabicIntegerFromText :: Text -> Maybe Integer
|
||||
parseArabicIntegerFromText given = parseInteger $ parseArabicIntAsText given
|
@ -9,10 +9,7 @@
|
||||
{-# LANGUAGE GADTs #-}
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
|
||||
module Duckling.Numeral.AR.Rules
|
||||
( rules
|
||||
) where
|
||||
|
||||
module Duckling.Numeral.AR.Rules (rules) where
|
||||
import Data.HashMap.Strict (HashMap)
|
||||
import Data.Maybe
|
||||
import Data.String
|
||||
@ -22,6 +19,7 @@ import qualified Data.HashMap.Strict as HashMap
|
||||
import qualified Data.Text as Text
|
||||
|
||||
import Duckling.Dimensions.Types
|
||||
import Duckling.Numeral.AR.Helpers (digitsMap)
|
||||
import Duckling.Numeral.Helpers
|
||||
import Duckling.Numeral.Types (NumeralData (..))
|
||||
import Duckling.Regex.Types
|
||||
@ -62,19 +60,6 @@ ruleInteger18 = Rule
|
||||
, prod = \_ -> integer 12
|
||||
}
|
||||
|
||||
digitsMap :: HashMap Text Integer
|
||||
digitsMap = HashMap.fromList
|
||||
[ ("عشر", 2)
|
||||
, ("ثلاث", 3)
|
||||
, ("اربع", 4)
|
||||
, ("أربع", 4)
|
||||
, ("خمس", 5)
|
||||
, ("ست", 6)
|
||||
, ("سبع", 7)
|
||||
, ("ثمان", 8)
|
||||
, ("تسع", 9)
|
||||
]
|
||||
|
||||
ruleInteger19 :: Rule
|
||||
ruleInteger19 = Rule
|
||||
{ name = "integer (20..90)"
|
||||
|
84
Duckling/PhoneNumber/AR/Corpus.hs
Normal file
84
Duckling/PhoneNumber/AR/Corpus.hs
Normal file
@ -0,0 +1,84 @@
|
||||
-- Copyright (c) 2016-present, Facebook, Inc.
|
||||
-- All rights reserved.
|
||||
--
|
||||
-- This source code is licensed under the BSD-style license found in the
|
||||
-- LICENSE file in the root directory of this source tree. An additional grant
|
||||
-- of patent rights can be found in the PATENTS file in the same directory.
|
||||
|
||||
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
|
||||
module Duckling.PhoneNumber.AR.Corpus
|
||||
( corpus
|
||||
, negativeCorpus
|
||||
) where
|
||||
|
||||
import Prelude
|
||||
import Data.String
|
||||
|
||||
import Duckling.Locale
|
||||
import Duckling.PhoneNumber.Types
|
||||
import Duckling.Resolve
|
||||
import Duckling.Testing.Types
|
||||
|
||||
corpus :: Corpus
|
||||
corpus =
|
||||
(testContext { locale = makeLocale AR Nothing }, testOptions, allExamples)
|
||||
|
||||
negativeCorpus :: NegativeCorpus
|
||||
negativeCorpus = (testContext, testOptions, xs)
|
||||
where
|
||||
xs =
|
||||
[ "١٢٣٤٥"
|
||||
, "١٢٣٤٥٦٧٨٩٠١٢٣٤٥٦٧٧٧٧٧٧"
|
||||
, "١٢٣٤٥٦٧٨٩٠١٢٣٤٥٦"
|
||||
]
|
||||
|
||||
-- Tests include both unicode characters and equivalent unicode decimal code
|
||||
-- representation because the Arabic phone number regex is constructed with
|
||||
-- unicode decimal form.
|
||||
allExamples :: [Example]
|
||||
allExamples =
|
||||
concat
|
||||
[ examples (PhoneNumberValue "6507018887")
|
||||
[ "٦٥٠٧٠١٨٨٨٧"
|
||||
, "\1638\1637\1632\1639\1632\1633\1640\1640\1640\1639"
|
||||
, "٦٥٠ ٧٠١ ٨٨٨٧"
|
||||
, "\1638\1637\1632 \1639\1632\1633 \1640\1640\1640\1639"
|
||||
, "٦٥٠-٧٠١-٨٨٨٧"
|
||||
, "\1638\1637\1632-\1639\1632\1633-\1640\1640\1640\1639"
|
||||
]
|
||||
, examples (PhoneNumberValue "(+1) 6507018887")
|
||||
[ "+١ ٦٥٠٧٠١٨٨٨٧"
|
||||
, "+\1633 \1638\1637\1632\1639\1632\1633\1640\1640\1640\1639"
|
||||
, "(+١)٦٥٠٧٠١٨٨٨٧"
|
||||
, "(+\1633)\1638\1637\1632\1639\1632\1633\1640\1640\1640\1639"
|
||||
, "(+١) ٦٥٠ - ٧٠١ ٨٨٨٧"
|
||||
, "(+\1633) \1638\1637\1632 - \1639\1632\1633 \1640\1640\1640\1639"
|
||||
]
|
||||
, examples (PhoneNumberValue "(+33) 146647998")
|
||||
[ "+٣٣ ١ ٤٦٦٤٧٩٩٨"
|
||||
, "+\1635\1635 \1633 \1636\1638\1638\1636\1639\1641\1641\1640"
|
||||
]
|
||||
, examples (PhoneNumberValue "0620702220")
|
||||
[ "٠٦ ٢٠٧٠ ٢٢٢٠"
|
||||
]
|
||||
, examples (PhoneNumberValue "6507018887 ext 897")
|
||||
[ "٦٥٠٧٠١٨٨٨٧ ext ٨٩٧"
|
||||
, "٦٥٠٧٠١٨٨٨٧ x ٨٩٧"
|
||||
, "٦٥٠٧٠١٨٨٨٧ ext. ٨٩٧"
|
||||
]
|
||||
, examples (PhoneNumberValue "6507018887 ext 897")
|
||||
[ "٦٥٠٧٠١٨٨٨٧ فرعي ٨٩٧"
|
||||
]
|
||||
, examples (PhoneNumberValue "(+1) 2025550121")
|
||||
[ "+١-٢٠٢-٥٥٥-٠١٢١"
|
||||
, "+١ ٢٠٢.٥٥٥.٠١٢١"
|
||||
]
|
||||
, examples (PhoneNumberValue "4866827")
|
||||
[ "٤.٨.٦.٦.٨.٢.٧"
|
||||
]
|
||||
, examples (PhoneNumberValue "(+55) 19992842606")
|
||||
[ "(+٥٥) ١٩٩٩٢٨٤٢٦٠٦"
|
||||
]
|
||||
]
|
56
Duckling/PhoneNumber/AR/Rules.hs
Normal file
56
Duckling/PhoneNumber/AR/Rules.hs
Normal file
@ -0,0 +1,56 @@
|
||||
-- Copyright (c) 2016-present, Facebook, Inc.
|
||||
-- All rights reserved.
|
||||
--
|
||||
-- This source code is licensed under the BSD-style license found in the
|
||||
-- LICENSE file in the root directory of this source tree. An additional grant
|
||||
-- of patent rights can be found in the PATENTS file in the same directory.
|
||||
|
||||
|
||||
{-# LANGUAGE GADTs #-}
|
||||
{-# LANGUAGE NoRebindableSyntax #-}
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
module Duckling.PhoneNumber.AR.Rules (rules) where
|
||||
|
||||
import Duckling.Dimensions.Types
|
||||
import Duckling.Numeral.AR.Helpers
|
||||
( parseArabicIntAsText
|
||||
, parseArabicIntegerFromText
|
||||
)
|
||||
import Duckling.PhoneNumber.Types (PhoneNumberData(..))
|
||||
import Duckling.Regex.Types
|
||||
import Duckling.Types
|
||||
import Prelude
|
||||
|
||||
import qualified Data.Text as Text
|
||||
import qualified Duckling.PhoneNumber.Types as TPhoneNumber
|
||||
|
||||
rulePhoneNumber :: Rule
|
||||
rulePhoneNumber = Rule
|
||||
{ name = "phone number"
|
||||
, pattern =
|
||||
-- Arabic is a right to left langauge except for numbers, which are read
|
||||
-- left to right. This regex uses the unicode range for Arabic numbers
|
||||
-- [\1632-\1641] to make the code easier to read and maintain. The unicode
|
||||
-- sequence \1601\1585\1593\1610, corresponding to فرعي, is a popular
|
||||
-- Arabic equivalent for "extension" and is used in this regex.
|
||||
[ regex $
|
||||
"(?:\\(?\\+([\1632-\1641]{1,2})\\)?[\\s-\\.]*)?" ++ -- area code
|
||||
"((?=[-\1632-\1641()\\s\\.]{6,16}(?:\\s*(?:e?xt?|\1601\1585\1593\1610)?\\.?\\s*(?:[\1632-\1641]{1,20}))?(?:[^\1632-\1641]+|$))(?:[\1632-\1641(]{1,20}(?:[-)\\s\\.]*[\1632-\1641]{1,20}){0,20}){1,20})" ++ -- nums
|
||||
"(?:\\s*(?:e?xt?|\1601\1585\1593\1610)\\.?\\s*([\1632-\1641]{1,20}))?" -- extension
|
||||
]
|
||||
, prod = \xs -> case xs of
|
||||
(Token RegexMatch (GroupMatch (code:nums:ext:_)):_) ->
|
||||
let
|
||||
mnums = parseArabicIntAsText $ cleanup nums
|
||||
cleanup = Text.filter (not . isWhitespace)
|
||||
isWhitespace x = elem x ['.', ' ', '-', '\t', '(', ')']
|
||||
in Just $ Token PhoneNumber $ PhoneNumberData
|
||||
{ TPhoneNumber.prefix = parseArabicIntegerFromText code
|
||||
, TPhoneNumber.number = mnums
|
||||
, TPhoneNumber.extension = parseArabicIntegerFromText ext
|
||||
}
|
||||
_ -> Nothing
|
||||
}
|
||||
|
||||
rules :: [Rule]
|
||||
rules = [rulePhoneNumber]
|
@ -22,6 +22,7 @@ import qualified Duckling.AmountOfMoney.AR.Rules as AmountOfMoney
|
||||
import qualified Duckling.Duration.AR.Rules as Duration
|
||||
import qualified Duckling.Numeral.AR.Rules as Numeral
|
||||
import qualified Duckling.Ordinal.AR.Rules as Ordinal
|
||||
import qualified Duckling.PhoneNumber.AR.Rules as PhoneNumber
|
||||
import qualified Duckling.Quantity.AR.Rules as Quantity
|
||||
import qualified Duckling.Temperature.AR.Rules as Temperature
|
||||
import qualified Duckling.Time.AR.Rules as Time
|
||||
@ -43,7 +44,7 @@ langRules (This Duration) = Duration.rules
|
||||
langRules (This Email) = []
|
||||
langRules (This Numeral) = Numeral.rules
|
||||
langRules (This Ordinal) = Ordinal.rules
|
||||
langRules (This PhoneNumber) = []
|
||||
langRules (This PhoneNumber) = PhoneNumber.rules
|
||||
langRules (This Quantity) = Quantity.rules
|
||||
langRules (This RegexMatch) = []
|
||||
langRules (This Temperature) = Temperature.rules
|
||||
|
@ -385,6 +385,7 @@ library
|
||||
|
||||
-- Numeral
|
||||
, Duckling.Numeral.AR.Corpus
|
||||
, Duckling.Numeral.AR.Helpers
|
||||
, Duckling.Numeral.AR.Rules
|
||||
, Duckling.Numeral.BG.Corpus
|
||||
, Duckling.Numeral.BG.Rules
|
||||
@ -547,6 +548,8 @@ library
|
||||
, Duckling.Ordinal.Types
|
||||
|
||||
-- PhoneNumber
|
||||
, Duckling.PhoneNumber.AR.Corpus
|
||||
, Duckling.PhoneNumber.AR.Rules
|
||||
, Duckling.PhoneNumber.PT.Corpus
|
||||
, Duckling.PhoneNumber.PT.Rules
|
||||
, Duckling.PhoneNumber.Corpus
|
||||
@ -996,6 +999,7 @@ test-suite duckling-test
|
||||
, Duckling.Ordinal.Tests
|
||||
|
||||
-- PhoneNumber
|
||||
, Duckling.PhoneNumber.AR.Tests
|
||||
, Duckling.PhoneNumber.PT.Tests
|
||||
, Duckling.PhoneNumber.Tests
|
||||
|
||||
|
24
tests/Duckling/PhoneNumber/AR/Tests.hs
Normal file
24
tests/Duckling/PhoneNumber/AR/Tests.hs
Normal file
@ -0,0 +1,24 @@
|
||||
-- Copyright (c) 2016-present, Facebook, Inc.
|
||||
-- All rights reserved.
|
||||
--
|
||||
-- This source code is licensed under the BSD-style license found in the
|
||||
-- LICENSE file in the root directory of this source tree. An additional grant
|
||||
-- of patent rights can be found in the PATENTS file in the same directory.
|
||||
|
||||
|
||||
module Duckling.PhoneNumber.AR.Tests
|
||||
( tests
|
||||
) where
|
||||
|
||||
import Data.String
|
||||
import Test.Tasty
|
||||
|
||||
import Duckling.Dimensions.Types
|
||||
import Duckling.PhoneNumber.AR.Corpus
|
||||
import Duckling.Testing.Asserts
|
||||
|
||||
tests :: TestTree
|
||||
tests = testGroup "PhoneNumber Tests"
|
||||
[ makeCorpusTest [This PhoneNumber] corpus
|
||||
, makeNegativeCorpusTest [This PhoneNumber] negativeCorpus
|
||||
]
|
@ -22,6 +22,7 @@ import Duckling.PhoneNumber.Corpus
|
||||
import Duckling.PhoneNumber.Types
|
||||
import Duckling.Testing.Asserts
|
||||
import Duckling.Testing.Types
|
||||
import qualified Duckling.PhoneNumber.AR.Tests as AR
|
||||
import qualified Duckling.PhoneNumber.PT.Tests as PT
|
||||
|
||||
tests :: TestTree
|
||||
@ -30,6 +31,7 @@ tests = testGroup "PhoneNumber Tests"
|
||||
, makeNegativeCorpusTest [This PhoneNumber] negativeCorpus
|
||||
, surroundTests
|
||||
, PT.tests
|
||||
, AR.tests
|
||||
]
|
||||
|
||||
surroundTests :: TestTree
|
||||
|
Loading…
Reference in New Issue
Block a user