mirror of
https://github.com/facebook/duckling.git
synced 2025-01-08 06:46:39 +03:00
Support Arabic phone numbers
Summary: Support Arabic phone numbers for AR locales. Reviewed By: patapizza Differential Revision: D14591780 fbshipit-source-id: 93875c5b407c3fe2a06276c6fa3af2a26c91639e
This commit is contained in:
parent
830e5e3d8e
commit
b90ff9a64e
64
Duckling/Numeral/AR/Helpers.hs
Normal file
64
Duckling/Numeral/AR/Helpers.hs
Normal file
@ -0,0 +1,64 @@
|
|||||||
|
-- Copyright (c) 2016-present, Facebook, Inc.
|
||||||
|
-- All rights reserved.
|
||||||
|
--
|
||||||
|
-- This source code is licensed under the BSD-style license found in the
|
||||||
|
-- LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
-- of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
|
||||||
|
|
||||||
|
{-# LANGUAGE GADTs #-}
|
||||||
|
{-# LANGUAGE OverloadedStrings #-}
|
||||||
|
module Duckling.Numeral.AR.Helpers
|
||||||
|
( digitsMap
|
||||||
|
, numeralToStringMap
|
||||||
|
, parseArabicIntAsText
|
||||||
|
, parseArabicIntegerFromText
|
||||||
|
) where
|
||||||
|
|
||||||
|
import Data.HashMap.Strict (HashMap)
|
||||||
|
import Data.Maybe (mapMaybe)
|
||||||
|
import Data.String
|
||||||
|
import Data.Text (Text)
|
||||||
|
import Duckling.Numeral.Helpers (parseInteger)
|
||||||
|
import Prelude
|
||||||
|
|
||||||
|
import qualified Data.HashMap.Strict as HashMap
|
||||||
|
import qualified Data.Text as Text
|
||||||
|
|
||||||
|
numeralToStringMap :: HashMap Char String
|
||||||
|
numeralToStringMap =
|
||||||
|
HashMap.fromList
|
||||||
|
[ ('٠', "0")
|
||||||
|
, ('١', "1")
|
||||||
|
, ('٢', "2")
|
||||||
|
, ('٣', "3")
|
||||||
|
, ('٤', "4")
|
||||||
|
, ('٥', "5")
|
||||||
|
, ('٦', "6")
|
||||||
|
, ('٧', "7")
|
||||||
|
, ('٨', "8")
|
||||||
|
, ('٩', "9")
|
||||||
|
]
|
||||||
|
|
||||||
|
digitsMap :: HashMap Text Integer
|
||||||
|
digitsMap =
|
||||||
|
HashMap.fromList
|
||||||
|
[ ("عشر", 2)
|
||||||
|
, ("ثلاث", 3)
|
||||||
|
, ("اربع", 4)
|
||||||
|
, ("أربع", 4)
|
||||||
|
, ("خمس", 5)
|
||||||
|
, ("ست", 6)
|
||||||
|
, ("سبع", 7)
|
||||||
|
, ("ثمان", 8)
|
||||||
|
, ("تسع", 9)
|
||||||
|
]
|
||||||
|
|
||||||
|
parseArabicIntAsText :: Text -> Text
|
||||||
|
parseArabicIntAsText intText =
|
||||||
|
Text.pack
|
||||||
|
$ concat
|
||||||
|
$ mapMaybe (`HashMap.lookup` numeralToStringMap) (Text.unpack intText)
|
||||||
|
|
||||||
|
parseArabicIntegerFromText :: Text -> Maybe Integer
|
||||||
|
parseArabicIntegerFromText given = parseInteger $ parseArabicIntAsText given
|
@ -9,10 +9,7 @@
|
|||||||
{-# LANGUAGE GADTs #-}
|
{-# LANGUAGE GADTs #-}
|
||||||
{-# LANGUAGE OverloadedStrings #-}
|
{-# LANGUAGE OverloadedStrings #-}
|
||||||
|
|
||||||
module Duckling.Numeral.AR.Rules
|
module Duckling.Numeral.AR.Rules (rules) where
|
||||||
( rules
|
|
||||||
) where
|
|
||||||
|
|
||||||
import Data.HashMap.Strict (HashMap)
|
import Data.HashMap.Strict (HashMap)
|
||||||
import Data.Maybe
|
import Data.Maybe
|
||||||
import Data.String
|
import Data.String
|
||||||
@ -22,6 +19,7 @@ import qualified Data.HashMap.Strict as HashMap
|
|||||||
import qualified Data.Text as Text
|
import qualified Data.Text as Text
|
||||||
|
|
||||||
import Duckling.Dimensions.Types
|
import Duckling.Dimensions.Types
|
||||||
|
import Duckling.Numeral.AR.Helpers (digitsMap)
|
||||||
import Duckling.Numeral.Helpers
|
import Duckling.Numeral.Helpers
|
||||||
import Duckling.Numeral.Types (NumeralData (..))
|
import Duckling.Numeral.Types (NumeralData (..))
|
||||||
import Duckling.Regex.Types
|
import Duckling.Regex.Types
|
||||||
@ -62,19 +60,6 @@ ruleInteger18 = Rule
|
|||||||
, prod = \_ -> integer 12
|
, prod = \_ -> integer 12
|
||||||
}
|
}
|
||||||
|
|
||||||
digitsMap :: HashMap Text Integer
|
|
||||||
digitsMap = HashMap.fromList
|
|
||||||
[ ("عشر", 2)
|
|
||||||
, ("ثلاث", 3)
|
|
||||||
, ("اربع", 4)
|
|
||||||
, ("أربع", 4)
|
|
||||||
, ("خمس", 5)
|
|
||||||
, ("ست", 6)
|
|
||||||
, ("سبع", 7)
|
|
||||||
, ("ثمان", 8)
|
|
||||||
, ("تسع", 9)
|
|
||||||
]
|
|
||||||
|
|
||||||
ruleInteger19 :: Rule
|
ruleInteger19 :: Rule
|
||||||
ruleInteger19 = Rule
|
ruleInteger19 = Rule
|
||||||
{ name = "integer (20..90)"
|
{ name = "integer (20..90)"
|
||||||
|
84
Duckling/PhoneNumber/AR/Corpus.hs
Normal file
84
Duckling/PhoneNumber/AR/Corpus.hs
Normal file
@ -0,0 +1,84 @@
|
|||||||
|
-- Copyright (c) 2016-present, Facebook, Inc.
|
||||||
|
-- All rights reserved.
|
||||||
|
--
|
||||||
|
-- This source code is licensed under the BSD-style license found in the
|
||||||
|
-- LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
-- of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
|
||||||
|
|
||||||
|
{-# LANGUAGE OverloadedStrings #-}
|
||||||
|
|
||||||
|
module Duckling.PhoneNumber.AR.Corpus
|
||||||
|
( corpus
|
||||||
|
, negativeCorpus
|
||||||
|
) where
|
||||||
|
|
||||||
|
import Prelude
|
||||||
|
import Data.String
|
||||||
|
|
||||||
|
import Duckling.Locale
|
||||||
|
import Duckling.PhoneNumber.Types
|
||||||
|
import Duckling.Resolve
|
||||||
|
import Duckling.Testing.Types
|
||||||
|
|
||||||
|
corpus :: Corpus
|
||||||
|
corpus =
|
||||||
|
(testContext { locale = makeLocale AR Nothing }, testOptions, allExamples)
|
||||||
|
|
||||||
|
negativeCorpus :: NegativeCorpus
|
||||||
|
negativeCorpus = (testContext, testOptions, xs)
|
||||||
|
where
|
||||||
|
xs =
|
||||||
|
[ "١٢٣٤٥"
|
||||||
|
, "١٢٣٤٥٦٧٨٩٠١٢٣٤٥٦٧٧٧٧٧٧"
|
||||||
|
, "١٢٣٤٥٦٧٨٩٠١٢٣٤٥٦"
|
||||||
|
]
|
||||||
|
|
||||||
|
-- Tests include both unicode characters and equivalent unicode decimal code
|
||||||
|
-- representation because the Arabic phone number regex is constructed with
|
||||||
|
-- unicode decimal form.
|
||||||
|
allExamples :: [Example]
|
||||||
|
allExamples =
|
||||||
|
concat
|
||||||
|
[ examples (PhoneNumberValue "6507018887")
|
||||||
|
[ "٦٥٠٧٠١٨٨٨٧"
|
||||||
|
, "\1638\1637\1632\1639\1632\1633\1640\1640\1640\1639"
|
||||||
|
, "٦٥٠ ٧٠١ ٨٨٨٧"
|
||||||
|
, "\1638\1637\1632 \1639\1632\1633 \1640\1640\1640\1639"
|
||||||
|
, "٦٥٠-٧٠١-٨٨٨٧"
|
||||||
|
, "\1638\1637\1632-\1639\1632\1633-\1640\1640\1640\1639"
|
||||||
|
]
|
||||||
|
, examples (PhoneNumberValue "(+1) 6507018887")
|
||||||
|
[ "+١ ٦٥٠٧٠١٨٨٨٧"
|
||||||
|
, "+\1633 \1638\1637\1632\1639\1632\1633\1640\1640\1640\1639"
|
||||||
|
, "(+١)٦٥٠٧٠١٨٨٨٧"
|
||||||
|
, "(+\1633)\1638\1637\1632\1639\1632\1633\1640\1640\1640\1639"
|
||||||
|
, "(+١) ٦٥٠ - ٧٠١ ٨٨٨٧"
|
||||||
|
, "(+\1633) \1638\1637\1632 - \1639\1632\1633 \1640\1640\1640\1639"
|
||||||
|
]
|
||||||
|
, examples (PhoneNumberValue "(+33) 146647998")
|
||||||
|
[ "+٣٣ ١ ٤٦٦٤٧٩٩٨"
|
||||||
|
, "+\1635\1635 \1633 \1636\1638\1638\1636\1639\1641\1641\1640"
|
||||||
|
]
|
||||||
|
, examples (PhoneNumberValue "0620702220")
|
||||||
|
[ "٠٦ ٢٠٧٠ ٢٢٢٠"
|
||||||
|
]
|
||||||
|
, examples (PhoneNumberValue "6507018887 ext 897")
|
||||||
|
[ "٦٥٠٧٠١٨٨٨٧ ext ٨٩٧"
|
||||||
|
, "٦٥٠٧٠١٨٨٨٧ x ٨٩٧"
|
||||||
|
, "٦٥٠٧٠١٨٨٨٧ ext. ٨٩٧"
|
||||||
|
]
|
||||||
|
, examples (PhoneNumberValue "6507018887 ext 897")
|
||||||
|
[ "٦٥٠٧٠١٨٨٨٧ فرعي ٨٩٧"
|
||||||
|
]
|
||||||
|
, examples (PhoneNumberValue "(+1) 2025550121")
|
||||||
|
[ "+١-٢٠٢-٥٥٥-٠١٢١"
|
||||||
|
, "+١ ٢٠٢.٥٥٥.٠١٢١"
|
||||||
|
]
|
||||||
|
, examples (PhoneNumberValue "4866827")
|
||||||
|
[ "٤.٨.٦.٦.٨.٢.٧"
|
||||||
|
]
|
||||||
|
, examples (PhoneNumberValue "(+55) 19992842606")
|
||||||
|
[ "(+٥٥) ١٩٩٩٢٨٤٢٦٠٦"
|
||||||
|
]
|
||||||
|
]
|
56
Duckling/PhoneNumber/AR/Rules.hs
Normal file
56
Duckling/PhoneNumber/AR/Rules.hs
Normal file
@ -0,0 +1,56 @@
|
|||||||
|
-- Copyright (c) 2016-present, Facebook, Inc.
|
||||||
|
-- All rights reserved.
|
||||||
|
--
|
||||||
|
-- This source code is licensed under the BSD-style license found in the
|
||||||
|
-- LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
-- of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
|
||||||
|
|
||||||
|
{-# LANGUAGE GADTs #-}
|
||||||
|
{-# LANGUAGE NoRebindableSyntax #-}
|
||||||
|
{-# LANGUAGE OverloadedStrings #-}
|
||||||
|
module Duckling.PhoneNumber.AR.Rules (rules) where
|
||||||
|
|
||||||
|
import Duckling.Dimensions.Types
|
||||||
|
import Duckling.Numeral.AR.Helpers
|
||||||
|
( parseArabicIntAsText
|
||||||
|
, parseArabicIntegerFromText
|
||||||
|
)
|
||||||
|
import Duckling.PhoneNumber.Types (PhoneNumberData(..))
|
||||||
|
import Duckling.Regex.Types
|
||||||
|
import Duckling.Types
|
||||||
|
import Prelude
|
||||||
|
|
||||||
|
import qualified Data.Text as Text
|
||||||
|
import qualified Duckling.PhoneNumber.Types as TPhoneNumber
|
||||||
|
|
||||||
|
rulePhoneNumber :: Rule
|
||||||
|
rulePhoneNumber = Rule
|
||||||
|
{ name = "phone number"
|
||||||
|
, pattern =
|
||||||
|
-- Arabic is a right to left langauge except for numbers, which are read
|
||||||
|
-- left to right. This regex uses the unicode range for Arabic numbers
|
||||||
|
-- [\1632-\1641] to make the code easier to read and maintain. The unicode
|
||||||
|
-- sequence \1601\1585\1593\1610, corresponding to فرعي, is a popular
|
||||||
|
-- Arabic equivalent for "extension" and is used in this regex.
|
||||||
|
[ regex $
|
||||||
|
"(?:\\(?\\+([\1632-\1641]{1,2})\\)?[\\s-\\.]*)?" ++ -- area code
|
||||||
|
"((?=[-\1632-\1641()\\s\\.]{6,16}(?:\\s*(?:e?xt?|\1601\1585\1593\1610)?\\.?\\s*(?:[\1632-\1641]{1,20}))?(?:[^\1632-\1641]+|$))(?:[\1632-\1641(]{1,20}(?:[-)\\s\\.]*[\1632-\1641]{1,20}){0,20}){1,20})" ++ -- nums
|
||||||
|
"(?:\\s*(?:e?xt?|\1601\1585\1593\1610)\\.?\\s*([\1632-\1641]{1,20}))?" -- extension
|
||||||
|
]
|
||||||
|
, prod = \xs -> case xs of
|
||||||
|
(Token RegexMatch (GroupMatch (code:nums:ext:_)):_) ->
|
||||||
|
let
|
||||||
|
mnums = parseArabicIntAsText $ cleanup nums
|
||||||
|
cleanup = Text.filter (not . isWhitespace)
|
||||||
|
isWhitespace x = elem x ['.', ' ', '-', '\t', '(', ')']
|
||||||
|
in Just $ Token PhoneNumber $ PhoneNumberData
|
||||||
|
{ TPhoneNumber.prefix = parseArabicIntegerFromText code
|
||||||
|
, TPhoneNumber.number = mnums
|
||||||
|
, TPhoneNumber.extension = parseArabicIntegerFromText ext
|
||||||
|
}
|
||||||
|
_ -> Nothing
|
||||||
|
}
|
||||||
|
|
||||||
|
rules :: [Rule]
|
||||||
|
rules = [rulePhoneNumber]
|
@ -22,6 +22,7 @@ import qualified Duckling.AmountOfMoney.AR.Rules as AmountOfMoney
|
|||||||
import qualified Duckling.Duration.AR.Rules as Duration
|
import qualified Duckling.Duration.AR.Rules as Duration
|
||||||
import qualified Duckling.Numeral.AR.Rules as Numeral
|
import qualified Duckling.Numeral.AR.Rules as Numeral
|
||||||
import qualified Duckling.Ordinal.AR.Rules as Ordinal
|
import qualified Duckling.Ordinal.AR.Rules as Ordinal
|
||||||
|
import qualified Duckling.PhoneNumber.AR.Rules as PhoneNumber
|
||||||
import qualified Duckling.Quantity.AR.Rules as Quantity
|
import qualified Duckling.Quantity.AR.Rules as Quantity
|
||||||
import qualified Duckling.Temperature.AR.Rules as Temperature
|
import qualified Duckling.Temperature.AR.Rules as Temperature
|
||||||
import qualified Duckling.Time.AR.Rules as Time
|
import qualified Duckling.Time.AR.Rules as Time
|
||||||
@ -43,7 +44,7 @@ langRules (This Duration) = Duration.rules
|
|||||||
langRules (This Email) = []
|
langRules (This Email) = []
|
||||||
langRules (This Numeral) = Numeral.rules
|
langRules (This Numeral) = Numeral.rules
|
||||||
langRules (This Ordinal) = Ordinal.rules
|
langRules (This Ordinal) = Ordinal.rules
|
||||||
langRules (This PhoneNumber) = []
|
langRules (This PhoneNumber) = PhoneNumber.rules
|
||||||
langRules (This Quantity) = Quantity.rules
|
langRules (This Quantity) = Quantity.rules
|
||||||
langRules (This RegexMatch) = []
|
langRules (This RegexMatch) = []
|
||||||
langRules (This Temperature) = Temperature.rules
|
langRules (This Temperature) = Temperature.rules
|
||||||
|
@ -385,6 +385,7 @@ library
|
|||||||
|
|
||||||
-- Numeral
|
-- Numeral
|
||||||
, Duckling.Numeral.AR.Corpus
|
, Duckling.Numeral.AR.Corpus
|
||||||
|
, Duckling.Numeral.AR.Helpers
|
||||||
, Duckling.Numeral.AR.Rules
|
, Duckling.Numeral.AR.Rules
|
||||||
, Duckling.Numeral.BG.Corpus
|
, Duckling.Numeral.BG.Corpus
|
||||||
, Duckling.Numeral.BG.Rules
|
, Duckling.Numeral.BG.Rules
|
||||||
@ -547,6 +548,8 @@ library
|
|||||||
, Duckling.Ordinal.Types
|
, Duckling.Ordinal.Types
|
||||||
|
|
||||||
-- PhoneNumber
|
-- PhoneNumber
|
||||||
|
, Duckling.PhoneNumber.AR.Corpus
|
||||||
|
, Duckling.PhoneNumber.AR.Rules
|
||||||
, Duckling.PhoneNumber.PT.Corpus
|
, Duckling.PhoneNumber.PT.Corpus
|
||||||
, Duckling.PhoneNumber.PT.Rules
|
, Duckling.PhoneNumber.PT.Rules
|
||||||
, Duckling.PhoneNumber.Corpus
|
, Duckling.PhoneNumber.Corpus
|
||||||
@ -996,6 +999,7 @@ test-suite duckling-test
|
|||||||
, Duckling.Ordinal.Tests
|
, Duckling.Ordinal.Tests
|
||||||
|
|
||||||
-- PhoneNumber
|
-- PhoneNumber
|
||||||
|
, Duckling.PhoneNumber.AR.Tests
|
||||||
, Duckling.PhoneNumber.PT.Tests
|
, Duckling.PhoneNumber.PT.Tests
|
||||||
, Duckling.PhoneNumber.Tests
|
, Duckling.PhoneNumber.Tests
|
||||||
|
|
||||||
|
24
tests/Duckling/PhoneNumber/AR/Tests.hs
Normal file
24
tests/Duckling/PhoneNumber/AR/Tests.hs
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
-- Copyright (c) 2016-present, Facebook, Inc.
|
||||||
|
-- All rights reserved.
|
||||||
|
--
|
||||||
|
-- This source code is licensed under the BSD-style license found in the
|
||||||
|
-- LICENSE file in the root directory of this source tree. An additional grant
|
||||||
|
-- of patent rights can be found in the PATENTS file in the same directory.
|
||||||
|
|
||||||
|
|
||||||
|
module Duckling.PhoneNumber.AR.Tests
|
||||||
|
( tests
|
||||||
|
) where
|
||||||
|
|
||||||
|
import Data.String
|
||||||
|
import Test.Tasty
|
||||||
|
|
||||||
|
import Duckling.Dimensions.Types
|
||||||
|
import Duckling.PhoneNumber.AR.Corpus
|
||||||
|
import Duckling.Testing.Asserts
|
||||||
|
|
||||||
|
tests :: TestTree
|
||||||
|
tests = testGroup "PhoneNumber Tests"
|
||||||
|
[ makeCorpusTest [This PhoneNumber] corpus
|
||||||
|
, makeNegativeCorpusTest [This PhoneNumber] negativeCorpus
|
||||||
|
]
|
@ -22,6 +22,7 @@ import Duckling.PhoneNumber.Corpus
|
|||||||
import Duckling.PhoneNumber.Types
|
import Duckling.PhoneNumber.Types
|
||||||
import Duckling.Testing.Asserts
|
import Duckling.Testing.Asserts
|
||||||
import Duckling.Testing.Types
|
import Duckling.Testing.Types
|
||||||
|
import qualified Duckling.PhoneNumber.AR.Tests as AR
|
||||||
import qualified Duckling.PhoneNumber.PT.Tests as PT
|
import qualified Duckling.PhoneNumber.PT.Tests as PT
|
||||||
|
|
||||||
tests :: TestTree
|
tests :: TestTree
|
||||||
@ -30,6 +31,7 @@ tests = testGroup "PhoneNumber Tests"
|
|||||||
, makeNegativeCorpusTest [This PhoneNumber] negativeCorpus
|
, makeNegativeCorpusTest [This PhoneNumber] negativeCorpus
|
||||||
, surroundTests
|
, surroundTests
|
||||||
, PT.tests
|
, PT.tests
|
||||||
|
, AR.tests
|
||||||
]
|
]
|
||||||
|
|
||||||
surroundTests :: TestTree
|
surroundTests :: TestTree
|
||||||
|
Loading…
Reference in New Issue
Block a user