Summary: Pull Request resolved: https://github.com/facebook/duckling/pull/520

Reviewed By: patapizza

Differential Revision: D25072459

Pulled By: chessai

fbshipit-source-id: 5db72eda36fe166a452b2345cab75fb1508b192b
This commit is contained in:
Sajjad Heydari 2020-11-19 12:06:34 -08:00 committed by Facebook GitHub Bot
parent 11595b7377
commit 700118644c
13 changed files with 478 additions and 0 deletions

View File

@ -32,6 +32,7 @@ import qualified Duckling.Dimensions.EL as ELDimensions
import qualified Duckling.Dimensions.EN as ENDimensions
import qualified Duckling.Dimensions.ES as ESDimensions
import qualified Duckling.Dimensions.ET as ETDimensions
import qualified Duckling.Dimensions.FA as FADimensions
import qualified Duckling.Dimensions.FI as FIDimensions
import qualified Duckling.Dimensions.FR as FRDimensions
import qualified Duckling.Dimensions.GA as GADimensions
@ -110,6 +111,7 @@ langDimensions EL = ELDimensions.allDimensions
langDimensions EN = ENDimensions.allDimensions
langDimensions ES = ESDimensions.allDimensions
langDimensions ET = ETDimensions.allDimensions
langDimensions FA = FADimensions.allDimensions
langDimensions FI = FIDimensions.allDimensions
langDimensions FR = FRDimensions.allDimensions
langDimensions GA = GADimensions.allDimensions

16
Duckling/Dimensions/FA.hs Normal file
View File

@ -0,0 +1,16 @@
-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree.
module Duckling.Dimensions.FA
( allDimensions
) where
import Duckling.Dimensions.Types
-- | Dimensions listed for Persian (FA); currently only 'Numeral'.
allDimensions :: [Seal Dimension]
allDimensions = [Seal Numeral]

View File

@ -76,6 +76,7 @@ data Lang
| EN
| ES
| ET
| FA
| FI
| FR
| GA

View File

@ -0,0 +1,102 @@
-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree.
{-# LANGUAGE OverloadedStrings #-}
module Duckling.Numeral.FA.Corpus
( corpus
) where
import Data.String
import Prelude
import Duckling.Locale
import Duckling.Numeral.Types
import Duckling.Resolve
import Duckling.Testing.Types
-- | Persian numeral corpus: 'testContext' with its locale overridden to FA
-- (no region), 'testOptions', and every example from 'allExamples'.
corpus :: Corpus
corpus = (faContext, testOptions, allExamples)
  where
    faContext = testContext {locale = makeLocale FA Nothing}
-- | All Persian numeral examples.
--
-- Built from a table pairing each expected value with the inputs that must
-- resolve to it; the inputs mix ASCII digits, Persian digits and number
-- words.
allExamples :: [Example]
allExamples = concatMap toExamples
  [ (0, ["0", "۰", "صفر"])
  , (1, ["1", "۱", "یک"])
  , (11, ["یازده"])
  , (17, ["هفده"])
  , (21, ["بیست و یک"])
  , (23, ["بیست و سه"])
  , (70, ["هفتاد"])
  , (71, ["هفتاد و یک"])
  , (78, ["هفتاد و هشت"])
  , (73, ["هفتاد و سه"])
  , (80, ["هشتاد"])
  , (81, ["هشتاد و یک"])
  , (82, ["هشتاد و دو"])
  , (90, ["نود"])
  , (91, ["نود و یک"])
  , (92, ["نود و دو"])
  , (99, ["نود و نه"])
  , (33, ["33", "۳۳", "سی و سه"])
  , (118, ["صد و هجده"])
  , (4020, ["چهار هزار و بیست"])
  , (100000, ["صد هزار", "100000", "۱۰۰۰۰۰"])
  , (3000000, ["سه میلیون", "3000000"])
  ]
  where
    -- One table row becomes one group of examples sharing the same value.
    toExamples (expected, inputs) = examples (NumeralValue expected) inputs

View File

@ -0,0 +1,254 @@
-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree. An additional grant
-- of patent rights can be found in the PATENTS file in the same directory.
{-# LANGUAGE GADTs #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE NoRebindableSyntax #-}
module Duckling.Numeral.FA.Rules
( rules ) where
import Control.Monad (join)
import Data.HashMap.Strict (HashMap)
import Data.Maybe
import Data.String
import Data.Text (Text)
import Prelude
import qualified Data.HashMap.Strict as HashMap
import qualified Data.Text as Text
import Duckling.Dimensions.Types
import Duckling.Numeral.Helpers
import Duckling.Numeral.Types (NumeralData (..))
import Duckling.Regex.Types
import Duckling.Types
import qualified Duckling.Numeral.Types as TNumeral
-- | Lookup table from a Persian number word to its value, for 0 through 19.
--
-- The primary spellings are listed in ascending order and numbered by
-- position; alternative spellings of 6 and 15..18 are added explicitly.
zeroNineteenMap :: HashMap Text Integer
zeroNineteenMap = HashMap.fromList $ zip primary [0 ..] ++ alternatives
  where
    primary :: [Text]
    primary =
      [ "صفر"
      , "یک"
      , "دو"
      , "سه"
      , "چهار"
      , "پنج"
      , "شش"
      , "هفت"
      , "هشت"
      , "نه"
      , "ده"
      , "یازده"
      , "دوازده"
      , "سیزده"
      , "چهارده"
      , "پانزده"
      , "شانزده"
      , "هفده"
      , "هجده"
      , "نوزده"
      ]

    alternatives :: [(Text, Integer)]
    alternatives =
      [ ("شیش", 6)
      , ("پونزده", 15)
      , ("شونزده", 16)
      , ("هیفده", 17)
      , ("هیجده", 18)
      ]
-- | Recognises a single Persian number word for 0..19 and resolves it
-- through 'zeroNineteenMap'.
--
-- In the regex, longer words precede words that are their prefixes
-- ("چهارده" before "چهار") so the alternation prefers the longer match.
ruleToNineteen :: Rule
ruleToNineteen = Rule
  { name = "integer (0..19)"
  , pattern =
    [ regex "(صفر|یک|سه|چهارده|چهار|پنج|شی?ش|هفت|هشت|نه|یازده|دوازده|سیزده|پ(ا|و)نزده|ش(ا|و)نزده|هی?فده|هی?جده|نوزده|ده|دو)"
    ]
  , prod = \tokens -> case tokens of
      (Token RegexMatch (GroupMatch (word:_)):_) ->
        -- The first capture group is the whole matched word.
        integer =<< HashMap.lookup (Text.toLower word) zeroNineteenMap
      _ -> Nothing
  }
-- | Lookup table from a Persian word to its value, covering the round tens
-- (20..90) and the round hundreds (100..900).
--
-- Most hundreds appear both joined and with a space before "صد".
tensMap :: HashMap Text Integer
tensMap = HashMap.fromList (tens ++ hundreds)
  where
    tens :: [(Text, Integer)]
    tens =
      [ ("بیست", 20)
      , ("سی", 30)
      , ("چهل", 40)
      , ("پنجاه", 50)
      , ("شصت", 60)
      , ("هفتاد", 70)
      , ("هشتاد", 80)
      , ("نود", 90)
      ]

    hundreds :: [(Text, Integer)]
    hundreds =
      [ ("صد", 100)
      , ("دویست", 200)
      , ("سیصد", 300)
      , ("سی صد", 300)
      , ("چهارصد", 400)
      , ("چهار صد", 400)
      , ("پانصد", 500)
      , ("پونصد", 500)
      , ("شیشصد", 600)
      , ("شیش صد", 600)
      , ("ششصد", 600)
      , ("شش صد", 600)
      , ("هفتصد", 700)
      , ("هفت صد", 700)
      , ("هشتصد", 800)
      , ("هشت صد", 800)
      , ("نهصد", 900)
      , ("نه صد", 900)
      ]
-- | Recognises a round ten (20..90) or a round hundred (100..900) written as
-- a word and resolves it through 'tensMap'.
--
-- Despite the rule name, hundreds are handled here as well. A hundred may be
-- written joined ("سیصد") or with a space between the prefix and "صد".
ruleTens :: Rule
ruleTens = Rule
  { name = "integer (20..90)"
  , pattern =
    -- The optional space is grouped with the optional prefix so that a bare
    -- "صد" can never capture the space that precedes it.
    [ regex "(دویست|((سی|چهار|پان|پون|شی?ش|هفت|هشت|نه) ?)?صد|بیست|سی|چهل|پنجاه|شصت|هفتاد|هشتاد|نود)"
    ]
  , prod = \tokens -> case tokens of
      (Token RegexMatch (GroupMatch (match:_)):_) ->
        -- Every hundred has a joined spelling in 'tensMap', so dropping the
        -- optional space lets all prefixes accepted by the regex resolve
        -- (including "پان صد" / "پون صد", which have no spaced key).
        HashMap.lookup (Text.filter (/= ' ') (Text.toLower match)) tensMap
          >>= integer
      _ -> Nothing
  }
-- | Recognises the Persian words for thousand, million (two spellings) and
-- billion, producing a multipliable numeral.
--
-- The grain is the base-10 exponent of the value (3, 6, 9). 'ruleSum' and
-- 'ruleSumAnd' only add a following number when it is smaller than
-- @10 ** grain@, so an understated grain rejects valid inputs such as
-- thousand + 500 or million + 200000.
rulePowersOfTen :: Rule
rulePowersOfTen = Rule
  { name = "powers of tens"
  , pattern =
    [ regex "(هزار|میلیون|ملیون|میلیارد)"
    ]
  , prod = \tokens -> case tokens of
      (Token RegexMatch (GroupMatch (match:_)):_) -> case Text.toLower match of
        "هزار" -> double 1e3 >>= withGrain 3 >>= withMultipliable
        -- Both spellings of "million" must carry the same grain.
        "میلیون" -> double 1e6 >>= withGrain 6 >>= withMultipliable
        "ملیون" -> double 1e6 >>= withGrain 6 >>= withMultipliable
        "میلیارد" -> double 1e9 >>= withGrain 9 >>= withMultipliable
        _ -> Nothing
      _ -> Nothing
  }
-- | Composes a round ten (20, 30 .. 90) with a following number accepted by
-- @numberBetween 1 10@, joined by "و"; the result is their sum.
ruleCompositeTens :: Rule
ruleCompositeTens = Rule
  { name = "integer 21..99"
  , pattern =
    [ oneOf [20, 30 .. 90]
    , regex "و"
    , numberBetween 1 10
    ]
  , prod = \tokens -> case tokens of
      -- The middle token is the conjunction and carries no value.
      (Token Numeral NumeralData{TNumeral.value = tensValue}:
       _:
       Token Numeral NumeralData{TNumeral.value = unitsValue}:
       _) -> double (tensValue + unitsValue)
      _ -> Nothing
  }
-- | Composes a round hundred (100, 200 .. 900) with a following number
-- accepted by @numberBetween 1 100@, joined by "و"; the result is their sum.
--
-- The rule has its own name: it previously reused the name of
-- 'ruleCompositeTens', which made the two rules indistinguishable by name.
ruleCompositeHundred :: Rule
ruleCompositeHundred = Rule
  { name = "integer 101..999"
  , pattern =
    [ oneOf [100, 200 .. 900]
    , regex "و"
    , numberBetween 1 100
    ]
  , prod = \tokens -> case tokens of
      -- The middle token is the conjunction and carries no value.
      (Token Numeral NumeralData{TNumeral.value = hundreds}:
       _:
       Token Numeral NumeralData{TNumeral.value = remainder}:
       _) -> double $ hundreds + remainder
      _ -> Nothing
  }
-- | Adds two adjacent numbers: a positive number that carries a grain,
-- followed by a positive, non-multipliable number.
--
-- The sum is produced only when the second value is smaller than
-- @10 ** grain@ of the first.
ruleSum :: Rule
ruleSum = Rule
  { name = "intersect 2 numbers"
  , pattern =
    [ Predicate $ \t -> hasGrain t && isPositive t
    , Predicate $ \t -> not (isMultipliable t) && isPositive t
    ]
  , prod = \tokens -> case tokens of
      (Token Numeral NumeralData{TNumeral.value = big, TNumeral.grain = Just g}:
       Token Numeral NumeralData{TNumeral.value = small}:
       _) | small < 10 ** fromIntegral g -> double (big + small)
      _ -> Nothing
  }
-- | Same composition as the "intersect 2 numbers" rule, but with the
-- conjunction "و" between the two numbers.
--
-- The sum is produced only when the second value is smaller than
-- @10 ** grain@ of the first.
ruleSumAnd :: Rule
ruleSumAnd = Rule
  { name = "intersect 2 numbers (with and)"
  , pattern =
    [ Predicate $ \t -> hasGrain t && isPositive t
    , regex "و"
    , Predicate $ \t -> not (isMultipliable t) && isPositive t
    ]
  , prod = \tokens -> case tokens of
      -- The middle token is the conjunction and carries no value.
      (Token Numeral NumeralData{TNumeral.value = big, TNumeral.grain = Just g}:
       _:
       Token Numeral NumeralData{TNumeral.value = small}:
       _) | small < 10 ** fromIntegral g -> double (big + small)
      _ -> Nothing
  }
-- | Combines any numeral with a following multipliable numeral by handing
-- both tokens to 'multiply'.
ruleMultiply :: Rule
ruleMultiply = Rule
  { name = "compose by multiplication"
  , pattern =
    [ dimension Numeral
    , Predicate isMultipliable
    ]
  , prod = \tokens -> case tokens of
      (factor:multiplier:_) -> factor `multiply` multiplier
      _ -> Nothing
  }
-- | Maps each Persian digit character to the one-character string holding
-- the corresponding ASCII digit.
numeralToStringMap :: HashMap Char String
numeralToStringMap = HashMap.fromList (zip persianDigits asciiDigits)
  where
    -- Both sequences run from zero to nine, so they pair up by position.
    persianDigits :: String
    persianDigits = "۰۱۲۳۴۵۶۷۸۹"

    asciiDigits :: [String]
    asciiDigits = map (: []) "0123456789"
-- | Rewrites a string of Persian digits as ASCII digits.
--
-- Characters with no entry in 'numeralToStringMap' are dropped.
parseIntAsText :: Text -> Text
parseIntAsText persian = Text.pack (join asciiPieces)
  where
    asciiPieces =
      mapMaybe (`HashMap.lookup` numeralToStringMap) (Text.unpack persian)
-- | Converts Persian digits to ASCII with 'parseIntAsText' and passes the
-- result to 'parseInteger'.
parseIntegerFromText :: Text -> Maybe Integer
parseIntegerFromText persian = parseInteger (parseIntAsText persian)
-- | Recognises an integer written with 1 to 18 Persian digits and converts
-- it with 'parseIntegerFromText'.
ruleIntegerNumeric :: Rule
ruleIntegerNumeric = Rule
  { name = "Persian integer numeric"
  , pattern =
    [ regex "([۰-۹]{1,18})"
    ]
  , prod = \tokens -> case tokens of
      (Token RegexMatch (GroupMatch (digits:_)):_) ->
        integer =<< parseIntegerFromText digits
      _ -> Nothing
  }
-- | All Persian numeral rules exported by this module: first the rules that
-- read a single word or digit string, then the rules that combine numerals.
rules :: [Rule]
rules = singleTokenRules ++ combiningRules
  where
    singleTokenRules =
      [ ruleIntegerNumeric
      , ruleToNineteen
      , ruleTens
      , rulePowersOfTen
      ]
    combiningRules =
      [ ruleCompositeTens
      , ruleCompositeHundred
      , ruleSum
      , ruleSumAnd
      , ruleMultiply
      ]

View File

@ -27,6 +27,7 @@ import qualified Duckling.Ranking.Classifiers.EN_XX as EN_XXClassifiers
import qualified Duckling.Ranking.Classifiers.ES_XX as ES_XXClassifiers
import qualified Duckling.Ranking.Classifiers.ET_XX as ET_XXClassifiers
import qualified Duckling.Ranking.Classifiers.FI_XX as FI_XXClassifiers
import qualified Duckling.Ranking.Classifiers.FA_XX as FA_XXClassifiers
import qualified Duckling.Ranking.Classifiers.FR_XX as FR_XXClassifiers
import qualified Duckling.Ranking.Classifiers.GA_XX as GA_XXClassifiers
import qualified Duckling.Ranking.Classifiers.HE_XX as HE_XXClassifiers
@ -77,6 +78,7 @@ classifiers (Locale EN _) = EN_XXClassifiers.classifiers
classifiers (Locale ES _) = ES_XXClassifiers.classifiers
classifiers (Locale ET _) = ET_XXClassifiers.classifiers
classifiers (Locale FI _) = FI_XXClassifiers.classifiers
classifiers (Locale FA _) = FA_XXClassifiers.classifiers
classifiers (Locale FR _) = FR_XXClassifiers.classifiers
classifiers (Locale GA _) = GA_XXClassifiers.classifiers
classifiers (Locale HE _) = HE_XXClassifiers.classifiers

View File

@ -0,0 +1,22 @@
-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree. An additional grant
-- of patent rights can be found in the PATENTS file in the same directory.
-----------------------------------------------------------------
-- Auto-generated by regenClassifiers
--
-- DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
-- @generated
-----------------------------------------------------------------
{-# LANGUAGE OverloadedStrings #-}
module Duckling.Ranking.Classifiers.FA_XX (classifiers) where
import Data.String
import Prelude
import qualified Data.HashMap.Strict as HashMap
import Duckling.Ranking.Types
-- | Ranking classifiers for FA. The map is empty in this generated file.
classifiers :: Classifiers
classifiers = HashMap.fromList []

View File

@ -34,6 +34,7 @@ import qualified Duckling.Rules.EN as ENRules
import qualified Duckling.Rules.ES as ESRules
import qualified Duckling.Rules.ET as ETRules
import qualified Duckling.Rules.FI as FIRules
import qualified Duckling.Rules.FA as FARules
import qualified Duckling.Rules.FR as FRRules
import qualified Duckling.Rules.GA as GARules
import qualified Duckling.Rules.HE as HERules
@ -102,6 +103,7 @@ defaultRules EN = ENRules.defaultRules
defaultRules ES = ESRules.defaultRules
defaultRules ET = ETRules.defaultRules
defaultRules FI = FIRules.defaultRules
defaultRules FA = FARules.defaultRules
defaultRules FR = FRRules.defaultRules
defaultRules GA = GARules.defaultRules
defaultRules HE = HERules.defaultRules
@ -150,6 +152,7 @@ localeRules EN = ENRules.localeRules
localeRules ES = ESRules.localeRules
localeRules ET = ETRules.localeRules
localeRules FI = FIRules.localeRules
localeRules FA = FARules.localeRules
localeRules FR = FRRules.localeRules
localeRules GA = GARules.localeRules
localeRules HE = HERules.localeRules
@ -198,6 +201,7 @@ langRules EN = ENRules.langRules
langRules ES = ESRules.langRules
langRules ET = ETRules.langRules
langRules FI = FIRules.langRules
langRules FA = FARules.langRules
langRules FR = FRRules.langRules
langRules GA = GARules.langRules
langRules HE = HERules.langRules

45
Duckling/Rules/FA.hs Normal file
View File

@ -0,0 +1,45 @@
-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree.
{-# LANGUAGE GADTs #-}
module Duckling.Rules.FA
( defaultRules
, langRules
, localeRules
) where
import Duckling.Dimensions.Types
import Duckling.Locale
import Duckling.Types
import qualified Duckling.Numeral.FA.Rules as Numeral
-- | Default rules for a dimension in Persian; identical to 'langRules'.
defaultRules :: Seal Dimension -> [Rule]
defaultRules dimension = langRules dimension
-- | Region-specific rules for Persian. Only custom dimensions contribute
-- (through 'dimLocaleRules'); every other dimension yields no rules.
localeRules :: Region -> Seal Dimension -> [Rule]
localeRules region sealed = case sealed of
  Seal (CustomDimension dim) -> dimLocaleRules region dim
  _ -> []
-- | Language-level rules for Persian, per dimension.
--
-- Only 'Numeral' has rules; custom dimensions defer to 'dimLangRules'. Each
-- remaining dimension is listed explicitly, with no wildcard alternative.
langRules :: Seal Dimension -> [Rule]
langRules sealed = case sealed of
  Seal Numeral -> Numeral.rules
  Seal (CustomDimension dim) -> dimLangRules FA dim
  Seal AmountOfMoney -> []
  Seal CreditCardNumber -> []
  Seal Distance -> []
  Seal Duration -> []
  Seal Email -> []
  Seal Ordinal -> []
  Seal PhoneNumber -> []
  Seal Quantity -> []
  Seal RegexMatch -> []
  Seal Temperature -> []
  Seal Time -> []
  Seal TimeGrain -> []
  Seal Url -> []
  Seal Volume -> []

View File

@ -59,6 +59,7 @@ library
, Duckling.Rules.ES
, Duckling.Rules.ET
, Duckling.Rules.FI
, Duckling.Rules.FA
, Duckling.Rules.FR
, Duckling.Rules.GA
, Duckling.Rules.HE
@ -115,6 +116,7 @@ library
, Duckling.Ranking.Classifiers.ES_XX
, Duckling.Ranking.Classifiers.ET_XX
, Duckling.Ranking.Classifiers.FI_XX
, Duckling.Ranking.Classifiers.FA_XX
, Duckling.Ranking.Classifiers.FR_XX
, Duckling.Ranking.Classifiers.GA_XX
, Duckling.Ranking.Classifiers.HE_XX
@ -173,6 +175,7 @@ library
, Duckling.Dimensions.ES
, Duckling.Dimensions.ET
, Duckling.Dimensions.FI
, Duckling.Dimensions.FA
, Duckling.Dimensions.FR
, Duckling.Dimensions.GA
, Duckling.Dimensions.HE
@ -431,6 +434,8 @@ library
, Duckling.Numeral.ET.Rules
, Duckling.Numeral.FI.Corpus
, Duckling.Numeral.FI.Rules
, Duckling.Numeral.FA.Corpus
, Duckling.Numeral.FA.Rules
, Duckling.Numeral.FR.Corpus
, Duckling.Numeral.FR.Rules
, Duckling.Numeral.GA.Corpus
@ -966,6 +971,7 @@ test-suite duckling-test
, Duckling.Numeral.ES.Tests
, Duckling.Numeral.ET.Tests
, Duckling.Numeral.FI.Tests
, Duckling.Numeral.FA.Tests
, Duckling.Numeral.FR.Tests
, Duckling.Numeral.GA.Tests
, Duckling.Numeral.HE.Tests

View File

@ -182,6 +182,7 @@ getCorpusForLang EN = ENTime.corpus
getCorpusForLang ES = ESTime.corpus
getCorpusForLang ET = (testContext, testOptions, [])
getCorpusForLang FI = (testContext, testOptions, [])
getCorpusForLang FA = (testContext, testOptions, [])
getCorpusForLang FR = FRTime.corpus
getCorpusForLang GA = GATime.corpus
getCorpusForLang HR = HRTime.corpus

View File

@ -0,0 +1,21 @@
-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree.
module Duckling.Numeral.FA.Tests
( tests ) where
import Data.String
import Test.Tasty
import Duckling.Dimensions.Types
import Duckling.Numeral.FA.Corpus
import Duckling.Testing.Asserts
-- | Test group for Persian numerals: a corpus test over 'corpus' for the
-- 'Numeral' dimension.
tests :: TestTree
tests = testGroup "FA Tests" [numeralCorpusTest]
  where
    numeralCorpusTest = makeCorpusTest [Seal Numeral] corpus

View File

@ -23,6 +23,7 @@ import qualified Duckling.Numeral.EN.Tests as EN
import qualified Duckling.Numeral.ES.Tests as ES
import qualified Duckling.Numeral.ET.Tests as ET
import qualified Duckling.Numeral.FI.Tests as FI
import qualified Duckling.Numeral.FA.Tests as FA
import qualified Duckling.Numeral.FR.Tests as FR
import qualified Duckling.Numeral.GA.Tests as GA
import qualified Duckling.Numeral.HE.Tests as HE
@ -71,6 +72,7 @@ tests = testGroup "Numeral Tests"
, EN.tests
, ES.tests
, ET.tests
, FA.tests
, FR.tests
, FI.tests
, GA.tests