Add Duration Dimension to Arabic Language

Summary: Closes https://github.com/facebookincubator/duckling/pull/94

Reviewed By: blandinw

Differential Revision: D6078221

Pulled By: patapizza

fbshipit-source-id: b653b24
This commit is contained in:
Abdallatif Sulaiman 2017-10-17 15:51:35 -07:00 committed by Facebook Github Bot
parent ed58115caf
commit 18cd2210ac
10 changed files with 338 additions and 17 deletions

View File

@ -14,6 +14,7 @@ import Duckling.Dimensions.Types
allDimensions :: [Some Dimension]
allDimensions =
[ This Numeral
[ This Duration
, This Numeral
, This Ordinal
]

View File

@ -0,0 +1,61 @@
-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree. An additional grant
-- of patent rights can be found in the PATENTS file in the same directory.
{-# LANGUAGE OverloadedStrings #-}
module Duckling.Duration.AR.Corpus
( corpus
) where
import Data.String
import Prelude
import Duckling.Duration.Types
import Duckling.Locale
import Duckling.Resolve
import Duckling.Testing.Types
import Duckling.TimeGrain.Types (Grain(..))
corpus :: Corpus
corpus = (testContext {locale = makeLocale AR Nothing}, allExamples)
allExamples :: [Example]
allExamples = concat
[ examples (DurationData 1 Second)
[ "ثانية"
, "لحظة"
]
, examples (DurationData 2 Minute)
[ "دقيقتان"
, "دقيقتين"
]
, examples (DurationData 5 Hour)
[ "خمسة ساعات"
]
, examples (DurationData 30 Day)
[ "30 يوم"
]
, examples (DurationData 1 Week)
[ "اسبوع"
]
, examples (DurationData 7 Week)
[ "سبع اسابيع"
]
, examples (DurationData 1 Month)
[ "شهر"
]
, examples (DurationData 2 Month)
[ "شهرين"
]
, examples (DurationData 2 Year)
[ "سنتين"
, "سنتان"
, "عامين"
, "عامان"
]
]

View File

@ -0,0 +1,176 @@
-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree. An additional grant
-- of patent rights can be found in the PATENTS file in the same directory.
{-# LANGUAGE GADTs #-}
{-# LANGUAGE NoRebindableSyntax #-}
{-# LANGUAGE OverloadedStrings #-}
module Duckling.Duration.AR.Rules
( rules
) where
import Data.String
import Prelude
import qualified Data.Text as Text
import Duckling.Dimensions.Types
import Duckling.Duration.Helpers
import Duckling.Numeral.Helpers (parseInt, parseInteger)
import Duckling.Numeral.Types (NumeralData(..))
import Duckling.Regex.Types
import Duckling.Types
import qualified Duckling.Numeral.Types as TNumeral
import qualified Duckling.TimeGrain.Types as TG
ruleDurationQuarterOfAnHour :: Rule
ruleDurationQuarterOfAnHour = Rule
{ name = "quarter of an hour"
, pattern =
[ regex "(ربع ساعة)"
]
, prod = \_ -> Just . Token Duration $ duration TG.Minute 15
}
ruleDurationHalfAnHour :: Rule
ruleDurationHalfAnHour = Rule
{ name = "half an hour"
, pattern =
[ regex "(1/2\\s?ساع[ةه]?|نصف? ساع[ةه])"
]
, prod = \_ -> Just . Token Duration $ duration TG.Minute 30
}
ruleDurationThreeQuartersOfAnHour :: Rule
ruleDurationThreeQuartersOfAnHour = Rule
{ name = "three-quarters of an hour"
, pattern =
[ regex "(3/4\\s?(ال)?ساع[ةه]?|ثلاث[ةه]?(\\s|-)[أا]رباع (ال)?ساع[ةه])"
]
, prod = \_ -> Just . Token Duration $ duration TG.Minute 45
}
ruleNumeralQuotes :: Rule
ruleNumeralQuotes = Rule
{ name = "<integer> + '\""
, pattern =
[ Predicate isNatural
, regex "(['\"])"
]
, prod = \tokens -> case tokens of
(Token Numeral (NumeralData {TNumeral.value = v}):
Token RegexMatch (GroupMatch (x:_)):
_) -> case x of
"'" -> Just . Token Duration . duration TG.Minute $ floor v
"\"" -> Just . Token Duration . duration TG.Second $ floor v
_ -> Nothing
_ -> Nothing
}
ruleDurationDotNumeralHours :: Rule
ruleDurationDotNumeralHours = Rule
{ name = "number.number hours"
, pattern =
[ regex "(\\d+)\\.(\\d+) *ساع(ة|ات)"
]
, prod = \tokens -> case tokens of
(Token RegexMatch (GroupMatch (h:m:_)):_) -> do
hh <- parseInteger h
mnum <- parseInteger m
let mden = 10 ^ Text.length m
Just . Token Duration $ minutesFromHourMixedFraction hh mnum mden
_ -> Nothing
}
ruleDurationAndHalfHour :: Rule
ruleDurationAndHalfHour = Rule
{ name = "<integer> and an half hour"
, pattern =
[ Predicate isNatural
, regex "و ?نصف? ساع[ةه]"
]
, prod = \tokens -> case tokens of
(Token Numeral (NumeralData {TNumeral.value = v}):_) ->
Just . Token Duration . duration TG.Minute $ 30 + 60 * floor v
_ -> Nothing
}
ruleTwoSeconds :: Rule
ruleTwoSeconds = Rule
{ name = "two seconds"
, pattern =
[ regex "ثانيتين|ثانيتان|لحظتين|لحظتان"
]
, prod = \_ -> Just . Token Duration $ duration TG.Second 2
}
ruleTwoMinutes :: Rule
ruleTwoMinutes = Rule
{ name = "two minutes"
, pattern =
[ regex "دقيقتين|دقيقتان"
]
, prod = \_ -> Just . Token Duration $ duration TG.Minute 2
}
ruleTwoHours :: Rule
ruleTwoHours = Rule
{ name = "two hours"
, pattern =
[ regex "ساعتين|ساعتان"
]
, prod = \_ -> Just . Token Duration $ duration TG.Hour 2
}
ruleTwoYears :: Rule
ruleTwoYears = Rule
{ name = "dual years"
, pattern =
[ regex "سنتين|سنتان"
]
, prod = \_ -> Just . Token Duration $ duration TG.Year 2
}
-- this rule handles TG.Day, TG.Week and TG.Month
ruleDualUnitofduration :: Rule
ruleDualUnitofduration = Rule
{ name = "dual <unit-of-duration>"
, pattern =
[ dimension TimeGrain
, regex "(ان|ين)"
]
, prod = \tokens -> case tokens of
(Token TimeGrain grain:_) -> Just . Token Duration $ duration grain 2
_ -> Nothing
}
ruleSingleUnitofduration :: Rule
ruleSingleUnitofduration = Rule
{ name = "single <unit-of-duration>"
, pattern =
[ dimension TimeGrain
]
, prod = \tokens -> case tokens of
(Token TimeGrain grain:_) -> Just . Token Duration $ duration grain 1
_ -> Nothing
}
rules :: [Rule]
rules =
[ ruleDurationQuarterOfAnHour
, ruleDurationHalfAnHour
, ruleDurationThreeQuartersOfAnHour
, ruleDurationDotNumeralHours
, ruleDurationAndHalfHour
, ruleNumeralQuotes
, ruleTwoSeconds
, ruleTwoMinutes
, ruleTwoHours
, ruleTwoYears
, ruleDualUnitofduration
, ruleSingleUnitofduration
]

View File

@ -7,50 +7,59 @@
{-# LANGUAGE GADTs #-}
{-# LANGUAGE NoRebindableSyntax #-}
{-# LANGUAGE OverloadedStrings #-}
module Duckling.Duration.EN.Rules
( rules ) where
( rules
) where
import Control.Monad (join)
import qualified Data.Text as Text
import Prelude
import Data.String
import Prelude
import qualified Data.Text as Text
import Duckling.Dimensions.Types
import Duckling.Duration.Helpers
import Duckling.Numeral.Helpers (parseInt, parseInteger)
import Duckling.Numeral.Types (NumeralData(..))
import qualified Duckling.Numeral.Types as TNumeral
import Duckling.Regex.Types
import qualified Duckling.TimeGrain.Types as TG
import Duckling.Types
import qualified Duckling.Numeral.Types as TNumeral
import qualified Duckling.TimeGrain.Types as TG
ruleDurationQuarterOfAnHour :: Rule
ruleDurationQuarterOfAnHour = Rule
{ name = "quarter of an hour"
, pattern = [ regex "(1/4\\s?h(our)?|(a\\s)?quarter of an hour)" ]
, pattern =
[ regex "(1/4\\s?h(our)?|(a\\s)?quarter of an hour)"
]
, prod = \_ -> Just . Token Duration $ duration TG.Minute 15
}
ruleDurationHalfAnHour :: Rule
ruleDurationHalfAnHour = Rule
{ name = "half an hour"
, pattern = [regex "(1/2\\s?h(our)?|half an? hour)"]
, pattern =
[ regex "(1/2\\s?h(our)?|half an? hour)"
]
, prod = \_ -> Just . Token Duration $ duration TG.Minute 30
}
ruleDurationThreeQuartersOfAnHour :: Rule
ruleDurationThreeQuartersOfAnHour = Rule
{ name = "three-quarters of an hour"
, pattern = [regex "(3/4\\s?h(our)?|three(\\s|-)quarters of an hour)"]
, pattern =
[ regex "(3/4\\s?h(our)?|three(\\s|-)quarters of an hour)"
]
, prod = \_ -> Just . Token Duration $ duration TG.Minute 45
}
ruleDurationFortnight :: Rule
ruleDurationFortnight = Rule
{ name = "fortnight"
, pattern = [regex "(a|one)? fortnight"]
, pattern =
[ regex "(a|one)? fortnight"
]
, prod = \_ -> Just . Token Duration $ duration TG.Day 14
}
@ -88,7 +97,9 @@ ruleDurationNumeralMore = Rule
ruleDurationDotNumeralHours :: Rule
ruleDurationDotNumeralHours = Rule
{ name = "number.number hours"
, pattern = [regex "(\\d+)\\.(\\d+) *hours?"]
, pattern =
[ regex "(\\d+)\\.(\\d+) *hours?"
]
, prod = \tokens -> case tokens of
(Token RegexMatch (GroupMatch (h:m:_)):_) -> do
hh <- parseInteger h

View File

@ -18,8 +18,10 @@ module Duckling.Rules.AR
import Duckling.Dimensions.Types
import Duckling.Locale
import Duckling.Types
import qualified Duckling.Duration.AR.Rules as Duration
import qualified Duckling.Numeral.AR.Rules as Numeral
import qualified Duckling.Ordinal.AR.Rules as Ordinal
import qualified Duckling.TimeGrain.AR.Rules as TimeGrain
defaultRules :: Some Dimension -> [Rule]
defaultRules = langRules
@ -30,7 +32,7 @@ localeRules _ _ = []
langRules :: Some Dimension -> [Rule]
langRules (This AmountOfMoney) = []
langRules (This Distance) = []
langRules (This Duration) = []
langRules (This Duration) = Duration.rules
langRules (This Email) = []
langRules (This Numeral) = Numeral.rules
langRules (This Ordinal) = Ordinal.rules
@ -39,6 +41,6 @@ langRules (This Quantity) = []
langRules (This RegexMatch) = []
langRules (This Temperature) = []
langRules (This Time) = []
langRules (This TimeGrain) = []
langRules (This TimeGrain) = TimeGrain.rules
langRules (This Url) = []
langRules (This Volume) = []

View File

@ -0,0 +1,40 @@
-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree. An additional grant
-- of patent rights can be found in the PATENTS file in the same directory.
{-# LANGUAGE OverloadedStrings #-}
module Duckling.TimeGrain.AR.Rules
( rules
) where
import Data.String
import Data.Text (Text)
import Prelude
import Duckling.Dimensions.Types
import Duckling.Types
import qualified Duckling.TimeGrain.Types as TG
grains :: [(Text, String, TG.Grain)]
grains = [ ("second (grain) ", "(ثاني(ة|ه)?|ثواني|لحظ(ة|ه|ات))", TG.Second)
, ("minute (grain)" , "دق(يق(ة|ه)|ائق)" , TG.Minute)
, ("hour (grain)" , "ساع(ة|ه|ات)" , TG.Hour)
, ("day (grain)" , "يوم|(ا|أ)يام" , TG.Day)
, ("week (grain)" , "(ا|أ|إ)س(بوع|ابيع)" , TG.Week)
, ("month (grain)" , "شهر|(ا|أ|إ)شهر" , TG.Month)
, ("year (grain)" , "سن(ة|ين)|عام" , TG.Year)
]
rules :: [Rule]
rules = map go grains
where
go (name, regexPattern, grain) = Rule
{ name = name
, pattern = [regex regexPattern]
, prod = \_ -> Just $ Token TimeGrain grain
}

View File

@ -210,6 +210,8 @@ library
, Duckling.Distance.Types
-- Duration
, Duckling.Duration.AR.Corpus
, Duckling.Duration.AR.Rules
, Duckling.Duration.DA.Rules
, Duckling.Duration.DE.Rules
, Duckling.Duration.EN.Corpus
@ -488,6 +490,7 @@ library
, Duckling.Time.TimeZone.Parse
-- TimeGrain
, Duckling.TimeGrain.AR.Rules
, Duckling.TimeGrain.DA.Rules
, Duckling.TimeGrain.DE.Rules
, Duckling.TimeGrain.EN.Rules
@ -616,6 +619,7 @@ test-suite duckling-test
, Duckling.Distance.Tests
-- Duration
, Duckling.Duration.AR.Tests
, Duckling.Duration.EN.Tests
, Duckling.Duration.FR.Tests
, Duckling.Duration.GA.Tests

View File

@ -120,7 +120,7 @@ supportedDimensionsTest = testCase "Supported Dimensions Test" $ do
mapM_ check
[ ( AR
, [ This Email, This AmountOfMoney, This PhoneNumber, This Url
, This Numeral, This Ordinal
, This Duration, This Numeral, This Ordinal
]
)
, ( PL

View File

@ -0,0 +1,24 @@
-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree. An additional grant
-- of patent rights can be found in the PATENTS file in the same directory.
module Duckling.Duration.AR.Tests
( tests
) where
import Data.String
import Prelude
import Test.Tasty
import Duckling.Dimensions.Types
import Duckling.Duration.AR.Corpus
import Duckling.Testing.Asserts
tests :: TestTree
tests = testGroup "AR Tests"
[ makeCorpusTest [This Duration] corpus
]

View File

@ -10,10 +10,11 @@ module Duckling.Duration.Tests
( tests
) where
import Prelude
import Data.String
import Prelude
import Test.Tasty
import qualified Duckling.Duration.AR.Tests as AR
import qualified Duckling.Duration.EN.Tests as EN
import qualified Duckling.Duration.FR.Tests as FR
import qualified Duckling.Duration.GA.Tests as GA
@ -31,7 +32,8 @@ import qualified Duckling.Duration.ZH.Tests as ZH
tests :: TestTree
tests = testGroup "Duration Tests"
[ EN.tests
[ AR.tests
, EN.tests
, FR.tests
, GA.tests
, HU.tests