Added TimeGrain and Duration Dimensions to Russian language

Summary:
- Added Duration dimension to Russian language
- Added TimeGrain dimension to Russian language
- Refactored isNatural and isNaturalWith out of Duration helpers into Numeral helpers
- Implemented <integer> and a half rule for Russian Numeral
- Changed the type of inSeconds to a polymorphic one
Closes https://github.com/facebook/duckling/pull/105

Reviewed By: blandinw

Differential Revision: D6312604

Pulled By: patapizza

fbshipit-source-id: 9ae237b4beb6915ff8da013230457937d8e56733
This commit is contained in:
igor-drozdov 2017-11-15 10:41:33 -08:00 committed by Facebook Github Bot
parent f6492b5da0
commit 29d776dee5
20 changed files with 335 additions and 28 deletions

View File

@ -15,6 +15,7 @@ import Duckling.Dimensions.Types
allDimensions :: [Some Dimension]
allDimensions =
[ This Distance
, This Duration
, This Numeral
, This Ordinal
, This Quantity

View File

@ -114,6 +114,7 @@ ruleDurationHalfAnHour = Rule
, prod = \_ -> Just . Token Duration $ duration TG.Minute 30
}
-- TODO: Single-word composition (#110)
ruleNumeralWithGrain :: Rule
ruleNumeralWithGrain = Rule
{ name = "<number><grain> (one word)"
@ -213,6 +214,7 @@ ruleDurationAndAHalf = Rule
_ -> Nothing
}
-- TODO: Single-word composition (#110)
ruleDurationAndAHalfOneWord :: Rule
ruleDurationAndAHalfOneWord = Rule
{ name = "<integer-and-half> <grain>"

View File

@ -12,8 +12,8 @@ module Duckling.Duration.GA.Corpus
( corpus
) where
import Prelude
import Data.String
import Prelude
import Duckling.Duration.Types
import Duckling.Locale
@ -36,6 +36,9 @@ allExamples = concat
[ "leathuair"
, "30 noimead"
]
, examples (DurationData 27 Minute)
[ "7 noimead 20"
]
, examples (DurationData 14 Day)
[ "coicís"
]

View File

@ -17,6 +17,7 @@ import Data.String
import Duckling.Dimensions.Types
import Duckling.Duration.Helpers
import Duckling.Numeral.Helpers (numberWith)
import Duckling.Numeral.Types (NumeralData(..))
import qualified Duckling.Numeral.Types as TNumeral
import qualified Duckling.TimeGrain.Types as TG
@ -44,9 +45,9 @@ ruleAonDurationAmhain :: Rule
ruleAonDurationAmhain = Rule
{ name = "aon X amhain"
, pattern =
[ isNumeralWith TNumeral.value (== 1)
[ numberWith TNumeral.value (== 1)
, dimension TimeGrain
, isNumeralWith TNumeral.value (== 1)
, numberWith TNumeral.value (== 1)
]
, prod = \tokens -> case tokens of
(_:Token TimeGrain grain:_) -> Just . Token Duration $ duration grain 1
@ -57,9 +58,9 @@ ruleIntegerUnitofdurationInteger :: Rule
ruleIntegerUnitofdurationInteger = Rule
{ name = "<unit-integer> <unit-of-duration> <tens-integer>"
, pattern =
[ isNumeralWith TNumeral.value (< 10)
[ numberWith TNumeral.value (< 10)
, dimension TimeGrain
, isNumeralWith TNumeral.value (`elem` [10, 20 .. 50])
, numberWith TNumeral.value (`elem` [10, 20 .. 50])
]
, prod = \tokens -> case tokens of
(Token Numeral (NumeralData {TNumeral.value = v1}):

View File

@ -13,7 +13,6 @@ module Duckling.Duration.Helpers
( duration
, isGrain
, isNatural
, isNumeralWith
, minutesFromHourMixedFraction
, timesOneAndAHalf
) where
@ -22,11 +21,10 @@ import Prelude
import Duckling.Dimensions.Types
import Duckling.Duration.Types (DurationData (DurationData))
import qualified Duckling.Duration.Types as TDuration
import Duckling.Numeral.Types (NumeralData (NumeralData))
import qualified Duckling.Numeral.Types as TNumeral
import qualified Duckling.TimeGrain.Types as TG
import Duckling.Numeral.Helpers (isNatural)
import Duckling.Types
import qualified Duckling.Duration.Types as TDuration
import qualified Duckling.TimeGrain.Types as TG
-- -----------------------------------------------------------------
-- Patterns
@ -35,16 +33,6 @@ isGrain :: TG.Grain -> Predicate
isGrain value (Token TimeGrain grain) = grain == value
isGrain _ _ = False
isNatural :: Predicate
isNatural (Token Numeral NumeralData {TNumeral.value = x}) =
TNumeral.isNatural x
isNatural _ = False
isNumeralWith :: (NumeralData -> t) -> (t -> Bool) -> PatternItem
isNumeralWith f pred = Predicate $ \x -> case x of
(Token Numeral x) -> pred $ f x
_ -> False
-- -----------------------------------------------------------------
-- Production

View File

@ -0,0 +1,96 @@
-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree. An additional grant
-- of patent rights can be found in the PATENTS file in the same directory.
{-# LANGUAGE OverloadedStrings #-}
module Duckling.Duration.RU.Corpus
( corpus
) where
import Data.String
import Prelude
import Duckling.Duration.Types
import Duckling.Locale
import Duckling.Resolve
import Duckling.Testing.Types
import Duckling.TimeGrain.Types (Grain(..))
-- | Russian duration corpus: the shared test context with its locale set to
-- RU (no region), paired with the examples defined in 'allExamples'.
corpus :: Corpus
corpus = (context, allExamples)
  where
    context = testContext {locale = makeLocale RU Nothing}
-- | Russian phrases grouped by the 'DurationData' each group is expected to
-- resolve to.
allExamples :: [Example]
allExamples = concat
  [ expecting 1 Second
      [ "1 сек"
      , "1 секунда"
      , "секунда"
      , "1\""
      ]
  , expecting 15 Minute
      [ "15 мин"
      , "пятнадцать минут"
      , "15'"
      ]
  , expecting 30 Minute
      [ "30 минут"
      , "пол часа"
      , "полчаса"
      , "тридцать минут"
      ]
  , expecting 5400 Second
      [ "полтора часа"
      , "1.5 часа"
      , "5400 секунд"
      ]
  , expecting 8 Hour
      [ "8 часов"
      , "8 ч"
      , "восемь часов"
      ]
  , expecting 15 Day
      [ "15 дней"
      , "пятнадцать дней"
      , "полмесяца"
      ]
  , expecting 7 Week
      [ "7 недель"
      , "семь недель"
      ]
  , expecting 1 Month
      [ "1 месяц"
      , "месяц"
      , "ровно месяц"
      ]
  , expecting 6 Month
      [ "6 месяцев"
      , "шесть месяцев"
      , "полгода"
      , "пол года"
      ]
  , expecting 9072000 Second
      [ "3.5 месяца"
      , "три с половиной месяца"
      , "приблизительно 3.5 месяца"
      ]
  , expecting 3 Quarter
      [ "3 квартала"
      ]
  , expecting 2 Year
      [ "2 года"
      , "два года"
      , "где-то два года"
      ]
  , expecting 12 Hour
      [ "12 часов"
      , "двенадцать часов"
      , "полдня"
      , "примерно полдня"
      , "пол дня"
      ]
  ]
  where
    -- Shorthand: the phrases that follow must all resolve to
    -- @DurationData amount unit@.
    expecting amount unit = examples (DurationData amount unit)

View File

@ -0,0 +1,118 @@
-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree. An additional grant
-- of patent rights can be found in the PATENTS file in the same directory.
{-# LANGUAGE GADTs #-}
{-# LANGUAGE NoRebindableSyntax #-}
{-# LANGUAGE OverloadedStrings #-}
module Duckling.Duration.RU.Rules
( rules
) where
import Data.HashMap.Strict (HashMap)
import Data.String
import Data.Text (Text)
import Prelude
import qualified Data.HashMap.Strict as HashMap
import qualified Data.Text as Text
import Duckling.Dimensions.Types
import Duckling.Duration.Helpers
import Duckling.Numeral.Helpers (numberWith)
import Duckling.Numeral.Types (NumeralData(..), isInteger)
import Duckling.Duration.Types (DurationData (DurationData))
import Duckling.Regex.Types
import Duckling.Types
import Duckling.TimeGrain.Types
import qualified Duckling.Numeral.Types as TNumeral
-- | Maps the genitive word forms captured by the regex in 'ruleHalves' to
-- the grain each one denotes.
grainsMap :: HashMap Text Grain
grainsMap = HashMap.fromList genitiveForms
  where
    genitiveForms =
      [ ("года", Year)
      , ("месяца", Month)
      , ("дня", Day)
      , ("часа", Hour)
      , ("минуты", Minute)
      ]
-- TODO: Single-word composition (#110)
-- | Matches the prefix "пол" (optionally followed by one whitespace
-- character) and a grain word, e.g. "полчаса" or "пол года".
--
-- The captured word is looked up in 'grainsMap'; the duration is produced by
-- @timesOneAndAHalf grain 0@. NOTE(review): that helper is defined elsewhere;
-- presumably with a count of 0 it yields exactly half of the grain - confirm.
ruleHalves :: Rule
ruleHalves = Rule
  { name = "half of a grain"
  , pattern =
    [ regex "пол\\s?(года|месяца|дня|часа|минуты)"
    ]
  , prod = \tokens -> case tokens of
      (Token RegexMatch (GroupMatch (unit:_)):_) ->
        -- Lower-case the capture before the lookup: the map keys are
        -- lower-case.
        HashMap.lookup (Text.toLower unit) grainsMap
          >>= \grain -> Token Duration <$> timesOneAndAHalf grain 0
      _ -> Nothing
  }
-- | Reads a natural number followed by a quote mark as a duration: @'@ gives
-- minutes and @"@ gives seconds (e.g. @15'@, @1"@).
ruleNumeralQuotes :: Rule
ruleNumeralQuotes = Rule
  { name = "<integer> + '\""
  , pattern =
    [ Predicate isNatural
    , regex "(['\"])"
    ]
  , prod = \tokens -> case tokens of
      (Token Numeral (NumeralData {TNumeral.value = amount}):
       Token RegexMatch (GroupMatch (mark:_)):
       _) ->
        Token Duration . flip duration (floor amount)
          <$> lookup mark quoteGrains
      _ -> Nothing
  }
  where
    -- Quote mark captured by the regex, paired with the grain it stands for.
    quoteGrains :: [(Text, Grain)]
    quoteGrains = [("'", Minute), ("\"", Second)]
-- | Accepts a precision word ("где-то", "приблизительно", "примерно",
-- "ровно") in front of a duration and returns that duration token unchanged;
-- the precision word itself is discarded.
ruleDurationPrecision :: Rule
ruleDurationPrecision = Rule
  { name = "about|exactly <duration>"
  , pattern =
    [ regex "(где-то|приблизительно|примерно|ровно)"
    , dimension Duration
    ]
  , prod = \tokens -> case drop 1 tokens of
      -- Skip the precision word; the next token is the duration itself.
      (durationToken:_) -> Just durationToken
      [] -> Nothing
  }
-- | Treats a bare time grain as a duration of one unit of that grain.
ruleGrainAsDuration :: Rule
ruleGrainAsDuration = Rule
  { name = "a <unit-of-duration>"
  , pattern =
    [ dimension TimeGrain
    ]
  , prod = \tokens -> case tokens of
      (Token TimeGrain unit:_) -> Just (Token Duration (duration unit 1))
      _ -> Nothing
  }
-- | Handles a positive, non-integer numeral followed by a time grain.
--
-- The amount is converted with 'inSeconds' and rounded down with 'floor', so
-- the result is always expressed in whole seconds.
rulePositiveDuration :: Rule
rulePositiveDuration = Rule
  { name = "<positive-numeral> <time-grain>"
  , pattern =
    [ numberWith TNumeral.value (\v -> not (isInteger v) && v > 0)
    , dimension TimeGrain
    ]
  , prod = \tokens -> case tokens of
      (Token Numeral (NumeralData {TNumeral.value = amount}):
       Token TimeGrain unit:
       _) ->
        let wholeSeconds = floor (inSeconds unit amount)
        in Just (Token Duration (duration Second wholeSeconds))
      _ -> Nothing
  }
-- | Every Russian-specific duration rule exported by this module.
rules :: [Rule]
rules =
  [ rulePositiveDuration, ruleDurationPrecision, ruleNumeralQuotes
  , ruleGrainAsDuration, ruleHalves
  ]

View File

@ -13,14 +13,14 @@ module Duckling.Duration.Rules
( rules
) where
import Prelude
import Data.String
import Prelude
import Duckling.Dimensions.Types
import Duckling.Duration.Helpers
import Duckling.Numeral.Types (NumeralData(..))
import qualified Duckling.Numeral.Types as TNumeral
import Duckling.Types
import qualified Duckling.Numeral.Types as TNumeral
ruleIntegerUnitofduration :: Rule
ruleIntegerUnitofduration = Rule

View File

@ -98,6 +98,7 @@ ruleDecimalNumeral = Rule
_ -> Nothing
}
-- TODO: Single-word composition (#110)
ruleInteger3 :: Rule
ruleInteger3 = Rule
{ name = "integer ([2-9][1-9])"
@ -262,6 +263,7 @@ zeroNineteenMap = HashMap.fromList
, ("neunzehn", 19)
]
-- TODO: Single-word composition (#110)
ruleToNineteen :: Rule
ruleToNineteen = Rule
{ name = "integer (0..19)"
@ -316,6 +318,7 @@ ruleInteger = Rule
_ -> Nothing
}
-- TODO: Single-word composition (#110)
ruleInteger2 :: Rule
ruleInteger2 = Rule
{ name = "integer (20..90)"

View File

@ -14,6 +14,7 @@ module Duckling.Numeral.Helpers
, double
, integer
, multiply
, isNatural
, divide
, notOkForAnyTime
, numberBetween
@ -88,6 +89,11 @@ numberBetween low up = Predicate $ \x ->
low <= v && v < up
_ -> False
-- | Holds for numeral tokens whose value is a strictly positive integer;
-- every other token is rejected.
isNatural :: Predicate
isNatural token = case token of
  Token Numeral NumeralData {value = v} -> isInteger v && v > 0
  _ -> False
oneOf :: [Double] -> PatternItem
oneOf vs = Predicate $ \x ->
case x of

View File

@ -97,6 +97,7 @@ ruleDecimalNumeral = Rule
_ -> Nothing
}
-- TODO: Single-word composition (#110)
ruleInteger3 :: Rule
ruleInteger3 = Rule
{ name = "integer ([2-9][1-9])"

View File

@ -93,6 +93,11 @@ allExamples = concat
, examples (NumeralValue 1.5)
[ "1.5"
, "полторы"
, "один с половиной"
]
, examples (NumeralValue 3.5)
[ "3.5"
, "три с половиной"
]
, examples (NumeralValue 1.1)
[ "1.1"

View File

@ -106,6 +106,18 @@ ruleDecimalOneAndAHalf = Rule
, prod = \_ -> double 1.5
}
-- | Parses a natural number followed by "с половиной" ("and a half") as that
-- number plus 0.5, e.g. "три с половиной" gives 3.5.
ruleIntegerAndAHalf :: Rule
ruleIntegerAndAHalf = Rule
  { name = "<integer> and a half"
  , pattern =
    [ Predicate isNatural
    , regex "с половиной"
    ]
  , prod = \tokens -> case tokens of
      (Token Numeral (NumeralData {TNumeral.value = whole}):_) ->
        double (whole + 0.5)
      _ -> Nothing
  }
hundredsMap :: HashMap Text Integer
hundredsMap = HashMap.fromList
[ ( "сто", 100)
@ -281,6 +293,7 @@ rules =
, ruleInteger6
, ruleInteger7
, ruleInteger8
, ruleIntegerAndAHalf
, ruleDecimalOneAndAHalf
, ruleIntegerNumeric
, ruleIntegerWithThousandsSeparator

View File

@ -58,9 +58,6 @@ getIntValue x = if rest == 0 then Just int else Nothing
isInteger :: Double -> Bool
isInteger = isJust . getIntValue
isNatural :: Double -> Bool
isNatural x = isInteger x && x > 0
isIntegerBetween :: Double -> Int -> Int -> Bool
isIntegerBetween x low high = case getIntValue x of
Just int -> low <= int && int <= high

View File

@ -19,9 +19,11 @@ import Duckling.Dimensions.Types
import Duckling.Locale
import Duckling.Types
import qualified Duckling.Distance.RU.Rules as Distance
import qualified Duckling.Duration.RU.Rules as Duration
import qualified Duckling.Numeral.RU.Rules as Numeral
import qualified Duckling.Ordinal.RU.Rules as Ordinal
import qualified Duckling.Quantity.RU.Rules as Quantity
import qualified Duckling.TimeGrain.RU.Rules as TimeGrain
import qualified Duckling.Volume.RU.Rules as Volume
defaultRules :: Some Dimension -> [Rule]
@ -33,7 +35,7 @@ localeRules _ _ = []
langRules :: Some Dimension -> [Rule]
langRules (This AmountOfMoney) = []
langRules (This Distance) = Distance.rules
langRules (This Duration) = []
langRules (This Duration) = Duration.rules
langRules (This Email) = []
langRules (This Numeral) = Numeral.rules
langRules (This Ordinal) = Ordinal.rules
@ -42,6 +44,6 @@ langRules (This Quantity) = Quantity.rules
langRules (This RegexMatch) = []
langRules (This Temperature) = []
langRules (This Time) = []
langRules (This TimeGrain) = []
langRules (This TimeGrain) = TimeGrain.rules
langRules (This Url) = []
langRules (This Volume) = Volume.rules

View File

@ -0,0 +1,41 @@
-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree. An additional grant
-- of patent rights can be found in the PATENTS file in the same directory.
{-# LANGUAGE OverloadedStrings #-}
module Duckling.TimeGrain.RU.Rules
( rules
) where
import Data.String
import Data.Text (Text)
import Prelude
import Duckling.Dimensions.Types
import Duckling.TimeGrain.Types
import Duckling.Types
-- | Table driving the Russian time-grain rules: rule name, regex matching
-- the grain word (abbreviations and inflected forms), and the grain produced.
--
-- The "second" rule name previously ended in a stray space, unlike every
-- other entry in this table; it now follows the same "<unit> (grain)" form.
grains :: [(Text, String, Grain)]
grains = [ ("second (grain)" , "сек(унд(а|у|ы)?)?", Second)
         , ("minute (grain)" , "мин(ут(а|у|ы)?)?", Minute)
         , ("hour (grain)"   , "ч(ас(а|ов)?)?", Hour)
         , ("day (grain)"    , "день|дня|дней", Day)
         , ("week (grain)"   , "недел(ь|я|и|ю)?", Week)
         , ("month (grain)"  , "месяц(а|ев)?", Month)
         , ("quarter (grain)", "квартал(а)?", Quarter)
         , ("year (grain)"   , "года?|лет", Year)
         ]
-- | One rule per entry of 'grains': the entry's regex is the only pattern
-- item, and a match always produces the entry's grain as a 'TimeGrain' token.
rules :: [Rule]
rules = [toRule entry | entry <- grains]
  where
    toRule (ruleName, grainRegex, grain) = Rule
      { name = ruleName
      , pattern = [regex grainRegex]
      , prod = const (Just (Token TimeGrain grain))
      }

View File

@ -61,7 +61,7 @@ add utcTime Quarter n =
updateUTCDay utcTime . Time.addGregorianMonthsClip $ 3 * n
add utcTime Year n = updateUTCDay utcTime $ Time.addGregorianYearsClip n
inSeconds :: Grain -> Int -> Int
inSeconds :: Num a => Grain -> a -> a
inSeconds NoGrain n = n
inSeconds Second n = n
inSeconds Minute n = n * 60

View File

@ -247,6 +247,8 @@ library
, Duckling.Duration.ZH.Corpus
, Duckling.Duration.RO.Corpus
, Duckling.Duration.RO.Rules
, Duckling.Duration.RU.Corpus
, Duckling.Duration.RU.Rules
, Duckling.Duration.TR.Corpus
, Duckling.Duration.TR.Rules
, Duckling.Duration.Helpers
@ -542,6 +544,7 @@ library
, Duckling.TimeGrain.PL.Rules
, Duckling.TimeGrain.PT.Rules
, Duckling.TimeGrain.RO.Rules
, Duckling.TimeGrain.RU.Rules
, Duckling.TimeGrain.SV.Rules
, Duckling.TimeGrain.TR.Rules
, Duckling.TimeGrain.VI.Rules
@ -669,6 +672,7 @@ test-suite duckling-test
, Duckling.Duration.PL.Tests
, Duckling.Duration.PT.Tests
, Duckling.Duration.RO.Tests
, Duckling.Duration.RU.Tests
, Duckling.Duration.SV.Tests
, Duckling.Duration.TR.Tests
, Duckling.Duration.ZH.Tests

View File

@ -0,0 +1,24 @@
-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree. An additional grant
-- of patent rights can be found in the PATENTS file in the same directory.
module Duckling.Duration.RU.Tests
( tests
) where
import Data.String
import Prelude
import Test.Tasty
import Duckling.Dimensions.Types
import Duckling.Duration.RU.Corpus
import Duckling.Testing.Asserts
-- | Test group for Russian durations: runs the RU corpus against the
-- Duration dimension only.
tests :: TestTree
tests = testGroup "RU Tests" [corpusTest]
  where
    corpusTest = makeCorpusTest [This Duration] corpus

View File

@ -27,6 +27,7 @@ import qualified Duckling.Duration.NL.Tests as NL
import qualified Duckling.Duration.PL.Tests as PL
import qualified Duckling.Duration.PT.Tests as PT
import qualified Duckling.Duration.RO.Tests as RO
import qualified Duckling.Duration.RU.Tests as RU
import qualified Duckling.Duration.SV.Tests as SV
import qualified Duckling.Duration.TR.Tests as TR
import qualified Duckling.Duration.ZH.Tests as ZH
@ -46,6 +47,7 @@ tests = testGroup "Duration Tests"
, PL.tests
, PT.tests
, RO.tests
, RU.tests
, SV.tests
, TR.tests
, ZH.tests