Russian(RU) duration improvements (#375)

Summary:
- diminutives for minutes and hours
- quarters of an hour
- added 'сутки' (24-hour period)
Pull Request resolved: https://github.com/facebook/duckling/pull/375

Reviewed By: stroxler

Differential Revision: D20332233

Pulled By: chessai

fbshipit-source-id: 479858e6c5de856a6965b6193c481e654a6e04fb
This commit is contained in:
evjava 2021-04-16 14:20:00 -07:00 committed by Facebook GitHub Bot
parent d712bee78b
commit f1cb3bc87c
2 changed files with 179 additions and 10 deletions

View File

@ -9,6 +9,7 @@
module Duckling.Duration.RU.Corpus
( corpus
, negativeCorpus
) where
import Data.String
@ -23,6 +24,14 @@ import Duckling.TimeGrain.Types (Grain(..))
-- | Positive corpus for Russian durations: every phrase in 'allExamples'
-- is paired with the RU locale context and the default test options.
corpus :: Corpus
corpus = (ruContext, testOptions, allExamples)
  where
    ruContext = testContext {locale = makeLocale RU Nothing}
-- | Negative corpus for Russian durations, evaluated under the RU locale
-- context and the default test options.
--
-- The only entry is the bare word "суток" with no preceding numeral.
negativeCorpus :: NegativeCorpus
negativeCorpus = (ruContext, testOptions, ["суток"])
  where
    ruContext = testContext {locale = makeLocale RU Nothing}
allExamples :: [Example]
allExamples = concat
[ examples (DurationData 1 Second)
@ -35,6 +44,8 @@ allExamples = concat
[ "15 мин"
, "пятнадцать минут"
, "15'"
, "четверть часа"
, "1 четверть часа"
]
, examples (DurationData 30 Minute)
[ "30 минут"
@ -42,6 +53,11 @@ allExamples = concat
, "полчаса"
, "тридцать минут"
]
, examples (DurationData 45 Minute)
[ "45 минут"
, "3 четверти часа"
, "три четверти часа"
]
, examples (DurationData 5400 Second)
[ "полтора часа"
, "1.5 часа"
@ -92,4 +108,40 @@ allExamples = concat
, "примерно полдня"
, "пол дня"
]
, examples (DurationData 1 Hour)
[ "час"
, "1 час"
, "часик"
, "часок"
, "часочек"
]
, examples (DurationData 5 Hour)
[ "5 часов"
, "5 часиков"
, "5 часочков"
]
, examples (DurationData 1 Minute)
[ "минута"
, "минуту"
, "минутка"
, "минутку"
, "минуточка"
, "минуточку"
, "1 минутка"
]
, examples (DurationData 4 Minute)
[ "4 минуты"
, "4 минутки"
, "4 минуточки"
]
, examples (DurationData 24 Hour)
[ "сутки"
, "1 сутки"
]
, examples (DurationData 120 Hour)
[ "5 суток"
]
, examples (DurationData 115 Minute)
[ "1 час 55 минут"
]
]

View File

@ -6,6 +6,7 @@
{-# LANGUAGE GADTs #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE NoRebindableSyntax #-}
{-# LANGUAGE OverloadedStrings #-}
@ -14,7 +15,6 @@ module Duckling.Duration.RU.Rules
) where
import Data.HashMap.Strict (HashMap)
import Data.String
import Data.Text (Text)
import Prelude
import qualified Data.HashMap.Strict as HashMap
@ -28,7 +28,9 @@ import Duckling.Duration.Types (DurationData (DurationData))
import Duckling.Regex.Types
import Duckling.Types
import Duckling.TimeGrain.Types
import qualified Duckling.Duration.Types as TDuration
import qualified Duckling.Numeral.Types as TNumeral
import qualified Duckling.TimeGrain.Types as TG
grainsMap :: HashMap Text Grain
grainsMap = HashMap.fromList
@ -46,7 +48,7 @@ ruleHalves = Rule
, pattern =
[ regex "пол\\s?(года|месяца|дня|часа|минуты)"
]
, prod = \tokens -> case tokens of
, prod = \case
(Token RegexMatch (GroupMatch (x:_)):_) -> do
grain <- HashMap.lookup (Text.toLower x) grainsMap
Token Duration <$> nPlusOneHalf grain 0
@ -60,12 +62,12 @@ ruleNumeralQuotes = Rule
[ Predicate isNatural
, regex "(['\"])"
]
, prod = \tokens -> case tokens of
, prod = \case
(Token Numeral NumeralData{TNumeral.value = v}:
Token RegexMatch (GroupMatch (x:_)):
_) -> case x of
"'" -> Just . Token Duration . duration Minute $ floor v
"\"" -> Just . Token Duration . duration Second $ floor v
"'" -> Just $ Token Duration $ duration Minute $ floor v
"\"" -> Just $ Token Duration $ duration Second $ floor v
_ -> Nothing
_ -> Nothing
}
@ -77,7 +79,7 @@ ruleDurationPrecision = Rule
[ regex "(где-то|приблизительно|примерно|ровно)"
, dimension Duration
]
, prod = \tokens -> case tokens of
, prod = \case
(_:token:_) -> Just token
_ -> Nothing
}
@ -88,8 +90,8 @@ ruleGrainAsDuration = Rule
, pattern =
[ dimension TimeGrain
]
, prod = \tokens -> case tokens of
(Token TimeGrain grain:_) -> Just . Token Duration $ duration grain 1
, prod = \case
(Token TimeGrain grain:_) -> Just $ Token Duration $ duration grain 1
_ -> Nothing
}
@ -100,10 +102,116 @@ rulePositiveDuration = Rule
[ numberWith TNumeral.value $ and . sequence [not . isInteger, (>0)]
, dimension TimeGrain
]
, prod = \tokens -> case tokens of
, prod = \case
(Token Numeral NumeralData{TNumeral.value = v}:
Token TimeGrain grain:
_) -> Just . Token Duration . duration Second . floor $ inSeconds grain v
_) -> Just $ Token Duration $ duration Second $ floor $ inSeconds grain v
_ -> Nothing
}
-- | Diminutive singular forms of "hour" ("часок", "часик", "часочек").
-- Any match yields a duration of exactly one hour; the matched tokens are
-- not inspected.
hourDiminutive :: Rule
hourDiminutive = Rule
  { name = "hour diminutive"
  , pattern =
    [ regex "час(ок|ик|очек)"
    ]
  , prod = const . Just . Token Duration $ duration Hour 1
  }
-- | An integer followed by a diminutive plural form of "hours"
-- ("часиков", "часочков"), e.g. "5 часиков".
--
-- Produces a duration of that many hours, or 'Nothing' when the numeral
-- cannot be converted by 'TNumeral.getIntValue'.
hoursDiminutive :: Rule
hoursDiminutive = Rule
  { name = "hour diminutive 2"
  , pattern =
    [ numberWith TNumeral.value isInteger
    , regex "час(иков|очков)"
    ]
  , prod = \case
      (Token Numeral NumeralData{TNumeral.value = count}:_) ->
        Token Duration . duration Hour <$> TNumeral.getIntValue count
      _ -> Nothing
  }
-- | Diminutive forms of "minute" built on the stems "минутк" and
-- "минуточк" plus one more character (e.g. "минутка", "минуточку").
-- Any match yields a duration of exactly one minute; the matched tokens
-- are not inspected.
minuteDiminutive :: Rule
minuteDiminutive = Rule
  { name = "minute diminutive"
  , pattern =
    [ regex "минутк.|минуточк."
    ]
  , prod = const . Just . Token Duration $ duration Minute 1
  }
-- | An integer followed by a diminutive form of "minutes"
-- (e.g. "4 минутки", "4 минуточки", "5 минуток").
--
-- Produces a duration of that many minutes, or 'Nothing' when the numeral
-- cannot be converted by 'TNumeral.getIntValue'.
minutesDiminutive :: Rule
minutesDiminutive = Rule
  -- The name carries a "2" suffix so it does not collide with the
  -- numeral-less 'minuteDiminutive' rule, mirroring the
  -- "hour diminutive" / "hour diminutive 2" pair.
  { name = "minute diminutive 2"
  , pattern =
    [ numberWith TNumeral.value isInteger
    , regex "минутк.|минуток|минуточк.|минуточек"
    ]
  , prod = \case
      (Token Numeral NumeralData{TNumeral.value = v}:_) -> do
        n <- TNumeral.getIntValue v
        Just $ Token Duration $ duration Minute n
      _ -> Nothing
  }
-- | "Quarter of an hour", optionally preceded by "one" ("одну", "одна",
-- "одной" or the digit 1), e.g. "четверть часа", "1 четверть часа".
-- Always yields a 15-minute duration.
ruleDurationQuarterOfAnHour :: Rule
ruleDurationQuarterOfAnHour = Rule
  { name = "quarter of an hour"
  , pattern =
    -- The dotted abbreviation must be listed before the bare "ч":
    -- alternatives are tried left to right, so with "ч" first the
    -- dotted branch could never match and the trailing period would be
    -- left outside the matched span.
    [ regex "((одн(у|а|ой)|1)\\s)?четверт. (часа|ч\\.|ч)"
    ]
  , prod = \_ -> Just $ Token Duration $ duration TG.Minute 15
  }
-- | A numeral equal to 3 followed by "quarters of an hour",
-- e.g. "3 четверти часа", "три четверти часа".
-- Always yields a 45-minute duration.
ruleDurationThreeQuartersOfAnHour :: Rule
ruleDurationThreeQuartersOfAnHour = Rule
  { name = "3 quarters of an hour"
  , pattern =
    [ numberWith TNumeral.value (== 3)
    -- The dotted abbreviation must be listed before the bare "ч":
    -- alternatives are tried left to right, so with "ч" first the
    -- dotted branch could never match and the trailing period would be
    -- left outside the matched span.
    , regex "четверт(и|ей) (часа|ч\\.|ч)"
    ]
  , prod = \_ -> Just $ Token Duration $ duration TG.Minute 45
  }
-- | The standalone word "сутки" (a 24-hour period).
-- Any match yields a duration of 24 hours; the matched tokens are not
-- inspected.
ruleDuration24h :: Rule
ruleDuration24h = Rule
  { name = "сутки"
  , pattern =
    [ regex "сутки"
    ]
  , prod = const . Just . Token Duration $ duration TG.Hour 24
  }
-- | An integer followed by "сутки" or "суток", e.g. "5 суток".
--
-- Each unit counts as 24 hours, so the result is expressed in hours
-- (the numeral times 24). Yields 'Nothing' when the numeral cannot be
-- converted by 'TNumeral.getIntValue'.
ruleDuration24hN :: Rule
ruleDuration24hN = Rule
  { name = "<integer> сутки"
  , pattern =
    [ numberWith TNumeral.value isInteger
    , regex "сутки|суток"
    ]
  , prod = \case
      (Token Numeral NumeralData{TNumeral.value = count}:_) ->
        Token Duration . duration TG.Hour . (* 24)
          <$> TNumeral.getIntValue count
      _ -> Nothing
  }
-- | Composite duration: a natural number and a time grain followed by an
-- already-parsed duration, e.g. "1 час 55 минут".
--
-- The rule only fires when the leading grain compares strictly greater
-- than the grain of the trailing duration; the two parts are then
-- combined with '<>'. Any other token shape yields 'Nothing'.
ruleCompositeDuration :: Rule
ruleCompositeDuration = Rule
  { name = "composite <duration>"
  , pattern =
    [ Predicate isNatural
    , dimension TimeGrain
    , dimension Duration
    ]
  , prod = \case
      (Token Numeral NumeralData{TNumeral.value = amount}:
       Token TimeGrain headGrain:
       Token Duration rest@DurationData{TDuration.grain = restGrain}:
       _)
        | headGrain > restGrain ->
            Just . Token Duration $ duration headGrain (floor amount) <> rest
      _ -> Nothing
  }
@ -114,4 +222,13 @@ rules =
, ruleNumeralQuotes
, ruleGrainAsDuration
, ruleHalves
, hourDiminutive
, hoursDiminutive
, minuteDiminutive
, minutesDiminutive
, ruleDurationQuarterOfAnHour
, ruleDurationThreeQuartersOfAnHour
, ruleDuration24h
, ruleDuration24hN
, ruleCompositeDuration
]