mirror of
https://github.com/facebook/duckling.git
synced 2025-01-07 14:29:37 +03:00
Russian(RU) duration improvements (#375)
Summary: - diminutives for minutes and hours - quarters of an hour - added 'сутки' (24-hour period) Pull Request resolved: https://github.com/facebook/duckling/pull/375 Reviewed By: stroxler Differential Revision: D20332233 Pulled By: chessai fbshipit-source-id: 479858e6c5de856a6965b6193c481e654a6e04fb
This commit is contained in:
parent
d712bee78b
commit
f1cb3bc87c
@ -9,6 +9,7 @@
|
||||
|
||||
module Duckling.Duration.RU.Corpus
|
||||
( corpus
|
||||
, negativeCorpus
|
||||
) where
|
||||
|
||||
import Data.String
|
||||
@ -23,6 +24,14 @@ import Duckling.TimeGrain.Types (Grain(..))
|
||||
-- | Positive corpus for the Russian duration rules: bundles a test context
-- whose locale is @makeLocale RU Nothing@ with 'testOptions' and 'allExamples'.
corpus :: Corpus
corpus = (ruContext, testOptions, allExamples)
  where
    ruContext = testContext {locale = makeLocale RU Nothing}
|
||||
|
||||
-- | Negative corpus for the Russian duration rules: the same RU test context
-- and options as the positive corpus, paired with the phrases listed below.
negativeCorpus :: NegativeCorpus
negativeCorpus = (ruContext, testOptions, rejected)
  where
    ruContext = testContext {locale = makeLocale RU Nothing}
    -- A bare "суток" with no preceding number; presumably it must not be
    -- recognised as a duration on its own (confirm against the test harness).
    rejected =
      [ "суток"
      ]
|
||||
|
||||
|
||||
allExamples :: [Example]
|
||||
allExamples = concat
|
||||
[ examples (DurationData 1 Second)
|
||||
@ -35,6 +44,8 @@ allExamples = concat
|
||||
[ "15 мин"
|
||||
, "пятнадцать минут"
|
||||
, "15'"
|
||||
, "четверть часа"
|
||||
, "1 четверть часа"
|
||||
]
|
||||
, examples (DurationData 30 Minute)
|
||||
[ "30 минут"
|
||||
@ -42,6 +53,11 @@ allExamples = concat
|
||||
, "полчаса"
|
||||
, "тридцать минут"
|
||||
]
|
||||
, examples (DurationData 45 Minute)
|
||||
[ "45 минут"
|
||||
, "3 четверти часа"
|
||||
, "три четверти часа"
|
||||
]
|
||||
, examples (DurationData 5400 Second)
|
||||
[ "полтора часа"
|
||||
, "1.5 часа"
|
||||
@ -92,4 +108,40 @@ allExamples = concat
|
||||
, "примерно полдня"
|
||||
, "пол дня"
|
||||
]
|
||||
, examples (DurationData 1 Hour)
|
||||
[ "час"
|
||||
, "1 час"
|
||||
, "часик"
|
||||
, "часок"
|
||||
, "часочек"
|
||||
]
|
||||
, examples (DurationData 5 Hour)
|
||||
[ "5 часов"
|
||||
, "5 часиков"
|
||||
, "5 часочков"
|
||||
]
|
||||
, examples (DurationData 1 Minute)
|
||||
[ "минута"
|
||||
, "минуту"
|
||||
, "минутка"
|
||||
, "минутку"
|
||||
, "минуточка"
|
||||
, "минуточку"
|
||||
, "1 минутка"
|
||||
]
|
||||
, examples (DurationData 4 Minute)
|
||||
[ "4 минуты"
|
||||
, "4 минутки"
|
||||
, "4 минуточки"
|
||||
]
|
||||
, examples (DurationData 24 Hour)
|
||||
[ "сутки"
|
||||
, "1 сутки"
|
||||
]
|
||||
, examples (DurationData 120 Hour)
|
||||
[ "5 суток"
|
||||
]
|
||||
, examples (DurationData 115 Minute)
|
||||
[ "1 час 55 минут"
|
||||
]
|
||||
]
|
||||
|
@ -6,6 +6,7 @@
|
||||
|
||||
|
||||
{-# LANGUAGE GADTs #-}
|
||||
{-# LANGUAGE LambdaCase #-}
|
||||
{-# LANGUAGE NoRebindableSyntax #-}
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
|
||||
@ -14,7 +15,6 @@ module Duckling.Duration.RU.Rules
|
||||
) where
|
||||
|
||||
import Data.HashMap.Strict (HashMap)
|
||||
import Data.String
|
||||
import Data.Text (Text)
|
||||
import Prelude
|
||||
import qualified Data.HashMap.Strict as HashMap
|
||||
@ -28,7 +28,9 @@ import Duckling.Duration.Types (DurationData (DurationData))
|
||||
import Duckling.Regex.Types
|
||||
import Duckling.Types
|
||||
import Duckling.TimeGrain.Types
|
||||
import qualified Duckling.Duration.Types as TDuration
|
||||
import qualified Duckling.Numeral.Types as TNumeral
|
||||
import qualified Duckling.TimeGrain.Types as TG
|
||||
|
||||
grainsMap :: HashMap Text Grain
|
||||
grainsMap = HashMap.fromList
|
||||
@ -46,7 +48,7 @@ ruleHalves = Rule
|
||||
, pattern =
|
||||
[ regex "пол\\s?(года|месяца|дня|часа|минуты)"
|
||||
]
|
||||
, prod = \tokens -> case tokens of
|
||||
, prod = \case
|
||||
(Token RegexMatch (GroupMatch (x:_)):_) -> do
|
||||
grain <- HashMap.lookup (Text.toLower x) grainsMap
|
||||
Token Duration <$> nPlusOneHalf grain 0
|
||||
@ -60,12 +62,12 @@ ruleNumeralQuotes = Rule
|
||||
[ Predicate isNatural
|
||||
, regex "(['\"])"
|
||||
]
|
||||
, prod = \tokens -> case tokens of
|
||||
, prod = \case
|
||||
(Token Numeral NumeralData{TNumeral.value = v}:
|
||||
Token RegexMatch (GroupMatch (x:_)):
|
||||
_) -> case x of
|
||||
"'" -> Just . Token Duration . duration Minute $ floor v
|
||||
"\"" -> Just . Token Duration . duration Second $ floor v
|
||||
"'" -> Just $ Token Duration $ duration Minute $ floor v
|
||||
"\"" -> Just $ Token Duration $ duration Second $ floor v
|
||||
_ -> Nothing
|
||||
_ -> Nothing
|
||||
}
|
||||
@ -77,7 +79,7 @@ ruleDurationPrecision = Rule
|
||||
[ regex "(где-то|приблизительно|примерно|ровно)"
|
||||
, dimension Duration
|
||||
]
|
||||
, prod = \tokens -> case tokens of
|
||||
, prod = \case
|
||||
(_:token:_) -> Just token
|
||||
_ -> Nothing
|
||||
}
|
||||
@ -88,8 +90,8 @@ ruleGrainAsDuration = Rule
|
||||
, pattern =
|
||||
[ dimension TimeGrain
|
||||
]
|
||||
, prod = \tokens -> case tokens of
|
||||
(Token TimeGrain grain:_) -> Just . Token Duration $ duration grain 1
|
||||
, prod = \case
|
||||
(Token TimeGrain grain:_) -> Just $ Token Duration $ duration grain 1
|
||||
_ -> Nothing
|
||||
}
|
||||
|
||||
@ -100,10 +102,116 @@ rulePositiveDuration = Rule
|
||||
[ numberWith TNumeral.value $ and . sequence [not . isInteger, (>0)]
|
||||
, dimension TimeGrain
|
||||
]
|
||||
, prod = \tokens -> case tokens of
|
||||
, prod = \case
|
||||
(Token Numeral NumeralData{TNumeral.value = v}:
|
||||
Token TimeGrain grain:
|
||||
_) -> Just . Token Duration . duration Second . floor $ inSeconds grain v
|
||||
_) -> Just $ Token Duration $ duration Second $ floor $ inSeconds grain v
|
||||
_ -> Nothing
|
||||
}
|
||||
|
||||
-- | Diminutive forms of "hour" (часок, часик, часочек) standing alone.
-- Always produces a duration of exactly one hour.
hourDiminutive :: Rule
hourDiminutive = Rule
  { name = "hour diminutive"
  , pattern =
    [ regex "час(ок|ик|очек)"
    ]
    -- The matched tokens are irrelevant: any match means one hour.
  , prod = \_ -> Just . Token Duration $ duration Hour 1
  }
|
||||
|
||||
-- | An integer numeral followed by a plural diminutive of "hour"
-- (часиков, часочков). Produces a duration of that many hours.
hoursDiminutive :: Rule
hoursDiminutive = Rule
  { name = "hour diminutive 2"
  , pattern =
    [ numberWith TNumeral.value isInteger
    , regex "час(иков|очков)"
    ]
  , prod = \case
      -- Only the numeral matters; the result is Nothing when
      -- 'TNumeral.getIntValue' cannot convert the value.
      (Token Numeral NumeralData{TNumeral.value = count}:_) ->
        Token Duration . duration Hour <$> TNumeral.getIntValue count
      _ -> Nothing
  }
|
||||
|
||||
-- | Diminutive forms of "minute" standing alone: the stems минутк- and
-- минуточк- followed by any single character. Always produces a duration
-- of exactly one minute.
minuteDiminutive :: Rule
minuteDiminutive = Rule
  { name = "minute diminutive"
  , pattern =
    [ regex "минутк.|минуточк."
    ]
    -- The matched tokens are irrelevant: any match means one minute.
  , prod = \_ -> Just . Token Duration $ duration Minute 1
  }
|
||||
|
||||
-- | An integer numeral followed by a diminutive of "minute"
-- (минутк-, минуток, минуточк-, минуточек). Produces a duration of that
-- many minutes.
minutesDiminutive :: Rule
minutesDiminutive = Rule
  { -- This rule previously shared the name "minute diminutive" with
    -- 'minuteDiminutive'; it is renamed to mirror the
    -- "hour diminutive" / "hour diminutive 2" pair so the two rules can be
    -- told apart by name.
    -- NOTE(review): anything keyed by rule name (e.g. generated ranking
    -- data) may need regenerating after this rename - confirm.
    name = "minute diminutive 2"
  , pattern =
    [ numberWith TNumeral.value isInteger
    , regex "минутк.|минуток|минуточк.|минуточек"
    ]
  , prod = \case
      -- Only the numeral matters; the result is Nothing when
      -- 'TNumeral.getIntValue' cannot convert the value.
      (Token Numeral NumeralData{TNumeral.value = v}:_) ->
        Token Duration . duration Minute <$> TNumeral.getIntValue v
      _ -> Nothing
  }
|
||||
|
||||
-- | "Quarter of an hour": an optional leading "one" (одну/одна/одной/1),
-- then "четверт" plus one character, then "часа", "ч." or "ч".
-- Always produces a duration of 15 minutes.
ruleDurationQuarterOfAnHour :: Rule
ruleDurationQuarterOfAnHour = Rule
  { name = "quarter of an hour"
  , pattern =
    -- Alternation is tried left to right, so "ч\\." must come before the
    -- bare "ч"; in the previous order (часа|ч|ч\\.) the dotted form could
    -- never match and the trailing dot was left outside the match.
    [ regex "((одн(у|а|ой)|1)\\s)?четверт. (часа|ч\\.|ч)"
    ]
  , prod = \_ -> Just $ Token Duration $ duration TG.Minute 15
  }
|
||||
|
||||
-- | "Three quarters of an hour": a numeral whose value equals 3, followed by
-- "четверти"/"четвертей" and then "часа", "ч." or "ч".
-- Always produces a duration of 45 minutes.
ruleDurationThreeQuartersOfAnHour :: Rule
ruleDurationThreeQuartersOfAnHour = Rule
  { name = "3 quarters of an hour"
  , pattern =
    [ numberWith TNumeral.value (== 3)
      -- Alternation is tried left to right, so "ч\\." must come before the
      -- bare "ч"; in the previous order (часа|ч|ч\\.) the dotted form could
      -- never match and the trailing dot was left outside the match.
    , regex "четверт(и|ей) (часа|ч\\.|ч)"
    ]
  , prod = \_ -> Just $ Token Duration $ duration TG.Minute 45
  }
|
||||
|
||||
-- | The bare word "сутки". Always produces a duration of 24 hours.
ruleDuration24h :: Rule
ruleDuration24h = Rule
  { name = "сутки"
  , pattern =
    [ regex "сутки"
    ]
    -- The matched tokens are irrelevant: any match means 24 hours.
  , prod = const . Just . Token Duration $ duration TG.Hour 24
  }
|
||||
|
||||
-- | An integer numeral followed by "сутки" or "суток". Produces a duration
-- in hours equal to the numeral multiplied by 24.
ruleDuration24hN :: Rule
ruleDuration24hN = Rule
  { name = "<integer> сутки"
  , pattern =
    [ numberWith TNumeral.value isInteger
    , regex "сутки|суток"
    ]
  , prod = \case
      -- Only the numeral matters; the result is Nothing when
      -- 'TNumeral.getIntValue' cannot convert the value.
      (Token Numeral NumeralData{TNumeral.value = count}:_) ->
        Token Duration . duration TG.Hour . (* 24)
          <$> TNumeral.getIntValue count
      _ -> Nothing
  }
|
||||
|
||||
-- | Composite duration: a natural number, a time grain, and an already
-- parsed duration (e.g. "1 час 55 минут"). The leading number/grain pair is
-- combined with the trailing duration via '<>', but only when the leading
-- grain is strictly greater than the trailing duration's grain.
ruleCompositeDuration :: Rule
ruleCompositeDuration = Rule
  { name = "composite <duration>"
  , pattern =
    [ Predicate isNatural
    , dimension TimeGrain
    , dimension Duration
    ]
  , prod = \case
      (Token Numeral NumeralData{TNumeral.value = count}:
       Token TimeGrain outer:
       Token Duration rest@DurationData{TDuration.grain = inner}:
       _)
        -- Reject orderings where the first grain is not the larger one.
        | outer > inner ->
            let leading = duration outer (floor count)
            in Just . Token Duration $ leading <> rest
      _ -> Nothing
  }
|
||||
|
||||
@ -114,4 +222,13 @@ rules =
|
||||
, ruleNumeralQuotes
|
||||
, ruleGrainAsDuration
|
||||
, ruleHalves
|
||||
, hourDiminutive
|
||||
, hoursDiminutive
|
||||
, minuteDiminutive
|
||||
, minutesDiminutive
|
||||
, ruleDurationQuarterOfAnHour
|
||||
, ruleDurationThreeQuartersOfAnHour
|
||||
, ruleDuration24h
|
||||
, ruleDuration24hN
|
||||
, ruleCompositeDuration
|
||||
]
|
||||
|
Loading…
Reference in New Issue
Block a user