Support for Greek durations

Summary: Adding support for Greek time grains and durations.

Reviewed By: patapizza

Differential Revision: D6249955

fbshipit-source-id: 1c69e26
This commit is contained in:
Panagiotis Vekris 2017-11-06 18:33:20 -08:00 committed by Facebook Github Bot
parent ba46d592cd
commit e8937e1cd6
10 changed files with 487 additions and 8 deletions

View File

@ -14,5 +14,6 @@ import Duckling.Dimensions.Types
allDimensions :: [Some Dimension]
allDimensions =
[ This Numeral
[ This Duration
, This Numeral
]

View File

@ -0,0 +1,131 @@
-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree. An additional grant
-- of patent rights can be found in the PATENTS file in the same directory.
{-# LANGUAGE OverloadedStrings #-}
module Duckling.Duration.EL.Corpus
( corpus
, negativeCorpus
) where
import Prelude
import Data.String
import Duckling.Duration.Types
import Duckling.Locale
import Duckling.Resolve
import Duckling.Testing.Types
import Duckling.TimeGrain.Types (Grain(..))
corpus :: Corpus
corpus = (testContext {locale = makeLocale EL Nothing}, allExamples)
negativeCorpus :: NegativeCorpus
negativeCorpus = (testContext, examples)
where
examples =
[ "για μήνες"
, "και ημέρες"
]
allExamples :: [Example]
allExamples = concat
[ examples (DurationData 1 Second)
[ "ένα δευτερόλεπτο"
, "ενός δευτερολέπτου"
, "1 δεύτερο"
, "1\""
]
, examples (DurationData 30 Second)
[ "30 δευτερόλεπτα"
, "τριάντα δευτερολέπτων"
, "μισό λεπτό"
, "30\""
]
, examples (DurationData 1 Minute)
[ "ενός λεπτού"
, "1 λεπτού"
]
, examples (DurationData 2 Minute)
[ "2 λεπτά"
, "δύο λεπτά"
, "δυο λεπτά"
, "2'"
]
, examples (DurationData 15 Minute)
[ "ένα τέταρτο"
, "δεκαπέντε λεπτά"
, "δεκαπεντάλεπτο"
, "15'"
]
, examples (DurationData 30 Minute)
[ "μισάωρο"
, "τριάντα λεπτά"
, "μισή ώρα"
, "30'"
]
, examples (DurationData 45 Minute)
[ "τρία τέταρτα"
, "σαρανταπεντάλεπτος"
, "45'"
]
, examples (DurationData 60 Minute)
[ "60 λεπτά"
, "εξηντάλεπτο"
]
, examples (DurationData 90 Minute)
[ "μια και μισή ώρα"
, "περίπου μια και μισή ώρα"
, "ακριβώς μια και μισή ώρα"
, "μιάμιση ώρα"
, "1,5 ώρα"
]
, examples (DurationData 60 Hour)
[ "δυόμισι μέρες"
, "60 ώρες"
, "εξήντα ώρες"
]
, examples (DurationData 15 Day)
[ "15 μέρες"
, "δεκαπενθήμερο"
]
, examples (DurationData 30 Day)
[ "30 μέρες"
]
, examples (DurationData 7 Week)
[ "εφτά εβδομάδες"
, "7 βδομάδες"
]
, examples (DurationData 1 Month)
[ "1 μήνας"
, "ένα μήνα"
]
, examples (DurationData 3 Quarter)
[ "3 τρίμηνα"
]
, examples (DurationData 18 Month)
[ "18 μήνες"
, "ενάμισης χρόνος"
, "ένας και μισός χρόνος"
, "ενάμισι έτος"
, "ένα και μισό έτος"
]
, examples (DurationData 2 Year)
[ "δυο χρόνια"
, "δύο έτη"
, "διετία"
, "διετής"
, "δίχρονο"
]
, examples (DurationData 35 Year)
[ "τριανταπενταετής"
, "τριανταπεντάχρονος"
, "τριανταπενταετία"
, "35 χρόνια"
]
]

View File

@ -0,0 +1,257 @@
-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree. An additional grant
-- of patent rights can be found in the PATENTS file in the same directory.
{-# LANGUAGE GADTs #-}
{-# LANGUAGE NoRebindableSyntax #-}
{-# LANGUAGE OverloadedStrings #-}
module Duckling.Duration.EL.Rules
( rules
) where
import Data.HashMap.Strict (HashMap)
import Data.String
import Data.Text (Text)
import Prelude
import qualified Data.HashMap.Strict as HashMap
import qualified Data.Text as Text
import Duckling.Dimensions.Types
import Duckling.Duration.Helpers
import Duckling.Duration.Types (DurationData (DurationData))
import Duckling.Numeral.Helpers (parseInt, parseInteger, integer)
import Duckling.Numeral.Types (NumeralData(..))
import Duckling.Regex.Types
import Duckling.Types
import qualified Duckling.Numeral.Types as TNumeral
import qualified Duckling.TimeGrain.Types as TG
numeralMap :: HashMap Text Int
numeralMap = HashMap.fromList
[ ( "δι" , 2 )
, ( "δί" , 2 )
, ( "τρι" , 3 )
, ( "τρί" , 3 )
, ( "τετρ" , 4 )
, ( "πεντ" , 5 )
, ( "πενθ" , 5 )
, ( "εξ" , 6 )
, ( "επτ" , 7 )
, ( "εφτ" , 7 )
, ( "οκτ" , 8 )
, ( "οχτ" , 8 )
, ( "εννι" , 9 )
, ( "δεκ" , 10 )
, ( "δεκαπεντ" , 15 )
, ( "δεκαπενθ" , 15 )
, ( "εικοσ" , 20 )
, ( "εικοσιπεντ" , 25 )
, ( "εικοσιπενθ" , 25 )
, ( "τριαντ" , 30 )
, ( "τριανταπεντ" , 35 )
, ( "τριανταπενθ" , 35 )
, ( "σαραντ" , 40 )
, ( "σαρανταπεντ" , 45 )
, ( "σαρανταπενθ" , 45 )
, ( "πενηντ" , 50 )
, ( "πενηνταπετν" , 55 )
, ( "πενηνταπετθ" , 55 )
, ( "εξηντ" , 60 )
, ( "ενενηντ" , 90 )
-- The following are used as prefixes
, ( "μιά" , 1 )
, ( "ενά" , 1 )
, ( "δυό" , 2 )
, ( "τρεισί" , 3 )
, ( "τεσσερισή" , 4 )
, ( "τεσσερσή" , 4 )
, ( "πεντέ" , 5 )
, ( "εξί" , 6 )
, ( "επτά" , 7 )
, ( "εφτά" , 7 )
, ( "οκτώ" , 8 )
, ( "οχτώ" , 8 )
, ( "εννιά" , 9 )
, ( "δεκά" , 10 )
, ( "εντεκά" , 11 )
, ( "δωδεκά" , 12 )
]
timeGrainMap :: HashMap Text TG.Grain
timeGrainMap = HashMap.fromList
[ ( "λεπτο" , TG.Minute )
, ( "ωρο" , TG.Hour )
, ( "μερο" , TG.Day )
, ( "ήμερο" , TG.Day )
, ( "μηνο" , TG.Month )
, ( "ετία" , TG.Year )
, ( "ετίας" , TG.Year )
, ( "ετή" , TG.Year )
, ( "ετέ" , TG.Year )
, ( "χρονο" , TG.Year )
]
ruleDurationQuarterOfAnHour :: Rule
ruleDurationQuarterOfAnHour = Rule
{ name = "quarter of an hour"
, pattern =
[ regex "(1/4\\s?((της )ώρας|ω)|ένα τέταρτο|ενός τετάρτου)"
]
, prod = \_ -> Just . Token Duration $ duration TG.Minute 15
}
ruleDurationHalfAnHour :: Rule
ruleDurationHalfAnHour = Rule
{ name = "half an hour"
, pattern =
[ regex "(1/2\\s?((της )?ώρας?|ω)|μισάωρου?)"
]
, prod = \_ -> Just . Token Duration $ duration TG.Minute 30
}
ruleNumeralWithGrain :: Rule
ruleNumeralWithGrain = Rule
{ name = "<number><grain> (one word)"
, pattern =
[ regex $ "(δ[ιί]|τρ[ιί]|τετρ|πεν[θτ]|εξ|ε[πφ]τ|ο[κχ]τ|εννι|δεκ|"
++ "δεκαπεν[θτ]|εικοσ|εικοσιπεν[θτ]|τριαντ|τριανταπεν[θτ]|σαραντ|"
++ "σαρανταπεν[θτ]|πενηντ|πενηνταπεν[θτ]|εξηντ|ενενηντ)[αά]?"
++ "(λεπτο|ωρο|ή?μερο|μηνο|ετία?|ετ[ήέ]|χρονο)ς?υ?"
]
, prod = \tokens -> case tokens of
( Token RegexMatch (GroupMatch (m:g:_)) : _ ) ->
(Token Duration .) . duration <$> HashMap.lookup g timeGrainMap
<*> HashMap.lookup m numeralMap
_ -> Nothing
}
ruleDurationThreeQuartersOfAnHour :: Rule
ruleDurationThreeQuartersOfAnHour = Rule
{ name = "three-quarters of an hour"
, pattern =
[ regex "(3/4\\s?((της )ώρας|ω)|τρία τέταρτα|τριών τετάρτων)"
]
, prod = \_ -> Just . Token Duration $ duration TG.Minute 45
}
ruleNumeralQuotes :: Rule
ruleNumeralQuotes = Rule
{ name = "<integer> + '\""
, pattern =
[ Predicate isNatural
, regex "(['\"])"
]
, prod = \tokens -> case tokens of
(Token Numeral (NumeralData {TNumeral.value = v}):
Token RegexMatch (GroupMatch (x:_)):
_) -> case x of
"'" -> Just . Token Duration . duration TG.Minute $ floor v
"\"" -> Just . Token Duration . duration TG.Second $ floor v
_ -> Nothing
_ -> Nothing
}
ruleDurationNumeralMore :: Rule
ruleDurationNumeralMore = Rule
{ name = "<integer> more <unit-of-duration>"
, pattern =
[ Predicate isNatural
, dimension TimeGrain
, regex "ακόμα|λιγότερ[οη]"
]
, prod = \tokens -> case tokens of
(Token Numeral nd:Token TimeGrain grain:_:_) ->
Just . Token Duration . duration grain . floor $ TNumeral.value nd
_ -> Nothing
}
ruleDurationDotNumeralHours :: Rule
ruleDurationDotNumeralHours = Rule
{ name = "number.number hours"
, pattern =
[ regex "(\\d+),(\\d+)"
, dimension TimeGrain
]
, prod = \tokens -> case tokens of
(Token RegexMatch (GroupMatch (h:m:_)):Token TimeGrain TG.Hour:_) -> do
hh <- parseInteger h
mnum <- parseInteger m
let mden = 10 ^ Text.length m
Just . Token Duration $ minutesFromHourMixedFraction hh mnum mden
_ -> Nothing
}
ruleHalfDuration :: Rule
ruleHalfDuration = Rule
{ name = "half a <grain>"
, pattern =
[ regex "μισ[ήό]ς?"
, dimension TimeGrain
]
, prod = \tokens -> case tokens of
(_:Token TimeGrain g:_) -> Token Duration <$> timesOneAndAHalf g 0
_ -> Nothing
}
ruleDurationAndAHalf :: Rule
ruleDurationAndAHalf = Rule
{ name = "<integer> and a half <grain>"
, pattern =
[ Predicate isNatural
, regex "και μισ[ήό]ς?"
, dimension TimeGrain
]
, prod = \tokens -> case tokens of
(Token Numeral nd:_:Token TimeGrain grain:_) ->
timesOneAndAHalf grain (floor $ TNumeral.value nd) >>=
Just . Token Duration
_ -> Nothing
}
ruleDurationAndAHalfOneWord :: Rule
ruleDurationAndAHalfOneWord = Rule
{ name = "<integer-and-half> <grain>"
, pattern =
[ regex $ "(μιά|ενά|δυό|τρεισί|τεσσερι?σή|πεντέ|εξί|ε[πφ]τά|ο[κχ]τώ|εννιά|"
++ "δεκά|εντεκά|δωδεκά)μισ[ιη]ς?"
, dimension TimeGrain
]
, prod = \tokens -> case tokens of
(Token RegexMatch (GroupMatch (num:_)):Token TimeGrain grain:_) ->
HashMap.lookup num numeralMap >>=
timesOneAndAHalf grain >>=
Just . Token Duration
_ -> Nothing
}
ruleDurationPrecision :: Rule
ruleDurationPrecision = Rule
{ name = "about|exactly <duration>"
, pattern =
[ regex "(περίπου|πάνω κάτω|ακριβώς)"
, dimension Duration
]
, prod = \tokens -> case tokens of
(_:token:_) -> Just token
_ -> Nothing
}
rules :: [Rule]
rules =
[ ruleDurationQuarterOfAnHour
, ruleDurationHalfAnHour
, ruleNumeralQuotes
, ruleDurationNumeralMore
, ruleNumeralWithGrain
, ruleDurationThreeQuartersOfAnHour
, ruleDurationDotNumeralHours
, ruleHalfDuration
, ruleDurationAndAHalf
, ruleDurationAndAHalfOneWord
, ruleDurationPrecision
]

View File

@ -15,6 +15,7 @@ module Duckling.Duration.Helpers
, isNatural
, isNumeralWith
, minutesFromHourMixedFraction
, timesOneAndAHalf
) where
import Prelude
@ -53,3 +54,12 @@ duration grain n = DurationData {TDuration.grain = grain, TDuration.value = n}
minutesFromHourMixedFraction :: Integer -> Integer -> Integer -> DurationData
minutesFromHourMixedFraction h n d =
duration TG.Minute $ fromIntegral $ 60 * h + quot (n * 60) d
timesOneAndAHalf :: TG.Grain -> Int -> Maybe DurationData
timesOneAndAHalf grain = case grain of
TG.Minute -> Just . duration TG.Second . (30+) . (60*)
TG.Hour -> Just . duration TG.Minute . (30+) . (60*)
TG.Day -> Just . duration TG.Hour . (12+) . (24*)
TG.Month -> Just . duration TG.Day . (15+) . (30*)
TG.Year -> Just . duration TG.Month . (6+) . (12*)
_ -> const Nothing

View File

@ -49,6 +49,7 @@ oneOrTwoDigitsMap = HashMap.fromList
[ ( "μηδέν" , 0 )
, ( "ένα" , 1 )
, ( "ένας" , 1 )
, ( "ενός" , 1 )
, ( "μία" , 1 )
, ( "μια" , 1 )
, ( "δύο" , 2 )
@ -115,17 +116,21 @@ ruleNumeral = Rule
}
where
regexString = "(" ++ intercalate "|"
[ "μηδέν|ένας?|μ(ι|ί)α|δ(υ|ύ)ο|τρ(ία|εις)|τέσσερ(α|ις)|πέντε" -- [0..5]
, "έξι|ε(π|φ)τά|ο(κ|χ)τώ|ενν(ιά|έα)|δέκα|δεκαριά" -- [6..10]
, "έν(τ|δ)εκα|δώδεκα|ντουζίν(α|ες)" -- [11..12]
, "δεκα(τρία|τέσσερα|πέντε|έξι|ε(π|φ)τά|ο(χ|κ)τώ|ενν(έα|ιά))" -- [13..19]
[ "μηδέν|[εέ]ν[αοό]ς?|μ[ιί]ας?" -- [0..1]
, "δ[υύ]ο|τρ(ία|εις)|τέσσερ(α|ις)|πέντε" -- [2..5]
, "έξι|ε[πφ]τά|ο[κχ]τώ|ενν(ιά|έα)|δέκα|δεκαριά" -- [6..10]
, "έν[τδ]εκα|δώδεκα|ντουζίν(α|ες)" -- [11..12]
, "δεκα(τρία|τέσσερα|πέντε|έξι|ε[πφ]τά|ο[χκ]τώ|ενν(έα|ιά))" -- [13..19]
, "είκοσι|(τριά|σαρά|πενή|εξή|εβδομή|ογδό|ενενή)ντα" -- [2..9]0
] ++ ")"
ruleCompositeTens :: Rule
ruleCompositeTens = Rule
{ name = "integer 21..99"
, pattern = [oneOf [20,30..90], numberBetween 1 10]
, pattern =
[ oneOf [20,30..90]
, numberBetween 1 10
]
, prod = \tokens -> case tokens of
(Token Numeral (NumeralData { TNumeral.value = tens }) :
Token Numeral (NumeralData { TNumeral.value = units }) :

View File

@ -18,7 +18,9 @@ module Duckling.Rules.EL
import Duckling.Dimensions.Types
import Duckling.Locale
import Duckling.Types
import qualified Duckling.Duration.EL.Rules as Duration
import qualified Duckling.Numeral.EL.Rules as Numeral
import qualified Duckling.TimeGrain.EL.Rules as TimeGrain
defaultRules :: Some Dimension -> [Rule]
defaultRules = langRules
@ -29,7 +31,7 @@ localeRules _ _ = []
langRules :: Some Dimension -> [Rule]
langRules (This AmountOfMoney) = []
langRules (This Distance) = []
langRules (This Duration) = []
langRules (This Duration) = Duration.rules
langRules (This Email) = []
langRules (This Numeral) = Numeral.rules
langRules (This Ordinal) = []
@ -38,6 +40,6 @@ langRules (This Quantity) = []
langRules (This RegexMatch) = []
langRules (This Temperature) = []
langRules (This Time) = []
langRules (This TimeGrain) = []
langRules (This TimeGrain) = TimeGrain.rules
langRules (This Url) = []
langRules (This Volume) = []

View File

@ -0,0 +1,42 @@
-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree. An additional grant
-- of patent rights can be found in the PATENTS file in the same directory.
{-# LANGUAGE OverloadedStrings #-}
module Duckling.TimeGrain.EL.Rules
( rules ) where
import Data.Text (Text)
import Data.String
import Prelude
import Duckling.Dimensions.Types
import qualified Duckling.TimeGrain.Types as TG
import Duckling.Types
grains :: [(Text, String, TG.Grain)]
grains =
[ ("second (grain) ", "δε[υύ]τερ([οό]λ[εέ]πτ)?(ου?|α|ων)", TG.Second)
, ("minute (grain)" , "λεπτ(o|όν?|ού|ά|ών)" , TG.Minute)
, ("hour (grain)" , "[ωώ]ρ(ας?|ες|ών)" , TG.Hour)
, ("day (grain)" , "η?μέρ(ας?|ες|ών)" , TG.Day)
, ("week (grain)" , "ε?βδομάδ(α|ες|ων)" , TG.Week)
, ("month (grain)" , "μήν(ας?|ες|ών)" , TG.Month)
, ("quarter (grain)", "τρ[ιί]μ[ηή]ν(ου?|α|ων)" , TG.Quarter)
, ("year (grain)" , "έτ(ου?ς|η|ών)|χρ[οό]ν(ο[ιςυ]|ι([αά]|ές)|ι?ών)"
, TG.Year)
]
rules :: [Rule]
rules = map go grains
where
go (name, regexPattern, grain) = Rule
{ name = name
, pattern = [regex regexPattern]
, prod = \_ -> Just $ Token TimeGrain grain
}

View File

@ -217,6 +217,8 @@ library
, Duckling.Duration.AR.Rules
, Duckling.Duration.DA.Rules
, Duckling.Duration.DE.Rules
, Duckling.Duration.EL.Corpus
, Duckling.Duration.EL.Rules
, Duckling.Duration.EN.Corpus
, Duckling.Duration.EN.Rules
, Duckling.Duration.FR.Corpus
@ -519,6 +521,7 @@ library
, Duckling.TimeGrain.DA.Rules
, Duckling.TimeGrain.DE.Rules
, Duckling.TimeGrain.EN.Rules
, Duckling.TimeGrain.EL.Rules
, Duckling.TimeGrain.ES.Rules
, Duckling.TimeGrain.FR.Rules
, Duckling.TimeGrain.GA.Rules
@ -645,6 +648,7 @@ test-suite duckling-test
-- Duration
, Duckling.Duration.AR.Tests
, Duckling.Duration.EL.Tests
, Duckling.Duration.EN.Tests
, Duckling.Duration.FR.Tests
, Duckling.Duration.GA.Tests

View File

@ -0,0 +1,25 @@
-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree. An additional grant
-- of patent rights can be found in the PATENTS file in the same directory.
module Duckling.Duration.EL.Tests
( tests
) where
import Data.String
import Prelude
import Test.Tasty
import Duckling.Dimensions.Types
import Duckling.Duration.EL.Corpus
import Duckling.Testing.Asserts
tests :: TestTree
tests = testGroup "EL Tests"
[ makeCorpusTest [This Duration] corpus
, makeNegativeCorpusTest [This Duration] negativeCorpus
]

View File

@ -15,6 +15,7 @@ import Prelude
import Test.Tasty
import qualified Duckling.Duration.AR.Tests as AR
import qualified Duckling.Duration.EL.Tests as EL
import qualified Duckling.Duration.EN.Tests as EN
import qualified Duckling.Duration.FR.Tests as FR
import qualified Duckling.Duration.GA.Tests as GA
@ -33,6 +34,7 @@ import qualified Duckling.Duration.ZH.Tests as ZH
tests :: TestTree
tests = testGroup "Duration Tests"
[ AR.tests
, EL.tests
, EN.tests
, FR.tests
, GA.tests