Support Greek numerals

Summary:
- Set up the Greek language (EL)
- Add Greek numerals

Reviewed By: patapizza

Differential Revision: D6217873

fbshipit-source-id: 379170f
This commit is contained in:
Panagiotis Vekris 2017-11-02 17:04:44 -07:00 committed by Facebook Github Bot
parent f0a0c1e6b8
commit fda8c7c759
13 changed files with 541 additions and 0 deletions

View File

@ -25,6 +25,7 @@ import qualified Duckling.Dimensions.BG as BGDimensions
import qualified Duckling.Dimensions.CS as CSDimensions
import qualified Duckling.Dimensions.DA as DADimensions
import qualified Duckling.Dimensions.DE as DEDimensions
import qualified Duckling.Dimensions.EL as ELDimensions
import qualified Duckling.Dimensions.EN as ENDimensions
import qualified Duckling.Dimensions.ES as ESDimensions
import qualified Duckling.Dimensions.ET as ETDimensions
@ -85,6 +86,7 @@ langDimensions BG = BGDimensions.allDimensions
langDimensions CS = CSDimensions.allDimensions
langDimensions DA = DADimensions.allDimensions
langDimensions DE = DEDimensions.allDimensions
langDimensions EL = ELDimensions.allDimensions
langDimensions EN = ENDimensions.allDimensions
langDimensions ES = ESDimensions.allDimensions
langDimensions ET = ETDimensions.allDimensions

18
Duckling/Dimensions/EL.hs Normal file
View File

@ -0,0 +1,18 @@
-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree. An additional grant
-- of patent rights can be found in the PATENTS file in the same directory.
module Duckling.Dimensions.EL
( allDimensions
) where
import Duckling.Dimensions.Types
-- | Dimensions listed for the Greek (EL) language; only 'Numeral' so far.
allDimensions :: [Some Dimension]
allDimensions = [This Numeral]

View File

@ -36,6 +36,7 @@ data Lang
| CS
| DA
| DE
| EL
| EN
| ES
| ET

View File

@ -0,0 +1,151 @@
-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree. An additional grant
-- of patent rights can be found in the PATENTS file in the same directory.
{-# LANGUAGE OverloadedStrings #-}
module Duckling.Numeral.EL.Corpus
( corpus ) where
import Data.String
import Prelude
import Duckling.Locale
import Duckling.Numeral.Types
import Duckling.Resolve
import Duckling.Testing.Types
-- | Greek numeral corpus: 'testContext' with its locale replaced by EL
-- (no region), paired with 'allExamples'.
corpus :: Corpus
corpus = (context, allExamples)
  where
    context = testContext {locale = makeLocale EL Nothing}
-- | All Greek numeral examples. Each entry pairs an expected numeric value
-- with the input strings that should resolve to it; the groups are then
-- flattened into one list.
allExamples :: [Example]
allExamples = concat
  [ numeralExamples 0 ["0", "μηδέν"]
  , numeralExamples 1 ["1", "ένα"]
  , numeralExamples 2 ["δύο"]
  , numeralExamples 3 ["τρία"]
  , numeralExamples 4 ["τέσσερα"]
  , numeralExamples 5 ["πέντε"]
  , numeralExamples 6 ["έξι"]
  , numeralExamples 7 ["επτά", "εφτά"]
  , numeralExamples 8 ["οκτώ", "οχτώ"]
  , numeralExamples 9 ["εννιά", "εννέα"]
  , numeralExamples 10 ["δέκα", "δεκαριά"]
  , numeralExamples 11 ["έντεκα", "ένδεκα"]
  , numeralExamples 15 ["δεκαπέντε"]
  , numeralExamples 20 ["20", "είκοσι"]
  , numeralExamples 50 ["πενήντα"]
  , numeralExamples 33 ["33", "τριάντα τρία", "τριάντα τρεις"]
  , numeralExamples 24 ["24", "είκοσι τέσσερα", "είκοσι τέσσερις"]
    -- Decimals use a comma, written either as a symbol or as the word "κόμμα".
  , numeralExamples 0.77 ["μηδέν κόμμα εβδομήντα επτά", "0,77"]
  , numeralExamples 200032 ["διακόσιες χιλιάδες τριάντα δύο"]
  , numeralExamples 7200032.356
      [ "εφτά εκατομμύρια διακόσιες χιλιάδες τριάντα δύο κόμμα τριακόσια πενήντα έξι"
      , "7200032,356"
      , "7.200.032,356"
      ]
    -- Dots act as thousands separators in the numeric forms.
  , numeralExamples 100000 ["100000", "100.000", "εκατό χιλιάδες"]
  , numeralExamples 3e6 ["τρία εκατομμύρια", "3 εκατομμύρια"]
  , numeralExamples 1.2e6
      [ "ένα εκατομμύριο διακόσιες χιλιάδες"
      , "1.200.000"
      , "1200000"
      ]
  , numeralExamples 5000 ["5 χιλιάδες", "πέντε χιλιάδες"]
  , numeralExamples 122 ["εκατόν είκοσι δύο"]
  , numeralExamples 743
      [ "εφτακόσιοι σαράντα τρεις"
      , "εφτακόσια σαράντα τρία"
      , "εφτακόσιες σαράντα τρεις"
      ]
  , numeralExamples 398 ["τριακόσια ενενήντα οχτώ"]
  , numeralExamples 2e5 ["διακόσιες χιλιάδες"]
  , numeralExamples 21011 ["είκοσι μία χιλιάδες έντεκα"]
  , numeralExamples 721012 ["εφτακόσιες είκοσι μία χιλιάδες δώδεκα"]
  , numeralExamples 31256721
      [ "τριάντα ένα εκατομμύρια διακόσιες πενήντα έξι χιλιάδες εφτακόσια είκοσι ένα"
      , "31.256.721"
      ]
  , numeralExamples 2400 ["δύο χιλιάδες τετρακόσια"]
  , numeralExamples 2200000 ["δύο κόμμα δύο εκατομμύρια", "2,2 εκατομμύρια"]
  ]
  where
    -- Examples whose expected output is the given numeral value.
    numeralExamples n = examples (NumeralValue n)

View File

@ -0,0 +1,267 @@
-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree. An additional grant
-- of patent rights can be found in the PATENTS file in the same directory.
{-# LANGUAGE GADTs #-}
{-# LANGUAGE NoRebindableSyntax #-}
{-# LANGUAGE OverloadedStrings #-}
module Duckling.Numeral.EL.Rules
( rules
) where
import Data.HashMap.Strict (HashMap)
import Data.List (intercalate)
import Data.Maybe
import Data.String
import Data.Text (Text)
import Prelude
import qualified Data.HashMap.Strict as HashMap
import qualified Data.Text as Text
import Duckling.Dimensions.Types
import Duckling.Numeral.Helpers
import Duckling.Numeral.Types (NumeralData (..))
import Duckling.Regex.Types
import Duckling.Types
import qualified Duckling.Numeral.Types as TNumeral
-- | Integers written with digits: a run of 1 to 18 digits is parsed with
-- 'parseInt' and emitted through 'integer'.
ruleIntegerNumeric :: Rule
ruleIntegerNumeric = Rule
  { name = "integer (numeric)"
  , pattern = [regex "(\\d{1,18})"]
  , prod = produce
  }
  where
    produce :: [Token] -> Maybe Token
    produce (Token RegexMatch (GroupMatch (digits:_)):_) =
      parseInt digits >>= integer . toInteger
    produce _ = Nothing
-- | Spelled-out Greek numerals for 0..20 and the tens 30..90, keyed by their
-- lower-cased surface form ('ruleNumeral' lower-cases the match before the
-- lookup). Gender and spelling variants map to the same value.
--
-- Every spelling accepted by the regex in 'ruleNumeral' needs a key here,
-- otherwise the lookup yields 'Nothing' and the match is dropped. That regex
-- accepts both @π@/@φ@ and @κ@/@χ@ after the @δεκα@ prefix, so 17 and 18 each
-- list both variants.
oneOrTwoDigitsMap :: HashMap Text Integer
oneOrTwoDigitsMap = HashMap.fromList
  [ ( "μηδέν" , 0 )
  , ( "ένα" , 1 )
  , ( "ένας" , 1 )
  , ( "μία" , 1 )
  , ( "μια" , 1 )
  , ( "δύο" , 2 )
  , ( "δυο" , 2 )
  , ( "τρία" , 3 )
  , ( "τρεις" , 3 )
  , ( "τέσσερα" , 4 )
  , ( "τέσσερις" , 4 )
  , ( "πέντε" , 5 )
  , ( "έξι" , 6 )
  , ( "επτά" , 7 )
  , ( "εφτά" , 7 )
  , ( "οκτώ" , 8 )
  , ( "οχτώ" , 8 )
  , ( "εννιά" , 9 )
  , ( "εννέα" , 9 )
  , ( "δέκα" , 10 )
  , ( "δεκαριά" , 10 )
  , ( "έντεκα" , 11 )
  , ( "ένδεκα" , 11 )
  , ( "δώδεκα" , 12 )
  , ( "ντουζίνα" , 12 )
  , ( "ντουζίνες" , 12 )
  , ( "δεκατρία" , 13 )
  , ( "δεκατέσσερα" , 14 )
  , ( "δεκαπέντε" , 15 )
  , ( "δεκαέξι" , 16 )
  , ( "δεκαεπτά" , 17 )
  , ( "δεκαεφτά" , 17 )
  , ( "δεκαοκτώ" , 18 )
  , ( "δεκαοχτώ" , 18 )
  , ( "δεκαεννέα" , 19 )
  , ( "δεκαεννιά" , 19 )
  , ( "είκοσι" , 20 )
  , ( "τριάντα" , 30 )
  , ( "σαράντα" , 40 )
  , ( "πενήντα" , 50 )
  , ( "εξήντα" , 60 )
  , ( "εβδομήντα" , 70 )
  , ( "ογδόντα" , 80 )
  , ( "ενενήντα" , 90 )
  ]
-- | Stems that precede @ακόσι…@ in the Greek words for 200..900, mapped to
-- the hundred they denote. Alternative spellings of one stem share a value.
hundredsMap :: HashMap Text Integer
hundredsMap = HashMap.fromList
  [ (stem, amount)
  | (stems, amount) <-
      [ (["δι"], 200)
      , (["τρι"], 300)
      , (["τετρ"], 400)
      , (["πεντ"], 500)
      , (["εξ"], 600)
      , (["επτ", "εφτ"], 700)
      , (["οκτ", "οχτ"], 800)
      , (["εννι"], 900)
      ]
  , stem <- stems
  ]
-- | Spelled-out numerals 0..19 and the tens 20..90. The matched word is
-- lower-cased and resolved through 'oneOrTwoDigitsMap'; a word missing from
-- the map produces no token.
ruleNumeral :: Rule
ruleNumeral = Rule
  { name = "number (0..19, 20, 30..90)"
  , pattern = [regex regexString]
  , prod = produce
  }
  where
    produce :: [Token] -> Maybe Token
    produce (Token RegexMatch (GroupMatch (word:_)):_) =
      integer =<< HashMap.lookup (Text.toLower word) oneOrTwoDigitsMap
    produce _ = Nothing

    -- One capturing group wrapping every alternative below.
    regexString = concat ["(", intercalate "|" alternatives, ")"]

    alternatives =
      [ -- 0..5
        "μηδέν|ένας?|μ(ι|ί)α|δ(υ|ύ)ο|τρ(ία|εις)|τέσσερ(α|ις)|πέντε"
        -- 6..10
      , "έξι|ε(π|φ)τά|ο(κ|χ)τώ|ενν(ιά|έα)|δέκα|δεκαριά"
        -- 11..12
      , "έν(τ|δ)εκα|δώδεκα|ντουζίν(α|ες)"
        -- 13..19
      , "δεκα(τρία|τέσσερα|πέντε|έξι|ε(π|φ)τά|ο(χ|κ)τώ|ενν(έα|ιά))"
        -- 20, 30..90
      , "είκοσι|(τριά|σαρά|πενή|εξή|εβδομή|ογδό|ενενή)ντα"
      ]
-- | A multiple of ten (20..90) followed by a units numeral; the result is the
-- sum of the two values.
ruleCompositeTens :: Rule
ruleCompositeTens = Rule
  { name = "integer 21..99"
    -- NOTE(review): the exact bounds of numberBetween (inclusive/exclusive)
    -- are defined in the helpers module — confirm there.
  , pattern = [oneOf [20,30..90], numberBetween 1 10]
  , prod = produce
  }
  where
    produce :: [Token] -> Maybe Token
    produce (Token Numeral tensData : Token Numeral unitsData : _) =
      double (TNumeral.value tensData + TNumeral.value unitsData)
    produce _ = Nothing
-- | The word for one hundred ("εκατό" / "εκατόν"): integer 100 with grain 2.
ruleHundred :: Rule
ruleHundred = Rule
  { name = "number (100)"
  , pattern = [regex "(εκατόν?)"]
  , prod = produce
  }
  where
    produce :: [Token] -> Maybe Token
    produce (Token RegexMatch (GroupMatch _):_) = withGrain 2 =<< integer 100
    produce _ = Nothing
-- | The words for 200..900. The first capture group holds the stem before
-- "ακόσι…"; it is lower-cased, resolved through 'hundredsMap', and emitted as
-- an integer with grain 2.
ruleHundreds :: Rule
ruleHundreds = Rule
  { name = "number (200..900)"
  , pattern =
    [ regex "(δι|τρι|τετρ|πεντ|εξ|ε(π|φ)τ|ο(χ|κ)τ|εννι)ακόσι(α|ες|οι)"
    ]
  , prod = produce
  }
  where
    produce :: [Token] -> Maybe Token
    produce (Token RegexMatch (GroupMatch (stem:_)):_) = do
      hundreds <- HashMap.lookup (Text.toLower stem) hundredsMap
      integer hundreds >>= withGrain 2
    produce _ = Nothing
-- | The Greek words for a thousand, a million and a billion. Each produces
-- the corresponding power of ten, tagged with its grain (3, 6 or 9) and
-- marked multipliable so 'ruleMultiply' can combine it with a preceding
-- numeral.
--
-- The regex and the case below must stay in sync: a form the regex accepts
-- but the case lacks produces no token, and a case branch the regex cannot
-- match is dead code. Hence all three gendered forms of "χίλι-" are handled,
-- and both the singular and the plural of "δισεκατομμύρι-" are accepted.
rulePowersOfTen :: Rule
rulePowersOfTen = Rule
  { name = "powers of tens"
  , pattern =
    -- The long "δισεκατομμύρι(ο|α)" alternative comes before the short "δις"
    -- so the abbreviation is only tried once the full word has failed.
    [ regex "(χίλι(α|οι|ες)|χιλιάδες|εκατομμύρι(ο|α)|δισεκατομμύρι(ο|α)|δις)"
    ]
  , prod = \tokens -> case tokens of
      (Token RegexMatch (GroupMatch (match:_)):_) -> case Text.toLower match of
        "χίλια" -> powerOfTen 1e3 3
        "χίλιοι" -> powerOfTen 1e3 3
        "χίλιες" -> powerOfTen 1e3 3
        "χιλιάδες" -> powerOfTen 1e3 3
        "εκατομμύριο" -> powerOfTen 1e6 6
        "εκατομμύρια" -> powerOfTen 1e6 6
        "δις" -> powerOfTen 1e9 9
        "δισεκατομμύριο" -> powerOfTen 1e9 9
        "δισεκατομμύρια" -> powerOfTen 1e9 9
        _ -> Nothing
      _ -> Nothing
  }
  where
    -- Multipliable numeral token with value v and grain g.
    powerOfTen v g = double v >>= withGrain g >>= withMultipliable
-- | A minus sign or the word "μείον" (followed by whitespace) in front of a
-- strictly positive numeral; yields that numeral's value negated.
ruleNegative :: Rule
ruleNegative = Rule
  { name = "negative numbers"
  , pattern =
    [ regex "-|μείον\\s"
    , numberWith TNumeral.value (>0)
    ]
  , prod = produce
  }
  where
    produce :: [Token] -> Maybe Token
    produce (_ : Token Numeral positive : _) =
      double . negate $ TNumeral.value positive
    produce _ = Nothing
-- | Adds two adjacent numerals, e.g. a hundreds or thousands part followed by
-- a smaller remainder. The first numeral must carry a grain greater than 1
-- and the second must not be multipliable; they are summed only when the
-- second value is below 10 raised to the first numeral's grain.
ruleSum :: Rule
ruleSum = Rule
  { name = "intersect 2 numbers"
  , pattern =
    [ numberWith (fromMaybe 0 . TNumeral.grain) (>1)
    , numberWith TNumeral.multipliable not
    ]
  , prod = produce
  }
  where
    produce :: [Token] -> Maybe Token
    produce (Token Numeral big : Token Numeral small : _)
      | Just g <- TNumeral.grain big
      , TNumeral.value small < 10 ** fromIntegral g
      = double (TNumeral.value big + TNumeral.value small)
    produce _ = Nothing
-- | Any numeral followed by a multipliable numeral (see 'rulePowersOfTen');
-- the two tokens are combined with 'multiply'.
ruleMultiply :: Rule
ruleMultiply = Rule
  { name = "compose by multiplication"
  , pattern =
    [ dimension Numeral
    , numberWith TNumeral.multipliable id
    ]
  , prod = produce
  }
  where
    produce :: [Token] -> Maybe Token
    produce (factor : multiplier : _) = multiply factor multiplier
    produce _ = Nothing
-- | Decimal numbers written with digits and a comma as the decimal
-- separator, e.g. "0,77". The comma is rewritten to a dot before the text is
-- handed to 'parseDecimal'.
ruleDecimals :: Rule
ruleDecimals = Rule
  { name = "decimal number"
  , pattern = [regex "(\\d+,\\d+)"]
  , prod = produce
  }
  where
    produce :: [Token] -> Maybe Token
    produce (Token RegexMatch (GroupMatch (raw:_)):_) =
      -- NOTE(review): the True flag presumably tells parseDecimal the input
      -- already uses a dot separator — confirm against the helper.
      parseDecimal True dotted
      where
        dotted = Text.replace "," "." raw
    produce _ = Nothing
-- | A spelled-out decimal: a numeral, the word "κόμμα", then a numeral with
-- no grain. The second numeral is converted with 'decimalsToDouble' and added
-- to the first.
ruleCommaSpelledOut :: Rule
ruleCommaSpelledOut = Rule
  { name = "one point two"
  , pattern =
    [ dimension Numeral
    , regex "κόμμα"
    , numberWith TNumeral.grain isNothing
    ]
  , prod = produce
  }
  where
    produce :: [Token] -> Maybe Token
    produce (Token Numeral whole : _ : Token Numeral frac : _) =
      double (TNumeral.value whole + decimalsToDouble (TNumeral.value frac))
    produce _ = Nothing
-- | Numbers written with dots between groups of three digits and an optional
-- comma-separated fractional part, e.g. "7.200.032,356". The dots are removed
-- and the comma becomes a dot before the text is parsed as a double.
ruleDots :: Rule
ruleDots = Rule
  { name = "dot-separated numbers"
  , pattern = [regex "(\\d+(\\.\\d\\d\\d)+(,\\d+)?)"]
  , prod = produce
  }
  where
    produce :: [Token] -> Maybe Token
    produce (Token RegexMatch (GroupMatch (raw:_)):_) =
      double =<< parseDouble (normalize raw)
    produce _ = Nothing

    -- Drop the grouping dots first, then turn the decimal comma into a dot.
    normalize = Text.replace "," "." . Text.replace "." Text.empty
-- | Every Greek numeral rule defined in this module, in the original order.
rules :: [Rule]
rules =
  [ -- digits and single spelled-out words
    ruleIntegerNumeric
  , ruleNumeral
  , ruleCompositeTens
  , rulePowersOfTen
  , ruleNegative
  , ruleHundred
  , ruleHundreds
    -- composition of already parsed numerals
  , ruleSum
  , ruleMultiply
    -- decimal and separator handling
  , ruleDecimals
  , ruleCommaSpelledOut
  , ruleDots
  ]

View File

@ -19,6 +19,7 @@ import qualified Duckling.Ranking.Classifiers.BG_XX as BG_XXClassifiers
import qualified Duckling.Ranking.Classifiers.CS_XX as CS_XXClassifiers
import qualified Duckling.Ranking.Classifiers.DA_XX as DA_XXClassifiers
import qualified Duckling.Ranking.Classifiers.DE_XX as DE_XXClassifiers
import qualified Duckling.Ranking.Classifiers.EL_XX as EL_XXClassifiers
import qualified Duckling.Ranking.Classifiers.EN_GB as EN_GBClassifiers
import qualified Duckling.Ranking.Classifiers.EN_US as EN_USClassifiers
import qualified Duckling.Ranking.Classifiers.EN_XX as EN_XXClassifiers
@ -53,6 +54,7 @@ classifiers (Locale BG _) = BG_XXClassifiers.classifiers
classifiers (Locale CS _) = CS_XXClassifiers.classifiers
classifiers (Locale DA _) = DA_XXClassifiers.classifiers
classifiers (Locale DE _) = DE_XXClassifiers.classifiers
classifiers (Locale EL _) = EL_XXClassifiers.classifiers
classifiers (Locale EN (Just GB)) = EN_GBClassifiers.classifiers
classifiers (Locale EN (Just US)) = EN_USClassifiers.classifiers
classifiers (Locale EN _) = EN_XXClassifiers.classifiers

View File

@ -0,0 +1,22 @@
-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree. An additional grant
-- of patent rights can be found in the PATENTS file in the same directory.
-----------------------------------------------------------------
-- Auto-generated by regenClassifiers
--
-- DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
-- @generated
-----------------------------------------------------------------
{-# LANGUAGE OverloadedStrings #-}
module Duckling.Ranking.Classifiers.EL_XX (classifiers) where
import Data.String
import Prelude
import qualified Data.HashMap.Strict as HashMap
import Duckling.Ranking.Types
-- | Classifier table for the EL locale; built from an empty list, so it
-- currently holds no classifiers.
classifiers :: Classifiers
classifiers = HashMap.fromList []

View File

@ -28,6 +28,7 @@ import qualified Duckling.Rules.BG as BGRules
import qualified Duckling.Rules.CS as CSRules
import qualified Duckling.Rules.DA as DARules
import qualified Duckling.Rules.DE as DERules
import qualified Duckling.Rules.EL as ELRules
import qualified Duckling.Rules.EN as ENRules
import qualified Duckling.Rules.ES as ESRules
import qualified Duckling.Rules.ET as ETRules
@ -80,6 +81,7 @@ defaultRules BG = BGRules.defaultRules
defaultRules CS = CSRules.defaultRules
defaultRules DA = DARules.defaultRules
defaultRules DE = DERules.defaultRules
defaultRules EL = ELRules.defaultRules
defaultRules EN = ENRules.defaultRules
defaultRules ES = ESRules.defaultRules
defaultRules ET = ETRules.defaultRules
@ -112,6 +114,7 @@ localeRules BG = BGRules.localeRules
localeRules CS = CSRules.localeRules
localeRules DA = DARules.localeRules
localeRules DE = DERules.localeRules
localeRules EL = ELRules.localeRules
localeRules EN = ENRules.localeRules
localeRules ES = ESRules.localeRules
localeRules ET = ETRules.localeRules
@ -144,6 +147,7 @@ langRules BG = BGRules.langRules
langRules CS = CSRules.langRules
langRules DA = DARules.langRules
langRules DE = DERules.langRules
langRules EL = ELRules.langRules
langRules EN = ENRules.langRules
langRules ES = ESRules.langRules
langRules ET = ETRules.langRules

43
Duckling/Rules/EL.hs Normal file
View File

@ -0,0 +1,43 @@
-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree. An additional grant
-- of patent rights can be found in the PATENTS file in the same directory.
{-# LANGUAGE GADTs #-}
{-# LANGUAGE OverloadedStrings #-}
module Duckling.Rules.EL
( defaultRules
, langRules
, localeRules
) where
import Duckling.Dimensions.Types
import Duckling.Locale
import Duckling.Types
import qualified Duckling.Numeral.EL.Rules as Numeral
-- | Default rules for Greek: identical to the language-level rules.
defaultRules :: Some Dimension -> [Rule]
defaultRules dimension = langRules dimension
-- | Region-specific rules for Greek: none, for any region and dimension.
localeRules :: Region -> Some Dimension -> [Rule]
localeRules _region _dimension = []
-- | Language-level rules for Greek, per dimension. Only 'Numeral' has rules;
-- every other dimension is listed explicitly with an empty rule set.
langRules :: Some Dimension -> [Rule]
langRules (This dimension) = case dimension of
  AmountOfMoney -> []
  Distance -> []
  Duration -> []
  Email -> []
  Numeral -> Numeral.rules
  Ordinal -> []
  PhoneNumber -> []
  Quantity -> []
  RegexMatch -> []
  Temperature -> []
  Time -> []
  TimeGrain -> []
  Url -> []
  Volume -> []

View File

@ -45,6 +45,7 @@ library
, Duckling.Rules.CS
, Duckling.Rules.DA
, Duckling.Rules.DE
, Duckling.Rules.EL
, Duckling.Rules.EN
, Duckling.Rules.ES
, Duckling.Rules.ET
@ -82,6 +83,7 @@ library
, Duckling.Ranking.Classifiers.CS_XX
, Duckling.Ranking.Classifiers.DA_XX
, Duckling.Ranking.Classifiers.DE_XX
, Duckling.Ranking.Classifiers.EL_XX
, Duckling.Ranking.Classifiers.EN_CA
, Duckling.Ranking.Classifiers.EN_GB
, Duckling.Ranking.Classifiers.EN_US
@ -125,6 +127,7 @@ library
, Duckling.Dimensions.CS
, Duckling.Dimensions.DA
, Duckling.Dimensions.DE
, Duckling.Dimensions.EL
, Duckling.Dimensions.EN
, Duckling.Dimensions.ES
, Duckling.Dimensions.ET
@ -268,6 +271,8 @@ library
, Duckling.Numeral.DA.Rules
, Duckling.Numeral.DE.Corpus
, Duckling.Numeral.DE.Rules
, Duckling.Numeral.EL.Corpus
, Duckling.Numeral.EL.Rules
, Duckling.Numeral.EN.Corpus
, Duckling.Numeral.EN.Rules
, Duckling.Numeral.ES.Corpus
@ -668,6 +673,7 @@ test-suite duckling-test
, Duckling.Numeral.CS.Tests
, Duckling.Numeral.DA.Tests
, Duckling.Numeral.DE.Tests
, Duckling.Numeral.EL.Tests
, Duckling.Numeral.EN.Tests
, Duckling.Numeral.ES.Tests
, Duckling.Numeral.ET.Tests

View File

@ -163,6 +163,7 @@ getCorpusForLang BG = (testContext, [])
getCorpusForLang CS = (testContext, [])
getCorpusForLang DA = DATime.corpus
getCorpusForLang DE = DETime.corpus
getCorpusForLang EL = (testContext, [])
getCorpusForLang EN = ENTime.corpus
getCorpusForLang ES = ESTime.corpus
getCorpusForLang ET = (testContext, [])

View File

@ -0,0 +1,22 @@
-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree. An additional grant
-- of patent rights can be found in the PATENTS file in the same directory.
module Duckling.Numeral.EL.Tests
( tests ) where
import Data.String
import Prelude
import Test.Tasty
import Duckling.Dimensions.Types
import Duckling.Numeral.EL.Corpus
import Duckling.Testing.Asserts
-- | Test group for Greek numerals: a single corpus test built from 'corpus'
-- for the 'Numeral' dimension.
tests :: TestTree
tests = testGroup "EL Tests" [numeralCorpusTest]
  where
    numeralCorpusTest = makeCorpusTest [This Numeral] corpus

View File

@ -17,6 +17,7 @@ import qualified Duckling.Numeral.BG.Tests as BG
import qualified Duckling.Numeral.CS.Tests as CS
import qualified Duckling.Numeral.DA.Tests as DA
import qualified Duckling.Numeral.DE.Tests as DE
import qualified Duckling.Numeral.EL.Tests as EL
import qualified Duckling.Numeral.EN.Tests as EN
import qualified Duckling.Numeral.ES.Tests as ES
import qualified Duckling.Numeral.ET.Tests as ET
@ -50,6 +51,7 @@ tests = testGroup "Numeral Tests"
, CS.tests
, DA.tests
, DE.tests
, EL.tests
, EN.tests
, ES.tests
, ET.tests