Added Slovak (sk) language with numeral dimension and tests.

Summary: Pull Request resolved: https://github.com/facebook/duckling/pull/428

Reviewed By: haoxuany

Differential Revision: D18348514

Pulled By: patapizza

fbshipit-source-id: 9b0b9c2caa9fec8330746059eefa6185a8f3e072
This commit is contained in:
Ondrej Kováč 2020-01-10 14:31:19 -08:00 committed by Facebook Github Bot
parent cff1ca0080
commit 294771593d
13 changed files with 546 additions and 0 deletions

View File

@ -57,6 +57,7 @@ import qualified Duckling.Dimensions.PL as PLDimensions
import qualified Duckling.Dimensions.PT as PTDimensions
import qualified Duckling.Dimensions.RO as RODimensions
import qualified Duckling.Dimensions.RU as RUDimensions
import qualified Duckling.Dimensions.SK as SKDimensions
import qualified Duckling.Dimensions.SV as SVDimensions
import qualified Duckling.Dimensions.SW as SWDimensions
import qualified Duckling.Dimensions.TA as TADimensions
@ -133,6 +134,7 @@ langDimensions PL = PLDimensions.allDimensions
langDimensions PT = PTDimensions.allDimensions
langDimensions RO = RODimensions.allDimensions
langDimensions RU = RUDimensions.allDimensions
langDimensions SK = SKDimensions.allDimensions
langDimensions SV = SVDimensions.allDimensions
langDimensions SW = SWDimensions.allDimensions
langDimensions TA = TADimensions.allDimensions

18
Duckling/Dimensions/SK.hs Normal file
View File

@ -0,0 +1,18 @@
-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree. An additional grant
-- of patent rights can be found in the PATENTS file in the same directory.
module Duckling.Dimensions.SK
( allDimensions
) where
import Duckling.Dimensions.Types
-- | Dimensions listed for the SK language: only 'Numeral'.
allDimensions :: [Some Dimension]
allDimensions = [This Numeral]

View File

@ -69,6 +69,7 @@ data Lang
| PT
| RO
| RU
| SK
| SV
| SW
| TA

View File

@ -0,0 +1,110 @@
-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree. An additional grant
-- of patent rights can be found in the PATENTS file in the same directory.
{-# LANGUAGE OverloadedStrings #-}
module Duckling.Numeral.SK.Corpus
( corpus
) where
import Data.String
import Prelude
import Duckling.Locale
import Duckling.Numeral.Types
import Duckling.Resolve
import Duckling.Testing.Types
-- | Numeral corpus: the default test context with its locale switched to SK
-- (no region), the default test options, and 'allExamples'.
corpus :: Corpus
corpus = (skContext, testOptions, allExamples)
  where
    skContext = testContext {locale = makeLocale SK Nothing}
-- | Every input string of the corpus paired with the numeral value it is
-- expected to resolve to.
allExamples :: [Example]
allExamples = concatMap (uncurry numeral)
  [ (0, ["0", "nula"])
  , (1, ["1", "jeden", "jedna"])
  , (2, ["2", "dva", "dve"])
  , (7, ["7", "sedem"])
  , (14, ["14", "štrnásť"])
  , (16, ["16", "šestnásť"])
  , (17, ["17", "sedemnásť"])
  , (18, ["18", "osemnásť"])
  , (20, ["20", "dvadsať"])
    -- Decimal comma.
  , (1.1, ["1,1", "1,10", "01,10"])
  , (0.77, ["0,77", ",77"])
    -- Dot as thousands separator and K/M/G suffixes.
  , (100000, ["100.000", "100000", "100K", "100k"])
  , (3000000, ["3M", "3000K", "3000000", "3.000.000"])
  , ( 1200000
    , ["1.200.000", "1200000", "1,2M", "1200K", ",0012G"]
    )
    -- Negative numbers.
  , ( -1200000
    , [ "- 1.200.000"
      , "-1200000"
      , "mínus 1.200.000"
      , "-1,2M"
      , "-1200K"
      , "-,0012G"
      ]
    )
    -- Multiplication by a power of ten.
  , (5000, ["5 tisíc", "päť tisíc"])
  ]
  where
    -- Examples that must all resolve to the given numeral value.
    numeral expected = examples (NumeralValue expected)

View File

@ -0,0 +1,309 @@
-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree. An additional grant
-- of patent rights can be found in the PATENTS file in the same directory.
{-# LANGUAGE GADTs #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedStrings #-}
module Duckling.Numeral.SK.Rules
( rules
) where
import Data.HashMap.Strict (HashMap)
import Data.Maybe
import Data.String
import Data.Text (Text)
import Prelude
import qualified Data.HashMap.Strict as HashMap
import qualified Data.Text as Text
import Duckling.Dimensions.Types
import Duckling.Numeral.Helpers
import Duckling.Numeral.Types (NumeralData (..))
import Duckling.Regex.Types
import Duckling.Types
import qualified Duckling.Numeral.Types as TNumeral
-- | Sums a numeral that has a grain with a following positive,
-- non-multipliable numeral when the two are separated by "a".
--
-- The sum is only produced when the second value is smaller than
-- @10 ^ grain@ of the first.
ruleIntersectWithAnd :: Rule
ruleIntersectWithAnd = Rule
  { name = "intersect (with and)"
  , pattern =
    [ Predicate hasGrain
    , regex "a"
    , Predicate $ \t -> not (isMultipliable t) && isPositive t
    ]
  , prod = \tokens -> case tokens of
      (Token Numeral NumeralData{TNumeral.value = high, TNumeral.grain = Just g}:
       _:
       Token Numeral NumeralData{TNumeral.value = low}:
       _)
        | low < 10 ** fromIntegral g -> double $ high + low
      _ -> Nothing
  }
-- | Negates a positive numeral that is preceded by "-", "mínus" or
-- "záporné".
ruleNumeralsPrefixWithNegativeOrMinus :: Rule
ruleNumeralsPrefixWithNegativeOrMinus = Rule
  { name = "numbers prefix with -, negative or minus"
  , pattern =
    [ regex "-|mínus|záporné"
    , Predicate isPositive
    ]
  , prod = \tokens -> case tokens of
      (_sign:Token Numeral NumeralData{TNumeral.value = magnitude}:_) ->
        double (negate magnitude)
      _ -> Nothing
  }
-- | "zopár" (a few) resolves to 3.
--
-- The bare word "pár" is intentionally not matched here: 'ruleCouple'
-- matches it as well and resolves it to 2, so accepting it in both rules
-- produced two conflicting values for the same text.
ruleFew :: Rule
ruleFew = Rule
  { name = "few"
  , pattern =
    [ regex "zopár"
    ]
  , prod = \_ -> integer 3
  }
-- | Digits grouped by "." with a "," before the fractional digits,
-- as matched by the regex below.
ruleDecimalWithThousandsSeparator :: Rule
ruleDecimalWithThousandsSeparator = Rule
  { name = "decimal with thousands separator"
  , pattern =
    [ regex "(\\d+(\\.\\d\\d\\d)+\\,\\d+)"
    ]
  , prod = \tokens -> case tokens of
      (Token RegexMatch (GroupMatch (raw:_)):_) ->
        parseDouble (normalize raw) >>= double
      _ -> Nothing
  }
  where
    -- Drop the "." group separators first, then turn the "," into ".".
    normalize :: Text -> Text
    normalize = Text.replace "," "." . Text.replace "." Text.empty
-- | Optional integer digits, a comma, then one or more digits.
ruleDecimalNumeral :: Rule
ruleDecimalNumeral = Rule
  { name = "decimal number"
  , pattern =
    [ regex "(\\d*,\\d+)"
    ]
  , prod = \tokens -> case tokens of
      -- NOTE(review): the False flag presumably tells parseDecimal that the
      -- separator is not a dot; confirm against Duckling.Numeral.Helpers.
      (Token RegexMatch (GroupMatch (digits:_)):_) -> parseDecimal False digits
      _ -> Nothing
  }
-- | A multiple of ten (20, 30 .. 90) followed by a numeral accepted by
-- @numberBetween 1 10@; the result is the sum of the two values.
ruleIntegerCompositeTens :: Rule
ruleIntegerCompositeTens = Rule
  { name = "integer 21..99"
  , pattern =
    [ oneOf [20, 30..90]
    , numberBetween 1 10
    ]
  , prod = \tokens -> case tokens of
      (Token Numeral tensData:Token Numeral unitsData:_) ->
        double $ TNumeral.value tensData + TNumeral.value unitsData
      _ -> Nothing
  }
-- | "jeden", "jedno" or "jedna": the integer 1 carrying grain 1.
ruleSingle :: Rule
ruleSingle = Rule
  { name = "single"
  , pattern =
    [ regex "jed(en|no|na)"
    ]
  , prod = const $ integer 1 >>= withGrain 1
  }
-- | Sums a numeral that has a grain with the positive, non-multipliable
-- numeral that directly follows it.
--
-- The sum is only produced when the second value is smaller than
-- @10 ^ grain@ of the first.
ruleSum :: Rule
ruleSum = Rule
  { name = "intersect"
  , pattern =
    [ Predicate hasGrain
    , Predicate $ \t -> not (isMultipliable t) && isPositive t
    ]
  , prod = \tokens -> case tokens of
      (Token Numeral NumeralData{TNumeral.value = high, TNumeral.grain = Just g}:
       Token Numeral NumeralData{TNumeral.value = low}:
       _)
        | low < 10 ** fromIntegral g -> double $ high + low
      _ -> Nothing
  }
-- | Any numeral followed by a multipliable numeral; the two tokens are
-- combined with 'multiply'.
ruleMultiply :: Rule
ruleMultiply = Rule
  { name = "compose by multiplication"
  , pattern =
    [ dimension Numeral
    , Predicate isMultipliable
    ]
  , prod = \tokens -> case tokens of
      (factor:multiplier:_) -> multiply factor multiplier
      _ -> Nothing
  }
-- | A numeral followed by a "k", "m" or "g" suffix, scaling the value by
-- 1e3, 1e6 or 1e9 respectively.
ruleNumeralsSuffixesKMG :: Rule
ruleNumeralsSuffixesKMG = Rule
  { name = "numbers suffixes (K, M, G)"
  , pattern =
    [ dimension Numeral
    , regex "([kmg])(?=[\\W\\$€]|$)"
    ]
  , prod = \case
      (Token Numeral NumeralData{TNumeral.value = v}:
       Token RegexMatch (GroupMatch (suffix:_)):
       _) -> scaleFor (Text.toLower suffix) >>= \scale -> double $ v * scale
      _ -> Nothing
  }
  where
    -- Multiplier denoted by a lower-cased suffix letter.
    scaleFor :: Text -> Maybe Double
    scaleFor = \case
      "k" -> Just 1e3
      "m" -> Just 1e6
      "g" -> Just 1e9
      _ -> Nothing
-- | Words for hundred, thousand and million. Each produces a multipliable
-- numeral whose grain equals its power of ten.
rulePowersOfTen :: Rule
rulePowersOfTen = Rule
  { name = "powers of tens"
  , pattern =
    [ regex "(sto(vky)?|tisíce?|milióny?)"
    ]
  , prod = \tokens -> case tokens of
      (Token RegexMatch (GroupMatch (word:_)):_) -> do
        (amount, grain) <- powerFor (Text.toLower word)
        double amount >>= withGrain grain >>= withMultipliable
      _ -> Nothing
  }
  where
    -- Value and grain for each lower-cased word form.
    powerFor :: Text -> Maybe (Double, Int)
    powerFor w
      | w `elem` ["sto", "stovky"] = Just (1e2, 2)
      | w `elem` ["tisíc", "tisíce"] = Just (1e3, 3)
      | w `elem` ["milión", "milióny"] = Just (1e6, 6)
      | otherwise = Nothing
-- | "pár" or "páry" resolves to the integer 2.
ruleCouple :: Rule
ruleCouple = Rule
  { name = "couple, a pair"
  , pattern =
    [ regex "páry?"
    ]
  , prod = const (integer 2)
  }
-- | "tucet" resolves to a multipliable 12 carrying grain 1.
ruleDozen :: Rule
ruleDozen = Rule
  { name = "dozen"
  , pattern =
    [ regex "tucet"
    ]
  , prod = const $ integer 12 >>= withGrain 1 >>= withMultipliable
  }
-- | Spelled-out forms of the integers 0..19, keyed by lower-case word.
zeroToNineteenMap :: HashMap Text Integer
zeroToNineteenMap = HashMap.fromList
  [ (form, number)
  | (number, forms) <- table
  , form <- forms
  ]
  where
    -- Each value together with every word form that denotes it.
    table :: [(Integer, [Text])]
    table =
      [ (0, ["nula"])
      , (1, ["jeden", "jedna", "jedno"])
      , (2, ["dva", "dve"])
      , (3, ["tri"])
      , (4, ["štyri"])
      , (5, ["päť"])
      , (6, ["šesť"])
      , (7, ["sedem"])
      , (8, ["osem"])
      , (9, ["deväť"])
      , (10, ["desať"])
      , (11, ["jedenásť"])
      , (12, ["dvanásť"])
      , (13, ["trinásť"])
      , (14, ["štrnásť"])
      , (15, ["pätnásť"])
      , (16, ["šestnásť"])
      , (17, ["sedemnásť"])
      , (18, ["osemnásť"])
      , (19, ["devätnásť"])
      ]
-- | Spelled-out integers 0..19, resolved through 'zeroToNineteenMap'.
ruleInteger :: Rule
ruleInteger = Rule
  { name = "integer (0..19)"
  -- Longer alternatives come first (e.g. jedenásť before jeden); otherwise
  -- the shorter word would always shadow the longer one.
  , pattern =
    [ regex "(nula|jed(enásť|en|na|no)|dv(anásť|a|e)|trinásť|tri|štrnásť|štyri|pätnásť|päť|šestnásť|šesť|sedemnásť|sedem|osemnásť|osem|devätnásť|deväť|desať)"
    ]
  , prod = \tokens -> case tokens of
      (Token RegexMatch (GroupMatch (word:_)):_) ->
        integer =<< HashMap.lookup (Text.toLower word) zeroToNineteenMap
      _ -> Nothing
  }
-- | Spelled-out multiples of ten (20..90), keyed by lower-case word.
--
-- The standard spelling of 90 is "deväťdesiat". The variant without the
-- soft "ť" ("devätdesiat") is kept as well so that input which parsed
-- before this spelling was added keeps parsing.
dozenMap :: HashMap Text Integer
dozenMap = HashMap.fromList
  [ ( "dvadsať"     , 20)
  , ( "tridsať"     , 30)
  , ( "štyridsať"   , 40)
  , ( "päťdesiat"   , 50)
  , ( "šesťdesiat"  , 60)
  , ( "sedemdesiat" , 70)
  , ( "osemdesiat"  , 80)
  , ( "deväťdesiat" , 90)
  , ( "devätdesiat" , 90)
  ]

-- | Spelled-out multiples of ten, resolved through 'dozenMap'.
--
-- Every word the regex can match must have an entry in 'dozenMap';
-- a match without an entry makes the production return Nothing.
ruleInteger2 :: Rule
ruleInteger2 = Rule
  { name = "integer (20..90)"
  , pattern =
    [ regex "((dva|tri|štyri)dsať|(päť|šesť|sedem|osem|deväť|devät)desiat)"
    ]
  , prod = \case
      (Token RegexMatch (GroupMatch (match:_)):_) ->
        HashMap.lookup (Text.toLower match) dozenMap >>= integer
      _ -> Nothing
  }
-- | A numeral, one of "celá" / "celých" / "celé", then a numeral without a
-- grain. The result is the first value plus 'decimalsToDouble' of the
-- second.
ruleNumeralDotNumeral :: Rule
ruleNumeralDotNumeral = Rule
  { name = "number dot number"
  , pattern =
    [ dimension Numeral
    , regex "celá|celých|celé"
    , Predicate $ not . hasGrain
    ]
  , prod = \tokens -> case tokens of
      (Token Numeral NumeralData{TNumeral.value = whole}:
       _:
       Token Numeral NumeralData{TNumeral.value = fraction}:
       _) -> double $ whole + decimalsToDouble fraction
      _ -> Nothing
  }
-- | One to three digits followed by one to five "."-prefixed groups of
-- three digits; the dots are removed before the number is parsed.
ruleIntegerWithThousandsSeparator :: Rule
ruleIntegerWithThousandsSeparator = Rule
  { name = "integer with thousands separator ."
  , pattern =
    [ regex "(\\d{1,3}(\\.\\d\\d\\d){1,5})"
    ]
  , prod = \tokens -> case tokens of
      (Token RegexMatch (GroupMatch (grouped:_)):_) ->
        double =<< parseDouble (Text.replace "." Text.empty grouped)
      _ -> Nothing
  }
-- | All numeral rules defined in this module.
rules :: [Rule]
rules =
  [ ruleCouple, ruleDecimalNumeral, ruleDecimalWithThousandsSeparator
  , ruleDozen, ruleFew
  , ruleInteger, ruleInteger2, ruleIntegerCompositeTens
  , ruleIntegerWithThousandsSeparator
  , ruleSum, ruleIntersectWithAnd, ruleMultiply
  , ruleNumeralDotNumeral
  , ruleNumeralsPrefixWithNegativeOrMinus, ruleNumeralsSuffixesKMG
  , rulePowersOfTen, ruleSingle
  ]

View File

@ -51,6 +51,7 @@ import qualified Duckling.Ranking.Classifiers.PL_XX as PL_XXClassifiers
import qualified Duckling.Ranking.Classifiers.PT_XX as PT_XXClassifiers
import qualified Duckling.Ranking.Classifiers.RO_XX as RO_XXClassifiers
import qualified Duckling.Ranking.Classifiers.RU_XX as RU_XXClassifiers
import qualified Duckling.Ranking.Classifiers.SK_XX as SK_XXClassifiers
import qualified Duckling.Ranking.Classifiers.SV_XX as SV_XXClassifiers
import qualified Duckling.Ranking.Classifiers.SW_XX as SW_XXClassifiers
import qualified Duckling.Ranking.Classifiers.TA_XX as TA_XXClassifiers
@ -99,6 +100,7 @@ classifiers (Locale PL _) = PL_XXClassifiers.classifiers
classifiers (Locale PT _) = PT_XXClassifiers.classifiers
classifiers (Locale RO _) = RO_XXClassifiers.classifiers
classifiers (Locale RU _) = RU_XXClassifiers.classifiers
classifiers (Locale SK _) = SK_XXClassifiers.classifiers
classifiers (Locale SV _) = SV_XXClassifiers.classifiers
classifiers (Locale SW _) = SW_XXClassifiers.classifiers
classifiers (Locale TA _) = TA_XXClassifiers.classifiers

View File

@ -0,0 +1,22 @@
-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree. An additional grant
-- of patent rights can be found in the PATENTS file in the same directory.
-----------------------------------------------------------------
-- Auto-generated by regenClassifiers
--
-- DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
-- @generated
-----------------------------------------------------------------
{-# LANGUAGE OverloadedStrings #-}
module Duckling.Ranking.Classifiers.SK_XX (classifiers) where
import Data.String
import Prelude
import qualified Data.HashMap.Strict as HashMap
import Duckling.Ranking.Types
-- | Classifiers for SK; the map in this generated module is empty.
classifiers :: Classifiers
classifiers = HashMap.fromList []

View File

@ -58,6 +58,7 @@ import qualified Duckling.Rules.PL as PLRules
import qualified Duckling.Rules.PT as PTRules
import qualified Duckling.Rules.RO as RORules
import qualified Duckling.Rules.RU as RURules
import qualified Duckling.Rules.SK as SKRules
import qualified Duckling.Rules.SV as SVRules
import qualified Duckling.Rules.SW as SWRules
import qualified Duckling.Rules.TA as TARules
@ -124,6 +125,7 @@ defaultRules PL = PLRules.defaultRules
defaultRules PT = PTRules.defaultRules
defaultRules RO = RORules.defaultRules
defaultRules RU = RURules.defaultRules
defaultRules SK = SKRules.defaultRules
defaultRules SV = SVRules.defaultRules
defaultRules SW = SWRules.defaultRules
defaultRules TA = TARules.defaultRules
@ -170,6 +172,7 @@ localeRules PL = PLRules.localeRules
localeRules PT = PTRules.localeRules
localeRules RO = RORules.localeRules
localeRules RU = RURules.localeRules
localeRules SK = SKRules.localeRules
localeRules SV = SVRules.localeRules
localeRules SW = SWRules.localeRules
localeRules TA = TARules.localeRules
@ -216,6 +219,7 @@ langRules PL = PLRules.langRules
langRules PT = PTRules.langRules
langRules RO = RORules.langRules
langRules RU = RURules.langRules
langRules SK = SKRules.langRules
langRules SV = SVRules.langRules
langRules SW = SWRules.langRules
langRules TA = TARules.langRules

46
Duckling/Rules/SK.hs Normal file
View File

@ -0,0 +1,46 @@
-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree. An additional grant
-- of patent rights can be found in the PATENTS file in the same directory.
{-# LANGUAGE GADTs #-}
module Duckling.Rules.SK
( defaultRules
, langRules
, localeRules
) where
import Duckling.Dimensions.Types
import Duckling.Locale
import Duckling.Types
import qualified Duckling.Numeral.SK.Rules as Numeral
-- | Default rules for a dimension; identical to 'langRules'.
defaultRules :: Some Dimension -> [Rule]
defaultRules target = langRules target
-- | Region-specific rules. Only custom dimensions contribute any, through
-- 'dimLocaleRules'; every other dimension yields no rules.
localeRules :: Region -> Some Dimension -> [Rule]
localeRules region target = case target of
  This (CustomDimension dim) -> dimLocaleRules region dim
  _ -> []
-- | Language-level rules per dimension. Only 'Numeral' has rules here;
-- custom dimensions are delegated to 'dimLangRules'.
--
-- Every dimension is listed explicitly rather than caught by a wildcard.
langRules :: Some Dimension -> [Rule]
langRules target = case target of
  This AmountOfMoney -> []
  This CreditCardNumber -> []
  This Distance -> []
  This Duration -> []
  This Email -> []
  This Numeral -> Numeral.rules
  This Ordinal -> []
  This PhoneNumber -> []
  This Quantity -> []
  This RegexMatch -> []
  This Temperature -> []
  This Time -> []
  This TimeGrain -> []
  This Url -> []
  This Volume -> []
  This (CustomDimension dim) -> dimLangRules SK dim

View File

@ -84,6 +84,7 @@ library
, Duckling.Rules.PT
, Duckling.Rules.RO
, Duckling.Rules.RU
, Duckling.Rules.SK
, Duckling.Rules.SV
, Duckling.Rules.SW
, Duckling.Rules.TA
@ -140,6 +141,7 @@ library
, Duckling.Ranking.Classifiers.PT_XX
, Duckling.Ranking.Classifiers.RO_XX
, Duckling.Ranking.Classifiers.RU_XX
, Duckling.Ranking.Classifiers.SK_XX
, Duckling.Ranking.Classifiers.SV_XX
, Duckling.Ranking.Classifiers.SW_XX
, Duckling.Ranking.Classifiers.TA_XX
@ -194,6 +196,7 @@ library
, Duckling.Dimensions.PT
, Duckling.Dimensions.RO
, Duckling.Dimensions.RU
, Duckling.Dimensions.SK
, Duckling.Dimensions.SV
, Duckling.Dimensions.SW
, Duckling.Dimensions.TA
@ -458,6 +461,8 @@ library
, Duckling.Numeral.RO.Rules
, Duckling.Numeral.RU.Corpus
, Duckling.Numeral.RU.Rules
, Duckling.Numeral.SK.Corpus
, Duckling.Numeral.SK.Rules
, Duckling.Numeral.SV.Corpus
, Duckling.Numeral.SV.Rules
, Duckling.Numeral.SW.Corpus
@ -959,6 +964,7 @@ test-suite duckling-test
, Duckling.Numeral.PT.Tests
, Duckling.Numeral.RO.Tests
, Duckling.Numeral.RU.Tests
, Duckling.Numeral.SK.Tests
, Duckling.Numeral.SV.Tests
, Duckling.Numeral.SW.Tests
, Duckling.Numeral.TA.Tests

View File

@ -206,6 +206,7 @@ getCorpusForLang PL = PLTime.corpus
getCorpusForLang PT = PTTime.corpus
getCorpusForLang RO = ROTime.corpus
getCorpusForLang RU = (testContext, testOptions, [])
getCorpusForLang SK = (testContext, testOptions, [])
getCorpusForLang SV = SVTime.corpus
getCorpusForLang SW = (testContext, testOptions, [])
getCorpusForLang TA = (testContext, testOptions, [])

View File

@ -0,0 +1,23 @@
-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree. An additional grant
-- of patent rights can be found in the PATENTS file in the same directory.
module Duckling.Numeral.SK.Tests
( tests ) where
import Prelude
import Data.String
import Test.Tasty
import Duckling.Dimensions.Types
import Duckling.Numeral.SK.Corpus
import Duckling.Testing.Asserts
-- | Test group that runs the SK numeral corpus for the 'Numeral' dimension.
tests :: TestTree
tests = testGroup "SK Tests" [numeralCorpusTest]
  where
    numeralCorpusTest = makeCorpusTest [This Numeral] corpus

View File

@ -47,6 +47,7 @@ import qualified Duckling.Numeral.PL.Tests as PL
import qualified Duckling.Numeral.PT.Tests as PT
import qualified Duckling.Numeral.RO.Tests as RO
import qualified Duckling.Numeral.RU.Tests as RU
import qualified Duckling.Numeral.SK.Tests as SK
import qualified Duckling.Numeral.SV.Tests as SV
import qualified Duckling.Numeral.SW.Tests as SW
import qualified Duckling.Numeral.TA.Tests as TA
@ -94,6 +95,7 @@ tests = testGroup "Numeral Tests"
, PT.tests
, RO.tests
, RU.tests
, SK.tests
, SV.tests
, SW.tests
, TA.tests