Numeral/DE: fix keiner + cleanup

Summary:
* fixed `keiner`/`keinen`: they are now matched via `keine[rn]`, listed before the shorter `keine?s?` alternative
* removed redundant `ruleInteger`
* replaced pattern matching with hashmap lookup
* renamed `dozensMap` to `tensMap`

Reviewed By: blandinw

Differential Revision: D6336252

fbshipit-source-id: 740734ab7b0b289adc4f466f966c4c5e59af75ad
This commit is contained in:
Julien Odent 2017-11-15 13:08:50 -08:00 committed by Facebook Github Bot
parent 29d776dee5
commit 54ccbf81df
5 changed files with 40 additions and 70 deletions

View File

@ -9,10 +9,11 @@
{-# LANGUAGE OverloadedStrings #-}
module Duckling.Numeral.DE.Corpus
( corpus ) where
( corpus
) where
import Prelude
import Data.String
import Prelude
import Duckling.Locale
import Duckling.Numeral.Types
@ -27,6 +28,8 @@ allExamples = concat
[ examples (NumeralValue 0)
[ "0"
, "null"
, "keiner"
, "keinen"
]
, examples (NumeralValue 1)
[ "1"
@ -36,6 +39,12 @@ allExamples = concat
[ "3"
, "Drei"
]
, examples (NumeralValue 8)
[ "acht"
]
, examples (NumeralValue 18)
[ "achtzehn"
]
, examples (NumeralValue 30)
[ "30"
, "dreissig"

View File

@ -264,59 +264,29 @@ zeroNineteenMap = HashMap.fromList
]
-- TODO: Single-word composition (#110)
ruleToNineteen :: Rule
ruleToNineteen = Rule
-- | Rule named "integer (0..19)": matches a German spelled-out small number
-- (including the "kein*" / "null" / "nichts" forms) and resolves the
-- lowercased match through 'zeroNineteenMap'. A match that has no entry in
-- the map produces no token.
ruleZeroToNineteen :: Rule
ruleZeroToNineteen = Rule
  { name = "integer (0..19)"
  -- Longer alternatives must be listed before their own prefixes
  -- (e.g. "vierzehn" before "vier", "fünfzehn" before "fünf"), otherwise the
  -- shorter word always shadows the longer one.
  -- NOTE(review): assumes 'zeroNineteenMap' has a "fünfzehn" key; confirm.
  , pattern =
    [ regex "(keine[rn]|keine?s?|null|nichts|eins?(er)?|zwei|dreizehn|drei|vierzehn|vier|fünfzehn|fünf|sechzehn|sechs|siebzehn|sieben|achtzehn|acht|neunzehn|neun|elf|zwölf)"
    ]
  , prod = \tokens -> case tokens of
      (Token RegexMatch (GroupMatch (match:_)):_) ->
        HashMap.lookup (Text.toLower match) zeroNineteenMap >>= integer
      _ -> Nothing
  }
-- | Rule named "integer (0..19)": matches a German spelled-out small number
-- and maps the lowercased match to its value with an explicit case table.
-- Any matched text without a case branch yields 'Nothing'.
ruleInteger :: Rule
ruleInteger = Rule
  { name = "integer (0..19)"
  -- Longer alternatives come before their prefixes ("keiner"/"keinen" before
  -- "keine", "fünfzehn" before "fünf"), otherwise the shorter alternative
  -- wins and the longer word is never captured as a whole.
  , pattern =
    [ regex "(keine[rn]|keine?s?|null|nichts|eins?(er)?|zwei|dreizehn|drei|vierzehn|vier|fünfzehn|fünf|sechzehn|sechs|siebzehn|sieben|achtzehn|acht|neunzehn|neun|elf|zwölf)"
    ]
  , prod = \tokens -> case tokens of
      (Token RegexMatch (GroupMatch (match:_)):_) -> case Text.toLower match of
        "nichts" -> integer 0
        "keine" -> integer 0
        "null" -> integer 0
        "keiner" -> integer 0
        "kein" -> integer 0
        "keins" -> integer 0
        "keinen" -> integer 0
        "keines" -> integer 0
        "einer" -> integer 1
        "eins" -> integer 1
        "ein" -> integer 1
        "eine" -> integer 1
        "zwei" -> integer 2
        "drei" -> integer 3
        "vier" -> integer 4
        "fünf" -> integer 5
        "sechs" -> integer 6
        "sieben" -> integer 7
        "acht" -> integer 8
        "neun" -> integer 9
        -- The pattern above has no bare "zehn" alternative, so this branch
        -- is not reachable from this rule's own regex.
        "zehn" -> integer 10
        "elf" -> integer 11
        "zwölf" -> integer 12
        "dreizehn" -> integer 13
        "vierzehn" -> integer 14
        "fünfzehn" -> integer 15
        "sechzehn" -> integer 16
        "siebzehn" -> integer 17
        "achtzehn" -> integer 18
        "neunzehn" -> integer 19
        _ -> Nothing
      _ -> Nothing
  }
-- | Lookup table from the German word for each multiple of ten
-- ("zwanzig" .. "neunzig") to its value (20 .. 90). Keys are lowercase.
tensMap :: HashMap Text Integer
tensMap = HashMap.fromList (zip tensWords [20, 30 .. 90])
  where
    -- Listed in ascending order so each word pairs with 20, 30, ..., 90.
    tensWords :: [Text]
    tensWords =
      [ "zwanzig"
      , "dreissig"
      , "vierzig"
      , "fünfzig"
      , "sechzig"
      , "siebzig"
      , "achtzig"
      , "neunzig"
      ]
-- TODO: Single-word composition (#110)
ruleInteger2 :: Rule
@ -326,16 +296,8 @@ ruleInteger2 = Rule
[ regex "(zwanzig|dreissig|vierzig|fünfzig|sechzig|siebzig|achtzig|neunzig)"
]
, prod = \tokens -> case tokens of
(Token RegexMatch (GroupMatch (match:_)):_) -> case Text.toLower match of
"zwanzig" -> integer 20
"dreissig" -> integer 30
"vierzig" -> integer 40
"fünfzig" -> integer 50
"sechzig" -> integer 60
"siebzig" -> integer 70
"achtzig" -> integer 80
"neunzig" -> integer 90
_ -> Nothing
(Token RegexMatch (GroupMatch (match:_)):_) ->
HashMap.lookup (Text.toLower match) tensMap >>= integer
_ -> Nothing
}
@ -374,7 +336,6 @@ rules =
, ruleDecimalWithThousandsSeparator
, ruleDozen
, ruleFew
, ruleInteger
, ruleInteger2
, ruleInteger3
, ruleIntegerNumeric
@ -387,5 +348,5 @@ rules =
, ruleNumeralsUnd
, rulePowersOfTen
, ruleTen
, ruleToNineteen
, ruleZeroToNineteen
]

View File

@ -121,8 +121,8 @@ ruleTwentyoneToTwentynine = Rule
_ -> Nothing
}
dozensMap :: HashMap Text Integer
dozensMap = HashMap.fromList
tensMap :: HashMap Text Integer
tensMap = HashMap.fromList
[ ( "h\x00FAsz", 20 )
, ( "harminc", 30 )
, ( "negyven", 40 )
@ -141,7 +141,7 @@ ruleTens = Rule
]
, prod = \tokens -> case tokens of
(Token RegexMatch (GroupMatch (match:_)):_) ->
HashMap.lookup (Text.toLower match) dozensMap >>= integer
HashMap.lookup (Text.toLower match) tensMap >>= integer
_ -> Nothing
}
@ -153,7 +153,7 @@ ruleCompositeTens = Rule
]
, prod = \tokens -> case tokens of
(Token RegexMatch (GroupMatch (m1:m2:_)):_) -> do
v1 <- HashMap.lookup (Text.toLower m1) dozensMap
v1 <- HashMap.lookup (Text.toLower m1) tensMap
v2 <- HashMap.lookup (Text.toLower m2) ruleNumeralMap
integer $ v1 + v2
_ -> Nothing

View File

@ -248,8 +248,8 @@ ruleInteger = Rule
_ -> Nothing
}
dozensMap :: HashMap Text Integer
dozensMap = HashMap.fromList
tensMap :: HashMap Text Integer
tensMap = HashMap.fromList
[ ( "tyve" , 20 )
, ( "tjue" , 20 )
, ( "tredve" , 30 )
@ -271,7 +271,7 @@ ruleInteger2 = Rule
]
, prod = \tokens -> case tokens of
(Token RegexMatch (GroupMatch (match:_)):_) ->
HashMap.lookup (Text.toLower match) dozensMap >>= integer
HashMap.lookup (Text.toLower match) tensMap >>= integer
_ -> Nothing
}

View File

@ -28,8 +28,8 @@ import Duckling.Regex.Types
import Duckling.Types
import qualified Duckling.Numeral.Types as TNumeral
dozensMap :: HashMap Text Integer
dozensMap = HashMap.fromList
tensMap :: HashMap Text Integer
tensMap = HashMap.fromList
[ ( "двадцать", 20)
, ( "тридцать", 30)
, ( "сорок", 40)
@ -48,7 +48,7 @@ ruleInteger5 = Rule
]
, prod = \tokens -> case tokens of
(Token RegexMatch (GroupMatch (match:_)):_) ->
HashMap.lookup (Text.toLower match) dozensMap >>= integer
HashMap.lookup (Text.toLower match) tensMap >>= integer
_ -> Nothing
}