Numeral/ES: Style fixes - names, order, etc

Summary:
While looking into fixing https://github.com/facebook/duckling/issues/380
I was having a bit of trouble navigating the existing rules and guessing
what is / is not supported.

This diff refactors the Numeral/ES code to be easier to navigate:
- rename all the `ruleNumeral{1,2,3,4,5,6}` rules to be descriptive
- change the order so the rules run from small to large numbers, and
  make sure the order of the definitions matches the order of the rules
  listed at the end of the module
- use [20, 30 .. 90] instead of manually specifying the same list out-of-order

Reviewed By: chessai

Differential Revision: D27858134

fbshipit-source-id: b13983d75b36bb4e2b387ef06fe61066d81ae19a
This commit is contained in:
Steven Troxler 2021-04-23 09:35:59 -07:00 committed by Facebook GitHub Bot
parent bf696ba185
commit 6db071069b

View File

@ -24,42 +24,6 @@ import qualified Duckling.Numeral.Types as TNumeral
import Duckling.Regex.Types
import Duckling.Types
-- | Matches "-" or "menos" followed by a token satisfying 'isPositive'
-- and produces that numeral's value negated.
ruleNumeralsPrefixWithNegativeOrMinus :: Rule
ruleNumeralsPrefixWithNegativeOrMinus = Rule
  { name = "numbers prefix with -, negative or minus"
  , pattern =
      [ regex "-|menos"
      , Predicate isPositive
      ]
  , prod = \case
      (_ : Token Numeral NumeralData { TNumeral.value = n } : _) ->
        double (n * (-1))
      _ -> Nothing
  }
-- | Lookup table from the lowercase Spanish words for the multiples of
-- ten ("veinte" .. "noventa") to their integer values (20 .. 90).
byTensMap :: HashMap.HashMap Text.Text Integer
byTensMap = HashMap.fromList $ zip tensWords [20, 30 .. 90]
  where
    -- Listed in ascending order so each word pairs with its value.
    tensWords =
      [ "veinte"
      , "treinta"
      , "cuarenta"
      , "cincuenta"
      , "sesenta"
      , "setenta"
      , "ochenta"
      , "noventa"
      ]
-- | Matches a single tens word ("veinte" .. "noventa") and yields the
-- integer stored for its lowercased form in 'byTensMap'.
ruleNumeral2 :: Rule
ruleNumeral2 = Rule
  { name = "number (20..90)"
  , pattern =
      [ regex
          "(veinte|treinta|cuarenta|cincuenta|sesenta|setenta|ochenta|noventa)"
      ]
  , prod = \case
      (Token RegexMatch (GroupMatch (word : _)) : _) ->
        integer =<< HashMap.lookup (Text.toLower word) byTensMap
      _ -> Nothing
  }
zeroToFifteenMap :: HashMap.HashMap Text.Text Integer
zeroToFifteenMap =
HashMap.fromList
@ -87,8 +51,8 @@ zeroToFifteenMap =
, ("quince", 15)
]
ruleNumeral :: Rule
ruleNumeral = Rule
ruleNumeralZeroToFifteen :: Rule
ruleNumeralZeroToFifteen = Rule
{ name = "number (0..15)"
, pattern =
[ regex
@ -100,6 +64,24 @@ ruleNumeral = Rule
_ -> Nothing
}
-- | Matches a leading "cero", "zero" or "0" followed by a numeral accepted
-- by @numberBetween 1 10@, and yields that numeral's value unchanged.
ruleBelowTenWithTwoDigits :: Rule
ruleBelowTenWithTwoDigits = Rule
  { name = "integer (0-9) with two digits"
  , pattern = [regex "((c|z)ero)|0", numberBetween 1 10]
  , prod = \tokens -> case tokens of
      (_ : Token Numeral NumeralData { TNumeral.value = digit } : _) ->
        double digit
      _ -> Nothing
  }
sixteenToTwentyNineMap :: HashMap.HashMap Text.Text Integer
sixteenToTwentyNineMap =
HashMap.fromList
@ -125,8 +107,9 @@ sixteenToTwentyNineMap =
, ("veintinueve", 29)
]
ruleNumeral5 :: Rule
ruleNumeral5 = Rule
ruleNumeralSixteenToTwentyNine :: Rule
ruleNumeralSixteenToTwentyNine = Rule
{ name = "number (16..19 21..29)"
, pattern =
[ regex
@ -138,27 +121,50 @@ ruleNumeral5 = Rule
_ -> Nothing
}
ruleNumeral3 :: Rule
ruleNumeral3 = Rule
{ name = "number (16..19)"
-- | Matches a numeral whose value is 10, the word "y", and a numeral
-- accepted by @numberBetween 6 10@; yields 10 plus the trailing numeral.
ruleNumeralSixteenToNineteenWithDiez :: Rule
ruleNumeralSixteenToNineteenWithDiez = Rule
  { name = "number (16..19, two words)"
  , pattern =
      [ numberWith TNumeral.value (== 10)
      , regex "y"
      , numberBetween 6 10
      ]
  , prod = \case
      (_ : _ : Token Numeral NumeralData { TNumeral.value = units } : _) ->
        double (10 + units)
      _ -> Nothing
  }
-- | Lookup table from the lowercase Spanish words for the multiples of
-- ten ("veinte" .. "noventa") to their integer values (20 .. 90).
byTensMap :: HashMap.HashMap Text.Text Integer
byTensMap = HashMap.fromList (zip tensWords [20, 30 .. 90])
  where
    -- Listed in ascending order so each word pairs with its value.
    tensWords =
      [ "veinte"
      , "treinta"
      , "cuarenta"
      , "cincuenta"
      , "sesenta"
      , "setenta"
      , "ochenta"
      , "noventa"
      ]
ruleNumeralsSuffixesKMG :: Rule
ruleNumeralsSuffixesKMG = Rule
{ name = "numbers suffixes (K, M, G)"
, pattern = [dimension Numeral, regex "([kmg])(?=[\\W\\$€]|$)"]
ruleNumeralTwentyToNinetyTens :: Rule
ruleNumeralTwentyToNinetyTens = Rule
{ name = "number (20..90)"
, pattern =
[ regex
"(veinte|treinta|cuarenta|cincuenta|sesenta|setenta|ochenta|noventa)"
]
, prod = \tokens -> case tokens of
(Token Numeral NumeralData { TNumeral.value = v } : Token RegexMatch (GroupMatch (match : _)) : _) ->
case Text.toLower match of
"k" -> double $ v * 1e3
"m" -> double $ v * 1e6
"g" -> double $ v * 1e9
_ -> Nothing
(Token RegexMatch (GroupMatch (match : _)) : _) ->
HashMap.lookup (Text.toLower match) byTensMap >>= integer
_ -> Nothing
}
-- | Matches a numeral from the list @[20, 30 .. 90]@, the word "y", and a
-- numeral accepted by @numberBetween 1 10@; yields the sum of the two.
ruleNumeralTwentyOneToNinetyNine :: Rule
ruleNumeralTwentyOneToNinetyNine = Rule
  { name = "number (21..29 31..39 41..49 51..59 61..69 71..79 81..89 91..99)"
  , pattern =
      [ oneOf [20, 30 .. 90]
      , regex "y"
      , numberBetween 1 10
      ]
  , prod = \case
      ( Token Numeral NumeralData { TNumeral.value = tens }
        : _
        : Token Numeral NumeralData { TNumeral.value = units }
        : _
        ) -> double (tens + units)
      _ -> Nothing
  }
@ -179,8 +185,8 @@ oneHundredToThousandMap =
, ("mil", 1000)
]
ruleNumeral6 :: Rule
ruleNumeral6 = Rule
ruleNumeralHundredsToMil :: Rule
ruleNumeralHundredsToMil = Rule
{ name = "number 100..1000 "
, pattern =
[ regex
@ -192,20 +198,9 @@ ruleNumeral6 = Rule
_ -> Nothing
}
-- | Matches one of the listed tens values, the word "y", and a numeral
-- accepted by @numberBetween 1 10@; yields the sum of the two numerals.
ruleNumeral4 :: Rule
ruleNumeral4 = Rule
  { name = "number (21..29 31..39 41..49 51..59 61..69 71..79 81..89 91..99)"
  , pattern =
      [ oneOf [70, 20, 60, 50, 40, 90, 30, 80]
      , regex "y"
      , numberBetween 1 10
      ]
  , prod = \case
      ( Token Numeral NumeralData { TNumeral.value = tens }
        : _
        : Token Numeral NumeralData { TNumeral.value = units }
        : _
        ) -> double (tens + units)
      _ -> Nothing
  }
ruleNumerals :: Rule
ruleNumerals = Rule
{ name = "numbers 200..999"
ruleNumeralThreePartHundreds :: Rule
ruleNumeralThreePartHundreds = Rule
{ name = "<2-10> cientos <0-99>"
, pattern =
[ numberBetween 2 10
, numberWith TNumeral.value (== 100)
@ -227,35 +222,42 @@ ruleNumeralDotNumeral = Rule
_ -> Nothing
}
ruleBelowTenWithTwoDigits :: Rule
ruleBelowTenWithTwoDigits = Rule
{
name = "integer (0-9) with two digits"
, pattern =
[
regex "((c|z)ero)|0"
, numberBetween 1 10
]
, prod = \case
(
_:
Token Numeral NumeralData { TNumeral.value = v }:
_
) -> double v
-- | Matches any numeral followed by a "k", "m" or "g" suffix (compared
-- case-insensitively via 'Text.toLower') and scales the numeral's value
-- by 1e3, 1e6 or 1e9 respectively.
ruleNumeralsSuffixesKMG :: Rule
ruleNumeralsSuffixesKMG = Rule
  { name = "numbers suffixes (K, M, G)"
  , pattern =
      [ dimension Numeral
      , regex "([kmg])(?=[\\W\\$€]|$)"
      ]
  , prod = \case
      ( Token Numeral NumeralData { TNumeral.value = n }
        : Token RegexMatch (GroupMatch (suffix : _))
        : _
        ) ->
          -- An unrecognised suffix yields Nothing, like the fallthrough case.
          lookup (Text.toLower suffix) [("k", 1e3), ("m", 1e6), ("g", 1e9)]
            >>= \factor -> double (n * factor)
      _ -> Nothing
  }
-- | Matches "-" or "menos" followed by a token satisfying 'isPositive'
-- and produces that numeral's value negated.
ruleNumeralsPrefixWithNegativeOrMinus :: Rule
ruleNumeralsPrefixWithNegativeOrMinus = Rule
  { name = "numbers prefix with -, negative or minus"
  , pattern =
      [ regex "-|menos"
      , Predicate isPositive
      ]
  , prod = \case
      (_ : Token Numeral NumeralData { TNumeral.value = n } : _) ->
        double (n * (-1))
      _ -> Nothing
  }
rules :: [Rule]
rules =
[ ruleBelowTenWithTwoDigits
, ruleNumeral
, ruleNumeral2
, ruleNumeral3
, ruleNumeral4
, ruleNumeral5
, ruleNumeral6
[ ruleNumeralZeroToFifteen
, ruleBelowTenWithTwoDigits
, ruleNumeralSixteenToTwentyNine
, ruleNumeralSixteenToNineteenWithDiez
, ruleNumeralTwentyToNinetyTens
, ruleNumeralTwentyOneToNinetyNine
, ruleNumeralHundredsToMil
, ruleNumeralThreePartHundreds
, ruleNumeralDotNumeral
, ruleNumerals
, ruleNumeralsPrefixWithNegativeOrMinus
, ruleNumeralsSuffixesKMG
, ruleNumeralsPrefixWithNegativeOrMinus
]