duckling/Duckling/Numeral/CA/Rules.hs
leandro.guisandez@pgconocimiento.com 7907812184 Initialise Catalan language with Numeral
Summary: Adds Catalan language and Numeral rules for it

Reviewed By: haoxuany

Differential Revision: D26518604

Pulled By: chessai

fbshipit-source-id: e6b4b0ceb9b7931d086c732dd03fb5cbbe062d5b
2021-04-08 14:47:02 -07:00

301 lines
7.8 KiB
Haskell

-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree.
{-# LANGUAGE GADTs #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedStrings #-}
module Duckling.Numeral.CA.Rules (rules) where
import Data.HashMap.Strict (HashMap)
import qualified Data.HashMap.Strict as HashMap
import Data.Maybe
import Data.String
import Data.Text (Text)
import qualified Data.Text as Text
import Prelude
import Duckling.Dimensions.Types
import Duckling.Numeral.Helpers
import Duckling.Numeral.Types (NumeralData(..))
import qualified Duckling.Numeral.Types as TNumeral
import Duckling.Regex.Types
import Duckling.Types
zeroToFifteenMap :: HashMap Text Integer
zeroToFifteenMap =
HashMap.fromList
[ ("zero", 0)
, ("u", 1)
, ("un", 1)
, ("una", 1)
, ("dos", 2)
, ("dues", 2)
, ("tres", 3)
, ("quatre", 4)
, ("cinc", 5)
, ("sis", 6)
, ("set", 7)
, ("vuit", 8)
, ("nou", 9)
, ("deu", 10)
, ("onze", 11)
, ("dotze", 12)
, ("tretze", 13)
, ("catorze", 14)
, ("quinze", 15)
]
ruleZeroToFifteen :: Rule
ruleZeroToFifteen = Rule
{ name = "number (0..15)"
, pattern =
[ regex
"(zero|u(na|n)?|d(o|ue)s|tres|quatre|cinc|sis|set|vuit|nou|deu|onze|dotze|tretze|catorze|quinze)"
]
, prod = \case
(Token RegexMatch (GroupMatch (match:_)):_) ->
HashMap.lookup (Text.toLower match) zeroToFifteenMap >>= integer
_ -> Nothing
}
ruleNumeralsPrefixWithNegativeOrMinus :: Rule
ruleNumeralsPrefixWithNegativeOrMinus = Rule
{ name = "numbers prefix with -, negative or minus"
, pattern = [regex "-|menys", Predicate isPositive]
, prod = \case
(_ : Token Numeral NumeralData { TNumeral.value = v } : _) ->
double $ negate v
_ -> Nothing
}
tensMap :: HashMap Text Integer
tensMap =
HashMap.fromList
[ ("vint", 20)
, ("trenta", 30)
, ("quaranta", 40)
, ("cinquanta", 50)
, ("seixanta", 60)
, ("setanta", 70)
, ("vuitanta", 80)
, ("noranta", 90)
]
ruleTens :: Rule
ruleTens = Rule
{ name = "number (20..90)"
, pattern =
[ regex
"(vint|(tre|quara|cinqua|seixa|seta|vuita|nora)nta)"
]
, prod = \case
(Token RegexMatch (GroupMatch (match:_)) : _) ->
HashMap.lookup (Text.toLower match) tensMap >>= integer
_ -> Nothing
}
sixteenToTwentyNineMap :: HashMap Text Integer
sixteenToTwentyNineMap =
HashMap.fromList
[ ("setze", 16)
, ("disset", 17)
, ("dèsset", 17)
, ("devuit", 18)
, ("divuit", 18)
, ("dihuit", 18)
, ("dinou", 19)
, ("dènou", 19)
, ("denou", 19)
, ("vint-i-u", 21)
, ("vint-i-una", 21)
, ("vint-i-dos", 22)
, ("vint-i-tres", 23)
, ("vint-i-quatre", 24)
, ("vint-i-cinc", 25)
, ("vint-i-sis", 26)
, ("vint-i-set", 27)
, ("vint-i-vuit", 28)
, ("vint-i-nou", 29)
]
ruleLowerTensWithOnes :: Rule
ruleLowerTensWithOnes = Rule
{ name = "number (16..19 21..29)"
, pattern =
[ regex
"(setze|d(i|e|è)sset|d(e|i)(v|h)uit|d(i|e|è)nou|vint-i-u(na)?|vint-i-dos|vint-i-tres|vint-i-quatre|vint-i-cinc|vint-i-sis|vint-i-set|vint-i-vuit|vint-i-nou)"
]
, prod = \case
(Token RegexMatch (GroupMatch (match:_)):_) ->
HashMap.lookup (Text.toLower match) sixteenToTwentyNineMap >>= integer
_ -> Nothing
}
ruleHigherTensWithOnes :: Rule
ruleHigherTensWithOnes = Rule
{ name = "number (31..39 41..49 51..59 61..69 71..79 81..89 91..99)"
, pattern =
[oneOf [30, 40, 50, 60, 70, 80, 90], regex "-", numberBetween 1 9]
, prod = \case
(Token Numeral NumeralData{TNumeral.value = v1}:
_:
Token Numeral NumeralData{TNumeral.value = v2}:
_) -> double $ v1 + v2
_ -> Nothing
}
ruleNumeralsSuffixesKMG :: Rule
ruleNumeralsSuffixesKMG = Rule
{ name = "numbers suffixes (K, M, G)"
, pattern = [dimension Numeral, regex "([kmg])(?=[\\W\\$€]|$)"]
, prod = \case
(Token Numeral NumeralData{TNumeral.value = v}:
Token RegexMatch (GroupMatch (match:_)):
_) ->
case Text.toLower match of
"k" -> double $ v * 1e3
"m" -> double $ v * 1e6
"g" -> double $ v * 1e9
_ -> Nothing
_ -> Nothing
}
oneHundredToThousandMap :: HashMap Text Integer
oneHundredToThousandMap =
HashMap.fromList
[ ("cent", 100)
, ("cents", 100)
, ("dos-cents", 200)
, ("tres-cents", 300)
, ("quatre-cents", 400)
, ("cinc-cents", 500)
, ("sis-cents", 600)
, ("set-cents", 700)
, ("vuit-cents", 800)
, ("nou-cents", 900)
, ("mil", 1000)
]
ruleTwenties :: Rule
ruleTwenties = Rule
{ name = "number (21..29)"
, pattern =
[oneOf [20], regex "(-i-| i )", numberBetween 1 10]
, prod = \case
(Token Numeral NumeralData{TNumeral.value = v1}:
_:
Token Numeral NumeralData{TNumeral.value = v2}:
_) -> double $ v1 + v2
_ -> Nothing
}
ruleHundreds :: Rule
ruleHundreds = Rule
{ name = "number 100..1000 "
, pattern =
[ regex
"(cent(s)?|dos-cents|tres-cents|quatre-cents|cinc-cents|sis-cents|set-cents|vuit-cents|nou-cents|mil)"
]
, prod = \case
(Token RegexMatch (GroupMatch (match:_)):_) ->
HashMap.lookup (Text.toLower match) oneHundredToThousandMap >>= integer
_ -> Nothing
}
-- Afegeixo regex "-" perque les centenes s'escriuen dos-cent, tres-cent
ruleNumerals :: Rule
ruleNumerals = Rule
{ name = "numbers 200..999"
, pattern =
[ numberBetween 2 10
, regex "-"
, numberWith TNumeral.value (== 100)
, numberBetween 0 100
]
, prod = \case
(Token Numeral NumeralData{TNumeral.value = v1}:
_:
Token Numeral NumeralData{TNumeral.value = v2}:
_) -> double $ 100 * v1 + v2
_ -> Nothing
}
ruleNumeralDotNumeral :: Rule
ruleNumeralDotNumeral = Rule
{ name = "number dot number"
, pattern = [dimension Numeral, regex "coma", Predicate $ not . hasGrain]
, prod = \case
(Token Numeral NumeralData{TNumeral.value = v1}:
_:
Token Numeral NumeralData{TNumeral.value = v2}:
_) -> double $ v1 + decimalsToDouble v2
_ -> Nothing
}
ruleBelowTenWithTwoDigits :: Rule
ruleBelowTenWithTwoDigits = Rule
{ name = "integer (0-9) with two digits"
, pattern =
[ regex "zero|0"
, numberBetween 1 10
]
, prod = \case
(_:Token Numeral NumeralData{TNumeral.value = v}:_) -> double v
_ -> Nothing
}
ruleDecimalWithThousandsSeparator :: Rule
ruleDecimalWithThousandsSeparator = Rule
{ name = "decimal with thousands separator ."
, pattern = [regex "(\\d+(\\.\\d\\d\\d)+,\\d+)"]
, prod = \case
(Token RegexMatch (GroupMatch (match:_)):_) ->
let fmt = Text.replace "," "." . Text.replace "." Text.empty $ match
in parseDouble fmt >>= double
_ -> Nothing
}
ruleDecimalNumeral :: Rule
ruleDecimalNumeral = Rule
{ name = "decimal number ,"
, pattern = [regex "(\\d*,\\d+)"]
, prod = \case
(Token RegexMatch (GroupMatch (match:_)):_) ->
parseDecimal False match
_ -> Nothing
}
ruleIntegerWithThousandsSeparator :: Rule
ruleIntegerWithThousandsSeparator = Rule
{ name = "integer with thousands separator ."
, pattern = [regex "(\\d{1,3}(\\.\\d\\d\\d){1,5})"]
, prod = \case
(Token RegexMatch (GroupMatch (match:_)):_) ->
parseDouble (Text.replace "." Text.empty match) >>= double
_ -> Nothing
}
rules :: [Rule]
rules =
[ ruleBelowTenWithTwoDigits
, ruleZeroToFifteen
, ruleTens
, ruleTwenties
, ruleLowerTensWithOnes
, ruleHigherTensWithOnes
, ruleHundreds
, ruleNumeralDotNumeral
, ruleNumerals
, ruleNumeralsPrefixWithNegativeOrMinus
, ruleNumeralsSuffixesKMG
, ruleDecimalNumeral
, ruleDecimalWithThousandsSeparator
, ruleIntegerWithThousandsSeparator
]