mirror of
https://github.com/facebook/duckling.git
synced 2024-12-01 08:19:36 +03:00
83ea150d94
Summary: We noticed that using UTF-8 characters directly in regexes work. Hence converting back the escaped characters for readability and maintenance. Reviewed By: blandinw Differential Revision: D5787146 fbshipit-source-id: e5a4b9a
308 lines
8.3 KiB
Haskell
308 lines
8.3 KiB
Haskell
-- Copyright (c) 2016-present, Facebook, Inc.
|
|
-- All rights reserved.
|
|
--
|
|
-- This source code is licensed under the BSD-style license found in the
|
|
-- LICENSE file in the root directory of this source tree. An additional grant
|
|
-- of patent rights can be found in the PATENTS file in the same directory.
|
|
|
|
|
|
{-# LANGUAGE GADTs #-}
|
|
{-# LANGUAGE OverloadedStrings #-}
|
|
|
|
module Duckling.Numeral.PT.Rules
|
|
( rules ) where
|
|
|
|
import Data.Maybe
|
|
import qualified Data.Text as Text
|
|
import Prelude
|
|
import Data.String
|
|
|
|
import Duckling.Dimensions.Types
|
|
import Duckling.Numeral.Helpers
|
|
import Duckling.Numeral.Types (NumeralData (..))
|
|
import qualified Duckling.Numeral.Types as TNumeral
|
|
import Duckling.Regex.Types
|
|
import Duckling.Types
|
|
|
|
ruleNumeralsPrefixWithNegativeOrMinus :: Rule
|
|
ruleNumeralsPrefixWithNegativeOrMinus = Rule
|
|
{ name = "numbers prefix with -, negative or minus"
|
|
, pattern =
|
|
[ regex "-|menos"
|
|
, dimension Numeral
|
|
]
|
|
, prod = \tokens -> case tokens of
|
|
(_:Token Numeral nd:_) -> double (TNumeral.value nd * (-1))
|
|
_ -> Nothing
|
|
}
|
|
|
|
ruleIntegerNumeric :: Rule
|
|
ruleIntegerNumeric = Rule
|
|
{ name = "integer (numeric)"
|
|
, pattern =
|
|
[ regex "(\\d{1,18})"
|
|
]
|
|
, prod = \tokens -> case tokens of
|
|
(Token RegexMatch (GroupMatch (match:_)):_) -> do
|
|
v <- parseInt match
|
|
integer $ toInteger v
|
|
_ -> Nothing
|
|
}
|
|
|
|
ruleDecimalWithThousandsSeparator :: Rule
|
|
ruleDecimalWithThousandsSeparator = Rule
|
|
{ name = "decimal with thousands separator"
|
|
, pattern =
|
|
[ regex "(\\d+(\\.\\d\\d\\d)+,\\d+)"
|
|
]
|
|
, prod = \tokens -> case tokens of
|
|
(Token RegexMatch (GroupMatch (match:_)):
|
|
_) -> let dot = Text.singleton '.'
|
|
comma = Text.singleton ','
|
|
fmt = Text.replace comma dot $ Text.replace dot Text.empty match
|
|
in parseDouble fmt >>= double
|
|
_ -> Nothing
|
|
}
|
|
|
|
ruleDecimalNumeral :: Rule
|
|
ruleDecimalNumeral = Rule
|
|
{ name = "decimal number"
|
|
, pattern =
|
|
[ regex "(\\d*,\\d+)"
|
|
]
|
|
, prod = \tokens -> case tokens of
|
|
(Token RegexMatch (GroupMatch (match:_)):
|
|
_) -> parseDecimal False match
|
|
_ -> Nothing
|
|
}
|
|
|
|
ruleNumeral2 :: Rule
|
|
ruleNumeral2 = Rule
|
|
{ name = "number (20..90)"
|
|
, pattern =
|
|
[ regex "(vinte|trinta|quarenta|cincoenta|cinq(ü)enta|cinquenta|sessenta|setenta|oitenta|noventa)"
|
|
]
|
|
, prod = \tokens -> case tokens of
|
|
(Token RegexMatch (GroupMatch (match:_)):_) -> case Text.toLower match of
|
|
"vinte" -> integer 20
|
|
"trinta" -> integer 30
|
|
"quarenta" -> integer 40
|
|
"cinq\252enta" -> integer 50
|
|
"cincoenta" -> integer 50
|
|
"cinquenta" -> integer 50
|
|
"sessenta" -> integer 60
|
|
"setenta" -> integer 70
|
|
"oitenta" -> integer 80
|
|
"noventa" -> integer 90
|
|
_ -> Nothing
|
|
_ -> Nothing
|
|
}
|
|
|
|
ruleNumeral :: Rule
|
|
ruleNumeral = Rule
|
|
{ name = "number (0..15)"
|
|
, pattern =
|
|
[ regex "(zero|uma?|d(oi|ua)s|tr(ê|e)s|quatro|cinco|seis|sete|oito|nove|dez|onze|doze|treze|(ca|qua)torze|quinze)"
|
|
]
|
|
, prod = \tokens -> case tokens of
|
|
(Token RegexMatch (GroupMatch (match:_)):_) -> case Text.toLower match of
|
|
"zero" -> integer 0
|
|
"uma" -> integer 1
|
|
"um" -> integer 1
|
|
"dois" -> integer 2
|
|
"duas" -> integer 2
|
|
"três" -> integer 3
|
|
"tres" -> integer 3
|
|
"quatro" -> integer 4
|
|
"cinco" -> integer 5
|
|
"seis" -> integer 6
|
|
"sete" -> integer 7
|
|
"oito" -> integer 8
|
|
"nove" -> integer 9
|
|
"dez" -> integer 10
|
|
"onze" -> integer 11
|
|
"doze" -> integer 12
|
|
"treze" -> integer 13
|
|
"catorze" -> integer 14
|
|
"quatorze" -> integer 14
|
|
"quinze" -> integer 15
|
|
_ -> Nothing
|
|
_ -> Nothing
|
|
}
|
|
|
|
ruleNumeral5 :: Rule
|
|
ruleNumeral5 = Rule
|
|
{ name = "number (16..19)"
|
|
, pattern =
|
|
[ regex "(dez[ea]sseis|dez[ea]ssete|dezoito|dez[ea]nove)"
|
|
]
|
|
, prod = \tokens -> case tokens of
|
|
(Token RegexMatch (GroupMatch (match:_)):_) -> case Text.toLower match of
|
|
"dezesseis" -> integer 16
|
|
"dezasseis" -> integer 16
|
|
"dezessete" -> integer 17
|
|
"dezassete" -> integer 17
|
|
"dezoito" -> integer 18
|
|
"dezenove" -> integer 19
|
|
"dezanove" -> integer 19
|
|
_ -> Nothing
|
|
_ -> Nothing
|
|
}
|
|
|
|
ruleNumeral3 :: Rule
|
|
ruleNumeral3 = Rule
|
|
{ name = "number (16..19)"
|
|
, pattern =
|
|
[ numberWith TNumeral.value (== 10)
|
|
, regex "e"
|
|
, numberBetween 6 10
|
|
]
|
|
, prod = \tokens -> case tokens of
|
|
(_:Token Numeral (NumeralData {TNumeral.value = v}):_) -> double $ 10 + v
|
|
_ -> Nothing
|
|
}
|
|
|
|
ruleNumeralsSuffixesKMG :: Rule
|
|
ruleNumeralsSuffixesKMG = Rule
|
|
{ name = "numbers suffixes (K, M, G)"
|
|
, pattern =
|
|
[ dimension Numeral
|
|
, regex "([kmg])(?=[\\W\\$€]|$)"
|
|
]
|
|
, prod = \tokens -> case tokens of
|
|
(Token Numeral (NumeralData {TNumeral.value = v}):
|
|
Token RegexMatch (GroupMatch (match:_)):
|
|
_) -> case Text.toLower match of
|
|
"k" -> double $ v * 1e3
|
|
"m" -> double $ v * 1e6
|
|
"g" -> double $ v * 1e9
|
|
_ -> Nothing
|
|
_ -> Nothing
|
|
}
|
|
|
|
ruleNumeral6 :: Rule
|
|
ruleNumeral6 = Rule
|
|
{ name = "number 100..1000 "
|
|
, pattern =
|
|
[
|
|
regex "(cem|cento|duzentos|trezentos|quatrocentos|quinhentos|seiscentos|setecentos|oitocentos|novecentos|mil)"
|
|
]
|
|
, prod = \tokens -> case tokens of
|
|
(Token RegexMatch (GroupMatch (match:_)):_) -> case Text.toLower match of
|
|
"cento" -> integer 100
|
|
"cem" -> integer 100
|
|
"duzentos" -> integer 200
|
|
"trezentos" -> integer 300
|
|
"quatrocentos" -> integer 400
|
|
"quinhentos" -> integer 500
|
|
"seiscentos" -> integer 600
|
|
"setecentos" -> integer 700
|
|
"oitocentos" -> integer 800
|
|
"novecentos" -> integer 900
|
|
"mil" -> integer 1000
|
|
_ -> Nothing
|
|
_ -> Nothing
|
|
}
|
|
|
|
ruleNumeral4 :: Rule
|
|
ruleNumeral4 = Rule
|
|
{ name = "number (21..29 31..39 41..49 51..59 61..69 71..79 81..89 91..99)"
|
|
, pattern =
|
|
[ oneOf [70, 20, 60, 50, 40, 90, 30, 80]
|
|
, regex "e"
|
|
, numberBetween 1 10
|
|
]
|
|
, prod = \tokens -> case tokens of
|
|
(Token Numeral (NumeralData {TNumeral.value = v1}):
|
|
_:
|
|
Token Numeral (NumeralData {TNumeral.value = v2}):
|
|
_) -> double $ v1 + v2
|
|
_ -> Nothing
|
|
}
|
|
|
|
ruleDozen :: Rule
|
|
ruleDozen = Rule
|
|
{ name = "dozen"
|
|
, pattern =
|
|
[ regex "d(ú|u)zias?"
|
|
]
|
|
, prod = \_ -> integer 12 >>= withGrain 1 >>= withMultipliable
|
|
}
|
|
|
|
ruleNumeralDozen :: Rule
|
|
ruleNumeralDozen = Rule
|
|
{ name = "number dozen"
|
|
, pattern =
|
|
[ numberBetween 1 11
|
|
, dimension Numeral
|
|
]
|
|
, prod = \tokens -> case tokens of
|
|
(Token Numeral (NumeralData {TNumeral.value = v1}):
|
|
Token Numeral (NumeralData {TNumeral.value = v2, TNumeral.grain = Just g}):
|
|
_) -> double (v1 * v2) >>= withGrain g
|
|
_ -> Nothing
|
|
}
|
|
|
|
ruleNumerals :: Rule
|
|
ruleNumerals = Rule
|
|
{ name = "numbers (100..999)"
|
|
, pattern =
|
|
[ numberBetween 100 1000
|
|
, regex "e"
|
|
, numberBetween 0 100
|
|
]
|
|
, prod = \tokens -> case tokens of
|
|
(Token Numeral (NumeralData {TNumeral.value = v1}):
|
|
_:
|
|
Token Numeral (NumeralData {TNumeral.value = v2}):
|
|
_) -> double $ v1 + v2
|
|
_ -> Nothing
|
|
}
|
|
|
|
ruleNumeralDotNumeral :: Rule
|
|
ruleNumeralDotNumeral = Rule
|
|
{ name = "number dot number"
|
|
, pattern =
|
|
[ dimension Numeral
|
|
, regex "ponto"
|
|
, numberWith TNumeral.grain isNothing
|
|
]
|
|
, prod = \tokens -> case tokens of
|
|
(Token Numeral nd1:_:Token Numeral nd2:_) ->
|
|
double $ TNumeral.value nd1 + decimalsToDouble (TNumeral.value nd2)
|
|
_ -> Nothing
|
|
}
|
|
|
|
ruleIntegerWithThousandsSeparator :: Rule
|
|
ruleIntegerWithThousandsSeparator = Rule
|
|
{ name = "integer with thousands separator ."
|
|
, pattern =
|
|
[ regex "(\\d{1,3}(\\.\\d\\d\\d){1,5})"
|
|
]
|
|
, prod = \tokens -> case tokens of
|
|
(Token RegexMatch (GroupMatch (match:_)):
|
|
_) -> let fmt = Text.replace (Text.singleton '.') Text.empty match
|
|
in parseDouble fmt >>= double
|
|
_ -> Nothing
|
|
}
|
|
|
|
rules :: [Rule]
|
|
rules =
|
|
[ ruleDecimalNumeral
|
|
, ruleDecimalWithThousandsSeparator
|
|
, ruleDozen
|
|
, ruleIntegerNumeric
|
|
, ruleIntegerWithThousandsSeparator
|
|
, ruleNumeral
|
|
, ruleNumeral2
|
|
, ruleNumeral3
|
|
, ruleNumeral4
|
|
, ruleNumeral5
|
|
, ruleNumeral6
|
|
, ruleNumeralDotNumeral
|
|
, ruleNumeralDozen
|
|
, ruleNumerals
|
|
, ruleNumeralsPrefixWithNegativeOrMinus
|
|
, ruleNumeralsSuffixesKMG
|
|
]
|