mirror of
https://github.com/facebook/duckling.git
synced 2024-11-30 23:33:33 +03:00
bf89e34365
Reviewed By: JoelMarcey Differential Revision: D15439223 fbshipit-source-id: c5af3cb06318748142fe503945b38beffadfc28a
388 lines
10 KiB
Haskell
388 lines
10 KiB
Haskell
-- Copyright (c) 2016-present, Facebook, Inc.
|
|
-- All rights reserved.
|
|
--
|
|
-- This source code is licensed under the BSD-style license found in the
|
|
-- LICENSE file in the root directory of this source tree.
|
|
|
|
|
|
{-# LANGUAGE GADTs #-}
|
|
{-# LANGUAGE OverloadedStrings #-}
|
|
{-# LANGUAGE NoRebindableSyntax #-}
|
|
|
|
module Duckling.Numeral.PT.Rules
|
|
( rules
|
|
) where
|
|
|
|
import Control.Applicative ((<|>))
|
|
import Data.HashMap.Strict (HashMap)
|
|
import Data.Maybe
|
|
import Data.String
|
|
import Data.Text (Text)
|
|
import Prelude
|
|
import qualified Data.HashMap.Strict as HashMap
|
|
import qualified Data.Text as Text
|
|
|
|
import Duckling.Dimensions.Types
|
|
import Duckling.Numeral.Helpers
|
|
import Duckling.Numeral.Types (NumeralData (..))
|
|
import Duckling.Regex.Types
|
|
import Duckling.Types
|
|
import qualified Duckling.Numeral.Types as TNumeral
|
|
|
|
ruleDozen :: Rule
|
|
ruleDozen = Rule
|
|
{ name = "a dozen of"
|
|
, pattern =
|
|
[ regex "(uma )?d(u|ú)zias?( de)?"
|
|
]
|
|
, prod = \_ -> integer 12 >>= withMultipliable >>= notOkForAnyTime
|
|
}
|
|
|
|
zeroNineteenMap :: HashMap Text Integer
|
|
zeroNineteenMap = HashMap.fromList
|
|
[ ( "zero" , 0 )
|
|
, ( "um" , 1 )
|
|
, ( "uma" , 1 )
|
|
, ( "dois" , 2 )
|
|
, ( "duas" , 2 )
|
|
, ( "tres" , 3 )
|
|
, ( "três" , 3 )
|
|
, ( "quatro" , 4 )
|
|
, ( "cinco" , 5 )
|
|
, ( "seis" , 6 )
|
|
, ( "sete" , 7 )
|
|
, ( "oito" , 8 )
|
|
, ( "nove" , 9 )
|
|
, ( "dez" , 10 )
|
|
, ( "onze" , 11 )
|
|
, ( "doze" , 12 )
|
|
, ( "treze" , 13 )
|
|
, ( "catorze" , 14 )
|
|
, ( "quatorze" , 14 )
|
|
, ( "quinze" , 15 )
|
|
, ( "dezesseis" , 16 )
|
|
, ( "dezasseis" , 16 )
|
|
, ( "dezessete" , 17 )
|
|
, ( "dezassete" , 17 )
|
|
, ( "dezoito" , 18 )
|
|
, ( "dezenove" , 19 )
|
|
, ( "dezanove" , 19 )
|
|
]
|
|
|
|
informalMap :: HashMap Text Integer
|
|
informalMap = HashMap.fromList
|
|
[ ( "um par" , 2 )
|
|
, ( "um par de" , 2 )
|
|
, ( "par" , 2 )
|
|
, ( "pares" , 2 )
|
|
, ( "par de" , 2 )
|
|
, ( "pares de" , 2 )
|
|
, ( "um pouco" , 3 )
|
|
, ( "pouco" , 3 )
|
|
]
|
|
|
|
ruleToNineteen :: Rule
|
|
ruleToNineteen = Rule
|
|
{ name = "integer (0..19)"
|
|
, pattern =
|
|
[ regex "(zero|d(oi|ua)s|(uma? )?par(es)?( de)?|tr(e|ê)s|(um )?pouco|uma?|(c|qu)atorze|quatro|quinze|cinco|dez[ea]sseis|seis|dez[ea]ssete|sete|dezoito|oito|dez[ea]nove|nove|dez|onze|doze|treze)"
|
|
]
|
|
, prod = \tokens -> case tokens of
|
|
(Token RegexMatch (GroupMatch (match:_)):_) ->
|
|
let x = Text.toLower match in
|
|
(HashMap.lookup x zeroNineteenMap >>= integer) <|>
|
|
(HashMap.lookup x informalMap >>= integer >>= notOkForAnyTime)
|
|
_ -> Nothing
|
|
}
|
|
|
|
tensMap :: HashMap Text Integer
|
|
tensMap = HashMap.fromList
|
|
[ ( "vinte" , 20 )
|
|
, ( "trinta" , 30 )
|
|
, ( "quarenta" , 40 )
|
|
, ( "cincoenta" , 50 )
|
|
, ( "cinquenta" , 50 )
|
|
, ( "cinqüenta" , 50 )
|
|
, ( "sessenta" , 60 )
|
|
, ( "setenta" , 70 )
|
|
, ( "oitenta" , 80 )
|
|
, ( "noventa" , 90 )
|
|
]
|
|
|
|
ruleTens :: Rule
|
|
ruleTens = Rule
|
|
{ name = "tens (20..90)"
|
|
, pattern =
|
|
[ regex "(vinte|trinta|quarenta|cin(co|q[uü])enta|sessenta|setenta|oitenta|noventa)"
|
|
]
|
|
, prod = \tokens -> case tokens of
|
|
(Token RegexMatch (GroupMatch (match:_)):_) ->
|
|
HashMap.lookup (Text.toLower match) tensMap >>= integer
|
|
_ -> Nothing
|
|
}
|
|
|
|
centsMap :: HashMap Text Integer
|
|
centsMap = HashMap.fromList
|
|
[ ( "cem" , 100 )
|
|
, ( "cento" , 100 )
|
|
, ( "duzentos" , 200 )
|
|
, ( "trezentos" , 300 )
|
|
, ( "quatrocentos" , 400 )
|
|
, ( "quinhetos" , 500 )
|
|
, ( "seiscentos" , 600 )
|
|
, ( "setecentos" , 700 )
|
|
, ( "oitocentos" , 800 )
|
|
, ( "novecentos" , 900 )
|
|
]
|
|
|
|
ruleCent :: Rule
|
|
ruleCent = Rule
|
|
{ name = "hundreds (100..900)"
|
|
, pattern =
|
|
[ regex "(cem|cento|duzentos|trezentos|quatrocentos|quinhetos|seiscentos|setecentos|oitocentos|novecentos)"
|
|
]
|
|
, prod = \tokens -> case tokens of
|
|
(Token RegexMatch (GroupMatch (match:_)):_) ->
|
|
HashMap.lookup (Text.toLower match) centsMap >>= integer
|
|
_ -> Nothing
|
|
}
|
|
|
|
rulePowersOfTen :: Rule
|
|
rulePowersOfTen = Rule
|
|
{ name = "powers of tens"
|
|
, pattern =
|
|
[ regex "(milhao|milhão|milhões|milhoes|bilhao|bilhão|bilhões|bilhoes|mil)"
|
|
]
|
|
, prod = \tokens -> case tokens of
|
|
(Token RegexMatch (GroupMatch (match:_)):_) -> case Text.toLower match of
|
|
"mil" -> double 1e3 >>= withGrain 3 >>= withMultipliable
|
|
"milhao" -> double 1e6 >>= withGrain 6 >>= withMultipliable
|
|
"milhão" -> double 1e6 >>= withGrain 6 >>= withMultipliable
|
|
"milhões" -> double 1e6 >>= withGrain 6 >>= withMultipliable
|
|
"milhoes" -> double 1e6 >>= withGrain 6 >>= withMultipliable
|
|
"bilhao" -> double 1e9 >>= withGrain 9 >>= withMultipliable
|
|
"bilhão" -> double 1e9 >>= withGrain 9 >>= withMultipliable
|
|
"bilhões" -> double 1e9 >>= withGrain 9 >>= withMultipliable
|
|
"bilhoes" -> double 1e9 >>= withGrain 9 >>= withMultipliable
|
|
_ -> Nothing
|
|
_ -> Nothing
|
|
}
|
|
|
|
ruleCompositeTens :: Rule
|
|
ruleCompositeTens = Rule
|
|
{ name = "integer 21..99"
|
|
, pattern =
|
|
[ oneOf [20,30..90]
|
|
, numberBetween 1 10
|
|
]
|
|
, prod = \tokens -> case tokens of
|
|
(Token Numeral NumeralData{TNumeral.value = tens}:
|
|
Token Numeral NumeralData{TNumeral.value = units}:
|
|
_) -> double $ tens + units
|
|
_ -> Nothing
|
|
}
|
|
|
|
ruleDecsAnd :: Rule
|
|
ruleDecsAnd = Rule
|
|
{ name = "number (21..29 31..39 .. 91..99)"
|
|
, pattern =
|
|
[ oneOf [20, 30..90]
|
|
, regex "e"
|
|
, numberBetween 1 10
|
|
]
|
|
, prod = \tokens -> case tokens of
|
|
(Token Numeral NumeralData{TNumeral.value = v1}:
|
|
_:
|
|
Token Numeral NumeralData{TNumeral.value = v2}:
|
|
_) -> double $ v1 + v2
|
|
_ -> Nothing
|
|
}
|
|
|
|
ruleCompositeCents :: Rule
|
|
ruleCompositeCents = Rule
|
|
{ name = "integer 101..999"
|
|
, pattern =
|
|
[ oneOf [100, 200..900]
|
|
, numberBetween 1 100
|
|
]
|
|
, prod = \tokens -> case tokens of
|
|
(Token Numeral NumeralData{TNumeral.value = hundreds}:
|
|
Token Numeral NumeralData{TNumeral.value = units}:
|
|
_) -> double $ hundreds + units
|
|
_ -> Nothing
|
|
}
|
|
|
|
ruleCentsAnd :: Rule
|
|
ruleCentsAnd = Rule
|
|
{ name = "number (101..199 201..299 .. 901..999)"
|
|
, pattern =
|
|
[ oneOf [100, 200..900]
|
|
, regex "e"
|
|
, numberBetween 1 100
|
|
]
|
|
, prod = \tokens -> case tokens of
|
|
(Token Numeral NumeralData{TNumeral.value = v1}:
|
|
_:
|
|
Token Numeral NumeralData{TNumeral.value = v2}:
|
|
_) -> double $ v1 + v2
|
|
_ -> Nothing
|
|
}
|
|
|
|
ruleSkipHundreds :: Rule
|
|
ruleSkipHundreds = Rule
|
|
{ name = "one twenty two"
|
|
, pattern =
|
|
[ numberBetween 1 10
|
|
, numberBetween 10 100
|
|
]
|
|
, prod = \tokens -> case tokens of
|
|
(Token Numeral NumeralData{TNumeral.value = hundreds}:
|
|
Token Numeral NumeralData{TNumeral.value = rest}:
|
|
_) -> double $ hundreds*100 + rest
|
|
_ -> Nothing
|
|
}
|
|
|
|
ruleDotSpelledOut :: Rule
|
|
ruleDotSpelledOut = Rule
|
|
{ name = "one point 2"
|
|
, pattern =
|
|
[ dimension Numeral
|
|
, regex "ponto"
|
|
, Predicate $ not . hasGrain
|
|
]
|
|
, prod = \tokens -> case tokens of
|
|
(Token Numeral nd1:_:Token Numeral nd2:_) ->
|
|
double $ TNumeral.value nd1 + decimalsToDouble (TNumeral.value nd2)
|
|
_ -> Nothing
|
|
}
|
|
|
|
ruleLeadingDotSpelledOut :: Rule
|
|
ruleLeadingDotSpelledOut = Rule
|
|
{ name = "point 77"
|
|
, pattern =
|
|
[ regex "ponto"
|
|
, Predicate $ not . hasGrain
|
|
]
|
|
, prod = \tokens -> case tokens of
|
|
(_:Token Numeral nd:_) -> double . decimalsToDouble $ TNumeral.value nd
|
|
_ -> Nothing
|
|
}
|
|
|
|
ruleDecimals :: Rule
|
|
ruleDecimals = Rule
|
|
{ name = "decimal number"
|
|
, pattern =
|
|
[ regex "(\\d*\\,\\d+)"
|
|
]
|
|
, prod = \tokens -> case tokens of
|
|
(Token RegexMatch (GroupMatch (match:_)):_) -> parseDecimal False match
|
|
_ -> Nothing
|
|
}
|
|
|
|
ruleCommas :: Rule
|
|
ruleCommas = Rule
|
|
{ name = "dot-separated numbers"
|
|
, pattern =
|
|
[ regex "(\\d+(\\.\\d\\d\\d)+(\\,\\d+)?)"
|
|
]
|
|
, prod = \tokens -> case tokens of
|
|
(Token RegexMatch (GroupMatch (match:_)):_) ->
|
|
parseDecimal False $ Text.replace "." Text.empty match
|
|
_ -> Nothing
|
|
}
|
|
|
|
ruleSuffixes :: Rule
|
|
ruleSuffixes = Rule
|
|
{ name = "suffixes (K,M,G))"
|
|
, pattern =
|
|
[ dimension Numeral
|
|
, regex "(k|m|g)(?=[\\W$€¢£]|$)"
|
|
]
|
|
, prod = \tokens -> case tokens of
|
|
(Token Numeral NumeralData{TNumeral.value = v}:
|
|
Token RegexMatch (GroupMatch (match:_)):
|
|
_) -> case Text.toLower match of
|
|
"k" -> double $ v * 1e3
|
|
"m" -> double $ v * 1e6
|
|
"g" -> double $ v * 1e9
|
|
_ -> Nothing
|
|
_ -> Nothing
|
|
}
|
|
|
|
ruleNegative :: Rule
|
|
ruleNegative = Rule
|
|
{ name = "negative numbers"
|
|
, pattern =
|
|
[ regex "(-|menos|negativo)(?!\\s*\\-)"
|
|
, Predicate isPositive
|
|
]
|
|
, prod = \tokens -> case tokens of
|
|
(_:Token Numeral nd:_) -> double $ TNumeral.value nd * (-1)
|
|
_ -> Nothing
|
|
}
|
|
|
|
ruleSum :: Rule
|
|
ruleSum = Rule
|
|
{ name = "intersect 2 numbers"
|
|
, pattern =
|
|
[ Predicate hasGrain
|
|
, Predicate $ and . sequence [not . isMultipliable, isPositive]
|
|
]
|
|
, prod = \tokens -> case tokens of
|
|
(Token Numeral NumeralData{TNumeral.value = val1, TNumeral.grain = Just g}:
|
|
Token Numeral NumeralData{TNumeral.value = val2}:
|
|
_) | (10 ** fromIntegral g) > val2 -> double $ val1 + val2
|
|
_ -> Nothing
|
|
}
|
|
|
|
ruleSumAnd :: Rule
|
|
ruleSumAnd = Rule
|
|
{ name = "intersect 2 numbers (with and)"
|
|
, pattern =
|
|
[ Predicate hasGrain
|
|
, regex "e"
|
|
, Predicate $ and . sequence [not . isMultipliable, isPositive]
|
|
]
|
|
, prod = \tokens -> case tokens of
|
|
(Token Numeral NumeralData{TNumeral.value = val1, TNumeral.grain = Just g}:
|
|
_:
|
|
Token Numeral NumeralData{TNumeral.value = val2}:
|
|
_) | (10 ** fromIntegral g) > val2 -> double $ val1 + val2
|
|
_ -> Nothing
|
|
}
|
|
|
|
ruleMultiply :: Rule
|
|
ruleMultiply = Rule
|
|
{ name = "compose by multiplication"
|
|
, pattern =
|
|
[ dimension Numeral
|
|
, Predicate isMultipliable
|
|
]
|
|
, prod = \tokens -> case tokens of
|
|
(token1:token2:_) -> multiply token1 token2
|
|
_ -> Nothing
|
|
}
|
|
|
|
rules :: [Rule]
|
|
rules =
|
|
[ ruleToNineteen
|
|
, ruleTens
|
|
, ruleCent
|
|
, rulePowersOfTen
|
|
, ruleCompositeTens
|
|
, ruleCompositeCents
|
|
, ruleSkipHundreds
|
|
, ruleDotSpelledOut
|
|
, ruleLeadingDotSpelledOut
|
|
, ruleDecimals
|
|
, ruleCommas
|
|
, ruleSuffixes
|
|
, ruleNegative
|
|
, ruleSum
|
|
, ruleDecsAnd
|
|
, ruleCentsAnd
|
|
, ruleSumAnd
|
|
, ruleMultiply
|
|
, ruleDozen
|
|
]
|