mirror of
https://github.com/facebook/duckling.git
synced 2024-12-12 23:03:29 +03:00
babe317723
Summary: Numerals that require intersection with negative numbers don't work, because the negative sign gets parsed before the intersect rules are applied. This fixes it by adding an `isPositive` guard to the intersection rules. Reviewed By: patapizza Differential Revision: D7592225 fbshipit-source-id: 2bc9c708cadeea4012c1f3ef487c61a144325f2a
302 lines
8.4 KiB
Haskell
302 lines
8.4 KiB
Haskell
-- Copyright (c) 2016-present, Facebook, Inc.
|
|
-- All rights reserved.
|
|
--
|
|
-- This source code is licensed under the BSD-style license found in the
|
|
-- LICENSE file in the root directory of this source tree. An additional grant
|
|
-- of patent rights can be found in the PATENTS file in the same directory.
|
|
|
|
|
|
{-# LANGUAGE GADTs #-}
|
|
{-# LANGUAGE OverloadedStrings #-}
|
|
|
|
module Duckling.Numeral.RO.Rules
|
|
( rules
|
|
) where
|
|
|
|
import Data.HashMap.Strict (HashMap)
|
|
import Data.Maybe
|
|
import Data.String
|
|
import Data.Text (Text)
|
|
import Prelude
|
|
import qualified Data.HashMap.Strict as HashMap
|
|
import qualified Data.Text as Text
|
|
|
|
import Duckling.Dimensions.Types
|
|
import Duckling.Numeral.Helpers
|
|
import Duckling.Numeral.Types (NumeralData (..))
|
|
import Duckling.Regex.Types
|
|
import Duckling.Types
|
|
import qualified Duckling.Numeral.Types as TNumeral
|
|
|
|
-- | Turns a sign marker ("-" or "minus") followed by a numeral accepted by
-- 'isPositive' into the numeral with its sign flipped.
ruleNumeralsPrefixWithOrMinus :: Rule
ruleNumeralsPrefixWithOrMinus = Rule
  { name = "numbers prefix with - or minus"
  , pattern = [regex "-|minus", Predicate isPositive]
  , prod = flipSign
  }
  where
    -- The first token is the sign itself; only the numeral after it is used.
    -- The signature is required because the clause matches on a GADT.
    flipSign :: [Token] -> Maybe Token
    flipSign (_ : Token Numeral NumeralData{TNumeral.value = magnitude} : _) =
      double $ negate magnitude
    flipSign _ = Nothing
|
|
|
|
-- | Composes two adjacent numerals where the word for "hundred" was left
-- out: the first numeral is scaled by 100 and the second is added to it.
-- The accepted ranges are whatever @numberBetween 1 10@ and
-- @numberBetween 11 100@ admit.
ruleSpecialCompositionForMissingHundredsLikeInOneTwentyTwo :: Rule
ruleSpecialCompositionForMissingHundredsLikeInOneTwentyTwo = Rule
  { name = "special composition for missing hundreds like in one twenty two"
  , pattern = [numberBetween 1 10, numberBetween 11 100]
  , prod = \tokens -> case tokens of
      (Token Numeral leading : Token Numeral trailing : _) ->
        double $ 100 * TNumeral.value leading + TNumeral.value trailing
      _ -> Nothing
  }
|
|
|
|
-- | Parses decimals written with "." as the thousands separator and ","
-- as the decimal mark (digit groups of exactly three after each ".").
ruleDecimalWithThousandsSeparator :: Rule
ruleDecimalWithThousandsSeparator = Rule
  { name = "decimal with thousands separator"
  , pattern = [regex "(\\d+(\\.\\d\\d\\d)+,\\d+)"]
  , prod = \tokens -> case tokens of
      (Token RegexMatch (GroupMatch (raw:_)) : _) ->
        parseDouble (normalise raw) >>= double
      _ -> Nothing
  }
  where
    -- Order matters: strip the "." separators first, and only then turn the
    -- decimal comma into a point, otherwise the new point would be stripped.
    normalise = Text.replace "," "." . Text.replace "." Text.empty
|
|
|
|
-- | Parses a comma-separated decimal such as "1,5" or ",5" (the integer
-- part is optional in the regex).
ruleDecimalNumeral :: Rule
ruleDecimalNumeral = Rule
  { name = "decimal number"
  , pattern = [regex "(\\d*,\\d+)"]
  , prod = \tokens -> case tokens of
      -- NOTE(review): the 'False' flag presumably tells 'parseDecimal' that
      -- the decimal mark is not a dot; confirm against the helper.
      (Token RegexMatch (GroupMatch (digits:_)) : _) ->
        parseDecimal False digits
      _ -> Nothing
  }
|
|
|
|
-- | Adds a tens numeral (20, 30 .. 90) and a following numeral admitted by
-- @numberBetween 1 10@ to form a two-digit integer.
ruleInteger3 :: Rule
ruleInteger3 = Rule
  { name = "integer 21..99"
  , pattern = [oneOf [20, 30 .. 90], numberBetween 1 10]
  , prod = \tokens -> case tokens of
      (Token Numeral tens : Token Numeral units : _) ->
        double $ TNumeral.value tens + TNumeral.value units
      _ -> Nothing
  }
|
|
|
|
-- | Combines any numeral with a following token accepted by
-- 'isMultipliable' by delegating to 'multiply'.
ruleMultiply :: Rule
ruleMultiply = Rule
  { name = "compose by multiplication"
  , pattern = [dimension Numeral, Predicate isMultipliable]
  , prod = combine
  }
  where
    combine :: [Token] -> Maybe Token
    combine (factor : unit : _) = multiply factor unit
    combine _ = Nothing
|
|
|
|
-- | Sums a numeral carrying a grain with a directly following numeral that
-- is not multipliable and is accepted by 'isPositive'. The sum is only
-- produced when the second value is strictly below @10 ^ grain@ of the first.
ruleIntersect :: Rule
ruleIntersect = Rule
  { name = "intersect"
  , pattern =
    [ Predicate hasGrain
    , Predicate $ \t -> not (isMultipliable t) && isPositive t
    ]
  , prod = \tokens -> case tokens of
      (Token Numeral NumeralData{TNumeral.value = big, TNumeral.grain = Just g} :
       Token Numeral NumeralData{TNumeral.value = small} :
       _)
        -- The smaller operand must fit below the grain of the larger one.
        | small < 10 ** fromIntegral g -> double $ big + small
      _ -> Nothing
  }
|
|
|
|
-- | Same composition as the plain intersect rule, but with the conjunction
-- "si" / "și" between the two numerals. The conjunction token is ignored
-- when computing the result.
ruleIntersectCuI :: Rule
ruleIntersectCuI = Rule
  { name = "intersect (cu și)"
  , pattern =
    [ Predicate hasGrain
    , regex "(s|ș)i"
    , Predicate $ \t -> not (isMultipliable t) && isPositive t
    ]
  , prod = \tokens -> case tokens of
      (Token Numeral NumeralData{TNumeral.value = big, TNumeral.grain = Just g} :
       _ :
       Token Numeral NumeralData{TNumeral.value = small} :
       _)
        -- The smaller operand must fit below the grain of the larger one.
        | small < 10 ** fromIntegral g -> double $ big + small
      _ -> Nothing
  }
|
|
|
|
-- | Flips the sign of a numeral accepted by 'isPositive' when it is
-- followed by "neg" or "negativ".
ruleNumeralsSuffixesWithNegativ :: Rule
ruleNumeralsSuffixesWithNegativ = Rule
  { name = "numbers suffixes with (negativ)"
  , pattern = [Predicate isPositive, regex "neg(ativ)?"]
  , prod = \tokens -> case tokens of
      -- Only the leading numeral is needed; the suffix token is not inspected.
      (Token Numeral numeral : _) -> double . negate $ TNumeral.value numeral
      _ -> Nothing
  }
|
|
|
|
-- | Recognises the words for hundred, thousand, million and billion and
-- emits the matching power of ten, tagged with its grain and marked as
-- multipliable.
ruleIntegerPowersDoc :: ()
ruleIntegerPowersDoc = ()

rulePowersOfTen :: Rule
rulePowersOfTen = Rule
  { name = "powers of tens"
  , pattern =
    [ regex "(sut(a|e|ă)?|milio(n|ane)?|miliar(de?)?|mi[ei]?)"
    ]
  , prod = \tokens -> case tokens of
      (Token RegexMatch (GroupMatch (match:_)):_) ->
        powerFor (Text.toLower match)
      _ -> Nothing
  }
  where
    -- Builds the numeral for one power of ten: value, grain, multipliable.
    scaled v g = double v >>= withGrain g >>= withMultipliable

    -- Words the regex admits but that are not listed here (e.g. a bare
    -- "sut") produce no token.
    powerFor :: Text -> Maybe Token
    powerFor word
      | word `elem` ["suta", "sute", "sută"] = scaled 1e2 2
      | word `elem` ["mi", "mie", "mii"] = scaled 1e3 3
      | word `elem` ["milio", "milion", "milioane"] = scaled 1e6 6
      | word `elem` ["miliar", "miliard", "miliarde"] = scaled 1e9 9
      | otherwise = Nothing
|
|
|
|
-- | Lower-cased spellings of the integers 0 through 10, with and without
-- diacritics, keyed by the exact text the regex-based rules look up.
zeroTenMap :: HashMap Text Integer
zeroTenMap = HashMap.fromList $ concatMap spellingsOf
  [ (0, ["zero", "nimic", "nicio", "nici o", "nici una", "nici unu", "nici unul"])
  , (1, ["un", "una", "unu", "unul", "intai", "întai", "intâi", "întâi", "o"])
  , (2, ["doi", "doua", "două"])
  , (3, ["trei"])
  , (4, ["patru"])
  , (5, ["cinci"])
  , (6, ["sase", "\537ase"])
  , (7, ["sapte", "\537apte"])
  , (8, ["opt"])
  , (9, ["noua", "nouă"])
  , (10, ["zece", "zeci"])
  ]
  where
    -- Expands one (value, spellings) group into individual map entries.
    spellingsOf (n, forms) = [(form, n) | form <- forms]
|
|
|
|
-- | Parses the spelled-out integers 0 through 10 by looking the lower-cased
-- match up in 'zeroTenMap'. A match that has no entry yields no token.
ruleIntegerZeroTen :: Rule
ruleIntegerZeroTen = Rule
  { name = "integer (0..10)"
  , pattern =
    [ regex "(zero|nimic|nici(\\s?o|\\sun(a|ul?))|una|unul?|doi|dou(a|ă)|trei|patru|cinci|(s|ș)ase|(s|ș)apte|opt|nou(a|ă)|zec[ei]|(i|î)nt(a|â)i|un|o)"
    ]
  , prod = \tokens -> case tokens of
      (Token RegexMatch (GroupMatch (word:_)) : _) ->
        integer =<< HashMap.lookup (Text.toLower word) zeroTenMap
      _ -> Nothing
  }
|
|
|
|
-- | Lower-cased stems of the integers 11 through 19 (the part of the word
-- before the "-spe" / "-sprezece" ending), with and without diacritics.
elevenNineteenMap :: HashMap Text Integer
elevenNineteenMap = HashMap.fromList $ concatMap stemsOf
  [ (11, ["un"])
  , (12, ["doi"])
  , (13, ["trei"])
  , (14, ["pai"])
  , (15, ["cin", "cinci"])
  , (16, ["sai", "\537ai"])
  , (17, ["sapti", "\537apti", "sapte", "\537apte"])
  , (18, ["opti", "opt"])
  , (19, ["noua", "nouă"])
  ]
  where
    -- Expands one (value, stems) group into individual map entries.
    stemsOf (n, stems) = [(stem, n) | stem <- stems]
|
|
|
|
-- | Parses the spelled-out integers 11 through 19.
--
-- The regex has three alternatives, each capturing the stem of the word in
-- its own group (groups 1, 3 and 5); the other groups only capture
-- diacritic variants or the ending. Exactly one of the three stem groups is
-- non-empty for any match, and that stem is looked up in
-- 'elevenNineteenMap'.
--
-- The first alternative accepts both "sapti" and "șapti" so that the
-- "\537apti" entry of 'elevenNineteenMap' is reachable (e.g. "șaptișpe").
-- A non-capturing group is used there so the stem group positions (1, 3, 5)
-- stay unchanged.
ruleInteger :: Rule
ruleInteger = Rule
  { name = "integer (11..19)"
  , pattern =
    [ regex "(cin|(?:s|ș)apti|opti)(s|ș)pe|(cinci|(s|ș)apte|opt)sprezece|(un|doi|trei|pai|(s|ș)ai|nou(a|ă))((s|ș)pe|sprezece)"
    ]
  , prod = \tokens -> case tokens of
      (Token RegexMatch (GroupMatch (e1:_:e2:_:r:_)):_) -> do
        -- Groups of alternatives that did not participate are empty, so the
        -- first non-empty stem group is the one that matched.
        stem <- listToMaybe $ filter (not . Text.null) [e1, e2, r]
        HashMap.lookup (Text.toLower stem) elevenNineteenMap >>= integer
      _ -> Nothing
  }
|
|
|
|
-- | Parses the spelled-out tens 20, 30 .. 90 ("douazeci", "treizeci", ...).
--
-- The captured stem is resolved to its unit value and multiplied by ten;
-- the result is tagged with grain 2 and marked as multipliable.
--
-- The stem for sixty is "sai" / "șai", which is not a key of 'zeroTenMap'
-- (that map only knows "sase" / "șase"). It is therefore resolved here
-- explicitly; otherwise "șaizeci" would match the regex but produce no token.
ruleInteger2 :: Rule
ruleInteger2 = Rule
  { name = "integer (20..90)"
  , pattern =
    [ regex "(dou(a|ă)|trei|patru|cinci|(s|ș)ai|(s|ș)apte|opt|nou(a|ă))\\s?zeci"
    ]
  , prod = \tokens -> case tokens of
      (Token RegexMatch (GroupMatch (match:_)):_) -> do
        unit <- unitOfStem (Text.toLower match)
        integer (unit * 10) >>= withGrain 2 >>= withMultipliable
      _ -> Nothing
  }
  where
    -- Maps a tens stem to its unit value, covering the irregular stem of 60
    -- before falling back to the regular unit spellings.
    unitOfStem :: Text -> Maybe Integer
    unitOfStem stem = case stem of
      "sai" -> Just 6
      "\537ai" -> Just 6
      _ -> HashMap.lookup stem zeroTenMap
|
|
|
|
-- | Parses integers written with "." as the thousands separator: one to
-- three leading digits followed by one to five groups of exactly three
-- digits.
ruleIntegerCuSeparatorDeMiiDot :: Rule
ruleIntegerCuSeparatorDeMiiDot = Rule
  { name = "integer cu separator de mii dot"
  , pattern = [regex "(\\d{1,3}(\\.\\d\\d\\d){1,5})"]
  , prod = \tokens -> case tokens of
      (Token RegexMatch (GroupMatch (raw:_)) : _) ->
        -- Remove the separators so only the digits reach the parser.
        parseDouble (Text.replace "." Text.empty raw) >>= double
      _ -> Nothing
  }
|
|
|
|
-- | Every Romanian numeral rule defined in this module, in the order in
-- which they are exported to the rest of the engine.
rules :: [Rule]
rules =
  -- Digit-based decimals.
  [ ruleDecimalNumeral
  , ruleDecimalWithThousandsSeparator
  -- Spelled-out and digit-based integers.
  , ruleInteger
  , ruleIntegerZeroTen
  , ruleInteger2
  , ruleInteger3
  , ruleIntegerCuSeparatorDeMiiDot
  -- Composition of already-parsed numerals.
  , ruleIntersect
  , ruleIntersectCuI
  , ruleMultiply
  -- Sign handling.
  , ruleNumeralsPrefixWithOrMinus
  , ruleNumeralsSuffixesWithNegativ
  -- Magnitude words and the "missing hundreds" shorthand.
  , rulePowersOfTen
  , ruleSpecialCompositionForMissingHundredsLikeInOneTwentyTwo
  ]
|