mirror of
https://github.com/facebook/duckling.git
synced 2024-12-11 06:46:26 +03:00
babe317723
Summary: Numerals that require intersection with negative numbers don't work, since the negative sign gets parsed before the intersect rules happen. This fixes it by adding a guard in for positive in the intersection rule. Reviewed By: patapizza Differential Revision: D7592225 fbshipit-source-id: 2bc9c708cadeea4012c1f3ef487c61a144325f2a
315 lines
7.8 KiB
Haskell
315 lines
7.8 KiB
Haskell
-- Copyright (c) 2016-present, Facebook, Inc.
|
|
-- All rights reserved.
|
|
--
|
|
-- This source code is licensed under the BSD-style license found in the
|
|
-- LICENSE file in the root directory of this source tree. An additional grant
|
|
-- of patent rights can be found in the PATENTS file in the same directory.
|
|
|
|
|
|
{-# LANGUAGE GADTs #-}
|
|
{-# LANGUAGE OverloadedStrings #-}
|
|
|
|
module Duckling.Numeral.DA.Rules
|
|
( rules
|
|
) where
|
|
|
|
import Data.HashMap.Strict (HashMap)
|
|
import Data.Maybe
|
|
import Data.String
|
|
import Data.Text (Text)
|
|
import Prelude
|
|
import qualified Data.HashMap.Strict as HashMap
|
|
import qualified Data.Text as Text
|
|
|
|
import Duckling.Dimensions.Types
|
|
import Duckling.Numeral.Helpers
|
|
import Duckling.Numeral.Types (NumeralData (..))
|
|
import Duckling.Regex.Types
|
|
import Duckling.Types
|
|
import qualified Duckling.Numeral.Types as TNumeral
|
|
|
|
-- | Flips the sign of a numeral that follows "-", "minus" or "negativ".
--
-- Only numerals accepted by 'isPositive' are eligible, so an already
-- negated numeral is not negated a second time by this rule.
ruleNumeralsPrefixWithNegativeOrMinus :: Rule
ruleNumeralsPrefixWithNegativeOrMinus = Rule
  { name = "numbers prefix with -, negative or minus"
  , pattern = [regex "-|minus|negativ", Predicate isPositive]
  , prod = negated
  }
  where
    -- The leading token is the sign marker; only the numeral behind it
    -- contributes to the result.
    negated :: [Token] -> Maybe Token
    negated (_ : Token Numeral NumeralData{TNumeral.value = n} : _) =
      double (negate n)
    negated _ = Nothing
|
|
|
|
-- | Maps the vague quantity "få" (optionally "nogle få") to the integer 3.
ruleFew :: Rule
ruleFew = Rule
  { name = "few"
  , pattern = [regex "(nogle )?få"]
  , prod = const (integer 3)
  }
|
|
|
|
-- | Matches "ti" and yields the integer 10 tagged with grain 1.
ruleTen :: Rule
ruleTen = Rule
  { name = "ten"
  , pattern = [regex "ti"]
  , prod = const (withGrain 1 =<< integer 10)
  }
|
|
|
|
-- | Parses decimals written with "." as the thousands separator and ","
-- as the decimal mark, e.g. "1.000,5".
ruleDecimalWithThousandsSeparator :: Rule
ruleDecimalWithThousandsSeparator = Rule
  { name = "decimal with thousands separator"
  , pattern =
    [ regex "(\\d+(\\.\\d\\d\\d)+\\,\\d+)"
    ]
  , prod = \tokens -> case tokens of
      (Token RegexMatch (GroupMatch (match:_)):_) ->
        -- Strip the thousands separators first, then parse what is left as
        -- a comma decimal through the same helper 'ruleDecimalNumeral'
        -- uses. Feeding the text directly to 'parseDouble' left the ","
        -- decimal mark in the input.
        -- NOTE(review): assumes 'parseDouble' only understands "." as the
        -- decimal mark - confirm against Duckling.Numeral.Helpers.
        parseDecimal False (Text.replace "." Text.empty match)
      _ -> Nothing
  }
|
|
|
|
-- | Parses a comma decimal such as "1,5" or ",75" (the integer part is
-- optional in the regex).
ruleDecimalNumeral :: Rule
ruleDecimalNumeral = Rule
  { name = "decimal number"
  , pattern = [regex "(\\d*,\\d+)"]
  , prod = fromMatch
  }
  where
    -- 'False' is the flag 'parseDecimal' receives for this comma-based
    -- notation.
    fromMatch :: [Token] -> Maybe Token
    fromMatch (Token RegexMatch (GroupMatch (digits:_)) : _) =
      parseDecimal False digits
    fromMatch _ = Nothing
|
|
|
|
-- | Composes a tens value (20, 30 .. 90) with a following numeral that
-- satisfies @numberBetween 1 10@ by adding the two values.
ruleInteger3 :: Rule
ruleInteger3 = Rule
  { name = "integer 21..99"
  , pattern = [oneOf [20, 30 .. 90], numberBetween 1 10]
  , prod = addUp
  }
  where
    addUp :: [Token] -> Maybe Token
    addUp
      ( Token Numeral NumeralData{TNumeral.value = tens}
      : Token Numeral NumeralData{TNumeral.value = units}
      : _
      ) = double (tens + units)
    addUp _ = Nothing
|
|
|
|
-- | Matches "enkelt" and yields the integer 1 tagged with grain 1.
ruleSingle :: Rule
ruleSingle = Rule
  { name = "single"
  , pattern = [regex "enkelt"]
  , prod = const (withGrain 1 =<< integer 1)
  }
|
|
|
|
-- | Adds a numeral that carries a grain to the numeral that follows it.
--
-- The second numeral must be non-multipliable and accepted by
-- 'isPositive', and its value must be below @10 ^ grain@ of the first;
-- otherwise the rule produces nothing.
ruleIntersect :: Rule
ruleIntersect = Rule
  { name = "intersect"
  , pattern =
    [ Predicate hasGrain
    , Predicate $ \t -> not (isMultipliable t) && isPositive t
    ]
  , prod = summed
  }
  where
    summed :: [Token] -> Maybe Token
    summed
      ( Token Numeral NumeralData{TNumeral.value = high, TNumeral.grain = Just g}
      : Token Numeral NumeralData{TNumeral.value = low}
      : _
      )
      -- The trailing part has to fit below the magnitude of the leading one.
      | low < 10 ** fromIntegral g = double (high + low)
    summed _ = Nothing
|
|
|
|
-- | Scales a numeral by a trailing K, M or G suffix (1e3, 1e6 and 1e9
-- respectively).
--
-- The lookahead in the regex requires the suffix letter to be followed by
-- a non-word character, "$", "€" or the end of input.
ruleNumeralsSuffixesKMG :: Rule
ruleNumeralsSuffixesKMG = Rule
  { name = "numbers suffixes (K, M, G)"
  , pattern = [dimension Numeral, regex "([kmg])(?=[\\W\\$€]|$)"]
  , prod = scaled
  }
  where
    scaled :: [Token] -> Maybe Token
    scaled
      ( Token Numeral NumeralData{TNumeral.value = n}
      : Token RegexMatch (GroupMatch (suffix:_))
      : _
      ) = do
        -- An unknown suffix makes the lookup, and thus the rule, fail.
        factor <- lookup (Text.toLower suffix) factors
        double (n * factor)
    scaled _ = Nothing

    factors = [("k", 1e3), ("m", 1e6), ("g", 1e9)]
|
|
|
|
-- | Recognises the words for hundred, thousand and million and yields the
-- matching power of ten, tagged with its grain (2, 3 or 6) and marked as
-- multipliable.
rulePowersOfTen :: Rule
rulePowersOfTen = Rule
  { name = "powers of tens"
  , pattern = [regex "(hundrede?|tusinde?|million(er)?)"]
  , prod = toPower
  }
  where
    toPower :: [Token] -> Maybe Token
    toPower (Token RegexMatch (GroupMatch (word:_)) : _) =
      case Text.toLower word of
        "hundred"   -> power 1e2 2
        "hundrede"  -> power 1e2 2
        "tusind"    -> power 1e3 3
        "tusinde"   -> power 1e3 3
        "million"   -> power 1e6 6
        "millioner" -> power 1e6 6
        _           -> Nothing
    toPower _ = Nothing

    -- Builds the numeral for one magnitude together with its grain.
    power magnitude g = double magnitude >>= withGrain g >>= withMultipliable
|
|
|
|
-- | Matches "et par" and yields the integer 2 tagged with grain 1.
ruleAPair :: Rule
ruleAPair = Rule
  { name = "a pair"
  , pattern = [regex "et par"]
  , prod = const (withGrain 1 =<< integer 2)
  }
|
|
|
|
-- | Handles the "<unit> og <tens>" construction by adding the numeral
-- before "og" to the tens value after it.
ruleNumeralsOg :: Rule
ruleNumeralsOg = Rule
  { name = "numbers og"
  , pattern =
    [ numberBetween 1 10
    , regex "og"
    , oneOf [70, 20, 60, 50, 40, 90, 30, 80]
    ]
  , prod = addUp
  }
  where
    -- The middle token is the "og" connector and carries no value.
    addUp :: [Token] -> Maybe Token
    addUp
      ( Token Numeral NumeralData{TNumeral.value = units}
      : _
      : Token Numeral NumeralData{TNumeral.value = tens}
      : _
      ) = double (units + tens)
    addUp _ = Nothing
|
|
|
|
-- | Matches "dusin" and yields the integer 12 with grain 1, marked as
-- multipliable.
ruleDozen :: Rule
ruleDozen = Rule
  { name = "dozen"
  , pattern = [regex "dusin"]
  , prod = const (integer 12 >>= withGrain 1 >>= withMultipliable)
  }
|
|
|
|
-- | Combines any numeral with a following multipliable numeral by handing
-- both tokens to 'multiply'.
ruleMultiply :: Rule
ruleMultiply = Rule
  { name = "compose by multiplication"
  , pattern = [dimension Numeral, Predicate isMultipliable]
  , prod = combine
  }
  where
    combine :: [Token] -> Maybe Token
    combine (lhs : rhs : _) = multiply lhs rhs
    combine _ = Nothing
|
|
|
|
-- | Lookup table from the Danish number words for 0 through 19 to their
-- integer values. Zero and one each have several spellings.
zeroNineteenMap :: HashMap Text Integer
zeroNineteenMap = HashMap.fromList $
  [ (word, 0) | word <- ["ingen", "nul", "intet"] ]
  ++ [ (word, 1) | word <- ["en", "et", "én", "ét"] ]
  -- The remaining words are listed in ascending order, starting at 2.
  ++ zip
    [ "to", "tre", "fire", "fem", "seks", "syv", "otte", "ni", "ti"
    , "elleve", "tolv", "tretten", "fjorten", "femten", "seksten"
    , "sytten", "atten", "nitten"
    ]
    [2 ..]
|
|
|
|
-- | Recognises the number words for 0 through 19 and resolves them through
-- 'zeroNineteenMap'.
ruleInteger :: Rule
ruleInteger = Rule
  { name = "integer (0..19)"
  , pattern =
    -- Alternatives are listed longest-first wherever one word is a prefix
    -- of another (tretten/tre, femten/fem, seksten/seks, nitten/ni and
    -- tolv/to), so the shorter word cannot win on the longer word's text.
    -- NOTE(review): assumes ordered (leftmost-alternative-first) regex
    -- alternation, as in PCRE - confirm against the regex backend in use.
    [ regex "(intet|ingen|nul|en|et|én|ét|tolv|to|tretten|tre|fire|femten|fem|seksten|seks|syv|otte|nitten|ni|ti|elleve|fjorten|sytten|atten)"
    ]
  , prod = \tokens -> case tokens of
      (Token RegexMatch (GroupMatch (match:_)):_) ->
        -- The map keys are lower case, so normalise the match first.
        HashMap.lookup (Text.toLower match) zeroNineteenMap >>= integer
      _ -> Nothing
  }
|
|
|
|
-- | Recognises the words for the tens 20, 30 .. 90.
ruleInteger2 :: Rule
ruleInteger2 = Rule
  { name = "integer (20..90)"
  , pattern =
    [ regex "(tyve|tredive|fyrre|halvtreds|tres|halvfjerds|firs|halvfems)"
    ]
  , prod = tens
  }
  where
    tens :: [Token] -> Maybe Token
    tens (Token RegexMatch (GroupMatch (word:_)) : _) =
      lookup (Text.toLower word) tensWords >>= integer
    tens _ = Nothing

    -- Word-to-value table; a word missing here makes the rule fail.
    tensWords :: [(Text, Integer)]
    tensWords =
      [ ("tyve", 20)
      , ("tredive", 30)
      , ("fyrre", 40)
      , ("halvtreds", 50)
      , ("tres", 60)
      , ("halvfjerds", 70)
      , ("firs", 80)
      , ("halvfems", 90)
      ]
|
|
|
|
-- | Handles spelled-out decimals of the form "<numeral> komma <numeral>".
--
-- The numeral after "komma" must not carry a grain; it is passed through
-- 'decimalsToDouble' and added to the numeral before "komma".
ruleNumeralDotNumeral :: Rule
ruleNumeralDotNumeral = Rule
  { name = "number dot number"
  , pattern = [dimension Numeral, regex "komma", Predicate $ not . hasGrain]
  , prod = joined
  }
  where
    -- The middle token is the "komma" separator and carries no value.
    joined :: [Token] -> Maybe Token
    joined
      ( Token Numeral NumeralData{TNumeral.value = whole}
      : _
      : Token Numeral NumeralData{TNumeral.value = decimals}
      : _
      ) = double (whole + decimalsToDouble decimals)
    joined _ = Nothing
|
|
|
|
-- | Parses integers grouped with "." as the thousands separator, e.g.
-- "1.000" or "10.000.000" (one to five groups of three digits).
ruleIntegerWithThousandsSeparator :: Rule
ruleIntegerWithThousandsSeparator = Rule
  { name = "integer with thousands separator ."
  , pattern = [regex "(\\d{1,3}(\\.\\d\\d\\d){1,5})"]
  , prod = ungrouped
  }
  where
    ungrouped :: [Token] -> Maybe Token
    ungrouped (Token RegexMatch (GroupMatch (digits:_)) : _) =
      -- Remove the separators so only the plain digits get parsed.
      double =<< parseDouble (Text.replace "." Text.empty digits)
    ungrouped _ = Nothing
|
|
|
|
-- | Every numeral rule defined in this module, in alphabetical order.
-- This list is the module's only export.
rules :: [Rule]
rules =
  [ ruleAPair, ruleDecimalNumeral, ruleDecimalWithThousandsSeparator
  , ruleDozen, ruleFew
  , ruleInteger, ruleInteger2, ruleInteger3
  , ruleIntegerWithThousandsSeparator, ruleIntersect
  , ruleMultiply
  , ruleNumeralDotNumeral, ruleNumeralsOg
  , ruleNumeralsPrefixWithNegativeOrMinus, ruleNumeralsSuffixesKMG
  , rulePowersOfTen
  , ruleSingle, ruleTen
  ]
|