mirror of
https://github.com/facebook/duckling.git
synced 2024-12-05 00:05:51 +03:00
54c9448fba
Summary: For consistency with the dimension name. Reviewed By: JonCoens Differential Revision: D4722216 fbshipit-source-id: 82c56d3
307 lines
8.8 KiB
Haskell
307 lines
8.8 KiB
Haskell
-- Copyright (c) 2016-present, Facebook, Inc.
|
|
-- All rights reserved.
|
|
--
|
|
-- This source code is licensed under the BSD-style license found in the
|
|
-- LICENSE file in the root directory of this source tree. An additional grant
|
|
-- of patent rights can be found in the PATENTS file in the same directory.
|
|
|
|
|
|
{-# LANGUAGE GADTs #-}
|
|
{-# LANGUAGE OverloadedStrings #-}
|
|
|
|
module Duckling.Numeral.ET.Rules
|
|
( rules ) where
|
|
|
|
import Data.Maybe
|
|
import qualified Data.Text as Text
|
|
import Prelude
|
|
import Data.String
|
|
|
|
import Duckling.Dimensions.Types
|
|
import Duckling.Numeral.Helpers
|
|
import Duckling.Numeral.Types (NumeralData (..))
|
|
import qualified Duckling.Numeral.Types as TNumeral
|
|
import Duckling.Regex.Types
|
|
import Duckling.Types
|
|
|
|
ruleIntegerWithThousandsSeparatorSpace :: Rule
|
|
ruleIntegerWithThousandsSeparatorSpace = Rule
|
|
{ name = "integer with thousands separator space"
|
|
, pattern =
|
|
[ regex "(\\d{1,3}(\\s\\d\\d\\d){1,5})"
|
|
]
|
|
, prod = \tokens -> case tokens of
|
|
(Token RegexMatch (GroupMatch (match:_)):_) ->
|
|
parseDouble (Text.replace (Text.singleton ' ') Text.empty match) >>= double
|
|
_ -> Nothing
|
|
}
|
|
|
|
ruleNumeralsPrefixWithNegativeOrMinus :: Rule
|
|
ruleNumeralsPrefixWithNegativeOrMinus = Rule
|
|
{ name = "numbers prefix with -, negative or minus"
|
|
, pattern =
|
|
[ regex "-|miinus|negatiivne"
|
|
, dimension Numeral
|
|
]
|
|
, prod = \tokens -> case tokens of
|
|
(_:
|
|
Token Numeral (NumeralData {TNumeral.value = v}):
|
|
_) -> double $ v * (-1)
|
|
_ -> Nothing
|
|
}
|
|
|
|
ruleIntegerNumeric :: Rule
|
|
ruleIntegerNumeric = Rule
|
|
{ name = "integer (numeric)"
|
|
, pattern =
|
|
[ regex "(\\d{1,18})"
|
|
]
|
|
, prod = \tokens -> case tokens of
|
|
(Token RegexMatch (GroupMatch (match:_)):
|
|
_) -> do
|
|
v <- parseInt match
|
|
integer $ toInteger v
|
|
_ -> Nothing
|
|
}
|
|
|
|
ruleACoupleOf :: Rule
|
|
ruleACoupleOf = Rule
|
|
{ name = "a couple of"
|
|
, pattern =
|
|
[ regex "paar"
|
|
]
|
|
, prod = \_ -> integer 2
|
|
}
|
|
|
|
ruleTen :: Rule
|
|
ruleTen = Rule
|
|
{ name = "ten"
|
|
, pattern =
|
|
[ regex "k\x00fcmme"
|
|
]
|
|
, prod = \_ -> integer 10 >>= withGrain 1
|
|
}
|
|
|
|
ruleDecimalWithThousandsSeparator :: Rule
|
|
ruleDecimalWithThousandsSeparator = Rule
|
|
{ name = "decimal with thousands separator"
|
|
, pattern =
|
|
[ regex "(\\d+(,\\d\\d\\d)+\\.\\d+)"
|
|
]
|
|
, prod = \tokens -> case tokens of
|
|
(Token RegexMatch (GroupMatch (match:_)):_) ->
|
|
parseDouble (Text.replace (Text.singleton ',') Text.empty match) >>= double
|
|
_ -> Nothing
|
|
}
|
|
|
|
ruleDecimalNumeral :: Rule
|
|
ruleDecimalNumeral = Rule
|
|
{ name = "decimal number"
|
|
, pattern =
|
|
[ regex "(\\d*\\.\\d+)"
|
|
]
|
|
, prod = \tokens -> case tokens of
|
|
(Token RegexMatch (GroupMatch (match:_)):_) -> parseDecimal True match
|
|
_ -> Nothing
|
|
}
|
|
|
|
ruleInteger3 :: Rule
|
|
ruleInteger3 = Rule
|
|
{ name = "integer 21..99"
|
|
, pattern =
|
|
[ oneOf [70, 20, 60, 50, 40, 90, 30, 80]
|
|
, numberBetween 1 10
|
|
]
|
|
, prod = \tokens -> case tokens of
|
|
(Token Numeral (NumeralData {TNumeral.value = v1}):
|
|
Token Numeral (NumeralData {TNumeral.value = v2}):_) -> double $ v1 + v2
|
|
_ -> Nothing
|
|
}
|
|
|
|
ruleAFew :: Rule
|
|
ruleAFew = Rule
|
|
{ name = "(a )?few"
|
|
, pattern =
|
|
[ regex "m\x00f5ni"
|
|
]
|
|
, prod = \_ -> integer 3
|
|
}
|
|
|
|
rulePowersOfTen :: Rule
|
|
rulePowersOfTen = Rule
|
|
{ name = "powers of tens"
|
|
, pattern = [regex "(sada|tuhat|miljoni?t?)"]
|
|
, prod = \tokens -> case tokens of
|
|
(Token RegexMatch (GroupMatch (match:_)):_) -> case Text.toLower match of
|
|
"sada" -> double 1e2 >>= withGrain 2 >>= withMultipliable
|
|
"tuhat" -> double 1e3 >>= withGrain 3 >>= withMultipliable
|
|
_ -> double 1e6 >>= withGrain 6 >>= withMultipliable
|
|
_ -> Nothing
|
|
}
|
|
|
|
ruleIntersect :: Rule
|
|
ruleIntersect = Rule
|
|
{ name = "intersect"
|
|
, pattern =
|
|
[ numberWith (fromMaybe 0 . TNumeral.grain) (>1)
|
|
, numberWith TNumeral.multipliable not
|
|
]
|
|
, prod = \tokens -> case tokens of
|
|
(Token Numeral (NumeralData {TNumeral.value = val1, TNumeral.grain = Just g}):
|
|
Token Numeral (NumeralData {TNumeral.value = val2}):
|
|
_) | (10 ** fromIntegral g) > val2 -> double $ val1 + val2
|
|
_ -> Nothing
|
|
}
|
|
|
|
ruleNumeralsSuffixesKMG :: Rule
|
|
ruleNumeralsSuffixesKMG = Rule
|
|
{ name = "numbers suffixes (K, M, G)"
|
|
, pattern =
|
|
[ dimension Numeral
|
|
, regex "([kmg])(?=[\\W\\$\x20ac]|$)"
|
|
]
|
|
, prod = \tokens -> case tokens of
|
|
(Token Numeral (NumeralData {TNumeral.value = v}):
|
|
Token RegexMatch (GroupMatch (match:_)):
|
|
_) -> case Text.toLower match of
|
|
"k" -> double $ v * 1e3
|
|
"m" -> double $ v * 1e6
|
|
"g" -> double $ v * 1e9
|
|
_ -> Nothing
|
|
_ -> Nothing
|
|
}
|
|
|
|
ruleMultiply :: Rule
|
|
ruleMultiply = Rule
|
|
{ name = "compose by multiplication"
|
|
, pattern =
|
|
[ dimension Numeral
|
|
, numberWith TNumeral.multipliable id
|
|
]
|
|
, prod = \tokens -> case tokens of
|
|
(token1:token2:_) -> multiply token1 token2
|
|
_ -> Nothing
|
|
}
|
|
|
|
ruleInteger :: Rule
|
|
ruleInteger = Rule
|
|
{ name = "integer (0..19)"
|
|
, pattern =
|
|
[ regex "(null|\x00fcksteist|\x00fcks|kaksteist|kaks|kolmteist|kolm|neliteist|neli|viisteist|viis|kuusteist|kuus|seitseteist|seitse|kaheksateist|kaheksa|\x00fcheksateist|\x00fcheksa|k\x00fcmme)"
|
|
]
|
|
, prod = \tokens -> case tokens of
|
|
(Token RegexMatch (GroupMatch (match:_)):_) -> case Text.toLower match of
|
|
"null" -> integer 0
|
|
"\x00fcks" -> integer 1
|
|
"kaks" -> integer 2
|
|
"kolm" -> integer 3
|
|
"neli" -> integer 4
|
|
"viis" -> integer 5
|
|
"kuus" -> integer 6
|
|
"seitse" -> integer 7
|
|
"kaheksa" -> integer 8
|
|
"\x00fcheksa" -> integer 9
|
|
"k\x00fcmme" -> integer 10
|
|
"\x00fcksteist" -> integer 11
|
|
"kaksteist" -> integer 12
|
|
"kolmteist" -> integer 13
|
|
"neliteist" -> integer 14
|
|
"viisteist" -> integer 15
|
|
"kuusteist" -> integer 16
|
|
"seitseteist" -> integer 17
|
|
"kaheksateist" -> integer 18
|
|
"\x00fcheksateist" -> integer 19
|
|
_ -> Nothing
|
|
_ -> Nothing
|
|
}
|
|
|
|
ruleInteger4 :: Rule
|
|
ruleInteger4 = Rule
|
|
{ name = "integer (200..900)"
|
|
, pattern =
|
|
[ regex "(kakssada|kolmsada|nelisada|viissada|kuussada|seitsesada|kaheksasada|\x00fcheksasada)"
|
|
]
|
|
, prod = \tokens -> case tokens of
|
|
(Token RegexMatch (GroupMatch (match:_)):_) -> case Text.toLower match of
|
|
"kakssada" -> integer 200 >>= withGrain 2 >>= withMultipliable
|
|
"kolmsada" -> integer 300 >>= withGrain 2 >>= withMultipliable
|
|
"nelisada" -> integer 400 >>= withGrain 2 >>= withMultipliable
|
|
"viissada" -> integer 500 >>= withGrain 2 >>= withMultipliable
|
|
"kuussada" -> integer 600 >>= withGrain 2 >>= withMultipliable
|
|
"seitsesada" -> integer 700 >>= withGrain 2 >>= withMultipliable
|
|
"kaheksasada" -> integer 800 >>= withGrain 2 >>= withMultipliable
|
|
"\x00fcheksasada" -> integer 900 >>= withGrain 2 >>= withMultipliable
|
|
_ -> Nothing
|
|
_ -> Nothing
|
|
}
|
|
|
|
ruleInteger2 :: Rule
|
|
ruleInteger2 = Rule
|
|
{ name = "integer (20..90)"
|
|
, pattern =
|
|
[ regex "((kaks|kolm|neli|viis|kuus|seitse|kaheksa|(\x00fc)heksa)k(\x00fc)mmend)"
|
|
]
|
|
, prod = \tokens -> case tokens of
|
|
(Token RegexMatch (GroupMatch (match:_)):_) -> case Text.toLower match of
|
|
"kaksk\x00fcmmend" -> integer 20
|
|
"kolmk\x00fcmmend" -> integer 30
|
|
"nelik\x00fcmmend" -> integer 40
|
|
"viisk\x00fcmmend" -> integer 50
|
|
"kuusk\x00fcmmend" -> integer 60
|
|
"seitsek\x00fcmmend" -> integer 70
|
|
"kaheksak\x00fcmmend" -> integer 80
|
|
"\x00fcheksak\x00fcmmend" -> integer 90
|
|
_ -> Nothing
|
|
_ -> Nothing
|
|
}
|
|
|
|
ruleNumeralDotNumeral :: Rule
|
|
ruleNumeralDotNumeral = Rule
|
|
{ name = "number dot number"
|
|
, pattern =
|
|
[ dimension Numeral
|
|
, regex "dot|point"
|
|
, numberWith TNumeral.grain isNothing
|
|
]
|
|
, prod = \tokens -> case tokens of
|
|
(Token Numeral (NumeralData {TNumeral.value = v1}):
|
|
_:
|
|
Token Numeral (NumeralData {TNumeral.value = v2}):
|
|
_) -> double $ v1 + decimalsToDouble v2
|
|
_ -> Nothing
|
|
}
|
|
|
|
ruleIntegerWithThousandsSeparator :: Rule
|
|
ruleIntegerWithThousandsSeparator = Rule
|
|
{ name = "integer with thousands separator ,"
|
|
, pattern =
|
|
[ regex "(\\d{1,3}(,\\d\\d\\d){1,5})"
|
|
]
|
|
, prod = \tokens -> case tokens of
|
|
(Token RegexMatch (GroupMatch (match:_)):_) ->
|
|
parseDouble (Text.replace (Text.singleton ',') Text.empty match) >>= double
|
|
_ -> Nothing
|
|
}
|
|
|
|
rules :: [Rule]
|
|
rules =
|
|
[ ruleACoupleOf
|
|
, ruleAFew
|
|
, ruleDecimalNumeral
|
|
, ruleDecimalWithThousandsSeparator
|
|
, ruleInteger
|
|
, ruleInteger2
|
|
, ruleInteger3
|
|
, ruleInteger4
|
|
, ruleIntegerNumeric
|
|
, ruleIntegerWithThousandsSeparator
|
|
, ruleIntegerWithThousandsSeparatorSpace
|
|
, ruleIntersect
|
|
, ruleMultiply
|
|
, ruleNumeralDotNumeral
|
|
, ruleNumeralsPrefixWithNegativeOrMinus
|
|
, ruleNumeralsSuffixesKMG
|
|
, rulePowersOfTen
|
|
, ruleTen
|
|
]
|