duckling/Duckling/Numeral/PL/Rules.hs
Julien Odent c6a48a1d06 Duration/DE+EN: integer <hours> and <minutes> to accept valid minutes
Summary:
* as title
* refactored `numberBetween` to be a `Predicate`

Reviewed By: nishsinghal20

Differential Revision: D34180649

fbshipit-source-id: 8fbe6db83e8b22181bd0bbde533e2d3390d24c75
2022-02-12 00:17:03 -08:00

630 lines
12 KiB
Haskell

-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree.
{-# LANGUAGE GADTs #-}
{-# LANGUAGE OverloadedStrings #-}
module Duckling.Numeral.PL.Rules
( rules
) where
import Data.Maybe
import Data.String
import Prelude
import qualified Data.Text as Text
import Duckling.Dimensions.Types
import Duckling.Numeral.Helpers
import Duckling.Numeral.Types (NumeralData (..))
import Duckling.Regex.Types
import Duckling.Types
import qualified Duckling.Numeral.Types as TNumeral
-- | Spelled-out sixteen (the @s@ may carry its diacritic or not);
-- every match produces the integer 16.
ruleSixteen :: Rule
ruleSixteen = Rule
  { name = "sixteen"
  , pattern = [regex "szesna(s|ś)(tu|cie|toma)"]
  , prod = const (integer 16)
  }
-- | Spelled-out fourteen (the @s@ may carry its diacritic or not);
-- every match produces the integer 14.
ruleFourteen :: Rule
ruleFourteen = Rule
  { name = "fourteen"
  , pattern = [regex "czterna(s|ś)(tu|cie|toma)"]
  , prod = const (integer 14)
  }
-- | Spelled-out two: the stem @dw@ followed by one of the endings listed
-- in the regex. Every match produces the integer 2.
ruleTwo :: Rule
ruleTwo = Rule
  { name = "two"
  , pattern = [regex "dw(a|(o|ó)(ch|m)|oma|iema|ie)"]
  , prod = const (integer 2)
  }
-- | Spelled-out sixty, in either of the two alternatives of the regex;
-- every match produces the integer 60.
ruleSixty :: Rule
ruleSixty = Rule
  { name = "sixty"
  , pattern = [regex "sze(ść)dziesi(ą)t|sze(ść)dziesi(ę)ci(u|oma)"]
  , prod = const (integer 60)
  }
-- | Spelled-out seventy; nasal vowels are accepted with or without their
-- diacritic. Every match produces the integer 70.
ruleSeventy :: Rule
ruleSeventy = Rule
  { name = "seventy"
  , pattern = [regex "siedemdziesi((a|ą)t|(e|ę)ci(u|oma))"]
  , prod = const (integer 70)
  }
-- | Spelled-out eighty; nasal vowels are accepted with or without their
-- diacritic. Every match produces the integer 80.
ruleEighty :: Rule
ruleEighty = Rule
  { name = "eighty"
  , pattern = [regex "osiemdziesi((a|ą)t|(e|ę)ci(u|oma))"]
  , prod = const (integer 80)
  }
-- | Spelled-out ninety; diacritics on the vowels and on the @c@ are
-- optional. Every match produces the integer 90.
ruleNinety :: Rule
ruleNinety = Rule
  { name = "ninety"
  , pattern = [regex "dziewi(e|ę)(c|ć)dziesi((a|ą)|(e|ę))(t|ci(u|oma))"]
  , prod = const (integer 90)
  }
-- | Sums two numerals joined by the connector @i@ or @a@.
--
-- The first numeral must satisfy 'hasGrain'; the second must be positive
-- and not multipliable. The sum is produced only when the second value is
-- strictly below @10 ^ grain@ of the first; otherwise the rule yields
-- nothing.
ruleIntersectWithAnd :: Rule
ruleIntersectWithAnd = Rule
  { name = "intersect (with and)"
  , pattern =
      [ Predicate hasGrain
      , regex "i|a"
      , Predicate $ and . sequence [not . isMultipliable, isPositive]
      ]
  , prod = combine
  }
  where
    combine :: [Token] -> Maybe Token
    combine
      ( Token Numeral NumeralData{TNumeral.value = big, TNumeral.grain = Just g}
      : _
      : Token Numeral NumeralData{TNumeral.value = small}
      : _
      )
      | small < 10 ** fromIntegral g = double (big + small)
    combine _ = Nothing
-- | A sign marker (@-@, @minus@ or @negative@) followed by a positive
-- numeral; produces that numeral's value with its sign flipped.
ruleNumeralsPrefixWithNegativeOrMinus :: Rule
ruleNumeralsPrefixWithNegativeOrMinus = Rule
  { name = "numbers prefix with -, negative or minus"
  , pattern =
      [ regex "-|minus|negative"
      , Predicate isPositive
      ]
  , prod = flipSign
  }
  where
    flipSign :: [Token] -> Maybe Token
    flipSign (_ : Token Numeral nd : _) = double (negate (TNumeral.value nd))
    flipSign _ = Nothing
-- | Spelled-out one: the stem @jed@ followed by one of the endings listed
-- in the regex. Every match produces the integer 1.
ruleOne :: Rule
ruleOne = Rule
  { name = "one"
  , pattern = [regex "jed(en|nego|nemu|nym|nej|n(a|ą))"]
  , prod = const (integer 1)
  }
-- | Spelled-out ten; every match produces the integer 10.
--
-- The stem is written @dzie?si@ so that the standard spelling "dziesięć"
-- is recognised; this is the same @dziesi@ stem the sixty..ninety rules of
-- this module use. The previous pattern required the stem @dzisi@, which
-- the correctly spelled word does not contain, so it could never match.
-- The old spelling remains accepted to stay backward-compatible.
ruleTen :: Rule
ruleTen = Rule
  { name = "ten"
  , pattern =
      [ regex "dzie?si(e|ę)(ć|c)(iu|ioma)?"
      ]
  , prod = \_ -> integer 10
  }
-- | Two adjacent numerals where the first passes @numberBetween 1 10@ and
-- the second passes @numberBetween 10 100@; the first is treated as a
-- count of hundreds, giving @first * 100 + second@.
ruleSpecialCompositionForMissingHundredsLikeInOneTwentyTwo :: Rule
ruleSpecialCompositionForMissingHundredsLikeInOneTwentyTwo = Rule
  { name = "special composition for missing hundreds like in one twenty two"
  , pattern =
      [ Predicate $ numberBetween 1 10
      , Predicate $ numberBetween 10 100
      ]
  , prod = compose
  }
  where
    compose :: [Token] -> Maybe Token
    compose
      ( Token Numeral NumeralData{TNumeral.value = hundreds}
      : Token Numeral NumeralData{TNumeral.value = rest}
      : _
      ) = double (hundreds * 100 + rest)
    compose _ = Nothing
-- | Digits grouped by commas in threes, followed by a dot and a fractional
-- part. The commas are removed from the matched text before it is handed
-- to 'parseDouble'; a failed parse yields nothing.
ruleDecimalWithThousandsSeparator :: Rule
ruleDecimalWithThousandsSeparator = Rule
  { name = "decimal with thousands separator"
  , pattern = [regex "(\\d+(,\\d\\d\\d)+\\.\\d+)"]
  , prod = fromMatch
  }
  where
    fromMatch :: [Token] -> Maybe Token
    fromMatch (Token RegexMatch (GroupMatch (raw : _)) : _) =
      double =<< parseDouble (Text.replace "," Text.empty raw)
    fromMatch _ = Nothing
-- | Spelled-out nine, with an optional @iu@ / @ioma@ ending and optional
-- diacritics. Every match produces the integer 9.
ruleNine :: Rule
ruleNine = Rule
  { name = "nine"
  , pattern = [regex "dziewi(e|ę)(ć|c)(iu|ioma)?"]
  , prod = const (integer 9)
  }
-- | Eight hundred, written as @osiemset@ or @osiem setek@; produces the
-- integer 800 passed through @withGrain 2@.
ruleNumeral8 :: Rule
ruleNumeral8 = Rule
  { name = "number 800"
  , pattern = [regex "(osiem(set| setek))"]
  , prod = const $ withGrain 2 =<< integer 800
  }
-- | Spelled-out twelve, in either of the two alternatives of the regex;
-- every match produces the integer 12.
ruleTwelve :: Rule
ruleTwelve = Rule
  { name = "twelve"
  , pattern = [regex "dwunast(u|oma)|dwana(ś|s)cie"]
  , prod = const (integer 12)
  }
-- | A decimal literal: optional integer digits, a dot, and at least one
-- fractional digit. The matched text is passed to @parseDecimal True@
-- (NOTE(review): the meaning of the 'True' flag is defined in the helpers
-- module and is not visible here).
ruleDecimalNumeral :: Rule
ruleDecimalNumeral = Rule
  { name = "decimal number"
  , pattern = [regex "(\\d*\\.\\d+)"]
  , prod = fromMatch
  }
  where
    fromMatch :: [Token] -> Maybe Token
    fromMatch (Token RegexMatch (GroupMatch (raw : _)) : _) =
      parseDecimal True raw
    fromMatch _ = Nothing
-- | Spelled-out fifteen (the @s@ may carry its diacritic or not);
-- every match produces the integer 15.
ruleFifteen :: Rule
ruleFifteen = Rule
  { name = "fifteen"
  , pattern = [regex "pi(ę)tna(s|ś)(ta|tu|cie|toma)"]
  , prod = const (integer 15)
  }
-- | Spelled-out eleven: the stem @jedena@ followed by one of the endings
-- listed in the regex. Every match produces the integer 11.
ruleEleven :: Rule
ruleEleven = Rule
  { name = "eleven"
  , pattern = [regex "jedena(stu|(s|ś)cie|stoma)"]
  , prod = const (integer 11)
  }
-- | Spelled-out thirteen (the @s@ may carry its diacritic or not);
-- every match produces the integer 13.
ruleThirteen :: Rule
ruleThirteen = Rule
  { name = "thirteen"
  , pattern = [regex "trzyna(ś|s)(tu|cie|toma)"]
  , prod = const (integer 13)
  }
-- | Spelled-out thirty, in either of the two alternatives of the regex;
-- every match produces the integer 30.
ruleThirty :: Rule
ruleThirty = Rule
  { name = "thirty"
  , pattern = [regex "trzydzie(ś)ci|trzydziest(u|oma)"]
  , prod = const (integer 30)
  }
-- | Two hundred, written as @dwieście@ or @dwie setki@; produces the
-- integer 200 passed through @withGrain 2@.
ruleNumeral2 :: Rule
ruleNumeral2 = Rule
  { name = "number 200"
  , pattern = [regex "dwie((ś)cie| setki)"]
  , prod = const $ withGrain 2 =<< integer 200
  }
-- | Spelled-out seventeen (the @s@ may carry its diacritic or not);
-- every match produces the integer 17.
ruleSeventeen :: Rule
ruleSeventeen = Rule
  { name = "seventeen"
  , pattern = [regex "siedemna(s|ś)(tu|cie|toma)"]
  , prod = const (integer 17)
  }
-- | One hundred, written as @sto@ or @setki@; produces the integer 100
-- passed through @withGrain 2@.
ruleNumeral :: Rule
ruleNumeral = Rule
  { name = "number 100"
  , pattern = [regex "(sto|setki)"]
  , prod = const $ withGrain 2 =<< integer 100
  }
-- | Nine hundred, written as @dziewięćset@ or @dziewięć setek@; produces
-- the integer 900 passed through @withGrain 2@.
ruleNumeral9 :: Rule
ruleNumeral9 = Rule
  { name = "number 900"
  , pattern = [regex "dziewi(ęć)(set| setek)"]
  , prod = const $ withGrain 2 =<< integer 900
  }
-- | The word @pojedynczy@; every match produces the integer 1.
ruleSingle :: Rule
ruleSingle = Rule
  { name = "single"
  , pattern = [regex "pojedynczy"]
  , prod = const (integer 1)
  }
-- | Spelled-out twenty, in either of the two alternatives of the regex;
-- every match produces the integer 20.
ruleTwenty :: Rule
ruleTwenty = Rule
  { name = "twenty"
  , pattern = [regex "dwadzie(ś|s)cia|dwudziest(u|oma)"]
  , prod = const (integer 20)
  }
-- | The vague quantity @kilka@ / @kilku@; this rule pins it to the fixed
-- integer 3.
ruleAFew :: Rule
ruleAFew = Rule
  { name = "a few"
  , pattern = [regex "kilk(a|u)"]
  , prod = const (integer 3)
  }
-- | Spelled-out eight (the @s@ may carry its diacritic or not), with one
-- of the endings listed in the regex. Every match produces the integer 8.
ruleEight :: Rule
ruleEight = Rule
  { name = "eight"
  , pattern = [regex "o(s|ś)(iem|miu|mioma)"]
  , prod = const (integer 8)
  }
-- | Five hundred, written as @pięćset@ or @pięć setek@; produces the
-- integer 500 passed through @withGrain 2@.
ruleNumeral5 :: Rule
ruleNumeral5 = Rule
  { name = "number 500"
  , pattern = [regex "pi(ęć)(set| setek)"]
  , prod = const $ withGrain 2 =<< integer 500
  }
-- | Three hundred, written as @trzysta@ or @trzy setki@; produces the
-- integer 300 passed through @withGrain 2@.
ruleNumeral3 :: Rule
ruleNumeral3 = Rule
  { name = "number 300"
  , pattern = [regex "(trzy(sta| setki))"]
  , prod = const $ withGrain 2 =<< integer 300
  }
-- | The words @zero@ or @nic@; every match produces the integer 0.
ruleZero :: Rule
ruleZero = Rule
  { name = "zero"
  , pattern = [regex "(zero|nic)"]
  , prod = const (integer 0)
  }
-- | Spelled-out thousand, with an optional @e@ / @y@ ending. Produces
-- the integer 1000 passed through @withGrain 3@ and then
-- 'withMultipliable'.
ruleThousand :: Rule
ruleThousand = Rule
  { name = "thousand"
  , pattern = [regex "ty(s|ś)i(a|ą|ę)c(e|y)?"]
  , prod = const $ withMultipliable =<< withGrain 3 =<< integer 1000
  }
-- | Spelled-out million, with an optional @y@ / @ów@ ending. Produces
-- the integer 1000000 passed through @withGrain 6@ and then
-- 'withMultipliable'.
ruleMillion :: Rule
ruleMillion = Rule
  { name = "million"
  , pattern = [regex "milion(y|(ó)w)?"]
  , prod = const $ withMultipliable =<< withGrain 6 =<< integer 1000000
  }
-- | Sums two directly adjacent numerals.
--
-- The first numeral must satisfy 'hasGrain'; the second must be positive
-- and not multipliable. The sum is produced only when the second value is
-- strictly below @10 ^ grain@ of the first; otherwise the rule yields
-- nothing.
ruleIntersect :: Rule
ruleIntersect = Rule
  { name = "intersect"
  , pattern =
      [ Predicate hasGrain
      , Predicate $ and . sequence [not . isMultipliable, isPositive]
      ]
  , prod = combine
  }
  where
    combine :: [Token] -> Maybe Token
    combine
      ( Token Numeral NumeralData{TNumeral.value = big, TNumeral.grain = Just g}
      : Token Numeral NumeralData{TNumeral.value = small}
      : _
      )
      | small < 10 ** fromIntegral g = double (big + small)
    combine _ = Nothing
-- | Any numeral followed by a token satisfying 'isMultipliable'; the two
-- tokens are handed to the 'multiply' helper, whose result is returned
-- as-is.
ruleMultiply :: Rule
ruleMultiply = Rule
  { name = "compose by multiplication"
  , pattern =
      [ dimension Numeral
      , Predicate isMultipliable
      ]
  , prod = go
  }
  where
    go :: [Token] -> Maybe Token
    go (lhs : rhs : _) = multiply lhs rhs
    go _ = Nothing
-- | Spelled-out three: the stem @trz@ followed by one of the endings
-- listed in the regex. Every match produces the integer 3.
ruleThree :: Rule
ruleThree = Rule
  { name = "three"
  , pattern = [regex "trz(y|ema|ech)"]
  , prod = const (integer 3)
  }
-- | Spelled-out four: the stem @czter@ followed by one of the endings
-- listed in the regex. Every match produces the integer 4.
ruleFour :: Rule
ruleFour = Rule
  { name = "four"
  , pattern = [regex "czter(ej|y|ech|em|ema)"]
  , prod = const (integer 4)
  }
-- | A numeral immediately followed by a magnitude suffix @k@, @m@ or @g@.
--
-- The lookahead in the regex requires the suffix to be followed by a
-- non-word character, @$@, @€@, or end of input. The numeral's value is
-- multiplied by 1e3, 1e6 or 1e9 respectively; the captured suffix is
-- lower-cased before the lookup, and an unknown suffix yields nothing.
ruleNumeralsSuffixesKMG :: Rule
ruleNumeralsSuffixesKMG = Rule
  { name = "numbers suffixes (K, M, G)"
  , pattern =
      [ dimension Numeral
      , regex "([kmg])(?=[\\W\\$€]|$)"
      ]
  , prod = scale
  }
  where
    scale :: [Token] -> Maybe Token
    scale
      ( Token Numeral NumeralData{TNumeral.value = v}
      : Token RegexMatch (GroupMatch (suffix : _))
      : _
      ) =
        lookup (Text.toLower suffix) [("k", 1e3), ("m", 1e6), ("g", 1e9)]
          >>= double . (v *)
    scale _ = Nothing
-- | Seven hundred, written as @siedemset@ or @siedem setek@; produces the
-- integer 700 passed through @withGrain 2@.
ruleNumeral7 :: Rule
ruleNumeral7 = Rule
  { name = "number 700"
  , pattern = [regex "(siedem(set| setek))"]
  , prod = const $ withGrain 2 =<< integer 700
  }
-- | The word @par@ with an optional trailing @a@; every match produces
-- the integer 2.
ruleAPair :: Rule
ruleAPair = Rule
  { name = "a pair"
  , pattern = [regex "para?"]
  , prod = const (integer 2)
  }
-- | The word @pare@; every match produces the integer 2.
ruleCouple :: Rule
ruleCouple = Rule
  { name = "couple"
  , pattern = [regex "pare"]
  , prod = const (integer 2)
  }
-- | Spelled-out six, with optional diacritics and an optional ending;
-- every match produces the integer 6.
--
-- The ending alternation now includes @ioma@, matching the form used by
-- the seven, eight, nine and ten rules of this module, so that
-- "sześcioma" is recognised. The previous alternation only offered @oma@,
-- which cannot match that word. @oma@ is kept so every previously
-- accepted input still matches.
ruleSix :: Rule
ruleSix = Rule
  { name = "six"
  , pattern =
      [ regex "sze(s|ś)(c|ć)(iu|ioma|oma|u)?"
      ]
  , prod = \_ -> integer 6
  }
-- | Six hundred, written as @sześćset@ or @sześć setek@; produces the
-- integer 600 passed through @withGrain 2@.
ruleNumeral6 :: Rule
ruleNumeral6 = Rule
  { name = "number 600"
  , pattern = [regex "(sześć(set| setek))"]
  , prod = const $ withGrain 2 =<< integer 600
  }
-- | Four hundred, written as @czterysta@ or @cztery setki@; produces the
-- integer 400 passed through @withGrain 2@.
ruleNumeral4 :: Rule
ruleNumeral4 = Rule
  { name = "number 400"
  , pattern = [regex "(cztery(sta| setki))"]
  , prod = const $ withGrain 2 =<< integer 400
  }
-- | Spelled-out five, with optional diacritics and an optional ending;
-- every match produces the integer 5.
--
-- The ending alternation now includes @ioma@, matching the form used by
-- the seven, eight, nine and ten rules of this module, so that
-- "pięcioma" is recognised. The previous alternation only offered @oma@,
-- which cannot match that word. @oma@ is kept so every previously
-- accepted input still matches.
ruleFive :: Rule
ruleFive = Rule
  { name = "five"
  , pattern =
      [ regex "pi(e|ę)(c|ć)(iu|ioma|oma|u)?"
      ]
  , prod = \_ -> integer 5
  }
-- | Spelled-out forty, in either of the two alternatives of the regex;
-- every match produces the integer 40.
ruleFourty :: Rule
ruleFourty = Rule
  { name = "fou?rty"
  , pattern = [regex "czterdzie(ś)ci|czterdziest(u|oma)"]
  , prod = const (integer 40)
  }
-- | The word @tuzin@; produces the integer 12 passed through
-- 'withMultipliable'.
ruleDozen :: Rule
ruleDozen = Rule
  { name = "dozen"
  , pattern = [regex "tuzin"]
  , prod = const $ withMultipliable =<< integer 12
  }
-- | Spelled-out seven: the stem @sied@ followed by one of the endings
-- listed in the regex. Every match produces the integer 7.
ruleSeven :: Rule
ruleSeven = Rule
  { name = "seven"
  , pattern = [regex "sied(miu|em|mioma)"]
  , prod = const (integer 7)
  }
-- | Spelled-out nineteen; every match produces the integer 19.
--
-- The vowel after @dziewi@ is accepted both as plain @e@ and as @ę@, in
-- line with the nine and ninety rules of this module. The previous
-- pattern only allowed the plain @e@, so the spelling with the diacritic
-- ("dziewiętnaście") was not recognised.
ruleNineteen :: Rule
ruleNineteen = Rule
  { name = "nineteen"
  , pattern =
      [ regex "dziewi(e|ę)tna(s|ś)(tu|cie|toma)"
      ]
  , prod = \_ -> integer 19
  }
-- | A tens numeral (one of 20, 30 .. 90) followed by a numeral passing
-- @numberBetween 1 10@; produces their sum.
ruleInteger2 :: Rule
ruleInteger2 = Rule
  { name = "integer 21..99"
  , pattern =
      [ oneOf [20, 30 .. 90]
      , Predicate $ numberBetween 1 10
      ]
  , prod = addUnits
  }
  where
    addUnits :: [Token] -> Maybe Token
    addUnits
      ( Token Numeral NumeralData{TNumeral.value = tens}
      : Token Numeral NumeralData{TNumeral.value = units}
      : _
      ) = double (tens + units)
    addUnits _ = Nothing
-- | Spelled-out eighteen (the @s@ may carry its diacritic or not);
-- every match produces the integer 18.
ruleEighteen :: Rule
ruleEighteen = Rule
  { name = "eighteen"
  , pattern = [regex "osiemna(s|ś)(tu|cie|toma)"]
  , prod = const (integer 18)
  }
-- | A numeral, the literal word @dot@ or @point@, then a numeral that has
-- no grain. The result is the first value plus 'decimalsToDouble' applied
-- to the second (NOTE(review): presumably this turns the digits into a
-- fractional part; confirm against the helpers module).
ruleNumeralDotNumeral :: Rule
ruleNumeralDotNumeral = Rule
  { name = "number dot number"
  , pattern =
      [ dimension Numeral
      , regex "dot|point"
      , Predicate $ not . hasGrain
      ]
  , prod = joinParts
  }
  where
    joinParts :: [Token] -> Maybe Token
    joinParts (Token Numeral whole : _ : Token Numeral frac : _) =
      double (TNumeral.value whole + decimalsToDouble (TNumeral.value frac))
    joinParts _ = Nothing
-- | One to three digits followed by one to five comma-separated groups of
-- three digits. The commas are removed from the matched text before it is
-- handed to 'parseDouble'; a failed parse yields nothing.
ruleIntegerWithThousandsSeparator :: Rule
ruleIntegerWithThousandsSeparator = Rule
  { name = "integer with thousands separator ,"
  , pattern = [regex "(\\d{1,3}(,\\d\\d\\d){1,5})"]
  , prod = fromMatch
  }
  where
    fromMatch :: [Token] -> Maybe Token
    fromMatch (Token RegexMatch (GroupMatch (raw : _)) : _) =
      double =<< parseDouble (Text.replace "," Text.empty raw)
    fromMatch _ = Nothing
-- | Spelled-out fifty, in either of the two alternatives of the regex;
-- every match produces the integer 50.
ruleFifty :: Rule
ruleFifty = Rule
  { name = "fifty"
  , pattern = [regex "pi(ęć)dziesi(ą)t|pi(ęć)dziesi(ę)ci(u|oma)"]
  , prod = const (integer 50)
  }
-- | Every numeral rule defined in this module, and the module's only
-- export. Entries are kept in alphabetical order by identifier.
rules :: [Rule]
rules =
  [ ruleAFew
  , ruleAPair
  , ruleCouple
  , ruleDecimalNumeral
  , ruleDecimalWithThousandsSeparator
  , ruleDozen
  , ruleEight
  , ruleEighteen
  , ruleEighty
  , ruleEleven
  , ruleFifteen
  , ruleFifty
  , ruleFive
  , ruleFour
  , ruleFourteen
  , ruleFourty
  , ruleInteger2
  , ruleIntegerWithThousandsSeparator
  , ruleIntersect
  , ruleIntersectWithAnd
  , ruleMillion
  , ruleMultiply
  , ruleNine
  , ruleNineteen
  , ruleNinety
  , ruleNumeral
  , ruleNumeral2
  , ruleNumeral3
  , ruleNumeral4
  , ruleNumeral5
  , ruleNumeral6
  , ruleNumeral7
  , ruleNumeral8
  , ruleNumeral9
  , ruleNumeralDotNumeral
  , ruleNumeralsPrefixWithNegativeOrMinus
  , ruleNumeralsSuffixesKMG
  , ruleOne
  , ruleSeven
  , ruleSeventeen
  , ruleSeventy
  , ruleSingle
  , ruleSix
  , ruleSixteen
  , ruleSixty
  , ruleSpecialCompositionForMissingHundredsLikeInOneTwentyTwo
  , ruleTen
  , ruleThirteen
  , ruleThirty
  , ruleThousand
  , ruleThree
  , ruleTwelve
  , ruleTwenty
  , ruleTwo
  , ruleZero
  ]