duckling/Duckling/Numeral/EL/Rules.hs
Aaron Yue babe317723 fix intersect rule to work with negative numbers
Summary:
Numerals that require intersection with negative numbers don't work, because the negative sign gets parsed
before the intersect rule is applied. This fixes it by adding an `isPositive` guard to the second operand of the intersect rule.

Reviewed By: patapizza

Differential Revision: D7592225

fbshipit-source-id: 2bc9c708cadeea4012c1f3ef487c61a144325f2a
2018-04-13 19:45:40 -07:00

270 lines
7.9 KiB
Haskell
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree. An additional grant
-- of patent rights can be found in the PATENTS file in the same directory.
{-# LANGUAGE GADTs #-}
{-# LANGUAGE NoRebindableSyntax #-}
{-# LANGUAGE OverloadedStrings #-}
module Duckling.Numeral.EL.Rules
( rules
) where
import Data.HashMap.Strict (HashMap)
import Data.List (intercalate)
import Data.Maybe
import Data.String
import Data.Text (Text)
import Prelude
import qualified Data.HashMap.Strict as HashMap
import qualified Data.Text as Text
import Duckling.Dimensions.Types
import Duckling.Numeral.Helpers
import Duckling.Numeral.Types (NumeralData (..))
import Duckling.Regex.Types
import Duckling.Types
import qualified Duckling.Numeral.Types as TNumeral
-- | Value of every lower-cased Greek number word that 'ruleNumeral' looks up:
-- 0..19, 20 and the tens 30..90, including gender and spelling variants.
--
-- Every word the 'ruleNumeral' regex is meant to accept needs a key here,
-- otherwise the lookup fails and the match is silently dropped. The genitive
-- feminine forms of "one" and the colloquial φ/χ spellings of 17 and 18 are
-- accepted by that regex, so they are listed explicitly.
oneOrTwoDigitsMap :: HashMap Text Integer
oneOrTwoDigitsMap = HashMap.fromList
  [ ( "μηδέν" , 0 )
  , ( "ένα" , 1 )
  , ( "ένας" , 1 )
  , ( "ενός" , 1 )
  , ( "μία" , 1 )
  , ( "μια" , 1 )
  , ( "μίας" , 1 )
  , ( "μιας" , 1 )
  , ( "δύο" , 2 )
  , ( "δυο" , 2 )
  , ( "τρία" , 3 )
  , ( "τρεις" , 3 )
  , ( "τέσσερα" , 4 )
  , ( "τέσσερις" , 4 )
  , ( "πέντε" , 5 )
  , ( "έξι" , 6 )
  , ( "επτά" , 7 )
  , ( "εφτά" , 7 )
  , ( "οκτώ" , 8 )
  , ( "οχτώ" , 8 )
  , ( "εννιά" , 9 )
  , ( "εννέα" , 9 )
  , ( "δέκα" , 10 )
  , ( "δεκαριά" , 10 )
  , ( "έντεκα" , 11 )
  , ( "ένδεκα" , 11 )
  , ( "δώδεκα" , 12 )
  , ( "ντουζίνα" , 12 )
  , ( "ντουζίνες" , 12 )
  , ( "δεκατρία" , 13 )
  , ( "δεκατέσσερα" , 14 )
  , ( "δεκαπέντε" , 15 )
  , ( "δεκαέξι" , 16 )
  , ( "δεκαεπτά" , 17 )
  , ( "δεκαεφτά" , 17 )
  , ( "δεκαοκτώ" , 18 )
  , ( "δεκαοχτώ" , 18 )
  , ( "δεκαεννέα" , 19 )
  , ( "δεκαεννιά" , 19 )
  , ( "είκοσι" , 20 )
  , ( "τριάντα" , 30 )
  , ( "σαράντα" , 40 )
  , ( "πενήντα" , 50 )
  , ( "εξήντα" , 60 )
  , ( "εβδομήντα" , 70 )
  , ( "ογδόντα" , 80 )
  , ( "ενενήντα" , 90 )
  ]
-- | Value (200..900) of each lower-cased stem that can precede "ακόσι…" in
-- 'ruleHundreds'. Both spellings of the 700 and 800 stems are listed.
hundredsMap :: HashMap Text Integer
hundredsMap = HashMap.fromList
  [ (stem, 100 * multiplier) | (stem, multiplier) <- stems ]
  where
    -- Stem paired with the number of hundreds it stands for.
    stems :: [(Text, Integer)]
    stems =
      [ ("δι", 2)
      , ("τρι", 3)
      , ("τετρ", 4)
      , ("πεντ", 5)
      , ("εξ", 6)
      , ("επτ", 7)
      , ("εφτ", 7)
      , ("οκτ", 8)
      , ("οχτ", 8)
      , ("εννι", 9)
      ]
-- | Recognizes a single spelled-out number word and resolves it through
-- 'oneOrTwoDigitsMap'. A word that matches the regex but has no entry in the
-- map produces no token.
ruleNumeral :: Rule
ruleNumeral = Rule
  { name = "number (0..19, 20, 30..90)"
  , pattern = [ regex numberWords ]
  , prod = produce
  }
  where
    produce :: [Token] -> Maybe Token
    produce (Token RegexMatch (GroupMatch (word:_)) : _) = do
      -- The map keys are lower case, so normalize the match first.
      value <- HashMap.lookup (Text.toLower word) oneOrTwoDigitsMap
      integer value
    produce _ = Nothing

    -- One capturing group wrapping every alternative below.
    numberWords :: String
    numberWords = concat [ "(", intercalate "|" alternatives, ")" ]

    alternatives :: [String]
    alternatives =
      [ "μηδέν|[εέ]ν[αοό]ς?|μ[ιί]ας?"                             -- 0 and 1
      , "δ[υύ]ο|τρ(ία|εις)|τέσσερ(α|ις)|πέντε"                    -- 2 to 5
      , "έξι|ε[πφ]τά|ο[κχ]τώ|ενν(ιά|έα)|δέκα|δεκαριά"             -- 6 to 10
      , "έν[τδ]εκα|δώδεκα|ντουζίν(α|ες)"                          -- 11 and 12
      , "δεκα(τρία|τέσσερα|πέντε|έξι|ε[πφ]τά|ο[χκ]τώ|ενν(έα|ιά))" -- 13 to 19
      , "είκοσι|(τριά|σαρά|πενή|εξή|εβδομή|ογδό|ενενή)ντα"        -- tens 20 to 90
      ]
-- | Recognizes "μερικά" / "μερικές" / "μερικοί" and always produces the
-- integer 3, whatever was matched.
ruleFew :: Rule
ruleFew = Rule
  { name = "few"
  , pattern = [ regex "μερικ(ά|ές|οί)" ]
  , prod = const $ integer 3
  }
-- | Combines a tens numeral (one of 20, 30, .., 90) with a following numeral
-- accepted by @numberBetween 1 10@ by adding their values.
ruleCompositeTens :: Rule
ruleCompositeTens = Rule
  { name = "integer 21..99"
  , pattern =
    [ oneOf [20,30..90]
    , numberBetween 1 10
    ]
  , prod = combine
  }
  where
    combine :: [Token] -> Maybe Token
    combine (Token Numeral tensData : Token Numeral unitsData : _) =
      double $ TNumeral.value tensData + TNumeral.value unitsData
    combine _ = Nothing
-- | Recognizes "εκατό" / "εκατόν" and produces the integer 100 with its grain
-- set to 2 through 'withGrain'.
ruleHundred :: Rule
ruleHundred = Rule
  { name = "number (100)"
  , pattern = [ regex "(εκατόν?)" ]
  , prod = produce
  }
  where
    produce :: [Token] -> Maybe Token
    produce (Token RegexMatch (GroupMatch _) : _) = do
      hundred <- integer 100
      withGrain 2 hundred
    produce _ = Nothing
-- | Recognizes the spelled-out hundreds 200..900. The first capture group is
-- the stem before "ακόσι…"; its value comes from 'hundredsMap' and the
-- resulting token gets grain 2 through 'withGrain'.
ruleHundreds :: Rule
ruleHundreds = Rule
  { name = "number (200..900)"
  , pattern =
    [ regex "(δι|τρι|τετρ|πεντ|εξ|ε(π|φ)τ|ο(χ|κ)τ|εννι)ακόσι(α|ες|οι)"
    ]
  , prod = produce
  }
  where
    produce :: [Token] -> Maybe Token
    produce (Token RegexMatch (GroupMatch (stem:_)) : _) = do
      -- The map keys are lower case, so normalize the stem first.
      value <- HashMap.lookup (Text.toLower stem) hundredsMap
      hundreds <- integer value
      withGrain 2 hundreds
    produce _ = Nothing
-- | Recognizes the words for thousand, million and billion and produces the
-- corresponding value, tagged with its grain (3, 6 or 9) and marked as
-- multipliable so that 'ruleMultiply' can combine it with a preceding numeral.
--
-- The regex and the case table below must stay in sync: a word the regex
-- accepts without a matching branch is silently dropped, and a branch the
-- regex cannot produce is dead code. For that reason the masculine/feminine
-- forms "χίλιοι" / "χίλιες" have their own branches and the plural
-- "δισεκατομμύρια" is accepted by the regex.
rulePowersOfTen :: Rule
rulePowersOfTen = Rule
  { name = "powers of tens"
  , pattern =
    [ regex "(χίλι(α|οι|ες)|χιλιάδες|εκατομμύρι(ο|α)|δις|δισεκατομμύρι(ο|α))"
    ]
  , prod = \tokens -> case tokens of
      (Token RegexMatch (GroupMatch (match:_)):_) ->
        case Text.toLower match of
          "χίλια"          -> powerOfTen 1e3 3
          "χίλιοι"         -> powerOfTen 1e3 3
          "χίλιες"         -> powerOfTen 1e3 3
          "χιλιάδες"       -> powerOfTen 1e3 3
          "εκατομμύριο"    -> powerOfTen 1e6 6
          "εκατομμύρια"    -> powerOfTen 1e6 6
          "δις"            -> powerOfTen 1e9 9
          "δισεκατομμύριο" -> powerOfTen 1e9 9
          "δισεκατομμύρια" -> powerOfTen 1e9 9
          _                -> Nothing
      _ -> Nothing
  }
  where
    -- Builds the numeral token for a power of ten: the value, its grain, and
    -- the multipliable flag.
    powerOfTen value grain =
      double value >>= withGrain grain >>= withMultipliable
-- | Recognizes a minus sign ("-" or "μείον") followed by a numeral that
-- satisfies 'isPositive', and produces that numeral with its sign flipped.
ruleNegative :: Rule
ruleNegative = Rule
  { name = "negative numbers"
  , pattern =
    [ regex "-|μείον"
    , Predicate isPositive
    ]
  , prod = flipSign
  }
  where
    flipSign :: [Token] -> Maybe Token
    flipSign (_ : Token Numeral numeral : _) =
      double . negate $ TNumeral.value numeral
    flipSign _ = Nothing
-- | Adds two adjacent numerals, e.g. a hundreds value followed by a smaller
-- number. The first numeral must carry a grain. The second must not be
-- multipliable, must satisfy 'isPositive', and must be smaller than
-- @10 ^ grain@ of the first, otherwise no token is produced.
ruleSum :: Rule
ruleSum = Rule
  { name = "intersect 2 numbers"
  , pattern =
    [ Predicate hasGrain
    , Predicate $ \token -> not (isMultipliable token) && isPositive token
    ]
  , prod = add
  }
  where
    add :: [Token] -> Maybe Token
    add ( Token Numeral NumeralData{TNumeral.value = high, TNumeral.grain = Just g}
        : Token Numeral NumeralData{TNumeral.value = low}
        : _ )
      -- The lower part has to fit below the grain of the higher part.
      | low < 10 ** fromIntegral g = double (high + low)
    add _ = Nothing
-- | Composes any numeral with a following multipliable numeral by handing
-- both tokens to 'multiply'.
ruleMultiply :: Rule
ruleMultiply = Rule
  { name = "compose by multiplication"
  , pattern =
    [ dimension Numeral
    , Predicate isMultipliable
    ]
  , prod = compose
  }
  where
    compose :: [Token] -> Maybe Token
    compose (multiplicand : multiplier : _) = multiply multiplicand multiplier
    compose _ = Nothing
-- | Recognizes digits written with a decimal comma (e.g. "1,5"). The comma is
-- replaced by a dot before the text is handed to 'parseDecimal'.
ruleDecimals :: Rule
ruleDecimals = Rule
  { name = "decimal number"
  , pattern = [ regex "(\\d+,\\d+)" ]
  , prod = produce
  }
  where
    produce :: [Token] -> Maybe Token
    produce (Token RegexMatch (GroupMatch (digits:_)) : _) =
      parseDecimal True $ Text.replace "," "." digits
    produce _ = Nothing
-- | Recognizes "<numeral> κόμμα <numeral>", where the second numeral has no
-- grain. The result is the first value plus 'decimalsToDouble' of the second
-- (presumably the second value turned into a fractional part; confirm
-- against the helper).
ruleCommaSpelledOut :: Rule
ruleCommaSpelledOut = Rule
  { name = "one point two"
  , pattern =
    [ dimension Numeral
    , regex "κόμμα"
    , Predicate $ not . hasGrain
    ]
  , prod = produce
  }
  where
    produce :: [Token] -> Maybe Token
    produce (Token Numeral whole : _ : Token Numeral fraction : _) =
      double $ TNumeral.value whole + decimalsToDouble (TNumeral.value fraction)
    produce _ = Nothing
-- | Recognizes digits grouped in threes by dots, optionally followed by a
-- decimal comma part (e.g. "1.234.567,89"). The dots are removed and the
-- comma becomes a dot before the text is parsed as a double.
ruleDots :: Rule
ruleDots = Rule
  { name = "dot-separated numbers"
  , pattern = [ regex "(\\d+(\\.\\d\\d\\d)+(,\\d+)?)" ]
  , prod = produce
  }
  where
    produce :: [Token] -> Maybe Token
    produce (Token RegexMatch (GroupMatch (raw:_)) : _) =
      parseDouble normalized >>= double
      where
        -- Drop the thousands separators first, then convert the decimal comma.
        withoutSeparators = Text.replace "." Text.empty raw
        normalized = Text.replace "," "." withoutSeparators
    produce _ = Nothing
-- | Every Greek (EL) numeral rule defined in this module. This list is the
-- module's only export.
-- NOTE(review): whether the order of this list matters to the engine is not
-- visible from this file; keep the order when adding rules unless confirmed.
rules :: [Rule]
rules =
[ ruleFew
, ruleNumeral
, ruleCompositeTens
, rulePowersOfTen
, ruleNegative
, ruleHundred
, ruleHundreds
, ruleSum
, ruleMultiply
, ruleDecimals
, ruleCommaSpelledOut
, ruleDots
]