Added AmountOfMoney dimension to Bulgarian language

Summary:
- Added AmountOfMoney dimension to Bulgarian language
Closes https://github.com/facebookincubator/duckling/pull/80

Reviewed By: JonCoens

Differential Revision: D5606699

Pulled By: patapizza

fbshipit-source-id: c18f5d4
This commit is contained in:
Veselin Stoyanov 2017-08-14 09:18:27 -07:00 committed by Facebook Github Bot
parent be113689ac
commit e9b1c8932a
10 changed files with 425 additions and 3 deletions

View File

@ -0,0 +1,119 @@
-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree. An additional grant
-- of patent rights can be found in the PATENTS file in the same directory.
{-# LANGUAGE OverloadedStrings #-}
module Duckling.AmountOfMoney.BG.Corpus
( corpus ) where
import Prelude
import Data.String
import Duckling.Lang
import Duckling.AmountOfMoney.Types
import Duckling.Resolve
import Duckling.Testing.Types
corpus :: Corpus
corpus = (testContext {lang = BG}, allExamples)
allExamples :: [Example]
allExamples = concat
[ examples (simple BGN 1)
[ "1 лв"
, "един лев"
, "1 Лев"
]
, examples (simple BGN 10)
[ "10 лв"
, "десет лева"
, "10лв"
]
, examples (simple BGN 15.50)
[ "15лв и 50ст"
, "петнадесет лева и петдесет стотинки"
, "15 Лв и 50 Ст"
]
, examples (simple Dollar 1)
[ "$1"
, "един долар"
, "1 долар"
]
, examples (simple Dollar 10)
[ "$10"
, "$ 10"
, "10$"
, "10 Долара"
, "десет долара"
]
, examples (simple Cent 10)
[ "10 цента"
, "десет пенита"
, "десет цента"
, "10¢"
]
, examples (simple Cent 50)
[ "50 ст"
, "петдесет стотинки"
, "50ст"
]
, examples (simple Dollar 1e4)
[ "$10К"
, "10к$"
, "$10,000"
]
, examples (simple USD 3.14)
[ "USD3.14"
, "3.14US$"
, "US$ 3.14"
]
, examples (simple EUR 20)
[ "20\x20ac"
, "20 евро"
, "20 Евро"
, "EUR 20"
, "EUR 20.0"
, "20€"
, "20 €ur"
]
, examples (simple Pound 10)
[ "\x00a3\&10"
, "десет паунда"
]
, examples (simple Dollar 20.43)
[ "$20 и 43ц"
, "$20 43"
, "20 долара 43ц"
, "20 долара 43 цента"
, "двадесет долара 43 цента"
, "20 долара 43"
, "двадесет долара и 43"
]
, examples (simple GBP 3.01)
[ "GBP3.01"
, "GBP 3.01"
, "3 GBP 1 пени"
]
, examples (between Dollar (10, 20))
[ "между 10 и 20 долара"
, "от 10 до 20 долара"
, "около 10-20 долара"
, "между 10 и 20 долара"
, "около $10-$20"
, "10-20 долара"
]
, examples (under EUR 7)
[ "под седем евро"
, "по-малко от 7 Евро"
, "под 7€"
]
, examples (above Dollar 1.42)
[ "над 1 долар и четиридесет и два цента"
, "поне $1.42"
, "над 1.42 долара"
]
]

View File

@ -0,0 +1,267 @@
-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree. An additional grant
-- of patent rights can be found in the PATENTS file in the same directory.
{-# LANGUAGE GADTs #-}
{-# LANGUAGE OverloadedStrings #-}
module Duckling.AmountOfMoney.BG.Rules
( rules
) where
import Data.Maybe
import Data.String
import Prelude
import qualified Data.Text as Text
import Duckling.AmountOfMoney.Helpers
import Duckling.AmountOfMoney.Types (Currency(..), AmountOfMoneyData (..))
import Duckling.Dimensions.Types
import Duckling.Numeral.Types (NumeralData (..))
import Duckling.Regex.Types
import Duckling.Types
import qualified Duckling.AmountOfMoney.Types as TAmountOfMoney
import qualified Duckling.Numeral.Types as TNumeral
ruleBGN :: Rule
ruleBGN = Rule
{ name = "лв"
, pattern =
[ regex "ле?ва?"
]
, prod = \_ -> Just . Token AmountOfMoney $ currencyOnly BGN
}
rulePounds :: Rule
rulePounds = Rule
{ name = "£"
, pattern =
[ regex "паунд(а|и)?"
]
, prod = \_ -> Just . Token AmountOfMoney $ currencyOnly Pound
}
ruleDollar :: Rule
ruleDollar = Rule
{ name = "$"
, pattern =
[ regex "долар(а|и)?"
]
, prod = \_ -> Just . Token AmountOfMoney $ currencyOnly Dollar
}
ruleCent :: Rule
ruleCent = Rule
{ name = "cent"
, pattern =
[ regex "ст(отинк(a|и))?|цента?|пени(та)?|пенса?|ц"
]
, prod = \_ -> Just . Token AmountOfMoney $ currencyOnly Cent
}
ruleEUR :: Rule
ruleEUR = Rule
{ name = ""
, pattern =
[ regex "евр(о|а)"
]
, prod = \_ -> Just . Token AmountOfMoney $ currencyOnly EUR
}
ruleIntersectAndXCents :: Rule
ruleIntersectAndXCents = Rule
{ name = "intersect (and X cents)"
, pattern =
[ financeWith TAmountOfMoney.value isJust
, regex "и"
, financeWith TAmountOfMoney.currency (== Cent)
]
, prod = \tokens -> case tokens of
(Token AmountOfMoney fd:
_:
Token AmountOfMoney (AmountOfMoneyData {TAmountOfMoney.value = Just c}):
_) -> Just . Token AmountOfMoney $ withCents c fd
_ -> Nothing
}
ruleIntersect :: Rule
ruleIntersect = Rule
{ name = "intersect"
, pattern =
[ financeWith TAmountOfMoney.value isJust
, dimension Numeral
]
, prod = \tokens -> case tokens of
(Token AmountOfMoney fd:
Token Numeral (NumeralData {TNumeral.value = c}):
_) -> Just . Token AmountOfMoney $ withCents c fd
_ -> Nothing
}
ruleIntersectAndNumeral :: Rule
ruleIntersectAndNumeral = Rule
{ name = "intersect (and number)"
, pattern =
[ financeWith TAmountOfMoney.value isJust
, regex "и"
, dimension Numeral
]
, prod = \tokens -> case tokens of
(Token AmountOfMoney fd:
_:
Token Numeral (NumeralData {TNumeral.value = c}):
_) -> Just . Token AmountOfMoney $ withCents c fd
_ -> Nothing
}
ruleIntersectXCents :: Rule
ruleIntersectXCents = Rule
{ name = "intersect (X cents)"
, pattern =
[ financeWith TAmountOfMoney.value isJust
, financeWith id $ \x -> case TAmountOfMoney.value x of
Just v | v > 0 -> TAmountOfMoney.currency x == Cent
_ -> False
]
, prod = \tokens -> case tokens of
(Token AmountOfMoney fd:
Token AmountOfMoney (AmountOfMoneyData {TAmountOfMoney.value = Just c}):
_) -> Just . Token AmountOfMoney $ withCents c fd
_ -> Nothing
}
rulePrecision :: Rule
rulePrecision = Rule
{ name = "about|exactly <amount-of-money>"
, pattern =
[ regex "точно|около|приблизително|близо (до)?|почти"
, dimension AmountOfMoney
]
, prod = \tokens -> case tokens of
(_:token:_) -> Just token
_ -> Nothing
}
ruleIntervalBetweenNumeral :: Rule
ruleIntervalBetweenNumeral = Rule
{ name = "between|from <numeral> to|and <amount-of-money>"
, pattern =
[ regex "между|от"
, dimension Numeral
, regex "до|и"
, financeWith TAmountOfMoney.value isJust
]
, prod = \tokens -> case tokens of
(_:
Token Numeral (NumeralData {TNumeral.value = from}):
_:
Token AmountOfMoney (AmountOfMoneyData {TAmountOfMoney.value = Just to, TAmountOfMoney.currency = c}):
_) ->
Just . Token AmountOfMoney . withInterval (from, to) $ currencyOnly c
_ -> Nothing
}
ruleIntervalBetween :: Rule
ruleIntervalBetween = Rule
{ name = "between|from <amount-of-money> to|and <amount-of-money>"
, pattern =
[ regex "между|от"
, financeWith TAmountOfMoney.value isJust
, regex "до|и"
, financeWith TAmountOfMoney.value isJust
]
, prod = \tokens -> case tokens of
(_:
Token AmountOfMoney (AmountOfMoneyData {TAmountOfMoney.value = Just from, TAmountOfMoney.currency = c1}):
_:
Token AmountOfMoney (AmountOfMoneyData {TAmountOfMoney.value = Just to, TAmountOfMoney.currency = c2}):
_) | c1 == c2 ->
Just . Token AmountOfMoney . withInterval (from, to) $ currencyOnly c1
_ -> Nothing
}
ruleIntervalNumeralDash :: Rule
ruleIntervalNumeralDash = Rule
{ name = "<numeral> - <amount-of-money>"
, pattern =
[ dimension Numeral
, regex "-"
, financeWith TAmountOfMoney.value isJust
]
, prod = \tokens -> case tokens of
(Token Numeral (NumeralData {TNumeral.value = from}):
_:
Token AmountOfMoney (AmountOfMoneyData {TAmountOfMoney.value = Just to, TAmountOfMoney.currency = c}):
_) ->
Just . Token AmountOfMoney . withInterval (from, to) $ currencyOnly c
_ -> Nothing
}
ruleIntervalDash :: Rule
ruleIntervalDash = Rule
{ name = "<amount-of-money> - <amount-of-money>"
, pattern =
[ financeWith TAmountOfMoney.value isJust
, regex "-"
, financeWith TAmountOfMoney.value isJust
]
, prod = \tokens -> case tokens of
(Token AmountOfMoney (AmountOfMoneyData {TAmountOfMoney.value = Just from, TAmountOfMoney.currency = c1}):
_:
Token AmountOfMoney (AmountOfMoneyData {TAmountOfMoney.value = Just to, TAmountOfMoney.currency = c2}):
_) | c1 == c2 ->
Just . Token AmountOfMoney . withInterval (from, to) $ currencyOnly c1
_ -> Nothing
}
ruleIntervalMax :: Rule
ruleIntervalMax = Rule
{ name = "under/less/lower/no more than <amount-of-money>"
, pattern =
[ regex "под|по-малко от|не повече от"
, financeWith TAmountOfMoney.value isJust
]
, prod = \tokens -> case tokens of
(_:
Token AmountOfMoney (AmountOfMoneyData {TAmountOfMoney.value = Just to, TAmountOfMoney.currency = c}):
_) -> Just . Token AmountOfMoney . withMax to $ currencyOnly c
_ -> Nothing
}
ruleIntervalMin :: Rule
ruleIntervalMin = Rule
{ name = "over/above/at least/more than <amount-of-money>"
, pattern =
[ regex "над|поне|повече от"
, financeWith TAmountOfMoney.value isJust
]
, prod = \tokens -> case tokens of
(_:
Token AmountOfMoney (AmountOfMoneyData {TAmountOfMoney.value = Just to, TAmountOfMoney.currency = c}):
_) -> Just . Token AmountOfMoney . withMin to $ currencyOnly c
_ -> Nothing
}
rules :: [Rule]
rules =
[ ruleBGN
, ruleDollar
, ruleCent
, ruleEUR
, ruleIntersect
, ruleIntersectAndNumeral
, ruleIntersectAndXCents
, ruleIntersectXCents
, ruleIntervalBetweenNumeral
, ruleIntervalBetween
, ruleIntervalMax
, ruleIntervalMin
, ruleIntervalNumeralDash
, ruleIntervalDash
, rulePounds
, rulePrecision
]

View File

@ -115,6 +115,9 @@ allExamples = concat
[ "42 SAR"
, "42 Saudiriyal"
]
, examples (simple BGN 42)
[ "42 BGN"
]
, examples (simple MYR 42)
[ "42 MYR"
, "42 RM"

View File

@ -34,6 +34,7 @@ currencies :: HashMap Text Currency
currencies = HashMap.fromList
[ ("aed", AED)
, ("aud", AUD)
, ("bgn", BGN)
, ("brl", BRL)
, ("\x00a2", Cent)
, ("c", Cent)
@ -86,7 +87,7 @@ ruleCurrencies :: Rule
ruleCurrencies = Rule
{ name = "currencies"
, pattern =
[ regex "(aed|aud|brl|\x00a2|c|\\$|dollars?|egp|(e|\x20ac)uro?s?|\x20ac|gbp|hrk|idr|inr|\x00a5|jpy|krw|kwd|lbp|myr|rm|nok|\x00a3|pta?s?|qar|rs\\.?|ron|rupees?|sar|sek|sgb|us(d|\\$)|vnd|yen)"
[ regex "(aed|aud|bgn|brl|\x00a2|c|\\$|dollars?|egp|(e|\x20ac)uro?s?|\x20ac|gbp|hrk|idr|inr|\x00a5|jpy|krw|kwd|lbp|myr|rm|nok|\x00a3|pta?s?|qar|rs\\.?|ron|rupees?|sar|sek|sgb|us(d|\\$)|vnd|yen)"
]
, prod = \tokens -> case tokens of
(Token RegexMatch (GroupMatch (match:_)):_) -> do

View File

@ -34,6 +34,7 @@ data Currency
-- unambiguous
| AED
| AUD
| BGN
| BRL
| EGP
| EUR
@ -64,6 +65,7 @@ instance ToJSON Currency where
toJSON Unnamed = "unknown"
toJSON AED = "AED"
toJSON AUD = "AUD"
toJSON BGN = "BGN"
toJSON BRL = "BRL"
toJSON EGP = "EGP"
toJSON EUR = "EUR"

View File

@ -16,13 +16,15 @@ module Duckling.Rules.BG
import Duckling.Dimensions.Types
import Duckling.Types
import qualified Duckling.Numeral.BG.Rules as Numeral
import qualified Duckling.AmountOfMoney.BG.Rules as AmountOfMoney
rules :: Some Dimension -> [Rule]
rules (This Distance) = []
rules (This Duration) = []
rules (This Numeral) = Numeral.rules
rules (This Email) = []
rules (This AmountOfMoney) = []
rules (This AmountOfMoney) = AmountOfMoney.rules
rules (This Ordinal) = []
rules (This PhoneNumber) = []
rules (This Quantity) = []

View File

@ -146,7 +146,7 @@ toJText = Text.decodeUtf8 . LB.toStrict . encode
regex :: String -> PatternItem
regex = Regex . R.makeRegexOpts compOpts execOpts
where
compOpts = PCRE.defaultCompOpt + PCRE.compCaseless
compOpts = PCRE.defaultCompOpt + PCRE.compCaseless + PCRE.compUTF8
execOpts = PCRE.defaultExecOpt
dimension :: Typeable a => Dimension a -> PatternItem

View File

@ -144,6 +144,8 @@ library
-- AmountOfMoney
, Duckling.AmountOfMoney.EN.Corpus
, Duckling.AmountOfMoney.EN.Rules
, Duckling.AmountOfMoney.BG.Corpus
, Duckling.AmountOfMoney.BG.Rules
, Duckling.AmountOfMoney.ES.Corpus
, Duckling.AmountOfMoney.ES.Rules
, Duckling.AmountOfMoney.FR.Corpus
@ -548,6 +550,7 @@ test-suite duckling-test
-- AmountOfMoney
, Duckling.AmountOfMoney.EN.Tests
, Duckling.AmountOfMoney.BG.Tests
, Duckling.AmountOfMoney.ES.Tests
, Duckling.AmountOfMoney.FR.Tests
, Duckling.AmountOfMoney.GA.Tests

View File

@ -0,0 +1,23 @@
-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree. An additional grant
-- of patent rights can be found in the PATENTS file in the same directory.
module Duckling.AmountOfMoney.BG.Tests
( tests ) where
import Data.String
import Prelude
import Test.Tasty
import Duckling.AmountOfMoney.BG.Corpus
import Duckling.Dimensions.Types
import Duckling.Testing.Asserts
tests :: TestTree
tests = testGroup "BG Tests"
[ makeCorpusTest [This AmountOfMoney] corpus
]

View File

@ -13,6 +13,7 @@ import Prelude
import Test.Tasty
import qualified Duckling.AmountOfMoney.EN.Tests as EN
import qualified Duckling.AmountOfMoney.BG.Tests as BG
import qualified Duckling.AmountOfMoney.ES.Tests as ES
import qualified Duckling.AmountOfMoney.FR.Tests as FR
import qualified Duckling.AmountOfMoney.GA.Tests as GA
@ -28,6 +29,7 @@ import qualified Duckling.AmountOfMoney.VI.Tests as VI
tests :: TestTree
tests = testGroup "AmountOfMoney Tests"
[ EN.tests
, BG.tests
, ES.tests
, FR.tests
, GA.tests