Quantity/PT: Extend quantity to include grams in Portuguese (#631)

Summary:
I'm using Duckling in my project, but I noticed that quantities in kg weren't being detected correctly, even though other entities such as numeral/volume were all working as expected. Investigating further, I found that this was simply because, in Portuguese, the Quantity entity was only configured to detect cups and pounds, never grams. And even for cups/pounds, products weren't being detected correctly.

So I've just adapted the rules from English Quantity to work in Portuguese as well, while keeping the cups/pounds too. It's all working as expected now and it's backwards compatible.

Pull Request resolved: https://github.com/facebook/duckling/pull/631

Reviewed By: stroxler

Differential Revision: D29701339

Pulled By: chessai

fbshipit-source-id: fca08a14c50844d418f101b885ca54554d993f58
This commit is contained in:
Maíra Bello 2021-07-19 15:41:16 -07:00 committed by Facebook GitHub Bot
parent b10e1d6a78
commit 328e59ebc4
2 changed files with 83 additions and 30 deletions

View File

@ -23,13 +23,43 @@ corpus = (testContext {locale = makeLocale PT Nothing}, testOptions, allExamples
allExamples :: [Example] allExamples :: [Example]
allExamples = concat allExamples = concat
[ examples (simple Cup 2 (Just "café")) [ examples (simple Pound 2 (Just "carne"))
[ "2 copos de café" [ "duas libras de carne"
] ]
, examples (simple Cup 1 Nothing) , examples (simple Gram 2 Nothing)
[ "um Copo" [ "2 gramas"
, "0,002 kg"
, "2 g"
, "2/1000 quilogramas"
, "2/1000 quilos"
, "2000 miligramas"
, "2000 miligramas"
] ]
, examples (simple Pound 100 (Just "acucar")) , examples (simple Gram 1000 Nothing)
[ "100 Libras de acucar" [ "um quilograma"
, "um quilo"
, "um kg"
, "1 kg"
, "1000 g"
]
, examples (simple Pound 1 Nothing)
[ "uma Libra"
, "1 lb"
, "uma lb"
]
, examples (simple Cup 3 (Just "acucar"))
[ "3 copos de acucar"
, "3 copos de AcucAr"
]
, examples (simple Cup 0.75 Nothing)
[ "3/4 copo"
, "0,75 copo"
]
, examples (simple Gram 500 (Just "morangos"))
[ "500 gramas de morangos"
, "500g de morangos"
, "0,5 quilos de morangos"
, "0,5 kg de morangos"
, "500000mg de morangos"
] ]
] ]

View File

@ -7,55 +7,78 @@
{-# LANGUAGE GADTs #-} {-# LANGUAGE GADTs #-}
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE LambdaCase #-}
module Duckling.Quantity.PT.Rules module Duckling.Quantity.PT.Rules
( rules ) where ( rules ) where
import Data.HashMap.Strict (HashMap)
import qualified Data.HashMap.Strict as HashMap
import qualified Data.Text as Text import qualified Data.Text as Text
import Prelude import Prelude
import Data.String import Data.String
import Data.Text (Text)
import Duckling.Dimensions.Types import Duckling.Dimensions.Types
import Duckling.Numeral.Types (NumeralData (..)) import Duckling.Numeral.Types (NumeralData (..))
import qualified Duckling.Numeral.Types as TNumeral import qualified Duckling.Numeral.Types as TNumeral
import Duckling.Numeral.Helpers
import Duckling.Quantity.Helpers import Duckling.Quantity.Helpers
import qualified Duckling.Quantity.Types as TQuantity import qualified Duckling.Quantity.Types as TQuantity
import Duckling.Regex.Types import Duckling.Regex.Types
import Duckling.Types import Duckling.Types
ruleNumeralUnits :: Rule quantities :: [(Text, String, TQuantity.Unit)]
ruleNumeralUnits = Rule quantities =
{ name = "<number> <units>" [ ("<quantity> copos", "(copos?)", TQuantity.Cup)
, pattern = , ("<quantity> gramas", "((((mili)|(quilo))?(grama)s?)|(quilos?)|((m|k)?g))", TQuantity.Gram)
[ dimension Numeral , ("<quantity> libras", "((lb|libra)s?)", TQuantity.Pound)
, regex "(libra|copo)s?" ]
]
, prod = \tokens -> case tokens of opsMap :: HashMap Text (Double -> Double)
(Token Numeral NumeralData {TNumeral.value = v}: opsMap = HashMap.fromList
Token RegexMatch (GroupMatch (match:_)): [ ( "miligrama" , (/ 1000))
_) -> case Text.toLower match of , ( "miligramas" , (/ 1000))
"copo" -> Just . Token Quantity $ quantity TQuantity.Cup v , ( "mg" , (/ 1000))
"libra" -> Just . Token Quantity $ quantity TQuantity.Pound v , ( "mgs" , (/ 1000))
_ -> Nothing , ( "quilograma" , (* 1000))
_ -> Nothing , ( "quilogramas", (* 1000))
} , ( "quilo" , (* 1000))
, ( "quilos" , (* 1000))
, ( "kg" , (* 1000))
, ( "kgs" , (* 1000))
]
-- | One rule per entry of 'quantities': a numeral token accepted by
-- 'isPositive', followed by text matching that entry's unit regex.
ruleNumeralQuantities :: [Rule]
ruleNumeralQuantities = map toRule quantities
  where
    -- Build a single rule from a (rule name, unit regex, unit) triple.
    toRule :: (Text, String, TQuantity.Unit) -> Rule
    toRule (ruleName, unitRegex, unit) = Rule
      { name = ruleName
      , pattern = [Predicate isPositive, regex unitRegex]
      , prod = \case
          (Token Numeral numeral : Token RegexMatch (GroupMatch (unitText:_)) : _) ->
            -- NOTE(review): 'getValue' presumably applies the scaling
            -- function registered in 'opsMap' for the matched unit text
            -- (e.g. kilo/milli prefixes) -- confirm against
            -- Duckling.Quantity.Helpers.
            Just . Token Quantity . quantity unit $
              getValue opsMap unitText (TNumeral.value numeral)
          _ -> Nothing
      }
ruleQuantityOfProduct :: Rule ruleQuantityOfProduct :: Rule
ruleQuantityOfProduct = Rule ruleQuantityOfProduct = Rule
{ name = "<quantity> of product" { name = "<quantity> of product"
, pattern = , pattern =
[ dimension Quantity [ dimension Quantity
, regex "de (caf(e|é)|a(ç|c)ucar)" , regex "de (\\w+)"
] ]
, prod = \tokens -> case tokens of , prod = \case
(Token Quantity qd: (Token Quantity qd:Token RegexMatch (GroupMatch (product:_)):_) ->
Token RegexMatch (GroupMatch (match:_)): Just $ Token Quantity $ withProduct (Text.toLower product) qd
_) -> Just . Token Quantity $ withProduct match qd
_ -> Nothing _ -> Nothing
} }
rules :: [Rule] rules :: [Rule]
rules = rules =
[ ruleNumeralUnits [ ruleQuantityOfProduct ]
, ruleQuantityOfProduct ++ ruleNumeralQuantities
]