From 328e59ebc4df34ccf3276a2a54eca598bfbc7e6b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mai=CC=81ra=20Bello?=
Date: Mon, 19 Jul 2021 15:41:16 -0700
Subject: [PATCH] Quantity/PT: Extend quantity to include grams in portuguese (#631)

Summary:
I'm using Duckling in my project but I noticed that quantities in kg weren't being detected correctly, though other entities such as numeral/volume were all working as expected.

Investigating more I noticed that this was just because in Portuguese the Quantity entity was only configured to detect cups and pounds, never grams. And even for cups/pounds, products weren't being detected correctly.

So I've just adapted the rules from English Quantity to work in Portuguese as well, while keeping the cups/pounds too. It's all working as expected now and it's backwards compatible.

Pull Request resolved: https://github.com/facebook/duckling/pull/631

Reviewed By: stroxler

Differential Revision: D29701339

Pulled By: chessai

fbshipit-source-id: fca08a14c50844d418f101b885ca54554d993f58
---
Review notes (this section is skipped when the patch is applied):
* Corpus: the duplicated "2000 miligramas" example under `simple Gram 2` is
  replaced by "2000 mg", so the abbreviated milligram form is also covered
  for a bare quantity (hunk line counts are unchanged).
* NOTE(review): the "mgs" / "kgs" keys in `opsMap` look unreachable, because
  the gram regex alternative `((m|k)?g)` has no plural suffix. Confirm, then
  drop the keys or extend the regex in a follow-up.
* NOTE(review): `de (\\w+)` replaces a pattern that explicitly accepted
  "café" / "açucar", and the "café" corpus example is removed. Whether `\w`
  matches accented letters depends on how the regex engine is configured;
  confirm before relying on the "backwards compatible" claim.
* NOTE(review): the author address and the leading part of several rule
  names (" copos", " of product") look truncated, as if angle-bracketed text
  was stripped. Line breaks and indentation were rebuilt from the hunk
  counts. Confirm both against the original commit.

 Duckling/Quantity/PT/Corpus.hs | 42 +++++++++++++++++---
 Duckling/Quantity/PT/Rules.hs  | 71 ++++++++++++++++++++++------------
 2 files changed, 83 insertions(+), 30 deletions(-)

diff --git a/Duckling/Quantity/PT/Corpus.hs b/Duckling/Quantity/PT/Corpus.hs
index c0b21305..c63fc2cc 100644
--- a/Duckling/Quantity/PT/Corpus.hs
+++ b/Duckling/Quantity/PT/Corpus.hs
@@ -23,13 +23,43 @@ corpus = (testContext {locale = makeLocale PT Nothing}, testOptions, allExamples
 
 allExamples :: [Example]
 allExamples = concat
-  [ examples (simple Cup 2 (Just "café"))
-             [ "2 copos de café"
+  [ examples (simple Pound 2 (Just "carne"))
+             [ "duas libras de carne"
              ]
-  , examples (simple Cup 1 Nothing)
-             [ "um Copo"
+  , examples (simple Gram 2 Nothing)
+             [ "2 gramas"
+             , "0,002 kg"
+             , "2 g"
+             , "2/1000 quilogramas"
+             , "2/1000 quilos"
+             , "2000 miligramas"
+             , "2000 mg"
              ]
-  , examples (simple Pound 100 (Just "acucar"))
-             [ "100 Libras de acucar"
+  , examples (simple Gram 1000 Nothing)
+             [ "um quilograma"
+             , "um quilo"
+             , "um kg"
+             , "1 kg"
+             , "1000 g"
+             ]
+  , examples (simple Pound 1 Nothing)
+             [ "uma Libra"
+             , "1 lb"
+             , "uma lb"
+             ]
+  , examples (simple Cup 3 (Just "acucar"))
+             [ "3 copos de acucar"
+             , "3 copos de AcucAr"
+             ]
+  , examples (simple Cup 0.75 Nothing)
+             [ "3/4 copo"
+             , "0,75 copo"
+             ]
+  , examples (simple Gram 500 (Just "morangos"))
+             [ "500 gramas de morangos"
+             , "500g de morangos"
+             , "0,5 quilos de morangos"
+             , "0,5 kg de morangos"
+             , "500000mg de morangos"
              ]
   ]
diff --git a/Duckling/Quantity/PT/Rules.hs b/Duckling/Quantity/PT/Rules.hs
index 6085bc36..0eaf49ce 100644
--- a/Duckling/Quantity/PT/Rules.hs
+++ b/Duckling/Quantity/PT/Rules.hs
@@ -7,55 +7,78 @@
 
 {-# LANGUAGE GADTs #-}
 {-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE LambdaCase #-}
 
 module Duckling.Quantity.PT.Rules
   ( rules ) where
 
+import Data.HashMap.Strict (HashMap)
+import qualified Data.HashMap.Strict as HashMap
 import qualified Data.Text as Text
 import Prelude
 import Data.String
+import Data.Text (Text)
 
 import Duckling.Dimensions.Types
 import Duckling.Numeral.Types (NumeralData (..))
 import qualified Duckling.Numeral.Types as TNumeral
+import Duckling.Numeral.Helpers
 import Duckling.Quantity.Helpers
 import qualified Duckling.Quantity.Types as TQuantity
 import Duckling.Regex.Types
 import Duckling.Types
 
-ruleNumeralUnits :: Rule
-ruleNumeralUnits = Rule
-  { name = " "
-  , pattern =
-    [ dimension Numeral
-    , regex "(libra|copo)s?"
-    ]
-  , prod = \tokens -> case tokens of
-      (Token Numeral NumeralData {TNumeral.value = v}:
-       Token RegexMatch (GroupMatch (match:_)):
-       _) -> case Text.toLower match of
-         "copo" -> Just . Token Quantity $ quantity TQuantity.Cup v
-         "libra" -> Just . Token Quantity $ quantity TQuantity.Pound v
-         _ -> Nothing
-      _ -> Nothing
-  }
+quantities :: [(Text, String, TQuantity.Unit)]
+quantities =
+  [ (" copos", "(copos?)", TQuantity.Cup)
+  , (" gramas", "((((mili)|(quilo))?(grama)s?)|(quilos?)|((m|k)?g))", TQuantity.Gram)
+  , (" libras", "((lb|libra)s?)", TQuantity.Pound)
+  ]
+
+opsMap :: HashMap Text (Double -> Double)
+opsMap = HashMap.fromList
+  [ ( "miligrama"  , (/ 1000))
+  , ( "miligramas" , (/ 1000))
+  , ( "mg"         , (/ 1000))
+  , ( "mgs"        , (/ 1000))
+  , ( "quilograma" , (* 1000))
+  , ( "quilogramas", (* 1000))
+  , ( "quilo"      , (* 1000))
+  , ( "quilos"     , (* 1000))
+  , ( "kg"         , (* 1000))
+  , ( "kgs"        , (* 1000))
+  ]
+
+ruleNumeralQuantities :: [Rule]
+ruleNumeralQuantities = map go quantities
+  where
+    go :: (Text, String, TQuantity.Unit) -> Rule
+    go (name, regexPattern, u) = Rule
+      { name = name
+      , pattern = [Predicate isPositive, regex regexPattern]
+      , prod = \case
+        (Token Numeral nd:
+         Token RegexMatch (GroupMatch (match:_)):
+         _) -> do
+          let value = getValue opsMap match $ TNumeral.value nd
+          Just $ Token Quantity $ quantity u value
+        _ -> Nothing
+      }
 
 ruleQuantityOfProduct :: Rule
 ruleQuantityOfProduct = Rule
   { name = " of product"
   , pattern =
     [ dimension Quantity
-    , regex "de (caf(e|é)|a(ç|c)ucar)"
+    , regex "de (\\w+)"
     ]
-  , prod = \tokens -> case tokens of
-      (Token Quantity qd:
-       Token RegexMatch (GroupMatch (match:_)):
-       _) -> Just . Token Quantity $ withProduct match qd
+  , prod = \case
+      (Token Quantity qd:Token RegexMatch (GroupMatch (product:_)):_) ->
+        Just $ Token Quantity $ withProduct (Text.toLower product) qd
       _ -> Nothing
   }
 
 rules :: [Rule]
 rules =
-  [ ruleNumeralUnits
-  , ruleQuantityOfProduct
-  ]
+  [ ruleQuantityOfProduct ]
+  ++ ruleNumeralQuantities