Add Numeral dimension for new language TH (#399)

Summary:
Hello,
I am new to Haskell, but I would like to add Thai language (TH) to Duckling.
I have tried to extend Duckling by adding the Numeral dimension for the new language TH.
Please have a look at it and see what we can improve.

Thanks!
Pull Request resolved: https://github.com/facebook/duckling/pull/399

Reviewed By: patapizza

Differential Revision: D17651508

Pulled By: haoxuany

fbshipit-source-id: 4b3ee1352f239eee637958f5e9dce68430352a0a
This commit is contained in:
Kamolsit Mongkolsrisawat 2019-11-27 15:29:58 -08:00 committed by Facebook Github Bot
parent dcb537c0b0
commit 7f2450e067
16 changed files with 763 additions and 4 deletions

View File

@ -146,8 +146,7 @@ allExamples = concat
, "ti singaporske dollars"
]
, examples (simple THB 10)
[ "10 baht"
, "10 bhat"
[ "10 bhat"
, "10 thai baht"
, "10 thai bhat"
, "10 thailand baht"

View File

@ -60,6 +60,7 @@ import qualified Duckling.Dimensions.RU as RUDimensions
import qualified Duckling.Dimensions.SV as SVDimensions
import qualified Duckling.Dimensions.SW as SWDimensions
import qualified Duckling.Dimensions.TA as TADimensions
import qualified Duckling.Dimensions.TH as THDimensions
import qualified Duckling.Dimensions.TR as TRDimensions
import qualified Duckling.Dimensions.UK as UKDimensions
import qualified Duckling.Dimensions.VI as VIDimensions
@ -135,6 +136,7 @@ langDimensions RU = RUDimensions.allDimensions
langDimensions SV = SVDimensions.allDimensions
langDimensions SW = SWDimensions.allDimensions
langDimensions TA = TADimensions.allDimensions
langDimensions TH = THDimensions.allDimensions
langDimensions TR = TRDimensions.allDimensions
langDimensions UK = UKDimensions.allDimensions
langDimensions VI = VIDimensions.allDimensions

17
Duckling/Dimensions/TH.hs Normal file
View File

@ -0,0 +1,17 @@
-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree.
module Duckling.Dimensions.TH
( allDimensions
) where
import Duckling.Dimensions.Types
-- | Dimensions listed for the TH language; currently only 'Numeral'.
allDimensions :: [Some Dimension]
allDimensions = [This Numeral]

View File

@ -72,6 +72,7 @@ data Lang
| SV
| SW
| TA
| TH
| TR
| UK
| VI

View File

@ -0,0 +1,153 @@
-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree.
{-# LANGUAGE OverloadedStrings #-}
module Duckling.Numeral.TH.Corpus
( corpus ) where
import Prelude
import Data.String
import Duckling.Locale
import Duckling.Numeral.Types
import Duckling.Resolve
import Duckling.Testing.Types
-- | Numeral corpus for TH: the shared test context with its locale switched
-- to TH (no region), the default test options, and 'allExamples'.
corpus :: Corpus
corpus = (thaiContext, testOptions, allExamples)
  where
    thaiContext = testContext {locale = makeLocale TH Nothing}
-- | Every TH numeral example, written as a table of
-- @(expected value, inputs that must resolve to it)@.
allExamples :: [Example]
allExamples = concatMap (uncurry numeral)
  [ ( 0
    , [ "0"
      , "ไม่มี"
      , "ศูนย์"
      ] )
  , ( 1
    , [ "1"
      , "หนึ่ง"
      ] )
  , ( 2
    , [ "2"
      , "สอง"
      ] )
  , ( 3
    , [ "3"
      , "สาม"
      ] )
  , ( 10
    , [ "10"
      , "สิบ"
      ] )
  , ( 12
    , [ "12"
      , "สิบสอง"
      , "โหล"
      ] )
  , ( 14
    , [ "14"
      , "สิบสี่"
      ] )
  , ( 16
    , [ "16"
      , "สิบหก"
      ] )
  , ( 17
    , [ "17"
      , "สิบเจ็ด"
      ] )
  , ( 18
    , [ "18"
      , "สิบแปด"
      ] )
  , ( 33
    , [ "33"
      , "0033"
      ] )
  , ( 24
    , [ "24"
      , "สองโหล"
      ] )
  , ( 1.1
    , [ "1.1"
      , "1.10"
      , "01.10"
      , "1 จุด 1"
      , "หนึ่งจุดหนึ่ง"
      ] )
  , ( 0.77
    , [ ".77"
      , "0.77"
      , "จุด 77"
      ] )
  , ( 100000
    , [ "100,000"
      , "100,000.0"
      , "100000"
      , "หนึ่งแสน"
      ] )
  , ( 0.2
    , [ "1/5"
      , "2/10"
      , "3/15"
      , "20/100"
      , "ศูนย์จุดสอง"
      ] )
  , ( 3e6
    , [ "3000000"
      , "3,000,000"
      , "3 ล้าน"
      , "สามล้าน"
      ] )
  , ( 1.2e6
    , [ "1,200,000"
      , "1200000"
      , "1.2 ล้าน"
      , "หนึ่งจุดสองล้าน"
      ] )
  , ( 5000
    , [ "ห้าพัน"
      ] )
  , ( -504
    , [ "-504"
      ] )
  , ( -1.2e6
    , [ "- 1,200,000"
      , "-1200000"
      , "ลบ 1,200,000"
      ] )
  , ( 122
    , [ "หนึ่งร้อยยี่สิบสอง"
      ] )
  , ( 2e5
    , [ "สองแสน"
      ] )
  , ( 21011
    , [ "สองหมื่นหนึ่งพันสิบเอ็ด"
      , "สองหมื่นหนึ่งพันสิบหนึ่ง"
      ] )
  , ( 721012
    , [ "เจ็ดแสนสองหมื่นหนึ่งพันสิบสอง"
      ] )
  , ( 31256721
    , [ "สามสิบเอ็ดล้านสองแสนห้าหมื่นหกพันเจ็ดร้อยยี่สิบเอ็ด"
      , "สามสิบหนึ่งล้านสองแสนห้าหมื่นหกพันเจ็ดร้อยยี่สิบหนึ่ง"
      ] )
  , ( 2400
    , [ "สองร้อยโหล"
      , "200 โหล"
      ] )
  , ( 2200000
    , [ "สองจุดสองล้าน"
      , "สองล้านสองแสน"
      ] )
  , ( 3000000000
    , [ "สามพันล้าน"
      ] )
  ]
  where
    -- Wrap the expected number in 'NumeralValue' and pair it with its inputs.
    numeral value = examples (NumeralValue value)

View File

@ -0,0 +1,460 @@
-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree.
{-# LANGUAGE GADTs #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE NoRebindableSyntax #-}
{-# LANGUAGE OverloadedStrings #-}
module Duckling.Numeral.TH.Rules
( rules
) where
import Control.Applicative ((<|>))
import Data.HashMap.Strict (HashMap)
import Data.Maybe
import Data.String
import Data.Text (Text)
import Prelude
import qualified Data.HashMap.Strict as HashMap
import qualified Data.Text as Text
import Duckling.Dimensions.Types
import Duckling.Numeral.Helpers
import Duckling.Numeral.Types (NumeralData (..))
import Duckling.Regex.Types
import Duckling.Types
import qualified Duckling.Numeral.Types as TNumeral
-- | "A dozen": matches "โหล", optionally followed by " ของ", and produces the
-- integer 12 marked as multipliable (so that a preceding numeral can scale
-- it, e.g. "สองโหล" = 24) and as not usable for time parsing.
--
-- The pattern used to be @โหล?@, a leftover of the English @dozens?@: the
-- @?@ made the final character optional, so the bare prefix "โห" was also
-- read as 12. The whole word is now required.
ruleDozen :: Rule
ruleDozen = Rule
  { name = "a dozen of"
  , pattern =
    [ regex "โหล( ของ)?"
    ]
  , prod = \_ -> integer 12 >>= withMultipliable >>= notOkForAnyTime
  }
-- | Spelled-out Thai words for the integers 0 through 19, including the
-- alternative spellings that share a value (two words for 0, two for 1,
-- two for 11).
zeroNineteenMap :: HashMap Text Integer
zeroNineteenMap = HashMap.fromList $ concat
  [ -- zero
    [ ( "ไม่มี", 0 ), ( "ศูนย์", 0 ) ]
    -- one, in both spellings
  , [ ( "หนึ่ง", 1 ), ( "เอ็ด", 1 ) ]
    -- two to ten
  , [ ( "สอง", 2 )
    , ( "สาม", 3 )
    , ( "สี่", 4 )
    , ( "ห้า", 5 )
    , ( "หก", 6 )
    , ( "เจ็ด", 7 )
    , ( "แปด", 8 )
    , ( "เก้า", 9 )
    , ( "สิบ", 10 )
    ]
    -- eleven, in both spellings
  , [ ( "สิบเอ็ด", 11 ), ( "สิบหนึ่ง", 11 ) ]
    -- twelve to nineteen
  , [ ( "สิบสอง", 12 )
    , ( "สิบสาม", 13 )
    , ( "สิบสี่", 14 )
    , ( "สิบห้า", 15 )
    , ( "สิบหก", 16 )
    , ( "สิบเจ็ด", 17 )
    , ( "สิบแปด", 18 )
    , ( "สิบเก้า", 19 )
    ]
  ]
-- | Informal expressions that stand for 1 or 2, grouped by value.
informalMap :: HashMap Text Integer
informalMap = HashMap.fromList $
  [ ( word, 1 ) | word <- [ "อันนึง" ] ]
  ++ [ ( word, 2 ) | word <- [ "คู่นึง", "คู่ของ" ] ]
-- | Spelled-out integers from 0 to 19, plus the informal expressions listed
-- in 'informalMap'.
--
-- The matched text is looked up in 'zeroNineteenMap' first; on a miss it is
-- looked up in 'informalMap', and such informal hits are additionally marked
-- as not usable for time parsing.
--
-- The informal alternatives used to be @(คู่)s?( ของ)?|(คู่)s?( นึง)?@, a
-- transliteration of the English @(pair)s?( of)?@. Nothing that pattern could
-- match is a key of 'informalMap' (the keys contain no space and no "s"), and
-- "อันนึง" was missing entirely, so the informal lookup could never succeed.
-- The alternatives now spell out the map keys, tolerating one optional space
-- inside them; that space is removed before the lookup.
ruleToNineteen :: Rule
ruleToNineteen = Rule
  { name = "integer (0..19)"
  -- Longer words come before their suffixes (e.g. "สิบหนึ่ง" before "หนึ่ง"),
  -- otherwise the shorter alternative would always shadow the longer one.
  , pattern =
    [ regex "(ไม่มี|ศูนย์|สิบหนึ่ง|หนึ่ง|อันนึง|คู่ ?นึง|คู่ ?ของ|สิบเอ็ด|เอ็ด|สิบสอง|สิบสาม|สิบสี่|สิบห้า|สิบหก|สิบเจ็ด|สิบแปด|สิบเก้า|สอง|สาม|สี่|ห้า|หก|เจ็ด|แปด|เก้า|สิบ)"
    ]
  , prod = \case
      (Token RegexMatch (GroupMatch (match:_)):_) ->
        -- Normalise to the exact form used as map keys (no inner space).
        let x = Text.filter (/= ' ') (Text.toLower match) in
        (HashMap.lookup x zeroNineteenMap >>= integer) <|>
        (HashMap.lookup x informalMap >>= integer >>= notOkForAnyTime)
      _ -> Nothing
  }
-- | Spelled-out Thai words for the round tens 20, 30, ..., 90.
tensMap :: HashMap Text Integer
tensMap = HashMap.fromList $ zip tensWords [20, 30 .. 90]
  where
    -- Listed in ascending order so that they pair up with 20, 30, ..., 90.
    tensWords =
      [ "ยี่สิบ"
      , "สามสิบ"
      , "สี่สิบ"
      , "ห้าสิบ"
      , "หกสิบ"
      , "เจ็ดสิบ"
      , "แปดสิบ"
      , "เก้าสิบ"
      ]
-- | Round tens (20..90): a single-regex rule built from the keys of
-- 'tensMap', producing the mapped value as an integer token.
ruleTens :: Rule
ruleTens = singleStringLookupRule tensMap ruleName integer
  where
    ruleName = "integer (20..90)"
-- | Spelled-out numbers of the form x20..x29 (120..129, 220..229, ...,
-- 920..929).
--
-- Each key is a hundreds prefix, followed by "ยี่สิบ" (20), followed by an
-- optional unit word. The hundreds prefix for 1 exists with and without the
-- leading "หนึ่ง", and the unit 1 exists in two spellings, which gives
-- 10 prefixes x 11 suffixes = 110 entries.
digitsHundredTwentyToTwentyNineMap :: HashMap Text Integer
digitsHundredTwentyToTwentyNineMap = HashMap.fromList
  [ ( Text.concat [ hundredsWord, "ยี่สิบ", unitWord ]
    , hundreds * 100 + 20 + unit
    )
  | ( hundredsWord, hundreds ) <- hundredsPrefixes
  , ( unitWord, unit ) <- unitSuffixes
  ]
  where
    hundredsPrefixes :: [(Text, Integer)]
    hundredsPrefixes =
      [ ( "ร้อย", 1 )
      , ( "หนึ่งร้อย", 1 )
      , ( "สองร้อย", 2 )
      , ( "สามร้อย", 3 )
      , ( "สี่ร้อย", 4 )
      , ( "ห้าร้อย", 5 )
      , ( "หกร้อย", 6 )
      , ( "เจ็ดร้อย", 7 )
      , ( "แปดร้อย", 8 )
      , ( "เก้าร้อย", 9 )
      ]
    -- The empty suffix yields the bare x20 entries.
    unitSuffixes :: [(Text, Integer)]
    unitSuffixes =
      [ ( "", 0 )
      , ( "เอ็ด", 1 )
      , ( "หนึ่ง", 1 )
      , ( "สอง", 2 )
      , ( "สาม", 3 )
      , ( "สี่", 4 )
      , ( "ห้า", 5 )
      , ( "หก", 6 )
      , ( "เจ็ด", 7 )
      , ( "แปด", 8 )
      , ( "เก้า", 9 )
      ]
-- | Numbers x20..x29 spelled as one run of text: a single-regex rule built
-- from the keys of 'digitsHundredTwentyToTwentyNineMap'.
ruleXHundredTwentyToXHundredTwentyNine :: Rule
ruleXHundredTwentyToXHundredTwentyNine =
  singleStringLookupRule digitsHundredTwentyToTwentyNineMap ruleName integer
  where
    ruleName = "integer (x20,x21,...,x29)"
-- | Power-of-ten words from 1e2 to 1e9. Each match yields the corresponding
-- value with its grain (the exponent) set and marked as multipliable.
--
-- NOTE(review): in the alternation, "ร้อย" and "พัน" are listed before
-- "ร้อยล้าน" and "พันล้าน"; if the regex engine takes the first alternative
-- that matches, the longer forms are probably never selected as one unit.
-- Confirm whether that is intended before reordering anything here.
rulePowersOfTen :: Rule
rulePowersOfTen = Rule
  { name = "powers of tens"
  , pattern = [regex "(ร้อย|พัน|หมื่น|แสน|ล้าน|สิบล้าน|ร้อยล้าน|พันล้าน)"]
  , prod = \case
      (Token RegexMatch (GroupMatch (match : _)) : _) -> do
        (magnitude, grain) <- lookup (Text.toLower match) powers
        double magnitude >>= withGrain grain >>= withMultipliable
      _ -> Nothing
  }
  where
    -- word -> (value, grain)
    powers =
      [ ( "ร้อย", ( 1e2, 2 ) )
      , ( "พัน", ( 1e3, 3 ) )
      , ( "หมื่น", ( 1e4, 4 ) )
      , ( "แสน", ( 1e5, 5 ) )
      , ( "ล้าน", ( 1e6, 6 ) )
      , ( "สิบล้าน", ( 1e7, 7 ) )
      , ( "ร้อยล้าน", ( 1e8, 8 ) )
      , ( "พันล้าน", ( 1e9, 9 ) )
      ]
-- | A round ten (20, 30, ..., 90), then a run of whitespace and/or hyphens,
-- then a numeral accepted by @numberBetween 1 10@; the result is their sum.
ruleCompositeTens :: Rule
ruleCompositeTens = Rule
  { name = "integer 21..99"
  , pattern =
    [ oneOf [20,30..90]
    , regex "[\\s\\-]+"
    , numberBetween 1 10
    ]
  , prod = \case
      (Token Numeral tensData : _ : Token Numeral unitsData : _) ->
        double (TNumeral.value tensData + TNumeral.value unitsData)
      _ -> Nothing
  }
-- | A round-ten word immediately followed by a unit word, e.g. the rule's
-- own name "สามสิบสี่" (34). The result is the 'tensMap' value of the first
-- word plus the 'zeroNineteenMap' value of the second.
--
-- The unit alternation used to end with "สิบ" (10), which let the rule add
-- ten to a round ten (e.g. "ยี่สิบสิบ" -> 30). Only the words for 1..9 are
-- accepted now.
ruleSumTenDigits :: Rule
ruleSumTenDigits = Rule
  { name = "สามสิบสี่"
  , pattern =
    [ regex "(ยี่สิบ|สามสิบ|สี่สิบ|ห้าสิบ|หกสิบ|เจ็ดสิบ|แปดสิบ|เก้าสิบ)"
    , regex "(หนึ่ง|เอ็ด|สอง|สาม|สี่|ห้า|หก|เจ็ด|แปด|เก้า)"
    ]
  , prod = \case
      (Token RegexMatch (GroupMatch (m1:_)):
       Token RegexMatch (GroupMatch (m2:_)):
       _) -> do
        tens <- HashMap.lookup (Text.toLower m1) tensMap
        units <- HashMap.lookup (Text.toLower m2) zeroNineteenMap
        integer (tens + units)
      _ -> Nothing
  }
-- | "one eleven" style: a digit word (1..9) followed by a word for 10..19 or
-- for a round ten, read as @digit * 100 + rest@.
--
-- NOTE(review): the 'tensMap' key "สามสิบ" (30) is exactly "สาม" followed by
-- "สิบ", and both pieces are accepted by the two regexes below, so this rule
-- appears to also turn ordinary tens into 3 * 100 + 10 = 310. Confirm
-- whether this English-style shorthand is wanted for Thai at all.
ruleSkipHundreds1 :: Rule
ruleSkipHundreds1 = Rule
  { name = "one eleven"
  , pattern =
    [ regex "(หนึ่ง|สอง|สาม|สี่|ห้า|หก|เจ็ด|แปด|เก้า)"
    , regex "(สิบ|สิบเอ็ด|สิบสอง|สิบสาม|สิบสี่|สิบห้า|สิบหก|สิบเจ็ด|สิบแปด|สิบเก้า|ยี่สิบ|สามสิบ|สี่สิบ|ห้าสิบ|หกสิบ|เจ็ดสิบ|แปดสิบ|เก้าสิบ)"
    ]
  , prod = \case
      (Token RegexMatch (GroupMatch (leading:_)):
       Token RegexMatch (GroupMatch (trailing:_)):
       _) ->
        let digit = HashMap.lookup (Text.toLower leading) zeroNineteenMap
            key = Text.toLower trailing
            remainder =
              HashMap.lookup key zeroNineteenMap <|> HashMap.lookup key tensMap
        in (\h r -> h * 100 + r) <$> digit <*> remainder >>= integer
      _ -> Nothing
  }
-- | "one twenty two" style: a digit word (1..9), a round-ten word and
-- another digit word (1..9), read as @digit * 100 + tens + unit@.
--
-- NOTE(review): this mirrors an English shorthand; confirm that three such
-- words in a row are really meant to be read as a three-digit number in Thai.
ruleSkipHundreds2 :: Rule
ruleSkipHundreds2 = Rule
  { name = "one twenty two"
  , pattern =
    [ regex "(หนึ่ง|สอง|สาม|สี่|ห้า|หก|เจ็ด|แปด|เก้า)"
    , regex "(ยี่สิบ|สามสิบ|สี่สิบ|ห้าสิบ|หกสิบ|เจ็ดสิบ|แปดสิบ|เก้าสิบ)"
    , regex "(หนึ่ง|สอง|สาม|สี่|ห้า|หก|เจ็ด|แปด|เก้า)"
    ]
  , prod = \case
      (Token RegexMatch (GroupMatch (first:_)):
       Token RegexMatch (GroupMatch (second:_)):
       Token RegexMatch (GroupMatch (third:_)):
       _) ->
        let digit = HashMap.lookup (Text.toLower first) zeroNineteenMap
            tens = HashMap.lookup (Text.toLower second) tensMap
            unit = HashMap.lookup (Text.toLower third) zeroNineteenMap
        in (\h t u -> h * 100 + t + u) <$> digit <*> tens <*> unit >>= integer
      _ -> Nothing
  }
-- | Spelled-out decimal point: a numeral, the word "จุด", then a token
-- without grain. The second numeral is converted with 'decimalsToDouble'
-- and added to the first.
ruleDotSpelledOut :: Rule
ruleDotSpelledOut = Rule
  { name = "one point 2"
  , pattern =
    [ dimension Numeral
    , regex "จุด"
    , Predicate $ not . hasGrain
    ]
  , prod = \case
      (Token Numeral NumeralData{TNumeral.value = whole}:
       _:
       Token Numeral NumeralData{TNumeral.value = decimals}:
       _) -> double (whole + decimalsToDouble decimals)
      _ -> Nothing
  }
-- | Spelled-out decimal point with nothing in front: the word "จุด" followed
-- by a token without grain; the numeral is converted with
-- 'decimalsToDouble'.
ruleLeadingDotSpelledOut :: Rule
ruleLeadingDotSpelledOut = Rule
  { name = "point 77"
  , pattern =
    [ regex "จุด"
    , Predicate $ not . hasGrain
    ]
  , prod = \case
      (_ : Token Numeral NumeralData{TNumeral.value = decimals} : _) ->
        double (decimalsToDouble decimals)
      _ -> Nothing
  }
-- | Decimal numbers written with digits and a dot (the integer part may be
-- empty, as in ".77"); the matched text is handed to 'parseDecimal'.
ruleDecimals :: Rule
ruleDecimals = Rule
  { name = "decimal number"
  , pattern = [regex "(\\d*\\.\\d+)"]
  , prod = \case
      (Token RegexMatch (GroupMatch (digits:_)) : _) ->
        parseDecimal True digits
      _ -> Nothing
  }
-- | Digit groups separated by commas, with an optional fractional part
-- (e.g. "100,000.0"). The commas are stripped before the text is parsed.
ruleCommas :: Rule
ruleCommas = Rule
  { name = "comma-separated numbers"
  , pattern = [regex "(\\d+(,\\d\\d\\d)+(\\.\\d+)?)"]
  , prod = \case
      (Token RegexMatch (GroupMatch (grouped:_)) : _) ->
        double =<< parseDouble (Text.filter (/= ',') grouped)
      _ -> Nothing
  }
-- | A numeral followed by "กิโลกรัม" or "กรัม"; the numeral's value is
-- multiplied by the factor associated with the suffix.
--
-- NOTE(review): a factor of 1e3 for "กิโลกรัม" and of 1e1 for "กรัม" looks
-- suspicious for a Numeral rule (it resembles a K/M/G suffix rule with the
-- suffixes swapped for weight units). Confirm the intended semantics.
ruleSuffixes :: Rule
ruleSuffixes = Rule
  { name = "suffixes (กิโลกรัม,กรัม))"
  , pattern =
    [ dimension Numeral
    , regex "(กิโลกรัม|กรัม)(?=[\\W$€¢£]|$)"
    ]
  , prod = \case
      (Token Numeral nd : Token RegexMatch (GroupMatch (suffix : _)) : _) ->
        lookup (Text.toLower suffix) factors
          >>= \factor -> double (TNumeral.value nd * factor)
      _ -> Nothing
  }
  where
    -- suffix -> multiplier
    factors =
      [ ( "กิโลกรัม", 1e3 )
      , ( "กรัม", 1e1 )
      ]
-- | Negative numbers: "-" or "ลบ" (not followed by another "-", optionally
-- after whitespace) in front of a token accepted by 'isPositive'; the
-- value's sign is flipped.
ruleNegative :: Rule
ruleNegative = Rule
  { name = "negative numbers"
  , pattern =
    [ regex "(-|ลบ)(?!\\s*-)"
    , Predicate isPositive
    ]
  , prod = \case
      (_ : Token Numeral NumeralData{TNumeral.value = magnitude} : _) ->
        double $ magnitude * (-1)
      _ -> Nothing
  }
-- | Adds two adjacent numerals: the first must satisfy 'hasGrain' and
-- 'isPositive', the second must be positive and not multipliable. The sum is
-- produced only when the second value is smaller than 10 to the power of the
-- first one's grain.
ruleSum :: Rule
ruleSum = Rule
  { name = "intersect 2 numbers"
  , pattern =
    [ Predicate $ \t -> hasGrain t && isPositive t
    , Predicate $ \t -> not (isMultipliable t) && isPositive t
    ]
  , prod = \case
      (Token Numeral NumeralData{TNumeral.value = larger, TNumeral.grain = Just g}:
       Token Numeral NumeralData{TNumeral.value = smaller}:
       _) | smaller < 10 ** fromIntegral g -> double (larger + smaller)
      _ -> Nothing
  }
-- | Same as the plain two-number sum, but with the word "และ" between the
-- two numerals: the first must satisfy 'hasGrain' and 'isPositive', the
-- second must be positive and not multipliable, and the second value must be
-- smaller than 10 to the power of the first one's grain.
ruleSumAnd :: Rule
ruleSumAnd = Rule
  { name = "intersect 2 numbers (with and)"
  , pattern =
    [ Predicate $ \t -> hasGrain t && isPositive t
    , regex "และ"
    , Predicate $ \t -> not (isMultipliable t) && isPositive t
    ]
  , prod = \case
      (Token Numeral NumeralData{TNumeral.value = larger, TNumeral.grain = Just g}:
       _:
       Token Numeral NumeralData{TNumeral.value = smaller}:
       _) | smaller < 10 ** fromIntegral g -> double (larger + smaller)
      _ -> Nothing
  }
-- | A numeral followed by a multipliable token; the two are combined with
-- 'multiply'.
ruleMultiply :: Rule
ruleMultiply = Rule
  { name = "compose by multiplication"
  , pattern =
    [ dimension Numeral
    , Predicate isMultipliable
    ]
  , prod = \case
      (factor : multiplicand : _) -> multiply factor multiplicand
      _ -> Nothing
  }
-- | The Thai Numeral rules exported by this module.
--
-- 'ruleSkipHundreds1' is intentionally not registered. It reads a digit word
-- followed by "สิบ..." as @digit * 100 + rest@, but that is exactly how
-- ordinary Thai tens are spelled: the 'tensMap' key "สามสิบ" (30) is "สาม"
-- followed by "สิบ", so the rule produced 310 for it alongside the correct
-- reading from 'ruleTens'.
rules :: [Rule]
rules =
  [ ruleXHundredTwentyToXHundredTwentyNine
  , ruleSkipHundreds2
  , ruleToNineteen
  , ruleTens
  , rulePowersOfTen
  , ruleCompositeTens
  , ruleSumTenDigits
  , ruleDotSpelledOut
  , ruleLeadingDotSpelledOut
  , ruleDecimals
  , ruleCommas
  , ruleSuffixes
  , ruleNegative
  , ruleSum
  , ruleSumAnd
  , ruleMultiply
  , ruleDozen
  ]

View File

@ -54,6 +54,7 @@ import qualified Duckling.Ranking.Classifiers.RU_XX as RU_XXClassifiers
import qualified Duckling.Ranking.Classifiers.SV_XX as SV_XXClassifiers
import qualified Duckling.Ranking.Classifiers.SW_XX as SW_XXClassifiers
import qualified Duckling.Ranking.Classifiers.TA_XX as TA_XXClassifiers
import qualified Duckling.Ranking.Classifiers.TH_XX as TH_XXClassifiers
import qualified Duckling.Ranking.Classifiers.TR_XX as TR_XXClassifiers
import qualified Duckling.Ranking.Classifiers.UK_XX as UK_XXClassifiers
import qualified Duckling.Ranking.Classifiers.VI_XX as VI_XXClassifiers
@ -101,6 +102,7 @@ classifiers (Locale RU _) = RU_XXClassifiers.classifiers
classifiers (Locale SV _) = SV_XXClassifiers.classifiers
classifiers (Locale SW _) = SW_XXClassifiers.classifiers
classifiers (Locale TA _) = TA_XXClassifiers.classifiers
classifiers (Locale TH _) = TH_XXClassifiers.classifiers
classifiers (Locale TR _) = TR_XXClassifiers.classifiers
classifiers (Locale UK _) = UK_XXClassifiers.classifiers
classifiers (Locale VI _) = VI_XXClassifiers.classifiers

View File

@ -0,0 +1,22 @@
-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree. An additional grant
-- of patent rights can be found in the PATENTS file in the same directory.
-----------------------------------------------------------------
-- Auto-generated by regenClassifiers
--
-- DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
-- @generated
-----------------------------------------------------------------
{-# LANGUAGE OverloadedStrings #-}
module Duckling.Ranking.Classifiers.TH_XX (classifiers) where
import Data.String
import Prelude
import qualified Data.HashMap.Strict as HashMap
import Duckling.Ranking.Types
-- | Classifier table for TH; it is built from an empty list, so it contains
-- no entries.
classifiers :: Classifiers
classifiers = HashMap.fromList []

View File

@ -61,6 +61,7 @@ import qualified Duckling.Rules.RU as RURules
import qualified Duckling.Rules.SV as SVRules
import qualified Duckling.Rules.SW as SWRules
import qualified Duckling.Rules.TA as TARules
import qualified Duckling.Rules.TH as THRules
import qualified Duckling.Rules.TR as TRRules
import qualified Duckling.Rules.UK as UKRules
import qualified Duckling.Rules.VI as VIRules
@ -126,6 +127,7 @@ defaultRules RU = RURules.defaultRules
defaultRules SV = SVRules.defaultRules
defaultRules SW = SWRules.defaultRules
defaultRules TA = TARules.defaultRules
defaultRules TH = THRules.defaultRules
defaultRules TR = TRRules.defaultRules
defaultRules UK = UKRules.defaultRules
defaultRules VI = VIRules.defaultRules
@ -171,6 +173,7 @@ localeRules RU = RURules.localeRules
localeRules SV = SVRules.localeRules
localeRules SW = SWRules.localeRules
localeRules TA = TARules.localeRules
localeRules TH = THRules.localeRules
localeRules TR = TRRules.localeRules
localeRules UK = UKRules.localeRules
localeRules VI = VIRules.localeRules
@ -216,6 +219,7 @@ langRules RU = RURules.langRules
langRules SV = SVRules.langRules
langRules SW = SWRules.langRules
langRules TA = TARules.langRules
langRules TH = THRules.langRules
langRules TR = TRRules.langRules
langRules UK = UKRules.langRules
langRules VI = VIRules.langRules

45
Duckling/Rules/TH.hs Normal file
View File

@ -0,0 +1,45 @@
-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree.
{-# LANGUAGE GADTs #-}
module Duckling.Rules.TH
( defaultRules
, langRules
, localeRules
) where
import Duckling.Dimensions.Types
import Duckling.Locale
import Duckling.Types
import qualified Duckling.Numeral.TH.Rules as Numeral
-- | Default rules for TH: the same as the language-level rules.
defaultRules :: Some Dimension -> [Rule]
defaultRules dimension = langRules dimension
-- | Region-specific rules for TH. Only custom dimensions contribute any (via
-- 'dimLocaleRules'); every other dimension has none.
localeRules :: Region -> Some Dimension -> [Rule]
localeRules region (This dimension) = case dimension of
  CustomDimension custom -> dimLocaleRules region custom
  _ -> []
-- | Language-level rules for TH, per dimension. Only 'Numeral' has rules;
-- custom dimensions delegate to 'dimLangRules'. Every dimension is listed
-- explicitly rather than covered by a wildcard.
langRules :: Some Dimension -> [Rule]
langRules (This dimension) = case dimension of
  AmountOfMoney -> []
  CreditCardNumber -> []
  Distance -> []
  Duration -> []
  Email -> []
  Numeral -> Numeral.rules
  Ordinal -> []
  PhoneNumber -> []
  Quantity -> []
  RegexMatch -> []
  Temperature -> []
  Time -> []
  TimeGrain -> []
  Url -> []
  Volume -> []
  CustomDimension custom -> dimLangRules TH custom

View File

@ -12,7 +12,9 @@
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE NoRebindableSyntax #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE StandaloneDeriving #-}
@ -26,15 +28,19 @@ import Data.Aeson
import Data.GADT.Compare
import Data.GADT.Show
import Data.Hashable
import Data.HashMap.Strict (HashMap)
import Data.HashSet (HashSet)
import Data.List (intersperse, sortOn)
import Data.Maybe
import Data.Some
import Data.Text (Text)
import Data.Text (Text, toLower, unpack)
import Data.Typeable ((:~:)(Refl), eqT, Typeable)
import GHC.Generics
import Prelude
import TextShow (TextShow(..))
import qualified Data.ByteString.Lazy as LB
import qualified Data.HashMap.Strict as HashMap
import qualified Data.Text as TT
import qualified Data.Text.Encoding as Text
import qualified Text.Regex.Base as R
import qualified Text.Regex.PCRE as PCRE
@ -50,7 +56,7 @@ import Duckling.Numeral.Types (NumeralData)
import Duckling.Ordinal.Types (OrdinalData)
import Duckling.PhoneNumber.Types (PhoneNumberData)
import Duckling.Quantity.Types (QuantityData)
import Duckling.Regex.Types (GroupMatch)
import Duckling.Regex.Types
import Duckling.Resolve
import Duckling.Temperature.Types (TemperatureData)
import Duckling.Time.Types (TimeData)
@ -314,3 +320,21 @@ regex = Regex . R.makeRegexOpts compOpts execOpts
-- | Pattern item that accepts a token when 'isDimension' holds for the given
-- dimension.
dimension :: Typeable a => Dimension a -> PatternItem
dimension = Predicate . isDimension
-- -----------------------------------------------------------------
-- Rule Construction helpers
-- | Builds a one-regex rule from a lookup table.
--
-- The regex is a single capturing group that alternates over the table's
-- keys, longest key first, so that a key which is a prefix of a longer key
-- cannot shadow it. The keys are inserted into the regex verbatim. On a
-- match, the lower-cased text is looked up in the table and the value found
-- is passed to @production@.
singleStringLookupRule :: HashMap Text a -> Text -> (a -> Maybe Token) -> Rule
singleStringLookupRule hashMap name production = Rule
  { name = name
  , pattern = [ regex (unpack alternation) ]
  , prod = produce
  }
  where
    longestFirst = sortOn (negate . TT.length) (HashMap.keys hashMap)
    alternation = TT.concat [ "(", TT.intercalate "|" longestFirst, ")" ]

    produce :: [Token] -> Maybe Token
    produce (Token RegexMatch (GroupMatch (match:_)):_) =
      HashMap.lookup (toLower match) hashMap >>= production
    produce _ = Nothing

BIN
dist-newstyle/cache/config vendored Normal file

Binary file not shown.

View File

@ -87,6 +87,7 @@ library
, Duckling.Rules.SV
, Duckling.Rules.SW
, Duckling.Rules.TA
, Duckling.Rules.TH
, Duckling.Rules.TR
, Duckling.Rules.UK
, Duckling.Rules.VI
@ -142,6 +143,7 @@ library
, Duckling.Ranking.Classifiers.SV_XX
, Duckling.Ranking.Classifiers.SW_XX
, Duckling.Ranking.Classifiers.TA_XX
, Duckling.Ranking.Classifiers.TH_XX
, Duckling.Ranking.Classifiers.TR_XX
, Duckling.Ranking.Classifiers.UK_XX
, Duckling.Ranking.Classifiers.VI_XX
@ -195,6 +197,7 @@ library
, Duckling.Dimensions.SV
, Duckling.Dimensions.SW
, Duckling.Dimensions.TA
, Duckling.Dimensions.TH
, Duckling.Dimensions.TR
, Duckling.Dimensions.UK
, Duckling.Dimensions.VI
@ -461,6 +464,8 @@ library
, Duckling.Numeral.SW.Rules
, Duckling.Numeral.TA.Corpus
, Duckling.Numeral.TA.Rules
, Duckling.Numeral.TH.Corpus
, Duckling.Numeral.TH.Rules
, Duckling.Numeral.TR.Corpus
, Duckling.Numeral.TR.Rules
, Duckling.Numeral.UK.Corpus
@ -955,6 +960,7 @@ test-suite duckling-test
, Duckling.Numeral.SV.Tests
, Duckling.Numeral.SW.Tests
, Duckling.Numeral.TA.Tests
, Duckling.Numeral.TH.Tests
, Duckling.Numeral.TR.Tests
, Duckling.Numeral.UK.Tests
, Duckling.Numeral.VI.Tests

View File

@ -208,6 +208,7 @@ getCorpusForLang RU = (testContext, testOptions, [])
getCorpusForLang SV = SVTime.corpus
getCorpusForLang SW = (testContext, testOptions, [])
getCorpusForLang TA = (testContext, testOptions, [])
getCorpusForLang TH = (testContext, testOptions, [])
getCorpusForLang TR = (testContext, testOptions, [])
getCorpusForLang UK = UKTime.corpus
getCorpusForLang VI = VITime.corpus

View File

@ -0,0 +1,21 @@
-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree.
module Duckling.Numeral.TH.Tests
( tests ) where
import Data.String
import Prelude
import Test.Tasty
import Duckling.Dimensions.Types
import Duckling.Numeral.TH.Corpus
import Duckling.Testing.Asserts
-- | Test group for TH numerals: runs the TH corpus against the Numeral
-- dimension.
tests :: TestTree
tests = testGroup "TH Tests" [corpusTest]
  where
    corpusTest = makeCorpusTest [This Numeral] corpus

View File

@ -50,6 +50,7 @@ import qualified Duckling.Numeral.RU.Tests as RU
import qualified Duckling.Numeral.SV.Tests as SV
import qualified Duckling.Numeral.SW.Tests as SW
import qualified Duckling.Numeral.TA.Tests as TA
import qualified Duckling.Numeral.TH.Tests as TH
import qualified Duckling.Numeral.TR.Tests as TR
import qualified Duckling.Numeral.UK.Tests as UK
import qualified Duckling.Numeral.VI.Tests as VI
@ -96,6 +97,7 @@ tests = testGroup "Numeral Tests"
, SV.tests
, SW.tests
, TA.tests
, TH.tests
, TR.tests
, UK.tests
, VI.tests