Numeral: common rule + supporting hindu-arabic numerals for Burmese

Summary:
* `ruleIntegerNumeric` was used in all languages but Burmese.
* It seems that Hindu-Arabic numerals are slowly being adopted in Burmese (e.g. on recent car plates).
* Moving the rule to `Duckling/Numeral/Rules.hs`.

Reviewed By: blandinw

Differential Revision: D6498349

fbshipit-source-id: e868dc9960f18f0781e4aa98a0dfcd14969537c9
This commit is contained in:
Julien Odent 2017-12-06 15:52:15 -08:00 committed by Facebook Github Bot
parent 56b57df153
commit 6df3b26707
39 changed files with 435 additions and 685 deletions

View File

@ -14,8 +14,9 @@ import Duckling.Dimensions.Types
allDimensions :: [Some Dimension]
allDimensions =
[ This Email
, This AmountOfMoney
[ This AmountOfMoney
, This Email
, This Numeral
, This PhoneNumber
, This Url
]

View File

@ -59,19 +59,6 @@ ruleInteger18 = Rule
, prod = \_ -> integer 12
}
ruleIntegerNumeric :: Rule
ruleIntegerNumeric = Rule
{ name = "integer (numeric)"
, pattern =
[ regex "(\\d{1,18})"
]
, prod = \tokens -> case tokens of
(Token RegexMatch (GroupMatch (match:_)):_) -> do
v <- parseInt match
integer $ toInteger v
_ -> Nothing
}
ruleInteger19 :: Rule
ruleInteger19 = Rule
{ name = "integer (20..90)"
@ -363,7 +350,6 @@ rules =
, ruleInteger5
, ruleInteger7
, ruleInteger9
, ruleIntegerNumeric
, ruleIntegerWithThousandsSeparator
, ruleMultiply
, ruleNumeralDotNumeral

View File

@ -29,19 +29,6 @@ import Duckling.Regex.Types
import Duckling.Types
import qualified Duckling.Numeral.Types as TNumeral
ruleIntegers :: Rule
ruleIntegers = Rule
{ name = "integer (numeric)"
, pattern =
[ regex "(\\d{1,18})"
]
, prod = \tokens -> case tokens of
(Token RegexMatch (GroupMatch (match:_)):_) -> do
v <- parseInt match
integer $ toInteger v
_ -> Nothing
}
zeroNineteenMap :: HashMap Text Integer
zeroNineteenMap = HashMap.fromList
[ ( "нула", 0 )
@ -258,8 +245,7 @@ ruleNegative = Rule
rules :: [Rule]
rules =
[ ruleIntegers
, ruleToNineteen
[ ruleToNineteen
, ruleTens
, rulePowersOfTen
, ruleCompositeTens

View File

@ -28,20 +28,6 @@ import Duckling.Regex.Types
import Duckling.Types
import qualified Duckling.Numeral.Types as TNumeral
ruleIntegerNumeric :: Rule
ruleIntegerNumeric = Rule
{ name = "integer (numeric)"
, pattern =
[ regex "(\\d{1,18})"
]
, prod = \tokens -> case tokens of
(Token RegexMatch (GroupMatch (match:_)):
_) -> do
v <- parseInt match
integer $ toInteger v
_ -> Nothing
}
ruleNumeralMap :: HashMap Text Integer
ruleNumeralMap = HashMap.fromList
[ ( "nula", 0 )
@ -74,6 +60,5 @@ ruleNumeral = Rule
rules :: [Rule]
rules =
[ ruleIntegerNumeric
, ruleNumeral
[ ruleNumeral
]

View File

@ -41,19 +41,6 @@ ruleNumeralsPrefixWithNegativeOrMinus = Rule
_ -> Nothing
}
ruleIntegerNumeric :: Rule
ruleIntegerNumeric = Rule
{ name = "integer (numeric)"
, pattern =
[ regex "(\\d{1,18})"
]
, prod = \tokens -> case tokens of
(Token RegexMatch (GroupMatch (match:_)):_) -> do
v <- parseInt match
integer $ toInteger v
_ -> Nothing
}
ruleFew :: Rule
ruleFew = Rule
{ name = "few"
@ -314,7 +301,6 @@ rules =
, ruleInteger
, ruleInteger2
, ruleInteger3
, ruleIntegerNumeric
, ruleIntegerWithThousandsSeparator
, ruleIntersect
, ruleMultiply

View File

@ -42,19 +42,6 @@ ruleNumeralsPrefixWithNegativeOrMinus = Rule
_ -> Nothing
}
ruleIntegerNumeric :: Rule
ruleIntegerNumeric = Rule
{ name = "integer (numeric)"
, pattern =
[ regex "(\\d{1,18})"
]
, prod = \tokens -> case tokens of
(Token RegexMatch (GroupMatch (match:_)):_) -> do
v <- parseInt match
integer $ toInteger v
_ -> Nothing
}
ruleFew :: Rule
ruleFew = Rule
{ name = "few"
@ -338,7 +325,6 @@ rules =
, ruleFew
, ruleInteger2
, ruleInteger3
, ruleIntegerNumeric
, ruleIntegerWithThousandsSeparator
, ruleIntersect
, ruleMultiply

View File

@ -30,20 +30,6 @@ import Duckling.Regex.Types
import Duckling.Types
import qualified Duckling.Numeral.Types as TNumeral
ruleIntegerNumeric :: Rule
ruleIntegerNumeric = Rule
{ name = "integer (numeric)"
, pattern =
[ regex "(\\d{1,18})"
]
, prod = \tokens -> case tokens of
(Token RegexMatch (GroupMatch (match:_)):
_) -> do
v <- parseInt match
integer $ toInteger v
_ -> Nothing
}
oneOrTwoDigitsMap :: HashMap Text Integer
oneOrTwoDigitsMap = HashMap.fromList
[ ( "μηδέν" , 0 )
@ -267,7 +253,6 @@ ruleDots = Rule
rules :: [Rule]
rules =
[ ruleFew
, ruleIntegerNumeric
, ruleNumeral
, ruleCompositeTens
, rulePowersOfTen

View File

@ -30,18 +30,6 @@ import Duckling.Regex.Types
import Duckling.Types
import qualified Duckling.Numeral.Types as TNumeral
ruleIntegers :: Rule
ruleIntegers = Rule
{ name = "integer (numeric)"
, pattern =
[ regex "(\\d{1,18})"
]
, prod = \tokens -> case tokens of
(Token RegexMatch (GroupMatch (match:_)):_) ->
toInteger <$> parseInt match >>= integer
_ -> Nothing
}
ruleDozen :: Rule
ruleDozen = Rule
{ name = "a dozen of"
@ -320,8 +308,7 @@ ruleMultiply = Rule
rules :: [Rule]
rules =
[ ruleIntegers
, ruleToNineteen
[ ruleToNineteen
, ruleTens
, rulePowersOfTen
, ruleCompositeTens

View File

@ -39,19 +39,6 @@ ruleNumeralsPrefixWithNegativeOrMinus = Rule
_ -> Nothing
}
ruleIntegerNumeric :: Rule
ruleIntegerNumeric = Rule
{ name = "integer (numeric)"
, pattern =
[ regex "(\\d{1,18})"
]
, prod = \tokens -> case tokens of
(Token RegexMatch (GroupMatch (match:_)):_) -> do
v <- parseInt match
integer $ toInteger v
_ -> Nothing
}
ruleDecimalWithThousandsSeparator :: Rule
ruleDecimalWithThousandsSeparator = Rule
{ name = "decimal with thousands separator"
@ -299,7 +286,6 @@ rules :: [Rule]
rules =
[ ruleDecimalNumeral
, ruleDecimalWithThousandsSeparator
, ruleIntegerNumeric
, ruleIntegerWithThousandsSeparator
, ruleNumeral
, ruleNumeral2

View File

@ -42,20 +42,6 @@ ruleNumeralsPrefixWithNegativeOrMinus = Rule
_ -> Nothing
}
ruleIntegerNumeric :: Rule
ruleIntegerNumeric = Rule
{ name = "integer (numeric)"
, pattern =
[ regex "(\\d{1,18})"
]
, prod = \tokens -> case tokens of
(Token RegexMatch (GroupMatch (match:_)):
_) -> do
v <- parseInt match
integer $ toInteger v
_ -> Nothing
}
ruleACoupleOf :: Rule
ruleACoupleOf = Rule
{ name = "a couple of"
@ -293,7 +279,6 @@ rules =
, ruletwentyNinety
, ruleInteger3
, ruleInteger4
, ruleIntegerNumeric
, ruleIntegerWithThousandsSeparator
, ruleIntersect
, ruleMultiply

View File

@ -52,20 +52,6 @@ ruleNumeralsPrefixWithNegativeOrMinus = Rule
_ -> Nothing
}
ruleIntegerNumeric :: Rule
ruleIntegerNumeric = Rule
{ name = "integer (numeric)"
, pattern =
[ regex "(\\d{1,18})"
]
, prod = \tokens -> case tokens of
(Token RegexMatch (GroupMatch (match:_)):
_) -> do
v <- parseInt match
integer $ toInteger v
_ -> Nothing
}
ruleNumerals2 :: Rule
ruleNumerals2 = Rule
{ name = "numbers 22..29 32..39 .. 52..59"
@ -310,7 +296,6 @@ rules :: [Rule]
rules =
[ ruleDecimalNumeral
, ruleDecimalWithThousandsSeparator
, ruleIntegerNumeric
, ruleIntegerWithThousandsSeparator
, ruleNumeral
, ruleNumeral2

View File

@ -42,19 +42,6 @@ ruleNumeralsPrefixWithNegativeOrMinus = Rule
_ -> Nothing
}
ruleIntegerNumeric :: Rule
ruleIntegerNumeric = Rule
{ name = "integer (numeric)"
, pattern =
[ regex "(\\d{1,18})"
]
, prod = \tokens -> case tokens of
(Token RegexMatch (GroupMatch (match:_)):_) -> do
v <- toInteger <$> parseInt match
integer v
_ -> Nothing
}
oneToTenMap :: HashMap Text Integer
oneToTenMap = HashMap.fromList
[ ("aon", 1)
@ -256,7 +243,6 @@ rules =
, ruleDag
, ruleDecimalNumeral
, ruleDecimalWithThousandsSeparator
, ruleIntegerNumeric
, ruleIntegerWithThousandsSeparator
, ruleNumerals
, ruleNumerals2

View File

@ -106,19 +106,6 @@ ruleNumeralsPrefixWithNegativeOrMinus = Rule
_ -> Nothing
}
ruleIntegerNumeric :: Rule
ruleIntegerNumeric = Rule
{ name = "integer (numeric)"
, pattern =
[ regex "(\\d{1,18})"
]
, prod = \tokens -> case tokens of
(Token RegexMatch (GroupMatch (match:_)):_) -> do
v <- toInteger <$> parseInt match
integer v
_ -> Nothing
}
ruleInteger10 :: Rule
ruleInteger10 = Rule
{ name = "integer 9"
@ -379,7 +366,6 @@ rules =
, ruleInteger7
, ruleInteger8
, ruleInteger9
, ruleIntegerNumeric
, ruleIntersectNumerals
, ruleIntersectWithAnd
, ruleMultiply

View File

@ -37,19 +37,6 @@ ruleNumbersPrefixWithNegativeOrMinus = Rule
_ -> Nothing
}
ruleIntegerNumeric :: Rule
ruleIntegerNumeric = Rule
{ name = "integer (numeric)"
, pattern =
[ regex "(\\d{1,18})"
]
, prod = \tokens -> case tokens of
(Token RegexMatch (GroupMatch (match:_)):_) -> do
v <- toInteger <$> parseInt match
integer v
_ -> Nothing
}
ruleFew :: Rule
ruleFew = Rule
{ name = "few"
@ -367,7 +354,7 @@ rules =
, ruleInteger
, ruleInteger2
, ruleInteger3
, ruleInteger4 , ruleIntegerNumeric
, ruleInteger4
, ruleIntegerWithThousandsSeparator
, ruleMultiply
, ruleNumberDotNumber

View File

@ -29,20 +29,6 @@ import Duckling.Regex.Types
import Duckling.Types
import qualified Duckling.Numeral.Types as TNumeral
ruleIntegerNumeric :: Rule
ruleIntegerNumeric = Rule
{ name = "integer (numeric)"
, pattern =
[ regex "(\\d{1,18})"
]
, prod = \tokens -> case tokens of
(Token RegexMatch (GroupMatch (match:_)):
_) -> do
v <- parseInt match
integer $ toInteger v
_ -> Nothing
}
ruleNumeralMap :: HashMap Text Integer
ruleNumeralMap = HashMap.fromList
[ ( "nulla", 0 )
@ -161,8 +147,7 @@ ruleCompositeTens = Rule
rules :: [Rule]
rules =
[ ruleIntegerNumeric
, ruleNumeral
[ ruleNumeral
, ruleElevenToNineteen
, ruleTwentyoneToTwentynine
, ruleTens

View File

@ -82,19 +82,6 @@ ruleNumeralsPrefixWithNegativeOrMinus = Rule
_ -> Nothing
}
ruleIntegerNumeric :: Rule
ruleIntegerNumeric = Rule
{ name = "integer (numeric)"
, pattern =
[ regex "(\\d{1,18})"
]
, prod = \tokens -> case tokens of
(Token RegexMatch (GroupMatch (match:_)):_) -> do
v <- toInteger <$> parseInt match
integer v
_ -> Nothing
}
ruleTen :: Rule
ruleTen = Rule
{ name = "ten"
@ -266,7 +253,6 @@ rules =
, ruleInteger
, ruleInteger2
, ruleInteger3
, ruleIntegerNumeric
, ruleIntegerWithThousandsSeparator
, ruleIntersect
, ruleMultiply

View File

@ -39,19 +39,6 @@ ruleNumeralsPrefixWithNegativeOrMinus = Rule
_ -> Nothing
}
ruleIntegerNumeric :: Rule
ruleIntegerNumeric = Rule
{ name = "integer (numeric)"
, pattern =
[ regex "(\\d{1,18})"
]
, prod = \tokens -> case tokens of
(Token RegexMatch (GroupMatch (match:_)):_) -> do
v <- toInteger <$> parseInt match
integer v
_ -> Nothing
}
ruleDecimalWithThousandsSeparator :: Rule
ruleDecimalWithThousandsSeparator = Rule
{ name = "decimal with thousands separator"
@ -329,7 +316,6 @@ rules :: [Rule]
rules =
[ ruleDecimalNumeral
, ruleDecimalWithThousandsSeparator
, ruleIntegerNumeric
, ruleIntegerWithThousandsSeparator
, ruleNumeral
, ruleNumeral2

View File

@ -48,19 +48,6 @@ ruleNumeralsPrefixWithNegativeOrMinus = Rule
_ -> Nothing
}
ruleIntegerNumeric :: Rule
ruleIntegerNumeric = Rule
{ name = "integer (numeric)"
, pattern =
[ regex "(\\d{1,18})"
]
, prod = \tokens -> case tokens of
(Token RegexMatch (GroupMatch (match:_)):_) -> do
v <- toInteger <$> parseInt match
integer v
_ -> Nothing
}
ruleInteger10 :: Rule
ruleInteger10 = Rule
{ name = "integer (1000..1999)"
@ -348,7 +335,6 @@ rules =
, ruleInteger7
, ruleInteger8
, ruleInteger9
, ruleIntegerNumeric
, ruleIntegerWithThousandsSeparator
, ruleNumeral
, ruleNumeralsPrefixWithNegativeOrMinus

View File

@ -28,20 +28,6 @@ import Duckling.Regex.Types
import Duckling.Types
import qualified Duckling.Numeral.Types as TNumeral
ruleIntegerNumeric :: Rule
ruleIntegerNumeric = Rule
{ name = "integer (numeric)"
, pattern =
[ regex "(\\d{1,18})"
]
, prod = \tokens -> case tokens of
(Token RegexMatch (GroupMatch (match:_)):
_) -> do
v <- parseInt match
integer $ toInteger v
_ -> Nothing
}
ruleNumeralMap :: HashMap Text Integer
ruleNumeralMap = HashMap.fromList
[ ( "ნული", 0 )
@ -71,6 +57,5 @@ ruleNumeral = Rule
rules :: [Rule]
rules =
[ ruleIntegerNumeric
, ruleNumeral
[ ruleNumeral
]

View File

@ -42,19 +42,6 @@ ruleIntegerForOrdinals = Rule
_ -> Nothing
}
ruleIntegerNumeric :: Rule
ruleIntegerNumeric = Rule
{ name = "integer (numeric)"
, pattern =
[ regex "(\\d{1,18})"
]
, prod = \tokens -> case tokens of
(Token RegexMatch (GroupMatch (match:_)):_) -> do
v <- parseInt match
integer $ toInteger v
_ -> Nothing
}
ruleFew :: Rule
ruleFew = Rule
{ name = "few 몇"
@ -321,7 +308,6 @@ rules =
, ruleHalf
, ruleInteger
, ruleIntegerForOrdinals
, ruleIntegerNumeric
, ruleIntegerType1
, ruleIntegerType1PowersOfTen
, ruleSum

View File

@ -43,6 +43,7 @@ allExamples = concat
[ ""
, "သုံး"
, "တတိယ"
, "3"
]
, examples (NumeralValue 30)
[ "သုံးဆယ်"

View File

@ -39,8 +39,8 @@ ruleInteger5 = Rule
_ -> Nothing
}
ruleIntegerNumeric :: Rule
ruleIntegerNumeric = Rule
ruleInteger09 :: Rule
ruleInteger09 = Rule
{ name = "integer (0..9) - numeric"
, pattern =
[ regex "(|၁|၂|၃|၄|၅|၆|၇|၈|၉)"
@ -177,6 +177,6 @@ rules =
, ruleInteger6
, ruleInteger7
, ruleInteger8
, ruleIntegerNumeric
, ruleInteger09
, ruleIntegerPali
]

View File

@ -56,19 +56,6 @@ ruleNumeralsPrefixWithNegativeOrMinus = Rule
_ -> Nothing
}
ruleIntegerNumeric :: Rule
ruleIntegerNumeric = Rule
{ name = "integer (numeric)"
, pattern =
[ regex "(\\d{1,18})"
]
, prod = \tokens -> case tokens of
(Token RegexMatch (GroupMatch (match:_)):_) -> do
v <- toInteger <$> parseInt match
integer v
_ -> Nothing
}
ruleFew :: Rule
ruleFew = Rule
{ name = "few"
@ -311,7 +298,6 @@ rules =
, ruleInteger
, ruleInteger2
, ruleInteger3
, ruleIntegerNumeric
, ruleIntegerWithThousandsSeparator
, ruleIntersect
, ruleIntersectWithAnd

View File

@ -41,19 +41,6 @@ ruleNumeralsPrefixWithNegativeOrMinus = Rule
_ -> Nothing
}
ruleIntegerNumeric :: Rule
ruleIntegerNumeric = Rule
{ name = "integer (numeric)"
, pattern =
[ regex "(\\d{1,18})"
]
, prod = \tokens -> case tokens of
(Token RegexMatch (GroupMatch (match:_)):_) -> do
v <- toInteger <$> parseInt match
integer v
_ -> Nothing
}
ruleFew :: Rule
ruleFew = Rule
{ name = "few"
@ -288,7 +275,6 @@ rules =
, ruleInteger
, ruleInteger2
, ruleInteger3
, ruleIntegerNumeric
, ruleIntersect
, ruleMultiply
, ruleNumeralsEn

View File

@ -98,19 +98,6 @@ ruleOne = Rule
, prod = \_ -> integer 1
}
ruleIntegerNumeric :: Rule
ruleIntegerNumeric = Rule
{ name = "integer (numeric)"
, pattern =
[ regex "(\\d{1,18})"
]
, prod = \tokens -> case tokens of
(Token RegexMatch (GroupMatch (match:_)):_) -> do
v <- toInteger <$> parseInt match
integer v
_ -> Nothing
}
ruleTen :: Rule
ruleTen = Rule
{ name = "ten"
@ -574,7 +561,6 @@ rules =
, ruleFourteen
, ruleFourty
, ruleInteger2
, ruleIntegerNumeric
, ruleIntegerWithThousandsSeparator
, ruleIntersect
, ruleIntersectWithAnd

View File

@ -134,6 +134,7 @@ allExamples = concat
, "100000"
, "100K"
, "100k"
, "100.000,00"
]
, examples (NumeralValue 100)
[ "100"
@ -188,4 +189,7 @@ allExamples = concat
, "um ponto cinco"
, "1,5"
]
, examples (NumeralValue 1200000.42)
[ "1.200.000,42"
]
]

View File

@ -30,18 +30,6 @@ import Duckling.Regex.Types
import Duckling.Types
import qualified Duckling.Numeral.Types as TNumeral
ruleIntegers :: Rule
ruleIntegers = Rule
{ name = "integer (numeric)"
, pattern =
[ regex "(\\d{1,18})"
]
, prod = \tokens -> case tokens of
(Token RegexMatch (GroupMatch (match:_)):_) ->
toInteger <$> parseInt match >>= integer
_ -> Nothing
}
ruleDozen :: Rule
ruleDozen = Rule
{ name = "a dozen of"
@ -392,8 +380,7 @@ ruleMultiply = Rule
rules :: [Rule]
rules =
[ ruleIntegers
, ruleToNineteen
[ ruleToNineteen
, ruleTens
, ruleCent
, rulePowersOfTen

View File

@ -40,19 +40,6 @@ ruleNumeralsPrefixWithOrMinus = Rule
_ -> Nothing
}
ruleIntegerNumeric :: Rule
ruleIntegerNumeric = Rule
{ name = "integer (numeric)"
, pattern =
[ regex "(\\d{1,18})"
]
, prod = \tokens -> case tokens of
(Token RegexMatch (GroupMatch (match:_)):_) -> do
v <- toInteger <$> parseInt match
integer v
_ -> Nothing
}
ruleSpecialCompositionForMissingHundredsLikeInOneTwentyTwo :: Rule
ruleSpecialCompositionForMissingHundredsLikeInOneTwentyTwo = Rule
{ name = "special composition for missing hundreds like in one twenty two"
@ -304,7 +291,6 @@ rules =
, ruleInteger2
, ruleInteger3
, ruleIntegerCuSeparatorDeMiiDot
, ruleIntegerNumeric
, ruleIntersect
, ruleIntersectCuI
, ruleMultiply

View File

@ -52,19 +52,6 @@ ruleInteger5 = Rule
_ -> Nothing
}
ruleIntegerNumeric :: Rule
ruleIntegerNumeric = Rule
{ name = "integer (numeric)"
, pattern =
[ regex "(\\d{1,18})"
]
, prod = \tokens -> case tokens of
(Token RegexMatch (GroupMatch (match:_)):_) -> do
v <- toInteger <$> parseInt match
integer v
_ -> Nothing
}
ruleDecimalWithThousandsSeparator :: Rule
ruleDecimalWithThousandsSeparator = Rule
{ name = "decimal with thousands separator"
@ -295,7 +282,6 @@ rules =
, ruleInteger8
, ruleIntegerAndAHalf
, ruleDecimalOneAndAHalf
, ruleIntegerNumeric
, ruleIntegerWithThousandsSeparator
, ruleNumeralDotNumeral
, ruleNumeralsPrefixWithMinus

39
Duckling/Numeral/Rules.hs Normal file
View File

@ -0,0 +1,39 @@
-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree. An additional grant
-- of patent rights can be found in the PATENTS file in the same directory.
{-# LANGUAGE GADTs #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE NoRebindableSyntax #-}
module Duckling.Numeral.Rules
( rules
) where
import Prelude
import Duckling.Dimensions.Types
import Duckling.Numeral.Helpers
import Duckling.Regex.Types
import Duckling.Types
-- | Numeric integer rule: matches a run of 1 to 18 digits, parses the
-- captured text with 'parseInt' and wraps the widened value via 'integer'.
-- Produces 'Nothing' when the tokens are not a regex match with at least one
-- capture group, or when 'parseInt' fails.
ruleIntegerNumeric :: Rule
ruleIntegerNumeric = Rule
  { name = "integer (numeric)"
  , pattern = [regex "(\\d{1,18})"]
  , prod = \matched -> case matched of
      (Token RegexMatch (GroupMatch (digits:_)):_) -> do
        -- NOTE(review): the 18-digit cap presumably keeps the parsed value
        -- inside the range 'parseInt' can represent -- confirm in
        -- Duckling.Numeral.Helpers.
        n <- parseInt digits
        integer (toInteger n)
      _ -> Nothing
  }
-- | Rules exported by this module; currently only the numeric integer rule.
rules :: [Rule]
rules = [ruleIntegerNumeric]

View File

@ -58,20 +58,6 @@ ruleNumeralsPrefixWithNegativeOrMinus = Rule
_ -> Nothing
}
ruleIntegerNumeric :: Rule
ruleIntegerNumeric = Rule
{ name = "integer (numeric)"
, pattern =
[ regex "(\\d{1,18})"
]
, prod = \tokens -> case tokens of
(Token RegexMatch (GroupMatch (match:_)):
_) -> do
v <- parseInt match
integer $ toInteger v
_ -> Nothing
}
ruleFew :: Rule
ruleFew = Rule
{ name = "few"
@ -309,7 +295,6 @@ rules =
, ruleInteger
, ruleInteger2
, ruleInteger3
, ruleIntegerNumeric
, ruleIntegerWithThousandsSeparator
, ruleIntersect
, ruleIntersectWithAnd

View File

@ -67,19 +67,6 @@ ruleNumeralsPrefixWithNegativeOrMinus = Rule
_ -> Nothing
}
ruleIntegerNumeric :: Rule
ruleIntegerNumeric = Rule
{ name = "integer (numeric)"
, pattern =
[ regex "(\\d{1,18})"
]
, prod = \tokens -> case tokens of
(Token RegexMatch (GroupMatch (match:_)):_) -> do
v <- parseInt match
integer $ toInteger v
_ -> Nothing
}
ruleACoupleOf :: Rule
ruleACoupleOf = Rule
{ name = "a couple (of)"
@ -719,7 +706,6 @@ rules =
, ruleInteger7
, ruleInteger8
, ruleInteger9
, ruleIntegerNumeric
, ruleIntegerWithThousandsSeparator
, ruleIntersect
, ruleNumeralDotNumeral

View File

@ -52,19 +52,6 @@ ruleInteger5 = Rule
_ -> Nothing
}
ruleIntegerNumeric :: Rule
ruleIntegerNumeric = Rule
{ name = "integer (numeric)"
, pattern =
[ regex "(\\d{1,18})"
]
, prod = \tokens -> case tokens of
(Token RegexMatch (GroupMatch (match:_)):_) -> do
v <- parseInt match
integer $ toInteger v
_ -> Nothing
}
ruleDecimalWithThousandsSeparator :: Rule
ruleDecimalWithThousandsSeparator = Rule
{ name = "decimal with thousands separator"
@ -271,7 +258,6 @@ rules =
, ruleInteger6
, ruleInteger7
, ruleInteger8
, ruleIntegerNumeric
, ruleIntegerWithThousandsSeparator
, ruleNumeralDotNumeral
, ruleNumeralsPrefixWithMinus

View File

@ -68,19 +68,6 @@ ruleIntersectWithAnd = Rule
_ -> Nothing
}
ruleIntegerNumeric :: Rule
ruleIntegerNumeric = Rule
{ name = "integer (numeric)"
, pattern =
[ regex "(\\d{1,18})"
]
, prod = \tokens -> case tokens of
(Token RegexMatch (GroupMatch (match:_)):_) -> do
v <- parseInt match
integer $ toInteger v
_ -> Nothing
}
ruleNumeralsPrefixWithM :: Rule
ruleNumeralsPrefixWithM = Rule
{ name = "numbers prefix with -, âm"
@ -335,7 +322,6 @@ rules =
, ruleInteger
, ruleInteger2
, ruleInteger3
, ruleIntegerNumeric
, ruleIntegerWithThousandsSeparator
, ruleIntersect
, ruleIntersectWithAnd

View File

@ -69,19 +69,6 @@ ruleNumeralsPrefixWithNegativeOrMinus = Rule
_ -> Nothing
}
ruleIntegerNumeric :: Rule
ruleIntegerNumeric = Rule
{ name = "integer (numeric)"
, pattern =
[ regex "(\\d{1,18})"
]
, prod = \tokens -> case tokens of
(Token RegexMatch (GroupMatch (match:_)):_) -> do
v <- parseInt match
integer $ toInteger v
_ -> Nothing
}
ruleDecimalWithThousandsSeparator :: Rule
ruleDecimalWithThousandsSeparator = Rule
{ name = "decimal with thousands separator"
@ -194,7 +181,6 @@ rules =
, ruleInteger3
, ruleInteger4
, ruleInteger5
, ruleIntegerNumeric
, ruleIntegerWithThousandsSeparator
, ruleNumeral
, ruleNumeralsPrefixWithNegativeOrMinus

View File

@ -52,6 +52,13 @@ classifiers
koData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0}}),
("S\225bada",
Classifier{okData =
ClassData{prior = 0.0, unseen = -2.0794415416798357,
likelihoods = HashMap.fromList [("", 0.0)], n = 6},
koData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0}}),
("<dim time> da tarde",
Classifier{okData =
ClassData{prior = -0.24686007793152578, unseen = -4.0943445622221,
@ -117,14 +124,21 @@ classifiers
likelihoods = HashMap.fromList [], n = 0}}),
("<named-month|named-day> past",
Classifier{okData =
ClassData{prior = 0.0, unseen = -2.1972245773362196,
ClassData{prior = 0.0, unseen = -2.3025850929940455,
likelihoods =
HashMap.fromList
[("day", -0.6931471805599453),
("named-day", -0.6931471805599453)],
[("Ter\231a-feira", -1.5040773967762742),
("Domingo", -1.0986122886681098), ("day", -0.8109302162163288)],
n = 3},
koData =
ClassData{prior = -infinity, unseen = -1.0986122886681098,
ClassData{prior = -infinity, unseen = -1.3862943611198906,
likelihoods = HashMap.fromList [], n = 0}}),
("Abril",
Classifier{okData =
ClassData{prior = 0.0, unseen = -1.3862943611198906,
likelihoods = HashMap.fromList [("", 0.0)], n = 2},
koData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0}}),
("proximo <cycle> ",
Classifier{okData =
@ -157,86 +171,109 @@ classifiers
koData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0}}),
("Julho",
Classifier{okData =
ClassData{prior = 0.0, unseen = -1.791759469228055,
likelihoods = HashMap.fromList [("", 0.0)], n = 4},
koData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0}}),
("intersect by `da` or `de`",
Classifier{okData =
ClassData{prior = -1.8325814637483102, unseen = -4.574710978503383,
ClassData{prior = -1.8325814637483102, unseen = -4.736198448394496,
likelihoods =
HashMap.fromList
[("day of month (1st)named-month", -3.1780538303479458),
("daymonth", -2.772588722239781),
[("daymonth", -2.9356283494842854),
("<day-of-month> de <named-month>two time tokens separated by \",\"2",
-3.871201010907891),
-4.034240638152395),
("<day-of-month> de <named-month>intersect",
-3.871201010907891),
-4.034240638152395),
("<day-of-month> de <named-month>two time tokens separated by \",\"",
-3.871201010907891),
("dayday", -3.1780538303479458),
("dia <day-of-month> (non ordinal)named-month",
-3.4657359027997265),
("dayyear", -1.9252908618525775),
("named-dayproximo <cycle> ", -3.871201010907891),
("dd-dd <month>(interval)year", -3.871201010907891),
("named-day<cycle> (que vem)", -3.871201010907891),
("two time tokens separated by \",\"2year",
-3.4657359027997265),
("intersectyear", -3.871201010907891),
("<day-of-month> de <named-month>year", -2.4849066497880004),
("two time tokens separated by \",\"year", -3.4657359027997265),
("dayweek", -3.1780538303479458),
("named-day<cycle> passado", -3.871201010907891)],
-4.034240638152395),
("dayday", -3.34109345759245),
("Quart-feira<cycle> (que vem)", -4.034240638152395),
("dayyear", -2.088330489097082),
("dd-dd <month>(interval)year", -4.034240638152395),
("day of month (1st)Mar\231o", -3.34109345759245),
("two time tokens separated by \",\"2year", -3.628775530044231),
("intersectyear", -4.034240638152395),
("<day-of-month> de <named-month>year", -2.6479462770325046),
("dia <day-of-month> (non ordinal)Fevereiro",
-3.628775530044231),
("two time tokens separated by \",\"year", -3.628775530044231),
("Domingo<cycle> passado", -4.034240638152395),
("dayweek", -3.34109345759245),
("Quart-feiraproximo <cycle> ", -4.034240638152395)],
n = 24},
koData =
ClassData{prior = -0.1743533871447778, unseen = -5.707110264748875,
ClassData{prior = -0.1743533871447778, unseen = -5.762051382780177,
likelihoods =
HashMap.fromList
[("time-of-day (latent)<datetime> - <datetime> (interval)",
-4.605170185988091),
("hourday", -2.870569130599985),
("monthday", -4.31748811353631),
("monthyear", -3.506557897319982),
("yearhour", -3.624340932976365),
("houryear", -3.0647251450409425),
-4.660289485209171),
("hourday", -2.9256884298210646),
("year (latent)Abril", -4.660289485209171),
("year (latent)Julho", -3.967142304649226),
("time-of-day (latent)Fevereiro", -3.8129916248219673),
("monthday", -4.37260741275739),
("monthyear", -3.561677196541061),
("yearhour", -3.6794602321974446),
("houryear", -3.119844444262022),
("year (latent)Maio", -4.149463861443181),
("time-of-day (latent)two time tokens separated by \",\"2",
-4.605170185988091),
("time-of-day (latent)intersect", -3.0647251450409425),
-4.660289485209171),
("Setembroyear", -3.967142304649226),
("time-of-day (latent)Setembro", -3.967142304649226),
("time-of-day (latent)intersect", -3.119844444262022),
("year (latent)<time-of-day> <part-of-day>",
-5.0106352940962555),
("year (latent)intersect", -4.31748811353631),
-5.065754593317335),
("\224s <time-of-day>Julho", -5.065754593317335),
("year (latent)intersect", -4.37260741275739),
("Setembrointersect", -5.065754593317335),
("intersect by `da` or `de`two time tokens separated by \",\"",
-5.0106352940962555),
-5.065754593317335),
("year (latent)amanh\227 pela <part-of-day>",
-5.0106352940962555),
("hourmonth", -1.8325814637483102),
-5.065754593317335),
("hourmonth", -1.8877007629693896),
("time-of-day (latent)intersect by `da` or `de`",
-3.2188758248682006),
("monthmonth", -5.0106352940962555),
("dayyear", -5.0106352940962555),
("year (latent)named-month", -2.3025850929940455),
("named-monthyear", -3.506557897319982),
("year (latent)<dim time> da manha", -5.0106352940962555),
("de <datetime> - <datetime> (interval)named-month",
-5.0106352940962555),
("\224s <time-of-day>named-month", -4.605170185988091),
("yearmonth", -2.3025850929940455),
("two time tokens separated by \",\"2year", -4.605170185988091),
("intersect by `da` or `de`intersect", -5.0106352940962555),
("intersectyear", -5.0106352940962555),
-3.2739951240892804),
("monthmonth", -5.065754593317335),
("Setembrotwo time tokens separated by \",\"2",
-5.065754593317335),
("year (latent)Fevereiro", -3.8129916248219673),
("time-of-day (latent)Dezembro", -5.065754593317335),
("dayyear", -5.065754593317335),
("time-of-day (latent)Julho", -4.149463861443181),
("time-of-day (latent)Abril", -4.660289485209171),
("de <datetime> - <datetime> (interval)Janeiro",
-5.065754593317335),
("year (latent)<dim time> da manha", -5.065754593317335),
("time-of-day (latent)Mar\231o", -3.967142304649226),
("year (latent)Janeiro", -5.065754593317335),
("year (latent)Dezembro", -5.065754593317335),
("yearmonth", -2.3577043922151253),
("two time tokens separated by \",\"2year", -4.660289485209171),
("Setembrotwo time tokens separated by \",\"",
-5.065754593317335),
("intersect by `da` or `de`intersect", -5.065754593317335),
("intersectyear", -5.065754593317335),
("intersect by `da` or `de`two time tokens separated by \",\"2",
-5.0106352940962555),
("<day-of-month> de <named-month>year", -5.0106352940962555),
("two time tokens separated by \",\"year", -4.605170185988091),
-5.065754593317335),
("time-of-day (latent)Maio", -4.149463861443181),
("<day-of-month> de <named-month>year", -5.065754593317335),
("Maioyear", -4.660289485209171),
("year (latent)Setembro", -3.967142304649226),
("\224s <time-of-day>Janeiro", -5.065754593317335),
("two time tokens separated by \",\"year", -4.660289485209171),
("time-of-day (latent)two time tokens separated by \",\"",
-4.605170185988091),
("intersect by `da` or `de`year", -3.506557897319982),
("named-monthtwo time tokens separated by \",\"2",
-5.0106352940962555),
-4.660289485209171),
("intersect by `da` or `de`year", -3.561677196541061),
("year (latent)<datetime> - <datetime> (interval)",
-4.605170185988091),
("yearday", -5.0106352940962555),
("named-monthintersect", -5.0106352940962555),
("named-monthtwo time tokens separated by \",\"",
-5.0106352940962555),
("time-of-day (latent)named-month", -2.336486644669727)],
-4.660289485209171),
("yearday", -5.065754593317335),
("time-of-day (latent)Janeiro", -5.065754593317335),
("Julhoyear", -5.065754593317335),
("year (latent)Mar\231o", -3.967142304649226)],
n = 126}}),
("<hour-of-day> and half",
Classifier{okData =
@ -259,34 +296,44 @@ classifiers
likelihoods = HashMap.fromList [], n = 0}}),
("two time tokens separated by \",\"",
Classifier{okData =
ClassData{prior = -1.0360919316867756, unseen = -3.58351893845611,
ClassData{prior = -1.0360919316867756,
unseen = -3.6375861597263857,
likelihoods =
HashMap.fromList
[("intersectnamed-day", -2.8622008809294686),
("intersect by `da` or `de`named-day", -2.8622008809294686),
("dayday", -1.252762968495368),
("de <year>named-day", -2.8622008809294686),
("named-dayintersect", -2.456735772821304),
("named-dayintersect by `da` or `de`", -2.456735772821304),
("yearnamed-day", -2.8622008809294686),
("named-day<day-of-month> de <named-month>",
-2.169053700369523),
("yearday", -2.456735772821304)],
[("yearSexta-feira", -2.917770732084279),
("dayday", -1.3083328196501787),
("de <year>Sexta-feira", -2.917770732084279),
("Sexta-feiraintersect", -2.512305623976115),
("intersectSexta-feira", -2.917770732084279),
("Sexta-feira<day-of-month> de <named-month>",
-2.512305623976115),
("Sexta-feiraintersect by `da` or `de`", -2.512305623976115),
("intersect by `da` or `de`Sexta-feira", -2.917770732084279),
("yearday", -2.512305623976115),
("Segunda-feira<day-of-month> de <named-month>",
-2.917770732084279)],
n = 11},
koData =
ClassData{prior = -0.4382549309311553,
unseen = -3.9889840465642745,
ClassData{prior = -0.4382549309311553, unseen = -4.02535169073515,
likelihoods =
HashMap.fromList
[("hourday", -2.3608540011180215),
("dayhour", -1.4853852637641216),
("daymonth", -2.583997552432231),
("monthday", -2.871679624884012),
("intersectnamed-day", -2.871679624884012),
("intersect by `da` or `de`named-day", -2.3608540011180215),
("named-dayintersect", -2.871679624884012),
("named-dayintersect by `da` or `de`", -1.405342556090585)],
[("hourday", -2.3978952727983707),
("dayhour", -1.5224265354444706),
("daymonth", -2.6210388241125804),
("monthday", -2.908720896564361),
("Segunda-feiraintersect by `da` or `de`", -2.908720896564361),
("Sexta-feiraintersect", -2.908720896564361),
("intersectSexta-feira", -2.908720896564361),
("Sexta-feiraintersect by `da` or `de`", -1.6094379124341003),
("intersect by `da` or `de`Sexta-feira", -2.3978952727983707)],
n = 20}}),
("Sexta-feira",
Classifier{okData =
ClassData{prior = 0.0, unseen = -2.4849066497880004,
likelihoods = HashMap.fromList [("", 0.0)], n = 10},
koData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0}}),
("integer (0..19)",
Classifier{okData =
ClassData{prior = -1.9048194970694474e-2,
@ -305,19 +352,29 @@ classifiers
("<day-of-month> de <named-month>",
Classifier{okData =
ClassData{prior = -7.410797215372185e-2,
unseen = -4.02535169073515,
unseen = -4.189654742026425,
likelihoods =
HashMap.fromList
[("integer (0..19)named-month", -1.927891643552635),
("integer (numeric)named-month", -1.0116009116784799),
("month", -0.7114963192281418)],
[("integer (numeric)Fevereiro", -2.228477120840324),
("integer (numeric)Mar\231o", -3.0757749812275272),
("integer (0..19)Maio", -3.0757749812275272),
("integer (numeric)Abril", -3.481240089335692),
("integer (numeric)Julho", -3.481240089335692),
("integer (numeric)Dezembro", -3.481240089335692),
("integer (numeric)Janeiro", -3.481240089335692),
("month", -0.878550403891308),
("integer (0..19)Julho", -3.481240089335692),
("integer (0..19)Abril", -3.481240089335692),
("integer (numeric)Maio", -3.0757749812275272),
("integer (0..19)Mar\231o", -2.7880929087757464),
("integer (numeric)Setembro", -2.382627800667582)],
n = 26},
koData =
ClassData{prior = -2.639057329615259, unseen = -2.0794415416798357,
ClassData{prior = -2.639057329615259, unseen = -2.890371757896165,
likelihoods =
HashMap.fromList
[("integer (numeric)named-month", -0.8472978603872037),
("month", -0.8472978603872037)],
[("integer (numeric)Julho", -1.7346010553881064),
("month", -1.7346010553881064)],
n = 2}}),
("<time-of-day> <part-of-day>",
Classifier{okData =
@ -352,7 +409,7 @@ classifiers
("hourhour", -2.6026896854443837),
("time-of-day (latent)morning", -3.295836866004329),
("year (latent)afternoon", -2.1972245773362196),
("named-monthmorning", -3.295836866004329),
("Fevereiromorning", -3.295836866004329),
("year (latent)evening", -2.890371757896165),
("time-of-day (latent)afternoon", -2.890371757896165),
("year (latent)morning", -2.379546134130174),
@ -370,12 +427,19 @@ classifiers
("monthyear", -2.1972245773362196),
("monthhour", -1.791759469228055),
("monthmonth", -2.1972245773362196),
("named-monthyear (latent)", -2.1972245773362196),
("named-monthtime-of-day (latent)", -2.1972245773362196),
("named-month<day-of-month> de <named-month>",
("Dezembrotime-of-day (latent)", -2.1972245773362196),
("Dezembro<day-of-month> de <named-month>",
-2.1972245773362196),
("named-monthintersect by `da` or `de`", -1.791759469228055)],
("Dezembroyear (latent)", -2.1972245773362196),
("Dezembrointersect by `da` or `de`", -1.791759469228055)],
n = 5}}),
("Maio",
Classifier{okData =
ClassData{prior = 0.0, unseen = -1.791759469228055,
likelihoods = HashMap.fromList [("", 0.0)], n = 4},
koData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0}}),
("number (21..29 31..39 .. 91..99)",
Classifier{okData =
ClassData{prior = 0.0, unseen = -1.6094379124341003,
@ -407,119 +471,121 @@ classifiers
n = 7}}),
("intersect",
Classifier{okData =
ClassData{prior = -0.702716631576096, unseen = -5.1298987149230735,
ClassData{prior = -0.702716631576096, unseen = -5.14166355650266,
likelihoods =
HashMap.fromList
[("<day-of-month> de <named-month>in the <part-of-day>",
-4.02535169073515),
("dayhour", -2.3513752571634776),
-4.037186148382152),
("dayhour", -2.3632097148104805),
("<day-of-month> de <named-month>two time tokens separated by \",\"2",
-4.430816798843313),
-4.442651256490317),
("nowquinze para as <hour-of-day> (as relative minutes)",
-4.430816798843313),
("intersectnamed-day", -4.430816798843313),
("now\224s <time-of-day>", -4.02535169073515),
("<day-of-month> de <named-month>intersect",
-4.430816798843313),
("now<hour-of-day> and 3/4", -4.430816798843313),
("intersect by `da` or `de`named-day", -4.430816798843313),
("<day-of-month> de <named-month>two time tokens separated by \",\"",
-4.430816798843313),
("dayday", -2.9267394020670396),
("hourhour", -4.430816798843313),
("named-day\224s <time-of-day>", -4.430816798843313),
("dayyear", -2.4159137783010487),
("minutehour", -3.1780538303479458),
("<hour-of-day> and quinzein the <part-of-day>",
-4.02535169073515),
("de <year>named-day", -4.430816798843313),
("intersect by `da` or `de`in the <part-of-day>",
-4.02535169073515),
("now<hour-of-day> and <relative minutes>", -4.430816798843313),
("named-dayin the <part-of-day>", -4.430816798843313),
("tomorrow<time-of-day> horas", -4.02535169073515),
("now<integer> para as <hour-of-day> (as relative minutes)",
-4.430816798843313),
("\224s <time-of-day>in the <part-of-day>",
-3.7376696182833684),
("named-dayintersect", -4.430816798843313),
("named-dayamanh\227 pela <part-of-day>", -4.430816798843313),
("dayminute", -3.1780538303479458),
("named-dayintersect by `da` or `de`", -4.430816798843313),
("yearnamed-day", -4.430816798843313),
("dd-dd <month>(interval)de <year>", -4.430816798843313),
("named-day<day-of-month> de <named-month>",
-4.430816798843313),
("named-day<time-of-day> <part-of-day>", -4.430816798843313),
("dia <day-of-month> de <named-month>in the <part-of-day>",
-4.02535169073515),
("yearday", -4.02535169073515),
("named-day<dim time> da manha", -4.430816798843313),
("two time tokens separated by \",\"de <year>",
-4.02535169073515),
("dd[/-]mmyear", -4.430816798843313),
("<day-of-month> de <named-month>de <year>",
-3.044522437723423),
("tomorrowdepois das <time-of-day>", -4.02535169073515),
("<hour-of-day> and <relative minutes>in the <part-of-day>",
-4.02535169073515),
("two time tokens separated by \",\"2de <year>",
-4.02535169073515),
("intersectde <year>", -4.430816798843313)],
n = 52},
koData =
ClassData{prior = -0.6836684366054016, unseen = -5.14166355650266,
likelihoods =
HashMap.fromList
[("hourday", -2.9385738597140425),
("dayhour", -3.056356895370426),
("daymonth", -4.442651256490317),
("monthday", -3.3440389678222067),
("named-month\224s <time-of-day>", -4.442651256490317),
("monthyear", -2.9385738597140425),
("intersectnamed-day", -4.037186148382152),
-4.442651256490317),
("Quart-feiraamanh\227 pela <part-of-day>", -4.442651256490317),
("now\224s <time-of-day>", -4.037186148382152),
("houryear", -2.496741107435003),
("<time-of-day> am|pm<day-of-month> de <named-month>",
("<day-of-month> de <named-month>intersect",
-4.442651256490317),
("intersect by `da` or `de`named-day", -3.5263605246161616),
("monthhour", -3.1898882879949486),
("intersect by `da` or `de`two time tokens separated by \",\"",
-4.442651256490317),
("hourmonth", -4.442651256490317),
("<time-of-day> am|pmintersect by `da` or `de`",
-4.442651256490317),
("dayyear", -4.442651256490317),
("intersect by `da` or `de`\224s <time-of-day>",
("now<hour-of-day> and 3/4", -4.442651256490317),
("<day-of-month> de <named-month>two time tokens separated by \",\"",
-4.442651256490317),
("Segunda-feirain the <part-of-day>", -4.442651256490317),
("Quart-feira<time-of-day> <part-of-day>", -4.442651256490317),
("yearSexta-feira", -4.442651256490317),
("dayday", -2.9385738597140425),
("hourhour", -4.442651256490317),
("dayyear", -2.4277482359480516),
("de <year>Sexta-feira", -4.442651256490317),
("minutehour", -3.1898882879949486),
("<hour-of-day> and quinzein the <part-of-day>",
-4.037186148382152),
("intersect by `da` or `de`in the <part-of-day>",
-4.037186148382152),
("now<hour-of-day> and <relative minutes>", -4.442651256490317),
("daysecond", -4.442651256490317),
("named-dayright now", -4.442651256490317),
("named-dayintersect", -4.442651256490317),
("dayminute", -4.037186148382152),
("named-monthde <year>", -2.9385738597140425),
("intersect by `da` or `de`intersect", -4.442651256490317),
("intersect by `da` or `de`two time tokens separated by \",\"2",
("tomorrow<time-of-day> horas", -4.037186148382152),
("now<integer> para as <hour-of-day> (as relative minutes)",
-4.442651256490317),
("named-dayintersect by `da` or `de`", -3.3440389678222067),
("named-monthin the <part-of-day>", -4.037186148382152),
("named-monthtwo time tokens separated by \",\"2",
("\224s <time-of-day>in the <part-of-day>",
-3.7495040759303713),
("Sexta-feiraintersect", -4.442651256490317),
("dayminute", -3.1898882879949486),
("intersectSexta-feira", -4.442651256490317),
("Sexta-feira<day-of-month> de <named-month>",
-4.442651256490317),
("dd-dd <month>(interval)de <year>", -4.442651256490317),
("Sexta-feiraintersect by `da` or `de`", -4.442651256490317),
("intersect by `da` or `de`Sexta-feira", -4.442651256490317),
("dia <day-of-month> de <named-month>in the <part-of-day>",
-4.037186148382152),
("yearday", -4.037186148382152),
("two time tokens separated by \",\"de <year>",
-4.037186148382152),
("intersect by `da` or `de`de <year>", -2.9385738597140425),
("named-monthintersect", -4.442651256490317),
("<day-of-month> de <named-month>\224s <time-of-day>",
-4.442651256490317),
("dd[/-]mmyear", -4.442651256490317),
("Quart-feira\224s <time-of-day>", -4.442651256490317),
("<day-of-month> de <named-month>de <year>",
-4.442651256490317),
-3.056356895370426),
("Quart-feira<dim time> da manha", -4.442651256490317),
("tomorrowdepois das <time-of-day>", -4.037186148382152),
("<hour-of-day> and <relative minutes>in the <part-of-day>",
-4.037186148382152),
("two time tokens separated by \",\"2de <year>",
-4.037186148382152),
("intersectde <year>", -4.442651256490317),
("named-monthtwo time tokens separated by \",\"",
-4.442651256490317)],
("intersectde <year>", -4.442651256490317)],
n = 52},
koData =
ClassData{prior = -0.6836684366054016, unseen = -5.153291594497779,
likelihoods =
HashMap.fromList
[("hourday", -2.9502698994772336),
("dayhour", -3.068052935133617),
("daymonth", -4.454347296253507),
("monthday", -3.355735007585398),
("monthyear", -2.9502698994772336),
("now\224s <time-of-day>", -4.048882188145344),
("houryear", -2.5084371471981943),
("Dezembro\224s <time-of-day>", -4.454347296253507),
("<time-of-day> am|pm<day-of-month> de <named-month>",
-4.454347296253507),
("Fevereiroin the <part-of-day>", -4.048882188145344),
("monthhour", -3.2015843277581397),
("Setembrointersect", -4.454347296253507),
("intersect by `da` or `de`two time tokens separated by \",\"",
-4.454347296253507),
("hourmonth", -4.454347296253507),
("Julhode <year>", -4.454347296253507),
("<time-of-day> am|pmintersect by `da` or `de`",
-4.454347296253507),
("Setembrotwo time tokens separated by \",\"2",
-4.454347296253507),
("dayyear", -4.454347296253507),
("intersect by `da` or `de`\224s <time-of-day>",
-4.454347296253507),
("intersect by `da` or `de`in the <part-of-day>",
-4.048882188145344),
("Setembrode <year>", -3.355735007585398),
("now<hour-of-day> and <relative minutes>", -4.454347296253507),
("daysecond", -4.454347296253507),
("Ter\231a-feiraright now", -4.454347296253507),
("Sexta-feiraintersect", -4.454347296253507),
("dayminute", -4.048882188145344),
("Setembrotwo time tokens separated by \",\"",
-4.454347296253507),
("intersect by `da` or `de`intersect", -4.454347296253507),
("intersectSexta-feira", -4.048882188145344),
("intersect by `da` or `de`two time tokens separated by \",\"2",
-4.454347296253507),
("Sexta-feiraintersect by `da` or `de`", -3.355735007585398),
("intersect by `da` or `de`Sexta-feira", -3.5380565643793527),
("two time tokens separated by \",\"de <year>",
-4.048882188145344),
("intersect by `da` or `de`de <year>", -2.9502698994772336),
("<day-of-month> de <named-month>\224s <time-of-day>",
-4.454347296253507),
("Maiode <year>", -4.048882188145344),
("<day-of-month> de <named-month>de <year>",
-4.454347296253507),
("two time tokens separated by \",\"2de <year>",
-4.048882188145344),
("intersectde <year>", -4.454347296253507)],
n = 53}}),
("season",
Classifier{okData =
@ -537,33 +603,36 @@ classifiers
likelihoods = HashMap.fromList [], n = 0}}),
("two time tokens separated by \",\"2",
Classifier{okData =
ClassData{prior = -1.0360919316867756, unseen = -3.58351893845611,
ClassData{prior = -1.0360919316867756,
unseen = -3.6375861597263857,
likelihoods =
HashMap.fromList
[("intersectnamed-day", -2.8622008809294686),
("intersect by `da` or `de`named-day", -2.8622008809294686),
("dayday", -1.252762968495368),
("de <year>named-day", -2.8622008809294686),
("named-dayintersect", -2.456735772821304),
("named-dayintersect by `da` or `de`", -2.456735772821304),
("yearnamed-day", -2.8622008809294686),
("named-day<day-of-month> de <named-month>",
-2.169053700369523),
("yearday", -2.456735772821304)],
[("yearSexta-feira", -2.917770732084279),
("dayday", -1.3083328196501787),
("de <year>Sexta-feira", -2.917770732084279),
("Sexta-feiraintersect", -2.512305623976115),
("intersectSexta-feira", -2.917770732084279),
("Sexta-feira<day-of-month> de <named-month>",
-2.512305623976115),
("Sexta-feiraintersect by `da` or `de`", -2.512305623976115),
("intersect by `da` or `de`Sexta-feira", -2.917770732084279),
("yearday", -2.512305623976115),
("Segunda-feira<day-of-month> de <named-month>",
-2.917770732084279)],
n = 11},
koData =
ClassData{prior = -0.4382549309311553,
unseen = -3.9889840465642745,
ClassData{prior = -0.4382549309311553, unseen = -4.02535169073515,
likelihoods =
HashMap.fromList
[("hourday", -2.3608540011180215),
("dayhour", -1.4853852637641216),
("daymonth", -2.583997552432231),
("monthday", -2.871679624884012),
("intersectnamed-day", -2.871679624884012),
("intersect by `da` or `de`named-day", -2.3608540011180215),
("named-dayintersect", -2.871679624884012),
("named-dayintersect by `da` or `de`", -1.405342556090585)],
[("hourday", -2.3978952727983707),
("dayhour", -1.5224265354444706),
("daymonth", -2.6210388241125804),
("monthday", -2.908720896564361),
("Segunda-feiraintersect by `da` or `de`", -2.908720896564361),
("Sexta-feiraintersect", -2.908720896564361),
("intersectSexta-feira", -2.908720896564361),
("Sexta-feiraintersect by `da` or `de`", -1.6094379124341003),
("intersect by `da` or `de`Sexta-feira", -2.3978952727983707)],
n = 20}}),
("year (latent)",
Classifier{okData =
@ -584,8 +653,7 @@ classifiers
likelihoods =
HashMap.fromList
[("dia <day-of-month> (non ordinal)", -1.252762968495368),
("day", -0.8472978603872037),
("named-day", -1.252762968495368)],
("Domingo", -1.252762968495368), ("day", -0.8472978603872037)],
n = 2},
koData =
ClassData{prior = -infinity, unseen = -1.3862943611198906,
@ -634,7 +702,7 @@ classifiers
("hourhour", -2.639057329615259),
("time-of-day (latent)morning", -3.332204510175204),
("year (latent)afternoon", -2.2335922215070942),
("named-monthmorning", -3.332204510175204),
("Fevereiromorning", -3.332204510175204),
("year (latent)evening", -2.9267394020670396),
("time-of-day (latent)afternoon", -2.9267394020670396),
("year (latent)morning", -2.4159137783010487),
@ -659,6 +727,20 @@ classifiers
("hour", -2.2512917986064953), ("month", -2.9444389791664407),
("minute", -1.845826690498331)],
n = 15}}),
("Segunda-feira",
Classifier{okData =
ClassData{prior = 0.0, unseen = -2.4849066497880004,
likelihoods = HashMap.fromList [("", 0.0)], n = 10},
koData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0}}),
("Fevereiro",
Classifier{okData =
ClassData{prior = 0.0, unseen = -2.0794415416798357,
likelihoods = HashMap.fromList [("", 0.0)], n = 6},
koData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0}}),
("dia <day-of-month> (non ordinal)",
Classifier{okData =
ClassData{prior = 0.0, unseen = -2.0794415416798357,
@ -667,13 +749,6 @@ classifiers
koData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0}}),
("named-month",
Classifier{okData =
ClassData{prior = 0.0, unseen = -3.4657359027997265,
likelihoods = HashMap.fromList [("", 0.0)], n = 30},
koData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0}}),
("now",
Classifier{okData =
ClassData{prior = 0.0, unseen = -1.9459101490553135,
@ -686,7 +761,7 @@ classifiers
ClassData{prior = 0.0, unseen = -1.9459101490553135,
likelihoods =
HashMap.fromList
[("integer (numeric)named-month", -0.6931471805599453),
[("integer (numeric)Fevereiro", -0.6931471805599453),
("month", -0.6931471805599453)],
n = 2},
koData =
@ -703,6 +778,20 @@ classifiers
koData =
ClassData{prior = -infinity, unseen = -1.0986122886681098,
likelihoods = HashMap.fromList [], n = 0}}),
("Ter\231a-feira",
Classifier{okData =
ClassData{prior = 0.0, unseen = -2.9444389791664407,
likelihoods = HashMap.fromList [("", 0.0)], n = 17},
koData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0}}),
("Domingo",
Classifier{okData =
ClassData{prior = 0.0, unseen = -2.0794415416798357,
likelihoods = HashMap.fromList [("", 0.0)], n = 6},
koData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0}}),
("antes das <time-of-day>",
Classifier{okData =
ClassData{prior = 0.0, unseen = -2.4849066497880004,
@ -719,8 +808,7 @@ classifiers
ClassData{prior = 0.0, unseen = -1.9459101490553135,
likelihoods =
HashMap.fromList
[("named-month", -0.6931471805599453),
("month", -0.6931471805599453)],
[("Julho", -0.6931471805599453), ("month", -0.6931471805599453)],
n = 2},
koData =
ClassData{prior = -infinity, unseen = -1.0986122886681098,
@ -786,7 +874,7 @@ classifiers
likelihoods =
HashMap.fromList
[("intersect by `da` or `de`", -2.0794415416798357),
("named-month", -2.0794415416798357),
("Fevereiro", -2.0794415416798357),
("time-of-day (latent)", -2.0794415416798357),
("hour", -2.0794415416798357), ("month", -1.6739764335716716)],
n = 3}}),
@ -825,6 +913,13 @@ classifiers
koData =
ClassData{prior = -infinity, unseen = -1.9459101490553135,
likelihoods = HashMap.fromList [], n = 0}}),
("Janeiro",
Classifier{okData =
ClassData{prior = 0.0, unseen = -1.0986122886681098,
likelihoods = HashMap.fromList [("", 0.0)], n = 1},
koData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0}}),
("afternoon",
Classifier{okData =
ClassData{prior = 0.0, unseen = -2.0794415416798357,
@ -844,6 +939,13 @@ classifiers
koData =
ClassData{prior = -infinity, unseen = -1.3862943611198906,
likelihoods = HashMap.fromList [], n = 0}}),
("Dezembro",
Classifier{okData =
ClassData{prior = 0.0, unseen = -1.0986122886681098,
likelihoods = HashMap.fromList [("", 0.0)], n = 1},
koData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0}}),
("time-of-day (latent)",
Classifier{okData =
ClassData{prior = -0.9328200338253656, unseen = -3.332204510175204,
@ -999,10 +1101,17 @@ classifiers
koData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0}}),
("named-day",
("Quart-feira",
Classifier{okData =
ClassData{prior = 0.0, unseen = -4.174387269895637,
likelihoods = HashMap.fromList [("", 0.0)], n = 63},
ClassData{prior = 0.0, unseen = -2.3978952727983707,
likelihoods = HashMap.fromList [("", 0.0)], n = 9},
koData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0}}),
("Quinta-feira",
Classifier{okData =
ClassData{prior = 0.0, unseen = -1.9459101490553135,
likelihoods = HashMap.fromList [("", 0.0)], n = 5},
koData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0}}),
@ -1044,18 +1153,25 @@ classifiers
likelihoods =
HashMap.fromList
[("day", -0.6931471805599453),
("named-day", -0.6931471805599453)],
("Quart-feira", -0.6931471805599453)],
n = 3},
koData =
ClassData{prior = -infinity, unseen = -1.0986122886681098,
likelihoods = HashMap.fromList [], n = 0}}),
("Setembro",
Classifier{okData =
ClassData{prior = 0.0, unseen = -1.9459101490553135,
likelihoods = HashMap.fromList [("", 0.0)], n = 5},
koData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0}}),
("<part-of-day> dessa semana",
Classifier{okData =
ClassData{prior = 0.0, unseen = -2.1972245773362196,
likelihoods =
HashMap.fromList
[("day", -0.6931471805599453),
("named-day", -0.6931471805599453)],
[("Ter\231a-feira", -0.6931471805599453),
("day", -0.6931471805599453)],
n = 3},
koData =
ClassData{prior = -infinity, unseen = -1.0986122886681098,
@ -1170,14 +1286,14 @@ classifiers
("monthmonth", -2.9444389791664407),
("hourhour", -2.9444389791664407),
("dayyear", -2.9444389791664407),
("named-month<day-of-month> de <named-month>",
("Dezembro<day-of-month> de <named-month>",
-2.9444389791664407),
("intersect by `da` or `de`<day-of-month> de <named-month>",
-2.538973871058276),
("intersect by `da` or `de`intersect by `da` or `de`",
-2.538973871058276),
("dd[/-]mmyear", -2.9444389791664407),
("named-monthintersect by `da` or `de`", -2.538973871058276)],
("Dezembrointersect by `da` or `de`", -2.538973871058276),
("dd[/-]mmyear", -2.9444389791664407)],
n = 10}}),
("segundo (grain)",
Classifier{okData =
@ -1186,6 +1302,13 @@ classifiers
koData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0}}),
("Mar\231o",
Classifier{okData =
ClassData{prior = 0.0, unseen = -2.1972245773362196,
likelihoods = HashMap.fromList [("", 0.0)], n = 7},
koData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0}}),
("tens (20..90)",
Classifier{okData =
ClassData{prior = 0.0, unseen = -1.9459101490553135,
@ -1215,14 +1338,15 @@ classifiers
likelihoods = HashMap.fromList [], n = 0}}),
("this|next <day-of-week>",
Classifier{okData =
ClassData{prior = 0.0, unseen = -3.044522437723423,
ClassData{prior = 0.0, unseen = -3.0910424533583156,
likelihoods =
HashMap.fromList
[("day", -0.6931471805599453),
("named-day", -0.6931471805599453)],
[("Segunda-feira", -1.6582280766035324),
("Ter\231a-feira", -1.0986122886681098),
("day", -0.7419373447293773)],
n = 9},
koData =
ClassData{prior = -infinity, unseen = -1.0986122886681098,
ClassData{prior = -infinity, unseen = -1.3862943611198906,
likelihoods = HashMap.fromList [], n = 0}}),
("natal",
Classifier{okData =
@ -1331,10 +1455,10 @@ classifiers
likelihoods =
HashMap.fromList
[("season", -2.1102132003465894),
("Segunda-feira", -2.1102132003465894),
("dia <day-of-month> (non ordinal)", -2.1102132003465894),
("day", -1.1939224684724346),
("named-day", -2.1102132003465894),
("hour", -1.8870696490323797), ("evening", -2.3978952727983707),
("day", -1.1939224684724346), ("hour", -1.8870696490323797),
("evening", -2.3978952727983707),
("week-end", -2.3978952727983707)],
n = 13},
koData =

View File

@ -19,6 +19,7 @@ import qualified Duckling.AmountOfMoney.Rules as AmountOfMoney
import qualified Duckling.Distance.Rules as Distance
import qualified Duckling.Duration.Rules as Duration
import qualified Duckling.Email.Rules as Email
import qualified Duckling.Numeral.Rules as Numeral
import qualified Duckling.PhoneNumber.Rules as PhoneNumber
import qualified Duckling.Temperature.Rules as Temperature
import qualified Duckling.Url.Rules as Url
@ -29,7 +30,7 @@ rules (This AmountOfMoney) = AmountOfMoney.rules
rules (This Distance) = Distance.rules
rules (This Duration) = Duration.rules
rules (This Email) = Email.rules
rules (This Numeral) = []
rules (This Numeral) = Numeral.rules
rules (This Ordinal) = []
rules (This PhoneNumber) = PhoneNumber.rules
rules (This Quantity) = []

View File

@ -330,6 +330,7 @@ library
, Duckling.Numeral.RO.Corpus
, Duckling.Numeral.RO.Rules
, Duckling.Numeral.Helpers
, Duckling.Numeral.Rules
, Duckling.Numeral.Types
-- Ordinal

View File

@ -10,23 +10,25 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TupleSections #-}
module Duckling.Api.Tests (tests) where
module Duckling.Api.Tests
( tests
) where
import qualified Data.HashMap.Strict as HashMap
import qualified Data.HashSet as HashSet
import Data.List (sortOn)
import Data.Text (Text)
import Prelude
import Test.Tasty
import Test.Tasty.HUnit
import qualified Data.HashMap.Strict as HashMap
import qualified Data.HashSet as HashSet
import Duckling.Api
import Duckling.Dimensions.Types
import Duckling.Locale
import qualified Duckling.Numeral.Types as TNumeral
import Duckling.Testing.Asserts
import Duckling.Testing.Types
import Duckling.Types
import qualified Duckling.Numeral.Types as TNumeral
tests :: TestTree
tests = testGroup "API Tests"
@ -134,4 +136,5 @@ supportedDimensionsTest = testCase "Supported Dimensions Test" $ do
check (l, expected) = case HashMap.lookup l supportedDimensions of
Nothing -> assertFailure $ "no dimensions for " ++ show l
Just actual ->
assertEqual ("wrong dimensions for " ++ show l) expected actual
assertEqual ("wrong dimensions for " ++ show l)
(HashSet.fromList expected) (HashSet.fromList actual)