Add support for larger spelled-out Danish ordinals (#346)

Summary:
Adds support for larger spelled-out Danish ordinal number expressions, like
treoghalvfemsindstyvende (93rd)
or
tohundrede og femogfyrrende (245th)
Pull Request resolved: https://github.com/facebook/duckling/pull/346

Reviewed By: patapizza

Differential Revision: D14476918

Pulled By: chinmay87

fbshipit-source-id: eb20ee8d304f291ff4ab2b28c4e272a9d447396e
This commit is contained in:
Jens Persson 2019-05-23 15:58:12 -07:00 committed by Facebook Github Bot
parent bf89e34365
commit 41f140992d
5 changed files with 176 additions and 73 deletions

View File

@ -141,14 +141,16 @@ rulePowersOfTen :: Rule
rulePowersOfTen = Rule
{ name = "powers of tens"
, pattern =
[ regex "(hundrede?|tusinde?|million(er)?)"
[ regex "(hundrede?|tohundrede|tusinde?|totusinde|million(er)?)"
]
, prod = \tokens -> case tokens of
(Token RegexMatch (GroupMatch (match:_)):_) -> case Text.toLower match of
"hundred" -> double 1e2 >>= withGrain 2 >>= withMultipliable
"hundrede" -> double 1e2 >>= withGrain 2 >>= withMultipliable
"tohundrede" -> double (2 * 1e2) >>= withGrain 2 >>= withMultipliable
"tusind" -> double 1e3 >>= withGrain 3 >>= withMultipliable
"tusinde" -> double 1e3 >>= withGrain 3 >>= withMultipliable
"totusinde" -> double (2 * 1e3) >>= withGrain 3 >>= withMultipliable
"million" -> double 1e6 >>= withGrain 6 >>= withMultipliable
"millioner" -> double 1e6 >>= withGrain 6 >>= withMultipliable
_ -> Nothing

View File

@ -22,9 +22,24 @@ corpus :: Corpus
corpus = (testContext {locale = makeLocale DA Nothing}, testOptions, allExamples)
allExamples :: [Example]
allExamples =
examples (OrdinalData 4)
[ "4."
, "fjerde"
, "Fjerde"
]
allExamples = concat
[ examples (OrdinalData 4)
[ "4."
, "fjerde"
, "Fjerde"
]
, examples (OrdinalData 41)
[ "enogfyrrende"
]
, examples (OrdinalData 78)
[ "otteoghalvfjerdsindstyvende"
]
, examples (OrdinalData 263)
[ "to hundrede og treogtresindstyvende"
, "tohundrede og treogtresindstyvende"
]
, examples (OrdinalData 70)
[ "halvfjerdsende"
, "halvfjerdsindstyvende"
]
]

View File

@ -7,60 +7,143 @@
{-# LANGUAGE GADTs #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE NoRebindableSyntax #-}
module Duckling.Ordinal.DA.Rules
( rules ) where
import qualified Data.Text as Text
import Data.HashMap.Strict (HashMap)
import Data.Text (Text)
import Prelude
import Data.String
import qualified Data.HashMap.Strict as HashMap
import qualified Data.Text as Text
import Duckling.Dimensions.Types
import Duckling.Numeral.Helpers (parseInt)
import Duckling.Numeral.Helpers (parseInt, numberWith)
import Duckling.Numeral.Types (NumeralData (..), getIntValue)
import Duckling.Ordinal.Helpers
import Duckling.Ordinal.Types (OrdinalData (..))
import Duckling.Regex.Types
import Duckling.Types
import qualified Duckling.Numeral.Types as TNumeral
-- | Spelled-out Danish ordinals mapped to their numeric value.
--
-- Keys are lower-case; 'ruleOrdinalsFirstst' looks up the lower-cased regex
-- match in this table.
--
-- NOTE(review): the regex of 'ruleOrdinalsFirstst' only matches "første" up to
-- "nittende", so the entries from 20 upwards look unreachable through that
-- rule — confirm before relying on them.
ordinalsMap :: HashMap Text Int
ordinalsMap = HashMap.fromList
  [ ( "første", 1 )
  , ( "anden", 2 )
  , ( "tredje", 3 )
  , ( "fjerde", 4 )
  , ( "femte", 5 )
  , ( "sjette", 6 )
  , ( "syvende", 7 )
  , ( "ottende", 8 )
  , ( "niende", 9 )
  , ( "tiende", 10 )
  , ( "elfte", 11 )
  , ( "tolvte", 12 )
  , ( "trettende", 13 )
  , ( "fjortende", 14 )
  , ( "femtende", 15 )
  , ( "sekstende", 16 )
  , ( "syttende", 17 )
  , ( "attende", 18 )
  , ( "nittende", 19 )
  , ( "tyvende", 20 )
  -- Follows the same "<unit>og" + "tyvende" shape as the entries for 22..29;
  -- the key previously carried a stray leading "t" ("tenogtyvende").
  , ( "enogtyvende", 21 )
  , ( "toogtyvende", 22 )
  , ( "treogtyvende", 23 )
  , ( "fireogtyvende", 24 )
  , ( "femogtyvende", 25 )
  , ( "seksogtyvende", 26 )
  , ( "syvogtyvende", 27 )
  , ( "otteogtyvende", 28 )
  , ( "niogtyvende", 29 )
  , ( "tredivte", 30 )
  , ( "enogtredivte", 31 )
  ]
-- | Ordinal word forms of the tens (20 .. 90) mapped to their numeric value.
--
-- Despite the name, the keys are the ordinal spellings that the second capture
-- group of 'ruleSpelledOutOrdinals' can produce; both the short form and the
-- long "-tyvende" form are listed where the regex accepts both.
--
-- Every key must be spelled exactly like the text the regex captures, because
-- a failed lookup makes the rule produce 'Nothing'.
cardinalsMap :: HashMap Text Int
cardinalsMap = HashMap.fromList
  [ ( "tyvende", 20 )
  , ( "tredivte", 30 )
  , ( "fyrrende", 40 )
  , ( "fyrretyvende", 40 )
  , ( "halvtredsende", 50 )
  , ( "halvtredsindstyvende", 50 )
  , ( "tressende", 60 )
  , ( "tresindstyvende", 60 )
  , ( "halvfjerdsende", 70 )
  , ( "halvfjerdsindstyvende", 70 )
  , ( "firsende", 80 )
  -- The regex alternative is @firs(?:indstyv)?ende@, i.e. "firsindstyvende"
  -- with a single "s" before "tyvende". The former key "firsindsstyvende"
  -- could never be matched, so the long form of 80th..89th was rejected.
  , ( "firsindstyvende", 80 )
  , ( "halvfemsende", 90 )
  , ( "halvfemsindstyvende", 90 )
  ]
-- | Value contributed by the "<unit>og" prefix of a compound ordinal.
--
-- The empty string maps to 0; the nine prefixes map to 1 .. 9 in the order
-- they are listed.
oneValMap :: HashMap Text Int
oneValMap = HashMap.fromList $ ( "", 0 ) : zip unitPrefixes [1 ..]
  where
    -- Order matters: the position in this list determines the value.
    unitPrefixes =
      [ "enog"
      , "toog"
      , "treog"
      , "fireog"
      , "femog"
      , "seksog"
      , "syvog"
      , "otteog"
      , "niog"
      ]
ruleOrdinalsFirstst :: Rule
ruleOrdinalsFirstst = Rule
{ name = "ordinals (first..31st)"
{ name = "ordinals (first..19st)"
, pattern =
[ regex "(første|anden|tredje|fjerde|femte|sjette|syvende|ottende|niende|tiende|elfte|tolvte|trettende|fjortende|femtende|sekstende|syttende|attende|nittende|tyvende|tenogtyvende|toogtyvende|treogtyvende|fireogtyvende|femogtyvende|seksogtyvende|syvogtyvende|otteogtyvende|niogtyvende|tredivte|enogtredivte)"
[ regex "(første|anden|tredje|fjerde|femte|sjette|syvende|ottende|niende|tiende|elfte|tolvte|trettende|fjortende|femtende|sekstende|syttende|attende|nittende)"
]
, prod = \tokens -> case tokens of
(Token RegexMatch (GroupMatch (match:_)):_) -> case Text.toLower match of
"første" -> Just $ ordinal 1
"anden" -> Just $ ordinal 2
"tredje" -> Just $ ordinal 3
"fjerde" -> Just $ ordinal 4
"femte" -> Just $ ordinal 5
"sjette" -> Just $ ordinal 6
"syvende" -> Just $ ordinal 7
"ottende" -> Just $ ordinal 8
"niende" -> Just $ ordinal 9
"tiende" -> Just $ ordinal 10
"elfte" -> Just $ ordinal 11
"tolvte" -> Just $ ordinal 12
"trettende" -> Just $ ordinal 13
"fjortende" -> Just $ ordinal 14
"femtende" -> Just $ ordinal 15
"sekstende" -> Just $ ordinal 16
"syttende" -> Just $ ordinal 17
"attende" -> Just $ ordinal 18
"nittende" -> Just $ ordinal 19
"tyvende" -> Just $ ordinal 20
"tenogtyvende" -> Just $ ordinal 21
"toogtyvende" -> Just $ ordinal 22
"treogtyvende" -> Just $ ordinal 23
"fireogtyvende" -> Just $ ordinal 24
"femogtyvende" -> Just $ ordinal 25
"seksogtyvende" -> Just $ ordinal 26
"syvogtyvende" -> Just $ ordinal 27
"otteogtyvende" -> Just $ ordinal 28
"niogtyvende" -> Just $ ordinal 29
"tredivte" -> Just $ ordinal 30
"enogtredivte" -> Just $ ordinal 31
_ -> Nothing
(Token RegexMatch (GroupMatch (match:_)):_) ->
ordinal <$> HashMap.lookup (Text.toLower match) ordinalsMap
_ -> Nothing
}
-- | Spelled-out ordinals made of an optional "<unit>og" prefix followed by the
-- ordinal form of a ten, e.g. "enogfyrrende" (41).
--
-- The regex has two capture groups: the unit prefix and the tens word. Each is
-- lower-cased and looked up ('oneValMap' and 'cardinalsMap' respectively); the
-- rule yields the sum of both values, or 'Nothing' if either lookup fails.
-- 'oneValMap' has an entry for the empty string, presumably covering the case
-- where the optional prefix is absent.
ruleSpelledOutOrdinals :: Rule
ruleSpelledOutOrdinals = Rule
  { name = "ordinals, 20 to 99, spelled-out"
  , pattern = [ regex (unitPrefix ++ tensWord) ]
  , prod = \case
      (Token RegexMatch (GroupMatch (unitTxt:tensTxt:_)):_) ->
        let units = HashMap.lookup (Text.toLower unitTxt) oneValMap
            tens = HashMap.lookup (Text.toLower tensTxt) cardinalsMap
        in ordinal <$> ((+) <$> units <*> tens)
      _ -> Nothing
  }
  where
    unitPrefix, tensWord :: String
    -- Group 1: optional unit followed by "og".
    unitPrefix = "((?:en|to|tre|fire|fem|seks|syv|otte|ni)og)?"
    -- Group 2: ordinal form of the ten, short or long spelling.
    tensWord =
      "(tyvende|tredivte|fyrr(?:etyv)?ende|halvtreds(?:indstyv)?ende"
        ++ "|tres(?:indstyv|s)?ende|halvfjerds(?:indstyv)?ende"
        ++ "|firs(?:indstyv)?ende|halvfems(?:indstyv)?ende)"
-- | Ordinals above 99 written as "<numeral> og <ordinal>", e.g.
-- "tohundrede og treogtresindstyvende" (263).
--
-- Matches a numeral whose value is greater than 99, the word "og", and any
-- ordinal token. The result is the numeral's integer value plus the ordinal's
-- value; if 'getIntValue' returns 'Nothing' for the numeral, so does the rule.
ruleSpelledOutBigOrdinals :: Rule
ruleSpelledOutBigOrdinals = Rule
  { name = "ordinals, above 99, spelled out"
  , pattern =
      [ numberWith TNumeral.value (> 99)
      , regex "og"
      , dimension Ordinal
      ]
  , prod = \case
      (Token Numeral NumeralData{TNumeral.value = leading}:_:Token Ordinal (OrdinalData trailing):_) ->
        -- Add the trailing ordinal to the integral value of the leading numeral.
        ordinal . (+ trailing) <$> getIntValue leading
      _ -> Nothing
  }
@ -81,4 +164,6 @@ rules :: [Rule]
-- The rule set exported by this module ('rules' is its only export).
-- The last two entries are the spelled-out rules for 20..99 and for values
-- above 99, as stated in their rule names.
rules =
[ ruleOrdinalDigits
, ruleOrdinalsFirstst
, ruleSpelledOutOrdinals
, ruleSpelledOutBigOrdinals
]

View File

@ -2,7 +2,8 @@
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree.
-- LICENSE file in the root directory of this source tree. An additional grant
-- of patent rights can be found in the PATENTS file in the same directory.
-----------------------------------------------------------------
-- Auto-generated by regenClassifiers
@ -276,7 +277,7 @@ classifiers
HashMap.fromList
[("ordinal (digits)quarter (grain)", -1.252762968495368),
("quarter", -0.8472978603872037),
("ordinals (first..31st)quarter (grain)", -1.252762968495368)],
("ordinals (first..19st)quarter (grain)", -1.252762968495368)],
n = 2},
koData =
ClassData{prior = -0.6931471805599453,
@ -285,7 +286,7 @@ classifiers
HashMap.fromList
[("ordinal (digits)quarter (grain)", -1.252762968495368),
("quarter", -0.8472978603872037),
("ordinals (first..31st)quarter (grain)", -1.252762968495368)],
("ordinals (first..19st)quarter (grain)", -1.252762968495368)],
n = 2}}),
("intersect",
Classifier{okData =
@ -419,12 +420,12 @@ classifiers
likelihoods =
HashMap.fromList
[("daymonth", -1.7346010553881064),
("ordinals (first..31st)week (grain)October",
("ordinals (first..19st)week (grain)intersect",
-1.7346010553881064),
("ordinals (first..31st)week (grain)intersect",
("ordinals (first..19st)week (grain)October",
-1.7346010553881064),
("weekmonth", -1.2237754316221157),
("ordinals (first..31st)day (grain)October",
("ordinals (first..19st)day (grain)October",
-1.7346010553881064)],
n = 6},
koData =
@ -566,7 +567,7 @@ classifiers
ClassData{prior = 0.0, unseen = -2.0794415416798357,
likelihoods =
HashMap.fromList
[("ordinals (first..31st)quarter (grain)year",
[("ordinals (first..19st)quarter (grain)year",
-1.252762968495368),
("quarteryear", -0.8472978603872037),
("ordinal (digits)quarter (grain)year", -1.252762968495368)],
@ -624,9 +625,9 @@ classifiers
likelihoods =
HashMap.fromList
[("daymonth", -0.8938178760220964),
("ordinals (first..31st)TuesdayOctober", -1.9924301646902063),
("ordinals (first..31st)Tuesdayintersect", -1.9924301646902063),
("ordinals (first..31st)Wednesdayintersect",
("ordinals (first..19st)Tuesdayintersect", -1.9924301646902063),
("ordinals (first..19st)TuesdayOctober", -1.9924301646902063),
("ordinals (first..19st)Wednesdayintersect",
-1.4816045409242156)],
n = 8},
koData =
@ -635,8 +636,8 @@ classifiers
likelihoods =
HashMap.fromList
[("daymonth", -0.9444616088408514),
("ordinals (first..31st)WednesdayOctober", -1.2809338454620642),
("ordinals (first..31st)TuesdaySeptember", -1.791759469228055)],
("ordinals (first..19st)WednesdayOctober", -1.2809338454620642),
("ordinals (first..19st)TuesdaySeptember", -1.791759469228055)],
n = 6}}),
("the <day-of-month> (non ordinal)",
Classifier{okData =
@ -646,15 +647,6 @@ classifiers
koData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0}}),
("ordinals (first..31st)",
Classifier{okData =
ClassData{prior = -5.406722127027582e-2,
unseen = -2.995732273553991,
likelihoods = HashMap.fromList [("", 0.0)], n = 18},
koData =
ClassData{prior = -2.9444389791664407,
unseen = -1.0986122886681098,
likelihoods = HashMap.fromList [("", 0.0)], n = 1}}),
("April",
Classifier{okData =
ClassData{prior = 0.0, unseen = -1.6094379124341003,
@ -707,7 +699,7 @@ classifiers
unseen = -3.258096538021482,
likelihoods =
HashMap.fromList
[("ordinals (first..31st)March", -1.8325814637483102),
[("ordinals (first..19st)March", -1.8325814637483102),
("ordinal (digits)February", -1.8325814637483102),
("month", -0.8209805520698302),
("ordinal (digits)March", -1.6094379124341003)],
@ -717,7 +709,7 @@ classifiers
unseen = -2.0794415416798357,
likelihoods =
HashMap.fromList
[("ordinals (first..31st)April", -1.252762968495368),
[("ordinals (first..19st)April", -1.252762968495368),
("month", -1.252762968495368)],
n = 1}}),
("numbers prefix with -, negative or minus",
@ -802,7 +794,7 @@ classifiers
ClassData{prior = 0.0, unseen = -2.3978952727983707,
likelihoods =
HashMap.fromList
[("ordinals (first..31st)", -1.2039728043259361),
[("ordinals (first..19st)", -1.2039728043259361),
("ordinal (digits)", -0.35667494393873245)],
n = 8},
koData =
@ -878,6 +870,15 @@ classifiers
koData =
ClassData{prior = -infinity, unseen = -0.6931471805599453,
likelihoods = HashMap.fromList [], n = 0}}),
("ordinals (first..19st)",
Classifier{okData =
ClassData{prior = -5.406722127027582e-2,
unseen = -2.995732273553991,
likelihoods = HashMap.fromList [("", 0.0)], n = 18},
koData =
ClassData{prior = -2.9444389791664407,
unseen = -1.0986122886681098,
likelihoods = HashMap.fromList [("", 0.0)], n = 1}}),
("about <time-of-day>",
Classifier{okData =
ClassData{prior = -0.6931471805599453,
@ -1544,8 +1545,8 @@ classifiers
ClassData{prior = 0.0, unseen = -2.3978952727983707,
likelihoods =
HashMap.fromList
[("ordinals (first..31st)April", -1.6094379124341003),
("ordinals (first..31st)March", -1.6094379124341003),
[("ordinals (first..19st)April", -1.6094379124341003),
("ordinals (first..19st)March", -1.6094379124341003),
("month", -0.916290731874155),
("ordinal (digits)March", -1.6094379124341003)],
n = 3},

View File

@ -319,10 +319,10 @@ classifiers
unseen = -4.31748811353631,
likelihoods =
HashMap.fromList
[("<integer> (latent time-of-day)", -0.9718605830289658),
[("<integer> (latent time-of-day)", -0.9718605830289657),
("intersect by \"di\", \"della\", \"del\"", -3.20545280453606),
("day", -2.3581549441488563), ("Lunedi", -3.6109179126442243),
("hour", -0.9718605830289658),
("hour", -0.9718605830289657),
("two time tokens separated by `di`", -3.20545280453606),
("Domenica", -3.6109179126442243)],
n = 33}}),