mirror of
https://github.com/facebook/duckling.git
synced 2025-01-07 14:29:37 +03:00
Add support for larger spelled-out Danish ordinals (#346)
Summary: Adds support for larger spelled-out Danish ordinal number expressions, like treoghalvfemsindstyvende (93rd) or tohundrede og femogfyrrende (245th) Pull Request resolved: https://github.com/facebook/duckling/pull/346 Reviewed By: patapizza Differential Revision: D14476918 Pulled By: chinmay87 fbshipit-source-id: eb20ee8d304f291ff4ab2b28c4e272a9d447396e
This commit is contained in:
parent
bf89e34365
commit
41f140992d
@ -141,14 +141,16 @@ rulePowersOfTen :: Rule
|
||||
rulePowersOfTen = Rule
|
||||
{ name = "powers of tens"
|
||||
, pattern =
|
||||
[ regex "(hundrede?|tusinde?|million(er)?)"
|
||||
[ regex "(hundrede?|tohundrede|tusinde?|totusinde|million(er)?)"
|
||||
]
|
||||
, prod = \tokens -> case tokens of
|
||||
(Token RegexMatch (GroupMatch (match:_)):_) -> case Text.toLower match of
|
||||
"hundred" -> double 1e2 >>= withGrain 2 >>= withMultipliable
|
||||
"hundrede" -> double 1e2 >>= withGrain 2 >>= withMultipliable
|
||||
"tohundrede" -> double (2 * 1e2) >>= withGrain 2 >>= withMultipliable
|
||||
"tusind" -> double 1e3 >>= withGrain 3 >>= withMultipliable
|
||||
"tusinde" -> double 1e3 >>= withGrain 3 >>= withMultipliable
|
||||
"totusinde" -> double (2 * 1e3) >>= withGrain 3 >>= withMultipliable
|
||||
"million" -> double 1e6 >>= withGrain 6 >>= withMultipliable
|
||||
"millioner" -> double 1e6 >>= withGrain 6 >>= withMultipliable
|
||||
_ -> Nothing
|
||||
|
@ -22,9 +22,24 @@ corpus :: Corpus
|
||||
corpus = (testContext {locale = makeLocale DA Nothing}, testOptions, allExamples)
|
||||
|
||||
allExamples :: [Example]
|
||||
allExamples =
|
||||
examples (OrdinalData 4)
|
||||
[ "4."
|
||||
, "fjerde"
|
||||
, "Fjerde"
|
||||
]
|
||||
allExamples = concat
|
||||
[ examples (OrdinalData 4)
|
||||
[ "4."
|
||||
, "fjerde"
|
||||
, "Fjerde"
|
||||
]
|
||||
, examples (OrdinalData 41)
|
||||
[ "enogfyrrende"
|
||||
]
|
||||
, examples (OrdinalData 78)
|
||||
[ "otteoghalvfjerdsindstyvende"
|
||||
]
|
||||
, examples (OrdinalData 263)
|
||||
[ "to hundrede og treogtresindstyvende"
|
||||
, "tohundrede og treogtresindstyvende"
|
||||
]
|
||||
, examples (OrdinalData 70)
|
||||
[ "halvfjerdsende"
|
||||
, "halvfjerdsindstyvende"
|
||||
]
|
||||
]
|
||||
|
@ -7,60 +7,143 @@
|
||||
|
||||
{-# LANGUAGE GADTs #-}
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
{-# LANGUAGE LambdaCase #-}
|
||||
{-# LANGUAGE NoRebindableSyntax #-}
|
||||
|
||||
module Duckling.Ordinal.DA.Rules
|
||||
( rules ) where
|
||||
|
||||
import qualified Data.Text as Text
|
||||
|
||||
import Data.HashMap.Strict (HashMap)
|
||||
import Data.Text (Text)
|
||||
import Prelude
|
||||
import Data.String
|
||||
import qualified Data.HashMap.Strict as HashMap
|
||||
import qualified Data.Text as Text
|
||||
|
||||
import Duckling.Dimensions.Types
|
||||
import Duckling.Numeral.Helpers (parseInt)
|
||||
import Duckling.Numeral.Helpers (parseInt, numberWith)
|
||||
import Duckling.Numeral.Types (NumeralData (..), getIntValue)
|
||||
import Duckling.Ordinal.Helpers
|
||||
import Duckling.Ordinal.Types (OrdinalData (..))
|
||||
import Duckling.Regex.Types
|
||||
import Duckling.Types
|
||||
import qualified Duckling.Numeral.Types as TNumeral
|
||||
|
||||
-- | Spelled-out Danish ordinals keyed by their lower-case text.
--
-- 'ruleOrdinalsFirstst' lower-cases the matched text and looks it up here.
--
-- NOTE(review): that rule's regex appears to stop at "nittende" (19th), so
-- the entries from 20 upwards look unreachable through it — confirm whether
-- they are still needed.
ordinalsMap :: HashMap Text Int
ordinalsMap = HashMap.fromList
  [ ( "første", 1 )
  , ( "anden", 2 )
  , ( "tredje", 3 )
  , ( "fjerde", 4 )
  , ( "femte", 5 )
  , ( "sjette", 6 )
  , ( "syvende", 7 )
  , ( "ottende", 8 )
  , ( "niende", 9 )
  , ( "tiende", 10 )
  , ( "elfte", 11 )
  , ( "tolvte", 12 )
  , ( "trettende", 13 )
  , ( "fjortende", 14 )
  , ( "femtende", 15 )
  , ( "sekstende", 16 )
  , ( "syttende", 17 )
  , ( "attende", 18 )
  , ( "nittende", 19 )
  , ( "tyvende", 20 )
  -- 21st is "enogtyvende"; the key was previously misspelled "tenogtyvende".
  , ( "enogtyvende", 21 )
  , ( "toogtyvende", 22 )
  , ( "treogtyvende", 23 )
  , ( "fireogtyvende", 24 )
  , ( "femogtyvende", 25 )
  , ( "seksogtyvende", 26 )
  , ( "syvogtyvende", 27 )
  , ( "otteogtyvende", 28 )
  , ( "niogtyvende", 29 )
  , ( "tredivte", 30 )
  , ( "enogtredivte", 31 )
  ]
|
||||
|
||||
-- | Ordinal forms of the tens (20th .. 90th) keyed by their lower-case text.
--
-- Despite the name, the keys are ordinals, not cardinals. Both the short and
-- the long ("-indstyvende") spellings are listed. 'ruleSpelledOutOrdinals'
-- looks up its second capture group here, so every key must be spelled
-- exactly as that rule's regex can match it.
cardinalsMap :: HashMap Text Int
cardinalsMap = HashMap.fromList
  [ ( "tyvende", 20 )
  , ( "tredivte", 30 )
  , ( "fyrrende", 40 )
  , ( "fyrretyvende", 40 )
  , ( "halvtredsende", 50 )
  , ( "halvtredsindstyvende", 50 )
  , ( "tressende", 60 )
  , ( "tresindstyvende", 60 )
  , ( "halvfjerdsende", 70 )
  , ( "halvfjerdsindstyvende", 70 )
  , ( "firsende", 80 )
  -- Was "firsindsstyvende" (double s), which the regex alternative
  -- "firs(?:indstyv)?ende" can never produce, so the long form of 80th
  -- matched the pattern but failed this lookup.
  , ( "firsindstyvende", 80 )
  , ( "halvfemsende", 90 )
  , ( "halvfemsindstyvende", 90 )
  ]
|
||||
|
||||
-- | Value contributed by the "<unit>og" prefix of a compound ordinal
-- ("enog" = 1 .. "niog" = 9).
--
-- The empty key maps to 0 so that a tens word without a prefix adds nothing.
oneValMap :: HashMap Text Int
oneValMap = HashMap.fromList (( "", 0 ) : zip unitPrefixes [1 ..])
  where
    -- Listed in ascending order; the position determines the value.
    unitPrefixes :: [Text]
    unitPrefixes =
      [ "enog"
      , "toog"
      , "treog"
      , "fireog"
      , "femog"
      , "seksog"
      , "syvog"
      , "otteog"
      , "niog"
      ]
|
||||
|
||||
ruleOrdinalsFirstst :: Rule
|
||||
ruleOrdinalsFirstst = Rule
|
||||
{ name = "ordinals (first..31st)"
|
||||
{ name = "ordinals (first..19st)"
|
||||
, pattern =
|
||||
[ regex "(første|anden|tredje|fjerde|femte|sjette|syvende|ottende|niende|tiende|elfte|tolvte|trettende|fjortende|femtende|sekstende|syttende|attende|nittende|tyvende|tenogtyvende|toogtyvende|treogtyvende|fireogtyvende|femogtyvende|seksogtyvende|syvogtyvende|otteogtyvende|niogtyvende|tredivte|enogtredivte)"
|
||||
[ regex "(første|anden|tredje|fjerde|femte|sjette|syvende|ottende|niende|tiende|elfte|tolvte|trettende|fjortende|femtende|sekstende|syttende|attende|nittende)"
|
||||
]
|
||||
, prod = \tokens -> case tokens of
|
||||
(Token RegexMatch (GroupMatch (match:_)):_) -> case Text.toLower match of
|
||||
"første" -> Just $ ordinal 1
|
||||
"anden" -> Just $ ordinal 2
|
||||
"tredje" -> Just $ ordinal 3
|
||||
"fjerde" -> Just $ ordinal 4
|
||||
"femte" -> Just $ ordinal 5
|
||||
"sjette" -> Just $ ordinal 6
|
||||
"syvende" -> Just $ ordinal 7
|
||||
"ottende" -> Just $ ordinal 8
|
||||
"niende" -> Just $ ordinal 9
|
||||
"tiende" -> Just $ ordinal 10
|
||||
"elfte" -> Just $ ordinal 11
|
||||
"tolvte" -> Just $ ordinal 12
|
||||
"trettende" -> Just $ ordinal 13
|
||||
"fjortende" -> Just $ ordinal 14
|
||||
"femtende" -> Just $ ordinal 15
|
||||
"sekstende" -> Just $ ordinal 16
|
||||
"syttende" -> Just $ ordinal 17
|
||||
"attende" -> Just $ ordinal 18
|
||||
"nittende" -> Just $ ordinal 19
|
||||
"tyvende" -> Just $ ordinal 20
|
||||
"tenogtyvende" -> Just $ ordinal 21
|
||||
"toogtyvende" -> Just $ ordinal 22
|
||||
"treogtyvende" -> Just $ ordinal 23
|
||||
"fireogtyvende" -> Just $ ordinal 24
|
||||
"femogtyvende" -> Just $ ordinal 25
|
||||
"seksogtyvende" -> Just $ ordinal 26
|
||||
"syvogtyvende" -> Just $ ordinal 27
|
||||
"otteogtyvende" -> Just $ ordinal 28
|
||||
"niogtyvende" -> Just $ ordinal 29
|
||||
"tredivte" -> Just $ ordinal 30
|
||||
"enogtredivte" -> Just $ ordinal 31
|
||||
_ -> Nothing
|
||||
(Token RegexMatch (GroupMatch (match:_)):_) ->
|
||||
ordinal <$> HashMap.lookup (Text.toLower match) ordinalsMap
|
||||
_ -> Nothing
|
||||
}
|
||||
|
||||
-- | Compound spelled-out ordinals from 20 to 99, e.g. "enogfyrrende" (41st).
--
-- The regex has two capture groups: an optional "<unit>og" prefix and the
-- ordinal form of the tens. Each group is lower-cased and resolved through
-- 'oneValMap' and 'cardinalsMap' respectively; if either lookup misses, the
-- rule produces nothing.
ruleSpelledOutOrdinals :: Rule
ruleSpelledOutOrdinals = Rule
  { name = "ordinals, 20 to 99, spelled-out"
  , pattern = [ regex (unitPrefix ++ tensWord) ]
  , prod = \tokens -> case tokens of
      (Token RegexMatch (GroupMatch (unitText:tensText:_)):_) ->
        let unitPart = HashMap.lookup (Text.toLower unitText) oneValMap
            tensPart = HashMap.lookup (Text.toLower tensText) cardinalsMap
        in ordinal <$> ((+) <$> unitPart <*> tensPart)
      _ -> Nothing
  }
  where
    unitPrefix, tensWord :: String
    -- Optional first group: the units digit glued to "og".
    unitPrefix = "((?:en|to|tre|fire|fem|seks|syv|otte|ni)og)?"
    -- Second group: short or long ordinal spelling of the tens.
    tensWord = concat
      [ "(tyvende"
      , "|tredivte"
      , "|fyrr(?:etyv)?ende"
      , "|halvtreds(?:indstyv)?ende"
      , "|tres(?:indstyv|s)?ende"
      , "|halvfjerds(?:indstyv)?ende"
      , "|firs(?:indstyv)?ende"
      , "|halvfems(?:indstyv)?ende)"
      ]
|
||||
|
||||
-- | Spelled-out ordinals above 99, e.g. "to hundrede og
-- treogtresindstyvende" (263rd).
--
-- Matches a numeral whose value is greater than 99, the word "og", and an
-- Ordinal token, and yields the ordinal of their sum. Produces nothing when
-- 'getIntValue' returns 'Nothing' for the numeral's value.
ruleSpelledOutBigOrdinals :: Rule
ruleSpelledOutBigOrdinals = Rule
  { name = "ordinals, above 99, spelled out"
  , pattern =
      [ numberWith TNumeral.value (> 99)
      , regex "og"
      , dimension Ordinal
      ]
  , prod = \tokens -> case tokens of
      (Token Numeral NumeralData {TNumeral.value = bigPart}
        : _
        : Token Ordinal (OrdinalData smallPart)
        : _) -> ordinal . (+ smallPart) <$> getIntValue bigPart
      _ -> Nothing
  }
|
||||
|
||||
@ -81,4 +164,6 @@ rules :: [Rule]
|
||||
rules =
|
||||
[ ruleOrdinalDigits
|
||||
, ruleOrdinalsFirstst
|
||||
, ruleSpelledOutOrdinals
|
||||
, ruleSpelledOutBigOrdinals
|
||||
]
|
||||
|
@ -2,7 +2,8 @@
|
||||
-- All rights reserved.
|
||||
--
|
||||
-- This source code is licensed under the BSD-style license found in the
|
||||
-- LICENSE file in the root directory of this source tree.
|
||||
-- LICENSE file in the root directory of this source tree. An additional grant
|
||||
-- of patent rights can be found in the PATENTS file in the same directory.
|
||||
|
||||
-----------------------------------------------------------------
|
||||
-- Auto-generated by regenClassifiers
|
||||
@ -276,7 +277,7 @@ classifiers
|
||||
HashMap.fromList
|
||||
[("ordinal (digits)quarter (grain)", -1.252762968495368),
|
||||
("quarter", -0.8472978603872037),
|
||||
("ordinals (first..31st)quarter (grain)", -1.252762968495368)],
|
||||
("ordinals (first..19st)quarter (grain)", -1.252762968495368)],
|
||||
n = 2},
|
||||
koData =
|
||||
ClassData{prior = -0.6931471805599453,
|
||||
@ -285,7 +286,7 @@ classifiers
|
||||
HashMap.fromList
|
||||
[("ordinal (digits)quarter (grain)", -1.252762968495368),
|
||||
("quarter", -0.8472978603872037),
|
||||
("ordinals (first..31st)quarter (grain)", -1.252762968495368)],
|
||||
("ordinals (first..19st)quarter (grain)", -1.252762968495368)],
|
||||
n = 2}}),
|
||||
("intersect",
|
||||
Classifier{okData =
|
||||
@ -419,12 +420,12 @@ classifiers
|
||||
likelihoods =
|
||||
HashMap.fromList
|
||||
[("daymonth", -1.7346010553881064),
|
||||
("ordinals (first..31st)week (grain)October",
|
||||
("ordinals (first..19st)week (grain)intersect",
|
||||
-1.7346010553881064),
|
||||
("ordinals (first..31st)week (grain)intersect",
|
||||
("ordinals (first..19st)week (grain)October",
|
||||
-1.7346010553881064),
|
||||
("weekmonth", -1.2237754316221157),
|
||||
("ordinals (first..31st)day (grain)October",
|
||||
("ordinals (first..19st)day (grain)October",
|
||||
-1.7346010553881064)],
|
||||
n = 6},
|
||||
koData =
|
||||
@ -566,7 +567,7 @@ classifiers
|
||||
ClassData{prior = 0.0, unseen = -2.0794415416798357,
|
||||
likelihoods =
|
||||
HashMap.fromList
|
||||
[("ordinals (first..31st)quarter (grain)year",
|
||||
[("ordinals (first..19st)quarter (grain)year",
|
||||
-1.252762968495368),
|
||||
("quarteryear", -0.8472978603872037),
|
||||
("ordinal (digits)quarter (grain)year", -1.252762968495368)],
|
||||
@ -624,9 +625,9 @@ classifiers
|
||||
likelihoods =
|
||||
HashMap.fromList
|
||||
[("daymonth", -0.8938178760220964),
|
||||
("ordinals (first..31st)TuesdayOctober", -1.9924301646902063),
|
||||
("ordinals (first..31st)Tuesdayintersect", -1.9924301646902063),
|
||||
("ordinals (first..31st)Wednesdayintersect",
|
||||
("ordinals (first..19st)Tuesdayintersect", -1.9924301646902063),
|
||||
("ordinals (first..19st)TuesdayOctober", -1.9924301646902063),
|
||||
("ordinals (first..19st)Wednesdayintersect",
|
||||
-1.4816045409242156)],
|
||||
n = 8},
|
||||
koData =
|
||||
@ -635,8 +636,8 @@ classifiers
|
||||
likelihoods =
|
||||
HashMap.fromList
|
||||
[("daymonth", -0.9444616088408514),
|
||||
("ordinals (first..31st)WednesdayOctober", -1.2809338454620642),
|
||||
("ordinals (first..31st)TuesdaySeptember", -1.791759469228055)],
|
||||
("ordinals (first..19st)WednesdayOctober", -1.2809338454620642),
|
||||
("ordinals (first..19st)TuesdaySeptember", -1.791759469228055)],
|
||||
n = 6}}),
|
||||
("the <day-of-month> (non ordinal)",
|
||||
Classifier{okData =
|
||||
@ -646,15 +647,6 @@ classifiers
|
||||
koData =
|
||||
ClassData{prior = -infinity, unseen = -0.6931471805599453,
|
||||
likelihoods = HashMap.fromList [], n = 0}}),
|
||||
("ordinals (first..31st)",
|
||||
Classifier{okData =
|
||||
ClassData{prior = -5.406722127027582e-2,
|
||||
unseen = -2.995732273553991,
|
||||
likelihoods = HashMap.fromList [("", 0.0)], n = 18},
|
||||
koData =
|
||||
ClassData{prior = -2.9444389791664407,
|
||||
unseen = -1.0986122886681098,
|
||||
likelihoods = HashMap.fromList [("", 0.0)], n = 1}}),
|
||||
("April",
|
||||
Classifier{okData =
|
||||
ClassData{prior = 0.0, unseen = -1.6094379124341003,
|
||||
@ -707,7 +699,7 @@ classifiers
|
||||
unseen = -3.258096538021482,
|
||||
likelihoods =
|
||||
HashMap.fromList
|
||||
[("ordinals (first..31st)March", -1.8325814637483102),
|
||||
[("ordinals (first..19st)March", -1.8325814637483102),
|
||||
("ordinal (digits)February", -1.8325814637483102),
|
||||
("month", -0.8209805520698302),
|
||||
("ordinal (digits)March", -1.6094379124341003)],
|
||||
@ -717,7 +709,7 @@ classifiers
|
||||
unseen = -2.0794415416798357,
|
||||
likelihoods =
|
||||
HashMap.fromList
|
||||
[("ordinals (first..31st)April", -1.252762968495368),
|
||||
[("ordinals (first..19st)April", -1.252762968495368),
|
||||
("month", -1.252762968495368)],
|
||||
n = 1}}),
|
||||
("numbers prefix with -, negative or minus",
|
||||
@ -802,7 +794,7 @@ classifiers
|
||||
ClassData{prior = 0.0, unseen = -2.3978952727983707,
|
||||
likelihoods =
|
||||
HashMap.fromList
|
||||
[("ordinals (first..31st)", -1.2039728043259361),
|
||||
[("ordinals (first..19st)", -1.2039728043259361),
|
||||
("ordinal (digits)", -0.35667494393873245)],
|
||||
n = 8},
|
||||
koData =
|
||||
@ -878,6 +870,15 @@ classifiers
|
||||
koData =
|
||||
ClassData{prior = -infinity, unseen = -0.6931471805599453,
|
||||
likelihoods = HashMap.fromList [], n = 0}}),
|
||||
("ordinals (first..19st)",
|
||||
Classifier{okData =
|
||||
ClassData{prior = -5.406722127027582e-2,
|
||||
unseen = -2.995732273553991,
|
||||
likelihoods = HashMap.fromList [("", 0.0)], n = 18},
|
||||
koData =
|
||||
ClassData{prior = -2.9444389791664407,
|
||||
unseen = -1.0986122886681098,
|
||||
likelihoods = HashMap.fromList [("", 0.0)], n = 1}}),
|
||||
("about <time-of-day>",
|
||||
Classifier{okData =
|
||||
ClassData{prior = -0.6931471805599453,
|
||||
@ -1544,8 +1545,8 @@ classifiers
|
||||
ClassData{prior = 0.0, unseen = -2.3978952727983707,
|
||||
likelihoods =
|
||||
HashMap.fromList
|
||||
[("ordinals (first..31st)April", -1.6094379124341003),
|
||||
("ordinals (first..31st)March", -1.6094379124341003),
|
||||
[("ordinals (first..19st)April", -1.6094379124341003),
|
||||
("ordinals (first..19st)March", -1.6094379124341003),
|
||||
("month", -0.916290731874155),
|
||||
("ordinal (digits)March", -1.6094379124341003)],
|
||||
n = 3},
|
||||
|
@ -319,10 +319,10 @@ classifiers
|
||||
unseen = -4.31748811353631,
|
||||
likelihoods =
|
||||
HashMap.fromList
|
||||
[("<integer> (latent time-of-day)", -0.9718605830289658),
|
||||
[("<integer> (latent time-of-day)", -0.9718605830289657),
|
||||
("intersect by \"di\", \"della\", \"del\"", -3.20545280453606),
|
||||
("day", -2.3581549441488563), ("Lunedi", -3.6109179126442243),
|
||||
("hour", -0.9718605830289658),
|
||||
("hour", -0.9718605830289657),
|
||||
("two time tokens separated by `di`", -3.20545280453606),
|
||||
("Domenica", -3.6109179126442243)],
|
||||
n = 33}}),
|
||||
|
Loading…
Reference in New Issue
Block a user