From 41f140992d88d0d32da28b7801c5f9af6e74dabf Mon Sep 17 00:00:00 2001
From: Jens Persson <jens.persson@ipsoft.com>
Date: Thu, 23 May 2019 15:58:12 -0700
Subject: [PATCH] Add support for larger spelled-out Danish ordinals (#346)

Summary:
Adds support for larger spelled-out Danish ordinal number expressions, like
treoghalvfemsindstyvende (93rd)
or
tohundrede og femogfyrrende (245th)
Pull Request resolved: https://github.com/facebook/duckling/pull/346

Reviewed By: patapizza

Differential Revision: D14476918

Pulled By: chinmay87

fbshipit-source-id: eb20ee8d304f291ff4ab2b28c4e272a9d447396e
---
 Duckling/Numeral/DA/Rules.hs          |   4 +-
 Duckling/Ordinal/DA/Corpus.hs         |  27 ++++-
 Duckling/Ordinal/DA/Rules.hs          | 161 ++++++++++++++++++++------
 Duckling/Ranking/Classifiers/DA_XX.hs |  53 ++++-----
 Duckling/Ranking/Classifiers/IT_XX.hs |   4 +-
 5 files changed, 176 insertions(+), 73 deletions(-)

diff --git a/Duckling/Numeral/DA/Rules.hs b/Duckling/Numeral/DA/Rules.hs
index 22438bd4..e60dd886 100644
--- a/Duckling/Numeral/DA/Rules.hs
+++ b/Duckling/Numeral/DA/Rules.hs
@@ -141,14 +141,16 @@ rulePowersOfTen :: Rule
 rulePowersOfTen = Rule
   { name = "powers of tens"
   , pattern =
-    [ regex "(hundrede?|tusinde?|million(er)?)"
+    [ regex "(hundrede?|tohundrede|tusinde?|totusinde|million(er)?)"
     ]
   , prod = \tokens -> case tokens of
       (Token RegexMatch (GroupMatch (match:_)):_) -> case Text.toLower match of
         "hundred"   -> double 1e2 >>= withGrain 2 >>= withMultipliable
         "hundrede"  -> double 1e2 >>= withGrain 2 >>= withMultipliable
+        "tohundrede"  -> double (2 * 1e2) >>= withGrain 2 >>= withMultipliable
         "tusind"    -> double 1e3 >>= withGrain 3 >>= withMultipliable
         "tusinde"   -> double 1e3 >>= withGrain 3 >>= withMultipliable
+        "totusinde"   -> double (2 * 1e3) >>= withGrain 3 >>= withMultipliable
         "million"   -> double 1e6 >>= withGrain 6 >>= withMultipliable
         "millioner" -> double 1e6 >>= withGrain 6 >>= withMultipliable
         _           -> Nothing
diff --git a/Duckling/Ordinal/DA/Corpus.hs b/Duckling/Ordinal/DA/Corpus.hs
index d63c6cd1..8368fb05 100644
--- a/Duckling/Ordinal/DA/Corpus.hs
+++ b/Duckling/Ordinal/DA/Corpus.hs
@@ -22,9 +22,24 @@ corpus :: Corpus
 corpus = (testContext {locale = makeLocale DA Nothing}, testOptions, allExamples)
 
 allExamples :: [Example]
-allExamples =
-  examples (OrdinalData 4)
-           [ "4."
-           , "fjerde"
-           , "Fjerde"
-           ]
+allExamples = concat -- one 'examples' group per expected ordinal value
+  [ examples (OrdinalData 4)
+             [ "4."
+             , "fjerde"
+             , "Fjerde"
+             ]
+  , examples (OrdinalData 41)
+             [ "enogfyrrende" -- "enog" (1) + "fyrrende" (40)
+             ]
+  , examples (OrdinalData 78)
+             [ "otteoghalvfjerdsindstyvende" -- "otteog" (8) + long form of 70
+             ]
+  , examples (OrdinalData 263)
+             [ "to hundrede og treogtresindstyvende" -- numeral + "og" + ordinal
+             , "tohundrede og treogtresindstyvende" -- one-word numeral variant
+             ]
+  , examples (OrdinalData 70)
+             [ "halvfjerdsende" -- short form
+             , "halvfjerdsindstyvende" -- long form
+             ]
+  ]
diff --git a/Duckling/Ordinal/DA/Rules.hs b/Duckling/Ordinal/DA/Rules.hs
index 4fe9678f..e700a6e6 100644
--- a/Duckling/Ordinal/DA/Rules.hs
+++ b/Duckling/Ordinal/DA/Rules.hs
@@ -7,60 +7,143 @@
 
 {-# LANGUAGE GADTs #-}
 {-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE LambdaCase #-}
+{-# LANGUAGE NoRebindableSyntax #-}
 
 module Duckling.Ordinal.DA.Rules
   ( rules ) where
 
-import qualified Data.Text as Text
+
+import Data.HashMap.Strict (HashMap)
+import Data.Text (Text)
 import Prelude
-import Data.String
+import qualified Data.HashMap.Strict as HashMap
+import qualified Data.Text as Text
 
 import Duckling.Dimensions.Types
-import Duckling.Numeral.Helpers (parseInt)
+import Duckling.Numeral.Helpers (parseInt, numberWith)
+import Duckling.Numeral.Types (NumeralData (..), getIntValue)
 import Duckling.Ordinal.Helpers
+import Duckling.Ordinal.Types (OrdinalData (..))
 import Duckling.Regex.Types
 import Duckling.Types
+import qualified Duckling.Numeral.Types as TNumeral
+
+-- | Lowercased Danish ordinal words mapped to their numeric value.
+--
+-- 'ruleOrdinalsFirstst' resolves its regex match through this table. That
+-- regex only produces the words for 1..19, so the 20..31 entries are not
+-- reachable from it; 20..99 are parsed by 'ruleSpelledOutOrdinals'. They are
+-- kept (two per line) so the table stays usable if the regex is widened.
+ordinalsMap :: HashMap Text Int
+ordinalsMap = HashMap.fromList
+  [ ( "første", 1 )
+  , ( "anden", 2 )
+  , ( "tredje", 3 )
+  , ( "fjerde", 4 )
+  , ( "femte", 5 )
+  , ( "sjette", 6 )
+  , ( "syvende", 7 )
+  , ( "ottende", 8 )
+  , ( "niende", 9 )
+  , ( "tiende", 10 )
+  , ( "elfte", 11 )
+  , ( "tolvte", 12 )
+  , ( "trettende", 13 )
+  , ( "fjortende", 14 )
+  , ( "femtende", 15 )
+  , ( "sekstende", 16 )
+  , ( "syttende", 17 )
+  , ( "attende", 18 )
+  , ( "nittende", 19 )
+  , ( "tyvende", 20 ), ( "enogtyvende", 21 )
+  , ( "toogtyvende", 22 ), ( "treogtyvende", 23 )
+  , ( "fireogtyvende", 24 ), ( "femogtyvende", 25 )
+  , ( "seksogtyvende", 26 ), ( "syvogtyvende", 27 )
+  , ( "otteogtyvende", 28 ), ( "niogtyvende", 29 )
+  , ( "tredivte", 30 ), ( "enogtredivte", 31 )
+  ]
+
+-- | Ordinal forms of the tens 20..90 mapped to their numeric value; despite
+-- the name these are ordinal words, not cardinals.
+--
+-- 'ruleSpelledOutOrdinals' looks up its lowercased tens group here, so every
+-- key has to be a spelling that rule's regex can actually produce. Short and
+-- long ("-indstyvende" / "-tyvende") forms of the same value share a line.
+cardinalsMap :: HashMap Text Int
+cardinalsMap = HashMap.fromList
+  [ ( "tyvende", 20 )
+  , ( "tredivte", 30 )
+  , ( "fyrrende", 40 ), ( "fyrretyvende", 40 )
+  , ( "halvtredsende", 50 ), ( "halvtredsindstyvende", 50 )
+  , ( "tressende", 60 ), ( "tresindstyvende", 60 )
+  , ( "halvfjerdsende", 70 ), ( "halvfjerdsindstyvende", 70 )
+  , ( "firsende", 80 ), ( "firsindstyvende", 80 )
+  , ( "halvfemsende", 90 ), ( "halvfemsindstyvende", 90 )
+  ]
+
+-- | Value of the optional "<unit>og" prefix that 'ruleSpelledOutOrdinals'
+-- captures in its first regex group. The empty key maps to 0; presumably
+-- that is the text of the group when the prefix is absent (TODO confirm).
+oneValMap :: HashMap Text Int
+oneValMap = HashMap.fromList $ ( "", 0 ) : zip unitPrefixes [1 ..]
+  where
+    -- Listed in ascending order: counting from 1, the prefix at position i
+    -- is worth i.
+    unitPrefixes =
+      [ "enog", "toog", "treog"
+      , "fireog", "femog", "seksog"
+      , "syvog", "otteog", "niog"
+      ]
 
 ruleOrdinalsFirstst :: Rule
 ruleOrdinalsFirstst = Rule
-  { name = "ordinals (first..31st)"
+  { name = "ordinals (first..19st)" -- also a key in the generated DA_XX classifiers
   , pattern =
-    [ regex "(første|anden|tredje|fjerde|femte|sjette|syvende|ottende|niende|tiende|elfte|tolvte|trettende|fjortende|femtende|sekstende|syttende|attende|nittende|tyvende|tenogtyvende|toogtyvende|treogtyvende|fireogtyvende|femogtyvende|seksogtyvende|syvogtyvende|otteogtyvende|niogtyvende|tredivte|enogtredivte)"
+    [ regex "(første|anden|tredje|fjerde|femte|sjette|syvende|ottende|niende|tiende|elfte|tolvte|trettende|fjortende|femtende|sekstende|syttende|attende|nittende)"
     ]
   , prod = \tokens -> case tokens of
-      (Token RegexMatch (GroupMatch (match:_)):_) -> case Text.toLower match of
-        "første" -> Just $ ordinal 1
-        "anden" -> Just $ ordinal 2
-        "tredje" -> Just $ ordinal 3
-        "fjerde" -> Just $ ordinal 4
-        "femte" -> Just $ ordinal 5
-        "sjette" -> Just $ ordinal 6
-        "syvende" -> Just $ ordinal 7
-        "ottende" -> Just $ ordinal 8
-        "niende" -> Just $ ordinal 9
-        "tiende" -> Just $ ordinal 10
-        "elfte" -> Just $ ordinal 11
-        "tolvte" -> Just $ ordinal 12
-        "trettende" -> Just $ ordinal 13
-        "fjortende" -> Just $ ordinal 14
-        "femtende" -> Just $ ordinal 15
-        "sekstende" -> Just $ ordinal 16
-        "syttende" -> Just $ ordinal 17
-        "attende" -> Just $ ordinal 18
-        "nittende" -> Just $ ordinal 19
-        "tyvende" -> Just $ ordinal 20
-        "tenogtyvende" -> Just $ ordinal 21
-        "toogtyvende" -> Just $ ordinal 22
-        "treogtyvende" -> Just $ ordinal 23
-        "fireogtyvende" -> Just $ ordinal 24
-        "femogtyvende" -> Just $ ordinal 25
-        "seksogtyvende" -> Just $ ordinal 26
-        "syvogtyvende" -> Just $ ordinal 27
-        "otteogtyvende" -> Just $ ordinal 28
-        "niogtyvende" -> Just $ ordinal 29
-        "tredivte" -> Just $ ordinal 30
-        "enogtredivte" -> Just $ ordinal 31
-        _ -> Nothing
+      (Token RegexMatch (GroupMatch (match:_)):_) ->
+        ordinal <$> HashMap.lookup (Text.toLower match) ordinalsMap
+      _ -> Nothing -- token list did not start with a regex group match
+  }
+
+-- | Spelled-out ordinals from 20 to 99: an optional "<unit>og" prefix followed
+-- by the ordinal form of a tens word, e.g. "enogfyrrende" (41).
+ruleSpelledOutOrdinals :: Rule
+ruleSpelledOutOrdinals = Rule
+  { name = "ordinals, 20 to 99, spelled-out"
+  , pattern = [ regex (unitPrefix ++ tensWords) ]
+  , prod = \case
+      -- Group 1 is the unit prefix, group 2 the tens word. If either text is
+      -- missing from its lookup table the match is rejected.
+      (Token RegexMatch (GroupMatch (unitTxt:tensTxt:_)):_) ->
+        let units = HashMap.lookup (Text.toLower unitTxt) oneValMap
+            tens = HashMap.lookup (Text.toLower tensTxt) cardinalsMap
+        in ordinal <$> ((+) <$> units <*> tens)
+      _ -> Nothing
+  }
+  where
+    unitPrefix = "((?:en|to|tre|fire|fem|seks|syv|otte|ni)og)?"
+    tensWords =
+      "(tyvende" ++ "|tredivte" ++ "|fyrr(?:etyv)?ende"
+      ++ "|halvtreds(?:indstyv)?ende" ++ "|tres(?:indstyv|s)?ende"
+      ++ "|halvfjerds(?:indstyv)?ende" ++ "|firs(?:indstyv)?ende"
+      ++ "|halvfems(?:indstyv)?ende)"
+
+-- | Ordinals above 99: a numeral greater than 99, the word "og", then an
+-- ordinal, e.g. "tohundrede og treogtresindstyvende" (263). The result is
+-- the sum of the numeral and the ordinal.
+ruleSpelledOutBigOrdinals :: Rule
+ruleSpelledOutBigOrdinals = Rule
+  { name = "ordinals, above 99, spelled out"
+  , pattern = [ numberWith TNumeral.value (> 99), regex "og", dimension Ordinal ]
+  , prod = \case
+      -- The middle token is the "og" connector; it carries no value.
+      Token Numeral NumeralData {TNumeral.value = numVal}:_:Token Ordinal (OrdinalData ordVal):_ ->
+        -- getIntValue may return Nothing, in which case the rule produces
+        -- no token.
+        ordinal . (+ ordVal) <$> getIntValue numVal
       _ -> Nothing
   }
 
@@ -81,4 +164,6 @@ rules :: [Rule]
 rules =
   [ ruleOrdinalDigits
   , ruleOrdinalsFirstst
+  , ruleSpelledOutOrdinals
+  , ruleSpelledOutBigOrdinals
   ]
diff --git a/Duckling/Ranking/Classifiers/DA_XX.hs b/Duckling/Ranking/Classifiers/DA_XX.hs
index 89c9547d..4ae6c932 100644
--- a/Duckling/Ranking/Classifiers/DA_XX.hs
+++ b/Duckling/Ranking/Classifiers/DA_XX.hs
@@ -2,7 +2,8 @@
 -- All rights reserved.
 --
 -- This source code is licensed under the BSD-style license found in the
--- LICENSE file in the root directory of this source tree.
+-- LICENSE file in the root directory of this source tree. An additional grant
+-- of patent rights can be found in the PATENTS file in the same directory.
 
 -----------------------------------------------------------------
 -- Auto-generated by regenClassifiers
@@ -276,7 +277,7 @@ classifiers
                                  HashMap.fromList
                                    [("ordinal (digits)quarter (grain)", -1.252762968495368),
                                     ("quarter", -0.8472978603872037),
-                                    ("ordinals (first..31st)quarter (grain)", -1.252762968495368)],
+                                    ("ordinals (first..19st)quarter (grain)", -1.252762968495368)],
                                n = 2},
                    koData =
                      ClassData{prior = -0.6931471805599453,
@@ -285,7 +286,7 @@ classifiers
                                  HashMap.fromList
                                    [("ordinal (digits)quarter (grain)", -1.252762968495368),
                                     ("quarter", -0.8472978603872037),
-                                    ("ordinals (first..31st)quarter (grain)", -1.252762968495368)],
+                                    ("ordinals (first..19st)quarter (grain)", -1.252762968495368)],
                                n = 2}}),
        ("intersect",
         Classifier{okData =
@@ -419,12 +420,12 @@ classifiers
                                likelihoods =
                                  HashMap.fromList
                                    [("daymonth", -1.7346010553881064),
-                                    ("ordinals (first..31st)week (grain)October",
+                                    ("ordinals (first..19st)week (grain)intersect",
                                      -1.7346010553881064),
-                                    ("ordinals (first..31st)week (grain)intersect",
+                                    ("ordinals (first..19st)week (grain)October",
                                      -1.7346010553881064),
                                     ("weekmonth", -1.2237754316221157),
-                                    ("ordinals (first..31st)day (grain)October",
+                                    ("ordinals (first..19st)day (grain)October",
                                      -1.7346010553881064)],
                                n = 6},
                    koData =
@@ -566,7 +567,7 @@ classifiers
                      ClassData{prior = 0.0, unseen = -2.0794415416798357,
                                likelihoods =
                                  HashMap.fromList
-                                   [("ordinals (first..31st)quarter (grain)year",
+                                   [("ordinals (first..19st)quarter (grain)year",
                                      -1.252762968495368),
                                     ("quarteryear", -0.8472978603872037),
                                     ("ordinal (digits)quarter (grain)year", -1.252762968495368)],
@@ -624,9 +625,9 @@ classifiers
                                likelihoods =
                                  HashMap.fromList
                                    [("daymonth", -0.8938178760220964),
-                                    ("ordinals (first..31st)TuesdayOctober", -1.9924301646902063),
-                                    ("ordinals (first..31st)Tuesdayintersect", -1.9924301646902063),
-                                    ("ordinals (first..31st)Wednesdayintersect",
+                                    ("ordinals (first..19st)Tuesdayintersect", -1.9924301646902063),
+                                    ("ordinals (first..19st)TuesdayOctober", -1.9924301646902063),
+                                    ("ordinals (first..19st)Wednesdayintersect",
                                      -1.4816045409242156)],
                                n = 8},
                    koData =
@@ -635,8 +636,8 @@ classifiers
                                likelihoods =
                                  HashMap.fromList
                                    [("daymonth", -0.9444616088408514),
-                                    ("ordinals (first..31st)WednesdayOctober", -1.2809338454620642),
-                                    ("ordinals (first..31st)TuesdaySeptember", -1.791759469228055)],
+                                    ("ordinals (first..19st)WednesdayOctober", -1.2809338454620642),
+                                    ("ordinals (first..19st)TuesdaySeptember", -1.791759469228055)],
                                n = 6}}),
        ("the <day-of-month> (non ordinal)",
         Classifier{okData =
@@ -646,15 +647,6 @@ classifiers
                    koData =
                      ClassData{prior = -infinity, unseen = -0.6931471805599453,
                                likelihoods = HashMap.fromList [], n = 0}}),
-       ("ordinals (first..31st)",
-        Classifier{okData =
-                     ClassData{prior = -5.406722127027582e-2,
-                               unseen = -2.995732273553991,
-                               likelihoods = HashMap.fromList [("", 0.0)], n = 18},
-                   koData =
-                     ClassData{prior = -2.9444389791664407,
-                               unseen = -1.0986122886681098,
-                               likelihoods = HashMap.fromList [("", 0.0)], n = 1}}),
        ("April",
         Classifier{okData =
                      ClassData{prior = 0.0, unseen = -1.6094379124341003,
@@ -707,7 +699,7 @@ classifiers
                                unseen = -3.258096538021482,
                                likelihoods =
                                  HashMap.fromList
-                                   [("ordinals (first..31st)March", -1.8325814637483102),
+                                   [("ordinals (first..19st)March", -1.8325814637483102),
                                     ("ordinal (digits)February", -1.8325814637483102),
                                     ("month", -0.8209805520698302),
                                     ("ordinal (digits)March", -1.6094379124341003)],
@@ -717,7 +709,7 @@ classifiers
                                unseen = -2.0794415416798357,
                                likelihoods =
                                  HashMap.fromList
-                                   [("ordinals (first..31st)April", -1.252762968495368),
+                                   [("ordinals (first..19st)April", -1.252762968495368),
                                     ("month", -1.252762968495368)],
                                n = 1}}),
        ("numbers prefix with -, negative or minus",
@@ -802,7 +794,7 @@ classifiers
                      ClassData{prior = 0.0, unseen = -2.3978952727983707,
                                likelihoods =
                                  HashMap.fromList
-                                   [("ordinals (first..31st)", -1.2039728043259361),
+                                   [("ordinals (first..19st)", -1.2039728043259361),
                                     ("ordinal (digits)", -0.35667494393873245)],
                                n = 8},
                    koData =
@@ -878,6 +870,15 @@ classifiers
                    koData =
                      ClassData{prior = -infinity, unseen = -0.6931471805599453,
                                likelihoods = HashMap.fromList [], n = 0}}),
+       ("ordinals (first..19st)",
+        Classifier{okData =
+                     ClassData{prior = -5.406722127027582e-2,
+                               unseen = -2.995732273553991,
+                               likelihoods = HashMap.fromList [("", 0.0)], n = 18},
+                   koData =
+                     ClassData{prior = -2.9444389791664407,
+                               unseen = -1.0986122886681098,
+                               likelihoods = HashMap.fromList [("", 0.0)], n = 1}}),
        ("about <time-of-day>",
         Classifier{okData =
                      ClassData{prior = -0.6931471805599453,
@@ -1544,8 +1545,8 @@ classifiers
                      ClassData{prior = 0.0, unseen = -2.3978952727983707,
                                likelihoods =
                                  HashMap.fromList
-                                   [("ordinals (first..31st)April", -1.6094379124341003),
-                                    ("ordinals (first..31st)March", -1.6094379124341003),
+                                   [("ordinals (first..19st)April", -1.6094379124341003),
+                                    ("ordinals (first..19st)March", -1.6094379124341003),
                                     ("month", -0.916290731874155),
                                     ("ordinal (digits)March", -1.6094379124341003)],
                                n = 3},
diff --git a/Duckling/Ranking/Classifiers/IT_XX.hs b/Duckling/Ranking/Classifiers/IT_XX.hs
index 139753f8..7aa8a9e5 100644
--- a/Duckling/Ranking/Classifiers/IT_XX.hs
+++ b/Duckling/Ranking/Classifiers/IT_XX.hs
@@ -319,10 +319,10 @@ classifiers
                                unseen = -4.31748811353631,
                                likelihoods =
                                  HashMap.fromList
-                                   [("<integer> (latent time-of-day)", -0.9718605830289658),
+                                   [("<integer> (latent time-of-day)", -0.9718605830289657),
                                     ("intersect by \"di\", \"della\", \"del\"", -3.20545280453606),
                                     ("day", -2.3581549441488563), ("Lunedi", -3.6109179126442243),
-                                    ("hour", -0.9718605830289658),
+                                    ("hour", -0.9718605830289657),
                                     ("two time tokens separated by `di`", -3.20545280453606),
                                     ("Domenica", -3.6109179126442243)],
                                n = 33}}),