diff --git a/Duckling/Dimensions/TA.hs b/Duckling/Dimensions/TA.hs index 3895032d..1b90051d 100644 --- a/Duckling/Dimensions/TA.hs +++ b/Duckling/Dimensions/TA.hs @@ -15,4 +15,5 @@ import Duckling.Dimensions.Types allDimensions :: [Some Dimension] allDimensions = [ This Numeral + , This Ordinal ] diff --git a/Duckling/Ordinal/TA/Corpus.hs b/Duckling/Ordinal/TA/Corpus.hs new file mode 100644 index 00000000..fcae9288 --- /dev/null +++ b/Duckling/Ordinal/TA/Corpus.hs @@ -0,0 +1,120 @@ +-- Copyright (c) 2016-present, Facebook, Inc. +-- All rights reserved. +-- +-- This source code is licensed under the BSD-style license found in the +-- LICENSE file in the root directory of this source tree. An additional grant +-- of patent rights can be found in the PATENTS file in the same directory. + + +{-# LANGUAGE OverloadedStrings #-} + +module Duckling.Ordinal.TA.Corpus + ( corpus + ) where + +import Data.String +import Prelude + +import Duckling.Locale +import Duckling.Ordinal.Types +import Duckling.Resolve +import Duckling.Testing.Types + +corpus :: Corpus +corpus = (testContext {locale = makeLocale TA Nothing}, testOptions, allExamples) + +allExamples :: [Example] +allExamples = concat + [ examples (OrdinalData 1) + [ "முதல்" + , "1." + ] + , examples (OrdinalData 2) + [ "இரண்டாம்" + , "2." + ] + , examples (OrdinalData 3) + [ "மூன்றாம்" + , "3." + ] + , examples (OrdinalData 4) + [ "நான்காம்" + , "4." + ] + , examples (OrdinalData 5) + [ "ஐந்தாம்" + , "5." + ] + , examples (OrdinalData 6) + [ "ஆறாம்" + , "6." + ] + , examples (OrdinalData 7) + [ "ஏழாம்" + , "7." + ] + , examples (OrdinalData 8) + [ "எட்டாம்" + , "8." + ] + , examples (OrdinalData 9) + [ "ஒன்பதாம்" + , "9." + ] + , examples (OrdinalData 10) + [ "பத்தாம்" + , "10." + ] + , examples (OrdinalData 11) + [ "பதினொன்றாம்" + , "11." + ] + , examples (OrdinalData 12) + [ "பன்னிரண்டாம்" + , "12." + ] + , examples (OrdinalData 20) + [ "இருபதாம்" + , "20." + ] + , examples (OrdinalData 21) + [ "இருபத்திஒன்றாம்" + , "21." + ] + , examples (OrdinalData 22) + [ "இருபத்திஇரண்டாம்" + , "22." + ] + , examples (OrdinalData 26) + [ "இருபத்திஆறாம்" + , "26." + ] + , examples (OrdinalData 30) + [ "முப்பதாம்" + , "30." + ] + , examples (OrdinalData 33) + [ "முப்பத்துமூன்றாம்" + , "33." + ] + , examples (OrdinalData 50) + [ "ஐம்பதாம்" + , "50." + ] + , examples (OrdinalData 54) + [ "ஐம்பத்திநான்காம்" + , "54." + ] + , examples (OrdinalData 65) + [ "அறுபத்ஐந்தாம்" + , "65." + ] + , examples (OrdinalData 76) + [ "எழுபத்திஆறாம்" + , "76." + ] + , examples (OrdinalData 87) + [ "எண்பத்திஏழாம்" + , "87." + ] + ] diff --git a/Duckling/Ordinal/TA/Rules.hs b/Duckling/Ordinal/TA/Rules.hs new file mode 100644 index 00000000..9e4d21b9 --- /dev/null +++ b/Duckling/Ordinal/TA/Rules.hs @@ -0,0 +1,162 @@ +-- Copyright (c) 2016-present, Facebook, Inc. +-- All rights reserved. +-- +-- This source code is licensed under the BSD-style license found in the +-- LICENSE file in the root directory of this source tree. An additional grant +-- of patent rights can be found in the PATENTS file in the same directory. + + +{-# LANGUAGE GADTs #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE LambdaCase #-} + +module Duckling.Ordinal.TA.Rules + ( rules ) where + +import Control.Monad (join) +import Data.HashMap.Strict ( HashMap) +import Data.String +import Data.Text (Text) +import Prelude +import qualified Data.HashMap.Strict as HashMap +import qualified Data.Text as Text + +import Duckling.Dimensions.Types +import Duckling.Numeral.Helpers (parseInt) +import Duckling.Ordinal.Helpers +import Duckling.Regex.Types +import Duckling.Types + +oneToNineMap :: HashMap Text Int +oneToNineMap = HashMap.fromList + [ ( "முதல்", 1 ) + , ( "இரண்டாம்", 2 ) + , ( "மூன்றாம்", 3 ) + , ( "நான்காம்", 4 ) + , ( "ஐந்தாம்", 5 ) + , ( "ஆறாம்", 6 ) + , ( "ஏழாம்", 7 ) + , ( "எட்டாம்", 8 ) + , ( "ஒன்பதாம்", 9 ) + ] + + +ruleOneToNine :: Rule +ruleOneToNine = Rule + { name = "integer (1..9)" + , pattern = + [ regex "(முதல்|இரண்டாம்|மூன்றாம்|நான்காம்|ஐந்தாம்|ஆறாம்|ஏழாம்|எட்டாம்|ஒன்பதாம்)" + ] + , prod = \case + (Token RegexMatch (GroupMatch (match:_)):_) -> + ordinal <$> HashMap.lookup (Text.toLower match) oneToNineMap + _ -> Nothing + } + +tenToNineteenMap :: HashMap Text Int +tenToNineteenMap = HashMap.fromList + [ ( "பத்தாம்", 10 ) + , ( "பதினொன்றாம்", 11 ) + , ( "பன்னிரண்டாம்", 12 ) + , ( "பதின்மூன்றாம்", 13 ) + , ( "பதினான்காம்", 14 ) + , ( "பதினைந்தாம்", 15 ) + , ( "பதினாறாம்", 16 ) + , ( "பதினேழாம்", 17 ) + , ( "பதினெட்டாம்", 18 ) + , ( "பத்தொன்பதாம்", 19 ) + ] + +ruleTenToNineteen :: Rule +ruleTenToNineteen = Rule + { name = "integer (10..19)" + , pattern = + [ regex "(பத்தாம்|பதினொன்றாம்|பன்னிரண்டாம்|பதின்மூன்றாம்|பதினான்காம்|பதினைந்தாம்|பதினாறாம்|பதினேழாம்|பதினெட்டாம்|பத்தொன்பதாம்)" + ] + , prod = \case + (Token RegexMatch (GroupMatch (match:_)):_) -> + ordinal <$> HashMap.lookup (Text.toLower match) tenToNineteenMap + _ -> Nothing + } + +tensMap :: HashMap Text Int +tensMap = HashMap.fromList + [ ( "இருபதாம்", 20 ) + , ( "முப்பதாம்", 30 ) + , ( "நாற்பதாம்", 40 ) + , ( "ஐம்பதாம்", 50 ) + , ( "அறுபதாம்", 60 ) + , ( "எழுபதாம்", 70 ) + , ( "எண்பதாம்", 80 ) + , ( "தொண்ணூறாம்", 90 ) + ] + +ruleTens :: Rule +ruleTens = Rule + { name = "integer (20..90)" + , pattern = + [ regex "(இருபதாம்|முப்பதாம்|நாற்பதாம்|ஐம்பதாம்|அறுபதாம்|எழுபதாம்|எண்பதாம்|தொண்ணூறாம்)" + ] + , prod = \case + (Token RegexMatch (GroupMatch (match:_)):_) -> + ordinal <$> HashMap.lookup (Text.toLower match) tensMap + _ -> Nothing + } +tensOrdinalMap :: HashMap Text Int +tensOrdinalMap = HashMap.fromList + [ ( "இருபத்தி", 20 ) + , ( "முப்பத்து", 30 ) + , ( "நாற்பத்து", 40 ) + , ( "ஐம்பத்தி", 50 ) + , ( "அறுபத்", 60 ) + , ( "எழுபத்தி", 70 ) + , ( "எண்பத்தி", 80 ) + , ( "தொண்ணுற்று", 90 ) + ] + +oneToNineOrdinalMap :: HashMap Text Int +oneToNineOrdinalMap = HashMap.fromList + [ ( "ஒன்றாம்", 1 ) + , ( "இரண்டாம்", 2 ) + , ( "மூன்றாம்", 3 ) + , ( "நான்காம்", 4 ) + , ( "ஐந்தாம்", 5 ) + , ( "ஆறாம்", 6 ) + , ( "ஏழாம்", 7 ) + , ( "எட்டாம்", 8 ) + , ( "ஒன்பதாம்", 9 ) + ] + +ruleCompositeTens :: Rule +ruleCompositeTens = Rule + { name = "integer ([2-9][1-9])" + , pattern = + [ regex "(இருபத்தி|முப்பத்து|நாற்பத்து|ஐம்பத்தி|அறுபத்|எழுபத்தி|எண்பத்தி|தொண்ணுற்று)(ஒன்றாம்|இரண்டாம்|மூன்றாம்|நான்காம்|ஐந்தாம்|ஆறாம்|ஏழாம்|எட்டாம்|ஒன்பதாம்)" + ] + , prod = \case + (Token RegexMatch (GroupMatch (m1:m2:_)):_) -> do + v1 <- HashMap.lookup (Text.toLower m1) tensOrdinalMap + v2 <- HashMap.lookup (Text.toLower m2) oneToNineOrdinalMap + Just $ ordinal $ (v1 + v2) + _ -> Nothing + } + +ruleOrdinalDigits :: Rule +ruleOrdinalDigits = Rule + { name = "ordinal (digits)" + , pattern = + [ regex "0*(\\d+)\\." + ] + , prod = \case + ( Token RegexMatch (GroupMatch (match :_)) : _) -> ordinal <$> parseInt match + _ -> Nothing + } + +rules :: [Rule] +rules = + [ ruleOrdinalDigits + , ruleOneToNine + , ruleTenToNineteen + , ruleTens + , ruleCompositeTens + ] diff --git a/Duckling/Rules/TA.hs b/Duckling/Rules/TA.hs index 80f2120d..9e435b13 100644 --- a/Duckling/Rules/TA.hs +++ b/Duckling/Rules/TA.hs @@ -18,6 +18,7 @@ import Duckling.Dimensions.Types import Duckling.Locale import Duckling.Types import qualified Duckling.Numeral.TA.Rules as Numeral +import qualified Duckling.Ordinal.TA.Rules as Ordinal defaultRules :: Some Dimension -> [Rule] defaultRules = langRules @@ -32,7 +33,7 @@ langRules (This Distance) = [] langRules (This Duration) = [] langRules (This Numeral) = Numeral.rules langRules (This Email) = [] -langRules (This Ordinal) = [] +langRules (This Ordinal) = Ordinal.rules langRules (This PhoneNumber) = [] langRules (This Quantity) = [] langRules (This RegexMatch) = [] diff --git a/duckling.cabal b/duckling.cabal index 6122d6e7..72d5f108 100644 --- a/duckling.cabal +++ b/duckling.cabal @@ -454,6 +454,8 @@ library , Duckling.Ordinal.RU.Rules , Duckling.Ordinal.SV.Corpus , Duckling.Ordinal.SV.Rules + , Duckling.Ordinal.TA.Corpus + , Duckling.Ordinal.TA.Rules , Duckling.Ordinal.TR.Corpus , Duckling.Ordinal.TR.Rules , Duckling.Ordinal.UK.Corpus @@ -854,6 +856,7 @@ test-suite duckling-test , Duckling.Ordinal.RO.Tests , Duckling.Ordinal.RU.Tests , Duckling.Ordinal.SV.Tests + , Duckling.Ordinal.TA.Tests , Duckling.Ordinal.TR.Tests , Duckling.Ordinal.UK.Tests , Duckling.Ordinal.VI.Tests diff --git a/tests/Duckling/Ordinal/TA/Tests.hs b/tests/Duckling/Ordinal/TA/Tests.hs new file mode 100644 index 00000000..22fa3825 --- /dev/null +++ b/tests/Duckling/Ordinal/TA/Tests.hs @@ -0,0 +1,23 @@ +-- Copyright (c) 2016-present, Facebook, Inc. +-- All rights reserved. +-- +-- This source code is licensed under the BSD-style license found in the +-- LICENSE file in the root directory of this source tree. An additional grant +-- of patent rights can be found in the PATENTS file in the same directory. + +module Duckling.Ordinal.TA.Tests + ( tests + ) where + +import Data.String +import Prelude +import Test.Tasty + +import Duckling.Dimensions.Types +import Duckling.Ordinal.TA.Corpus +import Duckling.Testing.Asserts + +tests :: TestTree +tests = testGroup "TA Tests" + [ makeCorpusTest [This Ordinal] corpus + ] diff --git a/tests/Duckling/Ordinal/Tests.hs b/tests/Duckling/Ordinal/Tests.hs index cef7c44d..425009bd 100644 --- a/tests/Duckling/Ordinal/Tests.hs +++ b/tests/Duckling/Ordinal/Tests.hs @@ -37,6 +37,7 @@ import qualified Duckling.Ordinal.PT.Tests as PT import qualified Duckling.Ordinal.RO.Tests as RO import qualified Duckling.Ordinal.RU.Tests as RU import qualified Duckling.Ordinal.SV.Tests as SV +import qualified Duckling.Ordinal.TA.Tests as TA import qualified Duckling.Ordinal.TR.Tests as TR import qualified Duckling.Ordinal.UK.Tests as UK import qualified Duckling.Ordinal.VI.Tests as VI @@ -69,6 +70,7 @@ tests = testGroup "Ordinal Tests" , RO.tests , RU.tests , SV.tests + , TA.tests , TR.tests , UK.tests , VI.tests