CS: Setup + basic Numeral

Summary:
* Setup for Czech
* Basic `Numeral` (0-10 integers + digits) from http://www.omniglot.com/language/numbers/czech.htm

Reviewed By: JonCoens

Differential Revision: D5044775

fbshipit-source-id: b5cd9d2
This commit is contained in:
Julien Odent 2017-05-11 09:30:45 -07:00 committed by Facebook Github Bot
parent 0cd42e9a32
commit 37829902b7
13 changed files with 246 additions and 8 deletions

View File

@ -21,6 +21,7 @@ import qualified Data.HashSet as HashSet
import Duckling.Dimensions.Types
import qualified Duckling.Dimensions.Common as CommonDimensions
import qualified Duckling.Dimensions.AR as ARDimensions
import qualified Duckling.Dimensions.CS as CSDimensions
import qualified Duckling.Dimensions.DA as DADimensions
import qualified Duckling.Dimensions.DE as DEDimensions
import qualified Duckling.Dimensions.EN as ENDimensions
@ -77,6 +78,7 @@ dependents (This Volume) = HashSet.singleton (This Numeral)
langDimensions :: Lang -> [Some Dimension]
langDimensions AR = ARDimensions.allDimensions
langDimensions CS = CSDimensions.allDimensions
langDimensions DA = DADimensions.allDimensions
langDimensions DE = DEDimensions.allDimensions
langDimensions EN = ENDimensions.allDimensions

18
Duckling/Dimensions/CS.hs Normal file
View File

@ -0,0 +1,18 @@
-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree. An additional grant
-- of patent rights can be found in the PATENTS file in the same directory.
module Duckling.Dimensions.CS
( allDimensions
) where
import Duckling.Dimensions.Types
-- | Dimensions listed for Czech (CS) by this module: currently just
-- 'Numeral'.
allDimensions :: [Some Dimension]
allDimensions = [This Numeral]

View File

@ -23,6 +23,7 @@ import qualified TextShow as TS
data Lang
= AR
| CS
| DA
| DE
| EN

View File

@ -0,0 +1,47 @@
-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree. An additional grant
-- of patent rights can be found in the PATENTS file in the same directory.
{-# LANGUAGE OverloadedStrings #-}
module Duckling.Numeral.CS.Corpus
( corpus ) where
import Data.String
import Prelude
import Duckling.Lang
import Duckling.Numeral.Types
import Duckling.Resolve
import Duckling.Testing.Types
-- | The Czech Numeral corpus: the shared test context with its language
-- switched to 'CS', paired with every example in 'allExamples'.
corpus :: Corpus
corpus = (czechContext, allExamples)
  where
    czechContext = testContext {lang = CS}
-- | Example phrases for the Czech Numeral dimension, grouped by the numeric
-- value each phrase is expected to resolve to.
allExamples :: [Example]
allExamples = concatMap expand
  [ (0, ["0", "nula"])
  , (1, ["1", "jeden", "jedna", "jedno"])
  , (2, ["dva", "dvĕ"])
  , (3, ["tři"])
  , (4, ["čtyři"])
  ]
  where
    -- Turn one (expected value, phrases) row into its list of examples.
    expand (value, phrases) = examples (NumeralValue value) phrases

View File

@ -0,0 +1,78 @@
-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree. An additional grant
-- of patent rights can be found in the PATENTS file in the same directory.
{-# LANGUAGE GADTs #-}
{-# LANGUAGE OverloadedStrings #-}
module Duckling.Numeral.CS.Rules
( rules ) where
import Data.HashMap.Strict (HashMap)
import Data.Maybe
import Data.String
import Data.Text (Text)
import Prelude
import qualified Data.HashMap.Strict as HashMap
import qualified Data.Text as Text
import Duckling.Dimensions.Types
import Duckling.Numeral.Helpers
import Duckling.Numeral.Types (NumeralData (..))
import Duckling.Regex.Types
import Duckling.Types
import qualified Duckling.Numeral.Types as TNumeral
-- | Rule for integers written with digits: matches a run of 1 to 18 decimal
-- digits, parses the captured text with 'parseInt' and wraps the result via
-- 'integer'. Yields 'Nothing' when the tokens have any other shape or when
-- parsing fails.
ruleIntegerNumeric :: Rule
ruleIntegerNumeric = Rule
  { name = "integer (numeric)"
  , pattern = [regex "(\\d{1,18})"]
  , prod = \tokens -> case tokens of
      (Token RegexMatch (GroupMatch (digits:_)):_) ->
        parseInt digits >>= integer . toInteger
      _ -> Nothing
  }
-- | Lookup table from a lower-cased Czech cardinal word (0..10) to its
-- integer value. Every string the regex in 'ruleNumeral' can capture must
-- have a key here, otherwise the rule silently produces nothing for it.
ruleNumeralMap :: HashMap Text Integer
ruleNumeralMap = HashMap.fromList
  [ ( "nula", 0 )
  , ( "jeden", 1 )
  , ( "jedna", 1 )
  , ( "jedno", 1 )
  , ( "dva", 2 )
    -- Czech spells these words with e-caron (U+011B). The e-breve look-alike
    -- (U+0115) was the only form accepted originally and is kept so inputs
    -- that already resolved keep resolving.
  , ( "dv\x0115", 2 )
  , ( "dv\x011b", 2 )
  , ( "t\x0159i", 3 )
  , ( "\x010dty\x0159i", 4 )
  , ( "p\x0115t", 5 )
  , ( "p\x011bt", 5 )
    -- Numeric escapes consume every following hex digit, and 'e' is one:
    -- without the empty escape this key would start with U+161E instead of
    -- s-caron (U+0161) followed by "est", and the lookup for 6 would fail.
  , ( "\x0161\&est", 6 )
  , ( "sedm", 7 )
  , ( "osm", 8 )
  , ( "dev\x0115t", 9 )
  , ( "dev\x011bt", 9 )
  , ( "deset", 10 )
  ]

-- | Rule for the Czech number words zero through ten. The first regex group
-- (the whole word) is lower-cased and looked up in 'ruleNumeralMap'; the
-- result is wrapped via 'integer'. Yields 'Nothing' when the tokens have any
-- other shape or the word has no entry in the map.
ruleNumeral :: Rule
ruleNumeral = Rule
  { name = "number (0..10)"
  , pattern =
    -- Each escaped code point sits inside parentheses so that the escape is
    -- terminated by a non-hex character. Both e-caron (U+011B) and e-breve
    -- (U+0115) spellings are accepted, mirroring the keys of the map.
    [ regex "(nula|jed(en|n[ao])|dv(a|\x0115|\x011b)|t(\x0159)i|(\x010d)ty(\x0159)i|p(\x0115|\x011b)t|(\x0161)est|sedm|osm|dev(\x0115|\x011b)t|deset)"
    ]
  , prod = \tokens -> case tokens of
      (Token RegexMatch (GroupMatch (match:_)):_) ->
        HashMap.lookup (Text.toLower match) ruleNumeralMap >>= integer
      _ -> Nothing
  }
-- | All Czech Numeral rules defined in this module.
rules :: [Rule]
rules = [ruleIntegerNumeric, ruleNumeral]

View File

@ -12,6 +12,7 @@ module Duckling.Ranking.Classifiers
import Duckling.Lang
import qualified Duckling.Ranking.Classifiers.AR as ARClassifiers
import qualified Duckling.Ranking.Classifiers.CS as CSClassifiers
import qualified Duckling.Ranking.Classifiers.DA as DAClassifiers
import qualified Duckling.Ranking.Classifiers.DE as DEClassifiers
import qualified Duckling.Ranking.Classifiers.EN as ENClassifiers
@ -41,6 +42,7 @@ import Duckling.Ranking.Types
classifiers :: Lang -> Classifiers
classifiers AR = ARClassifiers.classifiers
classifiers CS = CSClassifiers.classifiers
classifiers DA = DAClassifiers.classifiers
classifiers DE = DEClassifiers.classifiers
classifiers EN = ENClassifiers.classifiers

View File

@ -0,0 +1,22 @@
-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree. An additional grant
-- of patent rights can be found in the PATENTS file in the same directory.
-----------------------------------------------------------------
-- Auto-generated by regenClassifiers
--
-- DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
-- @generated
-----------------------------------------------------------------
{-# LANGUAGE OverloadedStrings #-}
module Duckling.Ranking.Classifiers.CS (classifiers) where
import Prelude
import Duckling.Ranking.Types
import qualified Data.HashMap.Strict as HashMap
import Data.String
-- | Ranking classifiers for Czech (CS): an empty map, i.e. no classifier
-- entries. NOTE(review): this module is marked as auto-generated by
-- regenClassifiers in its header, so presumably any change should come from
-- re-running the generator rather than from a manual edit -- confirm.
classifiers :: Classifiers
classifiers = HashMap.fromList []

View File

@ -23,6 +23,7 @@ import Duckling.Dimensions.Types
import Duckling.Lang
import qualified Duckling.Rules.AR as ARRules
import qualified Duckling.Rules.Common as CommonRules
import qualified Duckling.Rules.CS as CSRules
import qualified Duckling.Rules.DA as DARules
import qualified Duckling.Rules.DE as DERules
import qualified Duckling.Rules.EN as ENRules
@ -68,6 +69,7 @@ rulesFor' lang dim = CommonRules.rules dim ++ langRules lang dim
langRules :: Lang -> Some Dimension -> [Rule]
langRules AR = ARRules.rules
langRules CS = CSRules.rules
langRules DA = DARules.rules
langRules DE = DERules.rules
langRules EN = ENRules.rules

34
Duckling/Rules/CS.hs Normal file
View File

@ -0,0 +1,34 @@
-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree. An additional grant
-- of patent rights can be found in the PATENTS file in the same directory.
{-# LANGUAGE GADTs #-}
{-# LANGUAGE OverloadedStrings #-}
module Duckling.Rules.CS
( rules
) where
import Duckling.Dimensions.Types
import qualified Duckling.Numeral.CS.Rules as Numeral
import Duckling.Types
-- | Czech rules for one dimension. Only 'Numeral' has rules so far; every
-- other dimension is spelled out explicitly and maps to the empty list.
rules :: Some Dimension -> [Rule]
rules dimension = case dimension of
  This Distance -> []
  This Duration -> []
  This Numeral -> Numeral.rules
  This Email -> []
  This AmountOfMoney -> []
  This Ordinal -> []
  This PhoneNumber -> []
  This Quantity -> []
  This RegexMatch -> []
  This Temperature -> []
  This Time -> []
  This TimeGrain -> []
  This Url -> []
  This Volume -> []

View File

@ -40,6 +40,7 @@ library
, Duckling.Rules
, Duckling.Rules.Common
, Duckling.Rules.AR
, Duckling.Rules.CS
, Duckling.Rules.DA
, Duckling.Rules.DE
, Duckling.Rules.EN
@ -72,32 +73,33 @@ library
, Duckling.Ranking.Extraction
, Duckling.Ranking.Rank
, Duckling.Ranking.Classifiers
, Duckling.Ranking.Classifiers.AR
, Duckling.Ranking.Classifiers.CS
, Duckling.Ranking.Classifiers.DA
, Duckling.Ranking.Classifiers.DE
, Duckling.Ranking.Classifiers.EN
, Duckling.Ranking.Classifiers.ES
, Duckling.Ranking.Classifiers.ET
, Duckling.Ranking.Classifiers.FR
, Duckling.Ranking.Classifiers.GA
, Duckling.Ranking.Classifiers.HE
, Duckling.Ranking.Classifiers.HR
, Duckling.Ranking.Classifiers.ID
, Duckling.Ranking.Classifiers.IT
, Duckling.Ranking.Classifiers.JA
, Duckling.Ranking.Classifiers.KO
, Duckling.Ranking.Classifiers.MY
, Duckling.Ranking.Classifiers.NB
, Duckling.Ranking.Classifiers.NL
, Duckling.Ranking.Classifiers.PL
, Duckling.Ranking.Classifiers.PT
, Duckling.Ranking.Classifiers.RO
, Duckling.Ranking.Classifiers.SV
, Duckling.Ranking.Classifiers.ZH
, Duckling.Ranking.Classifiers.AR
, Duckling.Ranking.Classifiers.ET
, Duckling.Ranking.Classifiers.ID
, Duckling.Ranking.Classifiers.JA
, Duckling.Ranking.Classifiers.MY
, Duckling.Ranking.Classifiers.NL
, Duckling.Ranking.Classifiers.RU
, Duckling.Ranking.Classifiers.SV
, Duckling.Ranking.Classifiers.TR
, Duckling.Ranking.Classifiers.UK
, Duckling.Ranking.Classifiers.VI
, Duckling.Ranking.Classifiers.ZH
-- ------------------------------------------------------------------
-- Dimensions
@ -105,6 +107,7 @@ library
, Duckling.Dimensions.Common
, Duckling.Dimensions.Types
, Duckling.Dimensions.AR
, Duckling.Dimensions.CS
, Duckling.Dimensions.DA
, Duckling.Dimensions.DE
, Duckling.Dimensions.EN
@ -226,6 +229,8 @@ library
-- Numeral
, Duckling.Numeral.AR.Corpus
, Duckling.Numeral.AR.Rules
, Duckling.Numeral.CS.Corpus
, Duckling.Numeral.CS.Rules
, Duckling.Numeral.DA.Corpus
, Duckling.Numeral.DA.Rules
, Duckling.Numeral.DE.Corpus
@ -568,6 +573,7 @@ test-suite duckling-test
-- Numeral
, Duckling.Numeral.AR.Tests
, Duckling.Numeral.CS.Tests
, Duckling.Numeral.DA.Tests
, Duckling.Numeral.DE.Tests
, Duckling.Numeral.EN.Tests

View File

@ -69,6 +69,7 @@ regenClassifiers lang = do
-- | The training set (corpus)
trainSet = case lang of
AR -> (testContext, [])
CS -> (testContext, [])
DA -> DATime.corpus
DE -> DETime.corpus
EN -> ENTime.corpus

View File

@ -0,0 +1,23 @@
-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree. An additional grant
-- of patent rights can be found in the PATENTS file in the same directory.
module Duckling.Numeral.CS.Tests
( tests ) where
import Prelude
import Data.String
import Test.Tasty
import Duckling.Dimensions.Types
import Duckling.Numeral.CS.Corpus
import Duckling.Testing.Asserts
-- | Test group "CS Tests": runs the Czech corpus against the Numeral
-- dimension.
tests :: TestTree
tests = testGroup "CS Tests" corpusTests
  where
    corpusTests = [makeCorpusTest [This Numeral] corpus]

View File

@ -13,6 +13,7 @@ import Prelude
import Test.Tasty
import qualified Duckling.Numeral.AR.Tests as AR
import qualified Duckling.Numeral.CS.Tests as CS
import qualified Duckling.Numeral.DA.Tests as DA
import qualified Duckling.Numeral.DE.Tests as DE
import qualified Duckling.Numeral.EN.Tests as EN
@ -42,6 +43,7 @@ import qualified Duckling.Numeral.ZH.Tests as ZH
tests :: TestTree
tests = testGroup "Numeral Tests"
[ AR.tests
, CS.tests
, DA.tests
, DE.tests
, EN.tests