Merge pull request #209 from andrewthad/analysis_setting

Support analyzer with custom tokenizer, only ngram for now. Also, add…
Chris Allen 2017-12-05 12:52:15 -06:00 committed by GitHub
commit 9092939db2
3 changed files with 146 additions and 2 deletions


@@ -25,6 +25,7 @@ module Database.V5.Bloodhound.Client
withBH
-- ** Indices
, createIndex
, createIndexWith
, deleteIndex
, updateIndexSettings
, getIndexSettings
@@ -512,6 +513,28 @@ createIndex indexSettings (IndexName indexName) =
where url = joinPath [indexName]
body = Just $ encode indexSettings
-- | Create an index, providing it with any number of settings. This
-- is more expressive than 'createIndex' but is more verbose
-- for the common case of configuring only the shard count and
-- replica count.
createIndexWith :: MonadBH m
=> [UpdatableIndexSetting]
-> Int -- ^ shard count
-> IndexName
-> m Reply
createIndexWith updates shards (IndexName indexName) =
bindM2 put url (return (Just body))
where url = joinPath [indexName]
body = encode $ object
["settings" .= deepMerge
( HM.singleton "index.number_of_shards" (toJSON shards) :
[u | Object u <- toJSON <$> updates]
)
]
oPath :: ToJSON a => NonEmpty Text -> a -> Value
oPath (k :| []) v = object [k .= v]
oPath (k :| (h:t)) v = object [k .= oPath (h :| t) v]
-- | 'deleteIndex' will delete an index given a 'Server', and an 'IndexName'.
--
@@ -545,7 +568,6 @@ getIndexSettings (IndexName indexName) = do
parseEsResponse =<< get =<< url
where url = joinPath [indexName, "_settings"]
-- | 'forceMergeIndex'
--
-- The force merge API allows to force merging of one or more indices through
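
A hedged usage sketch of the new entry point (the index name and the chosen settings are illustrative; the setting constructors are existing 'UpdatableIndexSetting' cases):

createEventsIndex :: BH IO Reply
createEventsIndex =
  createIndexWith
    [ NumberOfReplicas (ReplicaCount 1)  -- one replica per shard
    , MappingTotalFieldsLimit 2000       -- raise the mapping field cap
    ]
    2 -- shard count, merged in as "index.number_of_shards"
    (IndexName "events")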


@@ -202,6 +202,7 @@ module Database.V5.Bloodhound.Types
, ZeroTermsQuery(..)
, CutoffFrequency(..)
, Analyzer(..)
, Tokenizer(..)
, MaxExpansions(..)
, Lenient(..)
, MatchQueryType(..)
@@ -376,6 +377,12 @@ module Database.V5.Bloodhound.Types
, EsUsername(..)
, EsPassword(..)
, Analysis(..)
, AnalyzerDefinition(..)
, TokenizerDefinition(..)
, Ngram(..)
, TokenChar(..)
) where
import Control.Applicative as A
@@ -583,8 +590,89 @@ data UpdatableIndexSetting = NumberOfReplicas ReplicaCount
| IndexCompoundOnFlush Bool
| WarmerEnabled Bool
| MappingTotalFieldsLimit Int
| AnalysisSetting Analysis
-- ^ Analysis is not a dynamic setting; it can only be updated on a closed index.
deriving (Eq, Show, Generic, Typeable)
data Analysis = Analysis
{ analysisAnalyzer :: M.Map Text AnalyzerDefinition
, analysisTokenizer :: M.Map Text TokenizerDefinition
} deriving (Eq,Show,Generic,Typeable)
instance ToJSON Analysis where
toJSON (Analysis analyzer tokenizer) = object
[ "analyzer" .= analyzer
, "tokenizer" .= tokenizer
]
instance FromJSON Analysis where
parseJSON = withObject "Analysis" $ \m -> Analysis
<$> m .: "analyzer"
<*> m .: "tokenizer"
data AnalyzerDefinition = AnalyzerDefinition
{ analyzerDefinitionTokenizer :: Maybe Tokenizer
} deriving (Eq,Show,Generic,Typeable)
instance ToJSON AnalyzerDefinition where
toJSON (AnalyzerDefinition tokenizer) = object $ catMaybes
[ fmap ("tokenizer" .=) tokenizer
]
instance FromJSON AnalyzerDefinition where
parseJSON = withObject "AnalyzerDefinition" $ \m -> AnalyzerDefinition
<$> m .:? "tokenizer"
data TokenizerDefinition
= TokenizerDefinitionNgram Ngram
deriving (Eq,Show,Generic,Typeable)
instance ToJSON TokenizerDefinition where
toJSON x = case x of
TokenizerDefinitionNgram (Ngram minGram maxGram tokenChars) -> object
[ "type" .= ("ngram" :: Text)
, "min_gram" .= minGram
, "max_gram" .= maxGram
, "token_chars" .= tokenChars
]
instance FromJSON TokenizerDefinition where
parseJSON = withObject "TokenizerDefinition" $ \m -> do
typ <- m .: "type" :: Parser Text
case typ of
"ngram" -> fmap TokenizerDefinitionNgram $ Ngram
<$> (fmap unStringlyTypedInt (m .: "min_gram"))
<*> (fmap unStringlyTypedInt (m .: "max_gram"))
<*> m .: "token_chars"
_ -> fail "invalid TokenizerDefinition"
data Ngram = Ngram
{ ngramMinGram :: Int
, ngramMaxGram :: Int
, ngramTokenChars :: [TokenChar]
} deriving (Eq,Show,Generic,Typeable)
data TokenChar = TokenLetter | TokenDigit | TokenWhitespace | TokenPunctuation | TokenSymbol
deriving (Eq,Read,Show,Generic,Typeable)
instance ToJSON TokenChar where
toJSON t = String $ case t of
TokenLetter -> "letter"
TokenDigit -> "digit"
TokenWhitespace -> "whitespace"
TokenPunctuation -> "punctuation"
TokenSymbol -> "symbol"
instance FromJSON TokenChar where
parseJSON = withText "TokenChar" $ \t -> case t of
"letter" -> return TokenLetter
"digit" -> return TokenDigit
"whitespace" -> return TokenWhitespace
"punctuation" -> return TokenPunctuation
"symbol" -> return TokenSymbol
_ -> fail "invalid TokenChar"
data AllocationPolicy = AllocAll
-- ^ Allows shard allocation for all shards.
| AllocPrimaries
@@ -1020,6 +1108,8 @@ newtype CutoffFrequency =
CutoffFrequency Double deriving (Eq, Read, Show, Generic, ToJSON, FromJSON, Typeable)
newtype Analyzer =
Analyzer Text deriving (Eq, Read, Show, Generic, ToJSON, FromJSON, Typeable)
newtype Tokenizer =
Tokenizer Text deriving (Eq, Read, Show, Generic, ToJSON, FromJSON, Typeable)
newtype MaxExpansions =
MaxExpansions Int deriving (Eq, Read, Show, Generic, ToJSON, FromJSON, Typeable)
@@ -2990,6 +3080,7 @@ instance ToJSON UpdatableIndexSetting where
toJSON (BlocksWrite x) = oPath ("blocks" :| ["write"]) x
toJSON (BlocksMetaData x) = oPath ("blocks" :| ["metadata"]) x
toJSON (MappingTotalFieldsLimit x) = oPath ("index" :| ["mapping","total_fields","limit"]) x
toJSON (AnalysisSetting x) = oPath ("index" :| ["analysis"]) x
instance FromJSON UpdatableIndexSetting where
parseJSON = withObject "UpdatableIndexSetting" parse
@@ -3022,6 +3113,7 @@ instance FromJSON UpdatableIndexSetting where
<|> blocksWrite `taggedAt` ["blocks", "write"]
<|> blocksMetaData `taggedAt` ["blocks", "metadata"]
<|> mappingTotalFieldsLimit `taggedAt` ["index", "mapping", "total_fields", "limit"]
<|> analysisSetting `taggedAt` ["index", "analysis"]
where taggedAt f ks = taggedAt' f (Object o) ks
taggedAt' f v [] = f =<< (parseJSON v <|> (parseJSON (unStringlyTypeJSON v)))
taggedAt' f v (k:ks) = withObject "Object" (\o -> do v' <- o .: k
@@ -3055,6 +3147,7 @@ instance FromJSON UpdatableIndexSetting where
blocksWrite = pure . BlocksWrite
blocksMetaData = pure . BlocksMetaData
mappingTotalFieldsLimit = pure . MappingTotalFieldsLimit
analysisSetting = pure . AnalysisSetting
instance FromJSON IndexSettingsSummary where
parseJSON = withObject "IndexSettingsSummary" parse
@@ -4643,10 +4736,13 @@ instance FromJSON NodeDataPathStats where
newtype StringlyTypedDouble = StringlyTypedDouble { unStringlyTypedDouble :: Double }
instance FromJSON StringlyTypedDouble where
parseJSON = fmap StringlyTypedDouble . parseJSON . unStringlyTypeJSON
newtype StringlyTypedInt = StringlyTypedInt { unStringlyTypedInt :: Int }
instance FromJSON StringlyTypedInt where
parseJSON = fmap StringlyTypedInt . parseJSON . unStringlyTypeJSON
instance FromJSON NodeFSTotalStats where
parseJSON = withObject "NodeFSTotalStats" parse


@@ -907,6 +907,12 @@ instance Arbitrary RegexpFlag where arbitrary = sopArbitrary; shrink = genericShrink
instance Arbitrary BoolMatch where arbitrary = sopArbitrary; shrink = genericShrink
instance Arbitrary Term where arbitrary = sopArbitrary; shrink = genericShrink
instance Arbitrary IndexSettings where arbitrary = sopArbitrary; shrink = genericShrink
instance Arbitrary TokenChar where arbitrary = sopArbitrary; shrink = genericShrink
instance Arbitrary Ngram where arbitrary = sopArbitrary; shrink = genericShrink
instance Arbitrary TokenizerDefinition where arbitrary = sopArbitrary; shrink = genericShrink
instance Arbitrary AnalyzerDefinition where arbitrary = sopArbitrary; shrink = genericShrink
instance Arbitrary Analysis where arbitrary = sopArbitrary; shrink = genericShrink
instance Arbitrary Tokenizer where arbitrary = sopArbitrary; shrink = genericShrink
instance Arbitrary UpdatableIndexSetting where arbitrary = sopArbitrary; shrink = genericShrink
instance Arbitrary Bytes where arbitrary = sopArbitrary; shrink = genericShrink
instance Arbitrary AllocationPolicy where arbitrary = sopArbitrary; shrink = genericShrink
@@ -1622,6 +1628,26 @@ main = hspec $ do
(IndexSettings (ShardCount 1) (ReplicaCount 0))
(NE.toList updates))
it "accepts customer analyzers" $ when' (atleast es50) $ withTestEnv $ do
_ <- deleteExampleIndex
let analysis = Analysis
(M.singleton "ex_analyzer" (AnalyzerDefinition (Just (Tokenizer "ex_tokenizer"))))
(M.singleton "ex_tokenizer"
( TokenizerDefinitionNgram
( Ngram 3 4 [TokenLetter,TokenDigit])
)
)
updates = [AnalysisSetting analysis]
createResp <- createIndexWith (updates ++ [NumberOfReplicas (ReplicaCount 0)]) 1 testIndex
liftIO $ validateStatus createResp 200
getResp <- getIndexSettings testIndex
liftIO $
getResp `shouldBe` Right (IndexSettingsSummary
testIndex
(IndexSettings (ShardCount 1) (ReplicaCount 0))
updates
)
describe "Index Optimization" $ do
it "returns a successful response upon completion" $ withTestEnv $ do
_ <- createExampleIndex