From 4cc75a0d983fc247835da6ea86c30ece0a77dcf9 Mon Sep 17 00:00:00 2001 From: Chris Allen Date: Thu, 8 Feb 2018 17:29:52 -0600 Subject: [PATCH] Auto id integration for the module reorg --- bloodhound.cabal | 4 +- src/Database/V5/Bloodhound/Client.hs | 19 ++++++- .../V5/Bloodhound/Types/Internal/Client.hs | 23 ++++++-- tests/V5/tests.hs | 52 +++++++++++++++---- 4 files changed, 84 insertions(+), 14 deletions(-) diff --git a/bloodhound.cabal b/bloodhound.cabal index cd066fd..17b3b87 100644 --- a/bloodhound.cabal +++ b/bloodhound.cabal @@ -106,5 +106,7 @@ test-suite bloodhound-tests exceptions, temporary, unix-compat, - network-uri + network-uri, + microlens, + microlens-aeson default-language: Haskell2010 diff --git a/src/Database/V5/Bloodhound/Client.hs b/src/Database/V5/Bloodhound/Client.hs index fbf2e3f..b93c5a2 100644 --- a/src/Database/V5/Bloodhound/Client.hs +++ b/src/Database/V5/Bloodhound/Client.hs @@ -858,7 +858,6 @@ deleteDocument (IndexName indexName) -- >>> _ <- runBH' $ refreshIndex testIndex bulk :: MonadBH m => V.Vector BulkOperation -> m Reply bulk bulkOps = do - liftIO $ print body bindM2 post url (return body) where url = joinPath ["_bulk"] body = Just $ encodeBulkOperations bulkOps @@ -888,6 +887,12 @@ mkBulkStreamValue operation indexName mappingName docId = , "_type" .= mappingName , "_id" .= docId]] +mkBulkStreamValueAuto :: Text -> Text -> Text -> Value +mkBulkStreamValueAuto operation indexName mappingName = + object [operation .= + object [ "_index" .= indexName + , "_type" .= mappingName]] + -- | 'encodeBulkOperation' is a convenience function for dumping a single 'BulkOperation' -- into an 'L.ByteString' -- @@ -901,6 +906,18 @@ encodeBulkOperation (BulkIndex (IndexName indexName) where metadata = mkBulkStreamValue "index" indexName mappingName docId blob = encode metadata `mappend` "\n" `mappend` encode value +encodeBulkOperation (BulkIndexAuto (IndexName indexName) + (MappingName mappingName) + value) = blob + where metadata = 
mkBulkStreamValueAuto "index" indexName mappingName + blob = encode metadata `mappend` "\n" `mappend` encode value + +encodeBulkOperation (BulkIndexEncodingAuto (IndexName indexName) + (MappingName mappingName) + encoding) = toLazyByteString blob + where metadata = toEncoding (mkBulkStreamValueAuto "index" indexName mappingName) + blob = fromEncoding metadata <> "\n" <> fromEncoding encoding + encodeBulkOperation (BulkCreate (IndexName indexName) (MappingName mappingName) (DocId docId) value) = blob diff --git a/src/Database/V5/Bloodhound/Types/Internal/Client.hs b/src/Database/V5/Bloodhound/Types/Internal/Client.hs index 17d9136..c92a8a6 100644 --- a/src/Database/V5/Bloodhound/Types/Internal/Client.hs +++ b/src/Database/V5/Bloodhound/Types/Internal/Client.hs @@ -358,15 +358,32 @@ data AllocationPolicy = AllocAll {-| 'BulkOperation' is a sum type for expressing the four kinds of bulk operation index, create, delete, and update. 'BulkIndex' behaves like an "upsert", 'BulkCreate' will fail if a document already exists at the DocId. - - + Consult the <https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-bulk.html Bulk API documentation> + for further explanation. + Warning: Bulk operations suffixed with @Auto@ rely on ElasticSearch to + generate the id. Often, people use auto-generated identifiers when + ElasticSearch is the only place that their data is stored. Do not let + ElasticSearch be the only place your data is stored. It does not guarantee + durability, and it may silently discard data. + This is + discussed further on github. -} data BulkOperation = BulkIndex IndexName MappingName DocId Value + -- ^ Create the document, replacing it if it already exists. + | BulkIndexAuto IndexName MappingName Value + -- ^ Create a document with an autogenerated id. + | BulkIndexEncodingAuto IndexName MappingName Encoding + -- ^ Create a document with an autogenerated id. Use fast JSON encoding. | BulkCreate IndexName MappingName DocId Value + -- ^ Create a document, failing if it already exists. 
| BulkCreateEncoding IndexName MappingName DocId Encoding + -- ^ Create a document, failing if it already exists. Use fast JSON encoding. | BulkDelete IndexName MappingName DocId - | BulkUpdate IndexName MappingName DocId Value deriving (Eq, Show) + -- ^ Delete the document + | BulkUpdate IndexName MappingName DocId Value + -- ^ Update the document, merging the new value with the existing one. + deriving (Eq, Show) {-| 'EsResult' describes the standard wrapper JSON document that you see in successful Elasticsearch lookups or lookups that couldn't find the document. diff --git a/tests/V5/tests.hs b/tests/V5/tests.hs index 9cd1146..c29c4b9 100644 --- a/tests/V5/tests.hs +++ b/tests/V5/tests.hs @@ -46,6 +46,9 @@ import Data.Typeable import qualified Data.Vector as V import qualified Data.Version as Vers import Database.V5.Bloodhound +import Lens.Micro +import Lens.Micro.Aeson +import qualified Lens.Micro.Aeson as LMA import Network.HTTP.Client hiding (Proxy) import qualified Network.HTTP.Types.Method as NHTM import qualified Network.HTTP.Types.Status as NHTS @@ -59,7 +62,7 @@ import Test.QuickCheck.Property.Monoid (T (..), eq, prop_Monoid) import Test.Hspec.QuickCheck (prop) import Test.QuickCheck import Test.QuickCheck.TH.Generators -import Text.Pretty.Simple (pPrint) +-- import Text.Pretty.Simple (pPrint) testServer :: Server testServer = Server "http://localhost:9200" @@ -580,13 +583,13 @@ instance Arbitrary AliasRouting where arbitrary = oneof [allAlias ,one ,theOther - ,both] + ,both'] where one = GranularAliasRouting <$> (Just <$> arbitrary) <*> pure Nothing theOther = GranularAliasRouting Nothing <$> (Just <$> arbitrary) - both = GranularAliasRouting + both' = GranularAliasRouting <$> (Just <$> arbitrary) <*> (Just <$> arbitrary) allAlias = AllAliasRouting <$> arbitrary @@ -1123,20 +1126,28 @@ main = hspec $ do let firstTest = BulkTest "blah" let secondTest = BulkTest "bloo" let thirdTest = BulkTest "graffle" + let fourthTest = BulkTest "garabadoo" + let 
fifthTest = BulkTest "serenity" let firstDoc = BulkIndex testIndex testMapping (DocId "2") (toJSON firstTest) let secondDoc = BulkCreate testIndex testMapping (DocId "3") (toJSON secondTest) let thirdDoc = BulkCreateEncoding testIndex testMapping (DocId "4") (toEncoding thirdTest) - let stream = V.fromList [firstDoc, secondDoc, thirdDoc] - bulkResp <- bulk stream - liftIO $ pPrint bulkResp - refreshResp <- refreshIndex testIndex - liftIO $ pPrint refreshResp + let fourthDoc = BulkIndexAuto testIndex + testMapping (toJSON fourthTest) + let fifthDoc = BulkIndexEncodingAuto testIndex + testMapping (toEncoding fifthTest) + let stream = V.fromList [firstDoc, secondDoc, thirdDoc, fourthDoc, fifthDoc] + _ <- bulk stream + -- liftIO $ pPrint bulkResp + _ <- refreshIndex testIndex + -- liftIO $ pPrint refreshResp fDoc <- getDocument testIndex testMapping (DocId "2") sDoc <- getDocument testIndex testMapping (DocId "3") tDoc <- getDocument testIndex testMapping (DocId "4") + -- note that we cannot query for fourthDoc and fifthDoc since we + -- do not know their autogenerated ids. let maybeFirst = eitherDecode $ responseBody fDoc @@ -1149,11 +1160,34 @@ main = hspec $ do eitherDecode $ responseBody tDoc :: Either String (EsResult BulkTest) - liftIO $ pPrint [maybeFirst, maybeSecond, maybeThird] + -- liftIO $ pPrint [maybeFirst, maybeSecond, maybeThird] liftIO $ do fmap getSource maybeFirst `shouldBe` Right (Just firstTest) fmap getSource maybeSecond `shouldBe` Right (Just secondTest) fmap getSource maybeThird `shouldBe` Right (Just thirdTest) + -- Since we can't get the docs by doc id, we check for their existence in + -- a match all query. 
+ let query = MatchAllQuery Nothing + let search = mkSearch (Just query) Nothing + resp <- searchByIndex testIndex search + parsed <- parseEsResponse resp :: BH IO (Either EsError (SearchResult Value)) + case parsed of + Left e -> + liftIO $ expectationFailure ("Expected a script-transformed result but got: " <> show e) + (Right sr) -> do + liftIO $ + hitsTotal (searchHits sr) `shouldBe` 6 + let nameList :: [Text] + nameList = + (hits (searchHits sr)) + ^.. traverse + . to hitSource + . _Just + . LMA.key "name" + . _String + liftIO $ + nameList + `shouldBe` ["blah","bloo","graffle","garabadoo","serenity"] describe "query API" $ do