Auto id integration for the module reorg

This commit is contained in:
Chris Allen 2018-02-08 17:29:52 -06:00
parent f7d31499d5
commit 4cc75a0d98
4 changed files with 84 additions and 14 deletions

View File

@ -106,5 +106,7 @@ test-suite bloodhound-tests
exceptions,
temporary,
unix-compat,
network-uri
network-uri,
microlens,
microlens-aeson
default-language: Haskell2010

View File

@ -858,7 +858,6 @@ deleteDocument (IndexName indexName)
-- >>> _ <- runBH' $ refreshIndex testIndex
bulk :: MonadBH m => V.Vector BulkOperation -> m Reply
bulk bulkOps = bindM2 post url (return body)
  where
    -- Every operation in the batch is sent to the single "_bulk" endpoint.
    url = joinPath ["_bulk"]
    -- The request payload is the whole vector as encoded by
    -- 'encodeBulkOperations'. It is deliberately not echoed to stdout:
    -- library code should not print request bodies.
    body = Just $ encodeBulkOperations bulkOps
@ -888,6 +887,12 @@ mkBulkStreamValue operation indexName mappingName docId =
, "_type" .= mappingName
, "_id" .= docId]]
-- | Build the action metadata 'Value' for a bulk operation that carries no
-- document id: a single-key object mapping @operation@ to an object that
-- holds only @_index@ and @_type@.
mkBulkStreamValueAuto :: Text -> Text -> Text -> Value
mkBulkStreamValueAuto operation indexName mappingName =
  object [operation .= target]
  where
    -- No "_id" key here, unlike 'mkBulkStreamValue'.
    target = object
      [ "_index" .= indexName
      , "_type"  .= mappingName
      ]
-- | 'encodeBulkOperation' is a convenience function for dumping a single 'BulkOperation'
-- into an 'L.ByteString'
--
@ -901,6 +906,18 @@ encodeBulkOperation (BulkIndex (IndexName indexName)
where metadata = mkBulkStreamValue "index" indexName mappingName docId
blob = encode metadata `mappend` "\n" `mappend` encode value
-- Index without a document id, from a 'Value': the metadata line built by
-- 'mkBulkStreamValueAuto', a newline, then the encoded document.
encodeBulkOperation (BulkIndexAuto (IndexName indexName)
                                   (MappingName mappingName)
                                   value) =
  encode actionLine <> "\n" <> encode value
  where
    actionLine = mkBulkStreamValueAuto "index" indexName mappingName

-- Same as above, but the document arrives as an 'Encoding', so both parts
-- are combined as builders and rendered once with 'toLazyByteString'.
encodeBulkOperation (BulkIndexEncodingAuto (IndexName indexName)
                                           (MappingName mappingName)
                                           encoding) =
  toLazyByteString $
    fromEncoding (toEncoding actionLine) <> "\n" <> fromEncoding encoding
  where
    actionLine = mkBulkStreamValueAuto "index" indexName mappingName
encodeBulkOperation (BulkCreate (IndexName indexName)
(MappingName mappingName)
(DocId docId) value) = blob

View File

@ -358,15 +358,32 @@ data AllocationPolicy = AllocAll
{-| 'BulkOperation' is a sum type for expressing the four kinds of bulk
    operation: index, create, delete, and update. 'BulkIndex' behaves like an
    "upsert", 'BulkCreate' will fail if a document already exists at the DocId.
    Consult the <http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/docs-bulk.html#docs-bulk Bulk API documentation>
    for further explanation.

    Warning: Bulk operations suffixed with @Auto@ rely on Elasticsearch to
    generate the id. Often, people use auto-generated identifiers when
    Elasticsearch is the only place that their data is stored. Do not let
    Elasticsearch be the only place your data is stored. It does not guarantee
    durability, and it may silently discard data.
    This <https://github.com/elastic/elasticsearch/issues/10708 issue> is
    discussed further on GitHub.
-}
data BulkOperation =
    BulkIndex IndexName MappingName DocId Value
    -- ^ Create the document, replacing it if it already exists.
  | BulkIndexAuto IndexName MappingName Value
    -- ^ Create a document with an autogenerated id.
  | BulkIndexEncodingAuto IndexName MappingName Encoding
    -- ^ Create a document with an autogenerated id. Use fast JSON encoding.
  | BulkCreate IndexName MappingName DocId Value
    -- ^ Create a document, failing if it already exists.
  | BulkCreateEncoding IndexName MappingName DocId Encoding
    -- ^ Create a document, failing if it already exists. Use fast JSON encoding.
  | BulkDelete IndexName MappingName DocId
    -- ^ Delete the document.
  | BulkUpdate IndexName MappingName DocId Value
    -- ^ Update the document, merging the new value with the existing one.
  deriving (Eq, Show)
{-| 'EsResult' describes the standard wrapper JSON document that you see in
successful Elasticsearch lookups or lookups that couldn't find the document.

View File

@ -46,6 +46,9 @@ import Data.Typeable
import qualified Data.Vector as V
import qualified Data.Version as Vers
import Database.V5.Bloodhound
import Lens.Micro
import Lens.Micro.Aeson
import qualified Lens.Micro.Aeson as LMA
import Network.HTTP.Client hiding (Proxy)
import qualified Network.HTTP.Types.Method as NHTM
import qualified Network.HTTP.Types.Status as NHTS
@ -59,7 +62,7 @@ import Test.QuickCheck.Property.Monoid (T (..), eq, prop_Monoid)
import Test.Hspec.QuickCheck (prop)
import Test.QuickCheck
import Test.QuickCheck.TH.Generators
import Text.Pretty.Simple (pPrint)
-- import Text.Pretty.Simple (pPrint)
-- | The Elasticsearch server the tests talk to: port 9200 on localhost.
testServer :: Server
testServer = Server "http://localhost:9200"
@ -580,13 +583,13 @@ instance Arbitrary AliasRouting where
arbitrary = oneof [allAlias, one, theOther, both']
  where
    -- First routing field set, second left empty.
    one = GranularAliasRouting
            <$> (Just <$> arbitrary)
            <*> pure Nothing
    -- First routing field empty, second set.
    theOther = GranularAliasRouting Nothing
                 <$> (Just <$> arbitrary)
    -- Both routing fields set.
    -- NOTE(review): presumably primed to avoid clashing with 'both' from the
    -- unqualified Lens.Micro import — confirm.
    both' = GranularAliasRouting
              <$> (Just <$> arbitrary)
              <*> (Just <$> arbitrary)
    -- A single routing value wrapped in 'AllAliasRouting'.
    allAlias = AllAliasRouting <$> arbitrary
@ -1123,20 +1126,28 @@ main = hspec $ do
let firstTest = BulkTest "blah"
let secondTest = BulkTest "bloo"
let thirdTest = BulkTest "graffle"
let fourthTest = BulkTest "garabadoo"
let fifthTest = BulkTest "serenity"
let firstDoc = BulkIndex testIndex
testMapping (DocId "2") (toJSON firstTest)
let secondDoc = BulkCreate testIndex
testMapping (DocId "3") (toJSON secondTest)
let thirdDoc = BulkCreateEncoding testIndex
testMapping (DocId "4") (toEncoding thirdTest)
let stream = V.fromList [firstDoc, secondDoc, thirdDoc]
bulkResp <- bulk stream
liftIO $ pPrint bulkResp
refreshResp <- refreshIndex testIndex
liftIO $ pPrint refreshResp
let fourthDoc = BulkIndexAuto testIndex
testMapping (toJSON fourthTest)
let fifthDoc = BulkIndexEncodingAuto testIndex
testMapping (toEncoding fifthTest)
let stream = V.fromList [firstDoc, secondDoc, thirdDoc, fourthDoc, fifthDoc]
_ <- bulk stream
-- liftIO $ pPrint bulkResp
_ <- refreshIndex testIndex
-- liftIO $ pPrint refreshResp
fDoc <- getDocument testIndex testMapping (DocId "2")
sDoc <- getDocument testIndex testMapping (DocId "3")
tDoc <- getDocument testIndex testMapping (DocId "4")
-- note that we cannot query for fourthDoc and fifthDoc since we
-- do not know their autogenerated ids.
let maybeFirst =
eitherDecode
$ responseBody fDoc
@ -1149,11 +1160,34 @@ main = hspec $ do
eitherDecode
$ responseBody tDoc
:: Either String (EsResult BulkTest)
liftIO $ pPrint [maybeFirst, maybeSecond, maybeThird]
-- liftIO $ pPrint [maybeFirst, maybeSecond, maybeThird]
liftIO $ do
fmap getSource maybeFirst `shouldBe` Right (Just firstTest)
fmap getSource maybeSecond `shouldBe` Right (Just secondTest)
fmap getSource maybeThird `shouldBe` Right (Just thirdTest)
-- Since we can't get the docs by doc id, we check for their existence in
-- a match all query.
let query = MatchAllQuery Nothing
let search = mkSearch (Just query) Nothing
resp <- searchByIndex testIndex search
parsed <- parseEsResponse resp :: BH IO (Either EsError (SearchResult Value))
case parsed of
Left e ->
liftIO $ expectationFailure ("Expected a script-transformed result but got: " <> show e)
(Right sr) -> do
liftIO $
hitsTotal (searchHits sr) `shouldBe` 6
let nameList :: [Text]
nameList =
(hits (searchHits sr))
^.. traverse
. to hitSource
. _Just
. LMA.key "name"
. _String
liftIO $
nameList
`shouldBe` ["blah","bloo","graffle","garabadoo","serenity"]
describe "query API" $ do