mirror of
https://github.com/typeable/bloodhound.git
synced 2024-12-02 14:34:23 +03:00
Add date_range aggregation
Note that this commit is based off of the low-level-scroll PR as a starting point.
This commit is contained in:
parent
6f38ff01af
commit
4269d2787b
@ -71,7 +71,8 @@ test-suite tests
|
||||
mtl,
|
||||
quickcheck-properties,
|
||||
derive,
|
||||
quickcheck-instances
|
||||
quickcheck-instances,
|
||||
errors >= 2.0
|
||||
default-language: Haskell2010
|
||||
|
||||
test-suite doctests
|
||||
|
@ -225,6 +225,12 @@ module Database.Bloodhound.Types
|
||||
, ValueCountAggregation(..)
|
||||
, FilterAggregation(..)
|
||||
, DateHistogramAggregation(..)
|
||||
, DateRangeAggregation(..)
|
||||
, DateRangeAggRange(..)
|
||||
, DateMathExpr(..)
|
||||
, DateMathAnchor(..)
|
||||
, DateMathModifier(..)
|
||||
, DateMathUnit(..)
|
||||
|
||||
, Highlights(..)
|
||||
, FieldHighlight(..)
|
||||
@ -240,6 +246,7 @@ module Database.Bloodhound.Types
|
||||
|
||||
, TermsResult(..)
|
||||
, DateHistogramResult(..)
|
||||
, DateRangeResult(..)
|
||||
) where
|
||||
|
||||
import Control.Applicative
|
||||
@ -261,7 +268,9 @@ import qualified Data.Map.Strict as M
|
||||
import Data.Maybe
|
||||
import Data.Text (Text)
|
||||
import qualified Data.Text as T
|
||||
import Data.Time.Calendar
|
||||
import Data.Time.Clock (UTCTime)
|
||||
import Data.Time.Clock.POSIX
|
||||
import qualified Data.Traversable as DT
|
||||
import Data.Typeable (Typeable)
|
||||
import qualified Data.Vector as V
|
||||
@ -773,6 +782,9 @@ newtype PhraseSlop = PhraseSlop Int deriving (Eq, Show, Generic, ToJSO
|
||||
newtype MinDocFrequency = MinDocFrequency Int deriving (Eq, Show, Generic, ToJSON, FromJSON, Typeable)
|
||||
newtype MaxDocFrequency = MaxDocFrequency Int deriving (Eq, Show, Generic, ToJSON, FromJSON, Typeable)
|
||||
|
||||
-- | Newtype wrapper to parse ES's concerning tendency to in some APIs return a floating point number of milliseconds since epoch ಠ_ಠ
|
||||
newtype POSIXMS = POSIXMS { posixMS :: UTCTime }
|
||||
|
||||
{-| 'unpackId' is a silly convenience function that gets used once.
|
||||
-}
|
||||
unpackId :: DocId -> Text
|
||||
@ -1408,7 +1420,8 @@ data Interval = Year
|
||||
data Aggregation = TermsAgg TermsAggregation
|
||||
| DateHistogramAgg DateHistogramAggregation
|
||||
| ValueCountAgg ValueCountAggregation
|
||||
| FilterAgg FilterAggregation deriving (Eq, Show)
|
||||
| FilterAgg FilterAggregation
|
||||
| DateRangeAgg DateRangeAggregation deriving (Eq, Show)
|
||||
|
||||
|
||||
data TermsAggregation = TermsAggregation { term :: Either Text Text
|
||||
@ -1434,6 +1447,36 @@ data DateHistogramAggregation = DateHistogramAggregation { dateField :: Fie
|
||||
, dateAggs :: Maybe Aggregations
|
||||
} deriving (Eq, Show)
|
||||
|
||||
|
||||
data DateRangeAggregation = DateRangeAggregation { draField :: FieldName
|
||||
, draFormat :: Maybe Text
|
||||
, draRanges :: NonEmpty DateRangeAggRange
|
||||
} deriving (Eq, Show)
|
||||
|
||||
data DateRangeAggRange = DateRangeFrom DateMathExpr
|
||||
| DateRangeTo DateMathExpr
|
||||
| DateRangeFromAndTo DateMathExpr DateMathExpr deriving (Eq, Show)
|
||||
|
||||
-- | See <https://www.elastic.co/guide/en/elasticsearch/reference/current/common-options.html#date-math> for more information.
|
||||
data DateMathExpr = DateMathExpr DateMathAnchor [DateMathModifier] deriving (Eq, Show)
|
||||
|
||||
|
||||
-- | Starting point for a date range. This along with the 'DateMathModifiers' gets you the date ES will start from.
|
||||
data DateMathAnchor = DMNow
|
||||
| DMDate Day deriving (Eq, Show)
|
||||
|
||||
data DateMathModifier = AddTime Int DateMathUnit
|
||||
| SubtractTime Int DateMathUnit
|
||||
| RoundDownTo DateMathUnit deriving (Eq, Show)
|
||||
|
||||
data DateMathUnit = DMYear
|
||||
| DMMonth
|
||||
| DMWeek
|
||||
| DMDay
|
||||
| DMHour
|
||||
| DMMinute
|
||||
| DMSecond deriving (Eq, Show)
|
||||
|
||||
-- | See <https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-valuecount-aggregation.html> for more information.
|
||||
data ValueCountAggregation = FieldValueCount FieldName
|
||||
| ScriptValueCount Script deriving (Eq, Show)
|
||||
@ -1537,6 +1580,39 @@ instance ToJSON Aggregation where
|
||||
toJSON (FilterAgg (FilterAggregation filt ags)) =
|
||||
omitNulls [ "filter" .= filt
|
||||
, "aggs" .= ags]
|
||||
toJSON (DateRangeAgg a) = object [ "date_range" .= a
|
||||
]
|
||||
|
||||
instance ToJSON DateRangeAggregation where
|
||||
toJSON DateRangeAggregation {..} =
|
||||
omitNulls [ "field" .= draField
|
||||
, "format" .= draFormat
|
||||
, "ranges" .= toList draRanges
|
||||
]
|
||||
|
||||
instance ToJSON DateRangeAggRange where
|
||||
toJSON (DateRangeFrom e) = object [ "from" .= e ]
|
||||
toJSON (DateRangeTo e) = object [ "to" .= e ]
|
||||
toJSON (DateRangeFromAndTo f t) = object [ "from" .= f, "to" .= t ]
|
||||
|
||||
instance ToJSON DateMathExpr where
|
||||
toJSON (DateMathExpr a mods) = String (fmtA a <> mconcat (fmtMod <$> mods))
|
||||
where fmtA DMNow = "now"
|
||||
fmtA (DMDate date) = case toGregorian date of
|
||||
(y,m,d) -> showText y <> "-" <>
|
||||
showText m <> "-" <>
|
||||
showText d <> "||"
|
||||
fmtMod (AddTime n u) = "+" <> showText n <> fmtU u
|
||||
fmtMod (SubtractTime n u) = "-" <> showText n <> fmtU u
|
||||
fmtMod (RoundDownTo u) = "/" <> fmtU u
|
||||
fmtU DMYear = "y"
|
||||
fmtU DMMonth = "M"
|
||||
fmtU DMWeek = "w"
|
||||
fmtU DMDay = "d"
|
||||
fmtU DMHour = "h"
|
||||
fmtU DMMinute = "m"
|
||||
fmtU DMSecond = "s"
|
||||
|
||||
|
||||
type AggregationResults = M.Map Text Value
|
||||
|
||||
@ -1557,6 +1633,14 @@ data DateHistogramResult = DateHistogramResult { dateKey :: Int
|
||||
, dateDocCount :: Int
|
||||
, dateHistogramAggs :: Maybe AggregationResults } deriving (Show)
|
||||
|
||||
data DateRangeResult = DateRangeResult { dateRangeKey :: Text
|
||||
, dateRangeFrom :: Maybe UTCTime
|
||||
, dateRangeFromAsString :: Maybe Text
|
||||
, dateRangeTo :: Maybe UTCTime
|
||||
, dateRangeToAsString :: Maybe Text
|
||||
, dateRangeDocCount :: Int
|
||||
, dateRangeAggs :: Maybe AggregationResults } deriving (Show, Eq)
|
||||
|
||||
toTerms :: Text -> AggregationResults -> Maybe (Bucket TermsResult)
|
||||
toTerms t a = M.lookup t a >>= deserialize
|
||||
where deserialize = parseMaybe parseJSON
|
||||
@ -1575,6 +1659,11 @@ instance BucketAggregation DateHistogramResult where
|
||||
docCount = dateDocCount
|
||||
aggs = dateHistogramAggs
|
||||
|
||||
instance BucketAggregation DateRangeResult where
|
||||
key = dateRangeKey
|
||||
docCount = dateRangeDocCount
|
||||
aggs = dateRangeAggs
|
||||
|
||||
instance (FromJSON a, BucketAggregation a) => FromJSON (Bucket a) where
|
||||
parseJSON (Object v) = Bucket <$>
|
||||
v .: "buckets"
|
||||
@ -1595,6 +1684,22 @@ instance FromJSON DateHistogramResult where
|
||||
v .:? "aggregations"
|
||||
parseJSON _ = mempty
|
||||
|
||||
instance FromJSON DateRangeResult where
|
||||
parseJSON = withObject "DateRangeResult" parse
|
||||
where parse v = DateRangeResult <$>
|
||||
v .: "key" <*>
|
||||
(fmap posixMS <$> v .:? "from") <*>
|
||||
v .:? "from_as_string" <*>
|
||||
(fmap posixMS <$> v .:? "to") <*>
|
||||
v .:? "to_as_string" <*>
|
||||
v .: "doc_count" <*>
|
||||
v .:? "aggregations"
|
||||
|
||||
instance FromJSON POSIXMS where
|
||||
parseJSON = withScientific "POSIXMS" (return . parse)
|
||||
where parse n = let n' = truncate n :: Integer
|
||||
in POSIXMS (posixSecondsToUTCTime (fromInteger (n' `div` 1000)))
|
||||
|
||||
instance Monoid Filter where
|
||||
mempty = IdentityFilter
|
||||
mappend a b = AndFilter [a, b] defaultCache
|
||||
@ -1890,7 +1995,7 @@ instance FromJSON Query where
|
||||
<|> idsQuery `taggedWith` "ids"
|
||||
<|> queryQueryStringQuery `taggedWith` "query_string"
|
||||
<|> queryMatchQuery `taggedWith` "match"
|
||||
<|> queryMultiMatchQuery --TODO: is this a precedence issue?
|
||||
<|> queryMultiMatchQuery
|
||||
<|> queryBoolQuery `taggedWith` "bool"
|
||||
<|> queryBoostingQuery `taggedWith` "boosting"
|
||||
<|> queryCommonTermsQuery `taggedWith` "common"
|
||||
|
@ -7,6 +7,7 @@
|
||||
module Main where
|
||||
|
||||
import Control.Applicative
|
||||
import Control.Error
|
||||
import Control.Exception
|
||||
import Control.Monad
|
||||
import Control.Monad.Reader
|
||||
@ -23,7 +24,7 @@ import Data.Monoid
|
||||
import Data.Proxy
|
||||
import Data.Text (Text)
|
||||
import qualified Data.Text as T
|
||||
import Data.Time.Calendar (Day (..))
|
||||
import Data.Time.Calendar (Day (..), fromGregorian)
|
||||
import Data.Time.Clock (UTCTime (..),
|
||||
secondsToDiffTime)
|
||||
import Data.Typeable
|
||||
@ -917,6 +918,43 @@ main = hspec $ do
|
||||
fmap aggregations res `shouldBe` Right (Just (M.fromList [ docCountPair "bitemyapps" 1
|
||||
, docCountPair "notmyapps" 1
|
||||
]))
|
||||
it "can execute date_range aggregations" $ withTestEnv $ do
|
||||
let now = fromGregorian 2015 3 14
|
||||
let ltAMonthAgo = UTCTime (fromGregorian 2015 3 1) 0
|
||||
let ltAWeekAgo = UTCTime (fromGregorian 2015 3 10) 0
|
||||
let oldDoc = exampleTweet { postDate = ltAMonthAgo }
|
||||
let newDoc = exampleTweet { postDate = ltAWeekAgo }
|
||||
_ <- indexDocument testIndex testMapping defaultIndexDocumentSettings oldDoc (DocId "1")
|
||||
_ <- indexDocument testIndex testMapping defaultIndexDocumentSettings newDoc (DocId "2")
|
||||
_ <- refreshIndex testIndex
|
||||
let thisMonth = DateRangeFrom (DateMathExpr (DMDate now) [SubtractTime 1 DMMonth])
|
||||
let thisWeek = DateRangeFrom (DateMathExpr (DMDate now) [SubtractTime 1 DMWeek])
|
||||
let agg = DateRangeAggregation (FieldName "postDate") Nothing (thisMonth :| [thisWeek])
|
||||
let ags = mkAggregations "date_ranges" (DateRangeAgg agg)
|
||||
let search = mkAggregateSearch Nothing ags
|
||||
res <- searchTweets search
|
||||
liftIO $ hitsTotal . searchHits <$> res `shouldBe` Right 2
|
||||
let bucks = do magrs <- fmapL show (aggregations <$> res)
|
||||
agrs <- note "no aggregations returned" magrs
|
||||
rawBucks <- note "no date_ranges aggregation" $ M.lookup "date_ranges" agrs
|
||||
parseEither parseJSON rawBucks
|
||||
let fromMonthT = UTCTime (fromGregorian 2015 2 14) 0
|
||||
let fromWeekT = UTCTime (fromGregorian 2015 3 7) 0
|
||||
liftIO $ buckets <$> bucks `shouldBe` Right [ DateRangeResult "2015-02-14T00:00:00.000Z-*"
|
||||
(Just fromMonthT)
|
||||
(Just "2015-02-14T00:00:00.000Z")
|
||||
Nothing
|
||||
Nothing
|
||||
2
|
||||
Nothing
|
||||
, DateRangeResult "2015-03-07T00:00:00.000Z-*"
|
||||
(Just fromWeekT)
|
||||
(Just "2015-03-07T00:00:00.000Z")
|
||||
Nothing
|
||||
Nothing
|
||||
1
|
||||
Nothing
|
||||
]
|
||||
|
||||
it "returns date histogram aggregation results" $ withTestEnv $ do
|
||||
_ <- insertData
|
||||
|
Loading…
Reference in New Issue
Block a user