Add date_range aggregation

Note that this commit is based off of the low-level-scroll PR as a
starting point.
This commit is contained in:
Michael Xavier 2015-11-13 16:22:34 -08:00
parent 6f38ff01af
commit 4269d2787b
3 changed files with 148 additions and 4 deletions

View File

@ -71,7 +71,8 @@ test-suite tests
mtl,
quickcheck-properties,
derive,
quickcheck-instances
quickcheck-instances,
errors >= 2.0
default-language: Haskell2010
test-suite doctests

View File

@ -225,6 +225,12 @@ module Database.Bloodhound.Types
, ValueCountAggregation(..)
, FilterAggregation(..)
, DateHistogramAggregation(..)
, DateRangeAggregation(..)
, DateRangeAggRange(..)
, DateMathExpr(..)
, DateMathAnchor(..)
, DateMathModifier(..)
, DateMathUnit(..)
, Highlights(..)
, FieldHighlight(..)
@ -240,6 +246,7 @@ module Database.Bloodhound.Types
, TermsResult(..)
, DateHistogramResult(..)
, DateRangeResult(..)
) where
import Control.Applicative
@ -261,7 +268,9 @@ import qualified Data.Map.Strict as M
import Data.Maybe
import Data.Text (Text)
import qualified Data.Text as T
import Data.Time.Calendar
import Data.Time.Clock (UTCTime)
import Data.Time.Clock.POSIX
import qualified Data.Traversable as DT
import Data.Typeable (Typeable)
import qualified Data.Vector as V
@ -773,6 +782,9 @@ newtype PhraseSlop = PhraseSlop Int deriving (Eq, Show, Generic, ToJSO
newtype MinDocFrequency = MinDocFrequency Int deriving (Eq, Show, Generic, ToJSON, FromJSON, Typeable)
newtype MaxDocFrequency = MaxDocFrequency Int deriving (Eq, Show, Generic, ToJSON, FromJSON, Typeable)
-- | Newtype wrapper to parse ES's concerning tendency to in some APIs return a floating point number of milliseconds since epoch ಠ_ಠ
newtype POSIXMS = POSIXMS { posixMS :: UTCTime }
{-| 'unpackId' is a silly convenience function that gets used once.
-}
unpackId :: DocId -> Text
@ -1408,7 +1420,8 @@ data Interval = Year
data Aggregation = TermsAgg TermsAggregation
| DateHistogramAgg DateHistogramAggregation
| ValueCountAgg ValueCountAggregation
| FilterAgg FilterAggregation deriving (Eq, Show)
| FilterAgg FilterAggregation
| DateRangeAgg DateRangeAggregation deriving (Eq, Show)
data TermsAggregation = TermsAggregation { term :: Either Text Text
@ -1434,6 +1447,36 @@ data DateHistogramAggregation = DateHistogramAggregation { dateField :: Fie
, dateAggs :: Maybe Aggregations
} deriving (Eq, Show)
data DateRangeAggregation = DateRangeAggregation { draField :: FieldName
, draFormat :: Maybe Text
, draRanges :: NonEmpty DateRangeAggRange
} deriving (Eq, Show)
data DateRangeAggRange = DateRangeFrom DateMathExpr
| DateRangeTo DateMathExpr
| DateRangeFromAndTo DateMathExpr DateMathExpr deriving (Eq, Show)
-- | See <https://www.elastic.co/guide/en/elasticsearch/reference/current/common-options.html#date-math> for more information.
data DateMathExpr = DateMathExpr DateMathAnchor [DateMathModifier] deriving (Eq, Show)
-- | Starting point for a date range. This along with the 'DateMathModifiers' gets you the date ES will start from.
data DateMathAnchor = DMNow
| DMDate Day deriving (Eq, Show)
data DateMathModifier = AddTime Int DateMathUnit
| SubtractTime Int DateMathUnit
| RoundDownTo DateMathUnit deriving (Eq, Show)
data DateMathUnit = DMYear
| DMMonth
| DMWeek
| DMDay
| DMHour
| DMMinute
| DMSecond deriving (Eq, Show)
-- | See <https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-valuecount-aggregation.html> for more information.
data ValueCountAggregation = FieldValueCount FieldName
| ScriptValueCount Script deriving (Eq, Show)
@ -1537,6 +1580,39 @@ instance ToJSON Aggregation where
toJSON (FilterAgg (FilterAggregation filt ags)) =
omitNulls [ "filter" .= filt
, "aggs" .= ags]
toJSON (DateRangeAgg a) = object [ "date_range" .= a
]
instance ToJSON DateRangeAggregation where
toJSON DateRangeAggregation {..} =
omitNulls [ "field" .= draField
, "format" .= draFormat
, "ranges" .= toList draRanges
]
instance ToJSON DateRangeAggRange where
toJSON (DateRangeFrom e) = object [ "from" .= e ]
toJSON (DateRangeTo e) = object [ "to" .= e ]
toJSON (DateRangeFromAndTo f t) = object [ "from" .= f, "to" .= t ]
instance ToJSON DateMathExpr where
toJSON (DateMathExpr a mods) = String (fmtA a <> mconcat (fmtMod <$> mods))
where fmtA DMNow = "now"
fmtA (DMDate date) = case toGregorian date of
(y,m,d) -> showText y <> "-" <>
showText m <> "-" <>
showText d <> "||"
fmtMod (AddTime n u) = "+" <> showText n <> fmtU u
fmtMod (SubtractTime n u) = "-" <> showText n <> fmtU u
fmtMod (RoundDownTo u) = "/" <> fmtU u
fmtU DMYear = "y"
fmtU DMMonth = "M"
fmtU DMWeek = "w"
fmtU DMDay = "d"
fmtU DMHour = "h"
fmtU DMMinute = "m"
fmtU DMSecond = "s"
type AggregationResults = M.Map Text Value
@ -1557,6 +1633,14 @@ data DateHistogramResult = DateHistogramResult { dateKey :: Int
, dateDocCount :: Int
, dateHistogramAggs :: Maybe AggregationResults } deriving (Show)
data DateRangeResult = DateRangeResult { dateRangeKey :: Text
, dateRangeFrom :: Maybe UTCTime
, dateRangeFromAsString :: Maybe Text
, dateRangeTo :: Maybe UTCTime
, dateRangeToAsString :: Maybe Text
, dateRangeDocCount :: Int
, dateRangeAggs :: Maybe AggregationResults } deriving (Show, Eq)
toTerms :: Text -> AggregationResults -> Maybe (Bucket TermsResult)
toTerms t a = M.lookup t a >>= deserialize
where deserialize = parseMaybe parseJSON
@ -1575,6 +1659,11 @@ instance BucketAggregation DateHistogramResult where
docCount = dateDocCount
aggs = dateHistogramAggs
instance BucketAggregation DateRangeResult where
key = dateRangeKey
docCount = dateRangeDocCount
aggs = dateRangeAggs
instance (FromJSON a, BucketAggregation a) => FromJSON (Bucket a) where
parseJSON (Object v) = Bucket <$>
v .: "buckets"
@ -1595,6 +1684,22 @@ instance FromJSON DateHistogramResult where
v .:? "aggregations"
parseJSON _ = mempty
instance FromJSON DateRangeResult where
parseJSON = withObject "DateRangeResult" parse
where parse v = DateRangeResult <$>
v .: "key" <*>
(fmap posixMS <$> v .:? "from") <*>
v .:? "from_as_string" <*>
(fmap posixMS <$> v .:? "to") <*>
v .:? "to_as_string" <*>
v .: "doc_count" <*>
v .:? "aggregations"
instance FromJSON POSIXMS where
parseJSON = withScientific "POSIXMS" (return . parse)
where parse n = let n' = truncate n :: Integer
in POSIXMS (posixSecondsToUTCTime (fromInteger (n' `div` 1000)))
instance Monoid Filter where
mempty = IdentityFilter
mappend a b = AndFilter [a, b] defaultCache
@ -1890,7 +1995,7 @@ instance FromJSON Query where
<|> idsQuery `taggedWith` "ids"
<|> queryQueryStringQuery `taggedWith` "query_string"
<|> queryMatchQuery `taggedWith` "match"
<|> queryMultiMatchQuery --TODO: is this a precedence issue?
<|> queryMultiMatchQuery
<|> queryBoolQuery `taggedWith` "bool"
<|> queryBoostingQuery `taggedWith` "boosting"
<|> queryCommonTermsQuery `taggedWith` "common"

View File

@ -7,6 +7,7 @@
module Main where
import Control.Applicative
import Control.Error
import Control.Exception
import Control.Monad
import Control.Monad.Reader
@ -23,7 +24,7 @@ import Data.Monoid
import Data.Proxy
import Data.Text (Text)
import qualified Data.Text as T
import Data.Time.Calendar (Day (..))
import Data.Time.Calendar (Day (..), fromGregorian)
import Data.Time.Clock (UTCTime (..),
secondsToDiffTime)
import Data.Typeable
@ -917,6 +918,43 @@ main = hspec $ do
fmap aggregations res `shouldBe` Right (Just (M.fromList [ docCountPair "bitemyapps" 1
, docCountPair "notmyapps" 1
]))
it "can execute date_range aggregations" $ withTestEnv $ do
let now = fromGregorian 2015 3 14
let ltAMonthAgo = UTCTime (fromGregorian 2015 3 1) 0
let ltAWeekAgo = UTCTime (fromGregorian 2015 3 10) 0
let oldDoc = exampleTweet { postDate = ltAMonthAgo }
let newDoc = exampleTweet { postDate = ltAWeekAgo }
_ <- indexDocument testIndex testMapping defaultIndexDocumentSettings oldDoc (DocId "1")
_ <- indexDocument testIndex testMapping defaultIndexDocumentSettings newDoc (DocId "2")
_ <- refreshIndex testIndex
let thisMonth = DateRangeFrom (DateMathExpr (DMDate now) [SubtractTime 1 DMMonth])
let thisWeek = DateRangeFrom (DateMathExpr (DMDate now) [SubtractTime 1 DMWeek])
let agg = DateRangeAggregation (FieldName "postDate") Nothing (thisMonth :| [thisWeek])
let ags = mkAggregations "date_ranges" (DateRangeAgg agg)
let search = mkAggregateSearch Nothing ags
res <- searchTweets search
liftIO $ hitsTotal . searchHits <$> res `shouldBe` Right 2
let bucks = do magrs <- fmapL show (aggregations <$> res)
agrs <- note "no aggregations returned" magrs
rawBucks <- note "no date_ranges aggregation" $ M.lookup "date_ranges" agrs
parseEither parseJSON rawBucks
let fromMonthT = UTCTime (fromGregorian 2015 2 14) 0
let fromWeekT = UTCTime (fromGregorian 2015 3 7) 0
liftIO $ buckets <$> bucks `shouldBe` Right [ DateRangeResult "2015-02-14T00:00:00.000Z-*"
(Just fromMonthT)
(Just "2015-02-14T00:00:00.000Z")
Nothing
Nothing
2
Nothing
, DateRangeResult "2015-03-07T00:00:00.000Z-*"
(Just fromWeekT)
(Just "2015-03-07T00:00:00.000Z")
Nothing
Nothing
1
Nothing
]
it "returns date histogram aggregation results" $ withTestEnv $ do
_ <- insertData