mirror of
https://github.com/simonmichael/hledger.git
synced 2024-09-19 10:17:35 +03:00
queries: Add generalised boolean queries
This commit changes some of the functions in the Query module and changes the overall way to parse queries. Instead of using the words'' split function, this commit starts to fully parse the query, as it's seen as a type of expression. AND, OR, NOT, and space operators can be used. The space operator simulates the behaviour from before, leaving a minimal amount of tests that need to be adjusted to comply to the new behaviour.
This commit is contained in:
parent
790b6ca9da
commit
35db1cae4f
@ -8,6 +8,7 @@ transactions..) by various criteria, and a SimpleTextParser for query expressio
|
||||
{-# LANGUAGE FlexibleContexts #-}
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
{-# LANGUAGE ViewPatterns #-}
|
||||
{-# LANGUAGE TupleSections #-}
|
||||
|
||||
module Hledger.Query (
|
||||
-- * Query and QueryOpt
|
||||
@ -19,7 +20,7 @@ module Hledger.Query (
|
||||
generatedTransactionTag,
|
||||
-- * parsing
|
||||
parseQuery,
|
||||
parseQueryList,
|
||||
parseQueries,
|
||||
parseQueryTerm,
|
||||
parseAccountType,
|
||||
-- * modifying
|
||||
@ -82,9 +83,10 @@ import Data.Text (Text)
|
||||
import qualified Data.Text as T
|
||||
import Data.Time.Calendar (Day, fromGregorian )
|
||||
import Safe (readDef, readMay, maximumByMay, maximumMay, minimumMay)
|
||||
import Text.Megaparsec (between, noneOf, sepBy)
|
||||
import Text.Megaparsec (between, noneOf, sepBy, try, (<?>), notFollowedBy)
|
||||
import Text.Megaparsec.Char (char, string)
|
||||
|
||||
|
||||
import Hledger.Utils hiding (words')
|
||||
import Hledger.Data.Types
|
||||
import Hledger.Data.AccountName
|
||||
@ -154,23 +156,13 @@ data QueryOpt = QueryOptInAcctOnly AccountName -- ^ show an account register fo
|
||||
-- showAccountMatcher _ = Nothing
|
||||
|
||||
|
||||
-- | A version of parseQueryList which acts on a single Text of
|
||||
-- space-separated terms.
|
||||
-- | Parses a query from the string containing a query expression.
|
||||
-- Parts of the query expression are either (sub-)queries or query options.
|
||||
--
|
||||
-- The usual shell quoting rules are assumed. When a pattern contains
|
||||
-- whitespace, it (or the whole term including prefix) should be enclosed
|
||||
-- in single or double quotes.
|
||||
--
|
||||
-- >>> parseQuery nulldate "expenses:dining out"
|
||||
-- Right (Or [Acct (RegexpCI "expenses:dining"),Acct (RegexpCI "out")],[])
|
||||
--
|
||||
-- >>> parseQuery nulldate "\"expenses:dining out\""
|
||||
-- Right (Acct (RegexpCI "expenses:dining out"),[])
|
||||
parseQuery :: Day -> T.Text -> Either String (Query,[QueryOpt])
|
||||
parseQuery d = parseQueryList d . words'' queryprefixes
|
||||
|
||||
-- | Convert a list of query expression containing to a query and zero
|
||||
-- or more query options; or return an error message if query parsing fails.
|
||||
-- in ESCAPED single or double quotes or the whole term should be between
|
||||
-- parentheses to denotate a subquery.
|
||||
--
|
||||
-- A query term is either:
|
||||
--
|
||||
@ -191,30 +183,121 @@ parseQuery d = parseQueryList d . words'' queryprefixes
|
||||
-- Period expressions may contain relative dates, so a reference date is
|
||||
-- required to fully parse these.
|
||||
--
|
||||
-- >>> parseQuery nulldate "expenses:dining out"
|
||||
-- Right (Or [Acct (RegexpCI "expenses:dining"),Acct (RegexpCI "out")],[])
|
||||
--
|
||||
-- >>> parseQuery nulldate "\"expenses:dining out\""
|
||||
-- Right (Acct (RegexpCI "expenses:dining out"),[])
|
||||
parseQuery :: Day -> T.Text -> Either String (Query,[QueryOpt])
|
||||
parseQuery = parseBooleanQuery
|
||||
|
||||
-- | Variant of parseQuery that recombines a list of queries before parsing.
|
||||
--
|
||||
-- This function succeeds the parseQueryList function. The list of expressions
|
||||
-- is simply concatenated before passing to parseQuery, as the list might contain
|
||||
-- keywords such as AND that cannot be separately interpreted as a query.
|
||||
parseQueries :: Day -> [T.Text] -> Either String (Query,[QueryOpt])
|
||||
parseQueries d ts = parseQuery d $ T.intercalate " " ts
|
||||
|
||||
-- | Parses a boolean query expression.
|
||||
--
|
||||
-- Boolean queries combine smaller queries into larger ones. The boolean operators
|
||||
-- made available through this function are "NOT e", "e AND e", "e OR e", and "e e".
|
||||
-- Query options defined in multiple sub-queries are simply combined by concatenating
|
||||
-- all options into one list.
|
||||
--
|
||||
-- Boolean operators in queries take precedence over one another. For instance, the
|
||||
-- prefix-operator "NOT e" is always parsed before "e AND e", "e AND e" before "e OR e",
|
||||
-- and "e OR e" before "e e".
|
||||
--
|
||||
-- The space-separation operator is left as it was the default before the introduction of
|
||||
-- boolean operators. It takes the behaviour defined in the interpretQueryList function,
|
||||
-- whereas the NOT, OR, and AND operators simply wrap a list of queries with the associated
|
||||
--
|
||||
--
|
||||
-- The result of this function is either an error encountered during parsing of the
|
||||
-- expression or the combined query and query options.
|
||||
--
|
||||
-- >>> parseBooleanQuery nulldate "expenses:dining AND out"
|
||||
-- Right (And [Acct (RegexpCI "expenses:dining"),Acct (RegexpCI "out")],[])
|
||||
--
|
||||
-- >>> parseBooleanQuery nulldate "expenses:dining AND desc:a OR desc:b"
|
||||
-- Right (Or [And [Acct (RegexpCI "expenses:dining"),Desc (RegexpCI "a")],Desc (RegexpCI "b")],[])
|
||||
parseBooleanQuery :: Day -> T.Text -> Either String (Query,[QueryOpt])
|
||||
parseBooleanQuery d t = either (Left . ("failed to parse query:" <>) . customErrorBundlePretty) Right $ parsewith allQueriesP t
|
||||
where
|
||||
allQueriesP :: SimpleTextParser (Query, [QueryOpt])
|
||||
allQueriesP = either (Any,) id <$> spacedQueriesP
|
||||
|
||||
regexP :: SimpleTextParser T.Text
|
||||
regexP = choice'
|
||||
[ stripquotes . T.pack <$> between (char '\'') (char '\'') (many $ noneOf ("'" :: [Char])),
|
||||
stripquotes . T.pack <$> between (char '"') (char '"') (many $ noneOf ("\"" :: [Char])),
|
||||
T.pack <$> (notFollowedBy keywordSpaceP >> (many $ noneOf (") \n\r" :: [Char]))) ]
|
||||
queryPrefixP :: SimpleTextParser T.Text
|
||||
queryPrefixP = (string "not:" <> (fromMaybe "" <$> optional queryPrefixP))
|
||||
<|> choice' (string <$> queryprefixes)
|
||||
<?> "query prefix"
|
||||
queryTermP :: SimpleTextParser (Either [QueryOpt] (Query, [QueryOpt]))
|
||||
queryTermP = do
|
||||
prefix <- optional queryPrefixP
|
||||
queryRegex <- regexP
|
||||
|
||||
case parseQueryTerm d (fromMaybe "" prefix <> queryRegex) of
|
||||
Right q -> case q of
|
||||
Right opt -> return $ Left [opt]
|
||||
Left query -> return $ Right (query, [])
|
||||
Left err -> error' err -- PARTIAL:
|
||||
|
||||
keywordSpaceP :: SimpleTextParser T.Text
|
||||
keywordSpaceP = choice' ["NOT ", "AND ", "OR "]
|
||||
|
||||
parQueryP,notQueryP :: SimpleTextParser (Either [QueryOpt] (Query, [QueryOpt]))
|
||||
parQueryP = between (char '(' >> skipNonNewlineSpaces)
|
||||
(try $ skipNonNewlineSpaces >> char ')')
|
||||
spacedQueriesP
|
||||
<|> queryTermP
|
||||
notQueryP = (maybe id (\_ (Right (q, qopts)) -> Right (Not q, qopts)) <$> optional (string "NOT" >> skipNonNewlineSpaces1)) <*> parQueryP
|
||||
|
||||
andQueriesP,orQueriesP,spacedQueriesP :: SimpleTextParser (Either [QueryOpt] (Query, [QueryOpt]))
|
||||
andQueriesP = nArityOp And <$> notQueryP `sepBy` (try $ skipNonNewlineSpaces >> string "AND" >> skipNonNewlineSpaces1)
|
||||
orQueriesP = nArityOp Or <$> andQueriesP `sepBy` (try $ skipNonNewlineSpaces >> string "OR" >> skipNonNewlineSpaces1)
|
||||
spacedQueriesP = nArityOp interpretQueryList <$> orQueriesP `sepBy` skipNonNewlineSpaces1
|
||||
|
||||
nArityOp :: ([Query] -> Query) -> [Either [QueryOpt] (Query, [QueryOpt])] -> Either [QueryOpt] (Query, [QueryOpt])
|
||||
nArityOp f res = let (qoptss, results) = partitionEithers res
|
||||
(qs, qoptss') = unzip results
|
||||
qoptss'' = concat qoptss <> concat qoptss'
|
||||
in case qs of
|
||||
[] -> Left qoptss''
|
||||
(q:[]) -> Right (simplifyQuery q, qoptss'')
|
||||
_ -> Right (simplifyQuery $ f qs, qoptss'')
|
||||
|
||||
-- | Convert a list of space-separated queries to a single query
|
||||
--
|
||||
-- Multiple terms are combined as follows:
|
||||
-- 1. multiple account patterns are OR'd together
|
||||
-- 2. multiple description patterns are OR'd together
|
||||
-- 3. multiple status patterns are OR'd together
|
||||
-- 4. then all terms are AND'd together
|
||||
parseQueryList :: Day -> [T.Text] -> Either String (Query, [QueryOpt])
|
||||
parseQueryList d termstrs = do
|
||||
eterms <- mapM (parseQueryTerm d) termstrs
|
||||
let (pats, opts) = partitionEithers eterms
|
||||
(descpats, pats') = partition queryIsDesc pats
|
||||
(acctpats, pats'') = partition queryIsAcct pats'
|
||||
(statuspats, otherpats) = partition queryIsStatus pats''
|
||||
q = simplifyQuery $ And $ [Or acctpats, Or descpats, Or statuspats] ++ otherpats
|
||||
Right (q, opts)
|
||||
|
||||
interpretQueryList :: [Query] -> Query
|
||||
interpretQueryList pats = q
|
||||
where
|
||||
(descpats, pats') = partition queryIsDesc pats
|
||||
(acctpats, pats'') = partition queryIsAcct pats'
|
||||
(statuspats, otherpats) = partition queryIsStatus pats''
|
||||
q = simplifyQuery $ And $ [Or acctpats, Or descpats, Or statuspats] ++ otherpats
|
||||
|
||||
-- XXX
|
||||
-- | Quote-and-prefix-aware version of words - don't split on spaces which
|
||||
-- are inside quotes, including quotes which may have one of the specified
|
||||
-- prefixes in front, and maybe an additional not: prefix in front of that.
|
||||
words'' :: [T.Text] -> T.Text -> [T.Text]
|
||||
words'' prefixes = fromparse . parsewith maybeprefixedquotedphrases -- XXX
|
||||
where
|
||||
maybeprefixedquotedphrases :: SimpleTextParser [T.Text]
|
||||
maybeprefixedquotedphrases = choice' [prefixedQuotedPattern, singleQuotedPattern, doubleQuotedPattern, patterns] `sepBy` skipNonNewlineSpaces1
|
||||
words'' prefixes = fromparse . parsewith maybePrefixedQuotedPhrases -- XXX
|
||||
where
|
||||
maybePrefixedQuotedPhrases :: SimpleTextParser [T.Text]
|
||||
maybePrefixedQuotedPhrases = choice' [prefixedQuotedPattern, singleQuotedPattern, doubleQuotedPattern, patterns] `sepBy`
|
||||
(notFollowedBy (skipNonNewlineSpaces >> char ')') >> skipNonNewlineSpaces1)
|
||||
prefixedQuotedPattern :: SimpleTextParser T.Text
|
||||
prefixedQuotedPattern = do
|
||||
not' <- fromMaybe "" `fmap` (optional $ string "not:")
|
||||
@ -847,6 +930,22 @@ tests_Query = testGroup "Query" [
|
||||
parseQuery nulldate "'a a' 'b" @?= Right (Or [Acct $ toRegexCI' "a a",Acct $ toRegexCI' "'b"], [])
|
||||
parseQuery nulldate "\"" @?= Right (Acct $ toRegexCI' "\"", [])
|
||||
|
||||
,testCase "parseBooleanQuery" $ do
|
||||
parseBooleanQuery nulldate "(tag:'atag=a')" @?= Right (Tag (toRegexCI' "atag") (Just $ toRegexCI' "a"), [])
|
||||
parseBooleanQuery nulldate "( tag:\"atag=a\" )" @?= Right (Tag (toRegexCI' "atag") (Just $ toRegexCI' "a"), [])
|
||||
parseBooleanQuery nulldate "(acct:'expenses:food')" @?= Right (Acct $ toRegexCI' "expenses:food", [])
|
||||
parseBooleanQuery nulldate "(((acct:'expenses:food')))" @?= Right (Acct $ toRegexCI' "expenses:food", [])
|
||||
parseBooleanQuery nulldate "acct:'expenses:food' AND desc:'b'" @?= Right (And [Acct $ toRegexCI' "expenses:food", Desc $ toRegexCI' "b"], [])
|
||||
parseBooleanQuery nulldate "((desc:'a') AND (desc:'b') OR (desc:'c'))" @?= Right (Or [And [Desc $ toRegexCI' "a", Desc $ toRegexCI' "b"], Desc $ toRegexCI' "c"], [])
|
||||
parseBooleanQuery nulldate "((desc:'a') OR (desc:'b') AND (desc:'c'))" @?= Right (Or [Desc $ toRegexCI' "a", And [Desc $ toRegexCI' "b", Desc $ toRegexCI' "c"]], [])
|
||||
parseBooleanQuery nulldate "((desc:'a') AND desc:'b' AND (desc:'c'))" @?= Right (And [Desc $ toRegexCI' "a", Desc $ toRegexCI' "b", Desc $ toRegexCI' "c"], [])
|
||||
parseBooleanQuery nulldate "(NOT (desc:'a') AND (desc:'b'))" @?= Right (And [Not $ Desc $ toRegexCI' "a", Desc $ toRegexCI' "b"], [])
|
||||
parseBooleanQuery nulldate "((desc:'a') AND (NOT desc:'b'))" @?= Right (And [Desc $ toRegexCI' "a", Not $ Desc $ toRegexCI' "b"], [])
|
||||
parseBooleanQuery nulldate "(desc:'a' AND desc:'b')" @?= Right (And [Desc $ toRegexCI' "a", Desc $ toRegexCI' "b"], [])
|
||||
parseBooleanQuery nulldate "(acct:'a' acct:'b')" @?= Right (Or [Acct $ toRegexCI' "a", Acct $ toRegexCI' "b"], [])
|
||||
parseBooleanQuery nulldate " acct:'a' acct:'b'" @?= Right (Or [Acct $ toRegexCI' "a", Acct $ toRegexCI' "b"], [])
|
||||
parseBooleanQuery nulldate "not:a" @?= Right (Not $ Acct $ toRegexCI' "a", [])
|
||||
|
||||
,testCase "words''" $ do
|
||||
(words'' [] "a b") @?= ["a","b"]
|
||||
(words'' [] "'a b'") @?= ["a b"]
|
||||
|
@ -804,7 +804,7 @@ makeHledgerClassyLenses ''ReportSpec
|
||||
-- >>> _rsQuery <$> setEither querystring ["assets"] defreportspec
|
||||
-- Right (Acct (RegexpCI "assets"))
|
||||
-- >>> _rsQuery <$> setEither querystring ["(assets"] defreportspec
|
||||
-- Left "This regular expression is malformed...
|
||||
-- Left "failed to parse query:1:8:\n |\n1 | (assets\n | ^\nunexpected end of input\nexpecting \"AND\", \"OR\", or ')'\n"
|
||||
-- >>> _rsQuery $ set querystring ["assets"] defreportspec
|
||||
-- Acct (RegexpCI "assets")
|
||||
-- >>> _rsQuery $ set querystring ["(assets"] defreportspec
|
||||
@ -855,7 +855,7 @@ instance HasReportOpts ReportSpec where
|
||||
-- | Generate a ReportSpec from a set of ReportOpts on a given day.
|
||||
reportOptsToSpec :: Day -> ReportOpts -> Either String ReportSpec
|
||||
reportOptsToSpec day ropts = do
|
||||
(argsquery, queryopts) <- parseQueryList day $ querystring_ ropts
|
||||
(argsquery, queryopts) <- parseQueries day $ querystring_ ropts
|
||||
return ReportSpec
|
||||
{ _rsReportOpts = ropts
|
||||
, _rsDay = day
|
||||
|
@ -36,7 +36,7 @@ tags CliOpts{rawopts_=rawopts,reportspec_=rspec} j = do
|
||||
values = boolopt "values" rawopts
|
||||
parsed = boolopt "parsed" rawopts
|
||||
empty = empty_ $ _rsReportOpts rspec
|
||||
query <- either usageError (return . fst) $ parseQueryList today querystr
|
||||
query <- either usageError (return . fst) $ parseQueries today querystr
|
||||
let
|
||||
q = simplifyQuery $ And [queryFromFlags $ _rsReportOpts rspec, query]
|
||||
matchedtxns = filter (q `matchesTransaction`) $ jtxns $ journalApplyValuationFromOpts rspec j
|
||||
|
@ -4,7 +4,11 @@
|
||||
a a 1
|
||||
b
|
||||
|
||||
$ hledger -f- register 'a a'
|
||||
2010/3/1 y
|
||||
a 1
|
||||
b
|
||||
|
||||
$ hledger -f- register "'a a'"
|
||||
>
|
||||
2010-03-01 x a a 1 1
|
||||
>=0
|
||||
@ -20,7 +24,7 @@ $ hledger -f- register 'a a'
|
||||
a 1
|
||||
b
|
||||
|
||||
$ hledger -f- register desc:'x x'
|
||||
$ hledger -f- register "desc:'x x'"
|
||||
>
|
||||
2010-03-02 x x a 1 1
|
||||
b -1 0
|
||||
@ -33,7 +37,7 @@ $ hledger -f- register desc:'x x'
|
||||
a a 1
|
||||
'b
|
||||
|
||||
$ hledger -f- register 'a a' "'b"
|
||||
$ hledger -f- register "'a a' \"'b\""
|
||||
>
|
||||
2011-09-11 a a 1 1
|
||||
'b -1 0
|
||||
|
95
hledger/test/query-bool.test
Normal file
95
hledger/test/query-bool.test
Normal file
@ -0,0 +1,95 @@
|
||||
<
|
||||
2022-01-01 Transaction 1 ; transactiontag:A
|
||||
assets:bank:main -1 ; A comment
|
||||
expenses:food
|
||||
|
||||
2022-01-01 Transaction 2 ; transactiontag:A
|
||||
assets:bank:main -1
|
||||
assets:bank:secondary -1 ; atag:a
|
||||
expenses:food
|
||||
|
||||
2022-01-01 Transaction 3 ; transactiontag:B
|
||||
assets:bank:main -1 ; A comment
|
||||
expenses:drink
|
||||
|
||||
2022-01-01 Transaction 4 ; transactiontag:B
|
||||
assets:bank:main -1 ; A comment
|
||||
expenses:food 2
|
||||
expenses:drink
|
||||
|
||||
# 1. Simple queries can be encased in an arbitrary number of parentheses (1)
|
||||
$ hledger -f - print "(tag:'transactiontag=B')"
|
||||
2022-01-01 Transaction 3 ; transactiontag:B
|
||||
assets:bank:main -1 ; A comment
|
||||
expenses:drink
|
||||
|
||||
2022-01-01 Transaction 4 ; transactiontag:B
|
||||
assets:bank:main -1 ; A comment
|
||||
expenses:food 2
|
||||
expenses:drink
|
||||
|
||||
>=
|
||||
|
||||
# 2. Simple queries can be encased in an arbitrary number of parentheses (3)
|
||||
$ hledger -f - print "(((tag:'transactiontag=B')))"
|
||||
2022-01-01 Transaction 3 ; transactiontag:B
|
||||
assets:bank:main -1 ; A comment
|
||||
expenses:drink
|
||||
|
||||
2022-01-01 Transaction 4 ; transactiontag:B
|
||||
assets:bank:main -1 ; A comment
|
||||
expenses:food 2
|
||||
expenses:drink
|
||||
|
||||
>=
|
||||
|
||||
# 3. Simple boolean AND query works
|
||||
$ hledger -f - print tag:'transactiontag=B' AND desc:3
|
||||
2022-01-01 Transaction 3 ; transactiontag:B
|
||||
assets:bank:main -1 ; A comment
|
||||
expenses:drink
|
||||
|
||||
>=
|
||||
|
||||
# 4. AND + OR works without parentheses
|
||||
$ hledger -f - print tag:'transactiontag=B' AND desc:3 OR desc:1
|
||||
2022-01-01 Transaction 1 ; transactiontag:A
|
||||
assets:bank:main -1 ; A comment
|
||||
expenses:food
|
||||
|
||||
2022-01-01 Transaction 3 ; transactiontag:B
|
||||
assets:bank:main -1 ; A comment
|
||||
expenses:drink
|
||||
|
||||
>=
|
||||
|
||||
# 5. Unnecessary NOT + OR works without parentheses
|
||||
$ hledger -f - print NOT tag:'transactiontag=B' OR desc:1
|
||||
2022-01-01 Transaction 1 ; transactiontag:A
|
||||
assets:bank:main -1 ; A comment
|
||||
expenses:food
|
||||
|
||||
2022-01-01 Transaction 2 ; transactiontag:A
|
||||
assets:bank:main -1
|
||||
assets:bank:secondary -1 ; atag:a
|
||||
expenses:food
|
||||
|
||||
>=
|
||||
|
||||
# 6. Necessary NOT + OR works without parentheses
|
||||
$ hledger -f - print NOT tag:'transactiontag=B' OR desc:4
|
||||
2022-01-01 Transaction 1 ; transactiontag:A
|
||||
assets:bank:main -1 ; A comment
|
||||
expenses:food
|
||||
|
||||
2022-01-01 Transaction 2 ; transactiontag:A
|
||||
assets:bank:main -1
|
||||
assets:bank:secondary -1 ; atag:a
|
||||
expenses:food
|
||||
|
||||
2022-01-01 Transaction 4 ; transactiontag:B
|
||||
assets:bank:main -1 ; A comment
|
||||
expenses:food 2
|
||||
expenses:drink
|
||||
|
||||
>=
|
Loading…
Reference in New Issue
Block a user