From 76effdd2c794293c2250b6b63b81a4bd988f5eb6 Mon Sep 17 00:00:00 2001 From: Jean-Christophe Amiel Date: Fri, 5 Jul 2024 13:46:46 +0200 Subject: [PATCH] Cached JSON parsing across queries. --- .../tests_failed/runner_errors.err.pattern | 2 +- .../hurl/tests_failed/runner_errors.hurl | 2 +- .../runner_errors_color.err.pattern | 2 +- integration/hurl/tests_ok/parse_cache.hurl | 220 ++++++++++++++++++ integration/hurl/tests_ok/parse_cache.py | 10 + packages/hurl/src/runner/cache.rs | 19 +- packages/hurl/src/runner/filter/jsonpath.rs | 41 ++-- packages/hurl/src/runner/filter/mod.rs | 2 +- packages/hurl/src/runner/query.rs | 50 +++- 9 files changed, 312 insertions(+), 36 deletions(-) diff --git a/integration/hurl/tests_failed/runner_errors.err.pattern b/integration/hurl/tests_failed/runner_errors.err.pattern index 9027485ee..b86fc8992 100644 --- a/integration/hurl/tests_failed/runner_errors.err.pattern +++ b/integration/hurl/tests_failed/runner_errors.err.pattern @@ -184,7 +184,7 @@ error: Invalid JSON error: Invalid JSONPath --> tests_failed/runner_errors.hurl:119:10 | - | GET http://localhost:8000/runner_errors + | GET http://localhost:8000/runner_errors/json-list | ... 119 | jsonpath "xxx" == 10 | ^^^^^ the JSONPath expression 'xxx' is not valid diff --git a/integration/hurl/tests_failed/runner_errors.hurl b/integration/hurl/tests_failed/runner_errors.hurl index 39e5a3dcf..4dd0d2ec3 100644 --- a/integration/hurl/tests_failed/runner_errors.hurl +++ b/integration/hurl/tests_failed/runner_errors.hurl @@ -113,7 +113,7 @@ HTTP 200 jsonpath "$.count" == 10 # QueryInvalidJsonpathExpression -GET http://localhost:8000/runner_errors +GET http://localhost:8000/runner_errors/json-list HTTP 200 [Asserts] jsonpath "xxx" == 10 diff --git a/integration/hurl/tests_failed/runner_errors_color.err.pattern b/integration/hurl/tests_failed/runner_errors_color.err.pattern index 7b48a8170..f1db9d803 100644 --- a/integration/hurl/tests_failed/runner_errors_color.err.pattern +++ b/integration/hurl/tests_failed/runner_errors_color.err.pattern @@ -184,7 +184,7 @@ error: Invalid JSONPath --> tests_failed/runner_errors.hurl:119:10  | - | GET http://localhost:8000/runner_errors + | GET http://localhost:8000/runner_errors/json-list  | ... 119 | jsonpath "xxx" == 10  | ^^^^^ the JSONPath expression 'xxx' is not valid diff --git a/integration/hurl/tests_ok/parse_cache.hurl b/integration/hurl/tests_ok/parse_cache.hurl index 5b71aa277..26646f389 100644 --- a/integration/hurl/tests_ok/parse_cache.hurl +++ b/integration/hurl/tests_ok/parse_cache.hurl @@ -208,3 +208,223 @@ GET http://localhost:8000/large/html HTTP 200 Content-Encoding: gzip Content-Type: text/html; charset=utf-8 + + +# We check that parsed JSON are reused across queries. +GET http://localhost:8000/large/json +HTTP 200 +Content-Encoding: gzip +Content-Type: application/json +[Asserts] +jsonpath "$.parse.title" == "List of Alfred Hitchcock Presents episodes" +jsonpath "$.parse.pageid" == 1892897 +jsonpath "$.parse.revid" == 1232145637 +jsonpath "$.parse.categories" count == 6 +jsonpath "$.parse.images" count == 2 +jsonpath "$.parse.links" count == 1626 +jsonpath "$.parse.sections" count == 15 + +jsonpath "$.parse.title" == "List of Alfred Hitchcock Presents episodes" +jsonpath "$.parse.pageid" == 1892897 +jsonpath "$.parse.revid" == 1232145637 +jsonpath "$.parse.categories" count == 6 +jsonpath "$.parse.images" count == 2 +jsonpath "$.parse.links" count == 1626 +jsonpath "$.parse.sections" count == 15 + +jsonpath "$.parse.title" == "List of Alfred Hitchcock Presents episodes" +jsonpath "$.parse.pageid" == 1892897 +jsonpath "$.parse.revid" == 1232145637 +jsonpath "$.parse.categories" count == 6 +jsonpath "$.parse.images" count == 2 +jsonpath "$.parse.links" count == 1626 +jsonpath "$.parse.sections" count == 15 + +jsonpath "$.parse.title" == "List of Alfred Hitchcock Presents episodes" +jsonpath "$.parse.pageid" == 1892897 +jsonpath "$.parse.revid" == 1232145637 +jsonpath "$.parse.categories" count == 6 +jsonpath "$.parse.images" count == 2 +jsonpath "$.parse.links" count == 1626 +jsonpath "$.parse.sections" count == 15 + +jsonpath "$.parse.title" == "List of Alfred Hitchcock Presents episodes" +jsonpath "$.parse.pageid" == 1892897 +jsonpath "$.parse.revid" == 1232145637 +jsonpath "$.parse.categories" count == 6 +jsonpath "$.parse.images" count == 2 +jsonpath "$.parse.links" count == 1626 +jsonpath "$.parse.sections" count == 15 + +jsonpath "$.parse.title" == "List of Alfred Hitchcock Presents episodes" +jsonpath "$.parse.pageid" == 1892897 +jsonpath "$.parse.revid" == 1232145637 +jsonpath "$.parse.categories" count == 6 +jsonpath "$.parse.images" count == 2 +jsonpath "$.parse.links" count == 1626 +jsonpath "$.parse.sections" count == 15 + + +# Captures are reusing parsing cache also. +GET http://localhost:8000/large/json +HTTP 200 +Content-Encoding: gzip +Content-Type: application/json +[Captures] +var1: jsonpath "$.parse.title" +var2: jsonpath "$.parse.pageid" +var3: jsonpath "$.parse.revid" +var4: jsonpath "$.parse.categories" count +var5: jsonpath "$.parse.images" count +var6: jsonpath "$.parse.links" count +var7: jsonpath "$.parse.sections" count + +var1: jsonpath "$.parse.title" +var2: jsonpath "$.parse.pageid" +var3: jsonpath "$.parse.revid" +var4: jsonpath "$.parse.categories" count +var5: jsonpath "$.parse.images" count +var6: jsonpath "$.parse.links" count +var7: jsonpath "$.parse.sections" count + +var1: jsonpath "$.parse.title" +var2: jsonpath "$.parse.pageid" +var3: jsonpath "$.parse.revid" +var4: jsonpath "$.parse.categories" count +var5: jsonpath "$.parse.images" count +var6: jsonpath "$.parse.links" count +var7: jsonpath "$.parse.sections" count + +var1: jsonpath "$.parse.title" +var2: jsonpath "$.parse.pageid" +var3: jsonpath "$.parse.revid" +var4: jsonpath "$.parse.categories" count +var5: jsonpath "$.parse.images" count +var6: jsonpath "$.parse.links" count +var7: jsonpath "$.parse.sections" count + +var1: jsonpath "$.parse.title" +var2: jsonpath "$.parse.pageid" +var3: jsonpath "$.parse.revid" +var4: jsonpath "$.parse.categories" count +var5: jsonpath "$.parse.images" count +var6: jsonpath "$.parse.links" count +var7: jsonpath "$.parse.sections" count + +var1: jsonpath "$.parse.title" +var2: jsonpath "$.parse.pageid" +var3: jsonpath "$.parse.revid" +var4: jsonpath "$.parse.categories" count +var5: jsonpath "$.parse.images" count +var6: jsonpath "$.parse.links" count +var7: jsonpath "$.parse.sections" count + + +# Captures and asserts are reusing parsing cache. +GET http://localhost:8000/large/json +HTTP 200 +Content-Encoding: gzip +Content-Type: application/json +[Captures] +var1: jsonpath "$.parse.title" +var2: jsonpath "$.parse.pageid" +var3: jsonpath "$.parse.revid" +var4: jsonpath "$.parse.categories" count +var5: jsonpath "$.parse.images" count +var6: jsonpath "$.parse.links" count +var7: jsonpath "$.parse.sections" count + +var1: jsonpath "$.parse.title" +var2: jsonpath "$.parse.pageid" +var3: jsonpath "$.parse.revid" +var4: jsonpath "$.parse.categories" count +var5: jsonpath "$.parse.images" count +var6: jsonpath "$.parse.links" count +var7: jsonpath "$.parse.sections" count + +var1: jsonpath "$.parse.title" +var2: jsonpath "$.parse.pageid" +var3: jsonpath "$.parse.revid" +var4: jsonpath "$.parse.categories" count +var5: jsonpath "$.parse.images" count +var6: jsonpath "$.parse.links" count +var7: jsonpath "$.parse.sections" count + +var1: jsonpath "$.parse.title" +var2: jsonpath "$.parse.pageid" +var3: jsonpath "$.parse.revid" +var4: jsonpath "$.parse.categories" count +var5: jsonpath "$.parse.images" count +var6: jsonpath "$.parse.links" count +var7: jsonpath "$.parse.sections" count + +var1: jsonpath "$.parse.title" +var2: jsonpath "$.parse.pageid" +var3: jsonpath "$.parse.revid" +var4: jsonpath "$.parse.categories" count +var5: jsonpath "$.parse.images" count +var6: jsonpath "$.parse.links" count +var7: jsonpath "$.parse.sections" count + +var1: jsonpath "$.parse.title" +var2: jsonpath "$.parse.pageid" +var3: jsonpath "$.parse.revid" +var4: jsonpath "$.parse.categories" count +var5: jsonpath "$.parse.images" count +var6: jsonpath "$.parse.links" count +var7: jsonpath "$.parse.sections" count + +[Asserts] +jsonpath "$.parse.title" == "List of Alfred Hitchcock Presents episodes" +jsonpath "$.parse.pageid" == 1892897 +jsonpath "$.parse.revid" == 1232145637 +jsonpath "$.parse.categories" count == 6 +jsonpath "$.parse.images" count == 2 +jsonpath "$.parse.links" count == 1626 +jsonpath "$.parse.sections" count == 15 + +jsonpath "$.parse.title" == "List of Alfred Hitchcock Presents episodes" +jsonpath "$.parse.pageid" == 1892897 +jsonpath "$.parse.revid" == 1232145637 +jsonpath "$.parse.categories" count == 6 +jsonpath "$.parse.images" count == 2 +jsonpath "$.parse.links" count == 1626 +jsonpath "$.parse.sections" count == 15 + +jsonpath "$.parse.title" == "List of Alfred Hitchcock Presents episodes" +jsonpath "$.parse.pageid" == 1892897 +jsonpath "$.parse.revid" == 1232145637 +jsonpath "$.parse.categories" count == 6 +jsonpath "$.parse.images" count == 2 +jsonpath "$.parse.links" count == 1626 +jsonpath "$.parse.sections" count == 15 + +jsonpath "$.parse.title" == "List of Alfred Hitchcock Presents episodes" +jsonpath "$.parse.pageid" == 1892897 +jsonpath "$.parse.revid" == 1232145637 +jsonpath "$.parse.categories" count == 6 +jsonpath "$.parse.images" count == 2 +jsonpath "$.parse.links" count == 1626 +jsonpath "$.parse.sections" count == 15 + +jsonpath "$.parse.title" == "List of Alfred Hitchcock Presents episodes" +jsonpath "$.parse.pageid" == 1892897 +jsonpath "$.parse.revid" == 1232145637 +jsonpath "$.parse.categories" count == 6 +jsonpath "$.parse.images" count == 2 +jsonpath "$.parse.links" count == 1626 +jsonpath "$.parse.sections" count == 15 + +jsonpath "$.parse.title" == "List of Alfred Hitchcock Presents episodes" +jsonpath "$.parse.pageid" == 1892897 +jsonpath "$.parse.revid" == 1232145637 +jsonpath "$.parse.categories" count == 6 +jsonpath "$.parse.images" count == 2 +jsonpath "$.parse.links" count == 1626 +jsonpath "$.parse.sections" count == 15 + +# A call without explicit asserts to give a comparison for the first call. +GET http://localhost:8000/large/json +HTTP 200 +Content-Encoding: gzip +Content-Type: application/json diff --git a/integration/hurl/tests_ok/parse_cache.py b/integration/hurl/tests_ok/parse_cache.py index f6ad010e4..621c1a782 100644 --- a/integration/hurl/tests_ok/parse_cache.py +++ b/integration/hurl/tests_ok/parse_cache.py @@ -6,5 +6,15 @@ from flask import Response, make_response def large_html(): data = open("tests_ok/parse_cache.html.gz", "rb") resp = make_response(data) + resp.headers["Content-Type"] = "text/html; charset=utf-8" + resp.headers["Content-Encoding"] = "gzip" + return resp + + +@app.route("/large/json") +def large_json(): + data = open("tests_ok/parse_cache.json.gz", "rb") + resp = make_response(data) + resp.headers["Content-Type"] = "application/json" resp.headers["Content-Encoding"] = "gzip" return resp diff --git a/packages/hurl/src/runner/cache.rs b/packages/hurl/src/runner/cache.rs index 1ac7b84c5..d796becb6 100644 --- a/packages/hurl/src/runner/cache.rs +++ b/packages/hurl/src/runner/cache.rs @@ -16,6 +16,7 @@ * */ use crate::runner::xpath::Document; +use serde_json::Value; /// This is a cache to hold parsed structured data (XML/JSON/text), computed from an HTTP response /// body bytes. This cache lives for a given request, and allows reusing parsed response for @@ -25,6 +26,8 @@ use crate::runner::xpath::Document; pub struct BodyCache { /// The parsed XML document. xml: Option, + /// The parsed JSON body + json: Option, } impl BodyCache { @@ -38,9 +41,19 @@ impl BodyCache { self.xml.as_ref() } - /// Set a XML document `doc` to the cache. - pub fn set_xml(&mut self, doc: Document) { - self.xml = Some(doc); + /// Caches a XML document `doc`. + pub fn set_xml(&mut self, xml: Document) { + self.xml = Some(xml); + } + + /// Returns a reference to a cached JSON response. + pub fn json(&self) -> Option<&Value> { + self.json.as_ref() + } + + /// Caches a parsed JSON. + pub fn set_json(&mut self, json: Value) { + self.json = Some(json); } } diff --git a/packages/hurl/src/runner/filter/jsonpath.rs b/packages/hurl/src/runner/filter/jsonpath.rs index e9faa1585..af5fdc951 100644 --- a/packages/hurl/src/runner/filter/jsonpath.rs +++ b/packages/hurl/src/runner/filter/jsonpath.rs @@ -31,7 +31,19 @@ pub fn eval_jsonpath( assert: bool, ) -> Result, RunnerError> { match value { - Value::String(json) => eval_jsonpath_string(json, expr, variables, source_info), + Value::String(text) => { + let json = match serde_json::from_str(text) { + Err(_) => { + return Err(RunnerError::new( + source_info, + RunnerErrorKind::QueryInvalidJson, + false, + )); + } + Ok(v) => v, + }; + eval_jsonpath_json(&json, expr, variables) + } v => { let kind = RunnerErrorKind::FilterInvalidInput(v._type()); Err(RunnerError::new(source_info, kind, assert)) @@ -39,33 +51,22 @@ pub fn eval_jsonpath( } } -pub fn eval_jsonpath_string( - json: &str, +pub fn eval_jsonpath_json( + json: &serde_json::Value, expr: &Template, variables: &HashMap, - source_info: SourceInfo, ) -> Result, RunnerError> { - let value = eval_template(expr, variables)?; - let expr_source_info = &expr.source_info; - let jsonpath_query = match jsonpath::parse(value.as_str()) { + let expr_str = eval_template(expr, variables)?; + let expr_source_info = expr.source_info; + let jsonpath_query = match jsonpath::parse(&expr_str) { Ok(q) => q, Err(_) => { - let kind = RunnerErrorKind::QueryInvalidJsonpathExpression { value }; - return Err(RunnerError::new(*expr_source_info, kind, false)); + let kind = RunnerErrorKind::QueryInvalidJsonpathExpression { value: expr_str }; + return Err(RunnerError::new(expr_source_info, kind, false)); } }; - let value = match serde_json::from_str(json) { - Err(_) => { - return Err(RunnerError::new( - source_info, - RunnerErrorKind::QueryInvalidJson, - false, - )); - } - Ok(v) => v, - }; - let results = jsonpath_query.eval(&value); + let results = jsonpath_query.eval(json); match results { None => Ok(None), Some(jsonpath::JsonpathResult::SingleEntry(value)) => Ok(Some(Value::from_json(&value))), diff --git a/packages/hurl/src/runner/filter/mod.rs b/packages/hurl/src/runner/filter/mod.rs index 083ea017d..80c05edcb 100644 --- a/packages/hurl/src/runner/filter/mod.rs +++ b/packages/hurl/src/runner/filter/mod.rs @@ -17,7 +17,7 @@ */ pub use eval::eval_filters; -pub use jsonpath::eval_jsonpath_string; +pub use jsonpath::eval_jsonpath_json; pub use xpath::eval_xpath_doc; mod count; diff --git a/packages/hurl/src/runner/query.rs b/packages/hurl/src/runner/query.rs index 061a85666..408278f79 100644 --- a/packages/hurl/src/runner/query.rs +++ b/packages/hurl/src/runner/query.rs @@ -50,7 +50,7 @@ pub fn eval_query( eval_query_xpath(response, cache, expr, variables, query.source_info) } QueryValue::Jsonpath { expr, .. } => { - eval_query_jsonpath(response, expr, variables, query.source_info) + eval_query_jsonpath(response, cache, expr, variables, query.source_info) } QueryValue::Regex { value, .. } => { eval_query_regex(response, value, variables, query.source_info) @@ -196,18 +196,50 @@ fn parse_cache_xml<'cache>( /// `query_source_info` is the source position of the query, used if an error is returned. fn eval_query_jsonpath( response: &http::Response, + cache: &mut BodyCache, expr: &Template, variables: &HashMap, query_source_info: SourceInfo, ) -> QueryResult { - match response.text() { - Ok(json) => filter::eval_jsonpath_string(&json, expr, variables, query_source_info), - Err(inner) => Err(RunnerError::new( - query_source_info, - RunnerErrorKind::Http(inner), - false, - )), - } + let json = match cache.json() { + Some(j) => j, + None => parse_cache_json(response, cache, query_source_info)?, + }; + filter::eval_jsonpath_json(json, expr, variables) +} + +/// Parse this HTTP `response` body to JSON, and store the document to the response `cache`. +/// +/// `query_source_info` is used for error reporting. +fn parse_cache_json<'cache>( + response: &http::Response, + cache: &'cache mut BodyCache, + query_source_info: SourceInfo, +) -> Result<&'cache serde_json::Value, RunnerError> { + // Get the response as text if possible + let text = match response.text() { + Ok(t) => t, + Err(e) => { + return Err(RunnerError::new( + query_source_info, + RunnerErrorKind::Http(e), + false, + )) + } + }; + let json = match serde_json::from_str(&text) { + Err(_) => { + return Err(RunnerError::new( + query_source_info, + RunnerErrorKind::QueryInvalidJson, + false, + )); + } + Ok(v) => v, + }; + // Everything is ok, we can put the response in the cache + cache.set_json(json); + Ok(cache.json().unwrap()) } /// Evaluates a regex query on the HTTP `response` body, given a set of `variables`.