Cached JSON parsing across queries.

This commit is contained in:
Jean-Christophe Amiel 2024-07-05 13:46:46 +02:00
parent a58e020a3e
commit 76effdd2c7
No known key found for this signature in database
GPG Key ID: 07FF11CFD55356CC
9 changed files with 312 additions and 36 deletions

View File

@ -184,7 +184,7 @@ error: Invalid JSON
error: Invalid JSONPath
--> tests_failed/runner_errors.hurl:119:10
|
| GET http://localhost:8000/runner_errors
| GET http://localhost:8000/runner_errors/json-list
| ...
119 | jsonpath "xxx" == 10
| ^^^^^ the JSONPath expression 'xxx' is not valid

View File

@ -113,7 +113,7 @@ HTTP 200
jsonpath "$.count" == 10
# QueryInvalidJsonpathExpression
GET http://localhost:8000/runner_errors
GET http://localhost:8000/runner_errors/json-list
HTTP 200
[Asserts]
jsonpath "xxx" == 10

View File

@ -184,7 +184,7 @@
error: Invalid JSONPath
--> tests_failed/runner_errors.hurl:119:10
 |
 | GET http://localhost:8000/runner_errors
 | GET http://localhost:8000/runner_errors/json-list
 | ...
119 | jsonpath "xxx" == 10
 | ^^^^^ the JSONPath expression 'xxx' is not valid

View File

@ -208,3 +208,223 @@ GET http://localhost:8000/large/html
HTTP 200
Content-Encoding: gzip
Content-Type: text/html; charset=utf-8
# We check that the parsed JSON is reused across queries.
GET http://localhost:8000/large/json
HTTP 200
Content-Encoding: gzip
Content-Type: application/json
[Asserts]
jsonpath "$.parse.title" == "List of Alfred Hitchcock Presents episodes"
jsonpath "$.parse.pageid" == 1892897
jsonpath "$.parse.revid" == 1232145637
jsonpath "$.parse.categories" count == 6
jsonpath "$.parse.images" count == 2
jsonpath "$.parse.links" count == 1626
jsonpath "$.parse.sections" count == 15
jsonpath "$.parse.title" == "List of Alfred Hitchcock Presents episodes"
jsonpath "$.parse.pageid" == 1892897
jsonpath "$.parse.revid" == 1232145637
jsonpath "$.parse.categories" count == 6
jsonpath "$.parse.images" count == 2
jsonpath "$.parse.links" count == 1626
jsonpath "$.parse.sections" count == 15
jsonpath "$.parse.title" == "List of Alfred Hitchcock Presents episodes"
jsonpath "$.parse.pageid" == 1892897
jsonpath "$.parse.revid" == 1232145637
jsonpath "$.parse.categories" count == 6
jsonpath "$.parse.images" count == 2
jsonpath "$.parse.links" count == 1626
jsonpath "$.parse.sections" count == 15
jsonpath "$.parse.title" == "List of Alfred Hitchcock Presents episodes"
jsonpath "$.parse.pageid" == 1892897
jsonpath "$.parse.revid" == 1232145637
jsonpath "$.parse.categories" count == 6
jsonpath "$.parse.images" count == 2
jsonpath "$.parse.links" count == 1626
jsonpath "$.parse.sections" count == 15
jsonpath "$.parse.title" == "List of Alfred Hitchcock Presents episodes"
jsonpath "$.parse.pageid" == 1892897
jsonpath "$.parse.revid" == 1232145637
jsonpath "$.parse.categories" count == 6
jsonpath "$.parse.images" count == 2
jsonpath "$.parse.links" count == 1626
jsonpath "$.parse.sections" count == 15
jsonpath "$.parse.title" == "List of Alfred Hitchcock Presents episodes"
jsonpath "$.parse.pageid" == 1892897
jsonpath "$.parse.revid" == 1232145637
jsonpath "$.parse.categories" count == 6
jsonpath "$.parse.images" count == 2
jsonpath "$.parse.links" count == 1626
jsonpath "$.parse.sections" count == 15
# Captures also reuse the parsing cache.
GET http://localhost:8000/large/json
HTTP 200
Content-Encoding: gzip
Content-Type: application/json
[Captures]
var1: jsonpath "$.parse.title"
var2: jsonpath "$.parse.pageid"
var3: jsonpath "$.parse.revid"
var4: jsonpath "$.parse.categories" count
var5: jsonpath "$.parse.images" count
var6: jsonpath "$.parse.links" count
var7: jsonpath "$.parse.sections" count
var1: jsonpath "$.parse.title"
var2: jsonpath "$.parse.pageid"
var3: jsonpath "$.parse.revid"
var4: jsonpath "$.parse.categories" count
var5: jsonpath "$.parse.images" count
var6: jsonpath "$.parse.links" count
var7: jsonpath "$.parse.sections" count
var1: jsonpath "$.parse.title"
var2: jsonpath "$.parse.pageid"
var3: jsonpath "$.parse.revid"
var4: jsonpath "$.parse.categories" count
var5: jsonpath "$.parse.images" count
var6: jsonpath "$.parse.links" count
var7: jsonpath "$.parse.sections" count
var1: jsonpath "$.parse.title"
var2: jsonpath "$.parse.pageid"
var3: jsonpath "$.parse.revid"
var4: jsonpath "$.parse.categories" count
var5: jsonpath "$.parse.images" count
var6: jsonpath "$.parse.links" count
var7: jsonpath "$.parse.sections" count
var1: jsonpath "$.parse.title"
var2: jsonpath "$.parse.pageid"
var3: jsonpath "$.parse.revid"
var4: jsonpath "$.parse.categories" count
var5: jsonpath "$.parse.images" count
var6: jsonpath "$.parse.links" count
var7: jsonpath "$.parse.sections" count
var1: jsonpath "$.parse.title"
var2: jsonpath "$.parse.pageid"
var3: jsonpath "$.parse.revid"
var4: jsonpath "$.parse.categories" count
var5: jsonpath "$.parse.images" count
var6: jsonpath "$.parse.links" count
var7: jsonpath "$.parse.sections" count
# Captures and asserts both reuse the parsing cache.
GET http://localhost:8000/large/json
HTTP 200
Content-Encoding: gzip
Content-Type: application/json
[Captures]
var1: jsonpath "$.parse.title"
var2: jsonpath "$.parse.pageid"
var3: jsonpath "$.parse.revid"
var4: jsonpath "$.parse.categories" count
var5: jsonpath "$.parse.images" count
var6: jsonpath "$.parse.links" count
var7: jsonpath "$.parse.sections" count
var1: jsonpath "$.parse.title"
var2: jsonpath "$.parse.pageid"
var3: jsonpath "$.parse.revid"
var4: jsonpath "$.parse.categories" count
var5: jsonpath "$.parse.images" count
var6: jsonpath "$.parse.links" count
var7: jsonpath "$.parse.sections" count
var1: jsonpath "$.parse.title"
var2: jsonpath "$.parse.pageid"
var3: jsonpath "$.parse.revid"
var4: jsonpath "$.parse.categories" count
var5: jsonpath "$.parse.images" count
var6: jsonpath "$.parse.links" count
var7: jsonpath "$.parse.sections" count
var1: jsonpath "$.parse.title"
var2: jsonpath "$.parse.pageid"
var3: jsonpath "$.parse.revid"
var4: jsonpath "$.parse.categories" count
var5: jsonpath "$.parse.images" count
var6: jsonpath "$.parse.links" count
var7: jsonpath "$.parse.sections" count
var1: jsonpath "$.parse.title"
var2: jsonpath "$.parse.pageid"
var3: jsonpath "$.parse.revid"
var4: jsonpath "$.parse.categories" count
var5: jsonpath "$.parse.images" count
var6: jsonpath "$.parse.links" count
var7: jsonpath "$.parse.sections" count
var1: jsonpath "$.parse.title"
var2: jsonpath "$.parse.pageid"
var3: jsonpath "$.parse.revid"
var4: jsonpath "$.parse.categories" count
var5: jsonpath "$.parse.images" count
var6: jsonpath "$.parse.links" count
var7: jsonpath "$.parse.sections" count
[Asserts]
jsonpath "$.parse.title" == "List of Alfred Hitchcock Presents episodes"
jsonpath "$.parse.pageid" == 1892897
jsonpath "$.parse.revid" == 1232145637
jsonpath "$.parse.categories" count == 6
jsonpath "$.parse.images" count == 2
jsonpath "$.parse.links" count == 1626
jsonpath "$.parse.sections" count == 15
jsonpath "$.parse.title" == "List of Alfred Hitchcock Presents episodes"
jsonpath "$.parse.pageid" == 1892897
jsonpath "$.parse.revid" == 1232145637
jsonpath "$.parse.categories" count == 6
jsonpath "$.parse.images" count == 2
jsonpath "$.parse.links" count == 1626
jsonpath "$.parse.sections" count == 15
jsonpath "$.parse.title" == "List of Alfred Hitchcock Presents episodes"
jsonpath "$.parse.pageid" == 1892897
jsonpath "$.parse.revid" == 1232145637
jsonpath "$.parse.categories" count == 6
jsonpath "$.parse.images" count == 2
jsonpath "$.parse.links" count == 1626
jsonpath "$.parse.sections" count == 15
jsonpath "$.parse.title" == "List of Alfred Hitchcock Presents episodes"
jsonpath "$.parse.pageid" == 1892897
jsonpath "$.parse.revid" == 1232145637
jsonpath "$.parse.categories" count == 6
jsonpath "$.parse.images" count == 2
jsonpath "$.parse.links" count == 1626
jsonpath "$.parse.sections" count == 15
jsonpath "$.parse.title" == "List of Alfred Hitchcock Presents episodes"
jsonpath "$.parse.pageid" == 1892897
jsonpath "$.parse.revid" == 1232145637
jsonpath "$.parse.categories" count == 6
jsonpath "$.parse.images" count == 2
jsonpath "$.parse.links" count == 1626
jsonpath "$.parse.sections" count == 15
jsonpath "$.parse.title" == "List of Alfred Hitchcock Presents episodes"
jsonpath "$.parse.pageid" == 1892897
jsonpath "$.parse.revid" == 1232145637
jsonpath "$.parse.categories" count == 6
jsonpath "$.parse.images" count == 2
jsonpath "$.parse.links" count == 1626
jsonpath "$.parse.sections" count == 15
# A call without explicit asserts to give a comparison for the first call.
GET http://localhost:8000/large/json
HTTP 200
Content-Encoding: gzip
Content-Type: application/json

View File

@ -6,5 +6,15 @@ from flask import Response, make_response
def large_html():
data = open("tests_ok/parse_cache.html.gz", "rb")
resp = make_response(data)
resp.headers["Content-Type"] = "text/html; charset=utf-8"
resp.headers["Content-Encoding"] = "gzip"
return resp
@app.route("/large/json")
def large_json():
    """Serve the pre-compressed JSON fixture `tests_ok/parse_cache.json.gz`.

    The file is handed to Flask as-is (still gzipped); the response is labelled
    `application/json` with a `gzip` content encoding.
    """
    payload = open("tests_ok/parse_cache.json.gz", "rb")
    response = make_response(payload)
    for name, value in (
        ("Content-Type", "application/json"),
        ("Content-Encoding", "gzip"),
    ):
        response.headers[name] = value
    return response

View File

@ -16,6 +16,7 @@
*
*/
use crate::runner::xpath::Document;
use serde_json::Value;
/// This is a cache to hold parsed structured data (XML/JSON/text), computed from an HTTP response
/// body bytes. This cache lives for a given request, and allows reusing parsed response for
@ -25,6 +26,8 @@ use crate::runner::xpath::Document;
pub struct BodyCache {
/// The parsed XML document, if one has been cached.
xml: Option<Document>,
/// The parsed JSON body, if one has been cached.
json: Option<Value>,
}
impl BodyCache {
@ -38,9 +41,19 @@ impl BodyCache {
self.xml.as_ref()
}
/// Set a XML document `doc` to the cache.
pub fn set_xml(&mut self, doc: Document) {
self.xml = Some(doc);
/// Caches the parsed XML document `xml`, replacing any previously cached document.
pub fn set_xml(&mut self, xml: Document) {
self.xml = Some(xml);
}
/// Returns a reference to the cached JSON value, or `None` when the cache holds no JSON.
pub fn json(&self) -> Option<&Value> {
self.json.as_ref()
}
/// Caches the parsed JSON value `json`, replacing any previously cached value.
pub fn set_json(&mut self, json: Value) {
self.json = Some(json);
}
}

View File

@ -31,7 +31,19 @@ pub fn eval_jsonpath(
assert: bool,
) -> Result<Option<Value>, RunnerError> {
match value {
Value::String(json) => eval_jsonpath_string(json, expr, variables, source_info),
Value::String(text) => {
let json = match serde_json::from_str(text) {
Err(_) => {
return Err(RunnerError::new(
source_info,
RunnerErrorKind::QueryInvalidJson,
false,
));
}
Ok(v) => v,
};
eval_jsonpath_json(&json, expr, variables)
}
v => {
let kind = RunnerErrorKind::FilterInvalidInput(v._type());
Err(RunnerError::new(source_info, kind, assert))
@ -39,33 +51,22 @@ pub fn eval_jsonpath(
}
}
pub fn eval_jsonpath_string(
json: &str,
pub fn eval_jsonpath_json(
json: &serde_json::Value,
expr: &Template,
variables: &HashMap<String, Value>,
source_info: SourceInfo,
) -> Result<Option<Value>, RunnerError> {
let value = eval_template(expr, variables)?;
let expr_source_info = &expr.source_info;
let jsonpath_query = match jsonpath::parse(value.as_str()) {
let expr_str = eval_template(expr, variables)?;
let expr_source_info = expr.source_info;
let jsonpath_query = match jsonpath::parse(&expr_str) {
Ok(q) => q,
Err(_) => {
let kind = RunnerErrorKind::QueryInvalidJsonpathExpression { value };
return Err(RunnerError::new(*expr_source_info, kind, false));
let kind = RunnerErrorKind::QueryInvalidJsonpathExpression { value: expr_str };
return Err(RunnerError::new(expr_source_info, kind, false));
}
};
let value = match serde_json::from_str(json) {
Err(_) => {
return Err(RunnerError::new(
source_info,
RunnerErrorKind::QueryInvalidJson,
false,
));
}
Ok(v) => v,
};
let results = jsonpath_query.eval(&value);
let results = jsonpath_query.eval(json);
match results {
None => Ok(None),
Some(jsonpath::JsonpathResult::SingleEntry(value)) => Ok(Some(Value::from_json(&value))),

View File

@ -17,7 +17,7 @@
*/
pub use eval::eval_filters;
pub use jsonpath::eval_jsonpath_string;
pub use jsonpath::eval_jsonpath_json;
pub use xpath::eval_xpath_doc;
mod count;

View File

@ -50,7 +50,7 @@ pub fn eval_query(
eval_query_xpath(response, cache, expr, variables, query.source_info)
}
QueryValue::Jsonpath { expr, .. } => {
eval_query_jsonpath(response, expr, variables, query.source_info)
eval_query_jsonpath(response, cache, expr, variables, query.source_info)
}
QueryValue::Regex { value, .. } => {
eval_query_regex(response, value, variables, query.source_info)
@ -196,18 +196,50 @@ fn parse_cache_xml<'cache>(
/// `query_source_info` is the source position of the query, used if an error is returned.
fn eval_query_jsonpath(
response: &http::Response,
cache: &mut BodyCache,
expr: &Template,
variables: &HashMap<String, Value>,
query_source_info: SourceInfo,
) -> QueryResult {
match response.text() {
Ok(json) => filter::eval_jsonpath_string(&json, expr, variables, query_source_info),
Err(inner) => Err(RunnerError::new(
query_source_info,
RunnerErrorKind::Http(inner),
false,
)),
}
let json = match cache.json() {
Some(j) => j,
None => parse_cache_json(response, cache, query_source_info)?,
};
filter::eval_jsonpath_json(json, expr, variables)
}
/// Parses the body of the HTTP `response` as JSON and stores the result in `cache`.
///
/// On success, returns a reference to the value that was just cached.
/// `query_source_info` is the source position attached to any error returned.
///
/// # Errors
///
/// - `RunnerErrorKind::Http` if the response body cannot be obtained as text,
/// - `RunnerErrorKind::QueryInvalidJson` if that text is not valid JSON.
fn parse_cache_json<'cache>(
    response: &http::Response,
    cache: &'cache mut BodyCache,
    query_source_info: SourceInfo,
) -> Result<&'cache serde_json::Value, RunnerError> {
    // The body has to be readable as text before it can be parsed.
    let body = response
        .text()
        .map_err(|e| RunnerError::new(query_source_info, RunnerErrorKind::Http(e), false))?;

    let parsed: serde_json::Value = serde_json::from_str(&body).map_err(|_| {
        RunnerError::new(query_source_info, RunnerErrorKind::QueryInvalidJson, false)
    })?;

    // Only a successfully parsed body reaches the cache; hand back a borrow of the stored value.
    cache.set_json(parsed);
    Ok(cache.json().unwrap())
}
/// Evaluates a regex query on the HTTP `response` body, given a set of `variables`.