mbs: improve performance of /validate and, to a lesser extent, /build (#1253)

<!-- The PR description should answer 2 important questions: -->

### What

Some "medium-hanging" fruit optimizations of mbs validate and build code
paths

Latest timing relative to `main` as of 10/18:

```
validate example/chinook.json
                        time:   [6.3984 ms 6.4236 ms 6.4532 ms]
                        change: [-46.276% -45.876% -45.440%] (p = 0.00 < 0.05)
                        Performance has improved.

validate example/multiple-subgraphs.json
                        time:   [1.6909 ms 1.7024 ms 1.7165 ms]
                        change: [-44.488% -44.141% -43.771%] (p = 0.00 < 0.05)
                        Performance has improved.

validate example/chinook-invoices.json
                        time:   [3.8878 ms 3.9039 ms 3.9220 ms]
                        change: [-45.544% -45.201% -44.863%] (p = 0.00 < 0.05)
                        Performance has improved.

validate example/big_pretty_DONT_COMMIT.json
                        time:   [3.6692 s 3.6789 s 3.6889 s]
                        change: [-36.964% -36.755% -36.544%] (p = 0.00 < 0.05)
                        Performance has improved.

build example/chinook.json
                        time:   [18.274 ms 18.345 ms 18.414 ms]
                        change: [-29.874% -29.359% -28.850%] (p = 0.00 < 0.05)
                        Performance has improved.

build example/multiple-subgraphs.json
                        time:   [3.4423 ms 3.4588 ms 3.4760 ms]
                        change: [-33.073% -32.628% -32.199%] (p = 0.00 < 0.05)
                        Performance has improved.

build example/chinook-invoices.json
                        time:   [11.000 ms 11.027 ms 11.057 ms]
                        change: [-26.051% -25.757% -25.449%] (p = 0.00 < 0.05)
                        Performance has improved.

Benchmarking build example/big_pretty_DONT_COMMIT.json: Collecting 100 samples in estimated 1359.8 s (100 iterations)
build example/big_pretty_DONT_COMMIT.json
                        time:   [13.429 s 13.465 s 13.503 s]
                        change: [-21.864% -21.634% -21.367%] (p = 0.00 < 0.05)
                        Performance has improved.

```

### How

 optimizing 

### Internal details

The absolute numbers for build in the PR are not accurate after rebasing
on current `main`, but absolute improvement in secs/ms is about the
same.

(/private)

V3_GIT_ORIGIN_REV_ID: a4fe628a08a35a88c2ca958a1d22f1fc6ddf2a5b
This commit is contained in:
Brandon Simmons 2024-10-23 05:27:28 -04:00 committed by hasura-bot
parent 20b46151e1
commit b6767e8399
9 changed files with 61 additions and 33 deletions

2
v3/Cargo.lock generated
View File

@ -2894,6 +2894,7 @@ dependencies = [
name = "json-ext"
version = "3.0.0"
dependencies = [
"indexmap 2.6.0",
"serde",
"serde_json",
]
@ -3022,6 +3023,7 @@ dependencies = [
"http 1.1.0",
"human_bytes",
"indexmap 2.6.0",
"json-ext",
"lexical-core",
"nonempty",
"postcard",

View File

@ -9,6 +9,7 @@
### Fixed
- improve performance of metadata build service `/validate` and `/build`
- When the `CompatibilityConfig` date is set to `2024-10-16` or newer, session
variables returned by webhooks, set in `noAuth` config in `AuthConfig` or set
in JWT claims are now correctly allowed to be full JSON values, not just JSON

View File

@ -36,8 +36,8 @@ impl KeyValueResponse for IndexMap<ndc_models::FieldName, ndc_models::RowFieldVa
self.swap_remove(key).map(|row_field| row_field.0)
}
}
// --------------------------------------------------
// These three bits are workarounds for the performance issue documented in ENG-1073
// Workaround for the performance issue documented in ENG-1073
//
// Assumes the input is an object. Used for our performance workaround, bypassing RowSet, which
// hopefully we can revert
@ -46,22 +46,6 @@ impl KeyValueResponse for serde_json::Value {
self.as_object_mut()?.swap_remove(key)
}
}
// More efficient `to_value`, for our performance workaround.
fn alias_map_to_value(index_map: IndexMap<ast::Alias, json::Value>) -> json::Value {
let mut json_object = json::Map::with_capacity(index_map.len());
for (alias, value) in index_map {
let key: String = alias.0.get().to_string();
json_object.insert(key, value);
}
json::Value::Object(json_object)
}
// More efficient `to_value`, for our performance workaround.
fn vec_alias_map_to_value(index_maps: Vec<IndexMap<ast::Alias, json::Value>>) -> json::Value {
let json_array: Vec<json::Value> = index_maps.into_iter().map(alias_map_to_value).collect();
json::Value::Array(json_array)
}
// --------------------------------------------------
// With the response headers forwarding feature, we also need to extract the
// response headers from NDC result. So that engine's server layer can use that
@ -180,14 +164,16 @@ where
)
// NOTE: I assume a Null returned here is internal error, but
// this behavior is preserved for now:
.map(|v| v.map_or(json::Value::Null, vec_alias_map_to_value))
.map(|v| {
v.map_or(json::Value::Null, json_ext::vec_alias_map_to_value)
})
} else {
process_selection_set_as_object(
rows_set_rows,
&field.selection_set,
response_config,
)
.map(|v| v.map_or(json::Value::Null, alias_map_to_value))
.map(|v| v.map_or(json::Value::Null, json_ext::alias_map_to_value))
}
}
OutputAnnotation::RelationshipToModelAggregate { .. } => {

View File

@ -38,6 +38,7 @@ serde_with = { workspace = true }
smol_str = { workspace = true }
thiserror = { workspace = true }
tracing-util = { path = "../utils/tracing-util" }
json-ext = { path = "../utils/json-ext" }
[dev-dependencies]
anyhow = { workspace = true }

View File

@ -2,6 +2,7 @@
This module provides functions to generate introspection result as GraphQL schema
for each namespace from the schema.
*/
use json_ext;
use std::collections::HashMap;
use std::sync::OnceLock;
use tracing_util::SpanVisibility;
@ -48,18 +49,19 @@ pub fn build_namespace_schema<
introspection_request(),
)
.map_err(|e| Error::NormalizeIntrospectionQuery(e.to_string()))?;
let mut result = HashMap::new();
// Build a Value directly to avoid the same performance hit we fixed in f709e4f8a
let mut result = serde_json::Map::with_capacity(nr.selection_set.fields.len());
for (_alias, field) in &nr.selection_set.fields {
let field_call = field.field_call().map_err(|_| Error::FieldCallNotFound)?;
match field_call.name.as_str() {
"__schema" => {
result.insert(
&field_call.name,
serde_json::to_value(crate::introspection::schema_type(
field_call.name.to_string(),
json_ext::alias_map_to_value(crate::introspection::schema_type(
schema,
namespaced_getter,
&field.selection_set,
)?)?,
)?),
);
}
name => Err(Error::OnlySchemaFieldExpected {
@ -67,7 +69,7 @@ pub fn build_namespace_schema<
})?,
}
}
Ok(serde_json::to_value(result)?)
Ok(serde_json::Value::Object(result))
},
)
}

View File

@ -49,16 +49,16 @@ where
Ok(json::to_value(response?)?)
}
fn array_response<A, B, F>(l: &[A], f: F) -> Result<json::Value>
/// for l map f, returning a Value::Array
fn array_response<A, F>(l: &[A], f: F) -> Result<json::Value>
where
F: Fn(&A) -> Result<B>,
B: serde::Serialize,
F: Fn(&A) -> Result<IndexMap<ast::Alias, json::Value>>,
{
let mut response = Vec::new();
for v in l {
response.push(json::to_value(f(v)?)?);
}
Ok(json::Value::Array(response))
l.iter()
.map(f)
.collect::<Result<Vec<_>>>()
// vec_alias_map_to_value() for effeciency vs. to_value()
.map(json_ext::vec_alias_map_to_value)
}
pub fn named_type<'s, S: schema::SchemaContext, NSGet: schema::NamespacedGetter<S>>(

View File

@ -10,6 +10,7 @@ bench = false
[dependencies]
serde = { workspace = true }
serde_json = { workspace = true }
indexmap = { workspace = true }
[lints]
workspace = true

View File

@ -1,7 +1,9 @@
mod json_ext;
use indexmap::IndexMap;
pub use json_ext::ValueExt;
use serde::{de::DeserializeOwned, ser::SerializeMap, Deserialize, Serialize};
use serde_json as json;
use std::{collections::HashMap, hash::Hash};
/// HashMapWithJsonKey serializes the `key<K>` as a String
@ -51,6 +53,35 @@ impl<'de, K: DeserializeOwned + Hash + Eq + Serialize, V: Deserialize<'de>> Dese
}
}
// These two functions are workarounds for the performance issue documented in ENG-1073
// implemented in f709e4f8a. Marked inline so we don't have to re-check performance
// after factoring these out. Ideally these will go away when we have a more suitable json library
/// More efficient `to_value`, for our ENG-1073 performance workaround.
#[inline]
pub fn alias_map_to_value<K>(index_map: IndexMap<K, json::Value>) -> json::Value
where
K: ToString + Eq + Hash,
{
let mut json_object = json::Map::with_capacity(index_map.len());
for (alias, value) in index_map {
let key: String = alias.to_string();
json_object.insert(key, value);
}
json::Value::Object(json_object)
}
/// More efficient `to_value`, for our ENG-1073 performance workaround.
#[inline]
pub fn vec_alias_map_to_value<K>(index_maps: Vec<IndexMap<K, json::Value>>) -> json::Value
where
K: ToString + Eq + Hash,
{
let json_array: Vec<json::Value> = index_maps.into_iter().map(alias_map_to_value).collect();
json::Value::Array(json_array)
}
#[cfg(test)]
mod tests {
use std::collections::HashMap;

View File

@ -7,6 +7,10 @@ pub enum JSONPathElement {
Index(usize),
}
// NOTE (performance): the required clones of JSONPath for calls to deserialize() was ~8% of
// runtime of validate(). Replacing Vec with imbl::Vector for structural sharing was a slight
// improvement, but not worth the dependency.
/// Represents a JSON path.
#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize)]
pub struct JSONPath(pub Vec<JSONPathElement>);