Mirror of https://github.com/hasura/graphql-engine.git (synced 2024-12-14 08:02:15 +03:00)
Experimental SQL interface (#742)

Adds a very experimental SQL interface to v3-engine for GenAI use cases.

Co-authored-by: Abhinav Gupta <127770473+abhinav-hasura@users.noreply.github.com>
Co-authored-by: Gil Mizrahi <gil@gilmi.net>
Co-authored-by: Anon Ray <ecthiender@users.noreply.github.com>
V3_GIT_ORIGIN_REV_ID: 077779ec4e7843abdffdac1ed6aa655210649b93

This commit is contained in:
parent 91ec7bafd1
commit 8af78227a0
v3/Cargo.lock (generated): 1301 lines changed
File diff suppressed because it is too large
@ -16,6 +16,7 @@ members = [
    "crates/open-dds",
    "crates/query-usage-analytics",
    "crates/schema",
    "crates/sql",
    "crates/utils/*",
]
@ -27,13 +27,13 @@ lang-graphql = { path = "../lang-graphql" }
open-dds = { path = "../open-dds" }
opendds-derive = { path = "../utils/opendds-derive" }
schema = { path = "../schema" }
sql = { path = "../sql" }
tracing-util = { path = "../utils/tracing-util" }
metadata-resolve = { path = "../metadata-resolve" }

anyhow = { workspace = true }
axum = { workspace = true }
base64 = { workspace = true }
bincode = { workspace = true }
clap = { workspace = true, features = ["derive", "env"] }
json_value_merge = { workspace = true }
reqwest = { workspace = true, features = ["json", "multipart"] }
@ -13,6 +13,7 @@ use axum::{
|
||||
Extension, Json, Router,
|
||||
};
|
||||
use clap::Parser;
|
||||
use reqwest::header::CONTENT_TYPE;
|
||||
use tower_http::cors::CorsLayer;
|
||||
use tower_http::trace::TraceLayer;
|
||||
use tracing_util::{
|
||||
@ -61,6 +62,9 @@ struct ServerOptions {
|
||||
/// The port on which the server listens.
|
||||
#[arg(long, value_name = "PORT", env = "PORT", default_value_t = DEFAULT_PORT)]
|
||||
port: u16,
|
||||
/// Enables the '/v1/sql' endpoint
|
||||
#[arg(long, env = "ENABLE_SQL_INTERFACE")]
|
||||
enable_sql_interface: bool,
|
||||
/// Enable CORS. Support preflight request and include related headers in responses.
|
||||
#[arg(long, env = "ENABLE_CORS")]
|
||||
enable_cors: bool,
|
||||
@ -88,6 +92,7 @@ struct EngineState {
|
||||
http_context: HttpContext,
|
||||
schema: gql::schema::Schema<GDS>,
|
||||
auth_config: AuthConfig,
|
||||
sql_context: sql::catalog::Context,
|
||||
}
|
||||
|
||||
#[tokio::main]
|
||||
@ -156,7 +161,7 @@ async fn shutdown_signal() {
|
||||
enum StartupError {
|
||||
#[error("could not read the auth config - {0}")]
|
||||
ReadAuth(anyhow::Error),
|
||||
#[error("could not read the schema - {0}")]
|
||||
#[error("failed to build engine state - {0}")]
|
||||
ReadSchema(anyhow::Error),
|
||||
}
|
||||
|
||||
@ -174,6 +179,8 @@ struct EngineRouter {
|
||||
/// The metadata routes for the introspection metadata file.
|
||||
/// Contains /metadata and /metadata-hash routes.
|
||||
metadata_routes: Option<Router>,
|
||||
/// Routes for the SQL interface
|
||||
sql_routes: Option<Router>,
|
||||
/// The CORS layer for the engine.
|
||||
cors_layer: Option<CorsLayer>,
|
||||
}
|
||||
@ -233,6 +240,7 @@ impl EngineRouter {
|
||||
Self {
|
||||
base_router: base_routes,
|
||||
metadata_routes: None,
|
||||
sql_routes: None,
|
||||
cors_layer: None,
|
||||
}
|
||||
}
|
||||
@ -257,6 +265,25 @@ impl EngineRouter {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn add_sql_route(&mut self, state: Arc<EngineState>) {
|
||||
let sql_routes = Router::new()
|
||||
.route("/v1/sql", post(handle_sql_request))
|
||||
.layer(axum::middleware::from_fn(
|
||||
hasura_authn_core::resolve_session,
|
||||
))
|
||||
.layer(axum::middleware::from_fn_with_state(
|
||||
state.clone(),
|
||||
authentication_middleware,
|
||||
))
|
||||
.layer(axum::middleware::from_fn(sql_request_tracing_middleware))
|
||||
// *PLEASE DO NOT ADD ANY MIDDLEWARE
|
||||
// BEFORE THE `explain_request_tracing_middleware`*
|
||||
// Refer to it for more details.
|
||||
.layer(TraceLayer::new_for_http())
|
||||
.with_state(state);
|
||||
self.sql_routes = Some(sql_routes);
|
||||
}
|
||||
|
||||
fn add_cors_layer(&mut self, allow_origin: &[String]) {
|
||||
self.cors_layer = Some(cors::build_cors_layer(allow_origin));
|
||||
}
|
||||
@ -264,6 +291,10 @@ impl EngineRouter {
|
||||
fn into_make_service(self) -> axum::routing::IntoMakeService<Router> {
|
||||
let mut app = self.base_router;
|
||||
// Merge the metadata routes if they exist.
|
||||
if let Some(sql_routes) = self.sql_routes {
|
||||
app = app.merge(sql_routes);
|
||||
}
|
||||
// Merge the metadata routes if they exist.
|
||||
if let Some(metadata_routes) = self.metadata_routes {
|
||||
app = app.merge(metadata_routes);
|
||||
}
|
||||
@ -279,25 +310,20 @@ impl EngineRouter {
|
||||
|
||||
#[allow(clippy::print_stdout)]
|
||||
async fn start_engine(server: &ServerOptions) -> Result<(), StartupError> {
|
||||
let auth_config =
|
||||
read_auth_config(&server.authn_config_path).map_err(StartupError::ReadAuth)?;
|
||||
|
||||
let metadata_resolve_flags = resolve_unstable_features(&server.unstable_features);
|
||||
|
||||
let schema = read_schema(&server.metadata_path, &metadata_resolve_flags)
|
||||
let state = build_state(
|
||||
&server.authn_config_path,
|
||||
&server.metadata_path,
|
||||
&metadata_resolve_flags,
|
||||
)
|
||||
.map_err(StartupError::ReadSchema)?;
|
||||
|
||||
let http_context = HttpContext {
|
||||
client: reqwest::Client::new(),
|
||||
ndc_response_size_limit: None,
|
||||
};
|
||||
let state = Arc::new(EngineState {
|
||||
http_context,
|
||||
schema,
|
||||
auth_config,
|
||||
});
|
||||
let mut engine_router = EngineRouter::new(state.clone());
|
||||
|
||||
let mut engine_router = EngineRouter::new(state);
|
||||
if server.enable_sql_interface {
|
||||
engine_router.add_sql_route(state.clone());
|
||||
}
|
||||
|
||||
// If `--introspection-metadata` is specified we also serve the file indicated on `/metadata`
|
||||
// and its hash on `/metadata-hash`.
|
||||
@ -390,6 +416,33 @@ async fn explain_request_tracing_middleware<B: Send>(
|
||||
.response
|
||||
}
|
||||
|
||||
/// Middleware to start tracing of the `/v1/sql` request.
|
||||
/// This middleware must be active for the entire duration
|
||||
/// of the request i.e. this middleware should be the
|
||||
/// entry point and the exit point of the SQL request.
|
||||
async fn sql_request_tracing_middleware<B: Send>(
|
||||
request: Request<B>,
|
||||
next: Next<B>,
|
||||
) -> axum::response::Response {
|
||||
let tracer = tracing_util::global_tracer();
|
||||
let path = "/v1/sql";
|
||||
tracer
|
||||
.in_span_async_with_parent_context(
|
||||
path,
|
||||
path,
|
||||
SpanVisibility::User,
|
||||
&request.headers().clone(),
|
||||
|| {
|
||||
Box::pin(async move {
|
||||
let response = next.run(request).await;
|
||||
TraceableHttpResponse::new(response, path)
|
||||
})
|
||||
},
|
||||
)
|
||||
.await
|
||||
.response
|
||||
}
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
enum AuthError {
|
||||
#[error("JWT auth error: {0}")]
|
||||
@ -540,16 +593,78 @@ async fn handle_explain_request(
|
||||
response
|
||||
}
|
||||
|
||||
fn read_schema(
|
||||
/// Handle a SQL request and execute it.
|
||||
async fn handle_sql_request(
|
||||
State(state): State<Arc<EngineState>>,
|
||||
Extension(session): Extension<Session>,
|
||||
Json(request): Json<sql::execute::SqlRequest>,
|
||||
) -> axum::response::Response {
|
||||
let tracer = tracing_util::global_tracer();
|
||||
let response = tracer
|
||||
.in_span_async(
|
||||
"handle_sql_request",
|
||||
"Handle SQL Request",
|
||||
SpanVisibility::User,
|
||||
|| {
|
||||
Box::pin(async {
|
||||
sql::execute::execute_sql(
|
||||
&state.sql_context,
|
||||
Arc::new(session),
|
||||
Arc::new(state.http_context.clone()),
|
||||
&request,
|
||||
)
|
||||
.await
|
||||
})
|
||||
},
|
||||
)
|
||||
.await;
|
||||
|
||||
// Set the span as error if the response contains an error
|
||||
set_status_on_current_span(&response);
|
||||
|
||||
match response {
|
||||
Ok(r) => {
|
||||
let mut response = (axum::http::StatusCode::OK, r).into_response();
|
||||
response.headers_mut().insert(
|
||||
CONTENT_TYPE,
|
||||
axum::http::HeaderValue::from_static("application/json"),
|
||||
);
|
||||
response
|
||||
}
|
||||
Err(e) => (
|
||||
axum::http::StatusCode::BAD_REQUEST,
|
||||
Json(serde_json::json!({"error": e.to_string()})),
|
||||
)
|
||||
.into_response(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Build the engine state - include auth, metadata, and sql context.
|
||||
fn build_state(
|
||||
authn_config_path: &PathBuf,
|
||||
metadata_path: &PathBuf,
|
||||
metadata_resolve_flags: &metadata_resolve::MetadataResolveFlagsInternal,
|
||||
) -> Result<gql::schema::Schema<GDS>, anyhow::Error> {
|
||||
) -> Result<Arc<EngineState>, anyhow::Error> {
|
||||
let auth_config = read_auth_config(authn_config_path).map_err(StartupError::ReadAuth)?;
|
||||
let raw_metadata = std::fs::read_to_string(metadata_path)?;
|
||||
let metadata = open_dds::Metadata::from_json_str(&raw_metadata)?;
|
||||
Ok(engine::build::build_schema(
|
||||
metadata,
|
||||
metadata_resolve_flags,
|
||||
)?)
|
||||
let resolved_metadata = metadata_resolve::resolve(metadata, metadata_resolve_flags)?;
|
||||
let http_context = HttpContext {
|
||||
client: reqwest::Client::new(),
|
||||
ndc_response_size_limit: None,
|
||||
};
|
||||
let sql_context = sql::catalog::Context::from_metadata(&resolved_metadata);
|
||||
let schema = schema::GDS {
|
||||
metadata: resolved_metadata,
|
||||
}
|
||||
.build_schema()?;
|
||||
let state = Arc::new(EngineState {
|
||||
http_context,
|
||||
schema,
|
||||
auth_config,
|
||||
sql_context,
|
||||
});
|
||||
Ok(state)
|
||||
}
|
||||
|
||||
fn read_auth_config(path: &PathBuf) -> Result<AuthConfig, anyhow::Error> {
|
||||
|
@ -10,14 +10,15 @@ pub enum BuildError {
|
||||
InvalidMetadata(#[from] metadata_resolve::Error),
|
||||
#[error("unable to build schema: {0}")]
|
||||
UnableToBuildSchema(#[from] schema::Error),
|
||||
#[error("unable to encode schema: {0}")]
|
||||
EncodingError(#[from] bincode::Error),
|
||||
}
|
||||
|
||||
pub fn build_schema(
|
||||
metadata: open_dds::Metadata,
|
||||
metadata_resolve_flags: &metadata_resolve::MetadataResolveFlagsInternal,
|
||||
) -> Result<gql_schema::Schema<GDS>, BuildError> {
|
||||
let gds = schema::GDS::new(metadata, metadata_resolve_flags)?;
|
||||
let resolved_metadata = metadata_resolve::resolve(metadata, metadata_resolve_flags)?;
|
||||
let gds = schema::GDS {
|
||||
metadata: resolved_metadata,
|
||||
};
|
||||
Ok(gds.build_schema()?)
|
||||
}
|
||||
|
@ -75,7 +75,7 @@ pub(crate) fn get_argument_presets(
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn process_model_predicate<'s>(
|
||||
pub fn process_model_predicate<'s>(
|
||||
model_predicate: &'s metadata_resolve::ModelPredicate,
|
||||
session_variables: &SessionVariables,
|
||||
relationships: &mut BTreeMap<NDCRelationshipName, LocalModelRelationshipInfo<'s>>,
|
||||
|
@ -31,7 +31,7 @@ use schema::{Annotation, BooleanExpressionAnnotation, InputAnnotation, ModelInpu
|
||||
use schema::{CommandRelationshipAnnotation, CommandTargetSource};
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
pub(crate) struct LocalModelRelationshipInfo<'s> {
|
||||
pub struct LocalModelRelationshipInfo<'s> {
|
||||
pub relationship_name: &'s RelationshipName,
|
||||
pub relationship_type: &'s RelationshipType,
|
||||
pub source_type: &'s Qualified<CustomTypeName>,
|
||||
|
@ -1,13 +1,14 @@
|
||||
mod error;
|
||||
mod explain;
|
||||
mod global_id;
|
||||
mod ir;
|
||||
mod model_tracking;
|
||||
mod ndc;
|
||||
pub mod ir;
|
||||
pub mod model_tracking;
|
||||
pub mod ndc;
|
||||
mod plan;
|
||||
mod process_response;
|
||||
mod remote_joins;
|
||||
|
||||
pub use plan::process_model_relationship_definition;
|
||||
use plan::ExecuteQueryResult;
|
||||
use thiserror::Error;
|
||||
|
||||
@ -28,9 +29,11 @@ use tracing_util::{
|
||||
// we explicitly export things used by other crates
|
||||
pub use explain::execute_explain;
|
||||
pub use explain::types::{redact_ndc_explain, ExplainResponse};
|
||||
pub use ndc::fetch_from_data_connector;
|
||||
pub use plan::{execute_mutation_plan, execute_query_plan, generate_request_plan, RequestPlan};
|
||||
|
||||
/// Context for making HTTP requests
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct HttpContext {
|
||||
/// The HTTP client to use for making requests
|
||||
pub client: reqwest::Client,
|
||||
|
@ -56,7 +56,7 @@ pub async fn execute_ndc_query<'n, 's>(
|
||||
.await
|
||||
}
|
||||
|
||||
pub(crate) async fn fetch_from_data_connector<'s>(
|
||||
pub async fn fetch_from_data_connector<'s>(
|
||||
http_context: &HttpContext,
|
||||
query_request: &ndc_models::QueryRequest,
|
||||
data_connector: &metadata_resolve::DataConnectorLink,
|
||||
|
@ -4,6 +4,8 @@ mod model_selection;
|
||||
mod relationships;
|
||||
pub(crate) mod selection_set;
|
||||
|
||||
pub use relationships::process_model_relationship_definition;
|
||||
|
||||
use gql::normalized_ast;
|
||||
use gql::schema::NamespacedGetter;
|
||||
use hasura_authn_core::Role;
|
||||
|
@ -73,7 +73,7 @@ pub(crate) fn collect_relationships(
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) fn process_model_relationship_definition(
|
||||
pub fn process_model_relationship_definition(
|
||||
relationship_info: &LocalModelRelationshipInfo,
|
||||
) -> Result<ndc_models::Relationship, error::Error> {
|
||||
let &LocalModelRelationshipInfo {
|
||||
|
@ -57,6 +57,7 @@
|
||||
"type": "named",
|
||||
"name": "int4"
|
||||
},
|
||||
"column_type_representation": null,
|
||||
"argument_mappings": {}
|
||||
},
|
||||
"author_id": {
|
||||
@ -65,6 +66,7 @@
|
||||
"type": "named",
|
||||
"name": "int4"
|
||||
},
|
||||
"column_type_representation": null,
|
||||
"argument_mappings": {}
|
||||
},
|
||||
"title": {
|
||||
@ -73,6 +75,7 @@
|
||||
"type": "named",
|
||||
"name": "varchar"
|
||||
},
|
||||
"column_type_representation": null,
|
||||
"argument_mappings": {}
|
||||
}
|
||||
}
|
||||
@ -177,6 +180,7 @@
|
||||
"type": "named",
|
||||
"name": "int8"
|
||||
},
|
||||
"column_type_representation": null,
|
||||
"argument_mappings": {}
|
||||
},
|
||||
"first_name": {
|
||||
@ -185,6 +189,7 @@
|
||||
"type": "named",
|
||||
"name": "varchar"
|
||||
},
|
||||
"column_type_representation": null,
|
||||
"argument_mappings": {}
|
||||
},
|
||||
"last_name": {
|
||||
@ -193,6 +198,7 @@
|
||||
"type": "named",
|
||||
"name": "varchar"
|
||||
},
|
||||
"column_type_representation": null,
|
||||
"argument_mappings": {}
|
||||
}
|
||||
}
|
||||
@ -297,6 +303,7 @@
|
||||
"type": "named",
|
||||
"name": "int4"
|
||||
},
|
||||
"column_type_representation": null,
|
||||
"argument_mappings": {}
|
||||
},
|
||||
"author_id": {
|
||||
@ -305,6 +312,7 @@
|
||||
"type": "named",
|
||||
"name": "int4"
|
||||
},
|
||||
"column_type_representation": null,
|
||||
"argument_mappings": {}
|
||||
},
|
||||
"title": {
|
||||
@ -313,6 +321,7 @@
|
||||
"type": "named",
|
||||
"name": "varchar"
|
||||
},
|
||||
"column_type_representation": null,
|
||||
"argument_mappings": {}
|
||||
}
|
||||
}
|
||||
@ -656,6 +665,7 @@
|
||||
"type": "named",
|
||||
"name": "int8"
|
||||
},
|
||||
"column_type_representation": null,
|
||||
"argument_mappings": {}
|
||||
},
|
||||
"first_name": {
|
||||
@ -664,6 +674,7 @@
|
||||
"type": "named",
|
||||
"name": "varchar"
|
||||
},
|
||||
"column_type_representation": null,
|
||||
"argument_mappings": {}
|
||||
},
|
||||
"last_name": {
|
||||
@ -672,6 +683,7 @@
|
||||
"type": "named",
|
||||
"name": "varchar"
|
||||
},
|
||||
"column_type_representation": null,
|
||||
"argument_mappings": {}
|
||||
}
|
||||
}
|
||||
@ -711,6 +723,7 @@
|
||||
"type": "named",
|
||||
"name": "int4"
|
||||
},
|
||||
"column_type_representation": null,
|
||||
"argument_mappings": {}
|
||||
},
|
||||
"author_id": {
|
||||
@ -719,6 +732,7 @@
|
||||
"type": "named",
|
||||
"name": "int4"
|
||||
},
|
||||
"column_type_representation": null,
|
||||
"argument_mappings": {}
|
||||
},
|
||||
"title": {
|
||||
@ -727,6 +741,7 @@
|
||||
"type": "named",
|
||||
"name": "varchar"
|
||||
},
|
||||
"column_type_representation": null,
|
||||
"argument_mappings": {}
|
||||
}
|
||||
}
|
||||
@ -803,6 +818,7 @@
|
||||
"type": "named",
|
||||
"name": "int4"
|
||||
},
|
||||
"column_type_representation": null,
|
||||
"argument_mappings": {}
|
||||
},
|
||||
"author_id": {
|
||||
@ -811,6 +827,7 @@
|
||||
"type": "named",
|
||||
"name": "int4"
|
||||
},
|
||||
"column_type_representation": null,
|
||||
"argument_mappings": {}
|
||||
},
|
||||
"title": {
|
||||
@ -819,6 +836,7 @@
|
||||
"type": "named",
|
||||
"name": "varchar"
|
||||
},
|
||||
"column_type_representation": null,
|
||||
"argument_mappings": {}
|
||||
}
|
||||
}
|
||||
@ -858,6 +876,7 @@
|
||||
"type": "named",
|
||||
"name": "int8"
|
||||
},
|
||||
"column_type_representation": null,
|
||||
"argument_mappings": {}
|
||||
},
|
||||
"first_name": {
|
||||
@ -866,6 +885,7 @@
|
||||
"type": "named",
|
||||
"name": "varchar"
|
||||
},
|
||||
"column_type_representation": null,
|
||||
"argument_mappings": {}
|
||||
},
|
||||
"last_name": {
|
||||
@ -874,6 +894,7 @@
|
||||
"type": "named",
|
||||
"name": "varchar"
|
||||
},
|
||||
"column_type_representation": null,
|
||||
"argument_mappings": {}
|
||||
}
|
||||
}
|
||||
@ -950,6 +971,7 @@
|
||||
"type": "named",
|
||||
"name": "int4"
|
||||
},
|
||||
"column_type_representation": null,
|
||||
"argument_mappings": {}
|
||||
},
|
||||
"author_id": {
|
||||
@ -958,6 +980,7 @@
|
||||
"type": "named",
|
||||
"name": "int4"
|
||||
},
|
||||
"column_type_representation": null,
|
||||
"argument_mappings": {}
|
||||
},
|
||||
"title": {
|
||||
@ -966,6 +989,7 @@
|
||||
"type": "named",
|
||||
"name": "varchar"
|
||||
},
|
||||
"column_type_representation": null,
|
||||
"argument_mappings": {}
|
||||
}
|
||||
}
|
||||
@ -1005,6 +1029,7 @@
|
||||
"type": "named",
|
||||
"name": "int4"
|
||||
},
|
||||
"column_type_representation": null,
|
||||
"argument_mappings": {}
|
||||
},
|
||||
"author_id": {
|
||||
@ -1013,6 +1038,7 @@
|
||||
"type": "named",
|
||||
"name": "int4"
|
||||
},
|
||||
"column_type_representation": null,
|
||||
"argument_mappings": {}
|
||||
},
|
||||
"title": {
|
||||
@ -1021,6 +1047,7 @@
|
||||
"type": "named",
|
||||
"name": "varchar"
|
||||
},
|
||||
"column_type_representation": null,
|
||||
"argument_mappings": {}
|
||||
}
|
||||
}
|
||||
|
@ -31,7 +31,7 @@ pub use stages::commands::Command;
|
||||
pub use stages::data_connectors;
|
||||
pub use stages::data_connectors::DataConnectorLink;
|
||||
pub use stages::model_permissions::{
|
||||
FilterPermission, ModelPredicate, ModelTargetSource, ModelWithPermissions,
|
||||
FilterPermission, ModelPredicate, ModelTargetSource, ModelWithPermissions, SelectPermission,
|
||||
};
|
||||
pub use stages::models::{ConnectorArgumentName, Model, ModelSource};
|
||||
|
||||
|
@ -9,6 +9,7 @@ pub use types::{
|
||||
ResolvedApolloFederationObjectKey, ResolvedObjectApolloFederationConfig, TypeMapping,
|
||||
};
|
||||
|
||||
use crate::helpers::ndc_validation::get_underlying_named_type;
|
||||
use crate::helpers::types::{mk_name, store_new_graphql_type};
|
||||
use crate::stages::data_connectors;
|
||||
|
||||
@ -328,9 +329,17 @@ pub fn resolve_data_connector_type_mapping(
|
||||
)
|
||||
};
|
||||
let source_column = get_column(ndc_object_type, field_name, resolved_field_mapping_column)?;
|
||||
let underlying_column_type = get_underlying_named_type(&source_column.r#type);
|
||||
let column_type_representation = data_connector_context
|
||||
.inner
|
||||
.schema
|
||||
.scalar_types
|
||||
.get(underlying_column_type)
|
||||
.and_then(|scalar_type| scalar_type.representation.clone());
|
||||
let resolved_field_mapping = FieldMapping {
|
||||
column: resolved_field_mapping_column.clone(),
|
||||
column_type: source_column.r#type.clone(),
|
||||
column_type_representation,
|
||||
argument_mappings: resolved_argument_mappings.0,
|
||||
};
|
||||
|
||||
|
@ -129,6 +129,7 @@ pub struct ResolvedApolloFederationObjectKey {
|
||||
pub struct FieldMapping {
|
||||
pub column: DataConnectorColumnName,
|
||||
pub column_type: ndc_models::Type,
|
||||
pub column_type_representation: Option<ndc_models::TypeRepresentation>,
|
||||
pub argument_mappings: BTreeMap<ArgumentName, DataConnectorArgumentName>,
|
||||
}
|
||||
|
||||
|
@ -989,7 +989,7 @@ pub enum TypeMappingValidationError {
|
||||
unknown_ndc_field_type_name: String,
|
||||
},
|
||||
#[error("ndc validation error: {0}")]
|
||||
NDCValidationError(NDCValidationError),
|
||||
NDCValidationError(#[from] NDCValidationError),
|
||||
}
|
||||
|
||||
impl From<AggregateExpressionError> for Error {
|
||||
|
v3/crates/sql/Cargo.toml (new file, 24 lines)
@ -0,0 +1,24 @@
[package]
name = "sql"
version.workspace = true
edition.workspace = true
license.workspace = true

[dependencies]
metadata-resolve = { path = "../metadata-resolve" }
open-dds = { path = "../open-dds" }
schema = { path = "../schema" }
execute = { path = "../execute" }
tracing-util = { path = "../utils/tracing-util" }
hasura-authn-core = { path = "../auth/hasura-authn-core" }

ndc-models = { workspace = true }
indexmap = { workspace = true }
datafusion = { version = "39.0.0", features = ["serde"] }
async-trait = "0.1.80"
futures = "0.3.30"
serde = { workspace = true, features = ["rc"] }
thiserror = { workspace = true }

[lints]
workspace = true
v3/crates/sql/readme.md (new file, 12 lines)
@ -0,0 +1,12 @@
# SQL Interface

An experimental SQL interface over OpenDD models. This is mostly targeted at AI
use cases for now - GenAI models are better at generating SQL queries than
GraphQL queries.

This is implemented using the Apache DataFusion Query Engine by deriving the SQL
metadata for datafusion from Open DDS metadata. As the implementation currently
stands, once we get a `LogicalPlan` from datafusion we replace `TableScan`s with
NDC queries to the underlying connector. There is a rudimentary optimizer that
pushes down projections to the ndc query so that we don't fetch all the columns
of a collection.
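As a rough usage sketch (not part of this commit): with the engine started with `--enable-sql-interface`, a client posts a JSON body containing a single `sql` field to `/v1/sql`, matching the `SqlRequest` shape defined in `crates/sql/src/execute.rs` below. The port, the role header, the `app.Albums` model name, and the reqwest/tokio/serde_json dependencies here are assumptions for illustration.

// Minimal client sketch; endpoint port, auth header, and model name are assumed.
use serde_json::json;

#[tokio::main]
async fn main() -> Result<(), reqwest::Error> {
    let response = reqwest::Client::new()
        .post("http://localhost:3000/v1/sql")
        // How the session is resolved depends on the configured auth mode;
        // a role header is shown here purely as an example.
        .header("x-hasura-role", "admin")
        .json(&json!({ "sql": "SELECT title FROM app.Albums LIMIT 5" }))
        .send()
        .await?;
    // The engine responds with a JSON array serialized from DataFusion record batches.
    println!("{}", response.text().await?);
    Ok(())
}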
v3/crates/sql/src/catalog.rs (new file, 259 lines)
@ -0,0 +1,259 @@
|
||||
use std::{any::Any, collections::HashMap, sync::Arc};
|
||||
|
||||
use ::datafusion::{
|
||||
execution::{context::SessionState, runtime_env::RuntimeEnv},
|
||||
sql::TableReference,
|
||||
};
|
||||
use async_trait::async_trait;
|
||||
use hasura_authn_core::Session;
|
||||
use indexmap::IndexMap;
|
||||
use metadata_resolve::{self as resolved};
|
||||
use open_dds::permissions::Role;
|
||||
use schema::OpenDDSchemaProvider;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
mod datafusion {
|
||||
pub(super) use datafusion::{
|
||||
catalog::{schema::SchemaProvider, CatalogProvider},
|
||||
datasource::TableProvider,
|
||||
error::Result,
|
||||
prelude::{SessionConfig, SessionContext},
|
||||
scalar::ScalarValue,
|
||||
};
|
||||
}
|
||||
|
||||
pub mod introspection;
|
||||
pub mod schema;
|
||||
pub mod table;
|
||||
|
||||
/// The context in which to compile and execute SQL queries.
|
||||
#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)]
|
||||
pub struct Context {
|
||||
pub(crate) subgraphs: IndexMap<String, schema::Subgraph>,
|
||||
pub(crate) type_permissions: HashMap<Role, Arc<table::TypePermissionsOfRole>>,
|
||||
pub(crate) introspection: introspection::Introspection,
|
||||
}
|
||||
|
||||
impl Context {
|
||||
/// Derive a SQL Context from resolved Open DDS metadata.
|
||||
pub fn from_metadata(metadata: &resolved::Metadata) -> Self {
|
||||
let mut subgraphs = IndexMap::new();
|
||||
for (model_name, model) in &metadata.models {
|
||||
let schema_name = &model_name.subgraph;
|
||||
let table_name = &model_name.name;
|
||||
let subgraph =
|
||||
subgraphs
|
||||
.entry(schema_name.clone())
|
||||
.or_insert_with(|| schema::Subgraph {
|
||||
models: IndexMap::new(),
|
||||
});
|
||||
subgraph.models.insert(
|
||||
table_name.to_string(),
|
||||
table::Model::from_resolved_model(model),
|
||||
);
|
||||
}
|
||||
|
||||
let mut type_permissions = HashMap::new();
|
||||
for (type_name, object_type) in &metadata.object_types {
|
||||
for (role, output_permission) in &object_type.type_output_permissions {
|
||||
let output_permission = table::TypePermission {
|
||||
output: output_permission.clone(),
|
||||
};
|
||||
let role_permissions =
|
||||
type_permissions
|
||||
.entry(role)
|
||||
.or_insert_with(|| table::TypePermissionsOfRole {
|
||||
permissions: HashMap::new(),
|
||||
});
|
||||
role_permissions
|
||||
.permissions
|
||||
.insert(type_name.clone(), output_permission);
|
||||
}
|
||||
}
|
||||
let introspection = introspection::Introspection::from_metadata(metadata, &subgraphs);
|
||||
Context {
|
||||
subgraphs,
|
||||
type_permissions: type_permissions
|
||||
.into_iter()
|
||||
.map(|(role, role_permissions)| (role.clone(), Arc::new(role_permissions)))
|
||||
.collect(),
|
||||
introspection,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct OpenDDCatalogProvider {
|
||||
schemas: IndexMap<String, Arc<HasuraSchemaProvider>>,
|
||||
}
|
||||
|
||||
impl OpenDDCatalogProvider {
|
||||
fn new(
|
||||
session: &Arc<Session>,
|
||||
http_context: &Arc<execute::HttpContext>,
|
||||
context: &Context,
|
||||
) -> Self {
|
||||
let type_permissions = context.type_permissions.get(&session.role).cloned();
|
||||
let mut schemas = IndexMap::new();
|
||||
for (subgraph_name, subgraph) in &context.subgraphs {
|
||||
let mut tables = IndexMap::new();
|
||||
for model in subgraph.models.values() {
|
||||
let select_permission = model.permissions.get(&session.role).cloned();
|
||||
let provider = table::OpenDDTableProvider {
|
||||
session: session.clone(),
|
||||
http_context: http_context.clone(),
|
||||
name: model.name.clone(),
|
||||
data_type: model.data_type.clone(),
|
||||
source: model.source.clone(),
|
||||
schema: model.schema.clone(),
|
||||
select_permission,
|
||||
type_permissions: type_permissions.clone(),
|
||||
};
|
||||
tables.insert(model.name.to_string(), Arc::new(provider));
|
||||
}
|
||||
let provider = HasuraSchemaProvider::OpenDD(schema::OpenDDSchemaProvider { tables });
|
||||
schemas.insert(subgraph_name.clone(), Arc::new(provider));
|
||||
}
|
||||
schemas.insert(
|
||||
introspection::HASURA_METADATA_SCHEMA.to_string(),
|
||||
Arc::new(HasuraSchemaProvider::Introspection(
|
||||
introspection::IntrospectionSchemaProvider::new(&context.introspection),
|
||||
)),
|
||||
);
|
||||
OpenDDCatalogProvider { schemas }
|
||||
}
|
||||
pub(crate) fn get(
|
||||
&self,
|
||||
default_schema: Option<&str>,
|
||||
table: &TableReference,
|
||||
) -> Option<&table::OpenDDTableProvider> {
|
||||
let schema = table.schema().or(default_schema);
|
||||
let table = table.table();
|
||||
if let Some(schema) = schema {
|
||||
if let HasuraSchemaProvider::OpenDD(schema) = self.schemas.get(schema)?.as_ref() {
|
||||
schema.tables.get(table).map(std::convert::AsRef::as_ref)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
enum HasuraSchemaProvider {
|
||||
OpenDD(OpenDDSchemaProvider),
|
||||
Introspection(introspection::IntrospectionSchemaProvider),
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl datafusion::SchemaProvider for HasuraSchemaProvider {
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
|
||||
fn table_names(&self) -> Vec<String> {
|
||||
match self {
|
||||
HasuraSchemaProvider::OpenDD(schema) => schema.table_names(),
|
||||
HasuraSchemaProvider::Introspection(schema) => schema.table_names(),
|
||||
}
|
||||
}
|
||||
|
||||
async fn table(
|
||||
&self,
|
||||
name: &str,
|
||||
) -> datafusion::Result<Option<Arc<dyn datafusion::TableProvider>>> {
|
||||
match self {
|
||||
HasuraSchemaProvider::OpenDD(schema) => schema.table(name).await,
|
||||
HasuraSchemaProvider::Introspection(schema) => schema.table(name).await,
|
||||
}
|
||||
}
|
||||
|
||||
fn table_exist(&self, name: &str) -> bool {
|
||||
match self {
|
||||
HasuraSchemaProvider::OpenDD(schema) => schema.table_exist(name),
|
||||
HasuraSchemaProvider::Introspection(schema) => schema.table_exist(name),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl datafusion::CatalogProvider for OpenDDCatalogProvider {
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
|
||||
fn schema_names(&self) -> Vec<String> {
|
||||
self.schemas.keys().cloned().collect()
|
||||
}
|
||||
|
||||
fn schema(&self, name: &str) -> Option<Arc<dyn datafusion::SchemaProvider>> {
|
||||
self.schemas
|
||||
.get(name)
|
||||
.cloned()
|
||||
.map(|schema| schema as Arc<dyn datafusion::SchemaProvider>)
|
||||
}
|
||||
}
|
||||
|
||||
impl Context {
|
||||
pub fn create_session_context(
|
||||
&self,
|
||||
session: &Arc<Session>,
|
||||
http_context: &Arc<execute::HttpContext>,
|
||||
) -> datafusion::SessionContext {
|
||||
let default_schema_name = if self.subgraphs.len() == 1 {
|
||||
self.subgraphs.get_index(0).map(|v| v.0)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
let session_config = datafusion::SessionConfig::new()
|
||||
.set(
|
||||
"datafusion.catalog.default_catalog",
|
||||
datafusion::ScalarValue::Utf8(Some("default".to_string())),
|
||||
)
|
||||
.set(
|
||||
"datafusion.catalog.information_schema",
|
||||
datafusion::ScalarValue::Boolean(Some(true)),
|
||||
)
|
||||
.set(
|
||||
"datafusion.execution.target_partitions",
|
||||
datafusion::ScalarValue::Int32(Some(1)),
|
||||
)
|
||||
.set(
|
||||
"datafusion.execution.planning_concurrency",
|
||||
datafusion::ScalarValue::Int32(Some(1)),
|
||||
)
|
||||
.set(
|
||||
"datafusion.sql_parser.enable_ident_normalization",
|
||||
datafusion::ScalarValue::Boolean(Some(false)),
|
||||
);
|
||||
|
||||
let session_config = if let Some(default_schema_name) = default_schema_name {
|
||||
session_config.set(
|
||||
"datafusion.catalog.default_schema",
|
||||
datafusion::ScalarValue::Utf8(Some(default_schema_name.clone())),
|
||||
)
|
||||
} else {
|
||||
session_config
|
||||
};
|
||||
let catalog = Arc::new(OpenDDCatalogProvider::new(session, http_context, self));
|
||||
let query_planner = Arc::new(super::execute::planner::NDCQueryPlanner {
|
||||
default_schema: default_schema_name.map(|s| Arc::new(s.clone())),
|
||||
catalog: catalog.clone(),
|
||||
});
|
||||
let session_state =
|
||||
SessionState::new_with_config_rt(session_config, Arc::new(RuntimeEnv::default()))
|
||||
.with_analyzer_rules(vec![Arc::new(
|
||||
super::execute::analyzer::ReplaceTableScan::new(
|
||||
default_schema_name.map(|s| Arc::new(s.clone())),
|
||||
catalog.clone(),
|
||||
),
|
||||
)])
|
||||
.with_query_planner(query_planner)
|
||||
.add_optimizer_rule(Arc::new(
|
||||
super::execute::optimizer::NDCPushDownProjection {},
|
||||
));
|
||||
let session_context = datafusion::SessionContext::new_with_state(session_state);
|
||||
session_context
|
||||
.register_catalog("default", catalog as Arc<dyn datafusion::CatalogProvider>);
|
||||
session_context
|
||||
}
|
||||
}
|
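The session configuration above registers the catalog under the name `default`, sets a default schema only when there is exactly one subgraph, and disables identifier normalization, so table references stay case sensitive. A sketch of the resulting naming scheme, using a hypothetical `app` subgraph with an `Albums` model:

// Illustrative only; `app` and `Albums` are hypothetical names.
// Fully qualified: catalog ("default") . schema (subgraph) . table (model).
const FULLY_QUALIFIED: &str = r#"SELECT * FROM "default".app.Albums"#;
// Schema-qualified: the "default" catalog is implied.
const SCHEMA_QUALIFIED: &str = "SELECT * FROM app.Albums";
// Unqualified: only resolves when there is a single subgraph, which then
// becomes the session's default schema.
const UNQUALIFIED: &str = "SELECT * FROM Albums";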
v3/crates/sql/src/catalog/introspection.rs (new file, 354 lines)
@ -0,0 +1,354 @@
|
||||
//! Describe and populate the introspection tables used by data fusion.
|
||||
|
||||
use std::{any::Any, sync::Arc};
|
||||
|
||||
use async_trait::async_trait;
|
||||
use indexmap::IndexMap;
|
||||
use metadata_resolve::{self as resolved, ModelRelationshipTarget};
|
||||
mod df {
|
||||
pub(super) use datafusion::{
|
||||
arrow::{
|
||||
array::RecordBatch,
|
||||
datatypes::{DataType, Field, Schema, SchemaRef},
|
||||
},
|
||||
catalog::schema::SchemaProvider,
|
||||
common::ScalarValue,
|
||||
datasource::{TableProvider, TableType},
|
||||
error::Result,
|
||||
execution::context::SessionState,
|
||||
logical_expr::Expr,
|
||||
physical_plan::{values::ValuesExec, ExecutionPlan},
|
||||
};
|
||||
}
|
||||
use open_dds::relationships::RelationshipType;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
pub const HASURA_METADATA_SCHEMA: &str = "hasura";
|
||||
pub const TABLE_METADATA: &str = "table_metadata";
|
||||
pub const COLUMN_METADATA: &str = "column_metadata";
|
||||
pub const INFERRED_FOREIGN_KEY_CONSTRAINTS: &str = "inferred_foreign_key_constraints";
|
||||
|
||||
/// Describes the database schema structure and metadata.
|
||||
#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)]
|
||||
pub(crate) struct Introspection {
|
||||
pub(crate) table_metadata: TableMetadata,
|
||||
pub(crate) column_metadata: ColumnMetadata,
|
||||
pub(crate) inferred_foreign_key_constraints: InferredForeignKeys,
|
||||
}
|
||||
|
||||
impl Introspection {
|
||||
/// Derive SQL schema from the Open DDS metadata.
|
||||
pub fn from_metadata(
|
||||
metadata: &resolved::Metadata,
|
||||
schemas: &IndexMap<String, crate::catalog::schema::Subgraph>,
|
||||
) -> Self {
|
||||
let mut table_metadata_rows = Vec::new();
|
||||
let mut column_metadata_rows = Vec::new();
|
||||
let mut foreign_key_constraint_rows = Vec::new();
|
||||
for (schema_name, schema) in schemas {
|
||||
for (table_name, table) in &schema.models {
|
||||
table_metadata_rows.push(TableRow::new(
|
||||
schema_name.clone(),
|
||||
table_name.to_string(),
|
||||
table.description.clone(),
|
||||
));
|
||||
for (column_name, column_description) in &table.columns {
|
||||
column_metadata_rows.push(ColumnRow {
|
||||
schema_name: schema_name.clone(),
|
||||
table_name: table_name.clone(),
|
||||
column_name: column_name.clone(),
|
||||
description: column_description.clone(),
|
||||
});
|
||||
}
|
||||
|
||||
// TODO:
|
||||
// 1. Need to check if the target_model is part of subgraphs
|
||||
// 2. Need to also check for array relationships in case the corresponding
|
||||
// object relationship isn't present
|
||||
if let Some(object_type) = metadata.object_types.get(&table.data_type) {
|
||||
for relationship in object_type.relationship_fields.values() {
|
||||
if let metadata_resolve::RelationshipTarget::Model(
|
||||
ModelRelationshipTarget {
|
||||
model_name,
|
||||
relationship_type: RelationshipType::Object,
|
||||
target_typename: _,
|
||||
mappings,
|
||||
},
|
||||
) = &relationship.target
|
||||
{
|
||||
for mapping in mappings {
|
||||
foreign_key_constraint_rows.push(ForeignKeyRow {
|
||||
from_schema_name: schema_name.clone(),
|
||||
from_table_name: table_name.clone(),
|
||||
from_column_name: mapping.source_field.field_name.to_string(),
|
||||
to_schema_name: model_name.subgraph.clone(),
|
||||
to_table_name: model_name.name.to_string(),
|
||||
to_column_name: mapping.target_field.field_name.to_string(),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Introspection {
|
||||
table_metadata: TableMetadata::new(table_metadata_rows),
|
||||
column_metadata: ColumnMetadata::new(column_metadata_rows),
|
||||
inferred_foreign_key_constraints: InferredForeignKeys::new(foreign_key_constraint_rows),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)]
|
||||
pub(crate) struct TableMetadata {
|
||||
schema: df::SchemaRef,
|
||||
rows: Vec<TableRow>,
|
||||
}
|
||||
|
||||
impl TableMetadata {
|
||||
pub(crate) fn new(rows: Vec<TableRow>) -> Self {
|
||||
let schema_name = df::Field::new("schema_name", df::DataType::Utf8, false);
|
||||
let table_name = df::Field::new("table_name", df::DataType::Utf8, false);
|
||||
let description = df::Field::new("description", df::DataType::Utf8, true);
|
||||
let schema =
|
||||
df::SchemaRef::new(df::Schema::new(vec![schema_name, table_name, description]));
|
||||
TableMetadata { schema, rows }
|
||||
}
|
||||
}
|
||||
|
||||
impl TableMetadata {
|
||||
fn to_values_table(&self) -> ValuesTable {
|
||||
ValuesTable {
|
||||
schema: self.schema.clone(),
|
||||
rows: self
|
||||
.rows
|
||||
.iter()
|
||||
.map(|row| {
|
||||
vec![
|
||||
df::ScalarValue::Utf8(Some(row.schema_name.clone())),
|
||||
df::ScalarValue::Utf8(Some(row.table_name.clone())),
|
||||
df::ScalarValue::Utf8(row.description.clone()),
|
||||
]
|
||||
})
|
||||
.collect(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)]
|
||||
pub(crate) struct TableRow {
|
||||
schema_name: String,
|
||||
table_name: String,
|
||||
description: Option<String>,
|
||||
}
|
||||
|
||||
impl TableRow {
|
||||
pub(crate) fn new(
|
||||
schema_name: String,
|
||||
table_name: String,
|
||||
description: Option<String>,
|
||||
) -> Self {
|
||||
Self {
|
||||
schema_name,
|
||||
table_name,
|
||||
description,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)]
|
||||
pub(crate) struct ColumnMetadata {
|
||||
pub(crate) schema: df::SchemaRef,
|
||||
pub(crate) rows: Vec<ColumnRow>,
|
||||
}
|
||||
|
||||
impl ColumnMetadata {
|
||||
fn new(rows: Vec<ColumnRow>) -> Self {
|
||||
let schema_name = df::Field::new("schema_name", df::DataType::Utf8, false);
|
||||
let table_name = df::Field::new("table_name", df::DataType::Utf8, false);
|
||||
let column_name = df::Field::new("column_name", df::DataType::Utf8, false);
|
||||
let description = df::Field::new("description", df::DataType::Utf8, true);
|
||||
let schema = df::SchemaRef::new(df::Schema::new(vec![
|
||||
schema_name,
|
||||
table_name,
|
||||
column_name,
|
||||
description,
|
||||
]));
|
||||
ColumnMetadata { schema, rows }
|
||||
}
|
||||
fn to_values_table(&self) -> ValuesTable {
|
||||
ValuesTable {
|
||||
schema: self.schema.clone(),
|
||||
rows: self
|
||||
.rows
|
||||
.iter()
|
||||
.map(|row| {
|
||||
vec![
|
||||
df::ScalarValue::Utf8(Some(row.schema_name.clone())),
|
||||
df::ScalarValue::Utf8(Some(row.table_name.clone())),
|
||||
df::ScalarValue::Utf8(Some(row.column_name.clone())),
|
||||
df::ScalarValue::Utf8(row.description.clone()),
|
||||
]
|
||||
})
|
||||
.collect(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)]
|
||||
pub(crate) struct ColumnRow {
|
||||
schema_name: String,
|
||||
table_name: String,
|
||||
column_name: String,
|
||||
description: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)]
|
||||
pub(crate) struct InferredForeignKeys {
|
||||
schema: df::SchemaRef,
|
||||
rows: Vec<ForeignKeyRow>,
|
||||
}
|
||||
|
||||
impl InferredForeignKeys {
|
||||
fn new(rows: Vec<ForeignKeyRow>) -> Self {
|
||||
let from_schema_name = df::Field::new("from_schema_name", df::DataType::Utf8, false);
|
||||
let from_table_name = df::Field::new("from_table_name", df::DataType::Utf8, false);
|
||||
let from_column_name = df::Field::new("from_column_name", df::DataType::Utf8, false);
|
||||
let to_schema_name = df::Field::new("to_schema_name", df::DataType::Utf8, false);
|
||||
let to_table_name = df::Field::new("to_table_name", df::DataType::Utf8, false);
|
||||
let to_column_name = df::Field::new("to_column_name", df::DataType::Utf8, false);
|
||||
let schema = df::SchemaRef::new(df::Schema::new(vec![
|
||||
from_schema_name,
|
||||
from_table_name,
|
||||
from_column_name,
|
||||
to_schema_name,
|
||||
to_table_name,
|
||||
to_column_name,
|
||||
]));
|
||||
InferredForeignKeys { schema, rows }
|
||||
}
|
||||
fn to_values_table(&self) -> ValuesTable {
|
||||
ValuesTable {
|
||||
schema: self.schema.clone(),
|
||||
rows: self
|
||||
.rows
|
||||
.iter()
|
||||
.map(|row| {
|
||||
vec![
|
||||
df::ScalarValue::Utf8(Some(row.from_schema_name.clone())),
|
||||
df::ScalarValue::Utf8(Some(row.from_table_name.clone())),
|
||||
df::ScalarValue::Utf8(Some(row.from_column_name.clone())),
|
||||
df::ScalarValue::Utf8(Some(row.to_schema_name.clone())),
|
||||
df::ScalarValue::Utf8(Some(row.to_table_name.clone())),
|
||||
df::ScalarValue::Utf8(Some(row.to_column_name.clone())),
|
||||
]
|
||||
})
|
||||
.collect(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)]
|
||||
struct ForeignKeyRow {
|
||||
from_schema_name: String,
|
||||
from_table_name: String,
|
||||
from_column_name: String,
|
||||
to_schema_name: String,
|
||||
to_table_name: String,
|
||||
to_column_name: String,
|
||||
}
|
||||
|
||||
pub(crate) struct IntrospectionSchemaProvider {
|
||||
tables: IndexMap<String, Arc<dyn df::TableProvider>>,
|
||||
}
|
||||
|
||||
impl IntrospectionSchemaProvider {
|
||||
pub(crate) fn new(introspection: &Introspection) -> Self {
|
||||
let tables = [
|
||||
(
|
||||
TABLE_METADATA,
|
||||
introspection.table_metadata.to_values_table(),
|
||||
),
|
||||
(
|
||||
COLUMN_METADATA,
|
||||
introspection.column_metadata.to_values_table(),
|
||||
),
|
||||
(
|
||||
INFERRED_FOREIGN_KEY_CONSTRAINTS,
|
||||
introspection
|
||||
.inferred_foreign_key_constraints
|
||||
.to_values_table(),
|
||||
),
|
||||
]
|
||||
.into_iter()
|
||||
.map(|(k, table)| (k.to_string(), Arc::new(table) as Arc<dyn df::TableProvider>))
|
||||
.collect();
|
||||
IntrospectionSchemaProvider { tables }
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl df::SchemaProvider for IntrospectionSchemaProvider {
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
|
||||
fn table_names(&self) -> Vec<String> {
|
||||
self.tables.keys().cloned().collect::<Vec<_>>()
|
||||
}
|
||||
|
||||
async fn table(
|
||||
&self,
|
||||
name: &str,
|
||||
) -> datafusion::error::Result<Option<Arc<dyn df::TableProvider>>> {
|
||||
Ok(self.tables.get(name).cloned())
|
||||
}
|
||||
|
||||
fn table_exist(&self, name: &str) -> bool {
|
||||
self.tables.contains_key(name)
|
||||
}
|
||||
}
|
||||
|
||||
// A table with static rows
|
||||
struct ValuesTable {
|
||||
schema: df::SchemaRef,
|
||||
rows: Vec<Vec<df::ScalarValue>>,
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl df::TableProvider for ValuesTable {
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
|
||||
fn schema(&self) -> df::SchemaRef {
|
||||
self.schema.clone()
|
||||
}
|
||||
|
||||
fn table_type(&self) -> df::TableType {
|
||||
df::TableType::View
|
||||
}
|
||||
async fn scan(
|
||||
&self,
|
||||
_state: &df::SessionState,
|
||||
projection: Option<&Vec<usize>>,
|
||||
// filters and limit can be used here to inject some push-down operations if needed
|
||||
_filters: &[df::Expr],
|
||||
_limit: Option<usize>,
|
||||
) -> datafusion::error::Result<Arc<dyn df::ExecutionPlan>> {
|
||||
let projected_schema = Arc::new(self.schema.project(projection.unwrap_or(&vec![]))?);
|
||||
let columnar_projection = projection
|
||||
.unwrap_or(&vec![])
|
||||
.iter()
|
||||
.map(|j| self.rows.iter().map(|row| row[*j].clone()))
|
||||
.map(df::ScalarValue::iter_to_array)
|
||||
.collect::<df::Result<Vec<_>>>()?;
|
||||
Ok(Arc::new(df::ValuesExec::try_new_from_batches(
|
||||
projected_schema.clone(),
|
||||
vec![df::RecordBatch::try_new(
|
||||
projected_schema,
|
||||
columnar_projection,
|
||||
)?],
|
||||
)?))
|
||||
}
|
||||
}
|
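Since these tables are registered under the `hasura` schema, they can be queried with plain SQL like any other table. A small sketch of the kind of queries a client could issue; the table and column names come from the definitions above, while the `'default'` subgraph name is a hypothetical example.

// Illustrative only: queries against the introspection tables defined above.
const LIST_TABLES: &str =
    "SELECT schema_name, table_name, description FROM hasura.table_metadata";
// `'default'` is a hypothetical subgraph name.
const LIST_COLUMNS: &str =
    "SELECT table_name, column_name, description \
     FROM hasura.column_metadata WHERE schema_name = 'default'";
const LIST_FOREIGN_KEYS: &str =
    "SELECT from_table_name, from_column_name, to_table_name, to_column_name \
     FROM hasura.inferred_foreign_key_constraints";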
v3/crates/sql/src/catalog/schema.rs (new file, 44 lines)
@ -0,0 +1,44 @@
|
||||
use async_trait::async_trait;
|
||||
use std::{any::Any, sync::Arc};
|
||||
|
||||
use indexmap::IndexMap;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
mod df {
|
||||
pub(super) use datafusion::error::Result;
|
||||
pub(super) use datafusion::{catalog::schema::SchemaProvider, datasource::TableProvider};
|
||||
}
|
||||
|
||||
use crate::catalog;
|
||||
|
||||
#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)]
|
||||
pub(crate) struct Subgraph {
|
||||
pub models: IndexMap<String, catalog::table::Model>,
|
||||
}
|
||||
|
||||
pub struct OpenDDSchemaProvider {
|
||||
pub(crate) tables: IndexMap<String, Arc<catalog::table::OpenDDTableProvider>>,
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl df::SchemaProvider for OpenDDSchemaProvider {
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
|
||||
fn table_names(&self) -> Vec<String> {
|
||||
self.tables.keys().cloned().collect::<Vec<_>>()
|
||||
}
|
||||
|
||||
async fn table(&self, name: &str) -> df::Result<Option<Arc<dyn df::TableProvider>>> {
|
||||
Ok(self
|
||||
.tables
|
||||
.get(name)
|
||||
.cloned()
|
||||
.map(|table| table as Arc<dyn df::TableProvider>))
|
||||
}
|
||||
|
||||
fn table_exist(&self, name: &str) -> bool {
|
||||
self.tables.contains_key(name)
|
||||
}
|
||||
}
|
v3/crates/sql/src/catalog/table.rs (new file, 256 lines)
@ -0,0 +1,256 @@
|
||||
//! Describe a model for a SQL table and how to translate datafusion operations on the table
|
||||
//! to ndc-spec queries.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::{any::Any, sync::Arc};
|
||||
|
||||
use async_trait::async_trait;
|
||||
use datafusion::common::internal_err;
|
||||
use hasura_authn_core::Session;
|
||||
use indexmap::IndexMap;
|
||||
use metadata_resolve::{self as resolved, Qualified, SelectPermission};
|
||||
use open_dds::permissions::Role;
|
||||
use open_dds::{
|
||||
models::ModelName,
|
||||
types::{CustomTypeName, FieldName},
|
||||
};
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
mod df {
|
||||
pub(super) use datafusion::arrow::datatypes::Field;
|
||||
pub(super) use datafusion::{
|
||||
arrow::datatypes::{DataType, Schema, SchemaBuilder, SchemaRef},
|
||||
datasource::{TableProvider, TableType},
|
||||
execution::context::SessionState,
|
||||
logical_expr::Expr,
|
||||
physical_plan::ExecutionPlan,
|
||||
};
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq)]
|
||||
pub(crate) struct TypePermission {
|
||||
pub output: open_dds::permissions::TypeOutputPermission,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)]
|
||||
pub(crate) struct TypePermissionsOfRole {
|
||||
pub(crate) permissions: HashMap<Qualified<CustomTypeName>, TypePermission>,
|
||||
}
|
||||
|
||||
fn get_type_representation<'a>(
|
||||
model: &'a resolved::Model,
|
||||
field: &FieldName,
|
||||
) -> Option<&'a ndc_models::TypeRepresentation> {
|
||||
model
|
||||
.source
|
||||
.as_ref()
|
||||
.and_then(|source| {
|
||||
source
|
||||
.type_mappings
|
||||
.get(&model.data_type)
|
||||
.map(|type_mapping| {
|
||||
let resolved::TypeMapping::Object { field_mappings, .. } = type_mapping;
|
||||
field_mappings
|
||||
.get(field)
|
||||
.map(|mapping| mapping.column_type_representation.as_ref())
|
||||
})
|
||||
})
|
||||
.flatten()
|
||||
.flatten()
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)]
|
||||
pub(crate) struct Model {
|
||||
pub name: ModelName,
|
||||
|
||||
pub description: Option<String>,
|
||||
|
||||
// Datafusion table schema
|
||||
pub schema: df::SchemaRef,
|
||||
|
||||
// For now, descriptions of fields
|
||||
pub columns: IndexMap<String, Option<String>>,
|
||||
|
||||
// This is the entry point for the type mappings stored
|
||||
// in ModelSource
|
||||
pub data_type: Qualified<CustomTypeName>,
|
||||
|
||||
// The underlying source to execute ndc queries
|
||||
pub source: Option<Arc<metadata_resolve::ModelSource>>,
|
||||
|
||||
// Permisisons for the model. Note that the type permissions will need to be retrieved from the
|
||||
// global context
|
||||
pub permissions: HashMap<Role, Arc<resolved::SelectPermission>>,
|
||||
}
|
||||
|
||||
impl Model {
|
||||
pub fn from_resolved_model(model: &resolved::ModelWithPermissions) -> Self {
|
||||
let (schema, columns) = {
|
||||
let mut columns = IndexMap::new();
|
||||
let mut builder = df::SchemaBuilder::new();
|
||||
for (field_name, field_definition) in &model.model.type_fields {
|
||||
let ndc_type_representation = get_type_representation(&model.model, field_name);
|
||||
let field_type =
|
||||
to_arrow_type(&field_definition.field_type, ndc_type_representation);
|
||||
if let Some(field_type) = field_type {
|
||||
builder.push(df::Field::new(
|
||||
field_name.to_string(),
|
||||
field_type,
|
||||
field_definition.field_type.nullable,
|
||||
));
|
||||
let description = if let Some(ndc_models::TypeRepresentation::Enum { one_of }) =
|
||||
ndc_type_representation
|
||||
{
|
||||
// TODO: Instead of stuffing the possible enum values in description,
|
||||
// surface them in the metadata tables.
|
||||
Some(
|
||||
field_definition
|
||||
.description
|
||||
.clone()
|
||||
.unwrap_or_else(String::new)
|
||||
+ &format!(" Possible values: {}", one_of.join(", ")),
|
||||
)
|
||||
} else {
|
||||
field_definition.description.clone()
|
||||
};
|
||||
columns.insert(field_name.to_string(), description);
|
||||
}
|
||||
}
|
||||
let fields = builder.finish().fields;
|
||||
(df::SchemaRef::new(df::Schema::new(fields)), columns)
|
||||
};
|
||||
|
||||
let permissions = model
|
||||
.select_permissions
|
||||
.iter()
|
||||
.map(|(role, select_permission)| (role.clone(), Arc::new(select_permission.clone())))
|
||||
.collect();
|
||||
|
||||
Model {
|
||||
name: model.model.name.name.clone(),
|
||||
description: model.model.raw.description.clone(),
|
||||
schema,
|
||||
data_type: model.model.data_type.clone(),
|
||||
source: model
|
||||
.model
|
||||
.source
|
||||
.as_ref()
|
||||
.map(|source| Arc::new(source.clone())),
|
||||
columns,
|
||||
permissions,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Converts an opendd type to an arrow type.
|
||||
/// TODO: need to handle complex types
|
||||
#[allow(clippy::match_same_arms)]
|
||||
fn to_arrow_type(
|
||||
ty: &resolved::QualifiedTypeReference,
|
||||
ndc_type_representation: Option<&ndc_models::TypeRepresentation>,
|
||||
) -> Option<df::DataType> {
|
||||
match &ty.underlying_type {
|
||||
resolved::QualifiedBaseType::Named(resolved::QualifiedTypeName::Inbuilt(inbuilt_type)) => {
|
||||
let data_type = match inbuilt_type {
|
||||
open_dds::types::InbuiltType::ID => df::DataType::Utf8,
|
||||
open_dds::types::InbuiltType::Int => df::DataType::Int32,
|
||||
open_dds::types::InbuiltType::Float => df::DataType::Float32,
|
||||
open_dds::types::InbuiltType::Boolean => df::DataType::Boolean,
|
||||
open_dds::types::InbuiltType::String => df::DataType::Utf8,
|
||||
};
|
||||
Some(data_type)
|
||||
}
|
||||
resolved::QualifiedBaseType::Named(resolved::QualifiedTypeName::Custom(custom_type)) => {
|
||||
if let Some(type_representation) = ndc_type_representation {
|
||||
match type_representation {
|
||||
ndc_models::TypeRepresentation::Boolean => Some(df::DataType::Boolean),
|
||||
ndc_models::TypeRepresentation::String => Some(df::DataType::Utf8),
|
||||
ndc_models::TypeRepresentation::Int8 => Some(df::DataType::Int8),
|
||||
ndc_models::TypeRepresentation::Int16 => Some(df::DataType::Int16),
|
||||
ndc_models::TypeRepresentation::Int32 => Some(df::DataType::Int32),
|
||||
ndc_models::TypeRepresentation::Int64 => Some(df::DataType::Int64),
|
||||
ndc_models::TypeRepresentation::Float32 => Some(df::DataType::Float32),
|
||||
ndc_models::TypeRepresentation::Float64 => Some(df::DataType::Float64),
|
||||
// Can't do anything better for BigInteger, so we just use String.
|
||||
ndc_models::TypeRepresentation::BigInteger => Some(df::DataType::Utf8),
|
||||
// BigDecimal128 is not supported by arrow.
|
||||
ndc_models::TypeRepresentation::BigDecimal => Some(df::DataType::Float64),
|
||||
ndc_models::TypeRepresentation::UUID => Some(df::DataType::Utf8),
|
||||
ndc_models::TypeRepresentation::Date => Some(df::DataType::Date32),
|
||||
ndc_models::TypeRepresentation::Timestamp => Some(df::DataType::Timestamp(
|
||||
datafusion::arrow::datatypes::TimeUnit::Microsecond,
|
||||
None,
|
||||
)),
|
||||
ndc_models::TypeRepresentation::TimestampTZ => Some(df::DataType::Timestamp(
|
||||
datafusion::arrow::datatypes::TimeUnit::Microsecond,
|
||||
None,
|
||||
)),
|
||||
ndc_models::TypeRepresentation::Enum { .. } => Some(df::DataType::Utf8),
|
||||
_ => None,
|
||||
}
|
||||
} else {
|
||||
match custom_type.name.to_string().to_lowercase().as_str() {
|
||||
"bool" => Some(df::DataType::Boolean),
|
||||
"int8" => Some(df::DataType::Int8),
|
||||
"int16" => Some(df::DataType::Int16),
|
||||
"int32" => Some(df::DataType::Int32),
|
||||
"int64" => Some(df::DataType::Int64),
|
||||
"float32" => Some(df::DataType::Float32),
|
||||
"float64" => Some(df::DataType::Float64),
|
||||
"varchar" => Some(df::DataType::Utf8),
|
||||
"text" => Some(df::DataType::Utf8),
|
||||
"timestamp" => Some(df::DataType::Timestamp(
|
||||
datafusion::arrow::datatypes::TimeUnit::Microsecond,
|
||||
None,
|
||||
)),
|
||||
"timestamptz" => Some(df::DataType::Timestamp(
|
||||
datafusion::arrow::datatypes::TimeUnit::Microsecond,
|
||||
None,
|
||||
)),
|
||||
// BigDecimal128 is not supported by arrow.
|
||||
"bigdecimal" => Some(df::DataType::Float64),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
resolved::QualifiedBaseType::List(_) => None,
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub(crate) struct OpenDDTableProvider {
|
||||
pub(crate) session: Arc<Session>,
|
||||
pub(crate) http_context: Arc<execute::HttpContext>,
|
||||
pub(crate) name: ModelName,
|
||||
pub(crate) data_type: Qualified<CustomTypeName>,
|
||||
pub(crate) source: Option<Arc<metadata_resolve::ModelSource>>,
|
||||
pub(crate) schema: df::SchemaRef,
|
||||
pub(crate) select_permission: Option<Arc<SelectPermission>>,
|
||||
pub(crate) type_permissions: Option<Arc<TypePermissionsOfRole>>,
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl df::TableProvider for OpenDDTableProvider {
|
||||
fn as_any(&self) -> &dyn Any {
|
||||
self
|
||||
}
|
||||
|
||||
fn schema(&self) -> df::SchemaRef {
|
||||
self.schema.clone()
|
||||
}
|
||||
|
||||
fn table_type(&self) -> df::TableType {
|
||||
df::TableType::Base
|
||||
}
|
||||
|
||||
async fn scan(
|
||||
&self,
|
||||
_state: &df::SessionState,
|
||||
_projection: Option<&Vec<usize>>,
|
||||
// filters and limit can be used here to inject some push-down operations if needed
|
||||
_filters: &[df::Expr],
|
||||
_limit: Option<usize>,
|
||||
) -> datafusion::error::Result<Arc<dyn df::ExecutionPlan>> {
|
||||
internal_err!("scan shouldn't be called")
|
||||
}
|
||||
}
|
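For concreteness, a sketch (not part of this commit) of the Arrow schema that `from_resolved_model` above would derive for a hypothetical `Album` model: a non-nullable `Int` key, a nullable `String` title, and a custom scalar whose connector reports a `Date` type representation.

// Illustrative only; `Album` and its fields are hypothetical.
use datafusion::arrow::datatypes::{DataType, Field, Schema, SchemaRef};

fn example_album_schema() -> SchemaRef {
    SchemaRef::new(Schema::new(vec![
        // InbuiltType::Int maps to Int32; non-nullable field.
        Field::new("AlbumId", DataType::Int32, false),
        // InbuiltType::String maps to Utf8; nullable field.
        Field::new("Title", DataType::Utf8, true),
        // A custom scalar with TypeRepresentation::Date maps to Date32.
        Field::new("ReleaseDate", DataType::Date32, true),
    ]))
}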
v3/crates/sql/src/execute.rs (new file, 158 lines)
@ -0,0 +1,158 @@
use std::sync::Arc;

use datafusion::{
    arrow::{array::RecordBatch, error::ArrowError, json::writer::JsonArray, json::WriterBuilder},
    dataframe::DataFrame,
    error::DataFusionError,
};
use hasura_authn_core::Session;
use serde::{Deserialize, Serialize};
use thiserror::Error;

use tracing_util::{ErrorVisibility, SpanVisibility, Successful, TraceableError};

pub use datafusion::execution::context::SessionContext;

pub(crate) mod analyzer;
pub(crate) mod optimizer;
pub(crate) mod planner;

#[derive(Debug, Deserialize, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct SqlRequest {
    sql: String,
}

#[derive(Error, Debug, Clone)]
pub enum SqlExecutionError {
    #[error("error in data fusion: {0}")]
    DataFusion(String),
    #[error("error in encoding data: {0}")]
    Arrow(String),
}

impl From<DataFusionError> for SqlExecutionError {
    fn from(e: DataFusionError) -> Self {
        Self::DataFusion(e.to_string())
    }
}

impl From<ArrowError> for SqlExecutionError {
    fn from(e: ArrowError) -> Self {
        Self::Arrow(e.to_string())
    }
}

impl TraceableError for SqlExecutionError {
    fn visibility(&self) -> ErrorVisibility {
        ErrorVisibility::User
    }
}

/// Executes an SQL Request using the Apache DataFusion query engine.
pub async fn execute_sql(
    context: &crate::catalog::Context,
    session: Arc<Session>,
    http_context: Arc<execute::HttpContext>,
    request: &SqlRequest,
) -> Result<Vec<u8>, SqlExecutionError> {
    let tracer = tracing_util::global_tracer();
    let session_context = tracer
        .in_span(
            "create_session_context",
            "Create a datafusion SessionContext",
            SpanVisibility::Internal,
            || {
                let session = context.create_session_context(&session, &http_context);
                Successful::new(session)
            },
        )
        .into_inner();
    let data_frame = tracer
        .in_span_async(
            "create_logical_plan",
            "Creates a Logical Plan for the given SQL statement",
            SpanVisibility::User,
            || {
                Box::pin(async {
                    session_context
                        .sql(&request.sql)
                        .await
                        .map_err(|e| SqlExecutionError::DataFusion(e.to_string()))
                })
            },
        )
        .await?;
    let batches = tracer
        .in_span_async(
            "execute_logical_plan",
            "Executes the Logical Plan of a query",
            SpanVisibility::User,
            || Box::pin(async { execute_logical_plan(data_frame).await }),
        )
        .await?;
    tracer.in_span(
        "serialize_record_batch",
        "Serializes datafusion's RecordBatch into a JSON array",
        SpanVisibility::User,
        || record_batches_to_json_array(&batches),
    )
}

async fn execute_logical_plan(frame: DataFrame) -> Result<Vec<RecordBatch>, SqlExecutionError> {
    let tracer = tracing_util::global_tracer();
    let task_ctx = frame.task_ctx();
    let session_config = task_ctx.session_config().clone();
    let plan = tracer
        .in_span_async(
            "create_physical_plan",
            "Creates a physical plan from a logical plan",
            SpanVisibility::User,
            || {
                Box::pin(async {
                    frame
                        .create_physical_plan()
                        .await
                        .map_err(|e| SqlExecutionError::DataFusion(e.to_string()))
                })
            },
        )
        .await?;
    let record_batches = tracer
        .in_span_async(
            "execute_physical_plan",
            "Executes a physical plan to collect record batches",
            SpanVisibility::User,
            || {
                let task_ctx = Arc::new(task_ctx.with_session_config(
                    session_config.with_extension(Arc::new(tracing_util::Context::current())),
                ));
                Box::pin(async {
                    datafusion::physical_plan::collect(plan, task_ctx)
                        .await
                        .map_err(|e| SqlExecutionError::DataFusion(e.to_string()))
                })
            },
        )
        .await?;
    Ok(record_batches)
}

fn record_batches_to_json_array(batches: &[RecordBatch]) -> Result<Vec<u8>, SqlExecutionError> {
    if batches.is_empty() {
        return Ok(vec![b'[', b']']);
    }
    // Write the record batch out as a JSON array
    let buf = Vec::new();

    let builder = WriterBuilder::new().with_explicit_nulls(true);
    let mut writer = builder.build::<_, JsonArray>(buf);

    for batch in batches {
        writer.write(batch)?;
    }
    writer.finish()?;

    // Get the underlying buffer back,
    Ok(writer.into_inner())
}
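For orientation, here is a hedged sketch of how a caller might drive this module. The handler name, the way the catalog context, session and HTTP context are obtained, and the availability of `serde_json` are assumptions about the surrounding engine code, not part of this diff; only `SqlRequest` and `execute_sql` come from the file above.

use std::sync::Arc;

// Hypothetical handler-side glue (names are illustrative).
async fn handle_sql_request(
    catalog: &sql::catalog::Context,
    session: Arc<hasura_authn_core::Session>,
    http_context: Arc<execute::HttpContext>,
    body: &[u8],
) -> Result<Vec<u8>, Box<dyn std::error::Error>> {
    // The POST body is expected to look like: {"sql": "SELECT ..."}
    let request: sql::execute::SqlRequest = serde_json::from_slice(body)?;
    // The result is the rows serialized as a JSON array
    // (b"[]" when the query produces no record batches).
    let rows = sql::execute::execute_sql(catalog, session, http_context, &request).await?;
    Ok(rows)
}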
188 v3/crates/sql/src/execute/analyzer.rs Normal file
@ -0,0 +1,188 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

//! Analyzer rule that replaces TableScan references to OpenDD models
//! with `NDCQuery` extension nodes, inlining them into the LogicalPlan.

use std::collections::BTreeMap;
use std::sync::Arc;

use datafusion::{
    common::{
        config::ConfigOptions,
        tree_node::{Transformed, TransformedResult, TreeNode},
        Result,
    },
    error::DataFusionError,
    logical_expr::{logical_plan::LogicalPlan, Extension, TableScan},
    optimizer::AnalyzerRule,
};
use indexmap::IndexMap;
use metadata_resolve::{self as resolved};
use open_dds::identifier::Identifier;
use open_dds::types::FieldName;

use crate::plan::NDCQuery;

/// Analyzer rule that replaces a [`TableScan`] of an OpenDD model with an
/// [`NDCQuery`] extension node carrying the equivalent NDC query request.
pub struct ReplaceTableScan {
    default_schema: Option<Arc<String>>,
    catalog: Arc<crate::catalog::OpenDDCatalogProvider>,
}

impl ReplaceTableScan {
    pub fn new(
        default_schema: Option<Arc<String>>,
        catalog: Arc<crate::catalog::OpenDDCatalogProvider>,
    ) -> Self {
        Self {
            default_schema,
            catalog,
        }
    }
}

impl AnalyzerRule for ReplaceTableScan {
    fn analyze(&self, plan: LogicalPlan, _: &ConfigOptions) -> Result<LogicalPlan> {
        plan.transform_up(|n| {
            analyze_internal(
                self.default_schema.as_ref().map(|x| x.as_str()),
                &self.catalog,
                n,
            )
        })
        .data()
    }

    fn name(&self) -> &str {
        "replace_table_scan_with_ndc_query"
    }
}

fn analyze_internal(
    default_schema: Option<&str>,
    catalog: &crate::catalog::OpenDDCatalogProvider,
    plan: LogicalPlan,
) -> Result<Transformed<LogicalPlan>> {
    // rewrite any subqueries in the plan first
    let transformed_plan = plan.map_subqueries(|plan| {
        plan.transform_up(|n| analyze_internal(default_schema, catalog, n))
    })?;

    let transformed_plan = transformed_plan.transform_data(|plan| match plan {
        LogicalPlan::TableScan(TableScan {
            table_name,
            source: _,
            projection: _,
            projected_schema,
            filters: _,
            fetch: _,
        }) if table_name.schema() != Some("hasura") => {
            let table = catalog.get(default_schema, &table_name).ok_or_else(|| {
                DataFusionError::Internal(format!(
                    "table provider not found for replace_table_scan: {table_name}"
                ))
            })?;
            let model_source = table.source.as_ref().ok_or_else(|| {
                DataFusionError::Plan(format!(
                    "model source should be configured for {}",
                    table.name
                ))
            })?;
            let mut ndc_fields = IndexMap::new();

            let base_type_fields = {
                let base_type_mapping = model_source
                    .type_mappings
                    .get(&table.data_type)
                    .ok_or_else(|| {
                        DataFusionError::Internal(format!(
                            "couldn't fetch type_mapping of type {} for model {}",
                            table.data_type, table.name
                        ))
                    })?;
                match base_type_mapping {
                    resolved::TypeMapping::Object {
                        ndc_object_type_name: _,
                        field_mappings,
                    } => field_mappings,
                }
            };
            for field in projected_schema.fields() {
                let field_name = {
                    let field_name = Identifier::new(field.name().clone()).map_err(|e| {
                        DataFusionError::Internal(format!(
                            "field name conversion failed {}: {}",
                            field.name(),
                            e
                        ))
                    })?;
                    FieldName(field_name)
                };
                let ndc_field = {
                    base_type_fields
                        .get(&field_name)
                        .ok_or_else(|| {
                            DataFusionError::Internal(format!(
                                "couldn't fetch field mapping of field {} in type {} for model {}",
                                field_name, table.data_type, table.name
                            ))
                        })
                        .map(|field_mapping| field_mapping.column.clone())
                }?;
                ndc_fields.insert(
                    field.name().clone(),
                    ndc_models::Field::Column {
                        column: ndc_field.to_string(),
                        fields: None,
                        arguments: BTreeMap::new(),
                    },
                );
            }

            let ndc_query = ndc_models::Query {
                aggregates: None,
                fields: Some(ndc_fields),
                limit: None,
                offset: None,
                order_by: None,
                predicate: None,
            };

            let query_request = ndc_models::QueryRequest {
                query: ndc_query,
                collection: model_source.collection.clone(),
                arguments: BTreeMap::new(),
                collection_relationships: BTreeMap::new(),
                variables: None,
            };
            let ndc_query_node = NDCQuery {
                table: table_name.clone(),
                query: query_request,
                data_source_name: Arc::new(model_source.collection.clone()),
                schema: projected_schema,
            };
            Ok(Transformed::yes(LogicalPlan::Extension(Extension {
                node: Arc::new(ndc_query_node),
            })))
        }
        _ => Ok(Transformed::no(plan)),
    })?;

    Ok(transformed_plan)
}
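To make the rewrite concrete, the snippet below builds the same `ndc_models::Query` shape the rule produces for a scan that projects two columns. The table and column names are made up, and the field mappings are assumed to map each field to a column of the same name; only constructions that already appear above are reused.

use std::collections::BTreeMap;
use indexmap::IndexMap;

// Roughly what the rule emits for `SELECT id, title FROM app.articles`
// (illustrative values only).
fn example_ndc_query() -> ndc_models::Query {
    let mut fields = IndexMap::new();
    for column in ["id", "title"] {
        fields.insert(
            column.to_string(),
            ndc_models::Field::Column {
                column: column.to_string(),
                fields: None,
                arguments: BTreeMap::new(),
            },
        );
    }
    ndc_models::Query {
        aggregates: None,
        fields: Some(fields),
        limit: None,
        offset: None,
        order_by: None,
        predicate: None,
    }
}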
3 v3/crates/sql/src/execute/optimizer.rs Normal file
@ -0,0 +1,3 @@
mod projection_pushdown;

pub(crate) use projection_pushdown::NDCPushDownProjection;
71 v3/crates/sql/src/execute/optimizer/projection_pushdown.rs Normal file
@ -0,0 +1,71 @@
use std::sync::Arc;

use datafusion::{
    common::{internal_err, tree_node::Transformed},
    error::Result,
    logical_expr::{Expr, Extension, LogicalPlan},
    optimizer::{optimizer::ApplyOrder, OptimizerConfig, OptimizerRule},
};

pub(crate) struct NDCPushDownProjection {}

impl OptimizerRule for NDCPushDownProjection {
    fn try_optimize(
        &self,
        _plan: &LogicalPlan,
        _config: &dyn OptimizerConfig,
    ) -> Result<Option<LogicalPlan>> {
        internal_err!("Should have called NDCPushDownProjection::rewrite")
    }

    fn name(&self) -> &str {
        "ndc_pushdown_projection"
    }

    fn apply_order(&self) -> Option<ApplyOrder> {
        Some(ApplyOrder::BottomUp)
    }

    fn supports_rewrite(&self) -> bool {
        true
    }

    fn rewrite(
        &self,
        plan: LogicalPlan,
        _config: &dyn OptimizerConfig,
    ) -> Result<Transformed<LogicalPlan>> {
        if let Some((projections, projected_schema, ndc_query)) = {
            match plan {
                LogicalPlan::Projection(ref projection) => match projection.input.as_ref() {
                    LogicalPlan::Extension(Extension { node }) => node
                        .as_ref()
                        .as_any()
                        .downcast_ref::<crate::plan::NDCQuery>()
                        .map(|ndc_query| (&projection.expr, &projection.schema, ndc_query.clone())),
                    _ => None,
                },
                _ => None,
            }
        } {
            let projected_columns = projections_to_columns(projections)?;
            let projected_query =
                ndc_query.project(projected_schema.clone(), &projected_columns)?;
            let plan = LogicalPlan::Extension(Extension {
                node: Arc::new(projected_query),
            });
            Ok(Transformed::yes(plan))
        } else {
            Ok(Transformed::no(plan))
        }
    }
}

fn projections_to_columns(projections: &[Expr]) -> Result<Vec<String>> {
    projections
        .iter()
        .map(|expr| match expr {
            Expr::Column(column) => Ok(column.name.clone()),
            _ => internal_err!("non-column found in projection of ndcscan: {}", expr),
        })
        .collect()
}
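A crate-internal sketch of how this rule might be installed, assuming DataFusion's `SessionContext::add_optimizer_rule` hook; the engine's actual wiring when it builds the per-request session is not shown in this hunk. Once installed, a `Projection` that sits directly on an `NDCQuery` extension node collapses into an `NDCQuery` requesting only the projected columns, so the narrower field list is what reaches the data connector.

use std::sync::Arc;
use datafusion::execution::context::SessionContext;

// Assumed registration point; NDCPushDownProjection is crate-private, so
// this would live inside the `sql` crate.
fn install_projection_pushdown(ctx: &SessionContext) {
    ctx.add_optimizer_rule(Arc::new(NDCPushDownProjection {}));
}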
182 v3/crates/sql/src/execute/planner.rs Normal file
@ -0,0 +1,182 @@
use std::{collections::BTreeMap, sync::Arc};

use datafusion::{
    error::{DataFusionError, Result},
    execution::context::{QueryPlanner, SessionState},
    logical_expr::{LogicalPlan, UserDefinedLogicalNode},
    physical_plan::ExecutionPlan,
    physical_planner::{DefaultPhysicalPlanner, ExtensionPlanner, PhysicalPlanner},
};

use execute::process_model_relationship_definition;
use indexmap::IndexMap;
use metadata_resolve::FilterPermission;
use open_dds::identifier::Identifier;
use open_dds::types::FieldName;

use crate::plan::NDCPushDown;

use async_trait::async_trait;

pub(crate) struct NDCQueryPlanner {
    pub(crate) default_schema: Option<Arc<String>>,
    pub(crate) catalog: Arc<crate::catalog::OpenDDCatalogProvider>,
}

#[async_trait]
impl QueryPlanner for NDCQueryPlanner {
    /// Given a `LogicalPlan`, create an `ExecutionPlan` suitable for execution
    async fn create_physical_plan(
        &self,
        logical_plan: &LogicalPlan,
        session_state: &SessionState,
    ) -> Result<Arc<dyn ExecutionPlan>> {
        // Teach the default physical planner how to plan NDCQuery nodes.
        let physical_planner =
            DefaultPhysicalPlanner::with_extension_planners(vec![Arc::new(NDCPushDownPlanner {
                default_schema: self.default_schema.clone(),
                catalog: self.catalog.clone(),
            })]);
        // Delegate most work of physical planning to the default physical planner
        physical_planner
            .create_physical_plan(logical_plan, session_state)
            .await
    }
}

pub(crate) struct NDCPushDownPlanner {
    pub(crate) default_schema: Option<Arc<String>>,
    pub(crate) catalog: Arc<crate::catalog::OpenDDCatalogProvider>,
}

#[async_trait]
impl ExtensionPlanner for NDCPushDownPlanner {
    /// Create a physical plan for an extension node
    async fn plan_extension(
        &self,
        _planner: &dyn PhysicalPlanner,
        node: &dyn UserDefinedLogicalNode,
        logical_inputs: &[&LogicalPlan],
        physical_inputs: &[Arc<dyn ExecutionPlan>],
        _session_state: &SessionState,
    ) -> Result<Option<Arc<dyn ExecutionPlan>>> {
        if let Some(ndc_node) = node.as_any().downcast_ref::<crate::plan::NDCQuery>() {
            assert_eq!(logical_inputs.len(), 0, "Inconsistent number of inputs");
            assert_eq!(physical_inputs.len(), 0, "Inconsistent number of inputs");
            let table = self
                .catalog
                .get(
                    self.default_schema.as_ref().map(|s| s.as_str()),
                    &ndc_node.table,
                )
                .ok_or_else(|| {
                    DataFusionError::Internal(format!(
                        "table provider not found for replace_table_scan: {}",
                        &ndc_node.table
                    ))
                })?;
            let model_source = table.source.as_ref().ok_or_else(|| {
                DataFusionError::Plan(format!(
                    "model source should be configured for {}",
                    table.name
                ))
            })?;
            let select_permission = table.select_permission.as_ref().ok_or_else(|| {
                DataFusionError::Plan(format!(
                    "role {} does not have select permission for model {}",
                    table.session.role, table.name
                ))
            })?;
            let type_permissions = table.type_permissions.as_ref().ok_or_else(|| {
                DataFusionError::Plan(format!(
                    "role {} does not have permission to select any fields of model {}",
                    table.session.role, table.name
                ))
            })?;
            let base_type_allowed_fields = &type_permissions
                .permissions
                .get(&table.data_type)
                .ok_or_else(|| {
                    DataFusionError::Plan(format!(
                        "role {} has permission to select model {} but does not have permission \
                         to select fields of the model's underlying type {}",
                        table.session.role, table.name, table.data_type
                    ))
                })?
                .output
                .allowed_fields;
            for (field_name, _field) in ndc_node
                .query
                .query
                .fields
                .as_ref()
                .unwrap_or(&IndexMap::new())
            {
                let field_name = {
                    let field_name = Identifier::new(field_name.clone()).map_err(|e| {
                        DataFusionError::Internal(format!(
                            "field name conversion failed {field_name}: {e}"
                        ))
                    })?;
                    FieldName(field_name)
                };
                if base_type_allowed_fields.contains(&field_name) {
                    Ok(())
                } else {
                    Err(DataFusionError::Plan(format!(
                        "role {} does not have permission to select the field {} from type {} of model {}",
                        table.session.role, field_name, table.data_type, table.name
                    )))
                }?;
            }

            let mut usage_counts = execute::model_tracking::UsagesCounts::default();
            let mut relationships = BTreeMap::new();

            let permission_filter = match &select_permission.filter {
                FilterPermission::AllowAll => Ok(ndc_models::Expression::And {
                    expressions: vec![],
                }),
                FilterPermission::Filter(filter) => {
                    execute::ir::permissions::process_model_predicate(
                        filter,
                        &table.session.variables,
                        &mut relationships,
                        &mut usage_counts,
                    )
                    .map_err(|e| {
                        DataFusionError::Internal(format!(
                            "error when processing model predicate: {e}"
                        ))
                    })
                }
            }?;

            let relationships = relationships
                .into_values()
                .map(|v| {
                    process_model_relationship_definition(&v)
                        .map(|r| (v.relationship_name.to_string(), r))
                        .map_err(|e| {
                            DataFusionError::Internal(format!(
                                "error constructing ndc relationship definition: {e}"
                            ))
                        })
                })
                .collect::<Result<BTreeMap<String, ndc_models::Relationship>, DataFusionError>>()?;
            let mut query = ndc_node.query.clone();
            query.query.predicate = Some(permission_filter);
            query.collection_relationships = relationships;
            let ndc_pushdown = NDCPushDown::new(
                table.http_context.clone(),
                ndc_node.schema.inner().clone(),
                Arc::new(query),
                Arc::new(model_source.data_connector.clone()),
            );
            Ok(Some(Arc::new(ndc_pushdown)))
        } else {
            Ok(None)
        }
    }
}
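A hedged, crate-internal sketch of how the pieces above could be assembled into a DataFusion session, assuming `SessionState::new_with_config_rt`, `with_analyzer_rules` and `with_query_planner` as exposed by the DataFusion version in use; the engine's real session construction lives in the catalog module and may differ in detail.

use std::sync::Arc;
use datafusion::execution::{context::SessionState, runtime_env::RuntimeEnv};
use datafusion::prelude::{SessionConfig, SessionContext};

// Build a session whose planner turns NDCQuery extension nodes
// into NDCPushDown execution plans.
fn example_session(
    default_schema: Option<Arc<String>>,
    catalog: Arc<crate::catalog::OpenDDCatalogProvider>,
) -> SessionContext {
    let state =
        SessionState::new_with_config_rt(SessionConfig::new(), Arc::new(RuntimeEnv::default()))
            .with_analyzer_rules(vec![Arc::new(crate::execute::analyzer::ReplaceTableScan::new(
                default_schema.clone(),
                catalog.clone(),
            ))])
            .with_query_planner(Arc::new(NDCQueryPlanner {
                default_schema,
                catalog,
            }));
    SessionContext::new_with_state(state)
}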
3 v3/crates/sql/src/lib.rs Normal file
@ -0,0 +1,3 @@
pub mod catalog;
pub mod execute;
pub mod plan;
257 v3/crates/sql/src/plan.rs Normal file
@ -0,0 +1,257 @@
use core::fmt;
use std::{any::Any, hash::Hash, sync::Arc};

use datafusion::{
    arrow::{
        array::RecordBatch, datatypes::SchemaRef, error::ArrowError, json::reader as arrow_json,
    },
    common::DFSchemaRef,
    error::DataFusionError,
    logical_expr::{LogicalPlan, UserDefinedLogicalNodeCore},
    physical_expr::EquivalenceProperties,
    physical_plan::{
        stream::RecordBatchStreamAdapter, DisplayAs, DisplayFormatType, ExecutionMode,
        ExecutionPlan, Partitioning, PlanProperties,
    },
    sql::TableReference,
};
use execute::HttpContext;
use futures::TryFutureExt;
use tracing_util::{FutureExt, SpanVisibility, TraceableError};

use thiserror::Error;

#[derive(Debug, Error)]
pub enum ExecutionPlanError {
    #[error("{0}")]
    NDCExecutionError(#[from] execute::ndc::client::Error),

    #[error("NDC Response not as expected: {0}")]
    NDCResponseFormat(String),

    #[error("Arrow error: {0}")]
    ArrowError(#[from] ArrowError),

    #[error("Couldn't construct a RecordBatch: {0}")]
    RecordBatchConstruction(String),

    #[error("Couldn't fetch otel tracing context")]
    TracingContextNotFound,
}

impl TraceableError for ExecutionPlanError {
    fn visibility(&self) -> tracing_util::ErrorVisibility {
        tracing_util::ErrorVisibility::Internal
    }
}

#[derive(Debug, Clone, PartialEq)]
pub(crate) struct NDCQuery {
    pub(crate) table: TableReference,
    pub(crate) query: ndc_models::QueryRequest,
    pub(crate) data_source_name: Arc<String>,
    pub(crate) schema: DFSchemaRef,
}

impl Hash for NDCQuery {
    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
        self.data_source_name.hash(state);
        format!("{:#?}", self.query).hash(state);
        self.schema.hash(state);
    }
}

impl Eq for NDCQuery {}

impl NDCQuery {
    pub(crate) fn project(
        mut self,
        schema: DFSchemaRef,
        projection: &[String],
    ) -> datafusion::error::Result<Self> {
        let mut current_fields = self.query.query.fields.take().ok_or_else(|| {
            DataFusionError::Internal("empty fields found in ndcscan for projection".to_string())
        })?;
        let new_fields = projection
            .iter()
            .map(|projected_field| {
                current_fields
                    .swap_remove(projected_field)
                    .map(|field| (projected_field.clone(), field))
                    .ok_or_else(|| {
                        DataFusionError::Internal(
                            "failed to lookup projected field in ndcscan".to_string(),
                        )
                    })
            })
            .collect::<Result<_, DataFusionError>>()?;
        let _ = std::mem::replace(&mut self.query.query.fields, Some(new_fields));
        let _ = std::mem::replace(&mut self.schema, schema);
        Ok(self)
    }
}

impl UserDefinedLogicalNodeCore for NDCQuery {
    fn name(&self) -> &str {
        "NDCQuery"
    }

    fn inputs(&self) -> Vec<&LogicalPlan> {
        vec![]
    }

    /// Schema of this node's output: the projected fields of the NDC query
    fn schema(&self) -> &DFSchemaRef {
        &self.schema
    }

    fn expressions(&self) -> Vec<datafusion::logical_expr::Expr> {
        vec![]
    }

    /// For example: `NDCQuery: query=...`
    fn fmt_for_explain(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "NDCQuery: query={:#?}", self.query)
    }

    fn with_exprs_and_inputs(
        &self,
        _exprs: Vec<datafusion::logical_expr::Expr>,
        _inputs: Vec<LogicalPlan>,
    ) -> datafusion::error::Result<Self> {
        Ok(self.clone())
    }
}

#[derive(Debug, Clone)]
pub(crate) struct NDCPushDown {
    http_context: Arc<execute::HttpContext>,
    query: Arc<ndc_models::QueryRequest>,
    data_connector: Arc<metadata_resolve::DataConnectorLink>,
    projected_schema: SchemaRef,
    cache: PlanProperties,
}

impl NDCPushDown {
    pub(crate) fn new(
        http_context: Arc<HttpContext>,
        schema: SchemaRef,
        query: Arc<ndc_models::QueryRequest>,
        data_connector: Arc<metadata_resolve::DataConnectorLink>,
    ) -> Self {
        let cache = Self::compute_properties(schema.clone());
        Self {
            http_context,
            query,
            data_connector,
            projected_schema: schema,
            cache,
        }
    }

    /// This function creates the cache object that stores the plan properties such as schema, equivalence properties, ordering, partitioning, etc.
    fn compute_properties(schema: SchemaRef) -> PlanProperties {
        let eq_properties = EquivalenceProperties::new(schema);
        PlanProperties::new(
            eq_properties,
            Partitioning::UnknownPartitioning(1),
            ExecutionMode::Bounded,
        )
    }
}

impl DisplayAs for NDCPushDown {
    fn fmt_as(&self, _t: DisplayFormatType, f: &mut fmt::Formatter) -> std::fmt::Result {
        write!(f, "NDCPushDown")
    }
}

impl ExecutionPlan for NDCPushDown {
    fn name(&self) -> &'static str {
        "NDCPushdown"
    }

    fn as_any(&self) -> &dyn Any {
        self
    }

    fn properties(&self) -> &PlanProperties {
        &self.cache
    }

    fn children(&self) -> Vec<&Arc<dyn ExecutionPlan>> {
        vec![]
    }

    fn with_new_children(
        self: Arc<Self>,
        _: Vec<Arc<dyn ExecutionPlan>>,
    ) -> datafusion::error::Result<Arc<dyn ExecutionPlan>> {
        Ok(self)
    }

    fn execute(
        &self,
        _partition: usize,
        context: Arc<datafusion::execution::TaskContext>,
    ) -> datafusion::error::Result<datafusion::execution::SendableRecordBatchStream> {
        let otel_cx = context
            .session_config()
            .get_extension::<tracing_util::Context>()
            .ok_or_else(|| {
                DataFusionError::External(Box::new(ExecutionPlanError::TracingContextNotFound))
            })?;
        let fut = fetch_from_data_connector(
            self.projected_schema.clone(),
            self.http_context.clone(),
            self.query.clone(),
            self.data_connector.clone(),
        )
        .with_context((*otel_cx).clone())
        .map_err(|e| DataFusionError::External(Box::new(e)));
        let stream = futures::stream::once(fut);
        Ok(Box::pin(RecordBatchStreamAdapter::new(
            self.projected_schema.clone(),
            stream,
        )))
    }
}

pub async fn fetch_from_data_connector(
    schema: SchemaRef,
    http_context: Arc<HttpContext>,
    query_request: Arc<ndc_models::QueryRequest>,
    data_connector: Arc<metadata_resolve::DataConnectorLink>,
) -> Result<RecordBatch, ExecutionPlanError> {
    let tracer = tracing_util::global_tracer();
    let mut ndc_response =
        execute::fetch_from_data_connector(&http_context, &query_request, &data_connector, None)
            .await?;
    let batch = tracer.in_span(
        "ndc_response_to_record_batch",
        "Converts NDC Response into datafusion's RecordBatch",
        SpanVisibility::Internal,
        || {
            let rows = ndc_response
                .0
                .pop()
                .ok_or_else(|| {
                    ExecutionPlanError::NDCResponseFormat("no row sets found".to_string())
                })?
                .rows
                .ok_or_else(|| {
                    ExecutionPlanError::NDCResponseFormat(
                        "no rows found for the row set".to_string(),
                    )
                })?;
            let mut decoder = arrow_json::ReaderBuilder::new(schema.clone()).build_decoder()?;
            decoder.serialize(&rows)?;
            decoder.flush()?.ok_or_else(|| {
                ExecutionPlanError::RecordBatchConstruction(
                    "json to arrow decoder did not return any rows".to_string(),
                )
            })
        },
    )?;
    Ok(batch)
}
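The JSON-to-Arrow conversion at the heart of `fetch_from_data_connector` can be exercised in isolation. The stand-alone sketch below (assuming only the `datafusion` and `serde_json` crates) decodes a couple of NDC-style row objects against a one-column schema, mirroring the decoder calls above; the schema and row values are illustrative.

use std::sync::Arc;

use datafusion::arrow::{
    array::RecordBatch,
    datatypes::{DataType, Field, Schema},
    json::reader::ReaderBuilder,
};

// Decode JSON row objects (as returned in an NDC row set) into a RecordBatch.
fn rows_to_record_batch() -> Result<RecordBatch, Box<dyn std::error::Error>> {
    let schema = Arc::new(Schema::new(vec![Field::new("title", DataType::Utf8, true)]));
    let rows = vec![
        serde_json::json!({ "title": "hello" }),
        serde_json::json!({ "title": "world" }),
    ];
    let mut decoder = ReaderBuilder::new(schema).build_decoder()?;
    decoder.serialize(&rows)?;
    let batch = decoder
        .flush()?
        .ok_or("json to arrow decoder did not return any rows")?;
    assert_eq!(batch.num_rows(), 2);
    Ok(batch)
}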
@ -18,6 +18,7 @@ pub use tracer::{
// risking mismatches and multiple globals
pub use opentelemetry::propagation::text_map_propagator::TextMapPropagator;
pub use opentelemetry::trace::get_active_span;
pub use opentelemetry::trace::FutureExt;
pub use opentelemetry::trace::Status;
pub use opentelemetry::Context;
pub use opentelemetry_contrib::trace::propagator::trace_context_response::TraceContextResponsePropagator;