From e72f53d9effd9aa2ef8363d9f3493297e76d8db0 Mon Sep 17 00:00:00 2001 From: Lucas Date: Fri, 17 Nov 2023 07:36:40 +0800 Subject: [PATCH] Minor mbtiles summary improvements (#1004) Partial fix of #1002 * [x] Move all summary code from `mbtiles/src/mbtiles.rs` to `mbtiles/src/summary.rs` * [x] Move Metadata and Validation function to separate files * [x] Remove `filename` String from the summary stats - not much point because it is accessible from the mbtiles struct itself --------- Co-authored-by: Yuri Astrakhan --- mbtiles/src/bin/main.rs | 1 + mbtiles/src/copier.rs | 5 +- mbtiles/src/lib.rs | 25 +- mbtiles/src/mbtiles.rs | 1056 +--------------------------- mbtiles/src/metadata.rs | 278 ++++++++ mbtiles/src/summary.rs | 366 ++++++++++ mbtiles/src/validation.rs | 456 ++++++++++++ tests/expected/mbtiles/summary.txt | 2 +- 8 files changed, 1127 insertions(+), 1062 deletions(-) create mode 100644 mbtiles/src/metadata.rs create mode 100644 mbtiles/src/summary.rs create mode 100644 mbtiles/src/validation.rs diff --git a/mbtiles/src/bin/main.rs b/mbtiles/src/bin/main.rs index 491a3505..252d9226 100644 --- a/mbtiles/src/bin/main.rs +++ b/mbtiles/src/bin/main.rs @@ -120,6 +120,7 @@ async fn main_int() -> anyhow::Result<()> { Commands::Summary { file } => { let mbt = Mbtiles::new(file.as_path())?; let mut conn = mbt.open_readonly().await?; + println!("MBTiles file summary for {mbt}"); println!("{}", mbt.summary(&mut conn).await?); } } diff --git a/mbtiles/src/copier.rs b/mbtiles/src/copier.rs index 1a0bf2e8..a3d31a01 100644 --- a/mbtiles/src/copier.rs +++ b/mbtiles/src/copier.rs @@ -10,13 +10,12 @@ use sqlite_hashes::rusqlite::params_from_iter; use sqlx::{query, Executor as _, Row, SqliteConnection}; use crate::errors::MbtResult; -use crate::mbtiles::MbtType::{Flat, FlatWithHash, Normalized}; -use crate::mbtiles::{MbtType, MbtTypeCli}; use crate::queries::{ create_flat_tables, create_flat_with_hash_tables, create_normalized_tables, create_tiles_with_hash_view, detach_db, 
is_empty_database, }; -use crate::{MbtError, Mbtiles, AGG_TILES_HASH, AGG_TILES_HASH_IN_DIFF}; +use crate::MbtType::{Flat, FlatWithHash, Normalized}; +use crate::{MbtError, MbtType, MbtTypeCli, Mbtiles, AGG_TILES_HASH, AGG_TILES_HASH_IN_DIFF}; #[derive(PartialEq, Eq, Default, Debug, Clone, EnumDisplay)] #[enum_display(case = "Kebab")] diff --git a/mbtiles/src/lib.rs b/mbtiles/src/lib.rs index 0c17da7b..0f9397a8 100644 --- a/mbtiles/src/lib.rs +++ b/mbtiles/src/lib.rs @@ -1,26 +1,33 @@ #![doc = include_str!("../README.md")] #![allow(clippy::missing_errors_doc)] +mod copier; +pub use copier::{CopyDuplicateMode, MbtilesCopier}; + mod errors; pub use errors::{MbtError, MbtResult}; mod mbtiles; -pub use mbtiles::{ - calc_agg_tiles_hash, IntegrityCheckType, MbtType, MbtTypeCli, Mbtiles, Metadata, - AGG_TILES_HASH, AGG_TILES_HASH_IN_DIFF, -}; +pub use mbtiles::{MbtTypeCli, Mbtiles}; -mod pool; -pub use pool::MbtilesPool; - -mod copier; -pub use copier::{CopyDuplicateMode, MbtilesCopier}; +mod metadata; +pub use metadata::Metadata; mod patcher; pub use patcher::apply_patch; +mod pool; +pub use pool::MbtilesPool; + mod queries; pub use queries::{ create_flat_tables, create_flat_with_hash_tables, create_metadata_table, create_normalized_tables, is_flat_with_hash_tables_type, is_normalized_tables_type, }; + +mod summary; + +mod validation; +pub use validation::{ + calc_agg_tiles_hash, IntegrityCheckType, MbtType, AGG_TILES_HASH, AGG_TILES_HASH_IN_DIFF, +}; diff --git a/mbtiles/src/mbtiles.rs b/mbtiles/src/mbtiles.rs index 53cd71ba..e05a198a 100644 --- a/mbtiles/src/mbtiles.rs +++ b/mbtiles/src/mbtiles.rs @@ -1,165 +1,19 @@ #![allow(clippy::missing_errors_doc)] -use std::collections::HashSet; use std::ffi::OsStr; use std::fmt::{Display, Formatter}; -use std::path::{Path, PathBuf}; -use std::str::FromStr; +use std::path::Path; #[cfg(feature = "cli")] use clap::ValueEnum; use enum_display::EnumDisplay; -use futures::TryStreamExt; -use log::{debug, info, warn}; -use 
martin_tile_utils::{Format, TileInfo}; -use serde::ser::SerializeStruct; -use serde::{Serialize, Serializer}; -use serde_json::{Value as JSONValue, Value}; -use size_format::SizeFormatterBinary; +use log::debug; use sqlite_hashes::register_md5_function; -use sqlx::sqlite::{SqliteConnectOptions, SqliteRow}; -use sqlx::{query, Connection as _, Row, SqliteConnection, SqliteExecutor}; -use tilejson::{tilejson, Bounds, Center, TileJSON}; +use sqlx::sqlite::SqliteConnectOptions; +use sqlx::{query, Connection as _, SqliteConnection, SqliteExecutor}; use crate::errors::{MbtError, MbtResult}; -use crate::queries::{ - has_tiles_with_hash, is_flat_tables_type, is_flat_with_hash_tables_type, - is_normalized_tables_type, -}; -use crate::MbtError::{ - AggHashMismatch, AggHashValueNotFound, FailedIntegrityCheck, IncorrectTileHash, -}; - -#[derive(Clone, Debug, PartialEq, Serialize)] -pub struct Metadata { - pub id: String, - #[serde(serialize_with = "serialize_ti")] - pub tile_info: TileInfo, - pub layer_type: Option, - pub tilejson: TileJSON, - pub json: Option, -} - -#[derive(Clone, Debug, PartialEq, Serialize)] -pub struct ZoomInfo { - pub zoom: u8, - pub tile_count: u64, - pub min_tile_size: u64, - pub max_tile_size: u64, - pub avg_tile_size: f64, - pub bbox: Bounds, -} - -#[derive(Clone, Debug, PartialEq, Serialize)] -pub struct Summary { - pub file_path: String, - pub file_size: Option, - pub mbt_type: MbtType, - pub page_size: u64, - pub page_count: u64, - pub tile_count: u64, - pub min_tile_size: Option, - pub max_tile_size: Option, - pub avg_tile_size: f64, - pub bbox: Option, - pub min_zoom: Option, - pub max_zoom: Option, - pub zoom_info: Vec, -} - -impl Display for Summary { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - writeln!(f, "File: {}", self.file_path)?; - writeln!(f, "Schema: {}", self.mbt_type)?; - - if let Some(file_size) = self.file_size { - let file_size = SizeFormatterBinary::new(file_size); - writeln!(f, "File size: {file_size:.2}B")?; - 
} else { - writeln!(f, "File size: unknown")?; - } - let page_size = SizeFormatterBinary::new(self.page_size); - writeln!(f, "Page size: {page_size:.2}B")?; - writeln!(f, "Page count: {:.2}", self.page_count)?; - writeln!(f)?; - writeln!( - f, - "|{:^9}|{:^9}|{:^9}|{:^9}|{:^9}| {:^20} |", - "Zoom", "Count", "Smallest", "Largest", "Average", "BBox" - )?; - - for l in &self.zoom_info { - let min = SizeFormatterBinary::new(l.min_tile_size); - let max = SizeFormatterBinary::new(l.max_tile_size); - let avg = SizeFormatterBinary::new(l.avg_tile_size as u64); - let prec = get_zoom_precision(l.zoom); - - writeln!( - f, - "|{:>9}|{:>9}|{:>9}|{:>9}|{:>9}| {:<20} |", - l.zoom, - l.tile_count, - format!("{min:.2}B"), - format!("{max:.2}B"), - format!("{avg:.2}B"), - format!("{:.prec$}", l.bbox), - )?; - } - - if self.zoom_info.len() > 1 { - if let (Some(min), Some(max), Some(bbox), Some(max_zoom)) = ( - self.min_tile_size, - self.max_tile_size, - self.bbox, - self.max_zoom, - ) { - let min = SizeFormatterBinary::new(min); - let max = SizeFormatterBinary::new(max); - let avg = SizeFormatterBinary::new(self.avg_tile_size as u64); - let prec = get_zoom_precision(max_zoom); - writeln!( - f, - "|{:>9}|{:>9}|{:>9}|{:>9}|{:>9}| {:<20} |", - "all", - self.tile_count, - format!("{min}B"), - format!("{max}B"), - format!("{avg}B"), - format!("{:.prec$}", bbox), - )?; - } - } - - Ok(()) - } -} - -fn serialize_ti(ti: &TileInfo, serializer: S) -> Result { - let mut s = serializer.serialize_struct("TileInfo", 2)?; - s.serialize_field("format", &ti.format.to_string())?; - s.serialize_field( - "encoding", - ti.encoding.content_encoding().unwrap_or_default(), - )?; - s.end() -} - -fn get_zoom_precision(zoom: u8) -> usize { - let lng_delta = webmercator_to_wgs84(40075016.7 / (2_u32.pow(zoom as u32)) as f64, 0f64).0; - let log = lng_delta.log10() - 0.5; - if log > 0_f64 { - 0 - } else { - -log.ceil() as usize - } -} - -/// Metadata key for the aggregate tiles hash value -pub const AGG_TILES_HASH: 
&str = "agg_tiles_hash"; - -/// Metadata key for a diff file, -/// describing the eventual [`AGG_TILES_HASH`] value once the diff is applied -pub const AGG_TILES_HASH_IN_DIFF: &str = "agg_tiles_hash_after_apply"; +use crate::MbtType; #[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, EnumDisplay)] #[enum_display(case = "Kebab")] @@ -170,34 +24,6 @@ pub enum MbtTypeCli { Normalized, } -#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, EnumDisplay, Serialize)] -#[enum_display(case = "Kebab")] -pub enum MbtType { - Flat, - FlatWithHash, - Normalized { hash_view: bool }, -} - -impl MbtType { - pub fn is_normalized(&self) -> bool { - matches!(self, Self::Normalized { .. }) - } - - pub fn is_normalized_with_view(&self) -> bool { - matches!(self, Self::Normalized { hash_view: true }) - } -} - -#[derive(PartialEq, Eq, Default, Debug, Clone, EnumDisplay)] -#[enum_display(case = "Kebab")] -#[cfg_attr(feature = "cli", derive(ValueEnum))] -pub enum IntegrityCheckType { - #[default] - Quick, - Full, - Off, -} - #[derive(Clone, Debug)] pub struct Mbtiles { filepath: String, @@ -264,17 +90,6 @@ impl Mbtiles { &self.filename } - fn to_val(&self, val: Result, title: &str) -> Option { - match val { - Ok(v) => Some(v), - Err(err) => { - let name = &self.filename; - warn!("Unable to parse metadata {title} value in {name}: {err}"); - None - } - } - } - /// Attach this `MBTiles` file to the given `SQLite` connection as a given name pub async fn attach_to(&self, conn: &mut T, name: &str) -> MbtResult<()> where @@ -288,333 +103,6 @@ impl Mbtiles { Ok(()) } - /// Get a single metadata value from the metadata table - pub async fn get_metadata_value(&self, conn: &mut T, key: &str) -> MbtResult> - where - for<'e> &'e mut T: SqliteExecutor<'e>, - { - let query = query!("SELECT value from metadata where name = ?", key); - let row = query.fetch_optional(conn).await?; - if let Some(row) = row { - if let Some(value) = row.value { - return Ok(Some(value)); - } - } - Ok(None) - } - - pub async fn 
validate( - &self, - check_type: IntegrityCheckType, - update_agg_tiles_hash: bool, - ) -> MbtResult { - let mut conn = if update_agg_tiles_hash { - self.open().await? - } else { - self.open_readonly().await? - }; - self.check_integrity(&mut conn, check_type).await?; - self.check_each_tile_hash(&mut conn).await?; - if update_agg_tiles_hash { - self.update_agg_tiles_hash(&mut conn).await - } else { - self.check_agg_tiles_hashes(&mut conn).await - } - } - - /// Compute MBTiles file summary - pub async fn summary(&self, conn: &mut T) -> MbtResult - where - for<'e> &'e mut T: SqliteExecutor<'e>, - { - let mbt_type = self.detect_type(&mut *conn).await?; - let file_size = PathBuf::from_str(&self.filepath) - .ok() - .and_then(|p| p.metadata().ok()) - .map(|m| m.len()); - - let sql = query!("PRAGMA page_size;"); - let page_size = sql.fetch_one(&mut *conn).await?.page_size.unwrap() as u64; - - let sql = query!("PRAGMA page_count;"); - let page_count = sql.fetch_one(&mut *conn).await?.page_count.unwrap() as u64; - - let zoom_info = query!( - " - SELECT zoom_level AS zoom, - count() AS count, - min(length(tile_data)) AS smallest, - max(length(tile_data)) AS largest, - avg(length(tile_data)) AS average, - min(tile_column) AS min_tile_x, - min(tile_row) AS min_tile_y, - max(tile_column) AS max_tile_x, - max(tile_row) AS max_tile_y - FROM tiles - GROUP BY zoom_level" - ) - .fetch_all(&mut *conn) - .await?; - - let zoom_info: Vec = zoom_info - .into_iter() - .map(|r| { - let zoom = u8::try_from(r.zoom.unwrap()).expect("zoom_level is not a u8"); - ZoomInfo { - zoom, - tile_count: r.count as u64, - min_tile_size: r.smallest.unwrap_or(0) as u64, - max_tile_size: r.largest.unwrap_or(0) as u64, - avg_tile_size: r.average.unwrap_or(0.0), - bbox: Self::xyz_to_bbox( - zoom, - r.min_tile_x.unwrap(), - r.min_tile_y.unwrap(), - r.max_tile_x.unwrap(), - r.max_tile_y.unwrap(), - ), - } - }) - .collect(); - - let tile_count = zoom_info.iter().map(|l| l.tile_count).sum(); - let avg_sum = 
zoom_info - .iter() - .map(|l| l.avg_tile_size * l.tile_count as f64) - .sum::(); - - Ok(Summary { - file_path: self.filepath.clone(), - file_size, - mbt_type, - page_size, - page_count, - tile_count, - min_tile_size: zoom_info.iter().map(|l| l.min_tile_size).reduce(u64::min), - max_tile_size: zoom_info.iter().map(|l| l.max_tile_size).reduce(u64::max), - avg_tile_size: avg_sum / tile_count as f64, - bbox: zoom_info.iter().map(|l| l.bbox).reduce(|a, b| a + b), - min_zoom: zoom_info.iter().map(|l| l.zoom).reduce(u8::min), - max_zoom: zoom_info.iter().map(|l| l.zoom).reduce(u8::max), - zoom_info, - }) - } - - /// Convert min/max XYZ tile coordinates to a bounding box - fn xyz_to_bbox(zoom: u8, min_x: i32, min_y: i32, max_x: i32, max_y: i32) -> Bounds { - let tile_size = 40075016.7 / (2_u32.pow(zoom as u32)) as f64; - let (min_lng, min_lat) = webmercator_to_wgs84( - -20037508.34 + min_x as f64 * tile_size, - -20037508.34 + min_y as f64 * tile_size, - ); - let (max_lng, max_lat) = webmercator_to_wgs84( - -20037508.34 + (max_x as f64 + 1.0) * tile_size, - -20037508.34 + (max_y as f64 + 1.0) * tile_size, - ); - - Bounds::new(min_lng, min_lat, max_lng, max_lat) - } - - /// Get the aggregate tiles hash value from the metadata table - pub async fn get_agg_tiles_hash(&self, conn: &mut T) -> MbtResult> - where - for<'e> &'e mut T: SqliteExecutor<'e>, - { - self.get_metadata_value(&mut *conn, AGG_TILES_HASH).await - } - - pub async fn set_metadata_value( - &self, - conn: &mut T, - key: &str, - value: Option<&str>, - ) -> MbtResult<()> - where - for<'e> &'e mut T: SqliteExecutor<'e>, - { - if let Some(value) = value { - query!( - "INSERT OR REPLACE INTO metadata(name, value) VALUES(?, ?)", - key, - value - ) - .execute(conn) - .await?; - } else { - query!("DELETE FROM metadata WHERE name=?", key) - .execute(conn) - .await?; - } - Ok(()) - } - - pub async fn get_metadata(&self, conn: &mut T) -> MbtResult - where - for<'e> &'e mut T: SqliteExecutor<'e>, - { - let (tj, layer_type, 
json) = self.parse_metadata(conn).await?; - - Ok(Metadata { - id: self.filename.to_string(), - tile_info: self.detect_format(&tj, conn).await?, - tilejson: tj, - layer_type, - json, - }) - } - - async fn parse_metadata( - &self, - conn: &mut T, - ) -> MbtResult<(TileJSON, Option, Option)> - where - for<'e> &'e mut T: SqliteExecutor<'e>, - { - let query = query!("SELECT name, value FROM metadata WHERE value IS NOT ''"); - let mut rows = query.fetch(conn); - - let mut tj = tilejson! { tiles: vec![] }; - let mut layer_type: Option = None; - let mut json: Option = None; - - while let Some(row) = rows.try_next().await? { - if let (Some(name), Some(value)) = (row.name, row.value) { - match name.as_ref() { - "name" => tj.name = Some(value), - "version" => tj.version = Some(value), - "bounds" => tj.bounds = self.to_val(Bounds::from_str(value.as_str()), &name), - "center" => tj.center = self.to_val(Center::from_str(value.as_str()), &name), - "minzoom" => tj.minzoom = self.to_val(value.parse(), &name), - "maxzoom" => tj.maxzoom = self.to_val(value.parse(), &name), - "description" => tj.description = Some(value), - "attribution" => tj.attribution = Some(value), - "type" => layer_type = Some(value), - "legend" => tj.legend = Some(value), - "template" => tj.template = Some(value), - "json" => json = self.to_val(serde_json::from_str(&value), &name), - "format" | "generator" => { - tj.other.insert(name, Value::String(value)); - } - _ => { - let file = &self.filename; - info!("{file} has an unrecognized metadata value {name}={value}"); - tj.other.insert(name, Value::String(value)); - } - } - } - } - - if let Some(JSONValue::Object(obj)) = &mut json { - if let Some(value) = obj.remove("vector_layers") { - if let Ok(v) = serde_json::from_value(value) { - tj.vector_layers = Some(v); - } else { - warn!( - "Unable to parse metadata vector_layers value in {}", - self.filename - ); - } - } - } - - Ok((tj, layer_type, json)) - } - - async fn detect_format(&self, tilejson: &TileJSON, conn: 
&mut T) -> MbtResult - where - for<'e> &'e mut T: SqliteExecutor<'e>, - { - let mut tile_info = None; - let mut tested_zoom = -1_i64; - - // First, pick any random tile - let query = query!("SELECT zoom_level, tile_column, tile_row, tile_data FROM tiles WHERE zoom_level >= 0 LIMIT 1"); - let row = query.fetch_optional(&mut *conn).await?; - if let Some(r) = row { - tile_info = self.parse_tile(r.zoom_level, r.tile_column, r.tile_row, r.tile_data); - tested_zoom = r.zoom_level.unwrap_or(-1); - } - - // Afterwards, iterate over tiles in all allowed zooms and check for consistency - for z in tilejson.minzoom.unwrap_or(0)..=tilejson.maxzoom.unwrap_or(18) { - if i64::from(z) == tested_zoom { - continue; - } - let query = query! {"SELECT tile_column, tile_row, tile_data FROM tiles WHERE zoom_level = ? LIMIT 1", z}; - let row = query.fetch_optional(&mut *conn).await?; - if let Some(r) = row { - match ( - tile_info, - self.parse_tile(Some(z.into()), r.tile_column, r.tile_row, r.tile_data), - ) { - (_, None) => {} - (None, new) => tile_info = new, - (Some(old), Some(new)) if old == new => {} - (Some(old), Some(new)) => { - return Err(MbtError::InconsistentMetadata(old, new)); - } - } - } - } - - if let Some(Value::String(fmt)) = tilejson.other.get("format") { - let file = &self.filename; - match (tile_info, Format::parse(fmt)) { - (_, None) => { - warn!("Unknown format value in metadata: {fmt}"); - } - (None, Some(fmt)) => { - if fmt.is_detectable() { - warn!("Metadata table sets detectable '{fmt}' tile format, but it could not be verified for file {file}"); - } else { - info!("Using '{fmt}' tile format from metadata table in file {file}"); - } - tile_info = Some(fmt.into()); - } - (Some(info), Some(fmt)) if info.format == fmt => { - debug!("Detected tile format {info} matches metadata.format '{fmt}' in file {file}"); - } - (Some(info), _) => { - warn!("Found inconsistency: metadata.format='{fmt}', but tiles were detected as {info:?} in file {file}. 
Tiles will be returned as {info:?}."); - } - } - } - - if let Some(info) = tile_info { - if info.format != Format::Mvt && tilejson.vector_layers.is_some() { - warn!( - "{} has vector_layers metadata but non-vector tiles", - self.filename - ); - } - Ok(info) - } else { - Err(MbtError::NoTilesFound) - } - } - - fn parse_tile( - &self, - z: Option, - x: Option, - y: Option, - tile: Option>, - ) -> Option { - if let (Some(z), Some(x), Some(y), Some(tile)) = (z, x, y, tile) { - let info = TileInfo::detect(&tile); - if let Some(info) = info { - debug!( - "Tile {z}/{x}/{} is detected as {info} in file {}", - (1 << z) - 1 - y, - self.filename, - ); - } - info - } else { - None - } - } - pub async fn get_tile( &self, conn: &mut T, @@ -641,234 +129,6 @@ impl Mbtiles { let mut conn = self.open_readonly().await?; self.detect_type(&mut conn).await } - - pub async fn detect_type(&self, conn: &mut T) -> MbtResult - where - for<'e> &'e mut T: SqliteExecutor<'e>, - { - debug!("Detecting MBTiles type for {self}"); - let typ = if is_normalized_tables_type(&mut *conn).await? { - MbtType::Normalized { - hash_view: has_tiles_with_hash(&mut *conn).await?, - } - } else if is_flat_with_hash_tables_type(&mut *conn).await? { - MbtType::FlatWithHash - } else if is_flat_tables_type(&mut *conn).await? { - MbtType::Flat - } else { - return Err(MbtError::InvalidDataFormat(self.filepath.clone())); - }; - - self.check_for_uniqueness_constraint(&mut *conn, typ) - .await?; - - Ok(typ) - } - - async fn check_for_uniqueness_constraint( - &self, - conn: &mut T, - mbt_type: MbtType, - ) -> MbtResult<()> - where - for<'e> &'e mut T: SqliteExecutor<'e>, - { - let table_name = match mbt_type { - MbtType::Flat => "tiles", - MbtType::FlatWithHash => "tiles_with_hash", - MbtType::Normalized { .. } => "map", - }; - - let indexes = query("SELECT name FROM pragma_index_list(?) 
WHERE [unique] = 1") - .bind(table_name) - .fetch_all(&mut *conn) - .await?; - - // Ensure there is some index on tiles that has a unique constraint on (zoom_level, tile_row, tile_column) - for index in indexes { - let mut unique_idx_cols = HashSet::new(); - let rows = query("SELECT DISTINCT name FROM pragma_index_info(?)") - .bind(index.get::("name")) - .fetch_all(&mut *conn) - .await?; - - for row in rows { - unique_idx_cols.insert(row.get("name")); - } - - if unique_idx_cols - .symmetric_difference(&HashSet::from([ - "zoom_level".to_string(), - "tile_column".to_string(), - "tile_row".to_string(), - ])) - .collect::>() - .is_empty() - { - return Ok(()); - } - } - - Err(MbtError::NoUniquenessConstraint(self.filepath.clone())) - } - - /// Perform `SQLite` internal integrity check - pub async fn check_integrity( - &self, - conn: &mut T, - integrity_check: IntegrityCheckType, - ) -> MbtResult<()> - where - for<'e> &'e mut T: SqliteExecutor<'e>, - { - if integrity_check == IntegrityCheckType::Off { - info!("Skipping integrity check for {self}"); - return Ok(()); - } - - let sql = if integrity_check == IntegrityCheckType::Full { - "PRAGMA integrity_check;" - } else { - "PRAGMA quick_check;" - }; - - let result: Vec = query(sql) - .map(|row: SqliteRow| row.get(0)) - .fetch_all(&mut *conn) - .await?; - - if result.len() > 1 - || result.get(0).ok_or(FailedIntegrityCheck( - self.filepath.to_string(), - vec!["SQLite could not perform integrity check".to_string()], - ))? != "ok" - { - return Err(FailedIntegrityCheck(self.filepath().to_string(), result)); - } - - info!("{integrity_check:?} integrity check passed for {self}"); - Ok(()) - } - - pub async fn check_agg_tiles_hashes(&self, conn: &mut T) -> MbtResult - where - for<'e> &'e mut T: SqliteExecutor<'e>, - { - let Some(stored) = self.get_agg_tiles_hash(&mut *conn).await? 
else { - return Err(AggHashValueNotFound(self.filepath().to_string())); - }; - let computed = calc_agg_tiles_hash(&mut *conn).await?; - if stored != computed { - let file = self.filepath().to_string(); - return Err(AggHashMismatch(computed, stored, file)); - } - - info!("The agg_tiles_hashes={computed} has been verified for {self}"); - Ok(computed) - } - - /// Compute new aggregate tiles hash and save it to the metadata table (if needed) - pub async fn update_agg_tiles_hash(&self, conn: &mut T) -> MbtResult - where - for<'e> &'e mut T: SqliteExecutor<'e>, - { - let old_hash = self.get_agg_tiles_hash(&mut *conn).await?; - let hash = calc_agg_tiles_hash(&mut *conn).await?; - if old_hash.as_ref() == Some(&hash) { - info!("Metadata value agg_tiles_hash is already set to the correct hash `{hash}` in {self}"); - } else { - if let Some(old_hash) = old_hash { - info!("Updating agg_tiles_hash from {old_hash} to {hash} in {self}"); - } else { - info!("Adding a new metadata value agg_tiles_hash = {hash} in {self}"); - } - self.set_metadata_value(&mut *conn, AGG_TILES_HASH, Some(&hash)) - .await?; - } - Ok(hash) - } - - pub async fn check_each_tile_hash(&self, conn: &mut T) -> MbtResult<()> - where - for<'e> &'e mut T: SqliteExecutor<'e>, - { - // Note that hex() always returns upper-case HEX values - let sql = match self.detect_type(&mut *conn).await? { - MbtType::Flat => { - info!("Skipping per-tile hash validation because this is a flat MBTiles file"); - return Ok(()); - } - MbtType::FlatWithHash => { - "SELECT expected, computed FROM ( - SELECT - upper(tile_hash) AS expected, - md5_hex(tile_data) AS computed - FROM tiles_with_hash - ) AS t - WHERE expected != computed - LIMIT 1;" - } - MbtType::Normalized { .. } => { - "SELECT expected, computed FROM ( - SELECT - upper(tile_id) AS expected, - md5_hex(tile_data) AS computed - FROM images - ) AS t - WHERE expected != computed - LIMIT 1;" - } - }; - - query(sql) - .fetch_optional(&mut *conn) - .await? 
- .map_or(Ok(()), |v| { - Err(IncorrectTileHash( - self.filepath().to_string(), - v.get(0), - v.get(1), - )) - })?; - - info!("All tile hashes are valid for {self}"); - Ok(()) - } -} - -/// Compute the hash of the combined tiles in the mbtiles file tiles table/view. -/// This should work on all mbtiles files perf `MBTiles` specification. -pub async fn calc_agg_tiles_hash(conn: &mut T) -> MbtResult -where - for<'e> &'e mut T: SqliteExecutor<'e>, -{ - debug!("Calculating agg_tiles_hash"); - let query = query( - // The md5_concat func will return NULL if there are no rows in the tiles table. - // For our use case, we will treat it as an empty string, and hash that. - // `tile_data` values must be stored as a blob per MBTiles spec - // `md5` functions will fail if the value is not text/blob/null - // - // Note that ORDER BY controls the output ordering, which is important for the hash value, - // and having it at the top level would not order values properly. - // See https://sqlite.org/forum/forumpost/228bb96e12a746ce - " -SELECT coalesce( - (SELECT md5_concat_hex( - cast(zoom_level AS text), - cast(tile_column AS text), - cast(tile_row AS text), - tile_data - ) - OVER (ORDER BY zoom_level, tile_column, tile_row ROWS - BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) - FROM tiles - LIMIT 1), - md5_hex('') -); -", - ); - Ok(query.fetch_one(conn).await?.get::(0)) } pub async fn attach_hash_fn(conn: &mut SqliteConnection) -> MbtResult<()> { @@ -881,314 +141,12 @@ pub async fn attach_hash_fn(conn: &mut SqliteConnection) -> MbtResult<()> { Ok(()) } -fn webmercator_to_wgs84(x: f64, y: f64) -> (f64, f64) { - let lng = (x / 6378137.0).to_degrees(); - let lat = (f64::atan(f64::sinh(y / 6378137.0))).to_degrees(); - (lng, lat) -} - #[cfg(test)] -mod tests { - use std::collections::HashMap; - - use approx::assert_relative_eq; - use insta::assert_yaml_snapshot; - use martin_tile_utils::Encoding; - use sqlx::Executor as _; - use tilejson::VectorLayer; - +pub(crate) mod tests { 
use super::*; - use crate::create_flat_tables; - async fn open(filepath: &str) -> MbtResult<(SqliteConnection, Mbtiles)> { + pub async fn open(filepath: &str) -> MbtResult<(SqliteConnection, Mbtiles)> { let mbt = Mbtiles::new(filepath)?; mbt.open().await.map(|conn| (conn, mbt)) } - - #[actix_rt::test] - async fn mbtiles_meta() -> MbtResult<()> { - let filepath = "../tests/fixtures/mbtiles/geography-class-jpg.mbtiles"; - let mbt = Mbtiles::new(filepath)?; - assert_eq!(mbt.filepath(), filepath); - assert_eq!(mbt.filename(), "geography-class-jpg"); - Ok(()) - } - - #[actix_rt::test] - async fn metadata_jpeg() -> MbtResult<()> { - let (mut conn, mbt) = open("../tests/fixtures/mbtiles/geography-class-jpg.mbtiles").await?; - let metadata = mbt.get_metadata(&mut conn).await?; - let tj = metadata.tilejson; - - assert_eq!(tj.description.unwrap(), "One of the example maps that comes with TileMill - a bright & colorful world map that blends retro and high-tech with its folded paper texture and interactive flag tooltips. "); - assert!(tj.legend.unwrap().starts_with("
\n\n
\n{{admin}}\n\n
{{/__teaser__}}{{#__full__}}{{/__full__}}"); - assert_eq!(tj.version.unwrap(), "1.0.0"); - assert_eq!(metadata.id, "geography-class-jpg"); - assert_eq!(metadata.tile_info, Format::Jpeg.into()); - Ok(()) - } - - #[actix_rt::test] - async fn metadata_mvt() -> MbtResult<()> { - let (mut conn, mbt) = open("../tests/fixtures/mbtiles/world_cities.mbtiles").await?; - let metadata = mbt.get_metadata(&mut conn).await?; - let tj = metadata.tilejson; - - assert_eq!(tj.maxzoom.unwrap(), 6); - assert_eq!(tj.minzoom.unwrap(), 0); - assert_eq!(tj.name.unwrap(), "Major cities from Natural Earth data"); - assert_eq!(tj.version.unwrap(), "2"); - assert_eq!( - tj.vector_layers, - Some(vec![VectorLayer { - id: "cities".to_string(), - fields: vec![("name".to_string(), "String".to_string())] - .into_iter() - .collect(), - description: Some(String::new()), - minzoom: Some(0), - maxzoom: Some(6), - other: HashMap::default() - }]) - ); - assert_eq!(metadata.id, "world_cities"); - assert_eq!( - metadata.tile_info, - TileInfo::new(Format::Mvt, Encoding::Gzip) - ); - assert_eq!(metadata.layer_type, Some("overlay".to_string())); - Ok(()) - } - - #[actix_rt::test] - async fn metadata_get_key() -> MbtResult<()> { - let (mut conn, mbt) = open("../tests/fixtures/mbtiles/world_cities.mbtiles").await?; - - let res = mbt.get_metadata_value(&mut conn, "bounds").await?.unwrap(); - assert_eq!(res, "-123.123590,-37.818085,174.763027,59.352706"); - let res = mbt.get_metadata_value(&mut conn, "name").await?.unwrap(); - assert_eq!(res, "Major cities from Natural Earth data"); - let res = mbt.get_metadata_value(&mut conn, "maxzoom").await?.unwrap(); - assert_eq!(res, "6"); - let res = mbt.get_metadata_value(&mut conn, "nonexistent_key").await?; - assert_eq!(res, None); - let res = mbt.get_metadata_value(&mut conn, "").await?; - assert_eq!(res, None); - Ok(()) - } - - #[actix_rt::test] - async fn metadata_set_key() -> MbtResult<()> { - let (mut conn, mbt) = 
open("file:metadata_set_key_mem_db?mode=memory&cache=shared").await?; - - conn.execute("CREATE TABLE metadata (name text NOT NULL PRIMARY KEY, value text);") - .await?; - - mbt.set_metadata_value(&mut conn, "bounds", Some("0.0, 0.0, 0.0, 0.0")) - .await?; - assert_eq!( - mbt.get_metadata_value(&mut conn, "bounds").await?.unwrap(), - "0.0, 0.0, 0.0, 0.0" - ); - - mbt.set_metadata_value( - &mut conn, - "bounds", - Some("-123.123590,-37.818085,174.763027,59.352706"), - ) - .await?; - assert_eq!( - mbt.get_metadata_value(&mut conn, "bounds").await?.unwrap(), - "-123.123590,-37.818085,174.763027,59.352706" - ); - - mbt.set_metadata_value(&mut conn, "bounds", None).await?; - assert_eq!(mbt.get_metadata_value(&mut conn, "bounds").await?, None); - - Ok(()) - } - - #[actix_rt::test] - async fn detect_type() -> MbtResult<()> { - let (mut conn, mbt) = open("../tests/fixtures/mbtiles/world_cities.mbtiles").await?; - let res = mbt.detect_type(&mut conn).await?; - assert_eq!(res, MbtType::Flat); - - let (mut conn, mbt) = open("../tests/fixtures/mbtiles/zoomed_world_cities.mbtiles").await?; - let res = mbt.detect_type(&mut conn).await?; - assert_eq!(res, MbtType::FlatWithHash); - - let (mut conn, mbt) = open("../tests/fixtures/mbtiles/geography-class-jpg.mbtiles").await?; - let res = mbt.detect_type(&mut conn).await?; - assert_eq!(res, MbtType::Normalized { hash_view: false }); - - let (mut conn, mbt) = open(":memory:").await?; - let res = mbt.detect_type(&mut conn).await; - assert!(matches!(res, Err(MbtError::InvalidDataFormat(_)))); - - Ok(()) - } - - #[actix_rt::test] - async fn validate_valid_file() -> MbtResult<()> { - let (mut conn, mbt) = open("../tests/fixtures/mbtiles/zoomed_world_cities.mbtiles").await?; - mbt.check_integrity(&mut conn, IntegrityCheckType::Quick) - .await?; - Ok(()) - } - - #[actix_rt::test] - async fn validate_invalid_file() -> MbtResult<()> { - let (mut conn, mbt) = - open("../tests/fixtures/files/invalid_zoomed_world_cities.mbtiles").await?; - let 
result = mbt.check_agg_tiles_hashes(&mut conn).await; - assert!(matches!(result, Err(MbtError::AggHashMismatch(..)))); - Ok(()) - } - - #[actix_rt::test] - async fn summary_empty_file() -> MbtResult<()> { - let (mut conn, mbt) = open("file:mbtiles_empty_summary?mode=memory&cache=shared").await?; - create_flat_tables(&mut conn).await.unwrap(); - let res = mbt.summary(&mut conn).await?; - assert_yaml_snapshot!(res, @r###" - --- - file_path: "file:mbtiles_empty_summary?mode=memory&cache=shared" - file_size: ~ - mbt_type: Flat - page_size: 4096 - page_count: 5 - tile_count: 0 - min_tile_size: ~ - max_tile_size: ~ - avg_tile_size: NaN - bbox: ~ - min_zoom: ~ - max_zoom: ~ - zoom_info: [] - "###); - - Ok(()) - } - #[actix_rt::test] - async fn meter_to_lnglat() { - let (lng, lat) = webmercator_to_wgs84(-20037508.34, -20037508.34); - assert_relative_eq!(lng, -179.99999997494382, epsilon = f64::EPSILON); - assert_relative_eq!(lat, -85.05112877764508, epsilon = f64::EPSILON); - - let (lng, lat) = webmercator_to_wgs84(20037508.34, 20037508.34); - assert_relative_eq!(lng, 179.99999997494382, epsilon = f64::EPSILON); - assert_relative_eq!(lat, 85.05112877764508, epsilon = f64::EPSILON); - - let (lng, lat) = webmercator_to_wgs84(0.0, 0.0); - assert_relative_eq!(lng, 0.0, epsilon = f64::EPSILON); - assert_relative_eq!(lat, 0.0, epsilon = f64::EPSILON); - - let (lng, lat) = webmercator_to_wgs84(3000.0, 9000.0); - assert_relative_eq!(lng, 0.026949458523585643, epsilon = f64::EPSILON); - assert_relative_eq!(lat, 0.08084834874097371, epsilon = f64::EPSILON); - } - - #[actix_rt::test] - async fn stat() -> MbtResult<()> { - let (mut conn, mbt) = open("../tests/fixtures/mbtiles/world_cities.mbtiles").await?; - let res = mbt.summary(&mut conn).await?; - - assert_yaml_snapshot!(res, @r###" - --- - file_path: "../tests/fixtures/mbtiles/world_cities.mbtiles" - file_size: 49152 - mbt_type: Flat - page_size: 4096 - page_count: 12 - tile_count: 196 - min_tile_size: 64 - max_tile_size: 1107 - 
avg_tile_size: 96.2295918367347 - bbox: - - -179.99999997494382 - - -85.05112877764508 - - 180.00000015460688 - - 85.05112879314403 - min_zoom: 0 - max_zoom: 6 - zoom_info: - - zoom: 0 - tile_count: 1 - min_tile_size: 1107 - max_tile_size: 1107 - avg_tile_size: 1107 - bbox: - - -179.99999997494382 - - -85.05112877764508 - - 180.00000015460688 - - 85.05112879314403 - - zoom: 1 - tile_count: 4 - min_tile_size: 160 - max_tile_size: 650 - avg_tile_size: 366.5 - bbox: - - -179.99999997494382 - - -85.05112877764508 - - 180.00000015460688 - - 85.05112879314403 - - zoom: 2 - tile_count: 7 - min_tile_size: 137 - max_tile_size: 495 - avg_tile_size: 239.57142857142858 - bbox: - - -179.99999997494382 - - -66.51326042021836 - - 180.00000015460688 - - 66.51326049182072 - - zoom: 3 - tile_count: 17 - min_tile_size: 67 - max_tile_size: 246 - avg_tile_size: 134 - bbox: - - -134.99999995874995 - - -40.9798980140281 - - 180.00000015460688 - - 66.51326049182072 - - zoom: 4 - tile_count: 38 - min_tile_size: 64 - max_tile_size: 175 - avg_tile_size: 86 - bbox: - - -134.99999995874995 - - -40.9798980140281 - - 180.00000015460688 - - 66.51326049182072 - - zoom: 5 - tile_count: 57 - min_tile_size: 64 - max_tile_size: 107 - avg_tile_size: 72.7719298245614 - bbox: - - -123.74999995470151 - - -40.9798980140281 - - 180.00000015460688 - - 61.60639642757953 - - zoom: 6 - tile_count: 72 - min_tile_size: 64 - max_tile_size: 97 - avg_tile_size: 68.29166666666667 - bbox: - - -123.74999995470151 - - -40.9798980140281 - - 180.00000015460688 - - 61.60639642757953 - "###); - - Ok(()) - } } diff --git a/mbtiles/src/metadata.rs b/mbtiles/src/metadata.rs new file mode 100644 index 00000000..725e1e3b --- /dev/null +++ b/mbtiles/src/metadata.rs @@ -0,0 +1,278 @@ +use std::fmt::Display; +use std::str::FromStr; + +use futures::TryStreamExt; +use log::{info, warn}; +use martin_tile_utils::TileInfo; +use serde::ser::SerializeStruct; +use serde::{Serialize, Serializer}; +use serde_json::{Value as JSONValue, 
Value}; +use sqlx::{query, SqliteExecutor}; +use tilejson::{tilejson, Bounds, Center, TileJSON}; + +use crate::errors::MbtResult; +use crate::Mbtiles; + +#[derive(Clone, Debug, PartialEq, Serialize)] +pub struct Metadata { + pub id: String, + #[serde(serialize_with = "serialize_ti")] + pub tile_info: TileInfo, + pub layer_type: Option, + pub tilejson: TileJSON, + pub json: Option, +} + +fn serialize_ti(ti: &TileInfo, serializer: S) -> Result { + let mut s = serializer.serialize_struct("TileInfo", 2)?; + s.serialize_field("format", &ti.format.to_string())?; + s.serialize_field( + "encoding", + ti.encoding.content_encoding().unwrap_or_default(), + )?; + s.end() +} + +impl Mbtiles { + fn to_val(&self, val: Result, title: &str) -> Option { + match val { + Ok(v) => Some(v), + Err(err) => { + let name = &self.filename(); + warn!("Unable to parse metadata {title} value in {name}: {err}"); + None + } + } + } + + /// Get a single metadata value from the metadata table + pub async fn get_metadata_value(&self, conn: &mut T, key: &str) -> MbtResult> + where + for<'e> &'e mut T: SqliteExecutor<'e>, + { + let query = query!("SELECT value from metadata where name = ?", key); + let row = query.fetch_optional(conn).await?; + if let Some(row) = row { + if let Some(value) = row.value { + return Ok(Some(value)); + } + } + Ok(None) + } + + pub async fn set_metadata_value( + &self, + conn: &mut T, + key: &str, + value: Option<&str>, + ) -> MbtResult<()> + where + for<'e> &'e mut T: SqliteExecutor<'e>, + { + if let Some(value) = value { + query!( + "INSERT OR REPLACE INTO metadata(name, value) VALUES(?, ?)", + key, + value + ) + .execute(conn) + .await?; + } else { + query!("DELETE FROM metadata WHERE name=?", key) + .execute(conn) + .await?; + } + Ok(()) + } + + pub async fn get_metadata(&self, conn: &mut T) -> MbtResult + where + for<'e> &'e mut T: SqliteExecutor<'e>, + { + let (tj, layer_type, json) = self.parse_metadata(conn).await?; + + Ok(Metadata { + id: 
self.filename().to_string(), + tile_info: self.detect_format(&tj, conn).await?, + tilejson: tj, + layer_type, + json, + }) + } + + async fn parse_metadata( + &self, + conn: &mut T, + ) -> MbtResult<(TileJSON, Option, Option)> + where + for<'e> &'e mut T: SqliteExecutor<'e>, + { + let query = query!("SELECT name, value FROM metadata WHERE value IS NOT ''"); + let mut rows = query.fetch(conn); + + let mut tj = tilejson! { tiles: vec![] }; + let mut layer_type: Option = None; + let mut json: Option = None; + + while let Some(row) = rows.try_next().await? { + if let (Some(name), Some(value)) = (row.name, row.value) { + match name.as_ref() { + "name" => tj.name = Some(value), + "version" => tj.version = Some(value), + "bounds" => tj.bounds = self.to_val(Bounds::from_str(value.as_str()), &name), + "center" => tj.center = self.to_val(Center::from_str(value.as_str()), &name), + "minzoom" => tj.minzoom = self.to_val(value.parse(), &name), + "maxzoom" => tj.maxzoom = self.to_val(value.parse(), &name), + "description" => tj.description = Some(value), + "attribution" => tj.attribution = Some(value), + "type" => layer_type = Some(value), + "legend" => tj.legend = Some(value), + "template" => tj.template = Some(value), + "json" => json = self.to_val(serde_json::from_str(&value), &name), + "format" | "generator" => { + tj.other.insert(name, Value::String(value)); + } + _ => { + let file = &self.filename(); + info!("{file} has an unrecognized metadata value {name}={value}"); + tj.other.insert(name, Value::String(value)); + } + } + } + } + + if let Some(JSONValue::Object(obj)) = &mut json { + if let Some(value) = obj.remove("vector_layers") { + if let Ok(v) = serde_json::from_value(value) { + tj.vector_layers = Some(v); + } else { + warn!( + "Unable to parse metadata vector_layers value in {}", + self.filename() + ); + } + } + } + + Ok((tj, layer_type, json)) + } +} + +#[cfg(test)] +mod tests { + use std::collections::HashMap; + + use martin_tile_utils::{Encoding, Format}; + use 
sqlx::Executor as _; + use tilejson::VectorLayer; + + use super::*; + use crate::mbtiles::tests::open; + + #[actix_rt::test] + async fn mbtiles_meta() -> MbtResult<()> { + let filepath = "../tests/fixtures/mbtiles/geography-class-jpg.mbtiles"; + let mbt = Mbtiles::new(filepath)?; + assert_eq!(mbt.filepath(), filepath); + assert_eq!(mbt.filename(), "geography-class-jpg"); + Ok(()) + } + + #[actix_rt::test] + async fn metadata_jpeg() -> MbtResult<()> { + let (mut conn, mbt) = open("../tests/fixtures/mbtiles/geography-class-jpg.mbtiles").await?; + let metadata = mbt.get_metadata(&mut conn).await?; + let tj = metadata.tilejson; + + assert_eq!(tj.description.unwrap(), "One of the example maps that comes with TileMill - a bright & colorful world map that blends retro and high-tech with its folded paper texture and interactive flag tooltips. "); + assert!(tj.legend.unwrap().starts_with("
\n\n
\n{{admin}}\n\n
{{/__teaser__}}{{#__full__}}{{/__full__}}"); + assert_eq!(tj.version.unwrap(), "1.0.0"); + assert_eq!(metadata.id, "geography-class-jpg"); + assert_eq!(metadata.tile_info, Format::Jpeg.into()); + Ok(()) + } + + #[actix_rt::test] + async fn metadata_mvt() -> MbtResult<()> { + let (mut conn, mbt) = open("../tests/fixtures/mbtiles/world_cities.mbtiles").await?; + let metadata = mbt.get_metadata(&mut conn).await?; + let tj = metadata.tilejson; + + assert_eq!(tj.maxzoom.unwrap(), 6); + assert_eq!(tj.minzoom.unwrap(), 0); + assert_eq!(tj.name.unwrap(), "Major cities from Natural Earth data"); + assert_eq!(tj.version.unwrap(), "2"); + assert_eq!( + tj.vector_layers, + Some(vec![VectorLayer { + id: "cities".to_string(), + fields: vec![("name".to_string(), "String".to_string())] + .into_iter() + .collect(), + description: Some(String::new()), + minzoom: Some(0), + maxzoom: Some(6), + other: HashMap::default() + }]) + ); + assert_eq!(metadata.id, "world_cities"); + assert_eq!( + metadata.tile_info, + TileInfo::new(Format::Mvt, Encoding::Gzip) + ); + assert_eq!(metadata.layer_type, Some("overlay".to_string())); + Ok(()) + } + + #[actix_rt::test] + async fn metadata_get_key() -> MbtResult<()> { + let (mut conn, mbt) = open("../tests/fixtures/mbtiles/world_cities.mbtiles").await?; + + let res = mbt.get_metadata_value(&mut conn, "bounds").await?.unwrap(); + assert_eq!(res, "-123.123590,-37.818085,174.763027,59.352706"); + let res = mbt.get_metadata_value(&mut conn, "name").await?.unwrap(); + assert_eq!(res, "Major cities from Natural Earth data"); + let res = mbt.get_metadata_value(&mut conn, "maxzoom").await?.unwrap(); + assert_eq!(res, "6"); + let res = mbt.get_metadata_value(&mut conn, "nonexistent_key").await?; + assert_eq!(res, None); + let res = mbt.get_metadata_value(&mut conn, "").await?; + assert_eq!(res, None); + Ok(()) + } + + #[actix_rt::test] + async fn metadata_set_key() -> MbtResult<()> { + let (mut conn, mbt) = 
open("file:metadata_set_key_mem_db?mode=memory&cache=shared").await?; + + conn.execute("CREATE TABLE metadata (name text NOT NULL PRIMARY KEY, value text);") + .await?; + + mbt.set_metadata_value(&mut conn, "bounds", Some("0.0, 0.0, 0.0, 0.0")) + .await?; + assert_eq!( + mbt.get_metadata_value(&mut conn, "bounds").await?.unwrap(), + "0.0, 0.0, 0.0, 0.0" + ); + + mbt.set_metadata_value( + &mut conn, + "bounds", + Some("-123.123590,-37.818085,174.763027,59.352706"), + ) + .await?; + assert_eq!( + mbt.get_metadata_value(&mut conn, "bounds").await?.unwrap(), + "-123.123590,-37.818085,174.763027,59.352706" + ); + + mbt.set_metadata_value(&mut conn, "bounds", None).await?; + assert_eq!(mbt.get_metadata_value(&mut conn, "bounds").await?, None); + + Ok(()) + } +} diff --git a/mbtiles/src/summary.rs b/mbtiles/src/summary.rs new file mode 100644 index 00000000..2bfe3185 --- /dev/null +++ b/mbtiles/src/summary.rs @@ -0,0 +1,366 @@ +use std::fmt::{Display, Formatter}; +use std::path::PathBuf; +use std::str::FromStr; + +use serde::Serialize; +use size_format::SizeFormatterBinary; +use sqlx::{query, SqliteExecutor}; +use tilejson::Bounds; + +use crate::{MbtResult, MbtType, Mbtiles}; + +#[derive(Clone, Debug, PartialEq, Serialize)] +pub struct ZoomInfo { + pub zoom: u8, + pub tile_count: u64, + pub min_tile_size: u64, + pub max_tile_size: u64, + pub avg_tile_size: f64, + pub bbox: Bounds, +} + +#[derive(Clone, Debug, PartialEq, Serialize)] +pub struct Summary { + pub file_size: Option, + pub mbt_type: MbtType, + pub page_size: u64, + pub page_count: u64, + pub tile_count: u64, + pub min_tile_size: Option, + pub max_tile_size: Option, + pub avg_tile_size: f64, + pub bbox: Option, + pub min_zoom: Option, + pub max_zoom: Option, + pub zoom_info: Vec, +} + +impl Display for Summary { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + writeln!(f, "Schema: {}", self.mbt_type)?; + + if let Some(file_size) = self.file_size { + let file_size = 
SizeFormatterBinary::new(file_size); + writeln!(f, "File size: {file_size:.2}B")?; + } else { + writeln!(f, "File size: unknown")?; + } + let page_size = SizeFormatterBinary::new(self.page_size); + writeln!(f, "Page size: {page_size:.2}B")?; + writeln!(f, "Page count: {:.2}", self.page_count)?; + writeln!(f)?; + writeln!( + f, + "|{:^9}|{:^9}|{:^9}|{:^9}|{:^9}| {:^20} |", + "Zoom", "Count", "Smallest", "Largest", "Average", "BBox" + )?; + + for l in &self.zoom_info { + let min = SizeFormatterBinary::new(l.min_tile_size); + let max = SizeFormatterBinary::new(l.max_tile_size); + let avg = SizeFormatterBinary::new(l.avg_tile_size as u64); + let prec = get_zoom_precision(l.zoom); + + writeln!( + f, + "|{:>9}|{:>9}|{:>9}|{:>9}|{:>9}| {:<20} |", + l.zoom, + l.tile_count, + format!("{min:.2}B"), + format!("{max:.2}B"), + format!("{avg:.2}B"), + format!("{:.prec$}", l.bbox), + )?; + } + + if self.zoom_info.len() > 1 { + if let (Some(min), Some(max), Some(bbox), Some(max_zoom)) = ( + self.min_tile_size, + self.max_tile_size, + self.bbox, + self.max_zoom, + ) { + let min = SizeFormatterBinary::new(min); + let max = SizeFormatterBinary::new(max); + let avg = SizeFormatterBinary::new(self.avg_tile_size as u64); + let prec = get_zoom_precision(max_zoom); + writeln!( + f, + "|{:>9}|{:>9}|{:>9}|{:>9}|{:>9}| {:<20} |", + "all", + self.tile_count, + format!("{min}B"), + format!("{max}B"), + format!("{avg}B"), + format!("{:.prec$}", bbox), + )?; + } + } + + Ok(()) + } +} + +impl Mbtiles { + /// Compute MBTiles file summary + pub async fn summary(&self, conn: &mut T) -> MbtResult + where + for<'e> &'e mut T: SqliteExecutor<'e>, + { + let mbt_type = self.detect_type(&mut *conn).await?; + let file_size = PathBuf::from_str(self.filepath()) + .ok() + .and_then(|p| p.metadata().ok()) + .map(|m| m.len()); + + let sql = query!("PRAGMA page_size;"); + let page_size = sql.fetch_one(&mut *conn).await?.page_size.unwrap() as u64; + + let sql = query!("PRAGMA page_count;"); + let page_count = 
sql.fetch_one(&mut *conn).await?.page_count.unwrap() as u64; + + let zoom_info = query!( + " + SELECT zoom_level AS zoom, + count() AS count, + min(length(tile_data)) AS smallest, + max(length(tile_data)) AS largest, + avg(length(tile_data)) AS average, + min(tile_column) AS min_tile_x, + min(tile_row) AS min_tile_y, + max(tile_column) AS max_tile_x, + max(tile_row) AS max_tile_y + FROM tiles + GROUP BY zoom_level" + ) + .fetch_all(&mut *conn) + .await?; + + let zoom_info: Vec = zoom_info + .into_iter() + .map(|r| { + let zoom = u8::try_from(r.zoom.unwrap()).expect("zoom_level is not a u8"); + ZoomInfo { + zoom, + tile_count: r.count as u64, + min_tile_size: r.smallest.unwrap_or(0) as u64, + max_tile_size: r.largest.unwrap_or(0) as u64, + avg_tile_size: r.average.unwrap_or(0.0), + bbox: xyz_to_bbox( + zoom, + r.min_tile_x.unwrap(), + r.min_tile_y.unwrap(), + r.max_tile_x.unwrap(), + r.max_tile_y.unwrap(), + ), + } + }) + .collect(); + + let tile_count = zoom_info.iter().map(|l| l.tile_count).sum(); + let avg_sum = zoom_info + .iter() + .map(|l| l.avg_tile_size * l.tile_count as f64) + .sum::(); + + Ok(Summary { + file_size, + mbt_type, + page_size, + page_count, + tile_count, + min_tile_size: zoom_info.iter().map(|l| l.min_tile_size).reduce(u64::min), + max_tile_size: zoom_info.iter().map(|l| l.max_tile_size).reduce(u64::max), + avg_tile_size: avg_sum / tile_count as f64, + bbox: zoom_info.iter().map(|l| l.bbox).reduce(|a, b| a + b), + min_zoom: zoom_info.iter().map(|l| l.zoom).reduce(u8::min), + max_zoom: zoom_info.iter().map(|l| l.zoom).reduce(u8::max), + zoom_info, + }) + } +} + +/// Convert min/max XYZ tile coordinates to a bounding box +fn xyz_to_bbox(zoom: u8, min_x: i32, min_y: i32, max_x: i32, max_y: i32) -> Bounds { + let tile_size = 40075016.7 / (2_u32.pow(zoom as u32)) as f64; + let (min_lng, min_lat) = webmercator_to_wgs84( + -20037508.34 + min_x as f64 * tile_size, + -20037508.34 + min_y as f64 * tile_size, + ); + let (max_lng, max_lat) = 
webmercator_to_wgs84( + -20037508.34 + (max_x as f64 + 1.0) * tile_size, + -20037508.34 + (max_y as f64 + 1.0) * tile_size, + ); + + Bounds::new(min_lng, min_lat, max_lng, max_lat) +} + +fn get_zoom_precision(zoom: u8) -> usize { + let lng_delta = webmercator_to_wgs84(40075016.7 / (2_u32.pow(zoom as u32)) as f64, 0f64).0; + let log = lng_delta.log10() - 0.5; + if log > 0_f64 { + 0 + } else { + -log.ceil() as usize + } +} + +fn webmercator_to_wgs84(x: f64, y: f64) -> (f64, f64) { + let lng = (x / 6378137.0).to_degrees(); + let lat = (f64::atan(f64::sinh(y / 6378137.0))).to_degrees(); + (lng, lat) +} + +#[cfg(test)] +mod tests { + use approx::assert_relative_eq; + use insta::assert_yaml_snapshot; + + use crate::summary::webmercator_to_wgs84; + use crate::{create_flat_tables, MbtResult, Mbtiles}; + + #[actix_rt::test] + async fn meter_to_lnglat() { + let (lng, lat) = webmercator_to_wgs84(-20037508.34, -20037508.34); + assert_relative_eq!(lng, -179.99999997494382, epsilon = f64::EPSILON); + assert_relative_eq!(lat, -85.05112877764508, epsilon = f64::EPSILON); + + let (lng, lat) = webmercator_to_wgs84(20037508.34, 20037508.34); + assert_relative_eq!(lng, 179.99999997494382, epsilon = f64::EPSILON); + assert_relative_eq!(lat, 85.05112877764508, epsilon = f64::EPSILON); + + let (lng, lat) = webmercator_to_wgs84(0.0, 0.0); + assert_relative_eq!(lng, 0.0, epsilon = f64::EPSILON); + assert_relative_eq!(lat, 0.0, epsilon = f64::EPSILON); + + let (lng, lat) = webmercator_to_wgs84(3000.0, 9000.0); + assert_relative_eq!(lng, 0.026949458523585643, epsilon = f64::EPSILON); + assert_relative_eq!(lat, 0.08084834874097371, epsilon = f64::EPSILON); + } + + #[actix_rt::test] + async fn summary_empty_file() -> MbtResult<()> { + let mbt = Mbtiles::new("file:mbtiles_empty_summary?mode=memory&cache=shared")?; + let mut conn = mbt.open().await?; + + create_flat_tables(&mut conn).await.unwrap(); + let res = mbt.summary(&mut conn).await?; + assert_yaml_snapshot!(res, @r###" + --- + file_size: 
~ + mbt_type: Flat + page_size: 4096 + page_count: 5 + tile_count: 0 + min_tile_size: ~ + max_tile_size: ~ + avg_tile_size: NaN + bbox: ~ + min_zoom: ~ + max_zoom: ~ + zoom_info: [] + "###); + + Ok(()) + } + + #[actix_rt::test] + async fn summary() -> MbtResult<()> { + let mbt = Mbtiles::new("../tests/fixtures/mbtiles/world_cities.mbtiles")?; + let mut conn = mbt.open().await?; + + let res = mbt.summary(&mut conn).await?; + + assert_yaml_snapshot!(res, @r###" + --- + file_size: 49152 + mbt_type: Flat + page_size: 4096 + page_count: 12 + tile_count: 196 + min_tile_size: 64 + max_tile_size: 1107 + avg_tile_size: 96.2295918367347 + bbox: + - -179.99999997494382 + - -85.05112877764508 + - 180.00000015460688 + - 85.05112879314403 + min_zoom: 0 + max_zoom: 6 + zoom_info: + - zoom: 0 + tile_count: 1 + min_tile_size: 1107 + max_tile_size: 1107 + avg_tile_size: 1107 + bbox: + - -179.99999997494382 + - -85.05112877764508 + - 180.00000015460688 + - 85.05112879314403 + - zoom: 1 + tile_count: 4 + min_tile_size: 160 + max_tile_size: 650 + avg_tile_size: 366.5 + bbox: + - -179.99999997494382 + - -85.05112877764508 + - 180.00000015460688 + - 85.05112879314403 + - zoom: 2 + tile_count: 7 + min_tile_size: 137 + max_tile_size: 495 + avg_tile_size: 239.57142857142858 + bbox: + - -179.99999997494382 + - -66.51326042021836 + - 180.00000015460688 + - 66.51326049182072 + - zoom: 3 + tile_count: 17 + min_tile_size: 67 + max_tile_size: 246 + avg_tile_size: 134 + bbox: + - -134.99999995874995 + - -40.9798980140281 + - 180.00000015460688 + - 66.51326049182072 + - zoom: 4 + tile_count: 38 + min_tile_size: 64 + max_tile_size: 175 + avg_tile_size: 86 + bbox: + - -134.99999995874995 + - -40.9798980140281 + - 180.00000015460688 + - 66.51326049182072 + - zoom: 5 + tile_count: 57 + min_tile_size: 64 + max_tile_size: 107 + avg_tile_size: 72.7719298245614 + bbox: + - -123.74999995470151 + - -40.9798980140281 + - 180.00000015460688 + - 61.60639642757953 + - zoom: 6 + tile_count: 72 + min_tile_size: 64 
+ max_tile_size: 97 + avg_tile_size: 68.29166666666667 + bbox: + - -123.74999995470151 + - -40.9798980140281 + - 180.00000015460688 + - 61.60639642757953 + "###); + + Ok(()) + } +} diff --git a/mbtiles/src/validation.rs b/mbtiles/src/validation.rs new file mode 100644 index 00000000..c1e944cc --- /dev/null +++ b/mbtiles/src/validation.rs @@ -0,0 +1,456 @@ +use std::collections::HashSet; + +#[cfg(feature = "cli")] +use clap::ValueEnum; +use enum_display::EnumDisplay; +use log::{debug, info, warn}; +use martin_tile_utils::{Format, TileInfo}; +use serde::Serialize; +use serde_json::Value; +use sqlx::sqlite::SqliteRow; +use sqlx::{query, Row, SqliteExecutor}; +use tilejson::TileJSON; + +use crate::errors::{MbtError, MbtResult}; +use crate::queries::{ + has_tiles_with_hash, is_flat_tables_type, is_flat_with_hash_tables_type, + is_normalized_tables_type, +}; +use crate::MbtError::{ + AggHashMismatch, AggHashValueNotFound, FailedIntegrityCheck, IncorrectTileHash, +}; +use crate::Mbtiles; + +/// Metadata key for the aggregate tiles hash value +pub const AGG_TILES_HASH: &str = "agg_tiles_hash"; + +/// Metadata key for a diff file, +/// describing the eventual [`AGG_TILES_HASH`] value once the diff is applied +pub const AGG_TILES_HASH_IN_DIFF: &str = "agg_tiles_hash_after_apply"; + +#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, EnumDisplay, Serialize)] +#[enum_display(case = "Kebab")] +pub enum MbtType { + Flat, + FlatWithHash, + Normalized { hash_view: bool }, +} + +impl MbtType { + pub fn is_normalized(&self) -> bool { + matches!(self, Self::Normalized { .. 
}) + } + + pub fn is_normalized_with_view(&self) -> bool { + matches!(self, Self::Normalized { hash_view: true }) + } +} + +#[derive(PartialEq, Eq, Default, Debug, Clone, EnumDisplay)] +#[enum_display(case = "Kebab")] +#[cfg_attr(feature = "cli", derive(ValueEnum))] +pub enum IntegrityCheckType { + #[default] + Quick, + Full, + Off, +} + +impl Mbtiles { + pub async fn validate( + &self, + check_type: IntegrityCheckType, + update_agg_tiles_hash: bool, + ) -> MbtResult { + let mut conn = if update_agg_tiles_hash { + self.open().await? + } else { + self.open_readonly().await? + }; + self.check_integrity(&mut conn, check_type).await?; + self.check_each_tile_hash(&mut conn).await?; + if update_agg_tiles_hash { + self.update_agg_tiles_hash(&mut conn).await + } else { + self.check_agg_tiles_hashes(&mut conn).await + } + } + + /// Get the aggregate tiles hash value from the metadata table + pub async fn get_agg_tiles_hash(&self, conn: &mut T) -> MbtResult> + where + for<'e> &'e mut T: SqliteExecutor<'e>, + { + self.get_metadata_value(&mut *conn, AGG_TILES_HASH).await + } + + /// Detect tile format and verify that it is consistent across some tiles + pub async fn detect_format(&self, tilejson: &TileJSON, conn: &mut T) -> MbtResult + where + for<'e> &'e mut T: SqliteExecutor<'e>, + { + let mut tile_info = None; + let mut tested_zoom = -1_i64; + + // First, pick any random tile + let query = query!("SELECT zoom_level, tile_column, tile_row, tile_data FROM tiles WHERE zoom_level >= 0 LIMIT 1"); + let row = query.fetch_optional(&mut *conn).await?; + if let Some(r) = row { + tile_info = self.parse_tile(r.zoom_level, r.tile_column, r.tile_row, r.tile_data); + tested_zoom = r.zoom_level.unwrap_or(-1); + } + + // Afterwards, iterate over tiles in all allowed zooms and check for consistency + for z in tilejson.minzoom.unwrap_or(0)..=tilejson.maxzoom.unwrap_or(18) { + if i64::from(z) == tested_zoom { + continue; + } + let query = query! 
{"SELECT tile_column, tile_row, tile_data FROM tiles WHERE zoom_level = ? LIMIT 1", z}; + let row = query.fetch_optional(&mut *conn).await?; + if let Some(r) = row { + match ( + tile_info, + self.parse_tile(Some(z.into()), r.tile_column, r.tile_row, r.tile_data), + ) { + (_, None) => {} + (None, new) => tile_info = new, + (Some(old), Some(new)) if old == new => {} + (Some(old), Some(new)) => { + return Err(MbtError::InconsistentMetadata(old, new)); + } + } + } + } + + if let Some(Value::String(fmt)) = tilejson.other.get("format") { + let file = self.filename(); + match (tile_info, Format::parse(fmt)) { + (_, None) => { + warn!("Unknown format value in metadata: {fmt}"); + } + (None, Some(fmt)) => { + if fmt.is_detectable() { + warn!("Metadata table sets detectable '{fmt}' tile format, but it could not be verified for file {file}"); + } else { + info!("Using '{fmt}' tile format from metadata table in file {file}"); + } + tile_info = Some(fmt.into()); + } + (Some(info), Some(fmt)) if info.format == fmt => { + debug!("Detected tile format {info} matches metadata.format '{fmt}' in file {file}"); + } + (Some(info), _) => { + warn!("Found inconsistency: metadata.format='{fmt}', but tiles were detected as {info:?} in file {file}. 
Tiles will be returned as {info:?}."); + } + } + } + + if let Some(info) = tile_info { + if info.format != Format::Mvt && tilejson.vector_layers.is_some() { + warn!( + "{} has vector_layers metadata but non-vector tiles", + self.filename() + ); + } + Ok(info) + } else { + Err(MbtError::NoTilesFound) + } + } + + fn parse_tile( + &self, + z: Option, + x: Option, + y: Option, + tile: Option>, + ) -> Option { + if let (Some(z), Some(x), Some(y), Some(tile)) = (z, x, y, tile) { + let info = TileInfo::detect(&tile); + if let Some(info) = info { + debug!( + "Tile {z}/{x}/{} is detected as {info} in file {}", + (1 << z) - 1 - y, + self.filename(), + ); + } + info + } else { + None + } + } + + pub async fn detect_type(&self, conn: &mut T) -> MbtResult + where + for<'e> &'e mut T: SqliteExecutor<'e>, + { + debug!("Detecting MBTiles type for {self}"); + let typ = if is_normalized_tables_type(&mut *conn).await? { + MbtType::Normalized { + hash_view: has_tiles_with_hash(&mut *conn).await?, + } + } else if is_flat_with_hash_tables_type(&mut *conn).await? { + MbtType::FlatWithHash + } else if is_flat_tables_type(&mut *conn).await? { + MbtType::Flat + } else { + return Err(MbtError::InvalidDataFormat(self.filepath().to_string())); + }; + + self.check_for_uniqueness_constraint(&mut *conn, typ) + .await?; + + Ok(typ) + } + + async fn check_for_uniqueness_constraint( + &self, + conn: &mut T, + mbt_type: MbtType, + ) -> MbtResult<()> + where + for<'e> &'e mut T: SqliteExecutor<'e>, + { + let table_name = match mbt_type { + MbtType::Flat => "tiles", + MbtType::FlatWithHash => "tiles_with_hash", + MbtType::Normalized { .. } => "map", + }; + + let indexes = query("SELECT name FROM pragma_index_list(?) 
WHERE [unique] = 1") + .bind(table_name) + .fetch_all(&mut *conn) + .await?; + + // Ensure there is some index on tiles that has a unique constraint on (zoom_level, tile_row, tile_column) + for index in indexes { + let mut unique_idx_cols = HashSet::new(); + let rows = query("SELECT DISTINCT name FROM pragma_index_info(?)") + .bind(index.get::("name")) + .fetch_all(&mut *conn) + .await?; + + for row in rows { + unique_idx_cols.insert(row.get("name")); + } + + if unique_idx_cols + .symmetric_difference(&HashSet::from([ + "zoom_level".to_string(), + "tile_column".to_string(), + "tile_row".to_string(), + ])) + .collect::>() + .is_empty() + { + return Ok(()); + } + } + + Err(MbtError::NoUniquenessConstraint( + self.filepath().to_string(), + )) + } + + /// Perform `SQLite` internal integrity check + pub async fn check_integrity( + &self, + conn: &mut T, + integrity_check: IntegrityCheckType, + ) -> MbtResult<()> + where + for<'e> &'e mut T: SqliteExecutor<'e>, + { + if integrity_check == IntegrityCheckType::Off { + info!("Skipping integrity check for {self}"); + return Ok(()); + } + + let sql = if integrity_check == IntegrityCheckType::Full { + "PRAGMA integrity_check;" + } else { + "PRAGMA quick_check;" + }; + + let result: Vec = query(sql) + .map(|row: SqliteRow| row.get(0)) + .fetch_all(&mut *conn) + .await?; + + if result.len() > 1 + || result.get(0).ok_or(FailedIntegrityCheck( + self.filepath().to_string(), + vec!["SQLite could not perform integrity check".to_string()], + ))? != "ok" + { + return Err(FailedIntegrityCheck(self.filepath().to_string(), result)); + } + + info!("{integrity_check:?} integrity check passed for {self}"); + Ok(()) + } + + pub async fn check_agg_tiles_hashes(&self, conn: &mut T) -> MbtResult + where + for<'e> &'e mut T: SqliteExecutor<'e>, + { + let Some(stored) = self.get_agg_tiles_hash(&mut *conn).await? 
else { + return Err(AggHashValueNotFound(self.filepath().to_string())); + }; + let computed = calc_agg_tiles_hash(&mut *conn).await?; + if stored != computed { + let file = self.filepath().to_string(); + return Err(AggHashMismatch(computed, stored, file)); + } + + info!("The agg_tiles_hashes={computed} has been verified for {self}"); + Ok(computed) + } + + /// Compute new aggregate tiles hash and save it to the metadata table (if needed) + pub async fn update_agg_tiles_hash(&self, conn: &mut T) -> MbtResult + where + for<'e> &'e mut T: SqliteExecutor<'e>, + { + let old_hash = self.get_agg_tiles_hash(&mut *conn).await?; + let hash = calc_agg_tiles_hash(&mut *conn).await?; + if old_hash.as_ref() == Some(&hash) { + info!("Metadata value agg_tiles_hash is already set to the correct hash `{hash}` in {self}"); + } else { + if let Some(old_hash) = old_hash { + info!("Updating agg_tiles_hash from {old_hash} to {hash} in {self}"); + } else { + info!("Adding a new metadata value agg_tiles_hash = {hash} in {self}"); + } + self.set_metadata_value(&mut *conn, AGG_TILES_HASH, Some(&hash)) + .await?; + } + Ok(hash) + } + + pub async fn check_each_tile_hash(&self, conn: &mut T) -> MbtResult<()> + where + for<'e> &'e mut T: SqliteExecutor<'e>, + { + // Note that hex() always returns upper-case HEX values + let sql = match self.detect_type(&mut *conn).await? { + MbtType::Flat => { + info!("Skipping per-tile hash validation because this is a flat MBTiles file"); + return Ok(()); + } + MbtType::FlatWithHash => { + "SELECT expected, computed FROM ( + SELECT + upper(tile_hash) AS expected, + md5_hex(tile_data) AS computed + FROM tiles_with_hash + ) AS t + WHERE expected != computed + LIMIT 1;" + } + MbtType::Normalized { .. } => { + "SELECT expected, computed FROM ( + SELECT + upper(tile_id) AS expected, + md5_hex(tile_data) AS computed + FROM images + ) AS t + WHERE expected != computed + LIMIT 1;" + } + }; + + query(sql) + .fetch_optional(&mut *conn) + .await? 
+ .map_or(Ok(()), |v| { + Err(IncorrectTileHash( + self.filepath().to_string(), + v.get(0), + v.get(1), + )) + })?; + + info!("All tile hashes are valid for {self}"); + Ok(()) + } +} + +#[cfg(test)] +pub(crate) mod tests { + use super::*; + use crate::mbtiles::tests::open; + + #[actix_rt::test] + async fn detect_type() -> MbtResult<()> { + let (mut conn, mbt) = open("../tests/fixtures/mbtiles/world_cities.mbtiles").await?; + let res = mbt.detect_type(&mut conn).await?; + assert_eq!(res, MbtType::Flat); + + let (mut conn, mbt) = open("../tests/fixtures/mbtiles/zoomed_world_cities.mbtiles").await?; + let res = mbt.detect_type(&mut conn).await?; + assert_eq!(res, MbtType::FlatWithHash); + + let (mut conn, mbt) = open("../tests/fixtures/mbtiles/geography-class-jpg.mbtiles").await?; + let res = mbt.detect_type(&mut conn).await?; + assert_eq!(res, MbtType::Normalized { hash_view: false }); + + let (mut conn, mbt) = open(":memory:").await?; + let res = mbt.detect_type(&mut conn).await; + assert!(matches!(res, Err(MbtError::InvalidDataFormat(_)))); + + Ok(()) + } + + #[actix_rt::test] + async fn validate_valid_file() -> MbtResult<()> { + let (mut conn, mbt) = open("../tests/fixtures/mbtiles/zoomed_world_cities.mbtiles").await?; + mbt.check_integrity(&mut conn, IntegrityCheckType::Quick) + .await?; + Ok(()) + } + + #[actix_rt::test] + async fn validate_invalid_file() -> MbtResult<()> { + let (mut conn, mbt) = + open("../tests/fixtures/files/invalid_zoomed_world_cities.mbtiles").await?; + let result = mbt.check_agg_tiles_hashes(&mut conn).await; + assert!(matches!(result, Err(MbtError::AggHashMismatch(..)))); + Ok(()) + } +} + +/// Compute the hash of the combined tiles in the mbtiles file tiles table/view. +/// This should work on all mbtiles files per `MBTiles` specification. 
+pub async fn calc_agg_tiles_hash(conn: &mut T) -> MbtResult +where + for<'e> &'e mut T: SqliteExecutor<'e>, +{ + debug!("Calculating agg_tiles_hash"); + let query = query( + // The md5_concat func will return NULL if there are no rows in the tiles table. + // For our use case, we will treat it as an empty string, and hash that. + // `tile_data` values must be stored as a blob per MBTiles spec + // `md5` functions will fail if the value is not text/blob/null + // + // Note that ORDER BY controls the output ordering, which is important for the hash value, + // and having it at the top level would not order values properly. + // See https://sqlite.org/forum/forumpost/228bb96e12a746ce + " +SELECT coalesce( + (SELECT md5_concat_hex( + cast(zoom_level AS text), + cast(tile_column AS text), + cast(tile_row AS text), + tile_data + ) + OVER (ORDER BY zoom_level, tile_column, tile_row ROWS + BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) + FROM tiles + LIMIT 1), + md5_hex('') +); +", + ); + Ok(query.fetch_one(conn).await?.get::(0)) +} diff --git a/tests/expected/mbtiles/summary.txt b/tests/expected/mbtiles/summary.txt index 28dc3ecf..bad18a81 100644 --- a/tests/expected/mbtiles/summary.txt +++ b/tests/expected/mbtiles/summary.txt @@ -1,4 +1,4 @@ -File: ./tests/fixtures/mbtiles/world_cities.mbtiles +MBTiles file summary for ./tests/fixtures/mbtiles/world_cities.mbtiles Schema: flat File size: 48.00KiB Page size: 4.00KiB