From 6320e0fff3ad7e1b262cd7115353c4671b47f0b1 Mon Sep 17 00:00:00 2001 From: Yuri Astrakhan Date: Thu, 30 May 2024 14:28:34 -0400 Subject: [PATCH] Add `agg_tiles_hash_before_apply`, warnings, and validate on patch (#1266) Implement #1244 --- mbtiles/src/bin/mbtiles.rs | 17 +- mbtiles/src/copier.rs | 252 ++++++++++++------ mbtiles/src/errors.rs | 20 +- mbtiles/src/lib.rs | 2 +- mbtiles/src/mbtiles.rs | 12 +- mbtiles/src/patcher.rs | 54 +++- mbtiles/src/validation.rs | 93 ++++++- mbtiles/tests/copy.rs | 27 +- .../snapshots/copy__databases@flat__dif.snap | 1 + .../copy__databases@flat__dif_empty.snap | 1 + .../snapshots/copy__databases@hash__dif.snap | 1 + .../copy__databases@hash__dif_empty.snap | 1 + .../snapshots/copy__databases@norm__dif.snap | 1 + .../copy__databases@norm__dif_empty.snap | 1 + tests/expected/mbtiles/meta-all.txt | 1 + tests/fixtures/mbtiles/world_cities.mbtiles | Bin 49152 -> 49152 bytes .../mbtiles/world_cities_modified.mbtiles | Bin 49152 -> 49152 bytes 17 files changed, 363 insertions(+), 121 deletions(-) diff --git a/mbtiles/src/bin/mbtiles.rs b/mbtiles/src/bin/mbtiles.rs index 7bf40685..26992c23 100644 --- a/mbtiles/src/bin/mbtiles.rs +++ b/mbtiles/src/bin/mbtiles.rs @@ -66,6 +66,9 @@ enum Commands { base_file: PathBuf, /// Diff file patch_file: PathBuf, + /// Force patching operation, ignoring some warnings that otherwise would prevent the operation. Use with caution. + #[arg(short, long)] + force: bool, }, /// Update metadata to match the content of the file #[command(name = "meta-update", alias = "update-meta")] @@ -156,6 +159,12 @@ pub struct SharedCopyOpts { /// Skip generating a global hash for mbtiles validation. By default, `mbtiles` will compute `agg_tiles_hash` metadata value. #[arg(long)] skip_agg_tiles_hash: bool, + /// Force copy operation, ignoring some warnings that otherwise would prevent the operation. Use with caution. + #[arg(short, long)] + force: bool, + /// Perform agg_hash validation on the original and destination files. + #[arg(long)] + validate: bool, } impl SharedCopyOpts { @@ -181,6 +190,8 @@ impl SharedCopyOpts { zoom_levels: self.zoom_levels, bbox: self.bbox, skip_agg_tiles_hash: self.skip_agg_tiles_hash, + force: self.force, + validate: self.validate, // Constants dst_type: None, // Taken from dst_type_cli } @@ -233,8 +244,9 @@ async fn main_int() -> anyhow::Result<()> { Commands::ApplyPatch { base_file, patch_file, + force, } => { - apply_patch(base_file, patch_file).await?; + apply_patch(base_file, patch_file, force).await?; } Commands::UpdateMetadata { file, update_zoom } => { let mbt = Mbtiles::new(file.as_path())?; @@ -258,7 +270,7 @@ async fn main_int() -> anyhow::Result<()> { } }); let mbt = Mbtiles::new(file.as_path())?; - mbt.validate(integrity_check, agg_hash).await?; + mbt.open_and_validate(integrity_check, agg_hash).await?; } Commands::Summary { file } => { let mbt = Mbtiles::new(file.as_path())?; @@ -597,6 +609,7 @@ mod tests { command: ApplyPatch { base_file: PathBuf::from("src_file"), patch_file: PathBuf::from("diff_file"), + force: false, } } ); diff --git a/mbtiles/src/copier.rs b/mbtiles/src/copier.rs index c152205b..a550b846 100644 --- a/mbtiles/src/copier.rs +++ b/mbtiles/src/copier.rs @@ -3,21 +3,24 @@ use std::path::PathBuf; use enum_display::EnumDisplay; use itertools::Itertools as _; -use log::{debug, info, trace}; +use log::{debug, info, trace, warn}; use martin_tile_utils::{bbox_to_xyz, MAX_ZOOM}; use serde::{Deserialize, Serialize}; use sqlite_hashes::rusqlite::Connection; -use sqlx::{query, Executor as _, Row, SqliteConnection}; +use sqlx::{query, Connection as _, Executor as _, Row, SqliteConnection}; use tilejson::Bounds; use crate::errors::MbtResult; +use crate::mbtiles::PatchFileInfo; use crate::queries::{ create_tiles_with_hash_view, detach_db, init_mbtiles_schema, is_empty_database, }; +use crate::AggHashType::Verify; +use crate::IntegrityCheckType::Quick; use crate::MbtType::{Flat, FlatWithHash, Normalized}; use crate::{ invert_y_value, reset_db_settings, CopyType, MbtError, MbtType, MbtTypeCli, Mbtiles, - AGG_TILES_HASH, AGG_TILES_HASH_AFTER_APPLY, + AGG_TILES_HASH, AGG_TILES_HASH_AFTER_APPLY, AGG_TILES_HASH_BEFORE_APPLY, }; #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, EnumDisplay)] @@ -68,12 +71,16 @@ pub struct MbtilesCopier { pub apply_patch: Option, /// Skip generating a global hash for mbtiles validation. By default, `mbtiles` will compute `agg_tiles_hash` metadata value. pub skip_agg_tiles_hash: bool, + /// Ignore some warnings and continue with the copying operation + pub force: bool, + /// Perform `agg_hash` validation on the original and destination files. + pub validate: bool, } #[derive(Clone, Debug)] struct MbtileCopierInt { - src_mbtiles: Mbtiles, - dst_mbtiles: Mbtiles, + src_mbt: Mbtiles, + dst_mbt: Mbtiles, options: MbtilesCopier, } @@ -114,23 +121,30 @@ impl MbtileCopierInt { } Ok(MbtileCopierInt { - src_mbtiles: Mbtiles::new(&options.src_file)?, - dst_mbtiles: Mbtiles::new(&options.dst_file)?, + src_mbt: Mbtiles::new(&options.src_file)?, + dst_mbt: Mbtiles::new(&options.dst_file)?, options, }) } pub async fn run(self) -> MbtResult { - if self.options.diff_with_file.is_none() && self.options.apply_patch.is_none() { - self.run_simple().await + if let Some(diff_file) = &self.options.diff_with_file { + let mbt = Mbtiles::new(diff_file)?; + self.run_with_diff(mbt).await + } else if let Some(patch_file) = &self.options.apply_patch { + let mbt = Mbtiles::new(patch_file)?; + self.run_with_patch(mbt).await } else { - self.run_with_diff_or_patch().await + self.run_simple().await } } - pub async fn run_simple(self) -> MbtResult { - let src_type = self.src_mbtiles.open_and_detect_type().await?; - let mut conn = self.dst_mbtiles.open_or_new().await?; + async fn run_simple(self) -> MbtResult { + let mut conn = self.src_mbt.open_readonly().await?; + let src_type = self.src_mbt.detect_type(&mut conn).await?; + conn.close().await?; + + conn = self.dst_mbt.open_or_new().await?; let is_empty_db = is_empty_database(&mut conn).await?; let on_duplicate = if let Some(on_duplicate) = self.options.on_duplicate { @@ -141,24 +155,24 @@ impl MbtileCopierInt { return Err(MbtError::DestinationFileExists(self.options.dst_file)); }; - self.src_mbtiles.attach_to(&mut conn, "sourceDb").await?; + self.src_mbt.attach_to(&mut conn, "sourceDb").await?; let dst_type = if is_empty_db { self.options.dst_type().unwrap_or(src_type) } else { - self.validate_dst_type(self.dst_mbtiles.detect_type(&mut conn).await?)? + self.validate_dst_type(self.dst_mbt.detect_type(&mut conn).await?)? }; info!( "Copying {src_mbt} ({src_type}) {what}to a {is_new} file {dst_mbt} ({dst_type})", - src_mbt = self.src_mbtiles, + src_mbt = self.src_mbt, what = self.copy_text(), is_new = if is_empty_db { "new" } else { "existing" }, - dst_mbt = self.dst_mbtiles, + dst_mbt = self.dst_mbt, ); if is_empty_db { - self.init_new_schema(&mut conn, src_type, dst_type).await?; + self.init_schema(&mut conn, src_type, dst_type).await?; } self.copy_with_rusqlite( @@ -166,12 +180,11 @@ impl MbtileCopierInt { on_duplicate, dst_type, get_select_from(src_type, dst_type), - false, ) .await?; if self.options.copy.copy_tiles() && !self.options.skip_agg_tiles_hash { - self.dst_mbtiles.update_agg_tiles_hash(&mut conn).await?; + self.dst_mbt.update_agg_tiles_hash(&mut conn).await?; } detach_db(&mut conn, "sourceDb").await?; @@ -179,62 +192,149 @@ impl MbtileCopierInt { Ok(conn) } - pub async fn run_with_diff_or_patch(self) -> MbtResult { - let ((Some(dif_file), None) | (None, Some(dif_file))) = - (&self.options.diff_with_file, &self.options.apply_patch) - else { - unreachable!() - }; - let dif_mbt = Mbtiles::new(dif_file)?; - let dif_type = dif_mbt.open_and_detect_type().await?; - let is_creating_diff = self.options.diff_with_file.is_some(); + /// Compare two files, and write their difference to the diff file + async fn run_with_diff(self, dif_mbt: Mbtiles) -> MbtResult { + let mut dif_conn = dif_mbt.open_readonly().await?; + let dif_info = dif_mbt.examine_diff(&mut dif_conn).await?; + dif_mbt.assert_hashes(&dif_info, self.options.force)?; + dif_conn.close().await?; - let src_type = self.src_mbtiles.open_and_detect_type().await?; + let src_info = self.validate_src_file().await?; - let mut conn = self.dst_mbtiles.open_or_new().await?; + let mut conn = self.dst_mbt.open_or_new().await?; if !is_empty_database(&mut conn).await? { return Err(MbtError::NonEmptyTargetFile(self.options.dst_file)); } - self.src_mbtiles.attach_to(&mut conn, "sourceDb").await?; + self.src_mbt.attach_to(&mut conn, "sourceDb").await?; dif_mbt.attach_to(&mut conn, "diffDb").await?; - let what = self.copy_text(); - let src_path = &self.src_mbtiles.filepath(); - let dst_path = &self.dst_mbtiles.filepath(); - let dif_path = dif_mbt.filepath(); - let dst_type = self.options.dst_type().unwrap_or(src_type); - if is_creating_diff { - info!("Comparing {src_path} ({src_type}) and {dif_path} ({dif_type}) {what}into a new file {dst_path} ({dst_type})"); - } else { - info!("Applying patch from {dif_path} ({dif_type}) to {src_path} ({src_type}) {what}into a new file {dst_path} ({dst_type})"); - } - - self.init_new_schema(&mut conn, src_type, dst_type).await?; + let dst_type = self.options.dst_type().unwrap_or(src_info.mbt_type); + info!( + "Comparing {src_mbt} ({src_type}) and {dif_path} ({dif_type}) {what}into a new file {dst_path} ({dst_type})", + src_mbt = self.src_mbt, + src_type = src_info.mbt_type, + dif_path = dif_mbt.filepath(), + dif_type = dif_info.mbt_type, + what = self.copy_text(), + dst_path = self.dst_mbt.filepath() + ); + self.init_schema(&mut conn, src_info.mbt_type, dst_type) + .await?; self.copy_with_rusqlite( &mut conn, CopyDuplicateMode::Override, dst_type, - &(if is_creating_diff { - get_select_from_with_diff(dif_type, dst_type) - } else { - get_select_from_apply_patch(src_type, dif_type, dst_type) - }), - true, + &get_select_from_with_diff(dif_info.mbt_type, dst_type), ) .await?; + if let Some(hash) = src_info.agg_tiles_hash { + self.dst_mbt + .set_metadata_value(&mut conn, AGG_TILES_HASH_BEFORE_APPLY, &hash) + .await?; + } + if let Some(hash) = dif_info.agg_tiles_hash { + self.dst_mbt + .set_metadata_value(&mut conn, AGG_TILES_HASH_AFTER_APPLY, &hash) + .await?; + }; + + // TODO: perhaps disable all except --copy all when using with diffs, or else is not making much sense if self.options.copy.copy_tiles() && !self.options.skip_agg_tiles_hash { - self.dst_mbtiles.update_agg_tiles_hash(&mut conn).await?; + self.dst_mbt.update_agg_tiles_hash(&mut conn).await?; + } + + detach_db(&mut conn, "diffDb").await?; + detach_db(&mut conn, "sourceDb").await?; + self.validate(&self.dst_mbt, &mut conn).await?; + + Ok(conn) + } + + /// Apply a patch file to the source file and write the result to the destination file + async fn run_with_patch(self, dif_mbt: Mbtiles) -> MbtResult { + let mut dif_conn = dif_mbt.open_readonly().await?; + let dif_info = dif_mbt.examine_diff(&mut dif_conn).await?; + self.validate(&dif_mbt, &mut dif_conn).await?; + dif_mbt.validate_diff_info(&dif_info, self.options.force)?; + dif_conn.close().await?; + + let src_type = self.validate_src_file().await?.mbt_type; + + let mut conn = self.dst_mbt.open_or_new().await?; + if !is_empty_database(&mut conn).await? { + return Err(MbtError::NonEmptyTargetFile(self.options.dst_file)); + } + + self.src_mbt.attach_to(&mut conn, "sourceDb").await?; + dif_mbt.attach_to(&mut conn, "diffDb").await?; + + let dst_type = self.options.dst_type().unwrap_or(src_type); + info!("Applying patch from {dif_path} ({dif_type}) to {src_mbt} ({src_type}) {what}into a new file {dst_path} ({dst_type})", + dif_path = dif_mbt.filepath(), + dif_type = dif_info.mbt_type, + src_mbt = self.src_mbt, + what = self.copy_text(), + dst_path = self.dst_mbt.filepath()); + + self.init_schema(&mut conn, src_type, dst_type).await?; + self.copy_with_rusqlite( + &mut conn, + CopyDuplicateMode::Override, + dst_type, + &get_select_from_apply_patch(src_type, dif_info.mbt_type, dst_type), + ) + .await?; + + // TODO: perhaps disable all except --copy all when using with diffs, or else is not making much sense + if self.options.copy.copy_tiles() && !self.options.skip_agg_tiles_hash { + self.dst_mbt.update_agg_tiles_hash(&mut conn).await?; + + let new_hash = self.dst_mbt.get_agg_tiles_hash(&mut conn).await?; + match (dif_info.agg_tiles_hash_after_apply, new_hash) { + (Some(expected), Some(actual)) if expected != actual => { + let err = MbtError::AggHashMismatchAfterApply( + dif_mbt.filepath().to_string(), + expected, + self.dst_mbt.filepath().to_string(), + actual, + ); + if !self.options.force { + return Err(err); + } + warn!("{err}"); + } + _ => {} + } } detach_db(&mut conn, "diffDb").await?; detach_db(&mut conn, "sourceDb").await?; + self.validate(&self.dst_mbt, &mut conn).await?; + Ok(conn) } + async fn validate(&self, mbt: &Mbtiles, conn: &mut SqliteConnection) -> MbtResult<()> { + if self.options.validate { + mbt.validate(conn, Quick, Verify).await?; + } + Ok(()) + } + + async fn validate_src_file(&self) -> MbtResult { + let mut src_conn = self.src_mbt.open_readonly().await?; + let src_info = self.src_mbt.examine_diff(&mut src_conn).await?; + self.validate(&self.src_mbt, &mut src_conn).await?; + self.src_mbt.assert_hashes(&src_info, self.options.force)?; + src_conn.close().await?; + + Ok(src_info) + } + fn copy_text(&self) -> &str { match self.options.copy { CopyType::All => "", @@ -249,7 +349,6 @@ impl MbtileCopierInt { on_duplicate: CopyDuplicateMode, dst_type: MbtType, select_from: &str, - is_diff: bool, ) -> Result<(), MbtError> { // SAFETY: This must be scoped to make sure the handle is dropped before we continue using conn // Make sure not to execute any other queries while the handle is locked @@ -266,7 +365,7 @@ impl MbtileCopierInt { } if self.options.copy.copy_metadata() { - self.copy_metadata(&rusqlite_conn, is_diff, on_duplicate) + self.copy_metadata(&rusqlite_conn, on_duplicate) } else { debug!("Skipping copying metadata"); Ok(()) @@ -276,38 +375,35 @@ impl MbtileCopierInt { fn copy_metadata( &self, rusqlite_conn: &Connection, - is_diff: bool, on_duplicate: CopyDuplicateMode, ) -> Result<(), MbtError> { let on_dupl = on_duplicate.to_sql(); let sql; - if is_diff { - // Insert all rows from diffDb.metadata if they do not exist or are different in sourceDb.metadata. - // Also insert all names from sourceDb.metadata that do not exist in diffDb.metadata, with their value set to NULL. - // Rename agg_tiles_hash to agg_tiles_hash_after_apply because agg_tiles_hash will be auto-added later - if self.options.diff_with_file.is_some() { - // Include agg_tiles_hash value even if it is the same because we will still need it when applying the diff - sql = format!( - " + + // Insert all rows from diffDb.metadata if they do not exist or are different in sourceDb.metadata. + // Also insert all names from sourceDb.metadata that do not exist in diffDb.metadata, with their value set to NULL. + // Skip agg_tiles_hash because that requires special handling + if self.options.diff_with_file.is_some() { + // Include agg_tiles_hash value even if it is the same because we will still need it when applying the diff + sql = format!( + " INSERT {on_dupl} INTO metadata (name, value) - SELECT IIF(name = '{AGG_TILES_HASH}','{AGG_TILES_HASH_AFTER_APPLY}', name) as name - , value + SELECT name, value FROM ( SELECT COALESCE(difMD.name, srcMD.name) as name , difMD.value as value FROM sourceDb.metadata AS srcMD FULL JOIN diffDb.metadata AS difMD ON srcMD.name = difMD.name - WHERE srcMD.value != difMD.value OR srcMD.value ISNULL OR difMD.value ISNULL OR srcMD.name = '{AGG_TILES_HASH}' + WHERE srcMD.value != difMD.value OR srcMD.value ISNULL OR difMD.value ISNULL ) joinedMD - WHERE name != '{AGG_TILES_HASH_AFTER_APPLY}'" - ); - debug!("Copying metadata, taking into account diff file with {sql}"); - } else { - sql = format!( - " + WHERE name NOT IN ('{AGG_TILES_HASH}', '{AGG_TILES_HASH_BEFORE_APPLY}', '{AGG_TILES_HASH_AFTER_APPLY}')" + ); + debug!("Copying metadata, taking into account diff file with {sql}"); + } else if self.options.apply_patch.is_some() { + sql = format!( + " INSERT {on_dupl} INTO metadata (name, value) - SELECT IIF(name = '{AGG_TILES_HASH_AFTER_APPLY}','{AGG_TILES_HASH}', name) as name - , value + SELECT name, value FROM ( SELECT COALESCE(srcMD.name, difMD.name) as name , COALESCE(difMD.value, srcMD.value) as value @@ -315,10 +411,9 @@ impl MbtileCopierInt { ON srcMD.name = difMD.name WHERE difMD.name ISNULL OR difMD.value NOTNULL ) joinedMD - WHERE name != '{AGG_TILES_HASH}'" - ); - debug!("Copying metadata, and applying the diff file with {sql}"); - } + WHERE name NOT IN ('{AGG_TILES_HASH}', '{AGG_TILES_HASH_BEFORE_APPLY}', '{AGG_TILES_HASH_AFTER_APPLY}')" + ); + debug!("Copying metadata, and applying the diff file with {sql}"); } else { sql = format!( " @@ -404,7 +499,7 @@ impl MbtileCopierInt { Ok(dst_type) } - async fn init_new_schema( + async fn init_schema( &self, conn: &mut SqliteConnection, src: MbtType, @@ -826,6 +921,7 @@ mod tests { src_file: src.clone(), dst_file: dst.clone(), diff_with_file: Some(diff_file.clone()), + force: true, ..Default::default() }; let mut dst_conn = opt.run().await?; diff --git a/mbtiles/src/errors.rs b/mbtiles/src/errors.rs index b68137fb..2f4e6092 100644 --- a/mbtiles/src/errors.rs +++ b/mbtiles/src/errors.rs @@ -3,7 +3,7 @@ use std::path::PathBuf; use martin_tile_utils::{TileInfo, MAX_ZOOM}; use sqlite_hashes::rusqlite; -use crate::MbtType; +use crate::{MbtType, AGG_TILES_HASH, AGG_TILES_HASH_AFTER_APPLY, AGG_TILES_HASH_BEFORE_APPLY}; #[derive(thiserror::Error, Debug)] pub enum MbtError { @@ -77,6 +77,24 @@ pub enum MbtError { #[error("Invalid zoom value {0}={1}, expecting an integer between 0..{MAX_ZOOM}")] InvalidZoomValue(&'static str, String), + + #[error("A file {0} does not have an {AGG_TILES_HASH} metadata entry, probably because it was not created by this tool. Use `--force` to ignore this warning, or run this to update hash value: `mbtiles validate --agg-hash update {0}`")] + CannotDiffFileWithoutHash(String), + + #[error("File {0} has {AGG_TILES_HASH_BEFORE_APPLY} or {AGG_TILES_HASH_AFTER_APPLY} metadata entry, indicating it is a patch file which should not be diffed with another file. Use `--force` to ignore this warning.")] + DiffingDiffFile(String), + + #[error("A file {0} does not seem to be a patch diff file because it has no {AGG_TILES_HASH_BEFORE_APPLY} and {AGG_TILES_HASH_AFTER_APPLY} metadata entries. These entries are automatically created when using `mbtiles diff` and `mbitiles copy --diff-with-file`. Use `--force` to ignore this warning.")] + PatchFileHasNoHashes(String), + + #[error("A file {0} does not have {AGG_TILES_HASH_BEFORE_APPLY} metadata, probably because it was created by an older version of the `mbtiles` tool. Use `--force` to ignore this warning, but ensure you are applying the patch to the right file.")] + PatchFileHasNoBeforeHash(String), + + #[error("The {AGG_TILES_HASH_BEFORE_APPLY}='{1}' in patch file {0} does not match {AGG_TILES_HASH}='{3}' in the file {2}")] + AggHashMismatchWithDiff(String, String, String, String), + + #[error("The {AGG_TILES_HASH_AFTER_APPLY}='{1}' in patch file {0} does not match {AGG_TILES_HASH}='{3}' in the file {2} after the patch was applied")] + AggHashMismatchAfterApply(String, String, String, String), } pub type MbtResult = Result; diff --git a/mbtiles/src/lib.rs b/mbtiles/src/lib.rs index 272915f8..1f54ebb6 100644 --- a/mbtiles/src/lib.rs +++ b/mbtiles/src/lib.rs @@ -32,7 +32,7 @@ pub use update::UpdateZoomType; mod validation; pub use validation::{ calc_agg_tiles_hash, AggHashType, IntegrityCheckType, MbtType, AGG_TILES_HASH, - AGG_TILES_HASH_AFTER_APPLY, + AGG_TILES_HASH_AFTER_APPLY, AGG_TILES_HASH_BEFORE_APPLY, }; /// `MBTiles` uses a TMS (Tile Map Service) scheme for its tile coordinates (inverted along the Y axis). diff --git a/mbtiles/src/mbtiles.rs b/mbtiles/src/mbtiles.rs index 9808fb19..ebd2e5c2 100644 --- a/mbtiles/src/mbtiles.rs +++ b/mbtiles/src/mbtiles.rs @@ -42,6 +42,13 @@ impl CopyType { } } +pub struct PatchFileInfo { + pub mbt_type: MbtType, + pub agg_tiles_hash: Option, + pub agg_tiles_hash_before_apply: Option, + pub agg_tiles_hash_after_apply: Option, +} + #[derive(Clone, Debug)] pub struct Mbtiles { filepath: String, @@ -212,11 +219,6 @@ impl Mbtiles { ), } } - - pub async fn open_and_detect_type(&self) -> MbtResult { - let mut conn = self.open_readonly().await?; - self.detect_type(&mut conn).await - } } pub async fn attach_hash_fn(conn: &mut SqliteConnection) -> MbtResult<()> { diff --git a/mbtiles/src/patcher.rs b/mbtiles/src/patcher.rs index 6e983ffe..6650021c 100644 --- a/mbtiles/src/patcher.rs +++ b/mbtiles/src/patcher.rs @@ -1,25 +1,53 @@ use std::path::PathBuf; -use log::{debug, info}; -use sqlx::query; +use log::{debug, info, warn}; +use sqlx::{query, Connection as _}; use crate::queries::detach_db; use crate::MbtType::{Flat, FlatWithHash, Normalized}; -use crate::{MbtResult, MbtType, Mbtiles, AGG_TILES_HASH, AGG_TILES_HASH_AFTER_APPLY}; +use crate::{ + MbtError, MbtResult, MbtType, Mbtiles, AGG_TILES_HASH, AGG_TILES_HASH_AFTER_APPLY, + AGG_TILES_HASH_BEFORE_APPLY, +}; -pub async fn apply_patch(base_file: PathBuf, patch_file: PathBuf) -> MbtResult<()> { +pub async fn apply_patch(base_file: PathBuf, patch_file: PathBuf, force: bool) -> MbtResult<()> { let base_mbt = Mbtiles::new(base_file)?; let patch_mbt = Mbtiles::new(patch_file)?; - let patch_type = patch_mbt.open_and_detect_type().await?; + + let mut conn = patch_mbt.open_readonly().await?; + let patch_info = patch_mbt.examine_diff(&mut conn).await?; + patch_mbt.validate_diff_info(&patch_info, force)?; + let patch_type = patch_info.mbt_type; + conn.close().await?; let mut conn = base_mbt.open().await?; - let base_type = base_mbt.detect_type(&mut conn).await?; + let base_info = base_mbt.examine_diff(&mut conn).await?; + let base_hash = base_mbt.get_agg_tiles_hash(&mut conn).await?; + base_mbt.assert_hashes(&base_info, force)?; - info!("Applying patch file {patch_mbt} ({patch_type}) to {base_mbt} ({base_type})"); + match (force, base_hash, patch_info.agg_tiles_hash_before_apply) { + (false, Some(base_hash), Some(expected_hash)) if base_hash != expected_hash => { + return Err(MbtError::AggHashMismatchWithDiff( + patch_mbt.filepath().to_string(), + expected_hash, + base_mbt.filepath().to_string(), + base_hash, + )); + } + (true, Some(base_hash), Some(expected_hash)) if base_hash != expected_hash => { + warn!("Aggregate tiles hash mismatch: Patch file expected {expected_hash} but found {base_hash} in {base_mbt} (force mode)"); + } + _ => {} + } + + info!( + "Applying patch file {patch_mbt} ({patch_type}) to {base_mbt} ({base_type})", + base_type = base_info.mbt_type + ); patch_mbt.attach_to(&mut conn, "patchDb").await?; - let select_from = get_select_from(base_type, patch_type); - let (main_table, insert1, insert2) = get_insert_sql(base_type, select_from); + let select_from = get_select_from(base_info.mbt_type, patch_type); + let (main_table, insert1, insert2) = get_insert_sql(base_info.mbt_type, select_from); let sql = format!("{insert1} WHERE tile_data NOTNULL"); query(&sql).execute(&mut conn).await?; @@ -38,7 +66,7 @@ pub async fn apply_patch(base_file: PathBuf, patch_file: PathBuf) -> MbtResult<( ); query(&sql).execute(&mut conn).await?; - if base_type.is_normalized() { + if base_info.mbt_type.is_normalized() { debug!("Removing unused tiles from the images table (normalized schema)"); let sql = "DELETE FROM images WHERE tile_id NOT IN (SELECT tile_id FROM map)"; query(sql).execute(&mut conn).await?; @@ -53,7 +81,7 @@ pub async fn apply_patch(base_file: PathBuf, patch_file: PathBuf) -> MbtResult<( SELECT IIF(name = '{AGG_TILES_HASH_AFTER_APPLY}', '{AGG_TILES_HASH}', name) as name, value FROM patchDb.metadata - WHERE name NOTNULL AND name != '{AGG_TILES_HASH}';" + WHERE name NOTNULL AND name NOT IN ('{AGG_TILES_HASH}', '{AGG_TILES_HASH_BEFORE_APPLY}');" ); query(&sql).execute(&mut conn).await?; @@ -151,7 +179,7 @@ mod tests { // Apply patch to the src data in in-memory DB let patch_file = PathBuf::from("../tests/fixtures/mbtiles/world_cities_diff.mbtiles"); - apply_patch(src, patch_file).await?; + apply_patch(src, patch_file, true).await?; // Verify the data is the same as the file the patch was generated from Mbtiles::new("../tests/fixtures/mbtiles/world_cities_modified.mbtiles")? @@ -183,7 +211,7 @@ mod tests { // Apply patch to the src data in in-memory DB let patch_file = PathBuf::from("../tests/fixtures/mbtiles/geography-class-jpg-diff.mbtiles"); - apply_patch(src, patch_file).await?; + apply_patch(src, patch_file, true).await?; // Verify the data is the same as the file the patch was generated from Mbtiles::new("../tests/fixtures/mbtiles/geography-class-jpg-modified.mbtiles")? diff --git a/mbtiles/src/validation.rs b/mbtiles/src/validation.rs index 1ac919f2..8db5cac1 100644 --- a/mbtiles/src/validation.rs +++ b/mbtiles/src/validation.rs @@ -7,10 +7,11 @@ use martin_tile_utils::{Format, TileInfo, MAX_ZOOM}; use serde::Serialize; use serde_json::Value; use sqlx::sqlite::SqliteRow; -use sqlx::{query, Row, SqliteExecutor}; +use sqlx::{query, Row, SqliteConnection, SqliteExecutor}; use tilejson::TileJSON; use crate::errors::{MbtError, MbtResult}; +use crate::mbtiles::PatchFileInfo; use crate::queries::{ has_tiles_with_hash, is_flat_tables_type, is_flat_with_hash_tables_type, is_normalized_tables_type, @@ -27,6 +28,9 @@ pub const AGG_TILES_HASH: &str = "agg_tiles_hash"; /// Metadata key for a diff file, describing the eventual [`AGG_TILES_HASH`] value of the resulting tileset once the diff is applied pub const AGG_TILES_HASH_AFTER_APPLY: &str = "agg_tiles_hash_after_apply"; +/// Metadata key for a diff file, describing the expected [`AGG_TILES_HASH`] value of the tileset to which the diff will be applied. +pub const AGG_TILES_HASH_BEFORE_APPLY: &str = "agg_tiles_hash_before_apply"; + #[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, EnumDisplay, Serialize)] #[enum_display(case = "Kebab")] pub enum MbtType { @@ -71,7 +75,7 @@ pub enum AggHashType { } impl Mbtiles { - pub async fn validate( + pub async fn open_and_validate( &self, check_type: IntegrityCheckType, agg_hash: AggHashType, @@ -81,12 +85,24 @@ impl Mbtiles { } else { self.open_readonly().await? }; - self.check_integrity(&mut conn, check_type).await?; - self.check_tiles_type_validity(&mut conn).await?; - self.check_each_tile_hash(&mut conn).await?; + self.validate(&mut conn, check_type, agg_hash).await + } + + pub async fn validate( + &self, + conn: &mut T, + check_type: IntegrityCheckType, + agg_hash: AggHashType, + ) -> MbtResult + where + for<'e> &'e mut T: SqliteExecutor<'e>, + { + self.check_integrity(&mut *conn, check_type).await?; + self.check_tiles_type_validity(&mut *conn).await?; + self.check_each_tile_hash(&mut *conn).await?; match agg_hash { - AggHashType::Verify => self.check_agg_tiles_hashes(&mut conn).await, - AggHashType::Update => self.update_agg_tiles_hash(&mut conn).await, + AggHashType::Verify => self.check_agg_tiles_hashes(conn).await, + AggHashType::Update => self.update_agg_tiles_hash(conn).await, AggHashType::Off => Ok(String::new()), } } @@ -453,6 +469,69 @@ LIMIT 1;" info!("All tile hashes are valid for {self}"); Ok(()) } + + pub async fn examine_diff(&self, conn: &mut SqliteConnection) -> MbtResult { + let info = PatchFileInfo { + mbt_type: self.detect_type(&mut *conn).await?, + agg_tiles_hash: self.get_agg_tiles_hash(&mut *conn).await?, + agg_tiles_hash_before_apply: self + .get_metadata_value(&mut *conn, AGG_TILES_HASH_BEFORE_APPLY) + .await?, + agg_tiles_hash_after_apply: self + .get_metadata_value(&mut *conn, AGG_TILES_HASH_AFTER_APPLY) + .await?, + }; + + Ok(info) + } + + pub fn assert_hashes(&self, info: &PatchFileInfo, force: bool) -> MbtResult<()> { + if info.agg_tiles_hash.is_none() { + if !force { + return Err(MbtError::CannotDiffFileWithoutHash( + self.filepath().to_string(), + )); + } + warn!("File {self} has no {AGG_TILES_HASH} metadata field, probably because it was created by an older version of the `mbtiles` tool. Use this command to update the value:\nmbtiles validate --agg-hash update {self}"); + } else if info.agg_tiles_hash_before_apply.is_some() + || info.agg_tiles_hash_after_apply.is_some() + { + if !force { + return Err(MbtError::DiffingDiffFile(self.filepath().to_string())); + } + warn!("File {self} has {AGG_TILES_HASH_BEFORE_APPLY} or {AGG_TILES_HASH_AFTER_APPLY} metadata field, indicating it is a patch file which should not be diffed with another file."); + } + Ok(()) + } + + pub fn validate_diff_info(&self, info: &PatchFileInfo, force: bool) -> MbtResult<()> { + match ( + &info.agg_tiles_hash_before_apply, + &info.agg_tiles_hash_after_apply, + ) { + (Some(before), Some(after)) => { + info!( + "The patch file {self} expects to be applied to a tileset with {AGG_TILES_HASH}={before}, and should result in hash {after} after applying", + ); + } + (None, Some(_)) => { + if !force { + return Err(MbtError::PatchFileHasNoBeforeHash( + self.filepath().to_string(), + )); + } + warn!( + "The patch file {self} has no {AGG_TILES_HASH_BEFORE_APPLY} metadata field, probably because it was created by an older version of the `mbtiles` tool."); + } + _ => { + if !force { + return Err(MbtError::PatchFileHasNoHashes(self.filepath().to_string())); + } + warn!("The patch file {self} has no {AGG_TILES_HASH_AFTER_APPLY} metadata field, probably because it was not properly created by the `mbtiles` tool."); + } + } + Ok(()) + } } /// Compute the hash of the combined tiles in the mbtiles file tiles table/view. diff --git a/mbtiles/tests/copy.rs b/mbtiles/tests/copy.rs index e0525cd3..5d1347b9 100644 --- a/mbtiles/tests/copy.rs +++ b/mbtiles/tests/copy.rs @@ -242,7 +242,7 @@ fn databases() -> Databases { copy!(result.path("empty_no_hash", mbt_typ), path(&empty_mbt)); let dmp = dump(&mut empty_cn).await.unwrap(); assert_dump!(&dmp, "{typ}__empty"); - let hash = empty_mbt.validate(Off, Verify).await.unwrap(); + let hash = empty_mbt.open_and_validate(Off, Verify).await.unwrap(); allow_duplicates! { assert_snapshot!(hash, @"D41D8CD98F00B204E9800998ECF8427E"); } @@ -265,7 +265,7 @@ fn databases() -> Databases { copy!(result.path("v1_no_hash", mbt_typ), path(&v1_mbt)); let dmp = dump(&mut v1_cn).await.unwrap(); assert_dump!(&dmp, "{typ}__v1"); - let hash = v1_mbt.validate(Off, Verify).await.unwrap(); + let hash = v1_mbt.open_and_validate(Off, Verify).await.unwrap(); allow_duplicates! { assert_snapshot!(hash, @"9ED9178D7025276336C783C2B54D6258"); } @@ -276,7 +276,7 @@ fn databases() -> Databases { new_file!(databases, mbt_typ, METADATA_V2, TILES_V2, "{typ}__v2"); let dmp = dump(&mut v2_cn).await.unwrap(); assert_dump!(&dmp, "{typ}__v2"); - let hash = v2_mbt.validate(Off, Verify).await.unwrap(); + let hash = v2_mbt.open_and_validate(Off, Verify).await.unwrap(); allow_duplicates! { assert_snapshot!(hash, @"3BCDEE3F52407FF1315629298CB99133"); } @@ -291,7 +291,7 @@ fn databases() -> Databases { }; let dmp = dump(&mut dif_cn).await.unwrap(); assert_dump!(&dmp, "{typ}__dif"); - let hash = dif_mbt.validate(Off, Verify).await.unwrap(); + let hash = dif_mbt.open_and_validate(Off, Verify).await.unwrap(); allow_duplicates! { assert_snapshot!(hash, @"B86122579EDCDD4C51F3910894FCC1A1"); } @@ -300,7 +300,7 @@ fn databases() -> Databases { // ----------------- v1_clone ----------------- let (v1_clone_mbt, v1_clone_cn) = open!(databases, "{typ}__v1-clone"); let dmp = copy_dump!(result.path("v1", mbt_typ), path(&v1_clone_mbt)); - let hash = v1_clone_mbt.validate(Off, Verify).await.unwrap(); + let hash = v1_clone_mbt.open_and_validate(Off, Verify).await.unwrap(); allow_duplicates! { assert_snapshot!(hash, @"9ED9178D7025276336C783C2B54D6258"); } @@ -322,7 +322,7 @@ fn databases() -> Databases { }; let dmp = dump(&mut dif_empty_cn).await.unwrap(); assert_dump!(&dmp, "{typ}__dif_empty"); - let hash = dif_empty_mbt.validate(Off, Verify).await.unwrap(); + let hash = dif_empty_mbt.open_and_validate(Off, Verify).await.unwrap(); allow_duplicates! { assert_snapshot!(hash, @"D41D8CD98F00B204E9800998ECF8427E"); } @@ -483,8 +483,8 @@ async fn diff_and_patch( eprintln!("TEST: Applying the difference ({b_db} - {a_db} = {dif_db}) to {a_db}, should get {b_db}"); let (clone_mbt, mut clone_cn) = open!(diff_and_patch, "{prefix}__1"); copy!(databases.path(a_db, *dst_type), path(&clone_mbt)); - apply_patch(path(&clone_mbt), path(&dif_mbt)).await?; - let hash = clone_mbt.validate(Off, Verify).await?; + apply_patch(path(&clone_mbt), path(&dif_mbt), false).await?; + let hash = clone_mbt.open_and_validate(Off, Verify).await?; assert_eq!(hash, databases.hash(b_db, *dst_type)); let dmp = dump(&mut clone_cn).await?; pretty_assert_eq!(&dmp, expected_b); @@ -492,8 +492,8 @@ async fn diff_and_patch( eprintln!("TEST: Applying the difference ({b_db} - {a_db} = {dif_db}) to {b_db}, should not modify it"); let (clone_mbt, mut clone_cn) = open!(diff_and_patch, "{prefix}__2"); copy!(databases.path(b_db, *dst_type), path(&clone_mbt)); - apply_patch(path(&clone_mbt), path(&dif_mbt)).await?; - let hash = clone_mbt.validate(Off, Verify).await?; + apply_patch(path(&clone_mbt), path(&dif_mbt), true).await?; + let hash = clone_mbt.open_and_validate(Off, Verify).await?; assert_eq!(hash, databases.hash(b_db, *dst_type)); let dmp = dump(&mut clone_cn).await?; pretty_assert_eq!(&dmp, expected_b); @@ -522,10 +522,9 @@ async fn patch_on_copy( apply_patch => Some(databases.path("dif", dif_type)), dst_type_cli => v2_type, }; - pretty_assert_eq!( - &dump(&mut v2_cn).await?, - databases.dump("v2", v2_type.unwrap_or(v1_type)) - ); + let actual = dump(&mut v2_cn).await?; + let expected = databases.dump("v2", v2_type.unwrap_or(v1_type)); + pretty_assert_eq!(&actual, expected); Ok(()) } diff --git a/mbtiles/tests/snapshots/copy__databases@flat__dif.snap b/mbtiles/tests/snapshots/copy__databases@flat__dif.snap index 36193a4f..31778798 100644 --- a/mbtiles/tests/snapshots/copy__databases@flat__dif.snap +++ b/mbtiles/tests/snapshots/copy__databases@flat__dif.snap @@ -12,6 +12,7 @@ CREATE TABLE metadata ( values = [ '( "agg_tiles_hash", "B86122579EDCDD4C51F3910894FCC1A1" )', '( "agg_tiles_hash_after_apply", "3BCDEE3F52407FF1315629298CB99133" )', + '( "agg_tiles_hash_before_apply", "9ED9178D7025276336C783C2B54D6258" )', '( "md-edit", "value - v2" )', '( "md-new", "value - new" )', '( "md-remove", NULL )', diff --git a/mbtiles/tests/snapshots/copy__databases@flat__dif_empty.snap b/mbtiles/tests/snapshots/copy__databases@flat__dif_empty.snap index b8830949..e2b5de43 100644 --- a/mbtiles/tests/snapshots/copy__databases@flat__dif_empty.snap +++ b/mbtiles/tests/snapshots/copy__databases@flat__dif_empty.snap @@ -12,6 +12,7 @@ CREATE TABLE metadata ( values = [ '( "agg_tiles_hash", "D41D8CD98F00B204E9800998ECF8427E" )', '( "agg_tiles_hash_after_apply", "9ED9178D7025276336C783C2B54D6258" )', + '( "agg_tiles_hash_before_apply", "9ED9178D7025276336C783C2B54D6258" )', ] [[]] diff --git a/mbtiles/tests/snapshots/copy__databases@hash__dif.snap b/mbtiles/tests/snapshots/copy__databases@hash__dif.snap index ae6e8941..d9feedca 100644 --- a/mbtiles/tests/snapshots/copy__databases@hash__dif.snap +++ b/mbtiles/tests/snapshots/copy__databases@hash__dif.snap @@ -12,6 +12,7 @@ CREATE TABLE metadata ( values = [ '( "agg_tiles_hash", "B86122579EDCDD4C51F3910894FCC1A1" )', '( "agg_tiles_hash_after_apply", "3BCDEE3F52407FF1315629298CB99133" )', + '( "agg_tiles_hash_before_apply", "9ED9178D7025276336C783C2B54D6258" )', '( "md-edit", "value - v2" )', '( "md-new", "value - new" )', '( "md-remove", NULL )', diff --git a/mbtiles/tests/snapshots/copy__databases@hash__dif_empty.snap b/mbtiles/tests/snapshots/copy__databases@hash__dif_empty.snap index 7dc8868c..aedfc881 100644 --- a/mbtiles/tests/snapshots/copy__databases@hash__dif_empty.snap +++ b/mbtiles/tests/snapshots/copy__databases@hash__dif_empty.snap @@ -12,6 +12,7 @@ CREATE TABLE metadata ( values = [ '( "agg_tiles_hash", "D41D8CD98F00B204E9800998ECF8427E" )', '( "agg_tiles_hash_after_apply", "9ED9178D7025276336C783C2B54D6258" )', + '( "agg_tiles_hash_before_apply", "9ED9178D7025276336C783C2B54D6258" )', ] [[]] diff --git a/mbtiles/tests/snapshots/copy__databases@norm__dif.snap b/mbtiles/tests/snapshots/copy__databases@norm__dif.snap index 3dda5192..0231bb4f 100644 --- a/mbtiles/tests/snapshots/copy__databases@norm__dif.snap +++ b/mbtiles/tests/snapshots/copy__databases@norm__dif.snap @@ -50,6 +50,7 @@ CREATE TABLE metadata ( values = [ '( "agg_tiles_hash", "B86122579EDCDD4C51F3910894FCC1A1" )', '( "agg_tiles_hash_after_apply", "3BCDEE3F52407FF1315629298CB99133" )', + '( "agg_tiles_hash_before_apply", "9ED9178D7025276336C783C2B54D6258" )', '( "md-edit", "value - v2" )', '( "md-new", "value - new" )', '( "md-remove", NULL )', diff --git a/mbtiles/tests/snapshots/copy__databases@norm__dif_empty.snap b/mbtiles/tests/snapshots/copy__databases@norm__dif_empty.snap index d8488420..e45a492f 100644 --- a/mbtiles/tests/snapshots/copy__databases@norm__dif_empty.snap +++ b/mbtiles/tests/snapshots/copy__databases@norm__dif_empty.snap @@ -33,6 +33,7 @@ CREATE TABLE metadata ( values = [ '( "agg_tiles_hash", "D41D8CD98F00B204E9800998ECF8427E" )', '( "agg_tiles_hash_after_apply", "9ED9178D7025276336C783C2B54D6258" )', + '( "agg_tiles_hash_before_apply", "9ED9178D7025276336C783C2B54D6258" )', ] [[]] diff --git a/tests/expected/mbtiles/meta-all.txt b/tests/expected/mbtiles/meta-all.txt index 2949d92f..42cf8fee 100644 --- a/tests/expected/mbtiles/meta-all.txt +++ b/tests/expected/mbtiles/meta-all.txt @@ -109,4 +109,5 @@ json: count: 68 geometry: Point layer: cities +agg_tiles_hash: 84792BF4EE9AEDDC5B1A60E707011FEE diff --git a/tests/fixtures/mbtiles/world_cities.mbtiles b/tests/fixtures/mbtiles/world_cities.mbtiles index d5fc059555366aaa27317ea8a171ec50701fdabc..92c23a9e95fcbb693466f4f3071e5a18b845526c 100644 GIT binary patch delta 168 zcmZo@U~Xt&o*>Q0Gf~Ewm4`vEXu-ym1^iOnT#Fd^FZ0jmpTytFU%?;FugEXR&&+j| zYthEW7A|8$9%fD7#Psy|lFXdc;`ofj;tUHDb4w#9HxpM^OGj517iUu^Lq{_MS91e% z14BbMSJ%x~rSwImxcLhh_zU>g@$ck+!9R+_Mp|We>B1KYf5T CpB~`= diff --git a/tests/fixtures/mbtiles/world_cities_modified.mbtiles b/tests/fixtures/mbtiles/world_cities_modified.mbtiles index e6d104bf730a9af5db838195f345bae9e6b817e2..85f88c640de135d281edd05e33649fbac8fd91d0 100644 GIT binary patch delta 193 zcmZo@U~Xt&o*>OAGEv5vRfIvWXu-ym1^iOnTwRR(m$|z5C-JxPSMZ1PEAk8SGjm<# zpTDs&hih|}R3r-%1HfFDZ*G&^A0)HWA2ARb3J-$- j53Ucn59kk?4}%ZH56}58Sg6uw@UkgFk(Mq0}l2