Minor mbtiles summary improvements (#1004)

Partial fix of #1002 

* [x] Move all summary code from `mbtiles/src/mbtiles.rs` to
`mbtiles/src/summary.rs`
* [x] Move Metadata and Validation function to separate files
* [x] Remove `filename` String from the summary stats - not much point
because it is accessible from the mbtiles struct itself

---------

Co-authored-by: Yuri Astrakhan <YuriAstrakhan@gmail.com>
This commit is contained in:
Lucas 2023-11-17 07:36:40 +08:00 committed by GitHub
parent b56d019f88
commit e72f53d9ef
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 1127 additions and 1062 deletions

View File

@ -120,6 +120,7 @@ async fn main_int() -> anyhow::Result<()> {
Commands::Summary { file } => {
let mbt = Mbtiles::new(file.as_path())?;
let mut conn = mbt.open_readonly().await?;
println!("MBTiles file summary for {mbt}");
println!("{}", mbt.summary(&mut conn).await?);
}
}

View File

@ -10,13 +10,12 @@ use sqlite_hashes::rusqlite::params_from_iter;
use sqlx::{query, Executor as _, Row, SqliteConnection};
use crate::errors::MbtResult;
use crate::mbtiles::MbtType::{Flat, FlatWithHash, Normalized};
use crate::mbtiles::{MbtType, MbtTypeCli};
use crate::queries::{
create_flat_tables, create_flat_with_hash_tables, create_normalized_tables,
create_tiles_with_hash_view, detach_db, is_empty_database,
};
use crate::{MbtError, Mbtiles, AGG_TILES_HASH, AGG_TILES_HASH_IN_DIFF};
use crate::MbtType::{Flat, FlatWithHash, Normalized};
use crate::{MbtError, MbtType, MbtTypeCli, Mbtiles, AGG_TILES_HASH, AGG_TILES_HASH_IN_DIFF};
#[derive(PartialEq, Eq, Default, Debug, Clone, EnumDisplay)]
#[enum_display(case = "Kebab")]

View File

@ -1,26 +1,33 @@
#![doc = include_str!("../README.md")]
#![allow(clippy::missing_errors_doc)]
mod copier;
pub use copier::{CopyDuplicateMode, MbtilesCopier};
mod errors;
pub use errors::{MbtError, MbtResult};
mod mbtiles;
pub use mbtiles::{
calc_agg_tiles_hash, IntegrityCheckType, MbtType, MbtTypeCli, Mbtiles, Metadata,
AGG_TILES_HASH, AGG_TILES_HASH_IN_DIFF,
};
pub use mbtiles::{MbtTypeCli, Mbtiles};
mod pool;
pub use pool::MbtilesPool;
mod copier;
pub use copier::{CopyDuplicateMode, MbtilesCopier};
mod metadata;
pub use metadata::Metadata;
mod patcher;
pub use patcher::apply_patch;
mod pool;
pub use pool::MbtilesPool;
mod queries;
pub use queries::{
create_flat_tables, create_flat_with_hash_tables, create_metadata_table,
create_normalized_tables, is_flat_with_hash_tables_type, is_normalized_tables_type,
};
mod summary;
mod validation;
pub use validation::{
calc_agg_tiles_hash, IntegrityCheckType, MbtType, AGG_TILES_HASH, AGG_TILES_HASH_IN_DIFF,
};

File diff suppressed because it is too large Load Diff

278
mbtiles/src/metadata.rs Normal file
View File

@ -0,0 +1,278 @@
use std::fmt::Display;
use std::str::FromStr;
use futures::TryStreamExt;
use log::{info, warn};
use martin_tile_utils::TileInfo;
use serde::ser::SerializeStruct;
use serde::{Serialize, Serializer};
use serde_json::{Value as JSONValue, Value};
use sqlx::{query, SqliteExecutor};
use tilejson::{tilejson, Bounds, Center, TileJSON};
use crate::errors::MbtResult;
use crate::Mbtiles;
/// Parsed contents of an MBTiles `metadata` table, plus the detected tile format.
#[derive(Clone, Debug, PartialEq, Serialize)]
pub struct Metadata {
    /// Tileset identifier, set from the MBTiles filename (without extension).
    pub id: String,
    /// Detected tile format/encoding; serialized via `serialize_ti` as
    /// `{ "format": ..., "encoding": ... }`.
    #[serde(serialize_with = "serialize_ti")]
    pub tile_info: TileInfo,
    /// Value of the `type` metadata key (e.g. `"overlay"`), if present.
    pub layer_type: Option<String>,
    /// TileJSON assembled from the recognized metadata keys.
    pub tilejson: TileJSON,
    /// Parsed value of the `json` metadata key (with `vector_layers` moved
    /// into `tilejson`), if present.
    pub json: Option<JSONValue>,
}
/// Serialize a [`TileInfo`] as a two-field struct: its `format` as a string
/// and its content `encoding` (empty string when the encoding has none).
fn serialize_ti<S: Serializer>(ti: &TileInfo, serializer: S) -> Result<S::Ok, S::Error> {
    let mut state = serializer.serialize_struct("TileInfo", 2)?;
    let format = ti.format.to_string();
    state.serialize_field("format", &format)?;
    let encoding = ti.encoding.content_encoding().unwrap_or_default();
    state.serialize_field("encoding", encoding)?;
    state.end()
}
impl Mbtiles {
    /// Convert a parse result into an `Option`, logging a warning that names
    /// the offending metadata key and the file when parsing failed.
    fn to_val<V, E: Display>(&self, val: Result<V, E>, title: &str) -> Option<V> {
        match val {
            Ok(v) => Some(v),
            Err(err) => {
                let name = &self.filename();
                warn!("Unable to parse metadata {title} value in {name}: {err}");
                None
            }
        }
    }

    /// Get a single metadata value from the metadata table
    pub async fn get_metadata_value<T>(&self, conn: &mut T, key: &str) -> MbtResult<Option<String>>
    where
        for<'e> &'e mut T: SqliteExecutor<'e>,
    {
        let query = query!("SELECT value from metadata where name = ?", key);
        let row = query.fetch_optional(conn).await?;
        // The value column is nullable; treat a NULL value the same as a missing row.
        if let Some(row) = row {
            if let Some(value) = row.value {
                return Ok(Some(value));
            }
        }
        Ok(None)
    }

    /// Set metadata `key` to `value`, or delete the key when `value` is `None`.
    pub async fn set_metadata_value<T>(
        &self,
        conn: &mut T,
        key: &str,
        value: Option<&str>,
    ) -> MbtResult<()>
    where
        for<'e> &'e mut T: SqliteExecutor<'e>,
    {
        if let Some(value) = value {
            query!(
                "INSERT OR REPLACE INTO metadata(name, value) VALUES(?, ?)",
                key,
                value
            )
            .execute(conn)
            .await?;
        } else {
            query!("DELETE FROM metadata WHERE name=?", key)
                .execute(conn)
                .await?;
        }
        Ok(())
    }

    /// Read and parse the whole metadata table into a [`Metadata`] struct,
    /// detecting the tile format from actual tile data.
    pub async fn get_metadata<T>(&self, conn: &mut T) -> MbtResult<Metadata>
    where
        for<'e> &'e mut T: SqliteExecutor<'e>,
    {
        let (tj, layer_type, json) = self.parse_metadata(conn).await?;
        Ok(Metadata {
            id: self.filename().to_string(),
            tile_info: self.detect_format(&tj, conn).await?,
            tilejson: tj,
            layer_type,
            json,
        })
    }

    /// Walk all non-empty metadata rows and sort recognized keys into a
    /// [`TileJSON`]; returns `(tilejson, layer_type, json)`. Unrecognized
    /// keys are preserved in `tilejson.other` (with an info log).
    async fn parse_metadata<T>(
        &self,
        conn: &mut T,
    ) -> MbtResult<(TileJSON, Option<String>, Option<Value>)>
    where
        for<'e> &'e mut T: SqliteExecutor<'e>,
    {
        let query = query!("SELECT name, value FROM metadata WHERE value IS NOT ''");
        let mut rows = query.fetch(conn);
        let mut tj = tilejson! { tiles: vec![] };
        let mut layer_type: Option<String> = None;
        let mut json: Option<JSONValue> = None;
        while let Some(row) = rows.try_next().await? {
            if let (Some(name), Some(value)) = (row.name, row.value) {
                match name.as_ref() {
                    "name" => tj.name = Some(value),
                    "version" => tj.version = Some(value),
                    "bounds" => tj.bounds = self.to_val(Bounds::from_str(value.as_str()), &name),
                    "center" => tj.center = self.to_val(Center::from_str(value.as_str()), &name),
                    "minzoom" => tj.minzoom = self.to_val(value.parse(), &name),
                    "maxzoom" => tj.maxzoom = self.to_val(value.parse(), &name),
                    "description" => tj.description = Some(value),
                    "attribution" => tj.attribution = Some(value),
                    "type" => layer_type = Some(value),
                    "legend" => tj.legend = Some(value),
                    "template" => tj.template = Some(value),
                    "json" => json = self.to_val(serde_json::from_str(&value), &name),
                    "format" | "generator" => {
                        tj.other.insert(name, Value::String(value));
                    }
                    _ => {
                        let file = &self.filename();
                        info!("{file} has an unrecognized metadata value {name}={value}");
                        tj.other.insert(name, Value::String(value));
                    }
                }
            }
        }
        // The `json` metadata blob may carry vector_layers; hoist them into
        // the TileJSON so consumers find them in the standard location.
        if let Some(JSONValue::Object(obj)) = &mut json {
            if let Some(value) = obj.remove("vector_layers") {
                if let Ok(v) = serde_json::from_value(value) {
                    tj.vector_layers = Some(v);
                } else {
                    warn!(
                        "Unable to parse metadata vector_layers value in {}",
                        self.filename()
                    );
                }
            }
        }
        Ok((tj, layer_type, json))
    }
}
#[cfg(test)]
mod tests {
    use std::collections::HashMap;

    use martin_tile_utils::{Encoding, Format};
    use sqlx::Executor as _;
    use tilejson::VectorLayer;

    use super::*;
    use crate::mbtiles::tests::open;

    /// The tileset id/filename are derived from the file path, not the DB contents.
    #[actix_rt::test]
    async fn mbtiles_meta() -> MbtResult<()> {
        let filepath = "../tests/fixtures/mbtiles/geography-class-jpg.mbtiles";
        let mbt = Mbtiles::new(filepath)?;
        assert_eq!(mbt.filepath(), filepath);
        assert_eq!(mbt.filename(), "geography-class-jpg");
        Ok(())
    }

    /// Raster (JPEG) fixture: metadata keys map onto TileJSON fields,
    /// and the tile format is detected as JPEG.
    #[actix_rt::test]
    async fn metadata_jpeg() -> MbtResult<()> {
        let (mut conn, mbt) = open("../tests/fixtures/mbtiles/geography-class-jpg.mbtiles").await?;
        let metadata = mbt.get_metadata(&mut conn).await?;
        let tj = metadata.tilejson;
        assert_eq!(tj.description.unwrap(), "One of the example maps that comes with TileMill - a bright & colorful world map that blends retro and high-tech with its folded paper texture and interactive flag tooltips. ");
        assert!(tj.legend.unwrap().starts_with("<div style="));
        assert_eq!(tj.maxzoom.unwrap(), 1);
        assert_eq!(tj.minzoom.unwrap(), 0);
        assert_eq!(tj.name.unwrap(), "Geography Class");
        assert_eq!(tj.template.unwrap(),"{{#__location__}}{{/__location__}}{{#__teaser__}}<div style=\"text-align:center;\">\n\n<img src=\"data:image/png;base64,{{flag_png}}\" style=\"-moz-box-shadow:0px 1px 3px #222;-webkit-box-shadow:0px 1px 5px #222;box-shadow:0px 1px 3px #222;\"><br>\n<strong>{{admin}}</strong>\n\n</div>{{/__teaser__}}{{#__full__}}{{/__full__}}");
        assert_eq!(tj.version.unwrap(), "1.0.0");
        assert_eq!(metadata.id, "geography-class-jpg");
        assert_eq!(metadata.tile_info, Format::Jpeg.into());
        Ok(())
    }

    /// Vector (MVT) fixture: vector_layers are hoisted from the `json` blob,
    /// format is gzip-encoded MVT, and `type` becomes `layer_type`.
    #[actix_rt::test]
    async fn metadata_mvt() -> MbtResult<()> {
        let (mut conn, mbt) = open("../tests/fixtures/mbtiles/world_cities.mbtiles").await?;
        let metadata = mbt.get_metadata(&mut conn).await?;
        let tj = metadata.tilejson;
        assert_eq!(tj.maxzoom.unwrap(), 6);
        assert_eq!(tj.minzoom.unwrap(), 0);
        assert_eq!(tj.name.unwrap(), "Major cities from Natural Earth data");
        assert_eq!(tj.version.unwrap(), "2");
        assert_eq!(
            tj.vector_layers,
            Some(vec![VectorLayer {
                id: "cities".to_string(),
                fields: vec![("name".to_string(), "String".to_string())]
                    .into_iter()
                    .collect(),
                description: Some(String::new()),
                minzoom: Some(0),
                maxzoom: Some(6),
                other: HashMap::default()
            }])
        );
        assert_eq!(metadata.id, "world_cities");
        assert_eq!(
            metadata.tile_info,
            TileInfo::new(Format::Mvt, Encoding::Gzip)
        );
        assert_eq!(metadata.layer_type, Some("overlay".to_string()));
        Ok(())
    }

    /// Single-key lookups, including missing and empty keys returning None.
    #[actix_rt::test]
    async fn metadata_get_key() -> MbtResult<()> {
        let (mut conn, mbt) = open("../tests/fixtures/mbtiles/world_cities.mbtiles").await?;
        let res = mbt.get_metadata_value(&mut conn, "bounds").await?.unwrap();
        assert_eq!(res, "-123.123590,-37.818085,174.763027,59.352706");
        let res = mbt.get_metadata_value(&mut conn, "name").await?.unwrap();
        assert_eq!(res, "Major cities from Natural Earth data");
        let res = mbt.get_metadata_value(&mut conn, "maxzoom").await?.unwrap();
        assert_eq!(res, "6");
        let res = mbt.get_metadata_value(&mut conn, "nonexistent_key").await?;
        assert_eq!(res, None);
        let res = mbt.get_metadata_value(&mut conn, "").await?;
        assert_eq!(res, None);
        Ok(())
    }

    /// Insert, replace, and delete (None) round-trips on an in-memory DB.
    #[actix_rt::test]
    async fn metadata_set_key() -> MbtResult<()> {
        let (mut conn, mbt) = open("file:metadata_set_key_mem_db?mode=memory&cache=shared").await?;
        conn.execute("CREATE TABLE metadata (name text NOT NULL PRIMARY KEY, value text);")
            .await?;
        mbt.set_metadata_value(&mut conn, "bounds", Some("0.0, 0.0, 0.0, 0.0"))
            .await?;
        assert_eq!(
            mbt.get_metadata_value(&mut conn, "bounds").await?.unwrap(),
            "0.0, 0.0, 0.0, 0.0"
        );
        mbt.set_metadata_value(
            &mut conn,
            "bounds",
            Some("-123.123590,-37.818085,174.763027,59.352706"),
        )
        .await?;
        assert_eq!(
            mbt.get_metadata_value(&mut conn, "bounds").await?.unwrap(),
            "-123.123590,-37.818085,174.763027,59.352706"
        );
        mbt.set_metadata_value(&mut conn, "bounds", None).await?;
        assert_eq!(mbt.get_metadata_value(&mut conn, "bounds").await?, None);
        Ok(())
    }
}

366
mbtiles/src/summary.rs Normal file
View File

@ -0,0 +1,366 @@
use std::fmt::{Display, Formatter};
use std::path::PathBuf;
use std::str::FromStr;
use serde::Serialize;
use size_format::SizeFormatterBinary;
use sqlx::{query, SqliteExecutor};
use tilejson::Bounds;
use crate::{MbtResult, MbtType, Mbtiles};
/// Per-zoom-level tile statistics collected for the summary report.
#[derive(Clone, Debug, PartialEq, Serialize)]
pub struct ZoomInfo {
    /// Zoom level these statistics describe.
    pub zoom: u8,
    /// Number of tiles at this zoom level.
    pub tile_count: u64,
    /// Smallest tile blob size, in bytes.
    pub min_tile_size: u64,
    /// Largest tile blob size, in bytes.
    pub max_tile_size: u64,
    /// Mean tile blob size, in bytes.
    pub avg_tile_size: f64,
    /// Bounding box covered by the tiles present at this zoom.
    pub bbox: Bounds,
}
/// Whole-file summary statistics for an MBTiles file.
///
/// The `Option` fields are `None` when the file contains no tiles
/// (or, for `file_size`, when the size cannot be read from disk).
#[derive(Clone, Debug, PartialEq, Serialize)]
pub struct Summary {
    /// On-disk size in bytes, if available.
    pub file_size: Option<u64>,
    /// Detected MBTiles schema variant.
    pub mbt_type: MbtType,
    /// SQLite page size in bytes.
    pub page_size: u64,
    /// SQLite page count.
    pub page_count: u64,
    /// Total number of tiles across all zoom levels.
    pub tile_count: u64,
    /// Smallest tile size across all zooms.
    pub min_tile_size: Option<u64>,
    /// Largest tile size across all zooms.
    pub max_tile_size: Option<u64>,
    /// Mean tile size across all zooms (NaN for an empty file).
    pub avg_tile_size: f64,
    /// Union of the per-zoom bounding boxes.
    pub bbox: Option<Bounds>,
    /// Lowest zoom level present.
    pub min_zoom: Option<u8>,
    /// Highest zoom level present.
    pub max_zoom: Option<u8>,
    /// Per-zoom statistics, one entry per zoom level present.
    pub zoom_info: Vec<ZoomInfo>,
}
impl Display for Summary {
    /// Render the summary as a human-readable report: schema/file/page stats
    /// followed by a fixed-width table of per-zoom tile statistics, with an
    /// "all" totals row when more than one zoom level is present.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        writeln!(f, "Schema: {}", self.mbt_type)?;
        if let Some(file_size) = self.file_size {
            let file_size = SizeFormatterBinary::new(file_size);
            writeln!(f, "File size: {file_size:.2}B")?;
        } else {
            writeln!(f, "File size: unknown")?;
        }
        let page_size = SizeFormatterBinary::new(self.page_size);
        writeln!(f, "Page size: {page_size:.2}B")?;
        writeln!(f, "Page count: {:.2}", self.page_count)?;
        writeln!(f)?;
        writeln!(
            f,
            "|{:^9}|{:^9}|{:^9}|{:^9}|{:^9}| {:^20} |",
            "Zoom", "Count", "Smallest", "Largest", "Average", "BBox"
        )?;
        for l in &self.zoom_info {
            let min = SizeFormatterBinary::new(l.min_tile_size);
            let max = SizeFormatterBinary::new(l.max_tile_size);
            let avg = SizeFormatterBinary::new(l.avg_tile_size as u64);
            // Bounding-box precision depends on zoom: deeper zooms need more decimals.
            let prec = get_zoom_precision(l.zoom);
            writeln!(
                f,
                "|{:>9}|{:>9}|{:>9}|{:>9}|{:>9}| {:<20} |",
                l.zoom,
                l.tile_count,
                format!("{min:.2}B"),
                format!("{max:.2}B"),
                format!("{avg:.2}B"),
                format!("{:.prec$}", l.bbox),
            )?;
        }
        // A totals row is only meaningful when there are multiple zoom levels.
        if self.zoom_info.len() > 1 {
            if let (Some(min), Some(max), Some(bbox), Some(max_zoom)) = (
                self.min_tile_size,
                self.max_tile_size,
                self.bbox,
                self.max_zoom,
            ) {
                let min = SizeFormatterBinary::new(min);
                let max = SizeFormatterBinary::new(max);
                let avg = SizeFormatterBinary::new(self.avg_tile_size as u64);
                let prec = get_zoom_precision(max_zoom);
                writeln!(
                    f,
                    "|{:>9}|{:>9}|{:>9}|{:>9}|{:>9}| {:<20} |",
                    "all",
                    self.tile_count,
                    // FIX: use the same ".2" size precision as the per-zoom rows;
                    // the totals row previously printed sizes without precision,
                    // producing a visibly inconsistent table.
                    format!("{min:.2}B"),
                    format!("{max:.2}B"),
                    format!("{avg:.2}B"),
                    format!("{:.prec$}", bbox),
                )?;
            }
        }
        Ok(())
    }
}
impl Mbtiles {
    /// Compute MBTiles file summary
    ///
    /// Gathers file size, SQLite page stats, and per-zoom tile statistics
    /// (count, min/max/avg tile size, bounding box) from the `tiles` table/view.
    pub async fn summary<T>(&self, conn: &mut T) -> MbtResult<Summary>
    where
        for<'e> &'e mut T: SqliteExecutor<'e>,
    {
        let mbt_type = self.detect_type(&mut *conn).await?;
        // File size is None when metadata cannot be read (e.g. in-memory DBs).
        // NOTE(review): `PathBuf::from_str` is infallible, so the first `.ok()`
        // is always `Some`; `std::fs::metadata(self.filepath())` would be simpler.
        let file_size = PathBuf::from_str(self.filepath())
            .ok()
            .and_then(|p| p.metadata().ok())
            .map(|m| m.len());
        let sql = query!("PRAGMA page_size;");
        let page_size = sql.fetch_one(&mut *conn).await?.page_size.unwrap() as u64;
        let sql = query!("PRAGMA page_count;");
        let page_count = sql.fetch_one(&mut *conn).await?.page_count.unwrap() as u64;
        // One row per zoom level with aggregated tile statistics.
        let zoom_info = query!(
            "
    SELECT zoom_level AS zoom,
           count() AS count,
           min(length(tile_data)) AS smallest,
           max(length(tile_data)) AS largest,
           avg(length(tile_data)) AS average,
           min(tile_column) AS min_tile_x,
           min(tile_row) AS min_tile_y,
           max(tile_column) AS max_tile_x,
           max(tile_row) AS max_tile_y
    FROM tiles
    GROUP BY zoom_level"
        )
        .fetch_all(&mut *conn)
        .await?;
        let zoom_info: Vec<ZoomInfo> = zoom_info
            .into_iter()
            .map(|r| {
                let zoom = u8::try_from(r.zoom.unwrap()).expect("zoom_level is not a u8");
                ZoomInfo {
                    zoom,
                    tile_count: r.count as u64,
                    // NULL aggregates are treated as zero-sized tiles.
                    min_tile_size: r.smallest.unwrap_or(0) as u64,
                    max_tile_size: r.largest.unwrap_or(0) as u64,
                    avg_tile_size: r.average.unwrap_or(0.0),
                    bbox: xyz_to_bbox(
                        zoom,
                        r.min_tile_x.unwrap(),
                        r.min_tile_y.unwrap(),
                        r.max_tile_x.unwrap(),
                        r.max_tile_y.unwrap(),
                    ),
                }
            })
            .collect();
        let tile_count = zoom_info.iter().map(|l| l.tile_count).sum();
        // Overall average is weighted by per-zoom tile counts.
        let avg_sum = zoom_info
            .iter()
            .map(|l| l.avg_tile_size * l.tile_count as f64)
            .sum::<f64>();
        Ok(Summary {
            file_size,
            mbt_type,
            page_size,
            page_count,
            tile_count,
            min_tile_size: zoom_info.iter().map(|l| l.min_tile_size).reduce(u64::min),
            max_tile_size: zoom_info.iter().map(|l| l.max_tile_size).reduce(u64::max),
            // NOTE: 0/0 == NaN for an empty file — surfaced as NaN in snapshots.
            avg_tile_size: avg_sum / tile_count as f64,
            bbox: zoom_info.iter().map(|l| l.bbox).reduce(|a, b| a + b),
            min_zoom: zoom_info.iter().map(|l| l.zoom).reduce(u8::min),
            max_zoom: zoom_info.iter().map(|l| l.zoom).reduce(u8::max),
            zoom_info,
        })
    }
}
/// Convert min/max XYZ tile coordinates to a bounding box
///
/// Tile rows are interpreted as stored in the MBTiles file (y grows from the
/// bottom edge of the Web-Mercator extent). The max tile is inclusive, so its
/// far edge (`max + 1`) is used for the upper bound.
fn xyz_to_bbox(zoom: u8, min_x: i32, min_y: i32, max_x: i32, max_y: i32) -> Bounds {
    // FIX: use f64 exponentiation instead of `2_u32.pow(zoom)`, which
    // overflows (panics in debug builds) for zoom >= 32 even though `zoom`
    // is a `u8` that can hold up to 255. Powers of two are exact in f64,
    // so results are bit-identical for zoom < 32.
    let tile_size = 40075016.7 / 2_f64.powi(i32::from(zoom));
    let (min_lng, min_lat) = webmercator_to_wgs84(
        -20037508.34 + min_x as f64 * tile_size,
        -20037508.34 + min_y as f64 * tile_size,
    );
    let (max_lng, max_lat) = webmercator_to_wgs84(
        -20037508.34 + (max_x as f64 + 1.0) * tile_size,
        -20037508.34 + (max_y as f64 + 1.0) * tile_size,
    );
    Bounds::new(min_lng, min_lat, max_lng, max_lat)
}
/// Number of decimal places needed to distinguish bounding-box coordinates
/// at the given zoom level (0 at low zooms, growing as tiles shrink).
fn get_zoom_precision(zoom: u8) -> usize {
    // Longitude span of a single tile at this zoom, in degrees.
    // FIX: use f64 exponentiation instead of `2_u32.pow(zoom)`, which
    // overflows for zoom >= 32; identical results for zoom < 32 since
    // powers of two are exact in f64.
    let lng_delta = webmercator_to_wgs84(40075016.7 / 2_f64.powi(i32::from(zoom)), 0f64).0;
    let log = lng_delta.log10() - 0.5;
    if log > 0_f64 {
        0
    } else {
        -log.ceil() as usize
    }
}
/// Convert Web-Mercator (EPSG:3857) meters to WGS84 (lng, lat) in degrees.
fn webmercator_to_wgs84(x: f64, y: f64) -> (f64, f64) {
    const EARTH_RADIUS_M: f64 = 6378137.0;
    let longitude = (x / EARTH_RADIUS_M).to_degrees();
    let latitude = (y / EARTH_RADIUS_M).sinh().atan().to_degrees();
    (longitude, latitude)
}
#[cfg(test)]
mod tests {
    use approx::assert_relative_eq;
    use insta::assert_yaml_snapshot;

    use crate::summary::webmercator_to_wgs84;
    use crate::{create_flat_tables, MbtResult, Mbtiles};

    /// Checks the projection at the mercator extent corners, origin,
    /// and an arbitrary interior point.
    #[actix_rt::test]
    async fn meter_to_lnglat() {
        let (lng, lat) = webmercator_to_wgs84(-20037508.34, -20037508.34);
        assert_relative_eq!(lng, -179.99999997494382, epsilon = f64::EPSILON);
        assert_relative_eq!(lat, -85.05112877764508, epsilon = f64::EPSILON);
        let (lng, lat) = webmercator_to_wgs84(20037508.34, 20037508.34);
        assert_relative_eq!(lng, 179.99999997494382, epsilon = f64::EPSILON);
        assert_relative_eq!(lat, 85.05112877764508, epsilon = f64::EPSILON);
        let (lng, lat) = webmercator_to_wgs84(0.0, 0.0);
        assert_relative_eq!(lng, 0.0, epsilon = f64::EPSILON);
        assert_relative_eq!(lat, 0.0, epsilon = f64::EPSILON);
        let (lng, lat) = webmercator_to_wgs84(3000.0, 9000.0);
        assert_relative_eq!(lng, 0.026949458523585643, epsilon = f64::EPSILON);
        assert_relative_eq!(lat, 0.08084834874097371, epsilon = f64::EPSILON);
    }

    /// A freshly created flat schema with no tiles: all tile-derived fields
    /// are None/empty and the overall average is NaN (0 / 0).
    #[actix_rt::test]
    async fn summary_empty_file() -> MbtResult<()> {
        let mbt = Mbtiles::new("file:mbtiles_empty_summary?mode=memory&cache=shared")?;
        let mut conn = mbt.open().await?;
        create_flat_tables(&mut conn).await.unwrap();
        let res = mbt.summary(&mut conn).await?;
        assert_yaml_snapshot!(res, @r###"
        ---
        file_size: ~
        mbt_type: Flat
        page_size: 4096
        page_count: 5
        tile_count: 0
        min_tile_size: ~
        max_tile_size: ~
        avg_tile_size: NaN
        bbox: ~
        min_zoom: ~
        max_zoom: ~
        zoom_info: []
        "###);
        Ok(())
    }

    /// Full summary of the world_cities fixture, including per-zoom stats.
    #[actix_rt::test]
    async fn summary() -> MbtResult<()> {
        let mbt = Mbtiles::new("../tests/fixtures/mbtiles/world_cities.mbtiles")?;
        let mut conn = mbt.open().await?;
        let res = mbt.summary(&mut conn).await?;
        assert_yaml_snapshot!(res, @r###"
        ---
        file_size: 49152
        mbt_type: Flat
        page_size: 4096
        page_count: 12
        tile_count: 196
        min_tile_size: 64
        max_tile_size: 1107
        avg_tile_size: 96.2295918367347
        bbox:
          - -179.99999997494382
          - -85.05112877764508
          - 180.00000015460688
          - 85.05112879314403
        min_zoom: 0
        max_zoom: 6
        zoom_info:
          - zoom: 0
            tile_count: 1
            min_tile_size: 1107
            max_tile_size: 1107
            avg_tile_size: 1107
            bbox:
              - -179.99999997494382
              - -85.05112877764508
              - 180.00000015460688
              - 85.05112879314403
          - zoom: 1
            tile_count: 4
            min_tile_size: 160
            max_tile_size: 650
            avg_tile_size: 366.5
            bbox:
              - -179.99999997494382
              - -85.05112877764508
              - 180.00000015460688
              - 85.05112879314403
          - zoom: 2
            tile_count: 7
            min_tile_size: 137
            max_tile_size: 495
            avg_tile_size: 239.57142857142858
            bbox:
              - -179.99999997494382
              - -66.51326042021836
              - 180.00000015460688
              - 66.51326049182072
          - zoom: 3
            tile_count: 17
            min_tile_size: 67
            max_tile_size: 246
            avg_tile_size: 134
            bbox:
              - -134.99999995874995
              - -40.9798980140281
              - 180.00000015460688
              - 66.51326049182072
          - zoom: 4
            tile_count: 38
            min_tile_size: 64
            max_tile_size: 175
            avg_tile_size: 86
            bbox:
              - -134.99999995874995
              - -40.9798980140281
              - 180.00000015460688
              - 66.51326049182072
          - zoom: 5
            tile_count: 57
            min_tile_size: 64
            max_tile_size: 107
            avg_tile_size: 72.7719298245614
            bbox:
              - -123.74999995470151
              - -40.9798980140281
              - 180.00000015460688
              - 61.60639642757953
          - zoom: 6
            tile_count: 72
            min_tile_size: 64
            max_tile_size: 97
            avg_tile_size: 68.29166666666667
            bbox:
              - -123.74999995470151
              - -40.9798980140281
              - 180.00000015460688
              - 61.60639642757953
        "###);
        Ok(())
    }
}

456
mbtiles/src/validation.rs Normal file
View File

@ -0,0 +1,456 @@
use std::collections::HashSet;
#[cfg(feature = "cli")]
use clap::ValueEnum;
use enum_display::EnumDisplay;
use log::{debug, info, warn};
use martin_tile_utils::{Format, TileInfo};
use serde::Serialize;
use serde_json::Value;
use sqlx::sqlite::SqliteRow;
use sqlx::{query, Row, SqliteExecutor};
use tilejson::TileJSON;
use crate::errors::{MbtError, MbtResult};
use crate::queries::{
has_tiles_with_hash, is_flat_tables_type, is_flat_with_hash_tables_type,
is_normalized_tables_type,
};
use crate::MbtError::{
AggHashMismatch, AggHashValueNotFound, FailedIntegrityCheck, IncorrectTileHash,
};
use crate::Mbtiles;
/// Metadata key for the aggregate tiles hash value
pub const AGG_TILES_HASH: &str = "agg_tiles_hash";

/// Metadata key used in a diff (patch) file,
/// describing the eventual [`AGG_TILES_HASH`] value once the diff is applied
pub const AGG_TILES_HASH_IN_DIFF: &str = "agg_tiles_hash_after_apply";
/// Schema variant of an MBTiles file: a flat `tiles` table, a flat table with
/// per-tile hashes, or normalized tables (optionally with a hash view).
#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, EnumDisplay, Serialize)]
#[enum_display(case = "Kebab")]
pub enum MbtType {
    Flat,
    FlatWithHash,
    // `hash_view` records whether the `tiles_with_hash` view exists.
    Normalized { hash_view: bool },
}
impl MbtType {
pub fn is_normalized(&self) -> bool {
matches!(self, Self::Normalized { .. })
}
pub fn is_normalized_with_view(&self) -> bool {
matches!(self, Self::Normalized { hash_view: true })
}
}
/// Which SQLite integrity pragma to run during validation:
/// `quick_check` (default), full `integrity_check`, or none at all.
#[derive(PartialEq, Eq, Default, Debug, Clone, EnumDisplay)]
#[enum_display(case = "Kebab")]
#[cfg_attr(feature = "cli", derive(ValueEnum))]
pub enum IntegrityCheckType {
    #[default]
    Quick,
    Full,
    Off,
}
impl Mbtiles {
    /// Run the SQLite integrity check, per-tile hash validation, and aggregate
    /// hash verification. When `update_agg_tiles_hash` is set, the aggregate
    /// hash is recomputed and stored instead of verified. Returns the
    /// aggregate hash value.
    pub async fn validate(
        &self,
        check_type: IntegrityCheckType,
        update_agg_tiles_hash: bool,
    ) -> MbtResult<()> {
        // Only open the file read-write when we may need to store a new hash.
        let mut conn = if update_agg_tiles_hash {
            self.open().await?
        } else {
            self.open_readonly().await?
        };
        self.check_integrity(&mut conn, check_type).await?;
        self.check_each_tile_hash(&mut conn).await?;
        if update_agg_tiles_hash {
            self.update_agg_tiles_hash(&mut conn).await
        } else {
            self.check_agg_tiles_hashes(&mut conn).await
        }
    }

    /// Get the aggregate tiles hash value from the metadata table
    pub async fn get_agg_tiles_hash<T>(&self, conn: &mut T) -> MbtResult<Option<String>>
    where
        for<'e> &'e mut T: SqliteExecutor<'e>,
    {
        self.get_metadata_value(&mut *conn, AGG_TILES_HASH).await
    }

    /// Detect tile format and verify that it is consistent across some tiles
    pub async fn detect_format<T>(&self, tilejson: &TileJSON, conn: &mut T) -> MbtResult<TileInfo>
    where
        for<'e> &'e mut T: SqliteExecutor<'e>,
    {
        let mut tile_info = None;
        let mut tested_zoom = -1_i64;

        // First, pick any random tile
        let query = query!("SELECT zoom_level, tile_column, tile_row, tile_data FROM tiles WHERE zoom_level >= 0 LIMIT 1");
        let row = query.fetch_optional(&mut *conn).await?;
        if let Some(r) = row {
            tile_info = self.parse_tile(r.zoom_level, r.tile_column, r.tile_row, r.tile_data);
            tested_zoom = r.zoom_level.unwrap_or(-1);
        }

        // Afterwards, iterate over tiles in all allowed zooms and check for consistency
        for z in tilejson.minzoom.unwrap_or(0)..=tilejson.maxzoom.unwrap_or(18) {
            if i64::from(z) == tested_zoom {
                continue;
            }
            let query = query! {"SELECT tile_column, tile_row, tile_data FROM tiles WHERE zoom_level = ? LIMIT 1", z};
            let row = query.fetch_optional(&mut *conn).await?;
            if let Some(r) = row {
                match (
                    tile_info,
                    self.parse_tile(Some(z.into()), r.tile_column, r.tile_row, r.tile_data),
                ) {
                    (_, None) => {}
                    (None, new) => tile_info = new,
                    (Some(old), Some(new)) if old == new => {}
                    (Some(old), Some(new)) => {
                        return Err(MbtError::InconsistentMetadata(old, new));
                    }
                }
            }
        }

        // Reconcile the detected format with the `format` metadata value, if any.
        if let Some(Value::String(fmt)) = tilejson.other.get("format") {
            let file = self.filename();
            match (tile_info, Format::parse(fmt)) {
                (_, None) => {
                    warn!("Unknown format value in metadata: {fmt}");
                }
                (None, Some(fmt)) => {
                    if fmt.is_detectable() {
                        warn!("Metadata table sets detectable '{fmt}' tile format, but it could not be verified for file {file}");
                    } else {
                        info!("Using '{fmt}' tile format from metadata table in file {file}");
                    }
                    tile_info = Some(fmt.into());
                }
                (Some(info), Some(fmt)) if info.format == fmt => {
                    debug!("Detected tile format {info} matches metadata.format '{fmt}' in file {file}");
                }
                (Some(info), _) => {
                    warn!("Found inconsistency: metadata.format='{fmt}', but tiles were detected as {info:?} in file {file}. Tiles will be returned as {info:?}.");
                }
            }
        }

        if let Some(info) = tile_info {
            if info.format != Format::Mvt && tilejson.vector_layers.is_some() {
                warn!(
                    "{} has vector_layers metadata but non-vector tiles",
                    self.filename()
                );
            }
            Ok(info)
        } else {
            Err(MbtError::NoTilesFound)
        }
    }

    /// Detect the format of a single tile blob, logging the detection result.
    /// Returns `None` when any coordinate or the blob itself is missing.
    fn parse_tile(
        &self,
        z: Option<i64>,
        x: Option<i64>,
        y: Option<i64>,
        tile: Option<Vec<u8>>,
    ) -> Option<TileInfo> {
        if let (Some(z), Some(x), Some(y), Some(tile)) = (z, x, y, tile) {
            let info = TileInfo::detect(&tile);
            if let Some(info) = info {
                // Log the row flipped from TMS to XYZ for human readability.
                debug!(
                    "Tile {z}/{x}/{} is detected as {info} in file {}",
                    (1 << z) - 1 - y,
                    self.filename(),
                );
            }
            info
        } else {
            None
        }
    }

    /// Determine which MBTiles schema variant this file uses, and verify
    /// that the tile table has the required uniqueness constraint.
    pub async fn detect_type<T>(&self, conn: &mut T) -> MbtResult<MbtType>
    where
        for<'e> &'e mut T: SqliteExecutor<'e>,
    {
        debug!("Detecting MBTiles type for {self}");
        let typ = if is_normalized_tables_type(&mut *conn).await? {
            MbtType::Normalized {
                hash_view: has_tiles_with_hash(&mut *conn).await?,
            }
        } else if is_flat_with_hash_tables_type(&mut *conn).await? {
            MbtType::FlatWithHash
        } else if is_flat_tables_type(&mut *conn).await? {
            MbtType::Flat
        } else {
            return Err(MbtError::InvalidDataFormat(self.filepath().to_string()));
        };
        self.check_for_uniqueness_constraint(&mut *conn, typ)
            .await?;
        Ok(typ)
    }

    /// Verify that the schema's tile table has a unique index covering
    /// exactly (zoom_level, tile_column, tile_row).
    async fn check_for_uniqueness_constraint<T>(
        &self,
        conn: &mut T,
        mbt_type: MbtType,
    ) -> MbtResult<()>
    where
        for<'e> &'e mut T: SqliteExecutor<'e>,
    {
        let table_name = match mbt_type {
            MbtType::Flat => "tiles",
            MbtType::FlatWithHash => "tiles_with_hash",
            MbtType::Normalized { .. } => "map",
        };
        let indexes = query("SELECT name FROM pragma_index_list(?) WHERE [unique] = 1")
            .bind(table_name)
            .fetch_all(&mut *conn)
            .await?;
        // Ensure there is some index on tiles that has a unique constraint on (zoom_level, tile_row, tile_column)
        for index in indexes {
            let mut unique_idx_cols = HashSet::new();
            let rows = query("SELECT DISTINCT name FROM pragma_index_info(?)")
                .bind(index.get::<String, _>("name"))
                .fetch_all(&mut *conn)
                .await?;
            for row in rows {
                unique_idx_cols.insert(row.get("name"));
            }
            // The index column set must match exactly — no missing, no extra columns.
            if unique_idx_cols
                .symmetric_difference(&HashSet::from([
                    "zoom_level".to_string(),
                    "tile_column".to_string(),
                    "tile_row".to_string(),
                ]))
                .collect::<Vec<_>>()
                .is_empty()
            {
                return Ok(());
            }
        }
        Err(MbtError::NoUniquenessConstraint(
            self.filepath().to_string(),
        ))
    }

    /// Perform `SQLite` internal integrity check
    pub async fn check_integrity<T>(
        &self,
        conn: &mut T,
        integrity_check: IntegrityCheckType,
    ) -> MbtResult<()>
    where
        for<'e> &'e mut T: SqliteExecutor<'e>,
    {
        if integrity_check == IntegrityCheckType::Off {
            info!("Skipping integrity check for {self}");
            return Ok(());
        }
        let sql = if integrity_check == IntegrityCheckType::Full {
            "PRAGMA integrity_check;"
        } else {
            "PRAGMA quick_check;"
        };
        let result: Vec<String> = query(sql)
            .map(|row: SqliteRow| row.get(0))
            .fetch_all(&mut *conn)
            .await?;
        // A passing check returns exactly one row containing "ok".
        if result.len() > 1
            || result.get(0).ok_or(FailedIntegrityCheck(
                self.filepath().to_string(),
                vec!["SQLite could not perform integrity check".to_string()],
            ))? != "ok"
        {
            return Err(FailedIntegrityCheck(self.filepath().to_string(), result));
        }
        info!("{integrity_check:?} integrity check passed for {self}");
        Ok(())
    }

    /// Verify the stored `agg_tiles_hash` metadata value against the hash
    /// computed from the current tile contents; returns the computed hash.
    pub async fn check_agg_tiles_hashes<T>(&self, conn: &mut T) -> MbtResult<String>
    where
        for<'e> &'e mut T: SqliteExecutor<'e>,
    {
        let Some(stored) = self.get_agg_tiles_hash(&mut *conn).await? else {
            return Err(AggHashValueNotFound(self.filepath().to_string()));
        };
        let computed = calc_agg_tiles_hash(&mut *conn).await?;
        if stored != computed {
            let file = self.filepath().to_string();
            return Err(AggHashMismatch(computed, stored, file));
        }
        info!("The agg_tiles_hashes={computed} has been verified for {self}");
        Ok(computed)
    }

    /// Compute new aggregate tiles hash and save it to the metadata table (if needed)
    pub async fn update_agg_tiles_hash<T>(&self, conn: &mut T) -> MbtResult<String>
    where
        for<'e> &'e mut T: SqliteExecutor<'e>,
    {
        let old_hash = self.get_agg_tiles_hash(&mut *conn).await?;
        let hash = calc_agg_tiles_hash(&mut *conn).await?;
        if old_hash.as_ref() == Some(&hash) {
            info!("Metadata value agg_tiles_hash is already set to the correct hash `{hash}` in {self}");
        } else {
            if let Some(old_hash) = old_hash {
                info!("Updating agg_tiles_hash from {old_hash} to {hash} in {self}");
            } else {
                info!("Adding a new metadata value agg_tiles_hash = {hash} in {self}");
            }
            self.set_metadata_value(&mut *conn, AGG_TILES_HASH, Some(&hash))
                .await?;
        }
        Ok(hash)
    }

    /// Verify each tile's stored hash against the md5 of its data.
    /// Flat files carry no per-tile hashes, so they are skipped.
    pub async fn check_each_tile_hash<T>(&self, conn: &mut T) -> MbtResult<()>
    where
        for<'e> &'e mut T: SqliteExecutor<'e>,
    {
        // Note that hex() always returns upper-case HEX values
        let sql = match self.detect_type(&mut *conn).await? {
            MbtType::Flat => {
                info!("Skipping per-tile hash validation because this is a flat MBTiles file");
                return Ok(());
            }
            MbtType::FlatWithHash => {
                "SELECT expected, computed FROM (
                    SELECT
                        upper(tile_hash) AS expected,
                        md5_hex(tile_data) AS computed
                    FROM tiles_with_hash
                ) AS t
                WHERE expected != computed
                LIMIT 1;"
            }
            MbtType::Normalized { .. } => {
                "SELECT expected, computed FROM (
                    SELECT
                        upper(tile_id) AS expected,
                        md5_hex(tile_data) AS computed
                    FROM images
                ) AS t
                WHERE expected != computed
                LIMIT 1;"
            }
        };
        // Any returned row is the first (expected, computed) mismatching pair.
        query(sql)
            .fetch_optional(&mut *conn)
            .await?
            .map_or(Ok(()), |v| {
                Err(IncorrectTileHash(
                    self.filepath().to_string(),
                    v.get(0),
                    v.get(1),
                ))
            })?;
        info!("All tile hashes are valid for {self}");
        Ok(())
    }
}
#[cfg(test)]
pub(crate) mod tests {
    use super::*;
    use crate::mbtiles::tests::open;

    /// One fixture per schema variant; an empty in-memory DB has no tile
    /// tables and must be rejected as an invalid data format.
    #[actix_rt::test]
    async fn detect_type() -> MbtResult<()> {
        let (mut conn, mbt) = open("../tests/fixtures/mbtiles/world_cities.mbtiles").await?;
        let res = mbt.detect_type(&mut conn).await?;
        assert_eq!(res, MbtType::Flat);

        let (mut conn, mbt) = open("../tests/fixtures/mbtiles/zoomed_world_cities.mbtiles").await?;
        let res = mbt.detect_type(&mut conn).await?;
        assert_eq!(res, MbtType::FlatWithHash);

        let (mut conn, mbt) = open("../tests/fixtures/mbtiles/geography-class-jpg.mbtiles").await?;
        let res = mbt.detect_type(&mut conn).await?;
        assert_eq!(res, MbtType::Normalized { hash_view: false });

        let (mut conn, mbt) = open(":memory:").await?;
        let res = mbt.detect_type(&mut conn).await;
        assert!(matches!(res, Err(MbtError::InvalidDataFormat(_))));
        Ok(())
    }

    /// A valid fixture passes the quick integrity check without error.
    #[actix_rt::test]
    async fn validate_valid_file() -> MbtResult<()> {
        let (mut conn, mbt) = open("../tests/fixtures/mbtiles/zoomed_world_cities.mbtiles").await?;
        mbt.check_integrity(&mut conn, IntegrityCheckType::Quick)
            .await?;
        Ok(())
    }

    /// A fixture with tampered tiles must fail the aggregate hash comparison.
    #[actix_rt::test]
    async fn validate_invalid_file() -> MbtResult<()> {
        let (mut conn, mbt) =
            open("../tests/fixtures/files/invalid_zoomed_world_cities.mbtiles").await?;
        let result = mbt.check_agg_tiles_hashes(&mut conn).await;
        assert!(matches!(result, Err(MbtError::AggHashMismatch(..))));
        Ok(())
    }
}
/// Compute the hash of the combined tiles in the mbtiles file tiles table/view.
/// This should work on all mbtiles files per the `MBTiles` specification.
pub async fn calc_agg_tiles_hash<T>(conn: &mut T) -> MbtResult<String>
where
    for<'e> &'e mut T: SqliteExecutor<'e>,
{
    debug!("Calculating agg_tiles_hash");
    let query = query(
        // The md5_concat func will return NULL if there are no rows in the tiles table.
        // For our use case, we will treat it as an empty string, and hash that.
        // `tile_data` values must be stored as a blob per MBTiles spec
        // `md5` functions will fail if the value is not text/blob/null
        //
        // Note that ORDER BY controls the output ordering, which is important for the hash value,
        // and having it at the top level would not order values properly.
        // See https://sqlite.org/forum/forumpost/228bb96e12a746ce
        "
SELECT coalesce(
    (SELECT md5_concat_hex(
        cast(zoom_level AS text),
        cast(tile_column AS text),
        cast(tile_row AS text),
        tile_data
    )
    OVER (ORDER BY zoom_level, tile_column, tile_row ROWS
          BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)
    FROM tiles
    LIMIT 1),
    md5_hex('')
);
",
    );
    Ok(query.fetch_one(conn).await?.get::<String, _>(0))
}

View File

@ -1,4 +1,4 @@
File: ./tests/fixtures/mbtiles/world_cities.mbtiles
MBTiles file summary for ./tests/fixtures/mbtiles/world_cities.mbtiles
Schema: flat
File size: 48.00KiB
Page size: 4.00KiB