Add mbtiles statistics command (#986)

- [x] Add tile statistics to mbtiles tools
- [x] Add test
- [x] Use 4326 instead of 3857 for tile bounds
- [x] Add document
- [x] Use size-format to prettify output 
- [x]  Statistics  struct Refactor
- [x] Cleanup and reformat

Closes #964 

---------

Co-authored-by: Yuri Astrakhan <yuriastrakhan@gmail.com>
This commit is contained in:
Lucas 2023-11-13 14:03:40 +08:00 committed by GitHub
parent dec09bdabf
commit 0398336114
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 471 additions and 5 deletions

View File

@ -16,6 +16,7 @@ actix-http = "3"
actix-rt = "2"
actix-web = "4"
anyhow = "1.0"
approx = "0.5.1"
async-trait = "0.1"
bit-set = "0.5.3"
brotli = "3"
@ -52,6 +53,7 @@ serde = { version = "1", features = ["derive"] }
serde_json = "1"
serde_with = "3"
serde_yaml = "0.9"
size_format = "1.0.2"
spreet = { version = "0.9", default-features = false }
sqlite-hashes = { version = "0.5", default-features = false, features = ["md5", "window", "hex"] }
sqlx = { version = "0.7", features = ["sqlite", "runtime-tokio"] }

View File

@ -0,0 +1,19 @@
# Get tile statistics from an MBTiles file
If you are concerned about efficiency, you can use the `mbtiles stats` command to find out the file size and page size of an MBTiles file, as well as the tile count, tile sizes, and bounds of the covered area (represented as WGS 84 longitude and latitude values) for each zoom level.
```shell
File: /path/to/world_cities.mbtiles
FileSize: 48.00KiB
Schema: flat
Page size: 4.00KiB
| Zoom | Count |Smallest | Largest | Average | BBox |
| 0| 1| 1.08KiB| 1.08KiB| 1.08KiB|-179.99999997494382,-85.05112877764508,180.00000015460688,85.05112879314403|
| 1| 4| 160B| 650B| 366B|-179.99999997494382,-85.05112877764508,180.00000015460688,85.05112879314403|
| 2| 7| 137B| 495B| 239B|-179.99999997494382,-66.51326042021836,180.00000015460688,66.51326049182072|
| 3| 17| 67B| 246B| 134B|-134.99999995874995,-40.9798980140281,180.00000015460688,66.51326049182072|
| 4| 38| 64B| 175B| 86B|-134.99999995874995,-40.9798980140281,180.00000015460688,66.51326049182072|
| 5| 57| 64B| 107B| 72B|-123.74999995470151,-40.9798980140281,180.00000015460688,61.60639642757953|
| 6| 72| 64B| 97B| 68B|-123.74999995470151,-40.9798980140281,180.00000015460688,61.60639642757953|
| all| 196| 64B| 1.0KiB| 96B|
```

View File

@ -26,4 +26,5 @@
- [MBTiles Copying / Diffing](52-mbtiles-copy.md)
- [MBTiles Validation](53-mbtiles-validation.md)
- [MBTiles Schemas](54-mbtiles-schema.md)
- [MBTiles Statistics](55-mbtiles-stats.md)
- [Development](60-development.md)

View File

@ -0,0 +1,20 @@
{
"db_name": "SQLite",
"query": "PRAGMA page_size;",
"describe": {
"columns": [
{
"name": "page_size",
"ordinal": 0,
"type_info": "Int"
}
],
"parameters": {
"Right": 0
},
"nullable": [
null
]
},
"hash": "208681caa7185b4014e7eda4120962954cdd3d913e8a786599da8a3f9799ed4b"
}

View File

@ -0,0 +1,68 @@
{
"db_name": "SQLite",
"query": "\n SELECT zoom_level AS zoom,\n count() AS count,\n min(length(tile_data)) AS smallest,\n max(length(tile_data)) AS largest,\n avg(length(tile_data)) AS average,\n min(tile_column) AS min_tile_x,\n min(tile_row) AS min_tile_y,\n max(tile_column) AS max_tile_x,\n max(tile_row) AS max_tile_y\n FROM tiles\n GROUP BY zoom_level",
"describe": {
"columns": [
{
"name": "zoom",
"ordinal": 0,
"type_info": "Int64"
},
{
"name": "count",
"ordinal": 1,
"type_info": "Int"
},
{
"name": "smallest",
"ordinal": 2,
"type_info": "Int"
},
{
"name": "largest",
"ordinal": 3,
"type_info": "Int"
},
{
"name": "average",
"ordinal": 4,
"type_info": "Float"
},
{
"name": "min_tile_x",
"ordinal": 5,
"type_info": "Int"
},
{
"name": "min_tile_y",
"ordinal": 6,
"type_info": "Int"
},
{
"name": "max_tile_x",
"ordinal": 7,
"type_info": "Int"
},
{
"name": "max_tile_y",
"ordinal": 8,
"type_info": "Int"
}
],
"parameters": {
"Right": 0
},
"nullable": [
true,
false,
true,
true,
true,
true,
true,
true,
true
]
},
"hash": "41798c456136acb48ce59769a8abd1c6fb638f84d35457093b5dfbb3c8005433"
}

View File

@ -0,0 +1,20 @@
{
"db_name": "SQLite",
"query": "SELECT page_count * page_size as file_size FROM pragma_page_count(), pragma_page_size();",
"describe": {
"columns": [
{
"name": "file_size",
"ordinal": 0,
"type_info": "Int"
}
],
"parameters": {
"Right": 0
},
"nullable": [
null
]
},
"hash": "b771f1a10c396b9317db059015aa4c2714b57d964b06c0b06d4e44076773b37c"
}

View File

@ -19,9 +19,10 @@ enum-display.workspace = true
futures.workspace = true
log.workspace = true
martin-tile-utils.workspace = true
serde.workspace = true
serde_json.workspace = true
serde.workspace = true
serde_with.workspace = true
size_format.workspace = true
sqlite-hashes.workspace = true
sqlx.workspace = true
thiserror.workspace = true
@ -37,9 +38,10 @@ tokio = { workspace = true, features = ["rt-multi-thread"], optional = true }
[dev-dependencies]
# For testing, might as well use the same async framework as the Martin itself
actix-rt.workspace = true
approx.workspace = true
ctor.workspace = true
env_logger.workspace = true
insta = { workspace = true, features = ["toml"] }
insta = { workspace = true, features = ["toml", "yaml"] }
pretty_assertions.workspace = true
rstest.workspace = true

View File

@ -26,6 +26,9 @@ enum Commands {
/// MBTiles file to read from
file: PathBuf,
},
/// Gets tile statistics from MBTiels file
#[command(name = "stats")]
Stats { file: PathBuf },
/// Gets a single value from the MBTiles metadata table.
#[command(name = "meta-get")]
MetaGetValue {
@ -114,6 +117,13 @@ async fn main_int() -> anyhow::Result<()> {
let mbt = Mbtiles::new(file.as_path())?;
mbt.validate(integrity_check, update_agg_tiles_hash).await?;
}
Commands::Stats { file } => {
let mbt = Mbtiles::new(file.as_path())?;
let mut conn = mbt.open_readonly().await?;
let statistics = mbt.statistics(&mut conn).await?;
println!("{statistics}");
}
}
Ok(())

View File

@ -2,7 +2,7 @@
use std::collections::HashSet;
use std::ffi::OsStr;
use std::fmt::Display;
use std::fmt::{Display, Formatter};
use std::path::Path;
use std::str::FromStr;
@ -15,6 +15,7 @@ use martin_tile_utils::{Format, TileInfo};
use serde::ser::SerializeStruct;
use serde::{Serialize, Serializer};
use serde_json::{Value as JSONValue, Value};
use size_format::SizeFormatterBinary;
use sqlite_hashes::register_md5_function;
use sqlx::sqlite::{SqliteConnectOptions, SqliteRow};
use sqlx::{query, Connection as _, Row, SqliteConnection, SqliteExecutor};
@ -39,6 +40,85 @@ pub struct Metadata {
pub json: Option<JSONValue>,
}
/// Tile statistics for a single zoom level, as produced by `Mbtiles::statistics`.
#[derive(Clone, Debug, PartialEq, Serialize)]
pub struct ZoomStats {
/// Zoom level (`zoom_level` in the `tiles` query) this entry describes.
pub zoom: u8,
/// Number of tiles stored at this zoom level.
pub count: u64,
/// Smallest `length(tile_data)` at this zoom level (0 if SQLite reported NULL).
pub smallest: u64,
/// Largest `length(tile_data)` at this zoom level (0 if SQLite reported NULL).
pub largest: u64,
/// Mean `length(tile_data)` at this zoom level (0.0 if SQLite reported NULL).
pub average: f64,
/// Extent spanned by the min/max tile column and row at this zoom level,
/// converted to WGS 84 longitude/latitude degrees.
pub bbox: Bounds,
}
/// Summary of an MBTiles file, returned by `Mbtiles::statistics`.
///
/// Its `Display` implementation renders the report printed by `mbtiles stats`.
#[derive(Clone, Debug, PartialEq, Serialize)]
pub struct Statistics {
/// Path of the inspected file, copied from the `Mbtiles` instance.
pub file_path: String,
/// Database size, computed as SQLite `page_count * page_size`.
pub file_size: u64,
/// Schema type detected for the file.
pub mbt_type: MbtType,
/// SQLite page size, as reported by `PRAGMA page_size`.
pub page_size: u64,
/// Per-zoom statistics, one entry per distinct zoom level returned by the query.
pub zoom_stats_list: Vec<ZoomStats>,
/// Total number of tiles across all zoom levels.
pub count: u64,
/// Minimum of the per-zoom `smallest` values; `None` when there are no zoom levels.
pub smallest: Option<u64>,
/// Maximum of the per-zoom `largest` values; `None` when there are no zoom levels.
pub largest: Option<u64>,
/// Count-weighted mean tile size across all zoom levels; NaN when `count` is 0.
pub average: f64,
}
impl Display for Statistics {
    /// Renders the human-readable statistics report.
    ///
    /// The output is a four-line header (file path, file size, schema, page size)
    /// followed by a table with one row per zoom level. When the file contains
    /// tiles, a final `all` row summarises every zoom level.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        writeln!(f, "File: {}", self.file_path)?;
        writeln!(
            f,
            "FileSize: {:.2}B",
            SizeFormatterBinary::new(self.file_size)
        )?;
        writeln!(f, "Schema: {}", self.mbt_type)?;
        writeln!(
            f,
            "Page size: {:.2}B",
            SizeFormatterBinary::new(self.page_size)
        )?;
        writeln!(
            f,
            "|{:^9}|{:^9}|{:^9}|{:^9}|{:^9}|{:^9}|",
            "Zoom", "Count", "Smallest", "Largest", "Average", "BBox"
        )?;

        for level in &self.zoom_stats_list {
            // Sizes are rendered to strings first and then right-aligned in
            // nine-character columns.
            let min_text = format!("{:.2}B", SizeFormatterBinary::new(level.smallest));
            let max_text = format!("{:.2}B", SizeFormatterBinary::new(level.largest));
            // The fractional part of the average is dropped before formatting.
            let avg_text = format!("{:.2}B", SizeFormatterBinary::new(level.average as u64));
            writeln!(
                f,
                "|{:>9}|{:>9}|{:>9}|{:>9}|{:>9}|{:>9}|",
                level.zoom, level.count, min_text, max_text, avg_text, level.bbox
            )?;
        }

        match (self.smallest, self.largest) {
            (Some(min), Some(max)) if self.count != 0 => {
                // NOTE: the summary row uses the formatter's default precision,
                // unlike the `.2` precision of the per-zoom rows above.
                let min_text = format!("{}B", SizeFormatterBinary::new(min));
                let max_text = format!("{}B", SizeFormatterBinary::new(max));
                let avg_text = format!("{}B", SizeFormatterBinary::new(self.average as u64));
                writeln!(
                    f,
                    "|{:>9}|{:>9}|{:>9}|{:>9}|{:>9}|",
                    "all", self.count, min_text, max_text, avg_text
                )
            }
            // Nothing to summarise for a file without tiles.
            _ => Ok(()),
        }
    }
}
#[allow(clippy::trivially_copy_pass_by_ref)]
fn serialize_ti<S: Serializer>(ti: &TileInfo, serializer: S) -> Result<S::Ok, S::Error> {
let mut s = serializer.serialize_struct("TileInfo", 2)?;
s.serialize_field("format", &ti.format.to_string())?;
@ -65,7 +145,7 @@ pub enum MbtTypeCli {
Normalized,
}
#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, EnumDisplay)]
#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, EnumDisplay, Serialize)]
#[enum_display(case = "Kebab")]
pub enum MbtType {
Flat,
@ -100,7 +180,7 @@ pub struct Mbtiles {
}
impl Display for Mbtiles {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
write!(f, "{}", self.filepath)
}
}
@ -216,7 +296,95 @@ impl Mbtiles {
self.check_agg_tiles_hashes(&mut conn).await
}
}
/// Compute statistics about this MBTiles file: database size, page size, the
/// detected schema type, and per-zoom tile counts, tile sizes and bounding boxes.
///
/// Each zoom level's bounding box is derived from the minimum and maximum
/// `tile_column` / `tile_row` at that zoom and converted to WGS 84 degrees with
/// `webmercator_to_wgs84`.
///
/// When the query returns no rows, `zoom_stats_list` is empty, `smallest` and
/// `largest` are `None`, and `average` is NaN (the result of `0.0 / 0.0`).
///
/// # Errors
/// Returns an error if any of the SQL queries or the schema detection fails.
///
/// # Panics
/// Panics if SQLite reports NULL for the file size, the page size, or for a
/// zoom level's `zoom_level` or tile coordinates.
pub async fn statistics<T>(&self, conn: &mut T) -> MbtResult<Statistics>
where
    for<'e> &'e mut T: SqliteExecutor<'e>,
{
    let file_size = query!(
        "SELECT page_count * page_size as file_size FROM pragma_page_count(), pragma_page_size();"
    )
    .fetch_one(&mut *conn)
    .await?
    .file_size
    .expect("The file size of the MBTiles file shouldn't be None") as u64;

    let page_size = query!("PRAGMA page_size;")
        .fetch_one(&mut *conn)
        .await?
        .page_size
        .expect("The page size of the MBTiles file shouldn't be None") as u64;

    // Keep this query text unchanged (including its line layout): the checked-in
    // sqlx query metadata stores the query string together with a hash of it.
    let tile_infos_query = query!(
"
SELECT zoom_level AS zoom,
count() AS count,
min(length(tile_data)) AS smallest,
max(length(tile_data)) AS largest,
avg(length(tile_data)) AS average,
min(tile_column) AS min_tile_x,
min(tile_row) AS min_tile_y,
max(tile_column) AS max_tile_x,
max(tile_row) AS max_tile_y
FROM tiles
GROUP BY zoom_level"
    );

    let mbt_type = self.detect_type(&mut *conn).await?;
    let level_rows = tile_infos_query.fetch_all(&mut *conn).await?;

    let zoom_stats_list: Vec<ZoomStats> = level_rows
        .into_iter()
        .map(|row| {
            let zoom = row.zoom.expect("zoom_level of a tile group shouldn't be NULL") as u8;
            let count = row.count as u64;
            // Side length of one tile at this zoom, in Web Mercator meters.
            // The power of two is computed in floating point: `2_u32.pow(zoom)`
            // would overflow for any stored zoom level of 32 or more.
            let tile_length = 40075016.7 / 2_f64.powi(i32::from(zoom));

            let smallest = row.smallest.unwrap_or(0) as u64;
            let largest = row.largest.unwrap_or(0) as u64;
            let average = row.average.unwrap_or(0.0);

            let min_tile_x = row
                .min_tile_x
                .expect("tile_column of a tile group shouldn't be NULL");
            let min_tile_y = row
                .min_tile_y
                .expect("tile_row of a tile group shouldn't be NULL");
            let max_tile_x = row
                .max_tile_x
                .expect("tile_column of a tile group shouldn't be NULL");
            let max_tile_y = row
                .max_tile_y
                .expect("tile_row of a tile group shouldn't be NULL");

            // Lower-left corner of the minimum tile ...
            let (minx, miny) = webmercator_to_wgs84(
                -20037508.34 + min_tile_x as f64 * tile_length,
                -20037508.34 + min_tile_y as f64 * tile_length,
            );
            // ... and upper-right corner of the maximum tile, hence the `+ 1.0`.
            let (maxx, maxy) = webmercator_to_wgs84(
                -20037508.34 + (max_tile_x as f64 + 1.0) * tile_length,
                -20037508.34 + (max_tile_y as f64 + 1.0) * tile_length,
            );
            let bbox = Bounds::new(minx, miny, maxx, maxy);

            ZoomStats {
                zoom,
                count,
                smallest,
                largest,
                average,
                bbox,
            }
        })
        .collect();

    let count: u64 = zoom_stats_list.iter().map(|l| l.count).sum();
    let smallest = zoom_stats_list.iter().map(|l| l.smallest).reduce(u64::min);
    let largest = zoom_stats_list.iter().map(|l| l.largest).reduce(u64::max);
    // Count-weighted mean of the per-zoom averages; NaN when there are no tiles.
    let average = zoom_stats_list
        .iter()
        .map(|l| l.average * l.count as f64)
        .sum::<f64>()
        / count as f64;

    Ok(Statistics {
        file_path: self.filepath.clone(),
        file_size,
        mbt_type,
        page_size,
        zoom_stats_list,
        count,
        smallest,
        largest,
        average,
    })
}
/// Get the aggregate tiles hash value from the metadata table
pub async fn get_agg_tiles_hash<T>(&self, conn: &mut T) -> MbtResult<Option<String>>
where
@ -683,15 +851,25 @@ pub async fn attach_hash_fn(conn: &mut SqliteConnection) -> MbtResult<()> {
Ok(())
}
/// Convert a Web Mercator (EPSG:3857) coordinate in meters to WGS 84 degrees.
///
/// Returns the pair `(longitude, latitude)`.
fn webmercator_to_wgs84(x: f64, y: f64) -> (f64, f64) {
    // Sphere radius, in meters, used by the projection formulas below.
    const RADIUS: f64 = 6378137.0;

    let longitude = f64::to_degrees(x / RADIUS);
    let latitude = (y / RADIUS).sinh().atan().to_degrees();
    (longitude, latitude)
}
#[cfg(test)]
mod tests {
use std::collections::HashMap;
use insta::assert_yaml_snapshot;
use martin_tile_utils::Encoding;
use sqlx::Executor as _;
use tilejson::VectorLayer;
use crate::create_flat_tables;
use super::*;
use approx::assert_relative_eq;
async fn open(filepath: &str) -> MbtResult<(SqliteConnection, Mbtiles)> {
let mbt = Mbtiles::new(filepath)?;
@ -842,4 +1020,134 @@ mod tests {
assert!(matches!(result, Err(MbtError::AggHashMismatch(..))));
Ok(())
}
// Statistics for an in-memory database that only has the (empty) flat-schema
// tables: there are no per-zoom entries, `smallest`/`largest` serialize as null,
// and `average` is NaN because it is computed as `0.0 / 0.0`.
#[actix_rt::test]
async fn stats_empty_file() -> MbtResult<()> {
let (mut conn, mbt) = open("file:mbtiles_empty_stats?mode=memory&cache=shared").await?;
create_flat_tables(&mut conn).await.unwrap();
let res = mbt.statistics(&mut conn).await?;
assert_yaml_snapshot!(res, @r###"
---
file_path: "file:mbtiles_empty_stats?mode=memory&cache=shared"
file_size: 20480
mbt_type: Flat
page_size: 4096
zoom_stats_list: []
count: 0
smallest: ~
largest: ~
average: NaN
"###);
Ok(())
}
/// Checks `webmercator_to_wgs84` at both extreme corners used by the statistics
/// code, at the origin, and at a small offset from the origin.
///
/// The conversion is synchronous, so this is a plain `#[test]` and needs no
/// async runtime.
#[test]
fn meter_to_lnglat() {
    // Lower-left corner.
    let (lng, lat) = webmercator_to_wgs84(-20037508.34, -20037508.34);
    assert_relative_eq!(lng, -179.99999997494382, epsilon = f64::EPSILON);
    assert_relative_eq!(lat, -85.05112877764508, epsilon = f64::EPSILON);

    // Upper-right corner.
    let (lng, lat) = webmercator_to_wgs84(20037508.34, 20037508.34);
    assert_relative_eq!(lng, 179.99999997494382, epsilon = f64::EPSILON);
    assert_relative_eq!(lat, 85.05112877764508, epsilon = f64::EPSILON);

    // Origin maps to (0, 0).
    let (lng, lat) = webmercator_to_wgs84(0.0, 0.0);
    assert_relative_eq!(lng, 0.0, epsilon = f64::EPSILON);
    assert_relative_eq!(lat, 0.0, epsilon = f64::EPSILON);

    // Small offset from the origin.
    let (lng, lat) = webmercator_to_wgs84(3000.0, 9000.0);
    assert_relative_eq!(lng, 0.026949458523585643, epsilon = f64::EPSILON);
    assert_relative_eq!(lat, 0.08084834874097371, epsilon = f64::EPSILON);
}
// Snapshot of the full statistics for the `world_cities.mbtiles` fixture:
// header values, one entry per zoom level 0..=6, and the overall totals.
#[actix_rt::test]
async fn stat() -> MbtResult<()> {
let (mut conn, mbt) = open("../tests/fixtures/mbtiles/world_cities.mbtiles").await?;
let res = mbt.statistics(&mut conn).await?;
assert_yaml_snapshot!(res, @r###"
---
file_path: "../tests/fixtures/mbtiles/world_cities.mbtiles"
file_size: 49152
mbt_type: Flat
page_size: 4096
zoom_stats_list:
- zoom: 0
count: 1
smallest: 1107
largest: 1107
average: 1107
bbox:
- -179.99999997494382
- -85.05112877764508
- 180.00000015460688
- 85.05112879314403
- zoom: 1
count: 4
smallest: 160
largest: 650
average: 366.5
bbox:
- -179.99999997494382
- -85.05112877764508
- 180.00000015460688
- 85.05112879314403
- zoom: 2
count: 7
smallest: 137
largest: 495
average: 239.57142857142858
bbox:
- -179.99999997494382
- -66.51326042021836
- 180.00000015460688
- 66.51326049182072
- zoom: 3
count: 17
smallest: 67
largest: 246
average: 134
bbox:
- -134.99999995874995
- -40.9798980140281
- 180.00000015460688
- 66.51326049182072
- zoom: 4
count: 38
smallest: 64
largest: 175
average: 86
bbox:
- -134.99999995874995
- -40.9798980140281
- 180.00000015460688
- 66.51326049182072
- zoom: 5
count: 57
smallest: 64
largest: 107
average: 72.7719298245614
bbox:
- -123.74999995470151
- -40.9798980140281
- 180.00000015460688
- 61.60639642757953
- zoom: 6
count: 72
smallest: 64
largest: 97
average: 68.29166666666667
bbox:
- -123.74999995470151
- -40.9798980140281
- 180.00000015460688
- 61.60639642757953
count: 196
smallest: 64
largest: 1107
average: 96.2295918367347
"###);
Ok(())
}
}

View File

@ -4,6 +4,7 @@ Usage: mbtiles <COMMAND>
Commands:
meta-all Prints all values in the metadata table in a free-style, unstable YAML format
stats Gets tile statistics from MBTiels file
meta-get Gets a single value from the MBTiles metadata table
meta-set Sets a single value in the MBTiles' file metadata table or deletes it if no value
copy Copy tiles from one mbtiles file to another

View File

@ -0,0 +1,14 @@
File: ./tests/fixtures/mbtiles/world_cities.mbtiles
FileSize: 48.00KiB
Schema: flat
Page size: 4.00KiB
| Zoom | Count |Smallest | Largest | Average | BBox |
| 0| 1| 1.08KiB| 1.08KiB| 1.08KiB|-179.99999997494382,-85.05112877764508,180.00000015460688,85.05112879314403|
| 1| 4| 160B| 650B| 366B|-179.99999997494382,-85.05112877764508,180.00000015460688,85.05112879314403|
| 2| 7| 137B| 495B| 239B|-179.99999997494382,-66.51326042021836,180.00000015460688,66.51326049182072|
| 3| 17| 67B| 246B| 134B|-134.99999995874995,-40.9798980140281,180.00000015460688,66.51326049182072|
| 4| 38| 64B| 175B| 86B|-134.99999995874995,-40.9798980140281,180.00000015460688,66.51326049182072|
| 5| 57| 64B| 107B| 72B|-123.74999995470151,-40.9798980140281,180.00000015460688,61.60639642757953|
| 6| 72| 64B| 97B| 68B|-123.74999995470151,-40.9798980140281,180.00000015460688,61.60639642757953|
| all| 196| 64B| 1.0KiB| 96B|

View File

@ -354,6 +354,7 @@ if [[ "$MBTILES_BIN" != "-" ]]; then
$MBTILES_BIN --help 2>&1 | tee "$TEST_OUT_DIR/help.txt"
$MBTILES_BIN meta-all --help 2>&1 | tee "$TEST_OUT_DIR/meta-all_help.txt"
$MBTILES_BIN meta-all ./tests/fixtures/mbtiles/world_cities.mbtiles 2>&1 | tee "$TEST_OUT_DIR/meta-all.txt"
$MBTILES_BIN stats ./tests/fixtures/mbtiles/world_cities.mbtiles 2>&1 | tee "$TEST_OUT_DIR/stats.txt"
$MBTILES_BIN meta-get --help 2>&1 | tee "$TEST_OUT_DIR/meta-get_help.txt"
$MBTILES_BIN meta-get ./tests/fixtures/mbtiles/world_cities.mbtiles name 2>&1 | tee "$TEST_OUT_DIR/meta-get_name.txt"
$MBTILES_BIN meta-get ./tests/fixtures/mbtiles/world_cities.mbtiles missing_value 2>&1 | tee "$TEST_OUT_DIR/meta-get_missing_value.txt"