Add compression and query to martin-cp (#1019)

* Use gzip compression for MVT tiles by default
* Allow user to set compression with `--encoding` parameter (same as
the browser's `Accept-Encoding` header, e.g. use `br,gzip` to encode with brotli as first
choice, unless already encoded as gzip)
* Allow user to pass a query to the Postgres functions with
`--url-query`
* A bit of a cleanup for `mbtiles summary` output
This commit is contained in:
Yuri Astrakhan 2023-11-22 00:26:53 -05:00 committed by GitHub
parent 0f2cd100cf
commit d6219a6526
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
16 changed files with 173 additions and 140 deletions

2
Cargo.lock generated
View File

@ -1905,7 +1905,7 @@ version = "0.1.5"
[[package]]
name = "mbtiles"
version = "0.8.0"
version = "0.8.1"
dependencies = [
"actix-rt",
"anyhow",

View File

@ -4,21 +4,21 @@
Use `mbtiles summary` to get a summary of the contents of an MBTiles file. The command will print a table with the number of tiles per zoom level, the size of the smallest and largest tiles, and the average size of tiles at each zoom level. The command will also print the bounding box of the covered area per zoom level.
```shell
File: tests/fixtures/mbtiles/world_cities.mbtiles
MBTiles file summary for tests/fixtures/mbtiles/world_cities.mbtiles
Schema: flat
File size: 48.00KiB
Page size: 4.00KiB
Page count: 12
| Zoom | Count |Smallest | Largest | Average | BBox |
| 0| 1| 1.08KiB| 1.08KiB| 1.08KiB| -180,-85,180,85 |
| 1| 4| 160B| 650B| 366B| -180,-85,180,85 |
| 2| 7| 137B| 495B| 239B| -180,-67,180,67 |
| 3| 17| 67B| 246B| 134B| -135,-41,180,67 |
| 4| 38| 64B| 175B| 86B| -135,-41,180,67 |
| 5| 57| 64B| 107B| 72B| -124,-41,180,62 |
| 6| 72| 64B| 97B| 68B| -124,-41,180,62 |
| all| 196| 64B| 1.0KiB| 96B| -180,-85,180,85 |
Zoom | Count | Smallest | Largest | Average | Bounding Box
0 | 1 | 1.0KiB | 1.0KiB | 1.0KiB | -180,-85,180,85
1 | 4 | 160B | 650B | 366B | -180,-85,180,85
2 | 7 | 137B | 495B | 239B | -180,-67,180,67
3 | 17 | 67B | 246B | 134B | -135,-41,180,67
4 | 38 | 64B | 175B | 86B | -135,-41,180,67
5 | 57 | 64B | 107B | 72B | -124,-41,180,62
6 | 72 | 64B | 97B | 68B | -124,-41,180,62
all | 196 | 64B | 1.0KiB | 96B | -180,-85,180,85
```
## meta-all

View File

@ -4,6 +4,9 @@ use std::path::PathBuf;
use std::sync::atomic::{AtomicU64, Ordering};
use std::time::Duration;
use actix_http::error::ParseError;
use actix_http::test::TestRequest;
use actix_web::http::header::{AcceptEncoding, Header as _, ACCEPT_ENCODING};
use clap::Parser;
use futures::stream::{self, StreamExt};
use futures::TryStreamExt;
@ -11,9 +14,11 @@ use log::{debug, error, info, log_enabled};
use martin::args::{Args, ExtraArgs, MetaArgs, OsEnv, PgArgs, SrvArgs};
use martin::srv::{get_tile_content, merge_tilejson, RESERVED_KEYWORDS};
use martin::{
append_rect, read_config, Config, IdResolver, MartinError, MartinResult, ServerState,
append_rect, read_config, Config, IdResolver, MartinError, MartinResult, ServerState, Source,
TileCoord, TileData, TileRect,
};
use martin_tile_utils::TileInfo;
use mbtiles::sqlx::SqliteConnection;
use mbtiles::{
init_mbtiles_schema, is_empty_database, CopyDuplicateMode, MbtType, MbtTypeCli, Mbtiles,
};
@ -56,7 +61,15 @@ pub struct CopyArgs {
value_name = "SCHEMA",
value_enum
)]
pub dst_type: Option<MbtTypeCli>,
pub mbt_type: Option<MbtTypeCli>,
/// Optional query parameter (in URL query format) for the sources that support it (e.g. Postgres functions)
#[arg(long)]
pub url_query: Option<String>,
/// Optional accepted encoding parameter as if the browser sent it in the HTTP request.
/// May be multiple values separated by comma, e.g. `gzip,br`.
/// Use `identity` to disable compression.
#[arg(long, alias = "encodings", default_value = "gzip")]
pub encoding: String,
/// Specify the behaviour when generated tile already exists in the destination file.
#[arg(long, value_enum, default_value_t = CopyDuplicateMode::default())]
pub on_duplicate: CopyDuplicateMode,
@ -82,8 +95,8 @@ pub struct CopyArgs {
pub zoom_levels: Vec<u8>,
}
async fn start(copy_args: CopierArgs) -> MartinResult<()> {
info!("Starting Martin v{VERSION}");
async fn start(copy_args: CopierArgs) -> MartinCpResult<()> {
info!("Martin-CP tile copier v{VERSION}");
let env = OsEnv::default();
let save_config = copy_args.meta.save_config.clone();
@ -185,6 +198,20 @@ impl Progress {
}
}
/// Result alias whose error type is [`MartinCpError`].
type MartinCpResult<T> = Result<T, MartinCpError>;
/// Error type of the tile copier; each variant wraps an error from one of the
/// layers involved and is created through `From`, so `?` can be used on all of them.
#[derive(Debug, thiserror::Error)]
enum MartinCpError {
/// A Martin error, displayed exactly as the wrapped error.
#[error(transparent)]
Martin(#[from] MartinError),
/// A header `ParseError`; the message attributes it to the encodings argument.
// NOTE(review): presumably only raised while parsing `--encoding` into an
// `AcceptEncoding` header -- confirm no other `ParseError` source converts into this.
#[error("Unable to parse encodings argument: {0}")]
EncodingParse(#[from] ParseError),
/// An `actix_web::Error`, displayed exactly as the wrapped error.
#[error(transparent)]
Actix(#[from] actix_web::Error),
/// An `mbtiles::MbtError`, displayed exactly as the wrapped error.
#[error(transparent)]
Mbt(#[from] mbtiles::MbtError),
}
impl Display for Progress {
#[allow(clippy::cast_precision_loss)]
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
@ -225,7 +252,7 @@ fn iterate_tiles(tiles: Vec<TileRect>) -> impl Iterator<Item = TileCoord> {
})
}
async fn run_tile_copy(args: CopyArgs, state: ServerState) -> MartinResult<()> {
async fn run_tile_copy(args: CopyArgs, state: ServerState) -> MartinCpResult<()> {
let output_file = &args.output_file;
let concurrency = args.concurrency.unwrap_or(1);
let (sources, _use_url_query, info) = state.tiles.get_sources(args.source.as_str(), None)?;
@ -235,28 +262,13 @@ async fn run_tile_copy(args: CopyArgs, state: ServerState) -> MartinResult<()> {
let tiles = compute_tile_ranges(&args);
let mbt = Mbtiles::new(output_file)?;
let mut conn = mbt.open_or_new().await?;
let dst_type = if is_empty_database(&mut conn).await? {
let dst_type = match args.dst_type.unwrap_or(MbtTypeCli::Normalized) {
MbtTypeCli::Flat => MbtType::Flat,
MbtTypeCli::FlatWithHash => MbtType::FlatWithHash,
MbtTypeCli::Normalized => MbtType::Normalized { hash_view: true },
};
init_mbtiles_schema(&mut conn, dst_type).await?;
let mut tj = merge_tilejson(sources, String::new());
tj.other.insert(
"format".to_string(),
serde_json::Value::String(tile_info.format.to_string()),
);
tj.other.insert(
"generator".to_string(),
serde_json::Value::String(format!("martin-cp v{VERSION}")),
);
mbt.insert_metadata(&mut conn, &tj).await?;
dst_type
} else {
mbt.detect_type(&mut conn).await?
};
let mbt_type = init_schema(&mbt, &mut conn, sources, tile_info, args.mbt_type).await?;
let query = args.url_query.as_deref();
let req = TestRequest::default()
.insert_header((ACCEPT_ENCODING, args.encoding.as_str()))
.finish();
let accept_encoding = AcceptEncoding::parse(&req)?;
let encodings = Some(&accept_encoding);
let progress = Progress::new(&tiles);
info!(
@ -274,7 +286,7 @@ async fn run_tile_copy(args: CopyArgs, state: ServerState) -> MartinResult<()> {
.try_for_each_concurrent(concurrency, |xyz| {
let tx = tx.clone();
async move {
let tile = get_tile_content(sources, info, &xyz, None, None).await?;
let tile = get_tile_content(sources, info, &xyz, query, encodings).await?;
let data = tile.data;
tx.send(TileXyz { xyz, data })
.await
@ -295,7 +307,7 @@ async fn run_tile_copy(args: CopyArgs, state: ServerState) -> MartinResult<()> {
} else {
batch.push((tile.xyz.z, tile.xyz.x, tile.xyz.y, tile.data));
if batch.len() >= BATCH_SIZE || last_saved.elapsed() > SAVE_EVERY {
mbt.insert_tiles(&mut conn, dst_type, args.on_duplicate, &batch)
mbt.insert_tiles(&mut conn, mbt_type, args.on_duplicate, &batch)
.await?;
batch.clear();
last_saved = Instant::now();
@ -310,7 +322,7 @@ async fn run_tile_copy(args: CopyArgs, state: ServerState) -> MartinResult<()> {
}
}
if !batch.is_empty() {
mbt.insert_tiles(&mut conn, dst_type, args.on_duplicate, &batch)
mbt.insert_tiles(&mut conn, mbt_type, args.on_duplicate, &batch)
.await?;
}
Ok(())
@ -321,6 +333,36 @@ async fn run_tile_copy(args: CopyArgs, state: ServerState) -> MartinResult<()> {
Ok(())
}
/// Determines the schema type of the destination MBTiles file, creating the
/// schema and metadata first when the database is still empty.
///
/// * Non-empty database: the existing schema type is detected and returned;
///   the requested `mbt_type` is not consulted.
/// * Empty database: the schema is initialized with the requested type
///   (normalized with a hash view when none was requested), and the merged
///   TileJSON of `sources` is stored as metadata together with the tile
///   `format` and a `generator` entry naming this tool and its version.
///
/// # Errors
///
/// Propagates any failure from checking, initializing, detecting, or writing
/// metadata to the database.
async fn init_schema(
    mbt: &Mbtiles,
    conn: &mut SqliteConnection,
    sources: &[&dyn Source],
    tile_info: TileInfo,
    mbt_type: Option<MbtTypeCli>,
) -> Result<MbtType, MartinError> {
    // A file that already has content keeps whatever schema it was created with.
    if !is_empty_database(&mut *conn).await? {
        return Ok(mbt.detect_type(&mut *conn).await?);
    }

    // Translate the CLI choice into the library type; normalized is the default.
    let schema = match mbt_type {
        Some(MbtTypeCli::Flat) => MbtType::Flat,
        Some(MbtTypeCli::FlatWithHash) => MbtType::FlatWithHash,
        Some(MbtTypeCli::Normalized) | None => MbtType::Normalized { hash_view: true },
    };
    init_mbtiles_schema(&mut *conn, schema).await?;

    // Start from the merged TileJSON of all sources and add the copier's own keys.
    let mut tilejson = merge_tilejson(sources, String::new());
    let extra_entries = [
        ("format", tile_info.format.to_string()),
        ("generator", format!("martin-cp v{VERSION}")),
    ];
    for (key, value) in extra_entries {
        tilejson
            .other
            .insert(key.to_string(), serde_json::Value::String(value));
    }
    mbt.insert_metadata(&mut *conn, &tilejson).await?;

    Ok(schema)
}
#[actix_web::main]
async fn main() {
let env = env_logger::Env::default().default_filter_or("martin_cp=info");

View File

@ -354,10 +354,8 @@ pub async fn get_tile_response(
) -> ActixResult<HttpResponse> {
let (sources, use_url_query, info) = sources.get_sources(source_ids, Some(xyz.z))?;
let sources = sources.as_slice();
let query = use_url_query.then_some(query);
let tile = get_tile_content(sources, info, &xyz, query, encodings.as_ref()).await?;
let tile = get_tile_content(sources.as_slice(), info, &xyz, query, encodings.as_ref()).await?;
Ok(if tile.data.is_empty() {
HttpResponse::NoContent().finish()
@ -381,10 +379,9 @@ pub async fn get_tile_content(
if sources.is_empty() {
return Err(ErrorNotFound("No valid sources found"));
}
let query = if let Some(v) = query {
Some(Query::<UrlQuery>::from_query(v)?.into_inner())
} else {
None
let query = match query {
Some(v) if !v.is_empty() => Some(Query::<UrlQuery>::from_query(v)?.into_inner()),
_ => None,
};
let mut tiles = try_join_all(sources.iter().map(|s| s.get_tile(xyz, &query)))

View File

@ -2,7 +2,7 @@ lints.workspace = true
[package]
name = "mbtiles"
version = "0.8.0"
version = "0.8.1"
authors = ["Yuri Astrakhan <YuriAstrakhan@gmail.com>", "MapLibre contributors"]
description = "A simple low-level MbTiles access and processing library, with some tile format detection and other relevant heuristics."
keywords = ["mbtiles", "maps", "tiles", "mvt", "tilejson"]

View File

@ -1,5 +1,8 @@
#![doc = include_str!("../README.md")]
// Re-export sqlx
pub use sqlx;
mod copier;
pub use copier::{CopyDuplicateMode, MbtilesCopier};

View File

@ -13,6 +13,7 @@ use tilejson::{tilejson, Bounds, Center, TileJSON};
use crate::errors::MbtResult;
use crate::Mbtiles;
#[serde_with::skip_serializing_none]
#[derive(Clone, Debug, PartialEq, Serialize)]
pub struct Metadata {
pub id: String,
@ -155,6 +156,9 @@ impl Mbtiles {
);
}
}
if obj.is_empty() {
json = None;
}
}
Ok((tj, layer_type, json))

View File

@ -58,8 +58,8 @@ impl Display for Summary {
writeln!(f)?;
writeln!(
f,
"|{:^9}|{:^9}|{:^9}|{:^9}|{:^9}| {:^20} |",
"Zoom", "Count", "Smallest", "Largest", "Average", "BBox"
" {:^4} | {:^9} | {:^9} | {:^9} | {:^9} | Bounding Box",
"Zoom", "Count", "Smallest", "Largest", "Average"
)?;
for l in &self.zoom_info {
@ -70,13 +70,13 @@ impl Display for Summary {
writeln!(
f,
"|{:>9}|{:>9}|{:>9}|{:>9}|{:>9}| {:<20} |",
" {:>4} | {:>9} | {:>9} | {:>9} | {:>9} | {:.prec$}",
l.zoom,
l.tile_count,
format!("{min:.2}B"),
format!("{max:.2}B"),
format!("{avg:.2}B"),
format!("{:.prec$}", l.bbox),
format!("{min:.1}B"),
format!("{max:.1}B"),
format!("{avg:.1}B"),
l.bbox,
)?;
}
@ -93,13 +93,12 @@ impl Display for Summary {
let prec = get_zoom_precision(max_zoom);
writeln!(
f,
"|{:>9}|{:>9}|{:>9}|{:>9}|{:>9}| {:<20} |",
" {:>4} | {:>9} | {:>9} | {:>9} | {:>9} | {bbox:.prec$}",
"all",
self.tile_count,
format!("{min}B"),
format!("{max}B"),
format!("{avg}B"),
format!("{:.prec$}", bbox),
)?;
}
}

View File

@ -1,25 +1,13 @@
[INFO ] cp_flat-with-hash has an unrecognized metadata value foo={"bar":"foo"}
[INFO ] Using 'mvt' tile format from metadata table in file cp_flat-with-hash
id: cp_flat-with-hash
tile_info:
format: mvt
encoding: ''
layer_type: null
tilejson:
tilejson: 3.0.0
tiles: []
vector_layers:
- id: table_source
fields:
gid: int4
bounds:
- -2.0
- -1.0
- 142.84131509869133
- 45.0
name: table_source
foo: '{"bar":"foo"}'
description: public.function_zxy_query_test
name: function_zxy_query_test
format: mvt
generator: martin-cp v0.11.1
json: {}

View File

@ -2,15 +2,13 @@ MBTiles file summary for tests/mbtiles_temp_files/cp_flat-with-hash.mbtiles
Schema: flat-with-hash
Page size: 512B
| Zoom | Count |Smallest | Largest | Average | BBox |
| 0| 1| 1.07KiB| 1.07KiB| 1.07KiB| -180,-85,180,85 |
| 1| 2| 141B| 167B| 154B| -180,-85,0,85 |
| 2| 4| 418B| 988B| 589B| -90,-67,90,67 |
| 3| 7| 52B| 962B| 311B| -45,-41,90,67 |
| 4| 13| 52B| 867B| 249B| -22,-22,157,56 |
| 5| 27| 52B| 741B| 193B| -11,-11,146,49 |
| 6| 69| 52B| 679B| 144B| -6,-6,146,45 |
| 7| 214| 48B| 633B| 118B| -3,-3,143,45 |
| 8| 751| 48B| 420B| 103B| -3,-1,143,45 |
| all| 1088| 48B| 1.0KiB| 117B| -180,-85,180,85 |
Zoom | Count | Smallest | Largest | Average | Bounding Box
0 | 1 | 892B | 892B | 892B | -180,-85,180,85
1 | 4 | 474B | 983B | 609B | -180,-85,180,85
2 | 5 | 150B | 865B | 451B | -90,-67,180,67
3 | 8 | 57B | 839B | 264B | -45,-41,180,67
4 | 13 | 57B | 751B | 216B | -22,-22,157,56
5 | 27 | 57B | 666B | 167B | -11,-11,146,49
6 | 69 | 57B | 636B | 127B | -6,-6,146,45
all | 127 | 57B | 983B | 187B | -180,-85,180,85

View File

@ -1,10 +1,8 @@
[INFO ] cp_flat has an unrecognized metadata value foo={"bar":"foo"}
[INFO ] Using 'mvt' tile format from metadata table in file cp_flat
id: cp_flat
tile_info:
format: mvt
encoding: ''
layer_type: null
encoding: gzip
tilejson:
tilejson: 3.0.0
tiles: []
@ -21,5 +19,4 @@ tilejson:
foo: '{"bar":"foo"}'
format: mvt
generator: martin-cp v0.11.1
json: {}

View File

@ -2,15 +2,13 @@ MBTiles file summary for tests/mbtiles_temp_files/cp_flat.mbtiles
Schema: flat
Page size: 512B
| Zoom | Count |Smallest | Largest | Average | BBox |
| 0| 1| 1.07KiB| 1.07KiB| 1.07KiB| -180,-85,180,85 |
| 1| 2| 141B| 167B| 154B| -180,-85,0,85 |
| 2| 4| 418B| 988B| 589B| -90,-67,90,67 |
| 3| 7| 52B| 962B| 311B| -45,-41,90,67 |
| 4| 13| 52B| 867B| 249B| -22,-22,157,56 |
| 5| 27| 52B| 741B| 193B| -11,-11,146,49 |
| 6| 69| 52B| 679B| 144B| -6,-6,146,45 |
| 7| 214| 48B| 633B| 118B| -3,-3,143,45 |
| 8| 751| 48B| 420B| 103B| -3,-1,143,45 |
| all| 1088| 48B| 1.0KiB| 117B| -180,-85,180,85 |
Zoom | Count | Smallest | Largest | Average | Bounding Box
0 | 1 | 643B | 643B | 643B | -180,-85,180,85
1 | 2 | 150B | 172B | 161B | -180,-85,0,85
2 | 4 | 291B | 690B | 414B | -90,-67,90,67
3 | 7 | 75B | 727B | 263B | -45,-41,90,67
4 | 13 | 75B | 684B | 225B | -22,-22,157,56
5 | 27 | 75B | 659B | 195B | -11,-11,146,49
6 | 69 | 75B | 633B | 155B | -6,-6,146,45
all | 123 | 75B | 727B | 190B | -180,-85,180,85

File diff suppressed because one or more lines are too long

View File

@ -2,15 +2,8 @@ MBTiles file summary for tests/mbtiles_temp_files/cp_normalized.mbtiles
Schema: normalized
Page size: 512B
| Zoom | Count |Smallest | Largest | Average | BBox |
| 0| 1| 1.07KiB| 1.07KiB| 1.07KiB| -180,-85,180,85 |
| 1| 2| 141B| 167B| 154B| -180,-85,0,85 |
| 2| 4| 418B| 988B| 589B| -90,-67,90,67 |
| 3| 7| 52B| 962B| 311B| -45,-41,90,67 |
| 4| 13| 52B| 867B| 249B| -22,-22,157,56 |
| 5| 27| 52B| 741B| 193B| -11,-11,146,49 |
| 6| 69| 52B| 679B| 144B| -6,-6,146,45 |
| 7| 214| 48B| 633B| 118B| -3,-3,143,45 |
| 8| 751| 48B| 420B| 103B| -3,-1,143,45 |
| all| 1088| 48B| 1.0KiB| 117B| -180,-85,180,85 |
Zoom | Count | Smallest | Largest | Average | Bounding Box
0 | 1 | 20.7KiB | 20.7KiB | 20.7KiB | -180,-85,180,85
1 | 4 | 11.8KiB | 20.6KiB | 16.4KiB | -180,-85,180,85
all | 5 | 11.8KiB | 20.7KiB | 17.2KiB | -180,-85,180,85

View File

@ -4,13 +4,13 @@ File size: 48.00KiB
Page size: 4.00KiB
Page count: 12
| Zoom | Count |Smallest | Largest | Average | BBox |
| 0| 1| 1.08KiB| 1.08KiB| 1.08KiB| -180,-85,180,85 |
| 1| 4| 160B| 650B| 366B| -180,-85,180,85 |
| 2| 7| 137B| 495B| 239B| -180,-67,180,67 |
| 3| 17| 67B| 246B| 134B| -135,-41,180,67 |
| 4| 38| 64B| 175B| 86B| -135,-41,180,67 |
| 5| 57| 64B| 107B| 72B| -124,-41,180,62 |
| 6| 72| 64B| 97B| 68B| -124,-41,180,62 |
| all| 196| 64B| 1.0KiB| 96B| -180,-85,180,85 |
Zoom | Count | Smallest | Largest | Average | Bounding Box
0 | 1 | 1.0KiB | 1.0KiB | 1.0KiB | -180,-85,180,85
1 | 4 | 160B | 650B | 366B | -180,-85,180,85
2 | 7 | 137B | 495B | 239B | -180,-67,180,67
3 | 17 | 67B | 246B | 134B | -135,-41,180,67
4 | 38 | 64B | 175B | 86B | -135,-41,180,67
5 | 57 | 64B | 107B | 72B | -124,-41,180,62
6 | 72 | 64B | 97B | 68B | -124,-41,180,62
all | 196 | 64B | 1.0KiB | 96B | -180,-85,180,85

View File

@ -386,13 +386,13 @@ if [[ "$MARTIN_CP_BIN" != "-" ]]; then
test_martin_cp "flat" "${CFG[@]}" \
--source table_source --mbtiles-type flat --concurrency 3 \
--min-zoom 0 --max-zoom 8 "--bbox=-2,-1,142.84,45"
--min-zoom 0 --max-zoom 6 "--bbox=-2,-1,142.84,45"
test_martin_cp "flat-with-hash" "${CFG[@]}" \
--source table_source --mbtiles-type flat-with-hash --concurrency 3 \
--min-zoom 0 --max-zoom 8 "--bbox=-2,-1,142.84,45"
--source function_zxy_query_test --url-query 'foo=bar&token=martin' --encoding 'identity' --mbtiles-type flat-with-hash --concurrency 3 \
--min-zoom 0 --max-zoom 6 "--bbox=-2,-1,142.84,45"
test_martin_cp "normalized" "${CFG[@]}" \
--source table_source --mbtiles-type normalized --concurrency 3 \
--min-zoom 0 --max-zoom 8 "--bbox=-2,-1,142.84,45"
--source geography-class-png --mbtiles-type normalized --concurrency 3 \
--min-zoom 0 --max-zoom 6 "--bbox=-2,-1,142.84,45"
unset DATABASE_URL