diff --git a/src-tauri/Cargo.lock b/src-tauri/Cargo.lock
index 4181339d6..803a86b0c 100644
--- a/src-tauri/Cargo.lock
+++ b/src-tauri/Cargo.lock
@@ -1494,6 +1494,7 @@ dependencies = [
  "serde",
  "serde-jsonlines",
  "serde_json",
+ "sha1",
  "sha2",
  "similar",
  "tantivy",
diff --git a/src-tauri/Cargo.toml b/src-tauri/Cargo.toml
index 493668009..c1fabc415 100644
--- a/src-tauri/Cargo.toml
+++ b/src-tauri/Cargo.toml
@@ -56,6 +56,7 @@ rusqlite = { version = "0.28.0", features = [ "bundled", "blob", "hooks" ] }
 refinery = { version = "0.8", features = [ "rusqlite" ] }
 r2d2 = "0.8.10"
 r2d2_sqlite = { version = "0.21.0", features = ["bundled"] }
+sha1 = "0.10.5"
 
 [features]
 # by default Tauri runs in production mode
diff --git a/src-tauri/src/app/files/database.rs b/src-tauri/src/app/files/database.rs
index b4fc12216..193b31fd9 100644
--- a/src-tauri/src/app/files/database.rs
+++ b/src-tauri/src/app/files/database.rs
@@ -1,6 +1,7 @@
 use std::collections::HashMap;
 
 use anyhow::{Context, Result};
+use sha1::{Digest, Sha1};
 
 use crate::database;
 
@@ -15,12 +16,39 @@ impl Database {
     }
 
     pub fn insert(&self, session_id: &str, file_path: &str, content: &str) -> Result<()> {
+        let mut hasher = Sha1::new();
+        hasher.update(content);
+        let sha1 = hasher.finalize();
+
         self.database.transaction(|tx| -> Result<()> {
-            let mut stmt = insert_stmt(tx).context("Failed to prepare insert statement")?;
+            let mut stmt = is_content_exist_by_sha1_stmt(tx)
+                .context("Failed to prepare is_content_exist_by_sha1 statement")?;
+            let mut rows = stmt
+                .query(rusqlite::named_params! {
+                    ":sha1": sha1.as_slice(),
+                })
+                .context("Failed to execute is_content_exist_by_sha1 statement")?;
+            let is_content_exist: bool = rows
+                .next()
+                .context("Failed to iterate over is_content_exist_by_sha1 results")?
+                .is_some();
+
+            if !is_content_exist {
+                let mut stmt =
+                    insert_content_stmt(tx).context("Failed to prepare insert statement")?;
+                stmt.execute(rusqlite::named_params! {
+                    ":sha1": sha1.as_slice(),
+                    ":content": content,
+                })
+                .context("Failed to execute insert statement")?;
+            }
+
+            let mut stmt =
+                insert_file_stmt(tx).context("Failed to prepare insert file statement")?;
             stmt.execute(rusqlite::named_params! {
                 ":session_id": session_id,
                 ":file_path": file_path,
-                ":content": content,
+                ":sha1": sha1.as_slice(),
             })
             .context("Failed to execute insert statement")?;
             Ok(())
@@ -112,22 +140,43 @@ fn list_by_session_id_stmt<'conn>(
     Ok(tx.prepare_cached(
         "SELECT `file_path`, `content`
         FROM `files`
+        JOIN `contents` ON `files`.`sha1` = `contents`.`sha1`
         WHERE `session_id` = :session_id",
     )?)
 }
 
-fn insert_stmt<'conn>(
+fn is_content_exist_by_sha1_stmt<'conn>(
+    tx: &'conn rusqlite::Transaction,
+) -> Result<rusqlite::CachedStatement<'conn>> {
+    Ok(tx.prepare_cached(
+        "SELECT 1
+         FROM `contents`
+         WHERE `sha1` = :sha1",
+    )?)
+}
+
+fn insert_content_stmt<'conn>(
+    tx: &'conn rusqlite::Transaction,
+) -> Result<rusqlite::CachedStatement<'conn>> {
+    Ok(tx.prepare_cached(
+        "INSERT INTO `contents` (
+            `sha1`, `content`
+        ) VALUES (
+            :sha1, :content
+        )",
+    )?)
+}
+
+fn insert_file_stmt<'conn>(
     tx: &'conn rusqlite::Transaction,
 ) -> Result<rusqlite::CachedStatement<'conn>> {
     Ok(tx.prepare_cached(
         "INSERT INTO `files` (
-            `session_id`, `file_path`, `content`
+            `session_id`, `file_path`, `sha1`
         ) VALUES (
-            :session_id, :file_path, :content
-        )
-        ON CONFLICT(`session_id`, `file_path`) DO UPDATE SET
-            `content` = :content
-        ",
+            :session_id, :file_path, :sha1
+        ) ON CONFLICT(`session_id`, `file_path`)
+        DO UPDATE SET `sha1` = :sha1",
     )?)
 }
 
@@ -136,8 +185,9 @@ fn get_by_rowid_stmt<'conn>(
 ) -> Result<rusqlite::CachedStatement<'conn>> {
     Ok(tx.prepare_cached(
         "SELECT `file_path`, `content`, `session_id`
-        FROM `files`
-        WHERE `rowid` = :rowid",
+         FROM `files`
+         JOIN `contents` ON `files`.`sha1` = `contents`.`sha1`
+         WHERE `files`.`rowid` = :rowid",
     )?)
 }
 
diff --git a/src-tauri/src/database/migrations/V2__files.sql b/src-tauri/src/database/migrations/V2__files.sql
index 9a2847cb3..135d6c0db 100644
--- a/src-tauri/src/database/migrations/V2__files.sql
+++ b/src-tauri/src/database/migrations/V2__files.sql
@@ -1,8 +1,13 @@
 CREATE TABLE `files` (
     `session_id` text NOT NULL,
     `file_path` text NOT NULL,
-    `content` blob NOT NULL,
+    `sha1` blob NOT NULL,
     PRIMARY KEY (`session_id`, `file_path`)
 );
 
 CREATE INDEX `files_session_id_index` ON `files` (`session_id`);
+
+CREATE TABLE `contents` (
+    `sha1` blob NOT NULL PRIMARY KEY,
+    `content` blob NOT NULL
+);