From 6b5e101f644fa743e5e24fcf0a09c9cda978b50f Mon Sep 17 00:00:00 2001 From: Nikita Galaiko Date: Mon, 29 May 2023 09:12:33 +0200 Subject: [PATCH] ngram tokenizer --- src-tauri/src/search/searcher.rs | 15 ++++-- src-tauri/src/search/searcher_test.rs | 71 ++++++++++++++++++++++++--- 2 files changed, 76 insertions(+), 10 deletions(-) diff --git a/src-tauri/src/search/searcher.rs b/src-tauri/src/search/searcher.rs index 72a0a7a33..1f7038f9f 100644 --- a/src-tauri/src/search/searcher.rs +++ b/src-tauri/src/search/searcher.rs @@ -131,10 +131,9 @@ impl Searcher { )); let diff_or_file_path_or_note_query = Box::new({ - let mut parser = + let parser = QueryParser::for_index(&self.index, vec![diff_field, file_path_field, note_field]); - parser.set_conjunction_by_default(); - parser.parse_query(&q.q)? + parser.parse_query(&format!("\"{}\"", &q.q))? }); let query = tantivy::query::BooleanQuery::intersection(vec![ @@ -329,8 +328,16 @@ fn build_schema() -> schema::Schema { ) .set_stored(); // text values stored to aloow updating document + let code_options = TextOptions::default() + .set_indexing_options( + TextFieldIndexing::default() + .set_tokenizer("ngram2_3") // text is indexed with ngram tokenizer to allow partial matching + .set_index_option(schema::IndexRecordOption::WithFreqsAndPositions), // text is indexed with positions to allow highlighted snippets generation + ) + .set_stored(); // text values stored to allow updating document + schema_builder.add_text_field("file_path", text_options.clone()); - schema_builder.add_text_field("diff", text_options.clone()); + schema_builder.add_text_field("diff", code_options); schema_builder.add_text_field("note", text_options); schema_builder.build() diff --git a/src-tauri/src/search/searcher_test.rs b/src-tauri/src/search/searcher_test.rs index 9d3c717f9..65fd4efd2 100644 --- a/src-tauri/src/search/searcher_test.rs +++ b/src-tauri/src/search/searcher_test.rs @@ -213,12 +213,10 @@ fn search_by_full_match() -> Result<()> { let 
writer = sessions::Writer::open(&gb_repo, &session)?; writer.write_deltas( Path::new("test.txt"), - &vec![ - deltas::Delta { - operations: vec![deltas::Operation::Insert((0, "hello".to_string()))], - timestamp_ms: 0, - }, - ], + &vec![deltas::Delta { + operations: vec![deltas::Operation::Insert((0, "hello".to_string()))], + timestamp_ms: 0, + }], )?; let session = gb_repo.flush()?; let session = session.unwrap(); @@ -413,3 +411,64 @@ fn test_delete_all() -> Result<()> { Ok(()) } + +#[test] +fn search_bookmark_by_phrase() -> Result<()> { + let repository = test_repository()?; + let project = test_project(&repository)?; + let gb_repo_path = tempdir()?.path().to_str().unwrap().to_string(); + let storage = storage::Storage::from_path(tempdir()?.path().to_path_buf()); + let project_store = projects::Storage::new(storage.clone()); + project_store.add_project(&project)?; + let user_store = users::Storage::new(storage); + let gb_repo = gb_repository::Repository::open( + gb_repo_path, + project.id.clone(), + project_store.clone(), + user_store, + )?; + + let index_path = tempdir()?.path().to_str().unwrap().to_string(); + + let session = gb_repo.get_or_create_current_session()?; + let writer = sessions::Writer::open(&gb_repo, &session)?; + writer.write_deltas( + Path::new("test.txt"), + &vec![deltas::Delta { + operations: vec![deltas::Operation::Insert((0, "Hello".to_string()))], + timestamp_ms: 0, + }], + )?; + let session = gb_repo.flush()?; + let session = session.unwrap(); + + let searcher = super::Searcher::at(index_path).unwrap(); + + searcher.index_session(&gb_repo, &session)?; + searcher.index_bookmark(&bookmarks::Bookmark { + project_id: gb_repo.get_project_id().to_string(), + timestamp_ms: 0, + created_timestamp_ms: 0, + updated_timestamp_ms: 0, + note: "bookmark text note".to_string(), + deleted: false, + })?; + + let result = searcher.search(&super::Query { + project_id: gb_repo.get_project_id().to_string(), + q: "bookmark note".to_string(), + limit: 10, + 
offset: None, + })?; + assert_eq!(result.total, 0); + + let result = searcher.search(&super::Query { + project_id: gb_repo.get_project_id().to_string(), + q: "text note".to_string(), + limit: 10, + offset: None, + })?; + assert_eq!(result.total, 1); + + Ok(()) +}