ngram tokenizer

This commit is contained in:
Nikita Galaiko 2023-05-29 09:12:33 +02:00
parent 3517cb5339
commit 6b5e101f64
2 changed files with 76 additions and 10 deletions

View File

@ -131,10 +131,9 @@ impl Searcher {
));
let diff_or_file_path_or_note_query = Box::new({
let mut parser =
let parser =
QueryParser::for_index(&self.index, vec![diff_field, file_path_field, note_field]);
parser.set_conjunction_by_default();
parser.parse_query(&q.q)?
parser.parse_query(&format!("\"{}\"", &q.q))?
});
let query = tantivy::query::BooleanQuery::intersection(vec![
@ -329,8 +328,16 @@ fn build_schema() -> schema::Schema {
)
.set_stored(); // text values stored to allow updating document
let code_options = TextOptions::default()
.set_indexing_options(
TextFieldIndexing::default()
.set_tokenizer("ngram2_3") // text is indexed with ngram tokenizer to allow partial matching
.set_index_option(schema::IndexRecordOption::WithFreqsAndPositions), // text is indexed with positions to allow highlighted snippets generation
)
.set_stored(); // text values stored to allow updating document
schema_builder.add_text_field("file_path", text_options.clone());
schema_builder.add_text_field("diff", text_options.clone());
schema_builder.add_text_field("diff", code_options);
schema_builder.add_text_field("note", text_options);
schema_builder.build()

View File

@ -213,12 +213,10 @@ fn search_by_full_match() -> Result<()> {
let writer = sessions::Writer::open(&gb_repo, &session)?;
writer.write_deltas(
Path::new("test.txt"),
&vec![
deltas::Delta {
&vec![deltas::Delta {
operations: vec![deltas::Operation::Insert((0, "hello".to_string()))],
timestamp_ms: 0,
},
],
}],
)?;
let session = gb_repo.flush()?;
let session = session.unwrap();
@ -413,3 +411,64 @@ fn test_delete_all() -> Result<()> {
Ok(())
}
/// Indexes one session (a single "Hello" insert into `test.txt`) plus one
/// bookmark whose note is "bookmark text note", then checks how multi-word
/// queries are matched against it:
///
/// * "bookmark note" — both words occur in the note but are not adjacent —
///   is expected to return no results;
/// * "text note" — the words are adjacent in the note — is expected to
///   return exactly one result.
#[test]
fn search_bookmark_by_phrase() -> Result<()> {
    // Project fixture.
    let repo = test_repository()?;
    let test_proj = test_project(&repo)?;

    // Only the path string is kept here; the value returned by `tempdir()`
    // is a temporary of this statement.
    let repo_dir = tempdir()?.path().to_str().unwrap().to_string();

    // Project and user stores are both built on the same storage root.
    let shared_storage = storage::Storage::from_path(tempdir()?.path().to_path_buf());
    let projects_storage = projects::Storage::new(shared_storage.clone());
    projects_storage.add_project(&test_proj)?;
    let users_storage = users::Storage::new(shared_storage);

    let gb_repo = gb_repository::Repository::open(
        repo_dir,
        test_proj.id.clone(),
        projects_storage.clone(),
        users_storage,
    )?;

    let index_dir = tempdir()?.path().to_str().unwrap().to_string();

    // Record a single delta in the current session, then flush it.
    let current_session = gb_repo.get_or_create_current_session()?;
    let writer = sessions::Writer::open(&gb_repo, &current_session)?;
    let hello_delta = deltas::Delta {
        operations: vec![deltas::Operation::Insert((0, "Hello".to_string()))],
        timestamp_ms: 0,
    };
    writer.write_deltas(Path::new("test.txt"), &vec![hello_delta])?;
    let flushed_session = gb_repo.flush()?.unwrap();

    // Index the flushed session and a bookmark carrying a three-word note.
    let searcher = super::Searcher::at(index_dir).unwrap();
    searcher.index_session(&gb_repo, &flushed_session)?;
    let bookmark = bookmarks::Bookmark {
        project_id: gb_repo.get_project_id().to_string(),
        timestamp_ms: 0,
        created_timestamp_ms: 0,
        updated_timestamp_ms: 0,
        note: "bookmark text note".to_string(),
        deleted: false,
    };
    searcher.index_bookmark(&bookmark)?;

    // Runs a search for `phrase` within this project (limit 10, no offset).
    let run_query = |phrase: &str| {
        searcher.search(&super::Query {
            project_id: gb_repo.get_project_id().to_string(),
            q: phrase.to_string(),
            limit: 10,
            offset: None,
        })
    };

    // Words present in the note but separated by another word: no match.
    assert_eq!(run_query("bookmark note")?.total, 0);
    // Words adjacent in the note: exactly one match.
    assert_eq!(run_query("text note")?.total, 1);

    Ok(())
}