mirror of
https://github.com/gitbutlerapp/gitbutler.git
synced 2025-01-08 19:06:38 +03:00
custom snippet generator
This commit is contained in:
parent
86209c04a5
commit
9e73226702
81
src-tauri/src/search/highlighted.rs
Normal file
81
src-tauri/src/search/highlighted.rs
Normal file
@ -0,0 +1,81 @@
|
||||
use std::{collections::HashSet, ops::Range};
|
||||
|
||||
use tantivy::Snippet;
|
||||
|
||||
// this is similar to Snippet.to_html, but only extracts the highlighted parts
|
||||
pub fn get_highlighted(snippet: &Snippet) -> Vec<String> {
|
||||
let mut result = HashSet::new();
|
||||
|
||||
for item in collapse_overlapped_ranges(&snippet.highlighted()) {
|
||||
result.insert(snippet.fragment()[item.clone()].to_string());
|
||||
}
|
||||
|
||||
let mut vec = result.into_iter().collect::<Vec<String>>();
|
||||
vec.sort();
|
||||
vec
|
||||
}
|
||||
|
||||
/// Merges overlapping ranges into single covering ranges.
///
/// Adapted from `tantivy::Snippet`. Two neighbouring ranges are merged only
/// when the earlier one ends strictly after the later one starts; ranges that
/// merely touch (`0..1` and `1..2`) are kept separate. An empty input yields
/// an empty vector.
///
/// NOTE(review): each range is only compared against the range currently
/// being accumulated, so the input is presumably expected to be sorted by
/// start offset; confirm against the caller.
fn collapse_overlapped_ranges(ranges: &[Range<usize>]) -> Vec<Range<usize>> {
    let mut result = Vec::new();
    let mut ranges_it = ranges.iter();

    // Seed the accumulator with the first range; nothing to do when empty.
    let mut current = match ranges_it.next() {
        Some(range) => range.clone(),
        None => return result,
    };

    // Continue with the *remaining* ranges. Restarting from the beginning of
    // `ranges` would compare the first range against itself, which emits an
    // empty leading range (e.g. `2..2`) twice.
    for range in ranges_it {
        if current.end > range.start {
            // Overlap: extend the accumulated range to cover both.
            current = current.start..std::cmp::max(current.end, range.end);
        } else {
            // Gap (or touching): flush the accumulated range and start anew.
            result.push(current);
            current = range.clone();
        }
    }

    result.push(current);
    result
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Covers disjoint, touching, and overlapping inputs.
    #[test]
    fn test_collapse_overlapped_ranges() {
        // (input, expected) pairs.
        let cases: [(Vec<Range<usize>>, Vec<Range<usize>>); 5] = [
            // Disjoint ranges are left alone.
            (vec![0..1, 2..3], vec![0..1, 2..3]),
            // Touching ranges are not merged.
            (vec![0..1, 1..2], vec![0..1, 1..2]),
            // Second range ends where the first ends.
            (vec![0..2, 1..2], vec![0..2]),
            // Second range extends past the first.
            (vec![0..2, 1..3], vec![0..3]),
            // Second range is fully contained in the first.
            (vec![0..3, 1..2], vec![0..3]),
        ];

        for (input, expected) in &cases {
            assert_eq!(&collapse_overlapped_ranges(input), expected);
        }
    }

    // NOTE(review): a disabled `test_snippet_with_overlapped_highlighted_ranges`
    // used to sit here as commented-out code. It called `search_fragments` and
    // `select_best_fragment_combination`, which are not defined in this file
    // (presumably tantivy internals), so it could not be enabled as written.
}
|
@ -1,6 +1,7 @@
|
||||
mod index;
|
||||
mod meta;
|
||||
mod searcher;
|
||||
mod highlighted;
|
||||
|
||||
pub use searcher::{Query, Results, Searcher};
|
||||
|
||||
|
@ -12,7 +12,7 @@ use tantivy::{collector, directory::MmapDirectory, IndexWriter};
|
||||
use tantivy::{query::QueryParser, Term};
|
||||
use tantivy::{schema::IndexRecordOption, tokenizer};
|
||||
|
||||
use crate::{bookmarks, deltas, gb_repository, sessions};
|
||||
use crate::{bookmarks, deltas, gb_repository, search::highlighted::get_highlighted, sessions};
|
||||
|
||||
use super::{index, meta};
|
||||
|
||||
@ -99,7 +99,7 @@ impl Searcher {
|
||||
);
|
||||
let count_handle = collectors.add_collector(collector::Count);
|
||||
|
||||
let snippet_generator = tantivy::SnippetGenerator::create(
|
||||
let diff_snippet_generator = tantivy::SnippetGenerator::create(
|
||||
&searcher,
|
||||
&query,
|
||||
self.index.schema().get_field("diff").unwrap(),
|
||||
@ -112,41 +112,16 @@ impl Searcher {
|
||||
let page = top_docs
|
||||
.iter()
|
||||
.map(|(_score, doc_address)| {
|
||||
let retrieved_doc = searcher.doc(*doc_address)?;
|
||||
|
||||
let project_id = retrieved_doc
|
||||
.get_first(self.index.schema().get_field("project_id").unwrap())
|
||||
.unwrap()
|
||||
.as_text()
|
||||
.unwrap();
|
||||
let file_path = retrieved_doc
|
||||
.get_first(self.index.schema().get_field("file_path").unwrap())
|
||||
.unwrap()
|
||||
.as_text()
|
||||
.unwrap();
|
||||
let session_id = retrieved_doc
|
||||
.get_first(self.index.schema().get_field("session_id").unwrap())
|
||||
.unwrap()
|
||||
.as_text()
|
||||
.unwrap();
|
||||
let index = retrieved_doc
|
||||
.get_first(self.index.schema().get_field("index").unwrap())
|
||||
.unwrap()
|
||||
.as_u64()
|
||||
.unwrap();
|
||||
let snippet = snippet_generator.snippet_from_doc(&retrieved_doc);
|
||||
let fragment = snippet.fragment();
|
||||
let highlighted: Vec<String> = snippet
|
||||
.highlighted()
|
||||
.iter()
|
||||
.map(|range| fragment[range.start..range.end].to_string())
|
||||
.collect();
|
||||
let doc = &searcher.doc(*doc_address)?;
|
||||
let index_document =
|
||||
index::IndexDocument::from_document(&self.index.schema(), &doc);
|
||||
let snippet = diff_snippet_generator.snippet_from_doc(&doc);
|
||||
Ok(SearchResult {
|
||||
project_id: project_id.to_string(),
|
||||
file_path: file_path.to_string(),
|
||||
session_id: session_id.to_string(),
|
||||
highlighted,
|
||||
index,
|
||||
project_id: index_document.project_id.unwrap(),
|
||||
file_path: index_document.file_path.unwrap(),
|
||||
session_id: index_document.session_id.unwrap(),
|
||||
highlighted: get_highlighted(&snippet),
|
||||
index: index_document.index.unwrap(),
|
||||
})
|
||||
})
|
||||
.collect::<Result<Vec<SearchResult>>>()?;
|
||||
|
@ -514,8 +514,7 @@ fn search_by_filename() -> Result<()> {
|
||||
|
||||
let searcher = super::Searcher::at(index_path).unwrap();
|
||||
|
||||
let write_result = searcher.index_session(&gb_repo, &session);
|
||||
assert!(write_result.is_ok());
|
||||
searcher.index_session(&gb_repo, &session)?;
|
||||
|
||||
let found_result = searcher
|
||||
.search(&super::Query {
|
||||
@ -540,3 +539,65 @@ fn search_by_filename() -> Result<()> {
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Indexes a single session whose only delta inserts "hello world hello" into
/// `test.txt`, then checks the `highlighted` terms reported for two queries.
#[test]
fn test_highlight() -> Result<()> {
    // Project fixture plus the on-disk stores the repository needs.
    let repo = test_repository()?;
    let project = test_project(&repo)?;
    let gb_repo_dir = tempdir()?.path().to_str().unwrap().to_string();
    let local_storage = storage::Storage::from_path(tempdir()?.path().to_path_buf());
    let projects_storage = projects::Storage::new(local_storage.clone());
    projects_storage.add_project(&project)?;
    let users_storage = users::Storage::new(local_storage);
    let gb_repo = gb_repository::Repository::open(
        gb_repo_dir,
        project.id.clone(),
        projects_storage.clone(),
        users_storage,
    )?;

    let index_dir = tempdir()?.path().to_str().unwrap().to_string();

    // Record one delta in the current session, then flush it.
    let current_session = gb_repo.get_or_create_current_session()?;
    let session_writer = sessions::Writer::open(&gb_repo, &current_session)?;
    session_writer.write_deltas(
        Path::new("test.txt"),
        &vec![deltas::Delta {
            operations: vec![deltas::Operation::Insert((
                0,
                "hello world hello".to_string(),
            ))],
            timestamp_ms: 0,
        }],
    )?;
    let flushed_session = gb_repo.flush()?.unwrap();

    let searcher = super::Searcher::at(index_dir).unwrap();
    searcher.index_session(&gb_repo, &flushed_session)?;

    // Both assertions below use the same query shape; only the terms differ.
    let run_query = |terms: &str| {
        searcher.search(&super::Query {
            project_id: gb_repo.get_project_id().to_string(),
            q: terms.to_string(),
            limit: 10,
            offset: None,
        })
    };

    // Single term: the repeated "hello" is reported once.
    let single_term_page = run_query("hello")?.page;
    assert_eq!(single_term_page.len(), 1);
    assert_eq!(single_term_page[0].highlighted, vec!["hello"]);

    // Two terms: expected highlights, in sorted order.
    let two_term_page = run_query("hello world")?.page;
    assert_eq!(two_term_page.len(), 1);
    assert_eq!(two_term_page[0].highlighted, vec!["hello", "hello world"]);

    Ok(())
}
|
||||
|
Loading…
Reference in New Issue
Block a user