mirror of
https://github.com/zed-industries/zed.git
synced 2024-11-08 07:35:01 +03:00
Rename Sha1 to DocumentDigest
Co-Authored-By: Kyle Caverly <kyle@zed.dev>
This commit is contained in:
parent 3001a46f69
commit 2503d54d19
@ -1,4 +1,8 @@
|
||||
use crate::{embedding::Embedding, parsing::Document, SEMANTIC_INDEX_VERSION};
|
||||
use crate::{
|
||||
embedding::Embedding,
|
||||
parsing::{Document, DocumentDigest},
|
||||
SEMANTIC_INDEX_VERSION,
|
||||
};
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
use futures::channel::oneshot;
|
||||
use gpui::executor;
|
||||
@ -165,7 +169,7 @@ impl VectorDatabase {
|
||||
end_byte INTEGER NOT NULL,
|
||||
name VARCHAR NOT NULL,
|
||||
embedding BLOB NOT NULL,
|
||||
sha1 BLOB NOT NULL,
|
||||
digest BLOB NOT NULL,
|
||||
FOREIGN KEY(file_id) REFERENCES files(id) ON DELETE CASCADE
|
||||
)",
|
||||
[],
|
||||
@ -225,14 +229,14 @@ impl VectorDatabase {
|
||||
// I imagine we can speed this up with a bulk insert of some kind.
|
||||
for document in documents {
|
||||
db.execute(
|
||||
"INSERT INTO documents (file_id, start_byte, end_byte, name, embedding, sha1) VALUES (?1, ?2, ?3, ?4, ?5, ?6)",
|
||||
"INSERT INTO documents (file_id, start_byte, end_byte, name, embedding, digest) VALUES (?1, ?2, ?3, ?4, ?5, ?6)",
|
||||
params![
|
||||
file_id,
|
||||
document.range.start.to_string(),
|
||||
document.range.end.to_string(),
|
||||
document.name,
|
||||
document.embedding,
|
||||
document.sha1
|
||||
document.digest
|
||||
],
|
||||
)?;
|
||||
}
|
||||
|
@ -1,11 +1,11 @@
|
||||
use crate::embedding::{EmbeddingProvider, Embedding};
|
||||
use crate::embedding::{Embedding, EmbeddingProvider};
|
||||
use anyhow::{anyhow, Result};
|
||||
use language::{Grammar, Language};
|
||||
use rusqlite::{
|
||||
types::{FromSql, FromSqlResult, ToSqlOutput, ValueRef},
|
||||
ToSql,
|
||||
};
|
||||
use sha1::Digest;
|
||||
use sha1::{Digest, Sha1};
|
||||
use std::{
|
||||
cmp::{self, Reverse},
|
||||
collections::HashSet,
|
||||
@ -15,10 +15,10 @@ use std::{
|
||||
};
|
||||
use tree_sitter::{Parser, QueryCursor};
|
||||
|
||||
#[derive(Debug, PartialEq, Clone)]
|
||||
pub struct Sha1([u8; 20]);
|
||||
#[derive(Debug, PartialEq, Eq, Clone, Hash)]
|
||||
pub struct DocumentDigest([u8; 20]);
|
||||
|
||||
impl FromSql for Sha1 {
|
||||
impl FromSql for DocumentDigest {
|
||||
fn column_result(value: ValueRef) -> FromSqlResult<Self> {
|
||||
let blob = value.as_blob()?;
|
||||
let bytes =
|
||||
@ -27,19 +27,19 @@ impl FromSql for Sha1 {
|
||||
expected_size: 20,
|
||||
blob_size: blob.len(),
|
||||
})?;
|
||||
return Ok(Sha1(bytes));
|
||||
return Ok(DocumentDigest(bytes));
|
||||
}
|
||||
}
|
||||
|
||||
impl ToSql for Sha1 {
|
||||
impl ToSql for DocumentDigest {
|
||||
fn to_sql(&self) -> rusqlite::Result<ToSqlOutput> {
|
||||
self.0.to_sql()
|
||||
}
|
||||
}
|
||||
|
||||
impl From<&'_ str> for Sha1 {
|
||||
impl From<&'_ str> for DocumentDigest {
|
||||
fn from(value: &'_ str) -> Self {
|
||||
let mut sha1 = sha1::Sha1::new();
|
||||
let mut sha1 = Sha1::new();
|
||||
sha1.update(value);
|
||||
Self(sha1.finalize().into())
|
||||
}
|
||||
@ -51,7 +51,7 @@ pub struct Document {
|
||||
pub range: Range<usize>,
|
||||
pub content: String,
|
||||
pub embedding: Option<Embedding>,
|
||||
pub sha1: Sha1,
|
||||
pub digest: DocumentDigest,
|
||||
pub token_count: usize,
|
||||
}
|
||||
|
||||
@ -102,17 +102,14 @@ impl CodeContextRetriever {
|
||||
.replace("<path>", relative_path.to_string_lossy().as_ref())
|
||||
.replace("<language>", language_name.as_ref())
|
||||
.replace("<item>", &content);
|
||||
|
||||
let sha1 = Sha1::from(document_span.as_str());
|
||||
|
||||
let digest = DocumentDigest::from(document_span.as_str());
|
||||
let (document_span, token_count) = self.embedding_provider.truncate(&document_span);
|
||||
|
||||
Ok(vec![Document {
|
||||
range: 0..content.len(),
|
||||
content: document_span,
|
||||
embedding: Default::default(),
|
||||
name: language_name.to_string(),
|
||||
sha1,
|
||||
digest,
|
||||
token_count,
|
||||
}])
|
||||
}
|
||||
@ -121,14 +118,14 @@ impl CodeContextRetriever {
|
||||
let document_span = MARKDOWN_CONTEXT_TEMPLATE
|
||||
.replace("<path>", relative_path.to_string_lossy().as_ref())
|
||||
.replace("<item>", &content);
|
||||
let sha1 = Sha1::from(document_span.as_str());
|
||||
let digest = DocumentDigest::from(document_span.as_str());
|
||||
let (document_span, token_count) = self.embedding_provider.truncate(&document_span);
|
||||
Ok(vec![Document {
|
||||
range: 0..content.len(),
|
||||
content: document_span,
|
||||
embedding: None,
|
||||
name: "Markdown".to_string(),
|
||||
sha1,
|
||||
digest,
|
||||
token_count,
|
||||
}])
|
||||
}
|
||||
@ -308,13 +305,13 @@ impl CodeContextRetriever {
|
||||
);
|
||||
}
|
||||
|
||||
let sha1 = Sha1::from(document_content.as_str());
|
||||
let sha1 = DocumentDigest::from(document_content.as_str());
|
||||
documents.push(Document {
|
||||
name,
|
||||
content: document_content,
|
||||
range: item_range.clone(),
|
||||
embedding: None,
|
||||
sha1,
|
||||
digest: sha1,
|
||||
token_count: 0,
|
||||
})
|
||||
}
|
||||
|
@ -37,7 +37,7 @@ use util::{
|
||||
};
|
||||
use workspace::WorkspaceCreated;
|
||||
|
||||
const SEMANTIC_INDEX_VERSION: usize = 7;
|
||||
const SEMANTIC_INDEX_VERSION: usize = 8;
|
||||
const BACKGROUND_INDEXING_DELAY: Duration = Duration::from_secs(600);
|
||||
|
||||
pub fn init(
|
||||
|
@ -1,7 +1,7 @@
|
||||
use crate::{
|
||||
embedding::{DummyEmbeddings, Embedding, EmbeddingProvider},
|
||||
embedding_queue::EmbeddingQueue,
|
||||
parsing::{subtract_ranges, CodeContextRetriever, Document, Sha1},
|
||||
parsing::{subtract_ranges, CodeContextRetriever, Document, DocumentDigest},
|
||||
semantic_index_settings::SemanticIndexSettings,
|
||||
FileToEmbed, JobHandle, SearchResult, SemanticIndex,
|
||||
};
|
||||
@ -220,13 +220,13 @@ async fn test_embedding_batching(cx: &mut TestAppContext, mut rng: StdRng) {
|
||||
.with_simple_text()
|
||||
.take(content_len)
|
||||
.collect::<String>();
|
||||
let sha1 = Sha1::from(content.as_str());
|
||||
let digest = DocumentDigest::from(content.as_str());
|
||||
Document {
|
||||
range: 0..10,
|
||||
embedding: None,
|
||||
name: format!("document {document_ix}"),
|
||||
content,
|
||||
sha1,
|
||||
digest,
|
||||
token_count: rng.gen_range(10..30),
|
||||
}
|
||||
})
|
||||
|
Loading…
Reference in New Issue
Block a user