mirror of
https://github.com/zed-industries/zed.git
synced 2024-11-08 07:35:01 +03:00
fix truncation bug, ensuring no invalid inputs are sent to OpenAI
This commit is contained in:
parent
5abad58b0d
commit
7d4d6c871b
@ -78,15 +78,13 @@ impl EmbeddingProvider for DummyEmbeddings {
|
||||
let token_count = tokens.len();
|
||||
let output = if token_count > OPENAI_INPUT_LIMIT {
|
||||
tokens.truncate(OPENAI_INPUT_LIMIT);
|
||||
OPENAI_BPE_TOKENIZER
|
||||
.decode(tokens)
|
||||
.ok()
|
||||
.unwrap_or_else(|| span.to_string())
|
||||
let new_input = OPENAI_BPE_TOKENIZER.decode(tokens.clone());
|
||||
new_input.ok().unwrap_or_else(|| span.to_string())
|
||||
} else {
|
||||
span.to_string()
|
||||
};
|
||||
|
||||
(output, token_count)
|
||||
(output, tokens.len())
|
||||
}
|
||||
}
|
||||
|
||||
@ -120,7 +118,7 @@ impl OpenAIEmbeddings {
|
||||
#[async_trait]
|
||||
impl EmbeddingProvider for OpenAIEmbeddings {
|
||||
fn max_tokens_per_batch(&self) -> usize {
|
||||
OPENAI_INPUT_LIMIT
|
||||
50000
|
||||
}
|
||||
|
||||
fn truncate(&self, span: &str) -> (String, usize) {
|
||||
|
@ -105,9 +105,11 @@ impl EmbeddingQueue {
|
||||
for fragment in &batch {
|
||||
let file = fragment.file.lock();
|
||||
spans.extend(
|
||||
file.documents[fragment.document_range.clone()]
|
||||
.iter()
|
||||
.map(|d| d.content.clone()),
|
||||
{
|
||||
file.documents[fragment.document_range.clone()]
|
||||
.iter()
|
||||
.map(|d| d.content.clone())
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user