update token_count for OpenAIEmbeddings to accommodate truncation

This commit is contained in:
KCaverly 2023-09-06 15:09:15 -04:00
parent f4237ace40
commit 17237f748c

View File

@@ -181,18 +181,17 @@ impl EmbeddingProvider for OpenAIEmbeddings {
/// Tokenizes `span` with `OPENAI_BPE_TOKENIZER` and, when the number of tokens exceeds
/// `OPENAI_INPUT_LIMIT`, truncates the token list to that limit and decodes it back to text.
///
/// Returns the (possibly truncated) text together with a token count.
///
/// NOTE(review): this listing is a diff hunk whose `+`/`-` markers are not visible, so the
/// pre-change and post-change lines appear back to back. The "presumably removed/added"
/// notes below are inferred from the commit title and the hunk header (18 old lines,
/// 17 new lines) — confirm against the actual commit.
fn truncate(&self, span: &str) -> (String, usize) {
let mut tokens = OPENAI_BPE_TOKENIZER.encode_with_special_tokens(span);
// Presumably removed by this commit: captures the count before any truncation happens.
let token_count = tokens.len();
// Presumably removed: limit check written against the pre-captured count.
let output = if token_count > OPENAI_INPUT_LIMIT {
// Presumably added: the same limit check, reading the length directly from `tokens`.
let output = if tokens.len() > OPENAI_INPUT_LIMIT {
// Drop every token past the limit.
tokens.truncate(OPENAI_INPUT_LIMIT);
OPENAI_BPE_TOKENIZER
// Presumably removed: hands `tokens` itself to `decode`
// (presumably by value, so it could not be read afterwards — TODO confirm).
.decode(tokens)
// Presumably added: decodes a copy so `tokens` is still available for the returned count.
.decode(tokens.clone())
// If decoding fails, fall back to the original `span`.
// NOTE(review): that fallback text is untruncated, while `tokens` has already been
// truncated above — confirm the text/count mismatch is acceptable to callers.
.ok()
.unwrap_or_else(|| span.to_string())
} else {
// Within the limit: return the text unchanged.
span.to_string()
};
// Presumably removed: reports the token count taken before truncation.
(output, token_count)
// Presumably added: reports the length of `tokens` after any truncation.
(output, tokens.len())
}
async fn embed_batch(&self, spans: Vec<String>) -> Result<Vec<Embedding>> {