Update logging for OpenAI embedding and remove redundant truncation

This commit is contained in:
KCaverly 2023-07-18 11:00:21 -04:00
parent b9fdfd60f0
commit ed1b1a5ccd

View File

@ -67,13 +67,17 @@ impl EmbeddingProvider for DummyEmbeddings {
} }
} }
const INPUT_LIMIT: usize = 8190; const OPENAI_INPUT_LIMIT: usize = 8190;
impl OpenAIEmbeddings { impl OpenAIEmbeddings {
pub fn new(client: Arc<dyn HttpClient>, executor: Arc<Background>) -> Self {
Self { client, executor }
}
fn truncate(span: String) -> String { fn truncate(span: String) -> String {
let mut tokens = OPENAI_BPE_TOKENIZER.encode_with_special_tokens(span.as_ref()); let mut tokens = OPENAI_BPE_TOKENIZER.encode_with_special_tokens(span.as_ref());
if tokens.len() > INPUT_LIMIT { if tokens.len() > OPENAI_INPUT_LIMIT {
tokens.truncate(INPUT_LIMIT); tokens.truncate(OPENAI_INPUT_LIMIT);
let result = OPENAI_BPE_TOKENIZER.decode(tokens.clone()); let result = OPENAI_BPE_TOKENIZER.decode(tokens.clone());
if result.is_ok() { if result.is_ok() {
let transformed = result.unwrap(); let transformed = result.unwrap();
@ -115,6 +119,7 @@ impl EmbeddingProvider for OpenAIEmbeddings {
.ok_or_else(|| anyhow!("no api key"))?; .ok_or_else(|| anyhow!("no api key"))?;
let mut request_number = 0; let mut request_number = 0;
let mut truncated = false;
let mut response: Response<AsyncBody>; let mut response: Response<AsyncBody>;
let mut spans: Vec<String> = spans.iter().map(|x| x.to_string()).collect(); let mut spans: Vec<String> = spans.iter().map(|x| x.to_string()).collect();
while request_number < MAX_RETRIES { while request_number < MAX_RETRIES {
@ -136,15 +141,18 @@ impl EmbeddingProvider for OpenAIEmbeddings {
self.executor.timer(delay).await; self.executor.timer(delay).await;
} }
StatusCode::BAD_REQUEST => { StatusCode::BAD_REQUEST => {
log::info!( // Only truncate if it hasn't been truncated before
"BAD REQUEST: {:?} {:?}", if !truncated {
&response.status(), for span in spans.iter_mut() {
response.body() *span = Self::truncate(span.clone());
); }
// Don't worry about delaying bad request, as we can assume truncated = true;
// we haven't been rate limited yet. } else {
for span in spans.iter_mut() { // If failing once already truncated, log the error and break the loop
*span = Self::truncate(span.to_string()); let mut body = String::new();
response.body_mut().read_to_string(&mut body).await?;
log::trace!("open ai bad request: {:?} {:?}", &response.status(), body);
break;
} }
} }
StatusCode::OK => { StatusCode::OK => {
@ -152,7 +160,7 @@ impl EmbeddingProvider for OpenAIEmbeddings {
response.body_mut().read_to_string(&mut body).await?; response.body_mut().read_to_string(&mut body).await?;
let response: OpenAIEmbeddingResponse = serde_json::from_str(&body)?; let response: OpenAIEmbeddingResponse = serde_json::from_str(&body)?;
log::info!( log::trace!(
"openai embedding completed. tokens: {:?}", "openai embedding completed. tokens: {:?}",
response.usage.total_tokens response.usage.total_tokens
); );