perf: ⚡️ signed_url (#2159)

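Improve speed by memoising signed URLs: within a single generate_source call, each file path is signed at most once and the result is reused for any further source documents that point to the same file.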

# Description

Please include a summary of the changes and the related issue. Please
also include relevant motivation and context.

## Checklist before requesting a review

Please delete options that are not relevant.

- [ ] My code follows the style guidelines of this project
- [ ] I have performed a self-review of my code
- [ ] I have commented hard-to-understand areas
- [ ] I have ideally added tests that prove my fix is effective or that
my feature works
- [ ] New and existing unit tests pass locally with my changes
- [ ] Any dependent changes have been merged

## Screenshots (if appropriate):
This commit is contained in:
Stan Girard 2024-02-06 20:09:01 -08:00 committed by GitHub
parent 354c5879ab
commit 503654c07a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -42,6 +42,9 @@ def generate_source(result, brain):
# Initialize an empty list for sources
sources_list: List[Sources] = []
# Initialize a dictionary for storing generated URLs
generated_urls = {}
# Get source documents from the result, default to an empty list if not found
source_documents = result.get("source_documents", [])
@@ -73,9 +76,16 @@ def generate_source(result, brain):
if is_url:
source_url = doc.metadata["original_file_name"]
else:
source_url = generate_file_signed_url(
f"{brain.brain_id}/{doc.metadata['file_name']}"
).get("signedURL", "")
file_path = f"{brain.brain_id}/{doc.metadata['file_name']}"
# Check if the URL has already been generated
if file_path in generated_urls:
source_url = generated_urls[file_path]
else:
source_url = generate_file_signed_url(file_path).get(
"signedURL", ""
)
# Store the generated URL
generated_urls[file_path] = source_url
# Append a new Sources object to the list
sources_list.append(