From 7b73e2824b9f1f4585f4ac6d7e3518745a648244 Mon Sep 17 00:00:00 2001 From: Piotr Osiewicz <24362066+osiewicz@users.noreply.github.com> Date: Fri, 8 Mar 2024 14:40:26 +0100 Subject: [PATCH] fs: allocate backing storage once in Fs::load (#9020) `futures_lite::AsyncReadExt::read_to_string` (which we use in `RealFs::load`) explicitly does not allocate memory for String contents up front, which leads to excessive reallocations. That reallocation time is a significant contributor to the time we spend loading files (especially large ones). For example, out of the ~1s that it takes to open up a 650MB ASCII buffer on my machine (after changes related to fingerprinting from #9007), 350ms is spent in `RealFs::load`. This change slashes that figure to ~110ms, which is still *a lot*. About 60ms of the remaining 110ms is spent zeroing memory. Sadly, the `AsyncReadExt` API forces us to zero a buffer we're reading into (whether it's via read_to_string or read_exact), but at the very least this commit alleviates unnecessary reallocations. We could probably use something like [simdutf8](https://docs.rs/simdutf8/latest/simdutf8/) to speed up UTF-8 validation in this method as well, though that takes only about 18ms out of 110ms, so while it is significant, I've left that out for now. Memory zeroing is a bigger problem at this point. Before: ![image](https://github.com/zed-industries/zed/assets/24362066/5e53c004-8a02-47db-bc75-04cb4113a6bc) After: ![image](https://github.com/zed-industries/zed/assets/24362066/00099032-d647-4683-b290-eaeb969cac4a) /cc @as-cii Release Notes: - Improved performance when loading large files.
--- crates/fs/src/fs.rs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/crates/fs/src/fs.rs b/crates/fs/src/fs.rs index 9c6c6092e3..5b07619746 100644 --- a/crates/fs/src/fs.rs +++ b/crates/fs/src/fs.rs @@ -223,9 +223,11 @@ impl Fs for RealFs { async fn load(&self, path: &Path) -> Result { let mut file = smol::fs::File::open(path).await?; - let mut text = String::new(); - file.read_to_string(&mut text).await?; - Ok(text) + // We use `read_exact` here instead of `read_to_string` as the latter is *very* + // happy to reallocate often, which comes into play when we're loading large files. + let mut storage = vec![0; file.metadata().await?.len() as usize]; + file.read_exact(&mut storage).await?; + Ok(String::from_utf8(storage)?) } async fn atomic_write(&self, path: PathBuf, data: String) -> Result<()> {