1
1
mirror of https://github.com/wez/wezterm.git synced 2024-12-23 05:12:40 +03:00

trim heap usage

I spent a few hours in heap profilers.  What I found was:

* Inefficient use of heap when building up runs of
  `Action::Print(char)`.
    -> Solve by adding `Action::PrintString(String)`
  and accumulating utf8 bytes rather than u32 codepoints.
* Inefficient use of heap when building Quad buffers: the default
  exponential growth of `Vec` tended to waste 40%-75% of the allocated
  capacity, and since we could keep ~1024 of these in cache, there's
  a lot of potential for waste.
   -> Solve by bounding the growth to 64 elements at a time once the
   buffer reaches that size.  This has similar characteristics to
   exponential growth at the default 80x24 terminal size.  May need to
   add a config option for this step size for users with very large
   terminals.
* Lazy eviction from the LFU caches. The underlying cache advisor is
  somewhat probabilistic and has a minimum cache size of 256, making
  it difficult to maintain low heap utilization.
   -> Solve by replacing it with a very simple LFU algorithm that scans
   the whole cache to pick an eviction candidate.  That scan doesn't
   seem to hurt much at the default terminal size with the default
   cache sizes, and its overhead shrinks if we make the cache sizes
   smaller.

Some further experimentation is needed to adjust defaults, but this
should help reduce heap usage.

refs: https://github.com/wez/wezterm/issues/2626
This commit is contained in:
Wez Furlong 2022-10-22 17:10:36 -07:00
parent 5dd1f513f9
commit 35ce2fe74d
9 changed files with 165 additions and 86 deletions

10
Cargo.lock generated
View File

@ -433,15 +433,6 @@ version = "1.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec8a7b6a70fde80372154c65702f00a0f56f3e1c36abbc6c440484be248856db"
[[package]]
name = "cache-advisor"
version = "1.0.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "11788ff413a0fe136b34756f5cce23a5a70fb28c18eccba75c5a6f9f7a95ad38"
dependencies = [
"crossbeam-queue",
]
[[package]]
name = "cache-padded"
version = "1.2.0"
@ -5618,7 +5609,6 @@ dependencies = [
"anyhow",
"benchmarking",
"bitflags",
"cache-advisor",
"cc",
"chrono",
"clap 4.0.15",

View File

@ -17,7 +17,7 @@ exclude = [
[profile.release]
opt-level = 3
# debug = 1
# debug = 2
[profile.dev]
# https://jakedeichert.com/blog/reducing-rust-incremental-compilation-times-on-macos-by-70-percent/

View File

@ -171,7 +171,7 @@ fn parse_buffered_data(pane_id: PaneId, dead: &Arc<AtomicBool>, mut rx: FileDesc
}
_ => {}
};
actions.push(action);
action.append_to(&mut actions);
if flush && !actions.is_empty() {
send_actions_to_mux(pane_id, dead, std::mem::take(&mut actions));

View File

@ -227,6 +227,11 @@ impl<'a> Performer<'a> {
}
match action {
Action::Print(c) => self.print(c),
Action::PrintString(s) => {
for c in s.chars() {
self.print(c)
}
}
Action::Control(code) => self.control(code),
Action::DeviceControl(ctrl) => self.device_control(ctrl),
Action::OperatingSystemCommand(osc) => self.osc_dispatch(*osc),

View File

@ -28,6 +28,8 @@ use vtparse::CsiParam;
pub enum Action {
/// Send a single printable character to the display
Print(char),
/// Send a string of printable characters to the display.
PrintString(String),
/// A C0 or C1 control code
Control(ControlCode),
/// Device control. This is uncommon wrt. terminal emulation.
@ -45,6 +47,32 @@ pub enum Action {
KittyImage(Box<KittyImage>),
}
impl Action {
    /// Push this `Action` onto the end of `dest`, coalescing printable
    /// output where possible.
    ///
    /// When `self` is `Print(c)`:
    /// * if the last element of `dest` is `PrintString`, `c` is appended
    ///   to that string;
    /// * if the last element is `Print`, that element is replaced by a
    ///   `PrintString` holding the earlier character followed by `c`.
    ///
    /// In every other case `self` is pushed as a new element.  Merging
    /// runs of single characters into one string reduces heap utilization.
    pub fn append_to(self, dest: &mut Vec<Self>) {
        // Only single printable characters are candidates for merging;
        // anything else is appended untouched.
        let ch = match self {
            Action::Print(ch) => ch,
            other => {
                dest.push(other);
                return;
            }
        };

        let merged = match dest.last_mut() {
            Some(Action::PrintString(run)) => {
                // Extend the existing run in place.
                run.push(ch);
                return;
            }
            Some(Action::Print(prior)) => {
                // Two adjacent single characters: start a new run.
                let mut run = prior.to_string();
                run.push(ch);
                run
            }
            _ => {
                dest.push(Action::Print(ch));
                return;
            }
        };

        // Swap the trailing `Print` for the combined run.
        if let Some(tail) = dest.last_mut() {
            *tail = Action::PrintString(merged);
        }
    }
}
#[cfg(all(test, target_pointer_width = "64"))]
#[test]
fn action_size() {
@ -61,6 +89,7 @@ impl Display for Action {
fn fmt(&self, f: &mut Formatter) -> Result<(), FmtError> {
match self {
Action::Print(c) => write!(f, "{}", c),
Action::PrintString(s) => write!(f, "{}", s),
Action::Control(c) => f.write_char(*c as u8 as char),
Action::DeviceControl(c) => c.fmt(f),
Action::OperatingSystemCommand(osc) => osc.fmt(f),

View File

@ -33,7 +33,6 @@ cc = "1.0"
[dependencies]
anyhow = "1.0"
bitflags = "1.3"
cache-advisor = "1.0"
chrono = {version="0.4", features=["unstable-locales"]}
clap = {version="4.0", features=["derive"]}
codec = { path = "../codec" }

View File

@ -1,5 +1,4 @@
#![allow(dead_code)]
use cache_advisor::CacheAdvisor;
use config::ConfigHandle;
use fnv::FnvHashMap;
use std::borrow::Borrow;
@ -14,18 +13,51 @@ const ENTRY_PERCENT: u8 = 20;
pub type CapFunc = fn(&ConfigHandle) -> usize;
/// A cached value paired with a counter used to choose an eviction
/// candidate.
struct ValueWithFreq<V> {
    /// The cached payload.
    value: V,
    /// Frequency counter; `lfu` treats the smallest value as the
    /// least-frequently-used entry.
    freq: u16,
}

impl<'a, V: 'a> ValueWithFreq<V> {
    /// Pick the key to evict using a deliberately simple LFU strategy.
    ///
    /// If `latest` holds a key, it is taken (leaving `None` behind) and
    /// returned without looking at `iter`.  Otherwise every entry yielded
    /// by `iter` is visited and the key with the smallest `freq` is
    /// cloned and returned; when several entries share the smallest
    /// `freq`, the first one yielded wins.  Returns `None` when `latest`
    /// is empty and `iter` yields nothing.
    ///
    /// The full scan is linear in the number of entries, which isn't
    /// great for large cache sizes.
    pub fn lfu<K: Clone + 'a>(
        latest: &mut Option<K>,
        iter: impl Iterator<Item = (&'a K, &'a ValueWithFreq<V>)>,
    ) -> Option<K> {
        if let Some(candidate) = latest.take() {
            return Some(candidate);
        }

        // `min_by_key` returns the first element among equal minima.
        iter.min_by_key(|(_, entry)| entry.freq)
            .map(|(key, _)| key.clone())
    }
}
/// A cache using a Least-Frequently-Used eviction policy.
/// If K is u64 you should use LfuCacheU64 instead as it has
/// less overhead.
pub struct LfuCache<K, V> {
hit: &'static str,
miss: &'static str,
key_to_id: HashMap<K, u64>,
map: FnvHashMap<u64, (K, V)>,
next_id: u64,
advisor: CacheAdvisor,
map: HashMap<K, ValueWithFreq<V>>,
cap: usize,
cap_func: CapFunc,
latest: Option<K>,
}
impl<K: Hash + Eq + Clone, V> LfuCache<K, V> {
@ -39,12 +71,10 @@ impl<K: Hash + Eq + Clone, V> LfuCache<K, V> {
Self {
hit,
miss,
key_to_id: HashMap::with_capacity(cap),
map: FnvHashMap::default(),
advisor: CacheAdvisor::new(cap, ENTRY_PERCENT),
next_id: 0,
map: HashMap::with_capacity(cap),
cap,
cap_func,
latest: None,
}
}
@ -56,22 +86,12 @@ impl<K: Hash + Eq + Clone, V> LfuCache<K, V> {
let new_cap = (self.cap_func)(config);
if new_cap != self.cap {
self.cap = new_cap;
self.clear();
self.map = HashMap::with_capacity(new_cap);
}
}
pub fn clear(&mut self) {
self.map.clear();
self.key_to_id.clear();
self.advisor = CacheAdvisor::new(self.cap, ENTRY_PERCENT);
}
fn process_evictions(&mut self, evict: &[(u64, usize)]) {
for (evict_id, _cost) in evict {
if let Some((evict_key, _v)) = self.map.remove(&evict_id) {
self.key_to_id.remove(&evict_key);
}
}
}
pub fn get<'a, Q: ?Sized>(&'a mut self, k: &Q) -> Option<&'a V>
@ -79,37 +99,36 @@ impl<K: Hash + Eq + Clone, V> LfuCache<K, V> {
K: Borrow<Q>,
Q: Hash + Eq,
{
let id = match self.key_to_id.get(k) {
Some(id) => *id,
match self.map.get_mut(k) {
None => {
metrics::histogram!(self.miss, 1.0);
return None;
metrics::histogram!(self.miss, 1.);
None
}
};
let evict = self.advisor.accessed(id, 1);
self.process_evictions(&evict);
metrics::histogram!(self.hit, 1.);
self.map.get(&id).map(|(_k, v)| v)
Some(ValueWithFreq { value, freq }) => {
metrics::histogram!(self.hit, 1.);
*freq = freq.saturating_add(1);
match &self.latest {
Some(latest) if latest.borrow() == k => {
self.latest.take();
}
_ => {}
}
Some(value)
}
}
}
pub fn put(&mut self, k: K, v: V) -> Option<V> {
let id = match self.key_to_id.get(&k) {
Some(id) => *id,
None => {
let id = self.next_id;
self.next_id += 1;
self.key_to_id.insert(k.clone(), id);
id
let prior = self.map.remove(&k);
if self.map.len() >= self.cap {
let lfu = ValueWithFreq::lfu(&mut self.latest, self.map.iter());
if let Some(key) = lfu {
self.map.remove(&key);
}
};
let evict = self.advisor.accessed(id, 1);
self.process_evictions(&evict);
self.map.insert(id, (k, v)).map(|(_k, v)| v)
}
self.latest.replace(k.clone());
self.map.insert(k, ValueWithFreq { value: v, freq: 0 });
prior.map(|ent| ent.value)
}
}
@ -118,10 +137,10 @@ impl<K: Hash + Eq + Clone, V> LfuCache<K, V> {
pub struct LfuCacheU64<V> {
hit: &'static str,
miss: &'static str,
map: FnvHashMap<u64, V>,
advisor: CacheAdvisor,
map: FnvHashMap<u64, ValueWithFreq<V>>,
cap: usize,
cap_func: CapFunc,
latest: Option<u64>,
}
impl<V> LfuCacheU64<V> {
@ -136,9 +155,9 @@ impl<V> LfuCacheU64<V> {
hit,
miss,
map: FnvHashMap::default(),
advisor: CacheAdvisor::new(cap, ENTRY_PERCENT),
cap,
cap_func,
latest: None,
}
}
@ -150,46 +169,64 @@ impl<V> LfuCacheU64<V> {
let new_cap = (self.cap_func)(config);
if new_cap != self.cap {
self.cap = new_cap;
self.clear();
self.map = FnvHashMap::default();
}
}
pub fn clear(&mut self) {
self.map.clear();
self.advisor = CacheAdvisor::new(self.cap, ENTRY_PERCENT);
}
fn process_evictions(&mut self, evict: &[(u64, usize)]) {
for (evict_id, _cost) in evict {
self.map.remove(&evict_id);
}
}
pub fn get(&mut self, id: &u64) -> Option<&V> {
if !self.map.contains_key(&id) {
metrics::histogram!(self.miss, 1.0);
return None;
match self.map.get_mut(&id) {
None => {
metrics::histogram!(self.miss, 1.0);
None
}
Some(ValueWithFreq { value, freq }) => {
metrics::histogram!(self.hit, 1.);
*freq = freq.saturating_add(1);
match &self.latest {
Some(latest) if latest == id => {
self.latest.take();
}
_ => {}
}
Some(value)
}
}
let evict = self.advisor.accessed(*id, 1);
self.process_evictions(&evict);
metrics::histogram!(self.hit, 1.);
self.map.get(&id)
}
pub fn get_mut(&mut self, id: &u64) -> Option<&mut V> {
if !self.map.contains_key(&id) {
metrics::histogram!(self.miss, 1.0);
return None;
match self.map.get_mut(&id) {
None => {
metrics::histogram!(self.miss, 1.0);
None
}
Some(ValueWithFreq { value, freq }) => {
metrics::histogram!(self.hit, 1.);
*freq = freq.saturating_add(1);
match &self.latest {
Some(latest) if latest == id => {
self.latest.take();
}
_ => {}
}
Some(value)
}
}
let evict = self.advisor.accessed(*id, 1);
self.process_evictions(&evict);
metrics::histogram!(self.hit, 1.);
self.map.get_mut(&id)
}
pub fn put(&mut self, id: u64, v: V) -> Option<V> {
let evict = self.advisor.accessed(id, 1);
self.process_evictions(&evict);
self.map.insert(id, v)
let prior = self.map.remove(&id);
if self.map.len() >= self.cap {
let lfu = ValueWithFreq::lfu(&mut self.latest, self.map.iter());
if let Some(key) = lfu {
self.map.remove(&key);
}
}
self.latest.replace(id);
self.map.insert(id, ValueWithFreq { value: v, freq: 0 });
prior.map(|ent| ent.value)
}
}

View File

@ -179,6 +179,24 @@ impl TripleLayerQuadAllocatorTrait for HeapQuadAllocator {
};
let idx = verts.len();
/* Explicitly manage growth when needed.
* Experiments have shown that relying on the default exponential
* growth of the underlying Vec can waste 40%-75% of the capacity,
* and since HeapQuadAllocators are cached, that
* causes a lot of the heap to be wasted.
* Here we use exponential growth until we reach 64 and then
* increment by 64.
* This strikes a reasonable balance with exponential growth;
* the default 80x24 size terminal tends to peak out around 640
* elements which has a similar number of allocation steps to
* exponential growth while not wasting as much for cases that
* use less memory and that would otherwise get rounded up
* to the same peak.
* May potentially be worth a config option to tune this increment.
*/
if idx >= verts.capacity() {
verts.reserve_exact(verts.capacity().next_power_of_two().min(64));
}
verts.resize_with(verts.len() + VERTICES_PER_CELL, Vertex::default);
Ok(Quad {

View File

@ -422,6 +422,7 @@ pub fn parse_status_text(text: &str, default_cell: CellAttributes) -> Line {
}
match action {
Action::Print(c) => print_buffer.push(c),
Action::PrintString(s) => print_buffer.push_str(&s),
Action::Control(c) => {
flush_print(&mut print_buffer, &mut cells, &pen);
match c {