mirror of
https://github.com/wez/wezterm.git
synced 2024-12-22 21:01:36 +03:00
trim heap usage
I spent a few hours in heap profilers. What I found was:

* Inefficient use of heap when building up runs of `Action::Print(char)`.
  -> Solve by adding `Action::PrintString(String)` and accumulating UTF-8 bytes rather than u32 codepoints.

* Inefficient use of heap when building Quad buffers: the default exponential growth of `Vec` tended to waste 40%-75% of the allocated capacity, and since we could keep ~1024 of these in cache, there's a lot of potential for waste.
  -> Solve by bounding the growth to 64 at a time. This has similar characteristics to exponential growth at the default 80x24 terminal size. May need to add a config option for this step size for users with very large terminals.

* Lazy eviction from the LFU caches. The underlying cache advisor is somewhat probabilistic and has a minimum cache size of 256, making it difficult to maintain low heap utilization.
  -> Solve by replacing it with a very simple LFU algorithm. It doesn't seem to hurt much at the default terminal size with the default cache sizes. If we make the cache sizes smaller, its overhead is reduced.

Some further experimentation is needed to adjust defaults, but this should help reduce heap usage.

refs: https://github.com/wez/wezterm/issues/2626
This commit is contained in:
parent
5dd1f513f9
commit
35ce2fe74d
10
Cargo.lock
generated
10
Cargo.lock
generated
@ -433,15 +433,6 @@ version = "1.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ec8a7b6a70fde80372154c65702f00a0f56f3e1c36abbc6c440484be248856db"
|
||||
|
||||
[[package]]
|
||||
name = "cache-advisor"
|
||||
version = "1.0.12"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "11788ff413a0fe136b34756f5cce23a5a70fb28c18eccba75c5a6f9f7a95ad38"
|
||||
dependencies = [
|
||||
"crossbeam-queue",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cache-padded"
|
||||
version = "1.2.0"
|
||||
@ -5618,7 +5609,6 @@ dependencies = [
|
||||
"anyhow",
|
||||
"benchmarking",
|
||||
"bitflags",
|
||||
"cache-advisor",
|
||||
"cc",
|
||||
"chrono",
|
||||
"clap 4.0.15",
|
||||
|
@ -17,7 +17,7 @@ exclude = [
|
||||
|
||||
[profile.release]
|
||||
opt-level = 3
|
||||
# debug = 1
|
||||
# debug = 2
|
||||
|
||||
[profile.dev]
|
||||
# https://jakedeichert.com/blog/reducing-rust-incremental-compilation-times-on-macos-by-70-percent/
|
||||
|
@ -171,7 +171,7 @@ fn parse_buffered_data(pane_id: PaneId, dead: &Arc<AtomicBool>, mut rx: FileDesc
|
||||
}
|
||||
_ => {}
|
||||
};
|
||||
actions.push(action);
|
||||
action.append_to(&mut actions);
|
||||
|
||||
if flush && !actions.is_empty() {
|
||||
send_actions_to_mux(pane_id, dead, std::mem::take(&mut actions));
|
||||
|
@ -227,6 +227,11 @@ impl<'a> Performer<'a> {
|
||||
}
|
||||
match action {
|
||||
Action::Print(c) => self.print(c),
|
||||
Action::PrintString(s) => {
|
||||
for c in s.chars() {
|
||||
self.print(c)
|
||||
}
|
||||
}
|
||||
Action::Control(code) => self.control(code),
|
||||
Action::DeviceControl(ctrl) => self.device_control(ctrl),
|
||||
Action::OperatingSystemCommand(osc) => self.osc_dispatch(*osc),
|
||||
|
@ -28,6 +28,8 @@ use vtparse::CsiParam;
|
||||
pub enum Action {
|
||||
/// Send a single printable character to the display
|
||||
Print(char),
|
||||
/// Send a string of printable characters to the display.
|
||||
PrintString(String),
|
||||
/// A C0 or C1 control code
|
||||
Control(ControlCode),
|
||||
/// Device control. This is uncommon wrt. terminal emulation.
|
||||
@ -45,6 +47,32 @@ pub enum Action {
|
||||
KittyImage(Box<KittyImage>),
|
||||
}
|
||||
|
||||
impl Action {
|
||||
/// Append this `Action` to a `Vec<Action>`.
|
||||
/// If this `Action` is `Print` and the last element is `Print` or
|
||||
/// `PrintString` then the elements are combined into `PrintString`
|
||||
/// to reduce heap utilization.
|
||||
pub fn append_to(self, dest: &mut Vec<Self>) {
|
||||
if let Action::Print(c) = &self {
|
||||
match dest.last_mut() {
|
||||
Some(Action::PrintString(s)) => {
|
||||
s.push(*c);
|
||||
return;
|
||||
}
|
||||
Some(Action::Print(prior)) => {
|
||||
let mut s = prior.to_string();
|
||||
dest.pop();
|
||||
s.push(*c);
|
||||
dest.push(Action::PrintString(s));
|
||||
return;
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
dest.push(self);
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(all(test, target_pointer_width = "64"))]
|
||||
#[test]
|
||||
fn action_size() {
|
||||
@ -61,6 +89,7 @@ impl Display for Action {
|
||||
fn fmt(&self, f: &mut Formatter) -> Result<(), FmtError> {
|
||||
match self {
|
||||
Action::Print(c) => write!(f, "{}", c),
|
||||
Action::PrintString(s) => write!(f, "{}", s),
|
||||
Action::Control(c) => f.write_char(*c as u8 as char),
|
||||
Action::DeviceControl(c) => c.fmt(f),
|
||||
Action::OperatingSystemCommand(osc) => osc.fmt(f),
|
||||
|
@ -33,7 +33,6 @@ cc = "1.0"
|
||||
[dependencies]
|
||||
anyhow = "1.0"
|
||||
bitflags = "1.3"
|
||||
cache-advisor = "1.0"
|
||||
chrono = {version="0.4", features=["unstable-locales"]}
|
||||
clap = {version="4.0", features=["derive"]}
|
||||
codec = { path = "../codec" }
|
||||
|
@ -1,5 +1,4 @@
|
||||
#![allow(dead_code)]
|
||||
use cache_advisor::CacheAdvisor;
|
||||
use config::ConfigHandle;
|
||||
use fnv::FnvHashMap;
|
||||
use std::borrow::Borrow;
|
||||
@ -14,18 +13,51 @@ const ENTRY_PERCENT: u8 = 20;
|
||||
|
||||
pub type CapFunc = fn(&ConfigHandle) -> usize;
|
||||
|
||||
/// A cached value paired with a saturating counter of how often it has
/// been used, so that the least frequently used entry can be found.
struct ValueWithFreq<V> {
    value: V,
    freq: u16,
}

impl<'a, V: 'a> ValueWithFreq<V> {
    /// A very basic LFU algorithm: pick the key that should be evicted next.
    ///
    /// * `latest` - if it holds a key, that key is returned immediately and
    ///   `latest` is left as `None`; the iterator is not consulted.
    /// * `iter` - the `(key, entry)` pairs of the cache; only used when
    ///   `latest` is empty.
    ///
    /// Otherwise returns a clone of the key whose entry has the lowest
    /// `freq`; when several entries share the lowest `freq`, the first one
    /// yielded by `iter` wins. Returns `None` when `latest` is empty and
    /// `iter` yields nothing.
    ///
    /// The fallback walks the entire cache, so it is linear in the number
    /// of entries; for large cache sizes, this isn't great.
    pub fn lfu<K: Clone + 'a>(
        latest: &mut Option<K>,
        iter: impl Iterator<Item = (&'a K, &'a ValueWithFreq<V>)>,
    ) -> Option<K> {
        latest.take().or_else(|| {
            // `min_by_key` keeps the first of several equal minimums.
            iter.min_by_key(|(_, entry)| entry.freq)
                .map(|(key, _)| key.clone())
        })
    }
}
|
||||
|
||||
/// A cache using a Least-Frequently-Used eviction policy.
|
||||
/// If K is u64 you should use LfuCacheU64 instead as it has
|
||||
/// less overhead.
|
||||
pub struct LfuCache<K, V> {
|
||||
hit: &'static str,
|
||||
miss: &'static str,
|
||||
key_to_id: HashMap<K, u64>,
|
||||
map: FnvHashMap<u64, (K, V)>,
|
||||
next_id: u64,
|
||||
advisor: CacheAdvisor,
|
||||
map: HashMap<K, ValueWithFreq<V>>,
|
||||
cap: usize,
|
||||
cap_func: CapFunc,
|
||||
latest: Option<K>,
|
||||
}
|
||||
|
||||
impl<K: Hash + Eq + Clone, V> LfuCache<K, V> {
|
||||
@ -39,12 +71,10 @@ impl<K: Hash + Eq + Clone, V> LfuCache<K, V> {
|
||||
Self {
|
||||
hit,
|
||||
miss,
|
||||
key_to_id: HashMap::with_capacity(cap),
|
||||
map: FnvHashMap::default(),
|
||||
advisor: CacheAdvisor::new(cap, ENTRY_PERCENT),
|
||||
next_id: 0,
|
||||
map: HashMap::with_capacity(cap),
|
||||
cap,
|
||||
cap_func,
|
||||
latest: None,
|
||||
}
|
||||
}
|
||||
|
||||
@ -56,22 +86,12 @@ impl<K: Hash + Eq + Clone, V> LfuCache<K, V> {
|
||||
let new_cap = (self.cap_func)(config);
|
||||
if new_cap != self.cap {
|
||||
self.cap = new_cap;
|
||||
self.clear();
|
||||
self.map = HashMap::with_capacity(new_cap);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn clear(&mut self) {
|
||||
self.map.clear();
|
||||
self.key_to_id.clear();
|
||||
self.advisor = CacheAdvisor::new(self.cap, ENTRY_PERCENT);
|
||||
}
|
||||
|
||||
fn process_evictions(&mut self, evict: &[(u64, usize)]) {
|
||||
for (evict_id, _cost) in evict {
|
||||
if let Some((evict_key, _v)) = self.map.remove(&evict_id) {
|
||||
self.key_to_id.remove(&evict_key);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get<'a, Q: ?Sized>(&'a mut self, k: &Q) -> Option<&'a V>
|
||||
@ -79,37 +99,36 @@ impl<K: Hash + Eq + Clone, V> LfuCache<K, V> {
|
||||
K: Borrow<Q>,
|
||||
Q: Hash + Eq,
|
||||
{
|
||||
let id = match self.key_to_id.get(k) {
|
||||
Some(id) => *id,
|
||||
match self.map.get_mut(k) {
|
||||
None => {
|
||||
metrics::histogram!(self.miss, 1.0);
|
||||
return None;
|
||||
metrics::histogram!(self.miss, 1.);
|
||||
None
|
||||
}
|
||||
};
|
||||
|
||||
let evict = self.advisor.accessed(id, 1);
|
||||
self.process_evictions(&evict);
|
||||
|
||||
metrics::histogram!(self.hit, 1.);
|
||||
|
||||
self.map.get(&id).map(|(_k, v)| v)
|
||||
Some(ValueWithFreq { value, freq }) => {
|
||||
metrics::histogram!(self.hit, 1.);
|
||||
*freq = freq.saturating_add(1);
|
||||
match &self.latest {
|
||||
Some(latest) if latest.borrow() == k => {
|
||||
self.latest.take();
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
Some(value)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn put(&mut self, k: K, v: V) -> Option<V> {
|
||||
let id = match self.key_to_id.get(&k) {
|
||||
Some(id) => *id,
|
||||
None => {
|
||||
let id = self.next_id;
|
||||
self.next_id += 1;
|
||||
self.key_to_id.insert(k.clone(), id);
|
||||
id
|
||||
let prior = self.map.remove(&k);
|
||||
if self.map.len() >= self.cap {
|
||||
let lfu = ValueWithFreq::lfu(&mut self.latest, self.map.iter());
|
||||
if let Some(key) = lfu {
|
||||
self.map.remove(&key);
|
||||
}
|
||||
};
|
||||
|
||||
let evict = self.advisor.accessed(id, 1);
|
||||
self.process_evictions(&evict);
|
||||
|
||||
self.map.insert(id, (k, v)).map(|(_k, v)| v)
|
||||
}
|
||||
self.latest.replace(k.clone());
|
||||
self.map.insert(k, ValueWithFreq { value: v, freq: 0 });
|
||||
prior.map(|ent| ent.value)
|
||||
}
|
||||
}
|
||||
|
||||
@ -118,10 +137,10 @@ impl<K: Hash + Eq + Clone, V> LfuCache<K, V> {
|
||||
pub struct LfuCacheU64<V> {
|
||||
hit: &'static str,
|
||||
miss: &'static str,
|
||||
map: FnvHashMap<u64, V>,
|
||||
advisor: CacheAdvisor,
|
||||
map: FnvHashMap<u64, ValueWithFreq<V>>,
|
||||
cap: usize,
|
||||
cap_func: CapFunc,
|
||||
latest: Option<u64>,
|
||||
}
|
||||
|
||||
impl<V> LfuCacheU64<V> {
|
||||
@ -136,9 +155,9 @@ impl<V> LfuCacheU64<V> {
|
||||
hit,
|
||||
miss,
|
||||
map: FnvHashMap::default(),
|
||||
advisor: CacheAdvisor::new(cap, ENTRY_PERCENT),
|
||||
cap,
|
||||
cap_func,
|
||||
latest: None,
|
||||
}
|
||||
}
|
||||
|
||||
@ -150,46 +169,64 @@ impl<V> LfuCacheU64<V> {
|
||||
let new_cap = (self.cap_func)(config);
|
||||
if new_cap != self.cap {
|
||||
self.cap = new_cap;
|
||||
self.clear();
|
||||
self.map = FnvHashMap::default();
|
||||
}
|
||||
}
|
||||
|
||||
pub fn clear(&mut self) {
|
||||
self.map.clear();
|
||||
self.advisor = CacheAdvisor::new(self.cap, ENTRY_PERCENT);
|
||||
}
|
||||
|
||||
fn process_evictions(&mut self, evict: &[(u64, usize)]) {
|
||||
for (evict_id, _cost) in evict {
|
||||
self.map.remove(&evict_id);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get(&mut self, id: &u64) -> Option<&V> {
|
||||
if !self.map.contains_key(&id) {
|
||||
metrics::histogram!(self.miss, 1.0);
|
||||
return None;
|
||||
match self.map.get_mut(&id) {
|
||||
None => {
|
||||
metrics::histogram!(self.miss, 1.0);
|
||||
None
|
||||
}
|
||||
Some(ValueWithFreq { value, freq }) => {
|
||||
metrics::histogram!(self.hit, 1.);
|
||||
*freq = freq.saturating_add(1);
|
||||
match &self.latest {
|
||||
Some(latest) if latest == id => {
|
||||
self.latest.take();
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
Some(value)
|
||||
}
|
||||
}
|
||||
let evict = self.advisor.accessed(*id, 1);
|
||||
self.process_evictions(&evict);
|
||||
metrics::histogram!(self.hit, 1.);
|
||||
self.map.get(&id)
|
||||
}
|
||||
|
||||
pub fn get_mut(&mut self, id: &u64) -> Option<&mut V> {
|
||||
if !self.map.contains_key(&id) {
|
||||
metrics::histogram!(self.miss, 1.0);
|
||||
return None;
|
||||
match self.map.get_mut(&id) {
|
||||
None => {
|
||||
metrics::histogram!(self.miss, 1.0);
|
||||
None
|
||||
}
|
||||
Some(ValueWithFreq { value, freq }) => {
|
||||
metrics::histogram!(self.hit, 1.);
|
||||
*freq = freq.saturating_add(1);
|
||||
match &self.latest {
|
||||
Some(latest) if latest == id => {
|
||||
self.latest.take();
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
Some(value)
|
||||
}
|
||||
}
|
||||
let evict = self.advisor.accessed(*id, 1);
|
||||
self.process_evictions(&evict);
|
||||
metrics::histogram!(self.hit, 1.);
|
||||
self.map.get_mut(&id)
|
||||
}
|
||||
|
||||
pub fn put(&mut self, id: u64, v: V) -> Option<V> {
|
||||
let evict = self.advisor.accessed(id, 1);
|
||||
self.process_evictions(&evict);
|
||||
self.map.insert(id, v)
|
||||
let prior = self.map.remove(&id);
|
||||
if self.map.len() >= self.cap {
|
||||
let lfu = ValueWithFreq::lfu(&mut self.latest, self.map.iter());
|
||||
if let Some(key) = lfu {
|
||||
self.map.remove(&key);
|
||||
}
|
||||
}
|
||||
self.latest.replace(id);
|
||||
self.map.insert(id, ValueWithFreq { value: v, freq: 0 });
|
||||
prior.map(|ent| ent.value)
|
||||
}
|
||||
}
|
||||
|
@ -179,6 +179,24 @@ impl TripleLayerQuadAllocatorTrait for HeapQuadAllocator {
|
||||
};
|
||||
|
||||
let idx = verts.len();
|
||||
/* Explicitly manage growth when needed.
|
||||
* Experiments have shown that relying on the default exponential
|
||||
* growth of the underlying Vec can waste 40%-75% of the capacity,
|
||||
* and since HeapQuadAllocators are cached, that
|
||||
* causes a lot of the heap to be wasted.
|
||||
* Here we use exponential growth until we reach 64 and then
|
||||
* increment by 64.
|
||||
* This strikes a reasonable balance with exponential growth;
|
||||
* the default 80x24 size terminal tends to peak out around 640
|
||||
* elements which has a similar number of allocation steps to
|
||||
* exponential growth while not wasting as much for cases that
|
||||
* use less memory and that would otherwise get rounded up
|
||||
* to the same peak.
|
||||
* May potentially be worth a config option to tune this increment.
|
||||
*/
|
||||
if idx >= verts.capacity() {
|
||||
verts.reserve_exact(verts.capacity().next_power_of_two().min(64));
|
||||
}
|
||||
verts.resize_with(verts.len() + VERTICES_PER_CELL, Vertex::default);
|
||||
|
||||
Ok(Quad {
|
||||
|
@ -422,6 +422,7 @@ pub fn parse_status_text(text: &str, default_cell: CellAttributes) -> Line {
|
||||
}
|
||||
match action {
|
||||
Action::Print(c) => print_buffer.push(c),
|
||||
Action::PrintString(s) => print_buffer.push_str(&s),
|
||||
Action::Control(c) => {
|
||||
flush_print(&mut print_buffer, &mut cells, &pen);
|
||||
match c {
|
||||
|
Loading…
Reference in New Issue
Block a user