1
1
mirror of https://github.com/wez/wezterm.git synced 2024-09-19 18:57:59 +03:00

perf: cache quads by line

Introduces a heap-based quad allocator that we cache on a per-line
basis, so if a line is unchanged we simply need to copy the previously
computed set of quads for it into the gpu quad buffer.

The results are encouraging with respect to constructing those quads:
`quad_buffer_apply` is the cost of the copy operation, whereas
`render_screen_line_opengl` is the cost of computing the quads.
The copy is roughly 300x faster at p50 and >100x faster at p95 for a
program that updates the full screen:

Running `top` full-screen at 2880x1800:

```
STAT                                             p50      p75      p95
Key(quad_buffer_apply)                           2.26µs   5.22µs   9.60µs
Key(render_screen_line_opengl)                   610.30µs 905.22µs 1.33ms
Key(gui.paint.opengl)                            35.39ms  37.75ms  45.88ms
```

However, the extra buffering does increase the latency of
`gui.paint.opengl` (the overall cost of painting a frame); contrast the
above with the latency in the same scenario with the current `main`
(rather than this branch):

```
Key(gui.paint.opengl)                            19.14ms  21.10ms  28.18ms
```

Note that for an idle screen this latency is ~1.5ms but that is also true
of `main`.

While the overall latency in the histogram isn't a slam dunk,
running `time cat bigfile` is ~10% faster on my mac.

I'm sure there's something that can be shaved off to get a more
convincing win.
This commit is contained in:
Wez Furlong 2022-08-23 06:37:12 -07:00
parent 9aaa4c4db6
commit 00ddfbf9b8
6 changed files with 207 additions and 13 deletions

View File

@ -9,7 +9,7 @@ use wezterm_term::{Line, StableRowIndex, Terminal};
/// Describes the location of the cursor
#[derive(
Debug, Default, Copy, Clone, Eq, PartialEq, Deserialize, Serialize, FromDynamic, ToDynamic,
Debug, Default, Copy, Clone, Hash, Eq, PartialEq, Deserialize, Serialize, FromDynamic, ToDynamic,
)]
pub struct StableCursorPosition {
pub x: usize,

View File

@ -33,7 +33,7 @@ pub enum Position {
}
#[cfg_attr(feature = "use_serde", derive(Serialize, Deserialize))]
#[derive(Debug, Clone, Copy, PartialEq, Eq, FromDynamic, ToDynamic)]
#[derive(Debug, Clone, Hash, Copy, PartialEq, Eq, FromDynamic, ToDynamic)]
pub enum CursorVisibility {
Hidden,
Visible,

View File

@ -135,20 +135,95 @@ impl<'a> Quad<'a> {
/// Allocator for the vertex storage behind a single layer of quads.
pub trait QuadAllocator {
/// Allocates a quad and returns it so the caller can fill it in.
fn allocate(&mut self) -> anyhow::Result<Quad>;
/// Returns the vertices currently held by this allocator.
fn vertices(&self) -> &[Vertex];
/// Adds an already-computed run of vertices to this allocator.
fn extend_with(&mut self, vertices: &[Vertex]);
}
/// Layered counterpart of `QuadAllocator`: every operation additionally
/// takes the index of the layer it applies to.
pub trait TripleLayerQuadAllocatorTrait {
/// Allocates a quad in layer `layer_num`.
fn allocate(&mut self, layer_num: usize) -> anyhow::Result<Quad>;
/// Returns the vertices currently held for layer `layer_num`.
fn vertices(&self, layer_num: usize) -> &[Vertex];
/// Adds an already-computed run of vertices to layer `layer_num`.
fn extend_with(&mut self, layer_num: usize, vertices: &[Vertex]);
}
/// Quad allocator that keeps the vertices of each of its three layers in
/// a heap-allocated `Vec`. `Default` yields three empty layers.
#[derive(Default)]
pub struct HeapQuadAllocator {
// Vertices buffered for layer 0.
layer0: Vec<Vertex>,
// Vertices buffered for layer 1.
layer1: Vec<Vertex>,
// Vertices buffered for layer 2.
layer2: Vec<Vertex>,
}
impl HeapQuadAllocator {
    /// Replays every vertex buffered here into `other`, one layer at a
    /// time, preserving the layer index.
    ///
    /// The time spent copying is recorded in the `quad_buffer_apply`
    /// histogram. This currently always returns `Ok(())`.
    pub fn apply_to(&self, other: &mut TripleLayerQuadAllocator) -> anyhow::Result<()> {
        let began = std::time::Instant::now();

        // The position in this array is the layer number.
        let per_layer = [&self.layer0, &self.layer1, &self.layer2];
        for (layer_num, buffered) in per_layer.iter().enumerate() {
            other.extend_with(layer_num, buffered);
        }

        metrics::histogram!("quad_buffer_apply", began.elapsed());
        Ok(())
    }
}
impl HeapQuadAllocator {
    /// Mutable access to the vertex list for `layer_num`.
    ///
    /// Only layers 0, 1 and 2 exist; any other index panics.
    fn layer_mut(&mut self, layer_num: usize) -> &mut Vec<Vertex> {
        match layer_num {
            0 => &mut self.layer0,
            1 => &mut self.layer1,
            2 => &mut self.layer2,
            _ => unreachable!(),
        }
    }
}

impl TripleLayerQuadAllocatorTrait for HeapQuadAllocator {
    /// Grows the chosen layer by one quad's worth of default vertices and
    /// returns a `Quad` borrowing exactly those new vertices.
    fn allocate(&mut self, layer_num: usize) -> anyhow::Result<Quad> {
        let layer = self.layer_mut(layer_num);
        let first = layer.len();
        let past_last = first + VERTICES_PER_CELL;
        layer.resize_with(past_last, Vertex::default);
        Ok(Quad {
            vert: &mut layer[first..past_last],
        })
    }

    /// Returns all vertices buffered so far for `layer_num`.
    ///
    /// Only layers 0, 1 and 2 exist; any other index panics.
    fn vertices(&self, layer_num: usize) -> &[Vertex] {
        let layer = match layer_num {
            0 => &self.layer0,
            1 => &self.layer1,
            2 => &self.layer2,
            _ => unreachable!(),
        };
        layer.as_slice()
    }

    /// Appends a copy of `vertices` to the end of the chosen layer.
    fn extend_with(&mut self, layer_num: usize, vertices: &[Vertex]) {
        self.layer_mut(layer_num).extend_from_slice(vertices);
    }
}
/// A three-layer quad allocator backed by one of two kinds of storage.
pub enum TripleLayerQuadAllocator<'a> {
/// Layers supplied as a `BorrowedLayers` value.
/// NOTE(review): presumably these are the GPU-mapped vertex buffers; confirm.
Gpu(BorrowedLayers<'a>),
/// Layers buffered on the heap in a borrowed `HeapQuadAllocator`.
Heap(&'a mut HeapQuadAllocator),
}
/// Every operation is forwarded unchanged to whichever backing allocator
/// this value wraps.
impl<'a> TripleLayerQuadAllocatorTrait for TripleLayerQuadAllocator<'a> {
    /// Allocates a quad in `layer_num` of the wrapped allocator.
    fn allocate(&mut self, layer_num: usize) -> anyhow::Result<Quad> {
        match self {
            Self::Heap(heap) => heap.allocate(layer_num),
            Self::Gpu(gpu) => gpu.allocate(layer_num),
        }
    }

    /// Returns the vertices the wrapped allocator holds for `layer_num`.
    fn vertices(&self, layer_num: usize) -> &[Vertex] {
        match self {
            Self::Heap(heap) => heap.vertices(layer_num),
            Self::Gpu(gpu) => gpu.vertices(layer_num),
        }
    }

    /// Adds `vertices` to `layer_num` of the wrapped allocator.
    fn extend_with(&mut self, layer_num: usize, vertices: &[Vertex]) {
        match self {
            Self::Heap(heap) => heap.extend_with(layer_num, vertices),
            Self::Gpu(gpu) => gpu.extend_with(layer_num, vertices),
        }
    }
}

View File

@ -43,6 +43,24 @@ impl<'a> QuadAllocator for MappedQuads<'a> {
Ok(quad)
}
fn vertices(&self) -> &[Vertex] {
&self.mapping[0..*self.next]
}
/// Copies `vertices` into the mapping at the current write position and
/// advances `next` by the number of quads they represent.
///
/// `next` is always advanced, even when the data does not fit, so that
/// the out-of-space condition can be detected at the end of the render
/// pass. The copy itself only happens when the whole run fits.
fn extend_with(&mut self, vertices: &[Vertex]) {
    let first_quad = *self.next;
    // `next` counts quads, so convert the vertex count into quads.
    *self.next += vertices.len() / VERTICES_PER_CELL;

    // `mapping` is indexed by vertex rather than by quad.
    let start = first_quad * VERTICES_PER_CELL;
    let end = start + vertices.len();
    let limit = self.capacity * VERTICES_PER_CELL;

    // `end == limit` means the run exactly fills the remaining space,
    // which is still in bounds; only skip the copy when it overflows.
    if end <= limit {
        self.mapping[start..end].copy_from_slice(vertices);
    }
}
}
pub struct TripleVertexBuffer {
@ -192,6 +210,12 @@ impl<'a> TripleLayerQuadAllocatorTrait for BorrowedLayers<'a> {
/// Allocates a quad from the per-layer allocator at index `layer_num`.
///
/// Panics if `layer_num` is not a valid layer index.
fn allocate(&mut self, layer_num: usize) -> anyhow::Result<Quad> {
    let layer = &mut self.0[layer_num];
    layer.allocate()
}
/// Returns the vertices held by the per-layer allocator at index
/// `layer_num`.
///
/// Panics if `layer_num` is not a valid layer index.
fn vertices(&self, layer_num: usize) -> &[Vertex] {
    let layer = &self.0[layer_num];
    layer.vertices()
}
/// Adds `vertices` to the per-layer allocator at index `layer_num`.
///
/// Panics if `layer_num` is not a valid layer index.
fn extend_with(&mut self, layer_num: usize, vertices: &[Vertex]) {
    let layer = &mut self.0[layer_num];
    layer.extend_with(vertices)
}
}
pub struct RenderState {

View File

@ -22,7 +22,9 @@ use crate::termwindow::background::{
};
use crate::termwindow::keyevent::{KeyTableArgs, KeyTableState};
use crate::termwindow::modal::Modal;
use crate::termwindow::render::{LineToElementShape, LineToElementShapeKey};
use crate::termwindow::render::{
LineToElementKey, LineToElementShape, LineToElementShapeKey, LineToElementValue,
};
use ::wezterm_term::input::{ClickPosition, MouseButton as TMB};
use ::window::*;
use anyhow::{anyhow, ensure, Context};
@ -399,6 +401,7 @@ pub struct TermWindow {
shape_cache:
RefCell<LruCache<ShapeCacheKey, anyhow::Result<Rc<Vec<ShapedInfo<SrgbTexture2d>>>>>>,
line_to_ele_shape_cache: RefCell<LruCache<LineToElementShapeKey, Rc<Vec<LineToElementShape>>>>,
line_to_ele_cache: RefCell<LruCache<LineToElementKey, LineToElementValue>>,
last_status_call: Instant,
cursor_blink_state: RefCell<ColorEase>,
@ -687,6 +690,11 @@ impl TermWindow {
"line_to_ele_shape_cache.miss.rate",
65536,
)),
line_to_ele_cache: RefCell::new(LruCache::new(
"line_to_ele_cache.hit.rate",
"line_to_ele_cache.miss.rate",
65536,
)),
last_status_call: Instant::now(),
cursor_blink_state: RefCell::new(ColorEase::new(
config.cursor_blink_rate,

View File

@ -3,7 +3,9 @@ use crate::colorease::{ColorEase, ColorEaseUniform};
use crate::customglyph::{BlockKey, *};
use crate::glium::texture::SrgbTexture2d;
use crate::glyphcache::{CachedGlyph, GlyphCache};
use crate::quad::{Quad, QuadAllocator, TripleLayerQuadAllocator, TripleLayerQuadAllocatorTrait};
use crate::quad::{
HeapQuadAllocator, Quad, QuadAllocator, TripleLayerQuadAllocator, TripleLayerQuadAllocatorTrait,
};
use crate::renderstate::BorrowedLayers;
use crate::shapecache::*;
use crate::tabbar::{TabBarItem, TabEntry};
@ -178,7 +180,16 @@ pub struct LineToElementShape {
/// Key for the per-line quad cache: the inputs that identify one line's
/// cached quads.
pub struct LineToElementKey {
/// Key describing the shaped content of the line.
pub shape_key: LineToElementShapeKey,
/// Only set if cursor.y == stable_row
pub cursor: Option<StableRowIndex>,
pub cursor: Option<StableCursorPosition>,
/// Selection range the line is rendered with.
pub selection: Range<usize>,
/// Left pixel x coordinate of the line, rounded up to a whole pixel.
pub left: u32,
/// Top pixel y coordinate of the line, rounded up to a whole pixel.
pub top: u32,
/// The `is_active` flag from the render parameters.
pub is_active: bool,
}
/// Cached result of rendering one line: the quads plus an optional expiry.
pub struct LineToElementValue {
/// The quads computed for the line, buffered on the heap.
pub buf: HeapQuadAllocator,
/// When set, the entry is treated as stale once this instant is reached
/// and the line is rendered again. `None` means the entry does not expire.
pub expires: Option<Instant>,
}
pub struct RenderScreenLineOpenGLParams<'a> {
@ -336,6 +347,7 @@ impl super::TermWindow {
self.invalidate_modal();
self.shape_cache.borrow_mut().clear();
self.line_to_ele_shape_cache.borrow_mut().clear();
self.line_to_ele_cache.borrow_mut().clear();
} else {
log::error!("paint_opengl_pass failed: {:#}", err);
break 'pass;
@ -2215,11 +2227,92 @@ impl super::TermWindow {
self.build_line_element_shape(&params, &shape_key)
}
/// Renders one line of the terminal screen into `layers`, reusing
/// previously computed quads when possible.
///
/// A cache key is built from the line's identity and sequence number,
/// the cursor (only when it sits on this line), the selection, the pixel
/// origin and the active flag. On a hit that has not expired, the
/// cached quads are copied into `layers`. Otherwise the line is rendered
/// into a heap buffer, copied into `layers`, and stored in the cache.
/// If rendering changed the pending animation deadline, that deadline
/// becomes the entry's expiry.
pub fn render_screen_line_opengl(
    &self,
    params: RenderScreenLineOpenGLParams,
    layers: &mut TripleLayerQuadAllocator,
) -> anyhow::Result<()> {
    // The top and bottom lines are required to have the same content.
    // Both are rendered as part of the top row, so the bottom row has
    // nothing to contribute.
    if params.line.is_double_height_bottom() {
        return Ok(());
    }

    // Cursor-dependent key parts only apply to the line the cursor is on.
    let cursor_on_line = Some(params.cursor.y) == params.stable_line_idx;

    let composing = match &self.dead_key_status {
        DeadKeyStatus::Composing(text) if cursor_on_line => {
            Some((params.cursor.x, text.to_string()))
        }
        _ => None,
    };

    let shape_key = LineToElementShapeKey {
        pane_id: params
            .pane
            .map(|p| p.pane_id())
            .unwrap_or(PaneId::max_value()),
        seqno: params.line.current_seqno(),
        stable_line_idx: params
            .stable_line_idx
            .unwrap_or(StableRowIndex::max_value()),
        composing,
    };

    let ele_key = LineToElementKey {
        shape_key,
        cursor: cursor_on_line.then(|| *params.cursor),
        selection: params.selection.clone(),
        left: params.left_pixel_x.ceil() as u32,
        top: params.top_pixel_y.ceil() as u32,
        is_active: params.is_active,
    };

    // Fast path: replay the cached quads unless the entry has expired.
    if let Some(cached) = self.line_to_ele_cache.borrow_mut().get(&ele_key) {
        let still_valid = cached
            .expires
            .map_or(true, |deadline| Instant::now() < deadline);
        if still_valid {
            cached.buf.apply_to(layers)?;
            return Ok(());
        }
    }

    // Remember the animation deadline before rendering so that a change
    // made while rendering this line can be detected afterwards.
    let due_before = *self.has_animation.borrow();

    let mut line_quads = HeapQuadAllocator::default();
    self.render_screen_line_opengl_impl(
        params,
        &mut TripleLayerQuadAllocator::Heap(&mut line_quads),
    )?;
    line_quads.apply_to(layers)?;

    // Only a deadline that differs from the one seen before rendering
    // limits the lifetime of this cache entry.
    let due_after = *self.has_animation.borrow();
    let expires = due_after.filter(|due| Some(*due) != due_before);

    self.line_to_ele_cache.borrow_mut().put(
        ele_key,
        LineToElementValue {
            buf: line_quads,
            expires,
        },
    );

    Ok(())
}
/// "Render" a line of the terminal screen into the vertex buffer.
/// This is nominally a matter of setting the fg/bg color and the
/// texture coordinates for a given glyph. There's a little bit
/// of extra complexity to deal with multi-cell glyphs.
pub fn render_screen_line_opengl(
fn render_screen_line_opengl_impl(
&self,
params: RenderScreenLineOpenGLParams,
layers: &mut TripleLayerQuadAllocator,
@ -2240,13 +2333,6 @@ impl super::TermWindow {
1.0
};
if params.line.is_double_height_bottom() {
// The top and bottom lines are required to have the same content.
// For the sake of simplicity, we render both of them as part of
// rendering the top row, so we have nothing more to do here.
return Ok(());
}
let height_scale = if params.line.is_double_height_top() {
2.0
} else {
@ -3257,6 +3343,7 @@ impl super::TermWindow {
pub fn recreate_texture_atlas(&mut self, size: Option<usize>) -> anyhow::Result<()> {
self.shape_cache.borrow_mut().clear();
self.line_to_ele_shape_cache.borrow_mut().clear();
self.line_to_ele_cache.borrow_mut().clear();
if let Some(render_state) = self.render_state.as_mut() {
render_state.recreate_texture_atlas(&self.fonts, &self.render_metrics, size)?;
}