mirror of
https://github.com/wez/wezterm.git
synced 2024-09-19 18:57:59 +03:00
perf: cache quads by line
Introduces a heap-based quad allocator that we cache on a per-line basis, so if a line is unchanged we simply need to copy the previously computed set of quads for it into the GPU quad buffer.

The results are encouraging with respect to constructing those quads: `quad_buffer_apply` is the cost of the copy operation, while `render_screen_line_opengl` is the cost of computing the quads. The copy is 300x better at the p50 and >100x better at p95 for a full-screen updating program:

full 2880x1800 screen top:

```
STAT                            p50       p75       p95
Key(quad_buffer_apply)          2.26µs    5.22µs    9.60µs
Key(render_screen_line_opengl)  610.30µs  905.22µs  1.33ms
Key(gui.paint.opengl)           35.39ms   37.75ms   45.88ms
```

However, the extra buffering does increase the latency of `gui.paint.opengl` (the overall cost of painting a frame); contrast the above with the latency in the same scenario with the current `main` (rather than this branch):

```
Key(gui.paint.opengl)           19.14ms   21.10ms   28.18ms
```

Note that for an idle screen this latency is ~1.5ms, but that is also true of `main`.

While the overall latency in the histogram isn't a slam dunk, running `time cat bigfile` is ~10% faster on my mac. I'm sure there's something that can be shaved off to get a more convincing win.
This commit is contained in:
parent
9aaa4c4db6
commit
00ddfbf9b8
@ -9,7 +9,7 @@ use wezterm_term::{Line, StableRowIndex, Terminal};
|
||||
|
||||
/// Describes the location of the cursor
|
||||
#[derive(
|
||||
Debug, Default, Copy, Clone, Eq, PartialEq, Deserialize, Serialize, FromDynamic, ToDynamic,
|
||||
Debug, Default, Copy, Clone, Hash, Eq, PartialEq, Deserialize, Serialize, FromDynamic, ToDynamic,
|
||||
)]
|
||||
pub struct StableCursorPosition {
|
||||
pub x: usize,
|
||||
|
@ -33,7 +33,7 @@ pub enum Position {
|
||||
}
|
||||
|
||||
#[cfg_attr(feature = "use_serde", derive(Serialize, Deserialize))]
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, FromDynamic, ToDynamic)]
|
||||
#[derive(Debug, Clone, Hash, Copy, PartialEq, Eq, FromDynamic, ToDynamic)]
|
||||
pub enum CursorVisibility {
|
||||
Hidden,
|
||||
Visible,
|
||||
|
@ -135,20 +135,95 @@ impl<'a> Quad<'a> {
|
||||
|
||||
pub trait QuadAllocator {
|
||||
fn allocate(&mut self) -> anyhow::Result<Quad>;
|
||||
fn vertices(&self) -> &[Vertex];
|
||||
fn extend_with(&mut self, vertices: &[Vertex]);
|
||||
}
|
||||
|
||||
pub trait TripleLayerQuadAllocatorTrait {
|
||||
fn allocate(&mut self, layer_num: usize) -> anyhow::Result<Quad>;
|
||||
fn vertices(&self, layer_num: usize) -> &[Vertex];
|
||||
fn extend_with(&mut self, layer_num: usize, vertices: &[Vertex]);
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct HeapQuadAllocator {
|
||||
layer0: Vec<Vertex>,
|
||||
layer1: Vec<Vertex>,
|
||||
layer2: Vec<Vertex>,
|
||||
}
|
||||
|
||||
impl HeapQuadAllocator {
|
||||
pub fn apply_to(&self, other: &mut TripleLayerQuadAllocator) -> anyhow::Result<()> {
|
||||
let start = std::time::Instant::now();
|
||||
for (layer_num, verts) in [(0, &self.layer0), (1, &self.layer1), (2, &self.layer2)] {
|
||||
other.extend_with(layer_num, verts);
|
||||
}
|
||||
metrics::histogram!("quad_buffer_apply", start.elapsed());
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl TripleLayerQuadAllocatorTrait for HeapQuadAllocator {
|
||||
fn allocate(&mut self, layer_num: usize) -> anyhow::Result<Quad> {
|
||||
let verts = match layer_num {
|
||||
0 => &mut self.layer0,
|
||||
1 => &mut self.layer1,
|
||||
2 => &mut self.layer2,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
let idx = verts.len();
|
||||
verts.resize_with(verts.len() + VERTICES_PER_CELL, Vertex::default);
|
||||
|
||||
Ok(Quad {
|
||||
vert: &mut verts[idx..idx + VERTICES_PER_CELL],
|
||||
})
|
||||
}
|
||||
|
||||
fn vertices(&self, layer_num: usize) -> &[Vertex] {
|
||||
match layer_num {
|
||||
0 => &self.layer0,
|
||||
1 => &self.layer1,
|
||||
2 => &self.layer2,
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
|
||||
fn extend_with(&mut self, layer_num: usize, vertices: &[Vertex]) {
|
||||
let verts = match layer_num {
|
||||
0 => &mut self.layer0,
|
||||
1 => &mut self.layer1,
|
||||
2 => &mut self.layer2,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
verts.extend_from_slice(vertices);
|
||||
}
|
||||
}
|
||||
|
||||
pub enum TripleLayerQuadAllocator<'a> {
|
||||
Gpu(BorrowedLayers<'a>),
|
||||
Heap(&'a mut HeapQuadAllocator),
|
||||
}
|
||||
|
||||
impl<'a> TripleLayerQuadAllocatorTrait for TripleLayerQuadAllocator<'a> {
|
||||
fn allocate(&mut self, layer_num: usize) -> anyhow::Result<Quad> {
|
||||
match self {
|
||||
Self::Gpu(b) => b.allocate(layer_num),
|
||||
Self::Heap(h) => h.allocate(layer_num),
|
||||
}
|
||||
}
|
||||
|
||||
fn vertices(&self, layer_num: usize) -> &[Vertex] {
|
||||
match self {
|
||||
Self::Gpu(b) => b.vertices(layer_num),
|
||||
Self::Heap(h) => h.vertices(layer_num),
|
||||
}
|
||||
}
|
||||
|
||||
fn extend_with(&mut self, layer_num: usize, vertices: &[Vertex]) {
|
||||
match self {
|
||||
Self::Gpu(b) => b.extend_with(layer_num, vertices),
|
||||
Self::Heap(h) => h.extend_with(layer_num, vertices),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -43,6 +43,24 @@ impl<'a> QuadAllocator for MappedQuads<'a> {
|
||||
|
||||
Ok(quad)
|
||||
}
|
||||
|
||||
fn vertices(&self) -> &[Vertex] {
|
||||
&self.mapping[0..*self.next]
|
||||
}
|
||||
|
||||
fn extend_with(&mut self, vertices: &[Vertex]) {
|
||||
let idx = *self.next;
|
||||
// idx and next are number of quads, so divide by number of vertices
|
||||
*self.next += vertices.len() / VERTICES_PER_CELL;
|
||||
// Only copy in if there is enough room.
|
||||
// We'll detect the out of space condition at the end of
|
||||
// the render pass.
|
||||
let idx = idx * VERTICES_PER_CELL;
|
||||
let len = self.capacity * VERTICES_PER_CELL;
|
||||
if idx + vertices.len() < len {
|
||||
self.mapping[idx..idx + vertices.len()].copy_from_slice(vertices);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct TripleVertexBuffer {
|
||||
@ -192,6 +210,12 @@ impl<'a> TripleLayerQuadAllocatorTrait for BorrowedLayers<'a> {
|
||||
/// Allocates one quad from the borrowed layer `layer_num`.
/// Indexing panics if `layer_num` is not a valid layer index.
fn allocate(&mut self, layer_num: usize) -> anyhow::Result<Quad> {
    let layer = &mut self.0[layer_num];
    layer.allocate()
}
|
||||
/// Returns the vertices of the borrowed layer `layer_num`.
/// Indexing panics if `layer_num` is not a valid layer index.
fn vertices(&self, layer_num: usize) -> &[Vertex] {
    let layer = &self.0[layer_num];
    layer.vertices()
}
|
||||
/// Adds `vertices` to the borrowed layer `layer_num`.
/// Indexing panics if `layer_num` is not a valid layer index.
fn extend_with(&mut self, layer_num: usize, vertices: &[Vertex]) {
    let layer = &mut self.0[layer_num];
    layer.extend_with(vertices)
}
|
||||
}
|
||||
|
||||
pub struct RenderState {
|
||||
|
@ -22,7 +22,9 @@ use crate::termwindow::background::{
|
||||
};
|
||||
use crate::termwindow::keyevent::{KeyTableArgs, KeyTableState};
|
||||
use crate::termwindow::modal::Modal;
|
||||
use crate::termwindow::render::{LineToElementShape, LineToElementShapeKey};
|
||||
use crate::termwindow::render::{
|
||||
LineToElementKey, LineToElementShape, LineToElementShapeKey, LineToElementValue,
|
||||
};
|
||||
use ::wezterm_term::input::{ClickPosition, MouseButton as TMB};
|
||||
use ::window::*;
|
||||
use anyhow::{anyhow, ensure, Context};
|
||||
@ -399,6 +401,7 @@ pub struct TermWindow {
|
||||
shape_cache:
|
||||
RefCell<LruCache<ShapeCacheKey, anyhow::Result<Rc<Vec<ShapedInfo<SrgbTexture2d>>>>>>,
|
||||
line_to_ele_shape_cache: RefCell<LruCache<LineToElementShapeKey, Rc<Vec<LineToElementShape>>>>,
|
||||
line_to_ele_cache: RefCell<LruCache<LineToElementKey, LineToElementValue>>,
|
||||
|
||||
last_status_call: Instant,
|
||||
cursor_blink_state: RefCell<ColorEase>,
|
||||
@ -687,6 +690,11 @@ impl TermWindow {
|
||||
"line_to_ele_shape_cache.miss.rate",
|
||||
65536,
|
||||
)),
|
||||
line_to_ele_cache: RefCell::new(LruCache::new(
|
||||
"line_to_ele_cache.hit.rate",
|
||||
"line_to_ele_cache.miss.rate",
|
||||
65536,
|
||||
)),
|
||||
last_status_call: Instant::now(),
|
||||
cursor_blink_state: RefCell::new(ColorEase::new(
|
||||
config.cursor_blink_rate,
|
||||
|
@ -3,7 +3,9 @@ use crate::colorease::{ColorEase, ColorEaseUniform};
|
||||
use crate::customglyph::{BlockKey, *};
|
||||
use crate::glium::texture::SrgbTexture2d;
|
||||
use crate::glyphcache::{CachedGlyph, GlyphCache};
|
||||
use crate::quad::{Quad, QuadAllocator, TripleLayerQuadAllocator, TripleLayerQuadAllocatorTrait};
|
||||
use crate::quad::{
|
||||
HeapQuadAllocator, Quad, QuadAllocator, TripleLayerQuadAllocator, TripleLayerQuadAllocatorTrait,
|
||||
};
|
||||
use crate::renderstate::BorrowedLayers;
|
||||
use crate::shapecache::*;
|
||||
use crate::tabbar::{TabBarItem, TabEntry};
|
||||
@ -178,7 +180,16 @@ pub struct LineToElementShape {
|
||||
pub struct LineToElementKey {
|
||||
pub shape_key: LineToElementShapeKey,
|
||||
/// Only set if cursor.y == stable_row
|
||||
pub cursor: Option<StableRowIndex>,
|
||||
pub cursor: Option<StableCursorPosition>,
|
||||
pub selection: Range<usize>,
|
||||
pub left: u32,
|
||||
pub top: u32,
|
||||
pub is_active: bool,
|
||||
}
|
||||
|
||||
pub struct LineToElementValue {
|
||||
pub buf: HeapQuadAllocator,
|
||||
pub expires: Option<Instant>,
|
||||
}
|
||||
|
||||
pub struct RenderScreenLineOpenGLParams<'a> {
|
||||
@ -336,6 +347,7 @@ impl super::TermWindow {
|
||||
self.invalidate_modal();
|
||||
self.shape_cache.borrow_mut().clear();
|
||||
self.line_to_ele_shape_cache.borrow_mut().clear();
|
||||
self.line_to_ele_cache.borrow_mut().clear();
|
||||
} else {
|
||||
log::error!("paint_opengl_pass failed: {:#}", err);
|
||||
break 'pass;
|
||||
@ -2215,11 +2227,92 @@ impl super::TermWindow {
|
||||
self.build_line_element_shape(¶ms, &shape_key)
|
||||
}
|
||||
|
||||
pub fn render_screen_line_opengl(
|
||||
&self,
|
||||
params: RenderScreenLineOpenGLParams,
|
||||
layers: &mut TripleLayerQuadAllocator,
|
||||
) -> anyhow::Result<()> {
|
||||
if params.line.is_double_height_bottom() {
|
||||
// The top and bottom lines are required to have the same content.
|
||||
// For the sake of simplicity, we render both of them as part of
|
||||
// rendering the top row, so we have nothing more to do here.
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let ele_key = LineToElementKey {
|
||||
shape_key: LineToElementShapeKey {
|
||||
pane_id: params
|
||||
.pane
|
||||
.map(|p| p.pane_id())
|
||||
.unwrap_or(PaneId::max_value()),
|
||||
seqno: params.line.current_seqno(),
|
||||
stable_line_idx: params
|
||||
.stable_line_idx
|
||||
.unwrap_or(StableRowIndex::max_value()),
|
||||
composing: if Some(params.cursor.y) == params.stable_line_idx {
|
||||
if let DeadKeyStatus::Composing(composing) = &self.dead_key_status {
|
||||
Some((params.cursor.x, composing.to_string()))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
} else {
|
||||
None
|
||||
},
|
||||
},
|
||||
cursor: if Some(params.cursor.y) == params.stable_line_idx {
|
||||
Some(*params.cursor)
|
||||
} else {
|
||||
None
|
||||
},
|
||||
selection: params.selection.clone(),
|
||||
left: params.left_pixel_x.ceil() as u32,
|
||||
top: params.top_pixel_y.ceil() as u32,
|
||||
is_active: params.is_active,
|
||||
};
|
||||
|
||||
if let Some(value) = self.line_to_ele_cache.borrow_mut().get(&ele_key) {
|
||||
let expired = value.expires.map(|i| Instant::now() >= i).unwrap_or(false);
|
||||
if !expired {
|
||||
value.buf.apply_to(layers)?;
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
|
||||
let next_due = self.has_animation.borrow().clone();
|
||||
|
||||
let mut buf_layer = HeapQuadAllocator::default();
|
||||
self.render_screen_line_opengl_impl(
|
||||
params,
|
||||
&mut TripleLayerQuadAllocator::Heap(&mut buf_layer),
|
||||
)?;
|
||||
buf_layer.apply_to(layers)?;
|
||||
|
||||
let expires = if let Some(has_anim) = self.has_animation.borrow().as_ref() {
|
||||
if Some(*has_anim) == next_due {
|
||||
None
|
||||
} else {
|
||||
Some(*has_anim)
|
||||
}
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
self.line_to_ele_cache.borrow_mut().put(
|
||||
ele_key,
|
||||
LineToElementValue {
|
||||
buf: buf_layer,
|
||||
expires,
|
||||
},
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// "Render" a line of the terminal screen into the vertex buffer.
|
||||
/// This is nominally a matter of setting the fg/bg color and the
|
||||
/// texture coordinates for a given glyph. There's a little bit
|
||||
/// of extra complexity to deal with multi-cell glyphs.
|
||||
pub fn render_screen_line_opengl(
|
||||
fn render_screen_line_opengl_impl(
|
||||
&self,
|
||||
params: RenderScreenLineOpenGLParams,
|
||||
layers: &mut TripleLayerQuadAllocator,
|
||||
@ -2240,13 +2333,6 @@ impl super::TermWindow {
|
||||
1.0
|
||||
};
|
||||
|
||||
if params.line.is_double_height_bottom() {
|
||||
// The top and bottom lines are required to have the same content.
|
||||
// For the sake of simplicity, we render both of them as part of
|
||||
// rendering the top row, so we have nothing more to do here.
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let height_scale = if params.line.is_double_height_top() {
|
||||
2.0
|
||||
} else {
|
||||
@ -3257,6 +3343,7 @@ impl super::TermWindow {
|
||||
pub fn recreate_texture_atlas(&mut self, size: Option<usize>) -> anyhow::Result<()> {
|
||||
self.shape_cache.borrow_mut().clear();
|
||||
self.line_to_ele_shape_cache.borrow_mut().clear();
|
||||
self.line_to_ele_cache.borrow_mut().clear();
|
||||
if let Some(render_state) = self.render_state.as_mut() {
|
||||
render_state.recreate_texture_atlas(&self.fonts, &self.render_metrics, size)?;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user