From bf6c848be435019706b1c3926594c779ab6a1c96 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 22 Jun 2023 20:31:55 +0530 Subject: [PATCH] Use the new shader include system to avoid having to upload 256 uniform floats to two different programs on the GPU --- .gitattributes | 2 +- gen-srgb-lut.py | 26 +++++++++++++++---------- kitty/border_vertex.glsl | 2 +- kitty/cell_vertex.glsl | 2 +- kitty/shaders.c | 2 -- kitty/{srgb_gamma.c => srgb_gamma.glsl} | 7 +++---- kitty/srgb_gamma.h | 22 ++++++++++++++++++--- 7 files changed, 41 insertions(+), 22 deletions(-) rename kitty/{srgb_gamma.c => srgb_gamma.glsl} (96%) diff --git a/.gitattributes b/.gitattributes index a1b6b1d99..86dc26afc 100644 --- a/.gitattributes +++ b/.gitattributes @@ -5,7 +5,7 @@ kitty/key_encoding.py linguist-generated=true kitty/unicode-data.c linguist-generated=true kitty/rowcolumn-diacritics.c linguist-generated=true kitty/rgb.py linguist-generated=true -kitty/srgb_gamma.c linguist-generated=true +kitty/srgb_gamma.* linguist-generated=true kitty/gl-wrapper.* linguist-generated=true kitty/glfw-wrapper.* linguist-generated=true kitty/parse-graphics-command.h linguist-generated=true diff --git a/gen-srgb-lut.py b/gen-srgb-lut.py index 7d4a328a8..5bbed87dc 100755 --- a/gen-srgb-lut.py +++ b/gen-srgb-lut.py @@ -2,6 +2,7 @@ # vim:fileencoding=utf-8 import os +from functools import lru_cache from typing import List @@ -12,7 +13,8 @@ def to_linear(a: float) -> float: return float(pow((a + 0.055) / 1.055, 2.4)) -def generate_srgb_lut(line_prefix: str = '') -> List[str]: +@lru_cache +def generate_srgb_lut(line_prefix: str = ' ') -> List[str]: values: List[str] = [] lines: List[str] = [] @@ -22,26 +24,30 @@ def generate_srgb_lut(line_prefix: str = '') -> List[str]: for i in range(16): lines.append(line_prefix + ', '.join(values[i * 16:(i + 1) * 16]) + ',') + lines[-1] = lines[-1].rstrip(',') return lines -def generate_srgb_gamma_c() -> str: +def generate_srgb_gamma(declaration: str = 'static const GLfloat 
srgb_lut[256] = {', close: str = '};') -> str: lines: List[str] = [] + a = lines.append - lines.append('// Generated by gen-srgb-lut.py DO NOT edit') - lines.append('#include "srgb_gamma.h"') - lines.append('') - lines.append('const GLfloat srgb_lut[256] = {') - lines += generate_srgb_lut(' ') - lines.append('};') + a('// Generated by gen-srgb-lut.py DO NOT edit') + a('') + a(declaration) + lines += generate_srgb_lut() + a(close) return "\n".join(lines) def main() -> None: - c = generate_srgb_gamma_c() - with open(os.path.join('kitty', 'srgb_gamma.c'), 'w') as f: + c = generate_srgb_gamma() + with open(os.path.join('kitty', 'srgb_gamma.h'), 'w') as f: f.write(f'{c}\n') + g = generate_srgb_gamma('const float gamma_lut[256] = float[256](', ');') + with open(os.path.join('kitty', 'srgb_gamma.glsl'), 'w') as f: + f.write(f'{g}\n') if __name__ == '__main__': diff --git a/kitty/border_vertex.glsl b/kitty/border_vertex.glsl index 4a74b50b2..2b23bbf0f 100644 --- a/kitty/border_vertex.glsl +++ b/kitty/border_vertex.glsl @@ -1,8 +1,8 @@ +#pragma kitty_include_shader <srgb_gamma.glsl> uniform uvec2 viewport; uniform uint colors[9]; uniform float background_opacity; uniform float tint_opacity, tint_premult; -uniform float gamma_lut[256]; in vec4 rect; // left, top, right, bottom in uint rect_color; out vec4 color; diff --git a/kitty/cell_vertex.glsl b/kitty/cell_vertex.glsl index ca9a84af6..0c4e8e014 100644 --- a/kitty/cell_vertex.glsl +++ b/kitty/cell_vertex.glsl @@ -1,4 +1,5 @@ #extension GL_ARB_explicit_attrib_location : require +#pragma kitty_include_shader <srgb_gamma.glsl> #define {WHICH_PROGRAM} #define NOT_TRANSPARENT @@ -30,7 +31,6 @@ uniform uint draw_bg_bitfield; layout(location=0) in uvec3 colors; layout(location=1) in uvec4 sprite_coords; layout(location=2) in uint is_selected; -uniform float gamma_lut[256]; const int fg_index_map[] = int[3](0, 1, 0); diff --git a/kitty/shaders.c b/kitty/shaders.c index 686922765..aa7859565 100644 --- a/kitty/shaders.c +++ b/kitty/shaders.c @@ -593,7 +593,6 @@ 
set_cell_uniforms(float current_inactive_text_alpha, bool force) { } for (int i = CELL_PROGRAM; i <= CELL_FG_PROGRAM; i++) { bind_program(i); const CellUniforms *cu = &cell_program_layouts[i].uniforms; - glUniform1fv(cu->gamma_lut, arraysz(srgb_lut), srgb_lut); switch(i) { case CELL_PROGRAM: case CELL_FG_PROGRAM: glUniform1i(cu->sprites, SPRITE_MAP_UNIT); @@ -1040,7 +1039,6 @@ draw_borders(ssize_t vao_idx, unsigned int num_border_rects, BorderRect *rect_bu glUniform1f(border_program_layout.uniforms.tint_opacity, tint_opacity); glUniform1f(border_program_layout.uniforms.tint_premult, tint_premult); glUniform2ui(border_program_layout.uniforms.viewport, viewport_width, viewport_height); - glUniform1fv(border_program_layout.uniforms.gamma_lut, 256, srgb_lut); if (has_bgimage(w)) { if (w->is_semi_transparent) { BLEND_PREMULT; } else { BLEND_ONTO_OPAQUE_WITH_OPAQUE_OUTPUT; } diff --git a/kitty/srgb_gamma.c b/kitty/srgb_gamma.glsl similarity index 96% rename from kitty/srgb_gamma.c rename to kitty/srgb_gamma.glsl index f34cfd482..9ff4a1a04 100644 --- a/kitty/srgb_gamma.c +++ b/kitty/srgb_gamma.glsl @@ -1,7 +1,6 @@ // Generated by gen-srgb-lut.py DO NOT edit -#include "srgb_gamma.h" -const GLfloat srgb_lut[256] = { +const float gamma_lut[256] = float[256]( 0.00000f, 0.00030f, 0.00061f, 0.00091f, 0.00121f, 0.00152f, 0.00182f, 0.00212f, 0.00243f, 0.00273f, 0.00304f, 0.00335f, 0.00368f, 0.00402f, 0.00439f, 0.00478f, 0.00518f, 0.00561f, 0.00605f, 0.00651f, 0.00700f, 0.00750f, 0.00802f, 0.00857f, 0.00913f, 0.00972f, 0.01033f, 0.01096f, 0.01161f, 0.01229f, 0.01298f, 0.01370f, 0.01444f, 0.01521f, 0.01600f, 0.01681f, 0.01764f, 0.01850f, 0.01938f, 0.02029f, 0.02122f, 0.02217f, 0.02315f, 0.02416f, 0.02519f, 0.02624f, 0.02732f, 0.02843f, @@ -17,5 +16,5 @@ const GLfloat srgb_lut[256] = { 0.52712f, 0.53328f, 0.53948f, 0.54572f, 0.55201f, 0.55834f, 0.56471f, 0.57112f, 0.57758f, 0.58408f, 0.59062f, 0.59720f, 0.60383f, 0.61050f, 0.61721f, 0.62396f, 0.63076f, 0.63760f, 0.64448f, 0.65141f, 
0.65837f, 0.66539f, 0.67244f, 0.67954f, 0.68669f, 0.69387f, 0.70110f, 0.70838f, 0.71569f, 0.72306f, 0.73046f, 0.73791f, 0.74540f, 0.75294f, 0.76052f, 0.76815f, 0.77582f, 0.78354f, 0.79130f, 0.79910f, 0.80695f, 0.81485f, 0.82279f, 0.83077f, 0.83880f, 0.84687f, 0.85499f, 0.86316f, - 0.87137f, 0.87962f, 0.88792f, 0.89627f, 0.90466f, 0.91310f, 0.92158f, 0.93011f, 0.93869f, 0.94731f, 0.95597f, 0.96469f, 0.97345f, 0.98225f, 0.99110f, 1.00000f, -}; + 0.87137f, 0.87962f, 0.88792f, 0.89627f, 0.90466f, 0.91310f, 0.92158f, 0.93011f, 0.93869f, 0.94731f, 0.95597f, 0.96469f, 0.97345f, 0.98225f, 0.99110f, 1.00000f +); diff --git a/kitty/srgb_gamma.h b/kitty/srgb_gamma.h index 752a84c35..b3df58241 100644 --- a/kitty/srgb_gamma.h +++ b/kitty/srgb_gamma.h @@ -1,4 +1,20 @@ -#pragma once -#include "gl.h" +// Generated by gen-srgb-lut.py DO NOT edit -extern const GLfloat srgb_lut[256]; +static const GLfloat srgb_lut[256] = { + 0.00000f, 0.00030f, 0.00061f, 0.00091f, 0.00121f, 0.00152f, 0.00182f, 0.00212f, 0.00243f, 0.00273f, 0.00304f, 0.00335f, 0.00368f, 0.00402f, 0.00439f, 0.00478f, + 0.00518f, 0.00561f, 0.00605f, 0.00651f, 0.00700f, 0.00750f, 0.00802f, 0.00857f, 0.00913f, 0.00972f, 0.01033f, 0.01096f, 0.01161f, 0.01229f, 0.01298f, 0.01370f, + 0.01444f, 0.01521f, 0.01600f, 0.01681f, 0.01764f, 0.01850f, 0.01938f, 0.02029f, 0.02122f, 0.02217f, 0.02315f, 0.02416f, 0.02519f, 0.02624f, 0.02732f, 0.02843f, + 0.02956f, 0.03071f, 0.03190f, 0.03310f, 0.03434f, 0.03560f, 0.03689f, 0.03820f, 0.03955f, 0.04092f, 0.04231f, 0.04374f, 0.04519f, 0.04667f, 0.04817f, 0.04971f, + 0.05127f, 0.05286f, 0.05448f, 0.05613f, 0.05781f, 0.05951f, 0.06125f, 0.06301f, 0.06480f, 0.06663f, 0.06848f, 0.07036f, 0.07227f, 0.07421f, 0.07619f, 0.07819f, + 0.08022f, 0.08228f, 0.08438f, 0.08650f, 0.08866f, 0.09084f, 0.09306f, 0.09531f, 0.09759f, 0.09990f, 0.10224f, 0.10462f, 0.10702f, 0.10946f, 0.11193f, 0.11444f, + 0.11697f, 0.11954f, 0.12214f, 0.12477f, 0.12744f, 0.13014f, 0.13287f, 0.13563f, 0.13843f, 0.14126f, 
0.14413f, 0.14703f, 0.14996f, 0.15293f, 0.15593f, 0.15896f, + 0.16203f, 0.16513f, 0.16827f, 0.17144f, 0.17465f, 0.17789f, 0.18116f, 0.18447f, 0.18782f, 0.19120f, 0.19462f, 0.19807f, 0.20156f, 0.20508f, 0.20864f, 0.21223f, + 0.21586f, 0.21953f, 0.22323f, 0.22697f, 0.23074f, 0.23455f, 0.23840f, 0.24228f, 0.24620f, 0.25016f, 0.25415f, 0.25818f, 0.26225f, 0.26636f, 0.27050f, 0.27468f, + 0.27889f, 0.28315f, 0.28744f, 0.29177f, 0.29614f, 0.30054f, 0.30499f, 0.30947f, 0.31399f, 0.31855f, 0.32314f, 0.32778f, 0.33245f, 0.33716f, 0.34191f, 0.34670f, + 0.35153f, 0.35640f, 0.36131f, 0.36625f, 0.37124f, 0.37626f, 0.38133f, 0.38643f, 0.39157f, 0.39676f, 0.40198f, 0.40724f, 0.41254f, 0.41789f, 0.42327f, 0.42869f, + 0.43415f, 0.43966f, 0.44520f, 0.45079f, 0.45641f, 0.46208f, 0.46778f, 0.47353f, 0.47932f, 0.48515f, 0.49102f, 0.49693f, 0.50289f, 0.50888f, 0.51492f, 0.52100f, + 0.52712f, 0.53328f, 0.53948f, 0.54572f, 0.55201f, 0.55834f, 0.56471f, 0.57112f, 0.57758f, 0.58408f, 0.59062f, 0.59720f, 0.60383f, 0.61050f, 0.61721f, 0.62396f, + 0.63076f, 0.63760f, 0.64448f, 0.65141f, 0.65837f, 0.66539f, 0.67244f, 0.67954f, 0.68669f, 0.69387f, 0.70110f, 0.70838f, 0.71569f, 0.72306f, 0.73046f, 0.73791f, + 0.74540f, 0.75294f, 0.76052f, 0.76815f, 0.77582f, 0.78354f, 0.79130f, 0.79910f, 0.80695f, 0.81485f, 0.82279f, 0.83077f, 0.83880f, 0.84687f, 0.85499f, 0.86316f, + 0.87137f, 0.87962f, 0.88792f, 0.89627f, 0.90466f, 0.91310f, 0.92158f, 0.93011f, 0.93869f, 0.94731f, 0.95597f, 0.96469f, 0.97345f, 0.98225f, 0.99110f, 1.00000f +};