Optimize the Painter::blit() loop a bit. ~3% fewer cycles, I'll take it.

This commit is contained in:
Andreas Kling 2019-01-16 19:43:01 +01:00
parent 7750e6952b
commit f651405694
Notes: sideshowbarker 2024-07-19 16:01:20 +09:00
4 changed files with 22 additions and 15 deletions

View File

@ -20,7 +20,7 @@ ALWAYS_INLINE void fast_dword_copy(dword* dest, const dword* src, size_t count)
#ifdef SERENITY
asm volatile(
"rep movsl\n"
: "=S"(src), "=D"(dest)
: "=S"(src), "=D"(dest), "=c"(count)
: "S"(src), "D"(dest), "c"(count)
: "memory"
);
@ -34,7 +34,7 @@ ALWAYS_INLINE void fast_dword_fill(dword* dest, dword value, size_t count)
#ifdef SERENITY
asm volatile(
"rep stosl\n"
: "=D"(dest)
: "=D"(dest), "=c"(count)
: "D"(dest), "c"(count), "a"(value)
: "memory"
);

View File

@ -55,12 +55,3 @@ GraphicsBitmap::~GraphicsBitmap()
m_data = nullptr;
}
// Returns a writable pointer to the start of row `y`, which lies
// `y * m_pitch` bytes past m_data.
RGBA32* GraphicsBitmap::scanline(int y)
{
    // Do the pointer arithmetic in bytes, then reinterpret the row start as pixels.
    byte* row_bytes = ((byte*)m_data) + (y * m_pitch);
    return reinterpret_cast<RGBA32*>(row_bytes);
}
// Read-only overload: returns a pointer to the start of row `y`, which lies
// `y * m_pitch` bytes past m_data.
const RGBA32* GraphicsBitmap::scanline(int y) const
{
    // Do the pointer arithmetic in bytes, then reinterpret the row start as pixels.
    const byte* row_bytes = ((const byte*)m_data) + (y * m_pitch);
    return reinterpret_cast<const RGBA32*>(row_bytes);
}

View File

@ -46,3 +46,13 @@ private:
Region* m_server_region { nullptr };
#endif
};
// Inline accessor for row `y`: offsets m_data by `y * m_pitch` bytes and
// hands the result back as a writable pixel pointer.
inline RGBA32* GraphicsBitmap::scanline(int y)
{
    const auto row_offset = y * m_pitch; // byte offset of the requested row
    byte* const base = (byte*)m_data;
    return reinterpret_cast<RGBA32*>(base + row_offset);
}
// Inline read-only accessor for row `y`: offsets m_data by `y * m_pitch`
// bytes and hands the result back as a const pixel pointer.
inline const RGBA32* GraphicsBitmap::scanline(int y) const
{
    const auto row_offset = y * m_pitch; // byte offset of the requested row
    const byte* const base = (const byte*)m_data;
    return reinterpret_cast<const RGBA32*>(base + row_offset);
}

View File

@ -226,9 +226,15 @@ void Painter::blit(const Point& position, const GraphicsBitmap& source)
Rect dst_rect(position, source.size());
dst_rect.intersect(m_clip_rect);
for (int y = 0; y < dst_rect.height(); ++y) {
auto* dst_scanline = m_target->scanline(position.y() + y);
auto* src_scanline = source.scanline(y);
fast_dword_copy(dst_scanline + dst_rect.x(), src_scanline + (dst_rect.x() - position.x()), dst_rect.width());
RGBA32* dst = m_target->scanline(position.y()) + dst_rect.x();
const RGBA32* src= source.scanline(0) + (dst_rect.x() - position.x());
const unsigned dst_skip = m_target->width();
const unsigned src_skip = source.width();
for (int i = dst_rect.height() - 1; i >= 0; --i) {
fast_dword_copy(dst, src, dst_rect.width());
dst += dst_skip;
src += src_skip;
}
}