diff --git a/src/blitter/32bpp_optimized.cpp b/src/blitter/32bpp_optimized.cpp --- a/src/blitter/32bpp_optimized.cpp +++ b/src/blitter/32bpp_optimized.cpp @@ -6,44 +6,133 @@ #include "../zoom_func.h" #include "../gfx_func.h" #include "../debug.h" +#include "../core/math_func.hpp" +#include "../core/alloc_func.hpp" #include "32bpp_optimized.hpp" static FBlitter_32bppOptimized iFBlitter_32bppOptimized; -template inline void Blitter_32bppOptimized::Draw(Blitter::BlitterParams *bp) +/** + * Draws a sprite to a (screen) buffer. It is templated to allow faster operation. + * + * @param mode blitter mode + * @param bp further blitting parameters + * @param zoom zoom level at which we are drawing + */ +template +inline void Blitter_32bppOptimized::Draw(const Blitter::BlitterParams *bp, ZoomLevel zoom) { - const SpriteLoader::CommonPixel *src, *src_line; - uint32 *dst, *dst_line; + const SpriteData *src = (const SpriteData *)bp->sprite; + + /* src_px : each line begins with uint32 n = 'number of bytes in this line' (header included), + * followed by the Colour structs of this line */ + const Colour *src_px = (const Colour *)(src->data + src->offset[zoom][0]); + /* src_n : each line begins with uint32 n = 'number of bytes in this line', + * then interleaved stream of 'm' and 'n' channels. 
'm' is remap, + * 'n' is number of pixels with the same alpha channel class */ + const uint8 *src_n = (const uint8 *)(src->data + src->offset[zoom][1]); - /* Find where to start reading in the source sprite */ - src_line = (const SpriteLoader::CommonPixel *)bp->sprite + (bp->skip_top * bp->sprite_width + bp->skip_left) * ScaleByZoom(1, zoom); - dst_line = (uint32 *)bp->dst + bp->top * bp->pitch + bp->left; + /* skip upper lines in src_px and src_n */ + for (uint i = bp->skip_top; i != 0; i--) { + src_px = (const Colour *)((const byte *)src_px + *(const uint32 *)src_px); + src_n += *(uint32 *)src_n; + } + + /* skip lines in dst */ + uint32 *dst = (uint32 *)bp->dst + bp->top * bp->pitch + bp->left; + + /* store so we don't have to access it via bp every time (compiler assumes pointer aliasing) */ + const byte *remap = bp->remap; for (int y = 0; y < bp->height; y++) { - dst = dst_line; - dst_line += bp->pitch; + /* next dst line begins here */ + uint32 *dst_ln = dst + bp->pitch; + + /* next src line begins here */ + const Colour *src_px_ln = (const Colour *)((const byte *)src_px + *(const uint32 *)src_px); + src_px++; - src = src_line; - src_line += bp->sprite_width * ScaleByZoom(1, zoom); + /* next src_n line begins here */ + const uint8 *src_n_ln = src_n + *(uint32 *)src_n; + src_n += 4; /* step over the uint32 length header */ + + /* we will end this line when we reach this point */ + uint32 *dst_end = dst + bp->skip_left; + + /* number of pixels with the same alpha channel class */ + uint n; + + while (dst < dst_end) { + n = *src_n++; - for (int x = 0; x < bp->width; x++) { - if (src->a == 0) { - /* src->r is 'misused' here to indicate how much more pixels are following with an alpha of 0 */ - int skip = UnScaleByZoom(src->r, zoom); + if (src_px->a == 0) { + dst += n; + src_px ++; + src_n++; + } else { + if (dst + n > dst_end) { + uint d = dst_end - dst; + src_px += d; + src_n += d; + + dst = dst_end - bp->skip_left; + dst_end = dst + bp->width; - dst += skip; - x += skip - 1; - src += ScaleByZoom(1, zoom)
* skip; + n = min(n - d, (uint)bp->width); + goto draw; + } + dst += n; + src_px += n; + src_n += n; + } + } + + dst -= bp->skip_left; + dst_end -= bp->skip_left; + + dst_end += bp->width; + + while (dst < dst_end) { + n = min(*src_n++, (uint)(dst_end - dst)); + + if (src_px->a == 0) { + dst += n; + src_px++; + src_n++; continue; } + draw:; + switch (mode) { case BM_COLOUR_REMAP: - /* In case the m-channel is zero, do not remap this pixel in any way */ - if (src->m == 0) { - *dst = ComposeColourRGBA(src->r, src->g, src->b, src->a, *dst); + if (src_px->a == 255) { + do { + uint m = *src_n; + /* In case the m-channel is zero, do not remap this pixel in any way */ + if (m == 0) { + *dst = *src_px; + } else { + uint r = remap[m]; + if (r != 0) *dst = this->LookupColourInPalette(r); + } + dst++; + src_px++; + src_n++; + } while (--n != 0); } else { - if (bp->remap[src->m] != 0) *dst = ComposeColourPA(this->LookupColourInPalette(bp->remap[src->m]), src->a, *dst); + do { + uint m = *src_n; + if (m == 0) { + *dst = ComposeColourRGBANoCheck(src_px->r, src_px->g, src_px->b, src_px->a, *dst); + } else { + uint r = remap[m]; + if (r != 0) *dst = ComposeColourPANoCheck(this->LookupColourInPalette(r), src_px->a, *dst); + } + dst++; + src_px++; + src_n++; + } while (--n != 0); } break; @@ -53,30 +142,47 @@ template r, src->g, src->b, src->a, *dst); + if (src_px->a == 255) { + /* faster than memcpy(), n is usually low */ + src_n += n; + do { + *dst++ = *src_px++; + } while (--n != 0); + } else { + src_n += n; + do { + *dst = ComposeColourRGBANoCheck(src_px->r, src_px->g, src_px->b, src_px->a, *dst); + dst++; + src_px++; + } while (--n != 0); + } break; } - dst++; - src += ScaleByZoom(1, zoom); } + + dst = dst_ln; + src_px = src_px_ln; + src_n = src_n_ln; } } -template inline void Blitter_32bppOptimized::Draw(Blitter::BlitterParams *bp, ZoomLevel zoom) -{ - switch (zoom) { - default: NOT_REACHED(); - case ZOOM_LVL_NORMAL: Draw(bp); return; - case ZOOM_LVL_OUT_2X: Draw(bp); return;
- case ZOOM_LVL_OUT_4X: Draw(bp); return; - case ZOOM_LVL_OUT_8X: Draw(bp); return; - } -} - +/** + * Draws a sprite to a (screen) buffer. Calls the appropriate templated function. + * + * @param bp further blitting parameters + * @param mode blitter mode + * @param zoom zoom level at which we are drawing + */ void Blitter_32bppOptimized::Draw(Blitter::BlitterParams *bp, BlitterMode mode, ZoomLevel zoom) { switch (mode) { @@ -87,46 +193,180 @@ void Blitter_32bppOptimized::Draw(Blitte } } +/** + * Resizes the sprite in a very simple way: takes every n-th pixel of every n-th row. + * + * @param sprite_src sprite to resize + * @param zoom resizing scale + * @return newly allocated resized sprite; Encode() releases it and its data with free() + */ +static const SpriteLoader::Sprite *ResizeSprite(const SpriteLoader::Sprite *sprite_src, ZoomLevel zoom) +{ + SpriteLoader::Sprite *sprite = MallocT(1); + + if (zoom == ZOOM_LVL_NORMAL) { + memcpy(sprite, sprite_src, sizeof(*sprite)); + uint size = sprite_src->height * sprite_src->width; + sprite->data = MallocT(size); + memcpy(sprite->data, sprite_src->data, size * sizeof(SpriteLoader::CommonPixel)); + return sprite; + } + + sprite->height = UnScaleByZoom(sprite_src->height, zoom); + sprite->width = UnScaleByZoom(sprite_src->width, zoom); + sprite->x_offs = UnScaleByZoom(sprite_src->x_offs, zoom); + sprite->y_offs = UnScaleByZoom(sprite_src->y_offs, zoom); + + uint size = sprite->height * sprite->width; + SpriteLoader::CommonPixel *dst = sprite->data = CallocT(size); + + const SpriteLoader::CommonPixel *src = (SpriteLoader::CommonPixel *)sprite_src->data; + const SpriteLoader::CommonPixel *src_end = src + sprite_src->height * sprite_src->width; + + uint scaled_1 = ScaleByZoom(1, zoom); + + for (uint y = 0; y < sprite->height; y++) { + if (src >= src_end) src = src_end - sprite_src->width; /* past the last source row: reuse the last one */ + + const SpriteLoader::CommonPixel *src_ln = src + sprite_src->width * scaled_1; + for (uint x = 0; x < sprite->width; x++) { + if (src >= src_ln) src = src_ln - 1; + *dst = *src; + dst++; + src += scaled_1;
+ } + + src = src_ln; + } + + return sprite; +} + Sprite *Blitter_32bppOptimized::Encode(SpriteLoader::Sprite *sprite, Blitter::AllocatorProc *allocator) { - Sprite *dest_sprite; - SpriteLoader::CommonPixel *dst; - dest_sprite = (Sprite *)allocator(sizeof(*dest_sprite) + sprite->height * sprite->width * sizeof(SpriteLoader::CommonPixel)); + /* streams of pixels (a, r, g, b channels) + * + * stored in a separate stream so the data is always aligned on a 4B boundary */ + Colour *dst_px_orig[ZOOM_LVL_COUNT]; + + /* interleaved stream of 'm' channel and 'n' channel + * 'n' is number of following pixels with the same alpha channel class + * there are 3 classes: 0, 255, others + * + * it has to be stored in one stream so fewer registers are used - + * x86 has problems with register allocation even with this solution */ + uint8 *dst_n_orig[ZOOM_LVL_COUNT]; + + /* lengths of the streams in bytes: [z][0] is the pixel stream, [z][1] is the 'm'/'n' stream */ + uint32 lengths[ZOOM_LVL_COUNT][2]; + + for (ZoomLevel z = ZOOM_LVL_BEGIN; z < ZOOM_LVL_END; z++) { + const SpriteLoader::Sprite *src_orig = ResizeSprite(sprite, z); + + uint size = src_orig->height * src_orig->width; + + dst_px_orig[z] = CallocT(size + src_orig->height * 2); + dst_n_orig[z] = CallocT(size * 2 + src_orig->height * 4 * 2); + + uint32 *dst_px_ln = (uint32 *)dst_px_orig[z]; + uint32 *dst_n_ln = (uint32 *)dst_n_orig[z]; + + const SpriteLoader::CommonPixel *src = (const SpriteLoader::CommonPixel *)src_orig->data; + + for (uint y = src_orig->height; y > 0; y--) { + Colour *dst_px = (Colour *)(dst_px_ln + 1); + uint8 *dst_n = (uint8 *)(dst_n_ln + 1); + + uint8 *dst_len = dst_n++; + + uint last = 3; /* sentinel: no run started yet (real classes below are 0, 1 and 255) */ + int len = 0; + + for (uint x = src_orig->width; x > 0; x--) { + uint8 a = src->a; + uint t = a > 0 && a < 255 ?
1 : a; + + if (last != t || len == 255) { + if (last != 3) { + *dst_len = len; + dst_len = dst_n++; + } + len = 0; + } + + last = t; + len++; + + if (a != 0) { + dst_px->a = a; + *dst_n = src->m; + if (src->m != 0) { + /* Pre-convert the mapping channel to a RGB value */ + uint32 colour = this->LookupColourInPalette(src->m); + dst_px->r = GB(colour, 16, 8); + dst_px->g = GB(colour, 8, 8); + dst_px->b = GB(colour, 0, 8); + } else { + dst_px->r = src->r; + dst_px->g = src->g; + dst_px->b = src->b; + } + dst_px++; + dst_n++; + } else if (len == 1) { + dst_px++; + *dst_n = src->m; + dst_n++; + } + + src++; + } + + if (last != 3) { + *dst_len = len; + } + + dst_px = (Colour *)AlignPtr(dst_px, 4); + dst_n = (uint8 *)AlignPtr(dst_n, 4); + + *dst_px_ln = (uint8 *)dst_px - (uint8 *)dst_px_ln; + *dst_n_ln = (uint8 *)dst_n - (uint8 *)dst_n_ln; + + dst_px_ln = (uint32 *)dst_px; + dst_n_ln = (uint32 *)dst_n; + } + + lengths[z][0] = (byte *)dst_px_ln - (byte *)dst_px_orig[z]; // all are aligned to 4B boundary + lengths[z][1] = (byte *)dst_n_ln - (byte *)dst_n_orig[z]; + + free(src_orig->data); + free((void *)src_orig); + } + + uint len = 0; // total length of data + for (ZoomLevel z = ZOOM_LVL_BEGIN; z < ZOOM_LVL_END; z++) { + len += lengths[z][0] + lengths[z][1]; + } + + Sprite *dest_sprite = (Sprite *)allocator(sizeof(*dest_sprite) + sizeof(SpriteData) + len); dest_sprite->height = sprite->height; dest_sprite->width = sprite->width; dest_sprite->x_offs = sprite->x_offs; dest_sprite->y_offs = sprite->y_offs; - dst = (SpriteLoader::CommonPixel *)dest_sprite->data; + SpriteData *dst = (SpriteData *)dest_sprite->data; - memcpy(dst, sprite->data, sprite->height * sprite->width * sizeof(SpriteLoader::CommonPixel)); - /* Skip to the end of the array, and work backwards to find transparent blocks */ - dst = dst + sprite->height * sprite->width - 1; + for (ZoomLevel z = ZOOM_LVL_BEGIN; z < ZOOM_LVL_END; z++) { + dst->offset[z][0] = z == ZOOM_LVL_BEGIN ?
0 : lengths[z - 1][1] + dst->offset[z - 1][1]; + dst->offset[z][1] = lengths[z][0] + dst->offset[z][0]; - for (uint y = sprite->height; y > 0; y--) { - int trans = 0; - /* Process sprite line backwards, to compute lengths of transparent blocks */ - for (uint x = sprite->width; x > 0; x--) { - if (dst->a == 0) { - /* Save transparent block length in red channel; max value is 255 the red channel can contain */ - if (trans < 255) trans++; - dst->r = trans; - dst->g = 0; - dst->b = 0; - dst->m = 0; - } else { - trans = 0; - if (dst->m != 0) { - /* Pre-convert the mapping channel to a RGB value */ - uint color = this->LookupColourInPalette(dst->m); - dst->r = GB(color, 16, 8); - dst->g = GB(color, 8, 8); - dst->b = GB(color, 0, 8); - } - } - dst--; - } + memcpy(dst->data + dst->offset[z][0], dst_px_orig[z], lengths[z][0]); + memcpy(dst->data + dst->offset[z][1], dst_n_orig[z], lengths[z][1]); + + free(dst_px_orig[z]); + free(dst_n_orig[z]); } + return dest_sprite; }