Files
@ r21169:98d87aed29c1
Branch filter:
Location: cpp/openttd-patchpack/source/src/blitter/32bpp_sse2.hpp
r21169:98d87aed29c1
7.6 KiB
text/x-c++hdr
(svn r26255) -Codechange: improve performance of brightness adjustment (MJP)
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 | /* $Id$ */
/*
* This file is part of OpenTTD.
* OpenTTD is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, version 2.
* OpenTTD is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
* See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with OpenTTD. If not, see <http://www.gnu.org/licenses/>.
*/
/** @file 32bpp_sse2.hpp SSE2 32 bpp blitter. */
#ifndef BLITTER_32BPP_SSE2_HPP
#define BLITTER_32BPP_SSE2_HPP
#ifdef WITH_SSE
#include "32bpp_simple.hpp"
#include "emmintrin.h"
#define META_LENGTH 2 ///< Number of uint32 inserted before each line of pixels in a sprite.
#define MARGIN_NORMAL_THRESHOLD (zoom == ZOOM_LVL_OUT_32X ? 8 : 4) ///< Minimum width to use margins with BM_NORMAL.
#define MARGIN_REMAP_THRESHOLD 4 ///< Minimum width to use margins with BM_COLOUR_REMAP.
#ifdef _MSC_VER
#define ALIGN(n) __declspec(align(n))
#else
#define ALIGN(n) __attribute__ ((aligned (n)))
#endif
typedef union ALIGN(16) um128i {
__m128i m128i;
uint8 m128i_u8[16];
uint16 m128i_u16[8];
uint32 m128i_u32[4];
uint64 m128i_u64[2];
} um128i;
#define CLEAR_HIGH_BYTE_MASK _mm_setr_epi8(-1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0)
#define ALPHA_CONTROL_MASK _mm_setr_epi8( 6, 7, 6, 7, 6, 7, -1, -1, 14, 15, 14, 15, 14, 15, -1, -1)
#define PACK_LOW_CONTROL_MASK _mm_setr_epi8( 0, 2, 4, -1, 8, 10, 12, -1, -1, -1, -1, -1, -1, -1, -1, -1)
#define PACK_HIGH_CONTROL_MASK _mm_setr_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 0, 2, 4, -1, 8, 10, 12, -1)
#define BRIGHTNESS_LOW_CONTROL_MASK _mm_setr_epi8( 1, 2, 1, 2, 1, 2, 0, 2, 3, 2, 3, 2, 3, 2, 0, 2)
#define BRIGHTNESS_DIV_CLEANER _mm_setr_epi8(-1, 1, -1, 1, -1, 1, -1, 0, -1, 1, -1, 1, -1, 1, -1, 0)
#define OVERBRIGHT_PRESENCE_MASK _mm_setr_epi8( 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0)
#define OVERBRIGHT_VALUE_MASK _mm_setr_epi8(-1, 0, -1, 0, -1, 0, 0, 0, -1, 0, -1, 0, -1, 0, 0, 0)
#define OVERBRIGHT_CONTROL_MASK _mm_setr_epi8( 0, 1, 0, 1, 0, 1, 7, 7, 2, 3, 2, 3, 2, 3, 7, 7)
#define TRANSPARENT_NOM_BASE _mm_setr_epi16(256, 256, 256, 256, 256, 256, 256, 256)
#define EXTR32(m_from, m_rank) (*(um128i*) &m_from).m128i_u32[m_rank]
#define EXTR64(m_from, m_rank) (*(um128i*) &m_from).m128i_u64[m_rank]
#define INSR32(m_val, m_into, m_rank) { \
(*(um128i*) &m_into).m128i = _mm_insert_epi16((*(um128i*) &m_into).m128i, m_val, (m_rank)*2); \
(*(um128i*) &m_into).m128i = _mm_insert_epi16((*(um128i*) &m_into).m128i, (m_val) >> 16, (m_rank)*2 + 1); \
}
#define INSR64(m_val, m_into, m_rank) (*(um128i*) &m_into).m128i_u64[m_rank] = (m_val)
#ifdef _SQ64
#define LOAD64(m_val, m_into) m_into = _mm_cvtsi64_si128(m_val);
#else
#define LOAD64(m_val, m_into) INSR64(m_val, m_into, 0)
#endif
/* PUT_ALPHA_IN_FRONT_OF_RGB is redefined in 32bpp_ssse3.hpp. */
#define PUT_ALPHA_IN_FRONT_OF_RGB(m_from, m_into) \
m_into = _mm_shufflelo_epi16(m_from, 0x3F); /* PSHUFLW, put alpha1 in front of each rgb1 */ \
m_into = _mm_shufflehi_epi16(m_into, 0x3F); /* PSHUFHW, put alpha2 in front of each rgb2 */
/* PACK_AB_WITHOUT_SATURATION is redefined in 32bpp_ssse3.hpp. */
#define PACK_AB_WITHOUT_SATURATION(m_from, m_into) \
m_from = _mm_and_si128(m_from, clear_hi); /* PAND, wipe high bytes to keep low bytes when packing */ \
m_into = _mm_packus_epi16(m_from, m_from); /* PACKUSWB, pack 2 colours (with saturation) */
/* Alpha blend 2 pixels. */
#define ALPHA_BLEND_2() { \
__m128i srcAB = _mm_unpacklo_epi8(srcABCD, _mm_setzero_si128()); /* PUNPCKLBW, expand each uint8 into uint16 */ \
__m128i dstAB = _mm_unpacklo_epi8(dstABCD, _mm_setzero_si128()); \
\
__m128i alphaAB = _mm_cmpgt_epi16(srcAB, _mm_setzero_si128()); /* PCMPGTW, if (alpha > 0) a++; */ \
alphaAB = _mm_srli_epi16(alphaAB, 15); \
alphaAB = _mm_add_epi16(alphaAB, srcAB); \
PUT_ALPHA_IN_FRONT_OF_RGB(alphaAB, alphaAB); \
\
srcAB = _mm_sub_epi16(srcAB, dstAB); /* PSUBW, (r - Cr) */ \
srcAB = _mm_mullo_epi16(srcAB, alphaAB); /* PMULLW, a*(r - Cr) */ \
srcAB = _mm_srli_epi16(srcAB, 8); /* PSRLW, a*(r - Cr)/256 */ \
srcAB = _mm_add_epi16(srcAB, dstAB); /* PADDW, a*(r - Cr)/256 + Cr */ \
PACK_AB_WITHOUT_SATURATION(srcAB, srcABCD); \
}
/* Darken 2 pixels.
* rgb = rgb * ((256/4) * 4 - (alpha/4)) / ((256/4) * 4)
*/
#define DARKEN_2() \
__m128i srcAB = _mm_unpacklo_epi8(srcABCD, _mm_setzero_si128()); \
__m128i dstAB = _mm_unpacklo_epi8(dstABCD, _mm_setzero_si128()); \
__m128i PUT_ALPHA_IN_FRONT_OF_RGB(srcAB, alphaAB); \
alphaAB = _mm_srli_epi16(alphaAB, 2); /* Reduce to 64 levels of shades so the max value fits in 16 bits. */ \
__m128i nom = _mm_sub_epi16(tr_nom_base, alphaAB); \
dstAB = _mm_mullo_epi16(dstAB, nom); \
dstAB = _mm_srli_epi16(dstAB, 8); \
dstAB = _mm_packus_epi16(dstAB, dstAB);
/** Base methods for 32bpp SSE blitters. */
class Blitter_32bppSSE_Base {
public:
virtual ~Blitter_32bppSSE_Base() {}
struct MapValue {
uint8 m;
uint8 v;
};
assert_compile(sizeof(MapValue) == 2);
/** Helper for creating specialised functions for specific optimisations. */
enum ReadMode {
RM_WITH_SKIP, ///< Use normal code for skipping empty pixels.
RM_WITH_MARGIN, ///< Use cached number of empty pixels at begin and end of line to reduce work.
RM_NONE, ///< No specialisation.
};
/** Helper for creating specialised functions for the case where the sprite width is odd or even. */
enum BlockType {
BT_EVEN, ///< An even number of pixels in the width; no need for a special case for the last pixel.
BT_ODD, ///< An odd number of pixels in the width; special case for the last pixel.
BT_NONE, ///< No specialisation for either case.
};
/** Data stored about a (single) sprite. */
struct SpriteInfo {
uint32 sprite_offset; ///< The offset to the sprite data.
uint32 mv_offset; ///< The offset to the map value data.
uint16 sprite_line_size; ///< The size of a single line (pitch).
uint16 sprite_width; ///< The width of the sprite.
};
struct SpriteData {
SpriteInfo infos[ZOOM_LVL_COUNT];
byte data[]; ///< Data, all zoomlevels.
};
Sprite *Encode(const SpriteLoader::Sprite *sprite, AllocatorProc *allocator);
virtual Colour AdjustBrightness(Colour colour, uint8 brightness) = 0;
};
/** The SSE2 32 bpp blitter (without palette animation). */
class Blitter_32bppSSE2 : public Blitter_32bppSimple, public Blitter_32bppSSE_Base {
public:
virtual Colour AdjustBrightness(Colour colour, uint8 brightness);
static Colour ReallyAdjustBrightness(Colour colour, uint8 brightness);
/* virtual */ void Draw(Blitter::BlitterParams *bp, BlitterMode mode, ZoomLevel zoom);
template <BlitterMode mode, Blitter_32bppSSE_Base::ReadMode read_mode, Blitter_32bppSSE_Base::BlockType bt_last>
void Draw(const Blitter::BlitterParams *bp, ZoomLevel zoom);
/* virtual */ Sprite *Encode(const SpriteLoader::Sprite *sprite, AllocatorProc *allocator) {
return Blitter_32bppSSE_Base::Encode(sprite, allocator);
}
/* virtual */ const char *GetName() { return "32bpp-sse2"; }
};
/** Factory for the SSE2 32 bpp blitter (without palette animation). */
class FBlitter_32bppSSE2 : public BlitterFactory {
public:
FBlitter_32bppSSE2() : BlitterFactory("32bpp-sse2", "32bpp SSE2 Blitter (no palette animation)", HasCPUIDFlag(1, 3, 26)) {}
/* virtual */ Blitter *CreateInstance() { return new Blitter_32bppSSE2(); }
};
#endif /* WITH_SSE */
#endif /* BLITTER_32BPP_SSE2_HPP */
|