Changeset - r26443:26c28242addc
[Not reviewed]
master
0 3 0
Jonathan G Rennison - 2 years ago 2022-09-28 23:46:31
j.g.rennison@gmail.com
Fix: Correctly set alpha of output in AlphaBlendTwoPixels

Match alpha behaviour of ComposeColourRGBA
3 files changed with 24 insertions and 14 deletions:
0 comments (0 inline, 0 general)
src/blitter/32bpp_anim_sse4.cpp
Show inline comments
 
@@ -53,6 +53,7 @@ inline void Blitter_32bppSSE4_Anim::Draw
 
	const __m128i a_cm        = ALPHA_CONTROL_MASK;
 
	const __m128i pack_low_cm = PACK_LOW_CONTROL_MASK;
 
	const __m128i tr_nom_base = TRANSPARENT_NOM_BASE;
 
	const __m128i a_am        = ALPHA_AND_MASK;
 

	
 
	for (int y = bp->height; y != 0; y--) {
 
		Colour *dst = dst_line;
 
@@ -144,7 +145,7 @@ inline void Blitter_32bppSSE4_Anim::Draw
 

	
 
					/* Blend colours. */
 
bmno_alpha_blend:
 
					srcABCD = AlphaBlendTwoPixels(srcABCD, dstABCD, a_cm, pack_low_cm);
 
					srcABCD = AlphaBlendTwoPixels(srcABCD, dstABCD, a_cm, pack_low_cm, a_am);
 
bmno_full_opacity:
 
					_mm_storel_epi64((__m128i *) dst, srcABCD);
 
bmno_full_transparency:
 
@@ -171,7 +172,7 @@ bmno_full_transparency:
 
						} else {
 
							srcABCD = _mm_cvtsi32_si128(src->data);
 
						}
 
						dst->data = _mm_cvtsi128_si32(AlphaBlendTwoPixels(srcABCD, dstABCD, a_cm, pack_low_cm));
 
						dst->data = _mm_cvtsi128_si32(AlphaBlendTwoPixels(srcABCD, dstABCD, a_cm, pack_low_cm, a_am));
 
					}
 
				}
 
				break;
 
@@ -255,7 +256,7 @@ bmno_full_transparency:
 

	
 
					/* Blend colours. */
 
bmcr_alpha_blend:
 
					srcABCD = AlphaBlendTwoPixels(srcABCD, dstABCD, a_cm, pack_low_cm);
 
					srcABCD = AlphaBlendTwoPixels(srcABCD, dstABCD, a_cm, pack_low_cm, a_am);
 
bmcr_full_opacity:
 
					_mm_storel_epi64((__m128i *) dst, srcABCD);
 
bmcr_full_transparency:
 
@@ -288,7 +289,7 @@ bmcr_full_transparency:
 
						if (src->a < 255) {
 
bmcr_alpha_blend_single:
 
							__m128i dstABCD = _mm_cvtsi32_si128(dst->data);
 
							srcABCD = AlphaBlendTwoPixels(srcABCD, dstABCD, a_cm, pack_low_cm);
 
							srcABCD = AlphaBlendTwoPixels(srcABCD, dstABCD, a_cm, pack_low_cm, a_am);
 
						}
 
						dst->data = _mm_cvtsi128_si32(srcABCD);
 
					}
src/blitter/32bpp_sse_func.hpp
Show inline comments
 
@@ -65,27 +65,33 @@ static inline __m128i DistributeAlpha(co
 
{
 
#if (SSE_VERSION == 2)
 
	__m128i alphaAB = _mm_shufflelo_epi16(from, 0x3F); // PSHUFLW, put alpha1 in front of each rgb1
 
	return _mm_shufflehi_epi16(alphaAB, 0x3F);         // PSHUFHW, put alpha2 in front of each rgb2
 
	alphaAB = _mm_shufflehi_epi16(alphaAB, 0x3F);      // PSHUFHW, put alpha2 in front of each rgb2
 
	alphaAB = _mm_or_si128(alphaAB, mask);             // POR, set alpha fields to all 1
 
	return _mm_xor_si128(alphaAB, mask);               // PXOR, set alpha fields to 0
 
#else
 
	return _mm_shuffle_epi8(from, mask);
 
#endif
 
}
 

	
 
GNU_TARGET(SSE_TARGET)
 
static inline __m128i AlphaBlendTwoPixels(__m128i src, __m128i dst, const __m128i &distribution_mask, const __m128i &pack_mask)
 
static inline __m128i AlphaBlendTwoPixels(__m128i src, __m128i dst, const __m128i &distribution_mask, const __m128i &pack_mask, const __m128i &alpha_mask)
 
{
 
	__m128i srcAB = _mm_unpacklo_epi8(src, _mm_setzero_si128());   // PUNPCKLBW, expand each uint8 into uint16
 
	__m128i dstAB = _mm_unpacklo_epi8(dst, _mm_setzero_si128());
 

	
 
	__m128i alphaAB = _mm_cmpgt_epi16(srcAB, _mm_setzero_si128()); // PCMPGTW, if (alpha > 0) a++;
 
	alphaAB = _mm_srli_epi16(alphaAB, 15);
 
	alphaAB = _mm_add_epi16(alphaAB, srcAB);
 
	__m128i alphaMaskAB = _mm_cmpgt_epi16(srcAB, _mm_setzero_si128()); // PCMPGTW (alpha > 0) ? 0xFFFF : 0
 
	__m128i alphaAB = _mm_srli_epi16(alphaMaskAB, 15);
 
	alphaAB = _mm_add_epi16(alphaAB, srcAB);                           // if (alpha > 0) a++;
 
	alphaAB = DistributeAlpha(alphaAB, distribution_mask);
 

	
 
	srcAB = _mm_sub_epi16(srcAB, dstAB);     // PSUBW,    (r - Cr)
 
	srcAB = _mm_mullo_epi16(srcAB, alphaAB); // PMULLW, a*(r - Cr)
 
	srcAB = _mm_srli_epi16(srcAB, 8);        // PSRLW,  a*(r - Cr)/256
 
	srcAB = _mm_add_epi16(srcAB, dstAB);     // PADDW,  a*(r - Cr)/256 + Cr
 

	
 
	alphaMaskAB = _mm_and_si128(alphaMaskAB, alpha_mask); // PAND, set non alpha fields to 0
 
	srcAB = _mm_or_si128(srcAB, alphaMaskAB);             // POR, set alpha fields to 0xFFFF if src alpha was > 0
 

	
 
	return PackUnsaturated(srcAB, pack_mask);
 
}
 

	
 
@@ -227,9 +233,11 @@ inline void Blitter_32bppSSE4::Draw(cons
 
	const MapValue *src_mv = src_mv_line;
 

	
 
	/* Load these variables into register before loop. */
 
	const __m128i alpha_and   = ALPHA_AND_MASK;
 
	#define ALPHA_BLEND_PARAM_3 alpha_and
 
#if (SSE_VERSION == 2)
 
	const __m128i clear_hi    = CLEAR_HIGH_BYTE_MASK;
 
	#define ALPHA_BLEND_PARAM_1 clear_hi
 
	#define ALPHA_BLEND_PARAM_1 alpha_and
 
	#define ALPHA_BLEND_PARAM_2 clear_hi
 
	#define DARKEN_PARAM_1      tr_nom_base
 
	#define DARKEN_PARAM_2      tr_nom_base
 
@@ -275,7 +283,7 @@ inline void Blitter_32bppSSE4::Draw(cons
 
				for (uint x = (uint) effective_width / 2; x > 0; x--) {
 
					__m128i srcABCD = _mm_loadl_epi64((const __m128i*) src);
 
					__m128i dstABCD = _mm_loadl_epi64((__m128i*) dst);
 
					_mm_storel_epi64((__m128i*) dst, AlphaBlendTwoPixels(srcABCD, dstABCD, ALPHA_BLEND_PARAM_1, ALPHA_BLEND_PARAM_2));
 
					_mm_storel_epi64((__m128i*) dst, AlphaBlendTwoPixels(srcABCD, dstABCD, ALPHA_BLEND_PARAM_1, ALPHA_BLEND_PARAM_2, ALPHA_BLEND_PARAM_3));
 
					src += 2;
 
					dst += 2;
 
				}
 
@@ -283,7 +291,7 @@ inline void Blitter_32bppSSE4::Draw(cons
 
				if ((bt_last == BT_NONE && effective_width & 1) || bt_last == BT_ODD) {
 
					__m128i srcABCD = _mm_cvtsi32_si128(src->data);
 
					__m128i dstABCD = _mm_cvtsi32_si128(dst->data);
 
					dst->data = _mm_cvtsi128_si32(AlphaBlendTwoPixels(srcABCD, dstABCD, ALPHA_BLEND_PARAM_1, ALPHA_BLEND_PARAM_2));
 
					dst->data = _mm_cvtsi128_si32(AlphaBlendTwoPixels(srcABCD, dstABCD, ALPHA_BLEND_PARAM_1, ALPHA_BLEND_PARAM_2, ALPHA_BLEND_PARAM_3));
 
				}
 
				break;
 

	
 
@@ -328,7 +336,7 @@ inline void Blitter_32bppSSE4::Draw(cons
 
					}
 

	
 
					/* Blend colours. */
 
					_mm_storel_epi64((__m128i *) dst, AlphaBlendTwoPixels(srcABCD, dstABCD, ALPHA_BLEND_PARAM_1, ALPHA_BLEND_PARAM_2));
 
					_mm_storel_epi64((__m128i *) dst, AlphaBlendTwoPixels(srcABCD, dstABCD, ALPHA_BLEND_PARAM_1, ALPHA_BLEND_PARAM_2, ALPHA_BLEND_PARAM_3));
 
					dst += 2;
 
					src += 2;
 
					src_mv += 2;
 
@@ -357,7 +365,7 @@ inline void Blitter_32bppSSE4::Draw(cons
 
						if (src->a < 255) {
 
bmcr_alpha_blend_single:
 
							__m128i dstABCD = _mm_cvtsi32_si128(dst->data);
 
							srcABCD = AlphaBlendTwoPixels(srcABCD, dstABCD, ALPHA_BLEND_PARAM_1, ALPHA_BLEND_PARAM_2);
 
							srcABCD = AlphaBlendTwoPixels(srcABCD, dstABCD, ALPHA_BLEND_PARAM_1, ALPHA_BLEND_PARAM_2, ALPHA_BLEND_PARAM_3);
 
						}
 
						dst->data = _mm_cvtsi128_si32(srcABCD);
 
					}
src/blitter/32bpp_sse_type.h
Show inline comments
 
@@ -51,6 +51,7 @@ typedef union ALIGN(16) um128i {
 
#define OVERBRIGHT_VALUE_MASK       _mm_setr_epi8(-1,  0, -1,  0, -1,  0,  0,  0, -1,  0, -1,  0, -1,  0,  0,  0)
 
#define OVERBRIGHT_CONTROL_MASK     _mm_setr_epi8( 0,  1,  0,  1,  0,  1,  7,  7,  2,  3,  2,  3,  2,  3,  7,  7)
 
#define TRANSPARENT_NOM_BASE        _mm_setr_epi16(256, 256, 256, 256, 256, 256, 256, 256)
 
#define ALPHA_AND_MASK              _mm_setr_epi16(  0,   0,   0,  -1,   0,   0,   0,  -1)
 

	
 
#endif /* WITH_SSE */
 
#endif /* BLITTER_32BPP_SSE_TYPE_H */
0 comments (0 inline, 0 general)