Changeset - r21120:bf4cad9f8d6f
[Not reviewed]
master
0 7 2
rubidium - 10 years ago 2014-01-02 16:48:16
rubidium@openttd.org
(svn r26205) -Feature: SSE 4.1 sprite sorter, improving the sorting performance significantly (MJP)
For example with GCC 4.8, x86_64 Linux, Intel i5-3337U this patch improves the performance of Pile, Treham and Hamac test save games by about 10% in over-all run time at fast forward at 1920x1080 when zoomed out and when trees are not disabled.
9 files changed with 233 insertions and 27 deletions:
0 comments (0 inline, 0 general)
projects/openttd_vs100.vcxproj
Show inline comments
 
@@ -385,6 +385,7 @@
 
    <ClCompile Include="..\src\vehicle.cpp" />
 
    <ClCompile Include="..\src\vehiclelist.cpp" />
 
    <ClCompile Include="..\src\viewport.cpp" />
 
    <ClCompile Include="..\src\viewport_sprite_sorter_sse4.cpp" />
 
    <ClCompile Include="..\src\waypoint.cpp" />
 
    <ClCompile Include="..\src\widget.cpp" />
 
    <ClCompile Include="..\src\window.cpp" />
 
@@ -641,6 +642,7 @@
 
    <ClInclude Include="..\src\vehicle_type.h" />
 
    <ClInclude Include="..\src\vehiclelist.h" />
 
    <ClInclude Include="..\src\viewport_func.h" />
 
    <ClInclude Include="..\src\viewport_sprite_sorter.h" />
 
    <ClInclude Include="..\src\viewport_type.h" />
 
    <ClInclude Include="..\src\water.h" />
 
    <ClInclude Include="..\src\waypoint_base.h" />
projects/openttd_vs100.vcxproj.filters
Show inline comments
 
@@ -384,6 +384,9 @@
 
    <ClCompile Include="..\src\viewport.cpp">
 
      <Filter>Source Files</Filter>
 
    </ClCompile>
 
    <ClCompile Include="..\src\viewport_sprite_sorter_sse4.cpp">
 
      <Filter>Source Files</Filter>
 
    </ClCompile>
 
    <ClCompile Include="..\src\waypoint.cpp">
 
      <Filter>Source Files</Filter>
 
    </ClCompile>
 
@@ -1152,6 +1155,9 @@
 
    <ClInclude Include="..\src\viewport_func.h">
 
      <Filter>Header Files</Filter>
 
    </ClInclude>
 
    <ClInclude Include="..\src\viewport_sprite_sorter.h">
 
      <Filter>Header Files</Filter>
 
    </ClInclude>
 
    <ClInclude Include="..\src\viewport_type.h">
 
      <Filter>Header Files</Filter>
 
    </ClInclude>
projects/openttd_vs80.vcproj
Show inline comments
 
@@ -811,6 +811,10 @@
 
				>
 
			</File>
 
			<File
 
				RelativePath=".\..\src\viewport_sprite_sorter_sse4.cpp"
 
				>
 
			</File>
 
			<File
 
				RelativePath=".\..\src\waypoint.cpp"
 
				>
 
			</File>
 
@@ -1839,6 +1843,10 @@
 
				>
 
			</File>
 
			<File
 
				RelativePath=".\..\src\viewport_sprite_sorter.h"
 
				>
 
			</File>
 
			<File
 
				RelativePath=".\..\src\viewport_type.h"
 
				>
 
			</File>
projects/openttd_vs90.vcproj
Show inline comments
 
@@ -808,6 +808,10 @@
 
				>
 
			</File>
 
			<File
 
				RelativePath=".\..\src\viewport_sprite_sorter_sse4.cpp"
 
				>
 
			</File>
 
			<File
 
				RelativePath=".\..\src\waypoint.cpp"
 
				>
 
			</File>
 
@@ -1836,6 +1840,10 @@
 
				>
 
			</File>
 
			<File
 
				RelativePath=".\..\src\viewport_sprite_sorter.h"
 
				>
 
			</File>
 
			<File
 
				RelativePath=".\..\src\viewport_type.h"
 
				>
 
			</File>
source.list
Show inline comments
 
@@ -111,6 +111,9 @@ townname.cpp
 
vehicle.cpp
 
vehiclelist.cpp
 
viewport.cpp
 
#if SSE
 
viewport_sprite_sorter_sse4.cpp
 
#end
 
waypoint.cpp
 
widget.cpp
 
window.cpp
 
@@ -374,6 +377,7 @@ vehicle_gui_base.h
 
vehicle_type.h
 
vehiclelist.h
 
viewport_func.h
 
viewport_sprite_sorter.h
 
viewport_type.h
 
water.h
 
waypoint_base.h
src/openttd.cpp
Show inline comments
 
@@ -62,13 +62,12 @@
 
#include "town.h"
 
#include "subsidy_func.h"
 
#include "gfx_layout.h"
 

	
 
#include "viewport_sprite_sorter.h"
 

	
 
#include "linkgraph/linkgraphschedule.h"
 

	
 
#include <stdarg.h>
 

	
 

	
 
void CallLandscapeTick();
 
void IncreaseDate();
 
void DoPaletteAnimations();
 
@@ -775,6 +774,8 @@ int openttd_main(int argc, char *argv[])
 
	}
 
	free(videodriver);
 

	
 
	InitializeSpriteSorter();
 

	
 
	/* Initialize the zoom level of the screen to normal */
 
	_screen.zoom = ZOOM_LVL_NORMAL;
 

	
src/viewport.cpp
Show inline comments
 
@@ -46,6 +46,7 @@
 
#include "tilehighlight_func.h"
 
#include "window_gui.h"
 
#include "linkgraph/linkgraph_gui.h"
 
#include "viewport_sprite_sorter.h"
 

	
 
#include "table/strings.h"
 
#include "table/palettes.h"
 
@@ -78,29 +79,6 @@ struct ChildScreenSpriteToDraw {
 
	int next;                       ///< next child to draw (-1 at the end)
 
};
 

	
 
/** Parent sprite that should be drawn */
 
struct ParentSpriteToDraw {
 
	SpriteID image;                 ///< sprite to draw
 
	PaletteID pal;                  ///< palette to use
 
	const SubSprite *sub;           ///< only draw a rectangular part of the sprite
 

	
 
	int32 x;                        ///< screen X coordinate of sprite
 
	int32 y;                        ///< screen Y coordinate of sprite
 

	
 
	int32 left;                     ///< minimal screen X coordinate of sprite (= x + sprite->x_offs), reference point for child sprites
 
	int32 top;                      ///< minimal screen Y coordinate of sprite (= y + sprite->y_offs), reference point for child sprites
 

	
 
	int32 xmin;                     ///< minimal world X coordinate of bounding box
 
	int32 xmax;                     ///< maximal world X coordinate of bounding box
 
	int32 ymin;                     ///< minimal world Y coordinate of bounding box
 
	int32 ymax;                     ///< maximal world Y coordinate of bounding box
 
	int zmin;                       ///< minimal world Z coordinate of bounding box
 
	int zmax;                       ///< maximal world Z coordinate of bounding box
 

	
 
	int first_child;                ///< the first child to draw.
 
	bool comparison_done;           ///< Used during sprite sorting: true if sprite has been compared with all other sprites
 
};
 

	
 
/** Enumeration of multi-part foundations */
 
enum FoundationPart {
 
	FOUNDATION_PART_NONE     = 0xFF,  ///< Neither foundation nor groundsprite drawn yet.
 
@@ -122,7 +100,6 @@ enum SpriteCombineMode {
 
typedef SmallVector<TileSpriteToDraw, 64> TileSpriteToDrawVector;
 
typedef SmallVector<StringSpriteToDraw, 4> StringSpriteToDrawVector;
 
typedef SmallVector<ParentSpriteToDraw, 64> ParentSpriteToDrawVector;
 
typedef SmallVector<ParentSpriteToDraw*, 64> ParentSpriteToSortVector;
 
typedef SmallVector<ChildScreenSpriteToDraw, 16> ChildScreenSpriteToDrawVector;
 

	
 
/** Data structure storing rendering information */
 
@@ -154,6 +131,7 @@ static TileInfo *_cur_ti;
 
bool _draw_bounding_boxes = false;
 
bool _draw_dirty_blocks = false;
 
uint _dirty_block_colour = 0;
 
static VpSpriteSorter _vp_sprite_sorter = NULL;
 

	
 
static Point MapXYZToViewport(const ViewPort *vp, int x, int y, int z)
 
{
 
@@ -1288,6 +1266,12 @@ static void ViewportDrawTileSprites(cons
 
	}
 
}
 

	
 
/** This fallback sprite checker always exists. */
 
static bool ViewportSortParentSpritesChecker()
 
{
 
	return true;
 
}
 

	
 
/** Sort parent sprites pointer array */
 
static void ViewportSortParentSprites(ParentSpriteToSortVector *psdv)
 
{
 
@@ -1480,7 +1464,7 @@ void ViewportDoDraw(const ViewPort *vp, 
 
		*_vd.parent_sprites_to_sort.Append() = it;
 
	}
 

	
 
	ViewportSortParentSprites(&_vd.parent_sprites_to_sort);
 
	_vp_sprite_sorter(&_vd.parent_sprites_to_sort);
 
	ViewportDrawParentSprites(&_vd.parent_sprites_to_sort, &_vd.child_screen_sprites_to_draw);
 

	
 
	if (_draw_bounding_boxes) ViewportDrawBoundingBoxes(&_vd.parent_sprites_to_sort);
 
@@ -2998,3 +2982,29 @@ Point GetViewportStationMiddle(const Vie
 
	p.y = UnScaleByZoom(p.y - vp->virtual_top, vp->zoom) + vp->top;
 
	return p;
 
}
 

	
 
/** Helper class for getting the best sprite sorter. */
 
struct ViewportSSCSS {
 
	VpSorterChecker fct_checker; ///< The check function.
 
	VpSpriteSorter fct_sorter;   ///< The sorting function.
 
};
 

	
 
/** List of sorters ordered from best to worst. */
 
static ViewportSSCSS _vp_sprite_sorters[] = {
 
#ifdef WITH_SSE
 
	{ &ViewportSortParentSpritesSSE41Checker, &ViewportSortParentSpritesSSE41 },
 
#endif
 
	{ &ViewportSortParentSpritesChecker, &ViewportSortParentSprites }
 
};
 

	
 
/** Choose the "best" sprite sorter and set _vp_sprite_sorter. */
 
void InitializeSpriteSorter()
 
{
 
	for (uint i = 0; i < lengthof(_vp_sprite_sorters); i++) {
 
		if (_vp_sprite_sorters[i].fct_checker()) {
 
			_vp_sprite_sorter = _vp_sprite_sorters[i].fct_sorter;
 
			break;
 
		}
 
	}
 
	assert(_vp_sprite_sorter != NULL);
 
}
src/viewport_sprite_sorter.h
Show inline comments
 
new file 100644
 
/* $Id$ */
 

	
 
/*
 
 * This file is part of OpenTTD.
 
 * OpenTTD is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, version 2.
 
 * OpenTTD is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 
 * See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with OpenTTD. If not, see <http://www.gnu.org/licenses/>.
 
 */
 

	
 
/** @file viewport_sprite_sorter.h Types related to sprite sorting. */
 

	
 
#include "stdafx.h"
 
#include "core/smallvec_type.hpp"
 
#include "gfx_type.h"
 

	
 
#ifndef VIEWPORT_SPRITE_SORTER_H
 
#define VIEWPORT_SPRITE_SORTER_H
 

	
 
/** Parent sprite that should be drawn */
 
struct ParentSpriteToDraw {
 
	/* Block of 16B loadable in xmm register */
 
	int32 xmin;                     ///< minimal world X coordinate of bounding box
 
	int32 ymin;                     ///< minimal world Y coordinate of bounding box
 
	int32 zmin;                     ///< minimal world Z coordinate of bounding box
 
	int32 x;                        ///< screen X coordinate of sprite
 

	
 
	/* Second block of 16B loadable in xmm register */
 
	int32 xmax;                     ///< maximal world X coordinate of bounding box
 
	int32 ymax;                     ///< maximal world Y coordinate of bounding box
 
	int32 zmax;                     ///< maximal world Z coordinate of bounding box
 
	int32 y;                        ///< screen Y coordinate of sprite
 

	
 
	SpriteID image;                 ///< sprite to draw
 
	PaletteID pal;                  ///< palette to use
 
	const SubSprite *sub;           ///< only draw a rectangular part of the sprite
 

	
 
	int32 left;                     ///< minimal screen X coordinate of sprite (= x + sprite->x_offs), reference point for child sprites
 
	int32 top;                      ///< minimal screen Y coordinate of sprite (= y + sprite->y_offs), reference point for child sprites
 

	
 
	int first_child;                ///< the first child to draw.
 
	bool comparison_done;           ///< Used during sprite sorting: true if sprite has been compared with all other sprites
 
};
 

	
 
typedef SmallVector<ParentSpriteToDraw*, 64> ParentSpriteToSortVector;
 

	
 
/** Type for method for checking whether a viewport sprite sorter exists. */
 
typedef bool (*VpSorterChecker)();
 
/** Type for the actual viewport sprite sorter. */
 
typedef void (*VpSpriteSorter)(ParentSpriteToSortVector *psd);
 

	
 
#ifdef WITH_SSE
 
bool ViewportSortParentSpritesSSE41Checker();
 
void ViewportSortParentSpritesSSE41(ParentSpriteToSortVector *psdv);
 
#endif
 

	
 
void InitializeSpriteSorter();
 

	
 
#endif /* VIEWPORT_SPRITE_SORTER_H */
src/viewport_sprite_sorter_sse4.cpp
Show inline comments
 
new file 100644
 
/* $Id$ */
 

	
 
/*
 
 * This file is part of OpenTTD.
 
 * OpenTTD is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, version 2.
 
 * OpenTTD is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 
 * See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with OpenTTD. If not, see <http://www.gnu.org/licenses/>.
 
 */
 

	
 
/** @file viewport_sprite_sorter_sse.cpp Sprite sorter that uses SSE4.1. */
 

	
 
#ifdef WITH_SSE
 

	
 
#include "stdafx.h"
 
#include "cpu.h"
 
#include "smmintrin.h"
 
#include "viewport_sprite_sorter.h"
 

	
 
#ifdef _SQ64
 
	assert_compile((sizeof(ParentSpriteToDraw) % 16) == 0);
 
	#define LOAD_128 _mm_load_si128
 
#else
 
	#define LOAD_128 _mm_loadu_si128
 
#endif
 

	
 
/** Sort parent sprites pointer array using SSE4.1 optimizations. */
 
void ViewportSortParentSpritesSSE41(ParentSpriteToSortVector *psdv)
 
{
 
	const __m128i mask_ptest = _mm_setr_epi8(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  0,  0,  0,  0);
 
	ParentSpriteToDraw ** const psdvend = psdv->End();
 
	ParentSpriteToDraw **psd = psdv->Begin();
 
	while (psd != psdvend) {
 
		ParentSpriteToDraw * const ps = *psd;
 

	
 
		if (ps->comparison_done) {
 
			psd++;
 
			continue;
 
		}
 

	
 
		ps->comparison_done = true;
 

	
 
		for (ParentSpriteToDraw **psd2 = psd + 1; psd2 != psdvend; psd2++) {
 
			ParentSpriteToDraw * const ps2 = *psd2;
 

	
 
			if (ps2->comparison_done) continue;
 

	
 
			/*
 
			 * Decide which comparator to use, based on whether the bounding boxes overlap
 
			 *
 
			 * Original code:
 
			 * if (ps->xmax >= ps2->xmin && ps->xmin <= ps2->xmax && // overlap in X?
 
			 *     ps->ymax >= ps2->ymin && ps->ymin <= ps2->ymax && // overlap in Y?
 
			 *     ps->zmax >= ps2->zmin && ps->zmin <= ps2->zmax) { // overlap in Z?
 
			 *
 
			 * Above conditions are equivalent to:
 
			 * 1/    !( (ps->xmax >= ps2->xmin) && (ps->ymax >= ps2->ymin) && (ps->zmax >= ps2->zmin)   &&    (ps->xmin <= ps2->xmax) && (ps->ymin <= ps2->ymax) && (ps->zmin <= ps2->zmax) )
 
			 * 2/    !( (ps->xmax >= ps2->xmin) && (ps->ymax >= ps2->ymin) && (ps->zmax >= ps2->zmin)   &&    (ps2->xmax >= ps->xmin) && (ps2->ymax >= ps->ymin) && (ps2->zmax >= ps->zmin) )
 
			 * 3/  !( ( (ps->xmax >= ps2->xmin) && (ps->ymax >= ps2->ymin) && (ps->zmax >= ps2->zmin) ) &&  ( (ps2->xmax >= ps->xmin) && (ps2->ymax >= ps->ymin) && (ps2->zmax >= ps->zmin) ) )
 
			 * 4/ !( !( (ps->xmax <  ps2->xmin) || (ps->ymax <  ps2->ymin) || (ps->zmax <  ps2->zmin) ) && !( (ps2->xmax <  ps->xmin) || (ps2->ymax <  ps->ymin) || (ps2->zmax <  ps->zmin) ) )
 
			 * 5/ PTEST <---------------------------------- rslt1 ---------------------------------->         <------------------------------ rslt2 -------------------------------------->
 
			 */
 
			__m128i ps1_max = LOAD_128((__m128i*) &ps->xmax);
 
			__m128i ps2_min = LOAD_128((__m128i*) &ps2->xmin);
 
			__m128i rslt1 = _mm_cmplt_epi32(ps1_max, ps2_min);
 
			if (!_mm_testz_si128(mask_ptest, rslt1))
 
				continue;
 

	
 
			__m128i ps1_min = LOAD_128((__m128i*) &ps->xmin);
 
			__m128i ps2_max = LOAD_128((__m128i*) &ps2->xmax);
 
			__m128i rslt2 = _mm_cmplt_epi32(ps2_max, ps1_min);
 
			if (_mm_testz_si128(mask_ptest, rslt2)) {
 
				/* Use X+Y+Z as the sorting order, so sprites closer to the bottom of
 
				 * the screen and with higher Z elevation, are drawn in front.
 
				 * Here X,Y,Z are the coordinates of the "center of mass" of the sprite,
 
				 * i.e. X=(left+right)/2, etc.
 
				 * However, since we only care about order, don't actually divide / 2
 
				 */
 
				if (ps->xmin + ps->xmax + ps->ymin + ps->ymax + ps->zmin + ps->zmax <=
 
						ps2->xmin + ps2->xmax + ps2->ymin + ps2->ymax + ps2->zmin + ps2->zmax) {
 
					continue;
 
				}
 
			}
 

	
 
			/* Move ps2 in front of ps */
 
			ParentSpriteToDraw * const temp = ps2;
 
			for (ParentSpriteToDraw **psd3 = psd2; psd3 > psd; psd3--) {
 
				*psd3 = *(psd3 - 1);
 
			}
 
			*psd = temp;
 
		}
 
	}
 
}
 

	
 
/**
 
 * Check whether the current CPU supports SSE 4.1.
 
 * @return True iff the CPU supports SSE 4.1.
 
 */
 
bool ViewportSortParentSpritesSSE41Checker()
 
{
 
	int cpu_info[4] = {-1};
 
	ottd_cpuid(cpu_info, 0);
 
	unsigned int max_info_type = cpu_info[0];
 
	if (max_info_type < 1) return false;
 

	
 
	ottd_cpuid(cpu_info, 1);
 
	return (cpu_info[2] & (1 << 19)) != 0;
 
}
 

	
 
#endif /* WITH_SSE */
0 comments (0 inline, 0 general)