Re: Subdiv16 for sprites?
Posted: Wed Sep 18, 2013 12:12 am
Saves a lot of retyping and scrolling. Just good macromantics.
This will be faster when the texture has more transparent pixels than opaque pixels, and when there isn't anything in front of it.
qbism wrote: In a certain stress case, the first listing below turns out to be faster than the second listing below.
Code: Select all
// Texel-first variant of the per-pixel sprite checks.
//
// Both macros expand to a brace-enclosed block and operate on names that must
// be in scope at the point of use: btemp, pbase, cachewidth, s, t, sstep,
// tstep, pz, pdest, plus IZI (PARALLELCHECK) or izi/izistep (ORIENTEDCHECK).
// The argument i is the pixel offset applied to both pz and pdest.
//
// Each directive has to start on its own line: when two #define directives
// share one physical line, the second becomes part of the first macro's
// replacement list, where '#' must be followed by a macro parameter.

// Reads the texel at (s >> 16, t >> 16) first; when it is not 255 and the
// stored depth pz[i] is <= IZI, writes IZI to pz[i] and the texel to pdest[i].
// s and t are always advanced by sstep and tstep.
#define PARALLELCHECK(i) \
	{ \
		btemp = *(pbase + (s >> 16) + (t >> 16) * cachewidth); \
		if (btemp != 255 && (pz[i] <= IZI)) \
		{ \
			pz[i] = IZI; \
			pdest[i] = btemp; \
		} \
		s += sstep; \
		t += tstep; \
	}

// Same as PARALLELCHECK, but the depth compared and stored is (izi >> 16),
// and izi is advanced by izistep after every pixel.
#define ORIENTEDCHECK(i) \
	{ \
		btemp = *(pbase + (s >> 16) + (t >> 16) * cachewidth); \
		if (btemp != 255 && pz[i] <= (izi >> 16)) \
		{ \
			pz[i] = izi >> 16; \
			pdest[i] = btemp; \
		} \
		s += sstep; \
		t += tstep; \
		izi += izistep; \
	}
// Besides pulling out static variables it's the only change that made a dent in the fps counter.
// Code: Select all
// Depth-first variant of the per-pixel sprite checks.
//
// Both macros expand to a brace-enclosed block and operate on names that must
// be in scope at the point of use: btemp, pbase, cachewidth, s, t, sstep,
// tstep, pz, pdest, plus IZI (PARALLELCHECK) or izi/izistep (ORIENTEDCHECK).
// The argument i is the pixel offset applied to both pz and pdest.
//
// Each directive has to start on its own line: when two #define directives
// share one physical line, the second becomes part of the first macro's
// replacement list, where '#' must be followed by a macro parameter.

// Compares the stored depth pz[i] against IZI first and only reads the texel
// at (s >> 16, t >> 16) when that comparison passes; a texel other than 255 is
// then written to pdest[i] and IZI to pz[i]. s and t are always advanced by
// sstep and tstep.
#define PARALLELCHECK(i) \
	{ \
		if (pz[i] <= IZI) \
		{ \
			btemp = *(pbase + (s >> 16) + (t >> 16) * cachewidth); \
			if (btemp != 255) \
			{ \
				pz[i] = IZI; \
				pdest[i] = btemp; \
			} \
		} \
		s += sstep; \
		t += tstep; \
	}

// Same as PARALLELCHECK, but the depth compared and stored is (izi >> 16),
// and izi is advanced by izistep after every pixel.
#define ORIENTEDCHECK(i) \
	{ \
		if (pz[i] <= (izi >> 16)) \
		{ \
			btemp = *(pbase + (s >> 16) + (t >> 16) * cachewidth); \
			if (btemp != 255) \
			{ \
				pz[i] = izi >> 16; \
				pdest[i] = btemp; \
			} \
		} \
		s += sstep; \
		t += tstep; \
		izi += izistep; \
	}
Code: Select all
// Draws the span list starting at pspan through the DITHERED_BLEND_A /
// DITHERED_BLEND_B macros (defined outside this listing), processing pixels in
// unrolled groups of 16 followed by a remainder of up to 15 pixels.
//
// Only the SPR_VP_PARALLEL branch is shown here; the else branch is elided.
// None of the working variables (count, spancount, s, t, sstep, tstep, izi,
// z, X, Y, XY0a, ...) are declared in this function, so they must be declared
// elsewhere (presumably file-scope statics - confirm against the full file).
void D_SpriteDrawSpans_Dithered_Blend (void)
{
// mankrip - begin
if (psprite->type == SPR_VP_PARALLEL)
{
// for square surfaces on parallel perspective, all spans have the same size
// countstart is the number of full 16-pixel groups in the FIRST span; it is
// reused unchanged for every following span in the list.
int
countstart = pspan->count >> 4
;
// NOTE(review): this store is overwritten by "count = countstart" inside the
// span loop before count is read, so it looks redundant.
count = countstart;
// leftover pixels after the full groups, also taken from the first span only
spancount = pspan->count % 16;
// NOTE(review): spancountminus1 is not read anywhere in the visible branch.
spancountminus1 = (float) (spancount - 1);
// izistep, d_zistepu and d_zistepv are zero, so we can skip zi
z = (float)0x10000 / d_ziorigin; // prescale to 16.16 fixed-point
// we count on FP exceptions being turned off to avoid range problems
izi = (int) (d_ziorigin * 0x8000 * 0x10000) >> 16;
// from here on IZI expands to the constant-per-sprite depth value in izi
#undef IZI
#define IZI izi
// calculate the initial s/z and t/z
// (first span only; these values are just used to derive sstep/tstep below
// and are recomputed per span inside the loop)
sdivz = d_sdivzorigin + (float) (pspan->u) * d_sdivzstepu;
tdivz = d_tdivzorigin + (float) (pspan->v) * d_tdivzstepv;
// calculate s/z, t/z, zi->fixed s and t at far end of span,
// calculate s and t steps across span by shifting
// The step is (value at sdivz + sdivzstepu) - (value at sdivz), shifted right
// by 4; sadjust/tadjust appear on both sides of the subtraction and cancel.
// sdivzstepu and tdivzstepu are defined outside this listing (presumably the
// 16-pixel step along u - confirm).
sstep = ( ( (int) ( (sdivz + sdivzstepu) * z) + sadjust) - ( (int) (sdivz * z) + sadjust)) >> 4;
tstep = ( ( (int) ( (tdivz + tdivzstepu) * z) + tadjust) - ( (int) (tdivz * z) + tadjust)) >> 4;
// one iteration per span, until the DS_SPAN_LIST_END sentinel is reached
do
{
u = pspan->u;
v = pspan->v;
// calculate the initial s/z, t/z, 1/z, s, and t and clamp
sdivz = d_sdivzorigin + (float)u * d_sdivzstepu;
tdivz = d_tdivzorigin + (float)v * d_tdivzstepv;
// mankrip - end
// starting s, clamped to [0, bbextents]
s = (int) (sdivz * z) + sadjust;
if (s > bbextents)
s = bbextents;
else if (s < 0)
s = 0;
// starting t, clamped to [0, bbextentt]
t = (int) (tdivz * z) + tadjust;
if (t > bbextentt)
t = bbextentt;
else if (t < 0)
t = 0;
// mankrip - begin
// destination pixel and z-buffer entry for the first pixel of this span
pdest = (byte *)d_viewbuffer + (screenwidth * v) + u;
pz = d_pzbuffer + (d_zwidth * v) + u;
// Y is the parity of the screen row, used as a dither_kernel index
Y = v & 1;
count = countstart;
if (count)
{
// prepare dither values
// X is the inverted parity of (v + u); the "a" values come from
// dither_kernel[X] and the "b" values from dither_kernel[!X].
X = ! ( (v + u) & 1);
XY0a = dither_kernel[X][Y][0];
XY1a = dither_kernel[X][Y][1];
XY0b = dither_kernel[!X][Y][0];
XY1b = dither_kernel[!X][Y][1];
while (count--)
{
// The pointers are advanced past the group first, so the 16 pixels of the
// group are addressed with offsets -16 .. -1. A and B alternate per pixel.
pdest += 16;
pz += 16;
DITHERED_BLEND_A(-16); s += sstep; t += tstep;
DITHERED_BLEND_B(-15); s += sstep; t += tstep;
DITHERED_BLEND_A(-14); s += sstep; t += tstep;
DITHERED_BLEND_B(-13); s += sstep; t += tstep;
DITHERED_BLEND_A(-12); s += sstep; t += tstep;
DITHERED_BLEND_B(-11); s += sstep; t += tstep;
DITHERED_BLEND_A(-10); s += sstep; t += tstep;
DITHERED_BLEND_B( -9); s += sstep; t += tstep;
DITHERED_BLEND_A( -8); s += sstep; t += tstep;
DITHERED_BLEND_B( -7); s += sstep; t += tstep;
DITHERED_BLEND_A( -6); s += sstep; t += tstep;
DITHERED_BLEND_B( -5); s += sstep; t += tstep;
DITHERED_BLEND_A( -4); s += sstep; t += tstep;
DITHERED_BLEND_B( -3); s += sstep; t += tstep;
DITHERED_BLEND_A( -2); s += sstep; t += tstep;
DITHERED_BLEND_B( -1); s += sstep; t += tstep;
}
}
if (spancount)
{
// prepare dither values
// NOTE(review): here X is the non-inverted parity of (v + u), while the
// 16-pixel loop above uses the inverted parity. Because the tail is anchored
// so that offset -1 is always DITHERED_BLEND_B, the two choices appear to
// line up only when spancount is odd; for an even spancount the dither phase
// of the tail looks flipped relative to the loop. This depends on how the
// DITHERED_BLEND_A/B macros use XY*a / XY*b - confirm against their
// definitions.
X = (v + u) & 1;
XY0a = dither_kernel[X][Y][0];
XY1a = dither_kernel[X][Y][1];
XY0b = dither_kernel[!X][Y][0];
XY1b = dither_kernel[!X][Y][1];
// same addressing trick as above: step past the tail, then use negative offsets
pdest += spancount;
pz += spancount;
// Every case intentionally falls through to the next one, so entering at
// "case N" draws exactly N pixels (offsets -N .. -1).
// NOTE(review): spancount is computed as pspan->count % 16, so "case 16"
// looks unreachable.
switch (spancount)
{
case 16: DITHERED_BLEND_A(-16); s += sstep; t += tstep;
case 15: DITHERED_BLEND_B(-15); s += sstep; t += tstep;
case 14: DITHERED_BLEND_A(-14); s += sstep; t += tstep;
case 13: DITHERED_BLEND_B(-13); s += sstep; t += tstep;
case 12: DITHERED_BLEND_A(-12); s += sstep; t += tstep;
case 11: DITHERED_BLEND_B(-11); s += sstep; t += tstep;
case 10: DITHERED_BLEND_A(-10); s += sstep; t += tstep;
case 9: DITHERED_BLEND_B( -9); s += sstep; t += tstep;
case 8: DITHERED_BLEND_A( -8); s += sstep; t += tstep;
case 7: DITHERED_BLEND_B( -7); s += sstep; t += tstep;
case 6: DITHERED_BLEND_A( -6); s += sstep; t += tstep;
case 5: DITHERED_BLEND_B( -5); s += sstep; t += tstep;
case 4: DITHERED_BLEND_A( -4); s += sstep; t += tstep;
case 3: DITHERED_BLEND_B( -3); s += sstep; t += tstep;
case 2: DITHERED_BLEND_A( -2); s += sstep; t += tstep;
case 1: DITHERED_BLEND_B( -1);
break;
}
}
// mankrip - end
// advance to the next span; the list ends at the DS_SPAN_LIST_END sentinel
pspan++;
} while (pspan->count != DS_SPAN_LIST_END);
}
else
[...]
}