Kernel-based dithering of BSP surface lighting (in software)

mankrip · Post by **mankrip** » Thu Aug 29, 2013 4:27 am

Before and after screenshots.

Open r_surf.c, and paste where appropriate:

// mankrip - begin
static qboolean
	ditherlight
	;
void
	R_DrawSurfaceBlock8_mip0_C (void)
,	R_DrawSurfaceBlock8_mip1_C (void)
,	R_DrawSurfaceBlock8_mip2_C (void)
,	R_DrawSurfaceBlock8_mip3_C (void)
	;
// mankrip - end
void R_DrawSurfaceBlock8_mip0 (void);
void R_DrawSurfaceBlock8_mip1 (void);
void R_DrawSurfaceBlock8_mip2 (void);
void R_DrawSurfaceBlock8_mip3 (void);

static void	(*surfmiptable[4]) (void) =
{
	// mankrip - begin
,	R_DrawSurfaceBlock8_mip0_C
,	R_DrawSurfaceBlock8_mip1_C
,	R_DrawSurfaceBlock8_mip2_C
,	R_DrawSurfaceBlock8_mip3_C
	// mankrip - end
};

Code: Select all

void R_BuildLightMap (void)
{
	int			smax, tmax;
	int			t;
	int			i, size;
	byte		*lightmap;
	unsigned	scale;
	int			maps;
	int			count, remainder; // mankrip
	unsigned	*blocks; // mankrip
	msurface_t	*surf;

	surf = r_drawsurf.surf;

	smax = (surf->extents[0]>>4)+1;
	tmax = (surf->extents[1]>>4)+1;
	size = smax*tmax; // in most cases, 64*64

	// mankrip - fullbright BSP textures fix - begin
	if (r_fullbright.value)
	{
		memset (blocklights, 63 << 8, sizeof (unsigned) * size);
		return;
	}
	else if (!cl.worldmodel->lightdata)
	{
	//	memset (blocklights, 31 << 8, sizeof (unsigned) * size);
		memset (blocklights, (unsigned) neutral_light, sizeof (unsigned) * size);
		return;
	}
	// mankrip - fullbright BSP textures fix - end

// clear to ambient
	memset (blocklights, r_refdef.ambientlight << 8, sizeof (unsigned) * size); // mankrip

	lightmap = surf->samples;
	count = size >> 5; // mankrip
	remainder = size % 32; // mankrip

// add all the lightmaps
	if (lightmap)
		for (maps = 0 ; maps < MAXLIGHTMAPS && surf->styles[maps] != 255 ; maps++)
		{
		//	scale = r_drawsurf.lightadj[maps];	// 8.8 fraction
		//	if (!r_externbsp_lit.value || currententity != cl_entities && currententity->model->name[0] != '*') // mankrip
				scale = (unsigned) ( (float)r_drawsurf.lightadj[maps] * currententity->lightlevel); // mankrip
			for (i=0 ; i<size ; i++)
				blocklights[i] += lightmap[i] * scale;
			lightmap += size;	// skip to next lightmap
		}

// add all the dynamic lights
	if (surf->dlightframe == r_framecount)
		R_AddDynamicLights ();

// bound, invert, and shift
	// mankrip - begin
#if 1
	blocks = blocklights;
	if (ditherlight)
	{
		// extra clamping for dithered shading
		for (i = 0 ; i < count ; i++, blocks += 32)
		{
			t = (255*256 - (int)blocks[31]) >> (8 - VID_CBITS); blocks[31] = (t < (2 << 6)) ? (2 << 6) : t;
			t = (255*256 - (int)blocks[30]) >> (8 - VID_CBITS); blocks[30] = (t < (2 << 6)) ? (2 << 6) : t;
			t = (255*256 - (int)blocks[29]) >> (8 - VID_CBITS); blocks[29] = (t < (2 << 6)) ? (2 << 6) : t;
			t = (255*256 - (int)blocks[28]) >> (8 - VID_CBITS); blocks[28] = (t < (2 << 6)) ? (2 << 6) : t;
			t = (255*256 - (int)blocks[27]) >> (8 - VID_CBITS); blocks[27] = (t < (2 << 6)) ? (2 << 6) : t;
			t = (255*256 - (int)blocks[26]) >> (8 - VID_CBITS); blocks[26] = (t < (2 << 6)) ? (2 << 6) : t;
			t = (255*256 - (int)blocks[25]) >> (8 - VID_CBITS); blocks[25] = (t < (2 << 6)) ? (2 << 6) : t;
			t = (255*256 - (int)blocks[24]) >> (8 - VID_CBITS); blocks[24] = (t < (2 << 6)) ? (2 << 6) : t;
			t = (255*256 - (int)blocks[23]) >> (8 - VID_CBITS); blocks[23] = (t < (2 << 6)) ? (2 << 6) : t;
			t = (255*256 - (int)blocks[22]) >> (8 - VID_CBITS); blocks[22] = (t < (2 << 6)) ? (2 << 6) : t;
			t = (255*256 - (int)blocks[21]) >> (8 - VID_CBITS); blocks[21] = (t < (2 << 6)) ? (2 << 6) : t;
			t = (255*256 - (int)blocks[20]) >> (8 - VID_CBITS); blocks[20] = (t < (2 << 6)) ? (2 << 6) : t;
			t = (255*256 - (int)blocks[19]) >> (8 - VID_CBITS); blocks[19] = (t < (2 << 6)) ? (2 << 6) : t;
			t = (255*256 - (int)blocks[18]) >> (8 - VID_CBITS); blocks[18] = (t < (2 << 6)) ? (2 << 6) : t;
			t = (255*256 - (int)blocks[17]) >> (8 - VID_CBITS); blocks[17] = (t < (2 << 6)) ? (2 << 6) : t;
			t = (255*256 - (int)blocks[16]) >> (8 - VID_CBITS); blocks[16] = (t < (2 << 6)) ? (2 << 6) : t;
			t = (255*256 - (int)blocks[15]) >> (8 - VID_CBITS); blocks[15] = (t < (2 << 6)) ? (2 << 6) : t;
			t = (255*256 - (int)blocks[14]) >> (8 - VID_CBITS); blocks[14] = (t < (2 << 6)) ? (2 << 6) : t;
			t = (255*256 - (int)blocks[13]) >> (8 - VID_CBITS); blocks[13] = (t < (2 << 6)) ? (2 << 6) : t;
			t = (255*256 - (int)blocks[12]) >> (8 - VID_CBITS); blocks[12] = (t < (2 << 6)) ? (2 << 6) : t;
			t = (255*256 - (int)blocks[11]) >> (8 - VID_CBITS); blocks[11] = (t < (2 << 6)) ? (2 << 6) : t;
			t = (255*256 - (int)blocks[10]) >> (8 - VID_CBITS); blocks[10] = (t < (2 << 6)) ? (2 << 6) : t;
			t = (255*256 - (int)blocks[ 9]) >> (8 - VID_CBITS); blocks[ 9] = (t < (2 << 6)) ? (2 << 6) : t;
			t = (255*256 - (int)blocks[ 8]) >> (8 - VID_CBITS); blocks[ 8] = (t < (2 << 6)) ? (2 << 6) : t;
			t = (255*256 - (int)blocks[ 7]) >> (8 - VID_CBITS); blocks[ 7] = (t < (2 << 6)) ? (2 << 6) : t;
			t = (255*256 - (int)blocks[ 6]) >> (8 - VID_CBITS); blocks[ 6] = (t < (2 << 6)) ? (2 << 6) : t;
			t = (255*256 - (int)blocks[ 5]) >> (8 - VID_CBITS); blocks[ 5] = (t < (2 << 6)) ? (2 << 6) : t;
			t = (255*256 - (int)blocks[ 4]) >> (8 - VID_CBITS); blocks[ 4] = (t < (2 << 6)) ? (2 << 6) : t;
			t = (255*256 - (int)blocks[ 3]) >> (8 - VID_CBITS); blocks[ 3] = (t < (2 << 6)) ? (2 << 6) : t;
			t = (255*256 - (int)blocks[ 2]) >> (8 - VID_CBITS); blocks[ 2] = (t < (2 << 6)) ? (2 << 6) : t;
			t = (255*256 - (int)blocks[ 1]) >> (8 - VID_CBITS); blocks[ 1] = (t < (2 << 6)) ? (2 << 6) : t;
			t = (255*256 - (int)blocks[ 0]) >> (8 - VID_CBITS); blocks[ 0] = (t < (2 << 6)) ? (2 << 6) : t;
		}
		switch (remainder)
		{
			case 31: t = (255*256 - (int)blocks[31]) >> (8 - VID_CBITS); blocks[31] = (t < (2 << 6)) ? (2 << 6) : t;
			case 30: t = (255*256 - (int)blocks[30]) >> (8 - VID_CBITS); blocks[30] = (t < (2 << 6)) ? (2 << 6) : t;
			case 29: t = (255*256 - (int)blocks[29]) >> (8 - VID_CBITS); blocks[29] = (t < (2 << 6)) ? (2 << 6) : t;
			case 28: t = (255*256 - (int)blocks[28]) >> (8 - VID_CBITS); blocks[28] = (t < (2 << 6)) ? (2 << 6) : t;
			case 27: t = (255*256 - (int)blocks[27]) >> (8 - VID_CBITS); blocks[27] = (t < (2 << 6)) ? (2 << 6) : t;
			case 26: t = (255*256 - (int)blocks[26]) >> (8 - VID_CBITS); blocks[26] = (t < (2 << 6)) ? (2 << 6) : t;
			case 25: t = (255*256 - (int)blocks[25]) >> (8 - VID_CBITS); blocks[25] = (t < (2 << 6)) ? (2 << 6) : t;
			case 24: t = (255*256 - (int)blocks[24]) >> (8 - VID_CBITS); blocks[24] = (t < (2 << 6)) ? (2 << 6) : t;
			case 23: t = (255*256 - (int)blocks[23]) >> (8 - VID_CBITS); blocks[23] = (t < (2 << 6)) ? (2 << 6) : t;
			case 22: t = (255*256 - (int)blocks[22]) >> (8 - VID_CBITS); blocks[22] = (t < (2 << 6)) ? (2 << 6) : t;
			case 21: t = (255*256 - (int)blocks[21]) >> (8 - VID_CBITS); blocks[21] = (t < (2 << 6)) ? (2 << 6) : t;
			case 20: t = (255*256 - (int)blocks[20]) >> (8 - VID_CBITS); blocks[20] = (t < (2 << 6)) ? (2 << 6) : t;
			case 19: t = (255*256 - (int)blocks[19]) >> (8 - VID_CBITS); blocks[19] = (t < (2 << 6)) ? (2 << 6) : t;
			case 18: t = (255*256 - (int)blocks[18]) >> (8 - VID_CBITS); blocks[18] = (t < (2 << 6)) ? (2 << 6) : t;
			case 17: t = (255*256 - (int)blocks[17]) >> (8 - VID_CBITS); blocks[17] = (t < (2 << 6)) ? (2 << 6) : t;
			case 16: t = (255*256 - (int)blocks[16]) >> (8 - VID_CBITS); blocks[16] = (t < (2 << 6)) ? (2 << 6) : t;
			case 15: t = (255*256 - (int)blocks[15]) >> (8 - VID_CBITS); blocks[15] = (t < (2 << 6)) ? (2 << 6) : t;
			case 14: t = (255*256 - (int)blocks[14]) >> (8 - VID_CBITS); blocks[14] = (t < (2 << 6)) ? (2 << 6) : t;
			case 13: t = (255*256 - (int)blocks[13]) >> (8 - VID_CBITS); blocks[13] = (t < (2 << 6)) ? (2 << 6) : t;
			case 12: t = (255*256 - (int)blocks[12]) >> (8 - VID_CBITS); blocks[12] = (t < (2 << 6)) ? (2 << 6) : t;
			case 11: t = (255*256 - (int)blocks[11]) >> (8 - VID_CBITS); blocks[11] = (t < (2 << 6)) ? (2 << 6) : t;
			case 10: t = (255*256 - (int)blocks[10]) >> (8 - VID_CBITS); blocks[10] = (t < (2 << 6)) ? (2 << 6) : t;
			case  9: t = (255*256 - (int)blocks[ 9]) >> (8 - VID_CBITS); blocks[ 9] = (t < (2 << 6)) ? (2 << 6) : t;
			case  8: t = (255*256 - (int)blocks[ 8]) >> (8 - VID_CBITS); blocks[ 8] = (t < (2 << 6)) ? (2 << 6) : t;
			case  7: t = (255*256 - (int)blocks[ 7]) >> (8 - VID_CBITS); blocks[ 7] = (t < (2 << 6)) ? (2 << 6) : t;
			case  6: t = (255*256 - (int)blocks[ 6]) >> (8 - VID_CBITS); blocks[ 6] = (t < (2 << 6)) ? (2 << 6) : t;
			case  5: t = (255*256 - (int)blocks[ 5]) >> (8 - VID_CBITS); blocks[ 5] = (t < (2 << 6)) ? (2 << 6) : t;
			case  4: t = (255*256 - (int)blocks[ 4]) >> (8 - VID_CBITS); blocks[ 4] = (t < (2 << 6)) ? (2 << 6) : t;
			case  3: t = (255*256 - (int)blocks[ 3]) >> (8 - VID_CBITS); blocks[ 3] = (t < (2 << 6)) ? (2 << 6) : t;
			case  2: t = (255*256 - (int)blocks[ 2]) >> (8 - VID_CBITS); blocks[ 2] = (t < (2 << 6)) ? (2 << 6) : t;
			case  1: t = (255*256 - (int)blocks[ 1]) >> (8 - VID_CBITS); blocks[ 1] = (t < (2 << 6)) ? (2 << 6) : t;
			case  0: t = (255*256 - (int)blocks[ 0]) >> (8 - VID_CBITS); blocks[ 0] = (t < (2 << 6)) ? (2 << 6) : t;
			default: break;
		}
	}
	else
	{
		for (i = 0 ; i < count ; i++, blocks += 32)
		{
			t = (255*256 - (int)blocks[31]) >> (8 - VID_CBITS); blocks[31] = (t < (1 << 6)) ? (1 << 6) : t;
			t = (255*256 - (int)blocks[30]) >> (8 - VID_CBITS); blocks[30] = (t < (1 << 6)) ? (1 << 6) : t;
			t = (255*256 - (int)blocks[29]) >> (8 - VID_CBITS); blocks[29] = (t < (1 << 6)) ? (1 << 6) : t;
			t = (255*256 - (int)blocks[28]) >> (8 - VID_CBITS); blocks[28] = (t < (1 << 6)) ? (1 << 6) : t;
			t = (255*256 - (int)blocks[27]) >> (8 - VID_CBITS); blocks[27] = (t < (1 << 6)) ? (1 << 6) : t;
			t = (255*256 - (int)blocks[26]) >> (8 - VID_CBITS); blocks[26] = (t < (1 << 6)) ? (1 << 6) : t;
			t = (255*256 - (int)blocks[25]) >> (8 - VID_CBITS); blocks[25] = (t < (1 << 6)) ? (1 << 6) : t;
			t = (255*256 - (int)blocks[24]) >> (8 - VID_CBITS); blocks[24] = (t < (1 << 6)) ? (1 << 6) : t;
			t = (255*256 - (int)blocks[23]) >> (8 - VID_CBITS); blocks[23] = (t < (1 << 6)) ? (1 << 6) : t;
			t = (255*256 - (int)blocks[22]) >> (8 - VID_CBITS); blocks[22] = (t < (1 << 6)) ? (1 << 6) : t;
			t = (255*256 - (int)blocks[21]) >> (8 - VID_CBITS); blocks[21] = (t < (1 << 6)) ? (1 << 6) : t;
			t = (255*256 - (int)blocks[20]) >> (8 - VID_CBITS); blocks[20] = (t < (1 << 6)) ? (1 << 6) : t;
			t = (255*256 - (int)blocks[19]) >> (8 - VID_CBITS); blocks[19] = (t < (1 << 6)) ? (1 << 6) : t;
			t = (255*256 - (int)blocks[18]) >> (8 - VID_CBITS); blocks[18] = (t < (1 << 6)) ? (1 << 6) : t;
			t = (255*256 - (int)blocks[17]) >> (8 - VID_CBITS); blocks[17] = (t < (1 << 6)) ? (1 << 6) : t;
			t = (255*256 - (int)blocks[16]) >> (8 - VID_CBITS); blocks[16] = (t < (1 << 6)) ? (1 << 6) : t;
			t = (255*256 - (int)blocks[15]) >> (8 - VID_CBITS); blocks[15] = (t < (1 << 6)) ? (1 << 6) : t;
			t = (255*256 - (int)blocks[14]) >> (8 - VID_CBITS); blocks[14] = (t < (1 << 6)) ? (1 << 6) : t;
			t = (255*256 - (int)blocks[13]) >> (8 - VID_CBITS); blocks[13] = (t < (1 << 6)) ? (1 << 6) : t;
			t = (255*256 - (int)blocks[12]) >> (8 - VID_CBITS); blocks[12] = (t < (1 << 6)) ? (1 << 6) : t;
			t = (255*256 - (int)blocks[11]) >> (8 - VID_CBITS); blocks[11] = (t < (1 << 6)) ? (1 << 6) : t;
			t = (255*256 - (int)blocks[10]) >> (8 - VID_CBITS); blocks[10] = (t < (1 << 6)) ? (1 << 6) : t;
			t = (255*256 - (int)blocks[ 9]) >> (8 - VID_CBITS); blocks[ 9] = (t < (1 << 6)) ? (1 << 6) : t;
			t = (255*256 - (int)blocks[ 8]) >> (8 - VID_CBITS); blocks[ 8] = (t < (1 << 6)) ? (1 << 6) : t;
			t = (255*256 - (int)blocks[ 7]) >> (8 - VID_CBITS); blocks[ 7] = (t < (1 << 6)) ? (1 << 6) : t;
			t = (255*256 - (int)blocks[ 6]) >> (8 - VID_CBITS); blocks[ 6] = (t < (1 << 6)) ? (1 << 6) : t;
			t = (255*256 - (int)blocks[ 5]) >> (8 - VID_CBITS); blocks[ 5] = (t < (1 << 6)) ? (1 << 6) : t;
			t = (255*256 - (int)blocks[ 4]) >> (8 - VID_CBITS); blocks[ 4] = (t < (1 << 6)) ? (1 << 6) : t;
			t = (255*256 - (int)blocks[ 3]) >> (8 - VID_CBITS); blocks[ 3] = (t < (1 << 6)) ? (1 << 6) : t;
			t = (255*256 - (int)blocks[ 2]) >> (8 - VID_CBITS); blocks[ 2] = (t < (1 << 6)) ? (1 << 6) : t;
			t = (255*256 - (int)blocks[ 1]) >> (8 - VID_CBITS); blocks[ 1] = (t < (1 << 6)) ? (1 << 6) : t;
			t = (255*256 - (int)blocks[ 0]) >> (8 - VID_CBITS); blocks[ 0] = (t < (1 << 6)) ? (1 << 6) : t;
		}
		switch (remainder)
		{
			case 31: t = (255*256 - (int)blocks[31]) >> (8 - VID_CBITS); blocks[31] = (t < (1 << 6)) ? (1 << 6) : t;
			case 30: t = (255*256 - (int)blocks[30]) >> (8 - VID_CBITS); blocks[30] = (t < (1 << 6)) ? (1 << 6) : t;
			case 29: t = (255*256 - (int)blocks[29]) >> (8 - VID_CBITS); blocks[29] = (t < (1 << 6)) ? (1 << 6) : t;
			case 28: t = (255*256 - (int)blocks[28]) >> (8 - VID_CBITS); blocks[28] = (t < (1 << 6)) ? (1 << 6) : t;
			case 27: t = (255*256 - (int)blocks[27]) >> (8 - VID_CBITS); blocks[27] = (t < (1 << 6)) ? (1 << 6) : t;
			case 26: t = (255*256 - (int)blocks[26]) >> (8 - VID_CBITS); blocks[26] = (t < (1 << 6)) ? (1 << 6) : t;
			case 25: t = (255*256 - (int)blocks[25]) >> (8 - VID_CBITS); blocks[25] = (t < (1 << 6)) ? (1 << 6) : t;
			case 24: t = (255*256 - (int)blocks[24]) >> (8 - VID_CBITS); blocks[24] = (t < (1 << 6)) ? (1 << 6) : t;
			case 23: t = (255*256 - (int)blocks[23]) >> (8 - VID_CBITS); blocks[23] = (t < (1 << 6)) ? (1 << 6) : t;
			case 22: t = (255*256 - (int)blocks[22]) >> (8 - VID_CBITS); blocks[22] = (t < (1 << 6)) ? (1 << 6) : t;
			case 21: t = (255*256 - (int)blocks[21]) >> (8 - VID_CBITS); blocks[21] = (t < (1 << 6)) ? (1 << 6) : t;
			case 20: t = (255*256 - (int)blocks[20]) >> (8 - VID_CBITS); blocks[20] = (t < (1 << 6)) ? (1 << 6) : t;
			case 19: t = (255*256 - (int)blocks[19]) >> (8 - VID_CBITS); blocks[19] = (t < (1 << 6)) ? (1 << 6) : t;
			case 18: t = (255*256 - (int)blocks[18]) >> (8 - VID_CBITS); blocks[18] = (t < (1 << 6)) ? (1 << 6) : t;
			case 17: t = (255*256 - (int)blocks[17]) >> (8 - VID_CBITS); blocks[17] = (t < (1 << 6)) ? (1 << 6) : t;
			case 16: t = (255*256 - (int)blocks[16]) >> (8 - VID_CBITS); blocks[16] = (t < (1 << 6)) ? (1 << 6) : t;
			case 15: t = (255*256 - (int)blocks[15]) >> (8 - VID_CBITS); blocks[15] = (t < (1 << 6)) ? (1 << 6) : t;
			case 14: t = (255*256 - (int)blocks[14]) >> (8 - VID_CBITS); blocks[14] = (t < (1 << 6)) ? (1 << 6) : t;
			case 13: t = (255*256 - (int)blocks[13]) >> (8 - VID_CBITS); blocks[13] = (t < (1 << 6)) ? (1 << 6) : t;
			case 12: t = (255*256 - (int)blocks[12]) >> (8 - VID_CBITS); blocks[12] = (t < (1 << 6)) ? (1 << 6) : t;
			case 11: t = (255*256 - (int)blocks[11]) >> (8 - VID_CBITS); blocks[11] = (t < (1 << 6)) ? (1 << 6) : t;
			case 10: t = (255*256 - (int)blocks[10]) >> (8 - VID_CBITS); blocks[10] = (t < (1 << 6)) ? (1 << 6) : t;
			case  9: t = (255*256 - (int)blocks[ 9]) >> (8 - VID_CBITS); blocks[ 9] = (t < (1 << 6)) ? (1 << 6) : t;
			case  8: t = (255*256 - (int)blocks[ 8]) >> (8 - VID_CBITS); blocks[ 8] = (t < (1 << 6)) ? (1 << 6) : t;
			case  7: t = (255*256 - (int)blocks[ 7]) >> (8 - VID_CBITS); blocks[ 7] = (t < (1 << 6)) ? (1 << 6) : t;
			case  6: t = (255*256 - (int)blocks[ 6]) >> (8 - VID_CBITS); blocks[ 6] = (t < (1 << 6)) ? (1 << 6) : t;
			case  5: t = (255*256 - (int)blocks[ 5]) >> (8 - VID_CBITS); blocks[ 5] = (t < (1 << 6)) ? (1 << 6) : t;
			case  4: t = (255*256 - (int)blocks[ 4]) >> (8 - VID_CBITS); blocks[ 4] = (t < (1 << 6)) ? (1 << 6) : t;
			case  3: t = (255*256 - (int)blocks[ 3]) >> (8 - VID_CBITS); blocks[ 3] = (t < (1 << 6)) ? (1 << 6) : t;
			case  2: t = (255*256 - (int)blocks[ 2]) >> (8 - VID_CBITS); blocks[ 2] = (t < (1 << 6)) ? (1 << 6) : t;
			case  1: t = (255*256 - (int)blocks[ 1]) >> (8 - VID_CBITS); blocks[ 1] = (t < (1 << 6)) ? (1 << 6) : t;
			case  0: t = (255*256 - (int)blocks[ 0]) >> (8 - VID_CBITS); blocks[ 0] = (t < (1 << 6)) ? (1 << 6) : t;
			default: break;
		}
	}
#else
	// mankrip - end
	for (i=0 ; i<size ; i++)
	{
		t = (255*256 - (int)blocklights[i]) >> (8 - VID_CBITS);

		blocklights[i] = (t < (1 << 6)) ? (1 << 6) : t; // mankrip
	}
#endif
}

Code: Select all

void R_DrawSurface (void)
{
	unsigned char	*basetptr;
	int				smax, tmax, twidth;
	int				u;
	int				soffset, basetoffset, texwidth;
	int				horzblockstep;
	unsigned char	*pcolumndest;
	void			(*pblockdrawer)(void);
	texture_t		*mt;

	ditherlight = (d_ditherlight.value && !r_fullbright.value); // mankrip

[...]

}

Code: Select all

// mankrip - begin
int lightmap_dither_kernel[2][2] =
// same as dither_kernel[X][Y][0] >> 8
// set to >> 8 for more accuracy and less smoothness
{
	{
		 -8192 >> 8
	,	 24576 >> 8
	}
,	{
		  8192 >> 8
	,	-24576 >> 8 // needs clamping, otherwise makes artifacts on overbright
	}
};
static int
// integers for dithering
//	u
//,	v
//,	X
	Y
,	XY0a
,	XY0b
,	XY0a1 // for mip3
,	XY0b1 // for mip3
	;

static int
	b
,	i
,	lightstep
,	light
	;
static byte
	* psource
,	* prowdest
	;
// mankrip - end

Code: Select all

void R_DrawSurfaceBlock8_mip0_C (void)
{
	psource = pbasesource;
	prowdest = prowdestbase;

	for (b=0 ; b<r_numvblocks ; b++)
	{
	// FIXME: make these locals?
	// FIXME: use delta rather than both right and left, like ASM?
		lightleft = r_lightptr[0];
		lightright = r_lightptr[1];
		r_lightptr += r_lightwidth;
		lightleftstep = (r_lightptr[0] - lightleft) >> 4;
		lightrightstep = (r_lightptr[1] - lightright) >> 4;

		for (i=0 ; i<16 ; i++)
		{
			// mankrip - begin
			if (ditherlight)
			{
				Y = i & 1;
				XY0a = lightmap_dither_kernel[1][Y];
				XY0b = lightmap_dither_kernel[0][Y];
			}
			else
			{
				XY0a = 0;
				XY0b = 0;
			}
			// mankrip - end

			lightstep = (lightleft - lightright) >> 4; // mankrip

			light = lightright;

			// mankrip - begin
			prowdest[15] = vid.colormap[ ( (light + XY0a) & 0xFF00) + psource[15]]; light += lightstep;
			prowdest[14] = vid.colormap[ ( (light + XY0b) & 0xFF00) + psource[14]]; light += lightstep;
			prowdest[13] = vid.colormap[ ( (light + XY0a) & 0xFF00) + psource[13]]; light += lightstep;
			prowdest[12] = vid.colormap[ ( (light + XY0b) & 0xFF00) + psource[12]]; light += lightstep;
			prowdest[11] = vid.colormap[ ( (light + XY0a) & 0xFF00) + psource[11]]; light += lightstep;
			prowdest[10] = vid.colormap[ ( (light + XY0b) & 0xFF00) + psource[10]]; light += lightstep;
			prowdest[ 9] = vid.colormap[ ( (light + XY0a) & 0xFF00) + psource[ 9]]; light += lightstep;
			prowdest[ 8] = vid.colormap[ ( (light + XY0b) & 0xFF00) + psource[ 8]]; light += lightstep;
			prowdest[ 7] = vid.colormap[ ( (light + XY0a) & 0xFF00) + psource[ 7]]; light += lightstep;
			prowdest[ 6] = vid.colormap[ ( (light + XY0b) & 0xFF00) + psource[ 6]]; light += lightstep;
			prowdest[ 5] = vid.colormap[ ( (light + XY0a) & 0xFF00) + psource[ 5]]; light += lightstep;
			prowdest[ 4] = vid.colormap[ ( (light + XY0b) & 0xFF00) + psource[ 4]]; light += lightstep;
			prowdest[ 3] = vid.colormap[ ( (light + XY0a) & 0xFF00) + psource[ 3]]; light += lightstep;
			prowdest[ 2] = vid.colormap[ ( (light + XY0b) & 0xFF00) + psource[ 2]]; light += lightstep;
			prowdest[ 1] = vid.colormap[ ( (light + XY0a) & 0xFF00) + psource[ 1]]; light += lightstep;
			prowdest[ 0] = vid.colormap[ ( (light + XY0b) & 0xFF00) + psource[ 0]]; light += lightstep;
			// mankrip - end

			psource += sourcetstep;
			lightright += lightrightstep;
			lightleft += lightleftstep;
			prowdest += surfrowbytes;
		}

		if (psource >= r_sourcemax)
			psource -= r_stepback;
	}
}


void R_DrawSurfaceBlock8_mip1_C (void)
{
	psource = pbasesource;
	prowdest = prowdestbase;

	for (b=0 ; b<r_numvblocks ; b++)
	{
	// FIXME: make these locals?
	// FIXME: use delta rather than both right and left, like ASM?
		lightleft = r_lightptr[0];
		lightright = r_lightptr[1];
		r_lightptr += r_lightwidth;
		lightleftstep = (r_lightptr[0] - lightleft) >> 3;
		lightrightstep = (r_lightptr[1] - lightright) >> 3;

		for (i=0 ; i<8 ; i++)
		{
			// mankrip - begin
			if (ditherlight)
			{
				Y = i & 1;
				XY0a = lightmap_dither_kernel[1][Y];
				XY0b = lightmap_dither_kernel[0][Y];
			}
			else
			{
				XY0a = 0;
				XY0b = 0;
			}
			// mankrip - end

			lightstep = (lightleft - lightright) >> 3; // mankrip

			light = lightright;

			// mankrip - begin
			prowdest[ 7] = vid.colormap[ ( (light + XY0a) & 0xFF00) + psource[ 7]]; light += lightstep;
			prowdest[ 6] = vid.colormap[ ( (light + XY0b) & 0xFF00) + psource[ 6]]; light += lightstep;
			prowdest[ 5] = vid.colormap[ ( (light + XY0a) & 0xFF00) + psource[ 5]]; light += lightstep;
			prowdest[ 4] = vid.colormap[ ( (light + XY0b) & 0xFF00) + psource[ 4]]; light += lightstep;
			prowdest[ 3] = vid.colormap[ ( (light + XY0a) & 0xFF00) + psource[ 3]]; light += lightstep;
			prowdest[ 2] = vid.colormap[ ( (light + XY0b) & 0xFF00) + psource[ 2]]; light += lightstep;
			prowdest[ 1] = vid.colormap[ ( (light + XY0a) & 0xFF00) + psource[ 1]]; light += lightstep;
			prowdest[ 0] = vid.colormap[ ( (light + XY0b) & 0xFF00) + psource[ 0]]; light += lightstep;
			// mankrip - end

			psource += sourcetstep;
			lightright += lightrightstep;
			lightleft += lightleftstep;
			prowdest += surfrowbytes;
		}

		if (psource >= r_sourcemax)
			psource -= r_stepback;
	}
}


void R_DrawSurfaceBlock8_mip2_C (void)
{
	psource = pbasesource;
	prowdest = prowdestbase;

	for (b=0 ; b<r_numvblocks ; b++)
	{
	// FIXME: make these locals?
	// FIXME: use delta rather than both right and left, like ASM?
		lightleft = r_lightptr[0];
		lightright = r_lightptr[1];
		r_lightptr += r_lightwidth;
		lightleftstep = (r_lightptr[0] - lightleft) >> 2;
		lightrightstep = (r_lightptr[1] - lightright) >> 2;

		for (i=0 ; i<4 ; i++)
		{
			// mankrip - begin
			if (ditherlight)
			{
				Y = i & 1;
				XY0a = lightmap_dither_kernel[1][Y];
				XY0b = lightmap_dither_kernel[0][Y];
			}
			else
			{
				XY0a = 0;
				XY0b = 0;
			}
			// mankrip - end

			lightstep = (lightleft - lightright) >> 2; // mankrip

			light = lightright;

			// mankrip - begin
			prowdest[ 3] = vid.colormap[ ( (light + XY0a) & 0xFF00) + psource[ 3]]; light += lightstep;
			prowdest[ 2] = vid.colormap[ ( (light + XY0b) & 0xFF00) + psource[ 2]]; light += lightstep;
			prowdest[ 1] = vid.colormap[ ( (light + XY0a) & 0xFF00) + psource[ 1]]; light += lightstep;
			prowdest[ 0] = vid.colormap[ ( (light + XY0b) & 0xFF00) + psource[ 0]]; light += lightstep;
			// mankrip - end

			psource += sourcetstep;
			lightright += lightrightstep;
			lightleft += lightleftstep;
			prowdest += surfrowbytes;
		}

		if (psource >= r_sourcemax)
			psource -= r_stepback;
	}
}


void R_DrawSurfaceBlock8_mip3_C (void)
{
	psource = pbasesource;
	prowdest = prowdestbase;
	// mankrip - begin
	if (ditherlight)
	{
		XY0a  = lightmap_dither_kernel[1][0];
		XY0b  = lightmap_dither_kernel[0][0];
		XY0a1 = lightmap_dither_kernel[1][1];
		XY0b1 = lightmap_dither_kernel[0][1];
	}
	else
	{
		XY0a  = 0;
		XY0b  = 0;
		XY0a1 = 0;
		XY0b1 = 0;
	}
	// mankrip - end

	for (b=0 ; b<r_numvblocks ; b++)
	{
	// FIXME: make these locals?
	// FIXME: use delta rather than both right and left, like ASM?
		lightleft = r_lightptr[0];
		lightright = r_lightptr[1];
		r_lightptr += r_lightwidth;
		lightleftstep = (r_lightptr[0] - lightleft) >> 1;
		lightrightstep = (r_lightptr[1] - lightright) >> 1;

		lightstep = (lightleft - lightright) >> 1; // mankrip
		light = lightright;
		// mankrip - begin
		prowdest[ 1] = vid.colormap[ ( (light + XY0a) & 0xFF00) + psource[ 1]]; light += lightstep;
		prowdest[ 0] = vid.colormap[ ( (light + XY0b) & 0xFF00) + psource[ 0]]; light += lightstep;
		// mankrip - end
		psource += sourcetstep;
		lightright += lightrightstep;
		lightleft += lightleftstep;
		prowdest += surfrowbytes;

		lightstep = (lightleft - lightright) >> 1; // mankrip
		light = lightright;
		// mankrip - begin
		prowdest[ 1] = vid.colormap[ ( (light + XY0a1) & 0xFF00) + psource[ 1]]; light += lightstep;
		prowdest[ 0] = vid.colormap[ ( (light + XY0b1) & 0xFF00) + psource[ 0]]; light += lightstep;
		// mankrip - end
		psource += sourcetstep;
		prowdest += surfrowbytes;

		if (psource >= r_sourcemax)
			psource -= r_stepback;
	}
}

You also need to create and register a cvar called d_ditherlight.

The theory is quite simple. The variables XY0a and XY0b means basically that we're filtering the dimension 0 (which is the only one) of the lighting along the XY coordinates of the destination. The a and b versions of the variable are just an optimization so we don't have to read the X coordinate on every pixel. Also, due to the nature of the surface cache drawing algorithm, X is always guaranteed to start at zero.
And the shading for each color is unidimensional, so dimension 0 is all we need. For bidimensional sources, like textures, the dimension 1 is also needed.

As you might have guessed, this code doesn't work with colored lighting. But my intention here was just to increase the quality of the original lighting, and not to add more features to it.

Also, don't forget to disable it on BSP entities that uses cel-shading, if your engine supports that feature.

This dithering may look too sharp on distant mip0 surfaces, so setting d_mipscale to a higher value, like 1.4, may be recommended to reduce artifacts like this on the lighting.

And here's the code for dithered lighting on the padded mip0 surfaces, which are used in Makaqu's texture dithering:

Code: Select all

void R_DrawSurfaceBlock8_mip0_Padded (void) // mankrip - for dithering
{
	// mankrip - begin
	// in most cases, square textures are tiled, and non-square textures aren't.
	qboolean tiled = (r_drawsurf.texture->width == r_drawsurf.texture->height);
	// mankrip - end
	prowdest = prowdestbase;
	// mankrip - begin
	if (d_dither.value)
	{
		if (tiled)
		{
			// duplicate the last line of texture, wrapping the texture
			psource = pbasesource + r_stepback - r_drawsurf.texture->width;
			if (psource >= r_sourcemax)
				psource -= r_stepback;
		}
		else
			psource = pbasesource;

		prowdest++; // skip the first column, we'll use -1 for that
		r_lightptr_start = r_lightptr; // duplicate the first line of light
		lightleft = r_lightptr_start[0];
		lightright = r_lightptr_start[1];
		r_lightptr_start += r_lightwidth;
		lightleftstep = (r_lightptr_start[0] - lightleft) >> 4;
		lightrightstep = (r_lightptr_start[1] - lightright) >> 4;

		if (ditherlight)
		{
			XY0a = lightmap_dither_kernel[1][1];
			XY0b = lightmap_dither_kernel[0][1];
		}
		else
		{
			XY0a = 0;
			XY0b = 0;
		}

		lightstep = (lightleft - lightright) >> 4;

		light = lightright;

		if (padr)
		prowdest[16] = vid.colormap[ ( (light + XY0b) & 0xFF00) + psource[ (tiled) ? ( (psource + 16 >= r_sourcemax) ? (16 - r_stepback) : 16) : 15]]; // right padding
		prowdest[15] = vid.colormap[ ( (light + XY0a) & 0xFF00) + psource[15]]; light += lightstep;
		prowdest[14] = vid.colormap[ ( (light + XY0b) & 0xFF00) + psource[14]]; light += lightstep;
		prowdest[13] = vid.colormap[ ( (light + XY0a) & 0xFF00) + psource[13]]; light += lightstep;
		prowdest[12] = vid.colormap[ ( (light + XY0b) & 0xFF00) + psource[12]]; light += lightstep;
		prowdest[11] = vid.colormap[ ( (light + XY0a) & 0xFF00) + psource[11]]; light += lightstep;
		prowdest[10] = vid.colormap[ ( (light + XY0b) & 0xFF00) + psource[10]]; light += lightstep;
		prowdest[ 9] = vid.colormap[ ( (light + XY0a) & 0xFF00) + psource[ 9]]; light += lightstep;
		prowdest[ 8] = vid.colormap[ ( (light + XY0b) & 0xFF00) + psource[ 8]]; light += lightstep;
		prowdest[ 7] = vid.colormap[ ( (light + XY0a) & 0xFF00) + psource[ 7]]; light += lightstep;
		prowdest[ 6] = vid.colormap[ ( (light + XY0b) & 0xFF00) + psource[ 6]]; light += lightstep;
		prowdest[ 5] = vid.colormap[ ( (light + XY0a) & 0xFF00) + psource[ 5]]; light += lightstep;
		prowdest[ 4] = vid.colormap[ ( (light + XY0b) & 0xFF00) + psource[ 4]]; light += lightstep;
		prowdest[ 3] = vid.colormap[ ( (light + XY0a) & 0xFF00) + psource[ 3]]; light += lightstep;
		prowdest[ 2] = vid.colormap[ ( (light + XY0b) & 0xFF00) + psource[ 2]]; light += lightstep;
		prowdest[ 1] = vid.colormap[ ( (light + XY0a) & 0xFF00) + psource[ 1]]; light += lightstep;
		prowdest[ 0] = vid.colormap[ ( (light + XY0b) & 0xFF00) + psource[ 0]];
		if (padl)
		prowdest[-1] = vid.colormap[ ( (light + XY0a) & 0xFF00) + psource[ (tiled) ? ( (psource - 1 < 0) ? (r_stepback - 1) : -1) : 0]]; // left padding

		prowdest += surfrowbytes;
	}
	// mankrip - end
	psource = pbasesource;
	#if 0
	for (v=0 ; v<r_numvblocks ; v++)
	{
		for (i=0 ; i<16 ; i++)
		{
			if (padr)
			prowdest[16 + i * surfrowbytes] = 0;
			prowdest[15 + i * surfrowbytes] = 0;
			prowdest[14 + i * surfrowbytes] = 0;
			prowdest[13 + i * surfrowbytes] = 0;
			prowdest[12 + i * surfrowbytes] = 0;
			prowdest[11 + i * surfrowbytes] = 0;
			prowdest[10 + i * surfrowbytes] = 0;
			prowdest[ 9 + i * surfrowbytes] = 0;
			prowdest[ 8 + i * surfrowbytes] = 0;
			prowdest[ 7 + i * surfrowbytes] = 0;
			prowdest[ 6 + i * surfrowbytes] = 0;
			prowdest[ 5 + i * surfrowbytes] = 0;
			prowdest[ 4 + i * surfrowbytes] = 0;
			prowdest[ 3 + i * surfrowbytes] = 0;
			prowdest[ 2 + i * surfrowbytes] = 0;
			prowdest[ 1 + i * surfrowbytes] = 0;
			prowdest[ 0 + i * surfrowbytes] = 0;
			if (padl)
			prowdest[-1 + i * surfrowbytes] = 0;
		}
	}
	#endif

	for (b=0 ; b<r_numvblocks ; b++)
	{
	// FIXME: make these locals?
	// FIXME: use delta rather than both right and left, like ASM?
		lightleft = r_lightptr[0];
		lightright = r_lightptr[1];
		r_lightptr += r_lightwidth;
		lightleftstep = (r_lightptr[0] - lightleft) >> 4;
		lightrightstep = (r_lightptr[1] - lightright) >> 4;

		for (i=0 ; i<16 ; i++)
		{
			// mankrip - begin
			if (ditherlight)
			{
				Y = i & 1;
				XY0a = lightmap_dither_kernel[1][Y];
				XY0b = lightmap_dither_kernel[0][Y];
			}
			else
			{
				XY0a = 0;
				XY0b = 0;
			}

			lightstep = (lightleft - lightright) >> 4;
			// mankrip - end

			light = lightright;

			// mankrip - begin
			if (padr)
			prowdest[16] = vid.colormap[ ( (light + XY0b) & 0xFF00) + psource[ (tiled) ? ( (psource + 16 >= r_sourcemax) ? (16 - r_stepback) : 16) : 15]]; // right padding
			prowdest[15] = vid.colormap[ ( (light + XY0a) & 0xFF00) + psource[15]]; light += lightstep;
			prowdest[14] = vid.colormap[ ( (light + XY0b) & 0xFF00) + psource[14]]; light += lightstep;
			prowdest[13] = vid.colormap[ ( (light + XY0a) & 0xFF00) + psource[13]]; light += lightstep;
			prowdest[12] = vid.colormap[ ( (light + XY0b) & 0xFF00) + psource[12]]; light += lightstep;
			prowdest[11] = vid.colormap[ ( (light + XY0a) & 0xFF00) + psource[11]]; light += lightstep;
			prowdest[10] = vid.colormap[ ( (light + XY0b) & 0xFF00) + psource[10]]; light += lightstep;
			prowdest[ 9] = vid.colormap[ ( (light + XY0a) & 0xFF00) + psource[ 9]]; light += lightstep;
			prowdest[ 8] = vid.colormap[ ( (light + XY0b) & 0xFF00) + psource[ 8]]; light += lightstep;
			prowdest[ 7] = vid.colormap[ ( (light + XY0a) & 0xFF00) + psource[ 7]]; light += lightstep;
			prowdest[ 6] = vid.colormap[ ( (light + XY0b) & 0xFF00) + psource[ 6]]; light += lightstep;
			prowdest[ 5] = vid.colormap[ ( (light + XY0a) & 0xFF00) + psource[ 5]]; light += lightstep;
			prowdest[ 4] = vid.colormap[ ( (light + XY0b) & 0xFF00) + psource[ 4]]; light += lightstep;
			prowdest[ 3] = vid.colormap[ ( (light + XY0a) & 0xFF00) + psource[ 3]]; light += lightstep;
			prowdest[ 2] = vid.colormap[ ( (light + XY0b) & 0xFF00) + psource[ 2]]; light += lightstep;
			prowdest[ 1] = vid.colormap[ ( (light + XY0a) & 0xFF00) + psource[ 1]]; light += lightstep;
			prowdest[ 0] = vid.colormap[ ( (light + XY0b) & 0xFF00) + psource[ 0]];
			if (padl)
			prowdest[-1] = vid.colormap[ ( (light + XY0a) & 0xFF00) + psource[ (tiled) ? ( (psource - 1 < 0) ? (r_stepback - 1) : -1) : 0]]; // left padding
			// mankrip - end

			psource += sourcetstep;
			lightright += lightrightstep;
			lightleft += lightleftstep;
			prowdest += surfrowbytes;
		}

		// mankrip - begin
		if (!tiled && b == r_numvblocks - 1)
			psource -= sourcetstep;
		// mankrip - end
		if (psource >= r_sourcemax)
			psource -= r_stepback;
	}
	// mankrip - begin
	if (d_dither.value)
	{
		if (ditherlight)
		{
			XY0a = lightmap_dither_kernel[1][0];
			XY0b = lightmap_dither_kernel[0][0];
		}
		else
		{
			XY0a = 0;
			XY0b = 0;
		}

		light -= lightstep * 15; // step back to the last line of light

		if (padr)
		prowdest[16] = vid.colormap[ ( (light + XY0b) & 0xFF00) + psource[ (tiled) ? ( (psource + 16 >= r_sourcemax) ? (16 - r_stepback) : 16) : 15]]; // right padding
		prowdest[15] = vid.colormap[ ( (light + XY0a) & 0xFF00) + psource[15]]; light += lightstep;
		prowdest[14] = vid.colormap[ ( (light + XY0b) & 0xFF00) + psource[14]]; light += lightstep;
		prowdest[13] = vid.colormap[ ( (light + XY0a) & 0xFF00) + psource[13]]; light += lightstep;
		prowdest[12] = vid.colormap[ ( (light + XY0b) & 0xFF00) + psource[12]]; light += lightstep;
		prowdest[11] = vid.colormap[ ( (light + XY0a) & 0xFF00) + psource[11]]; light += lightstep;
		prowdest[10] = vid.colormap[ ( (light + XY0b) & 0xFF00) + psource[10]]; light += lightstep;
		prowdest[ 9] = vid.colormap[ ( (light + XY0a) & 0xFF00) + psource[ 9]]; light += lightstep;
		prowdest[ 8] = vid.colormap[ ( (light + XY0b) & 0xFF00) + psource[ 8]]; light += lightstep;
		prowdest[ 7] = vid.colormap[ ( (light + XY0a) & 0xFF00) + psource[ 7]]; light += lightstep;
		prowdest[ 6] = vid.colormap[ ( (light + XY0b) & 0xFF00) + psource[ 6]]; light += lightstep;
		prowdest[ 5] = vid.colormap[ ( (light + XY0a) & 0xFF00) + psource[ 5]]; light += lightstep;
		prowdest[ 4] = vid.colormap[ ( (light + XY0b) & 0xFF00) + psource[ 4]]; light += lightstep;
		prowdest[ 3] = vid.colormap[ ( (light + XY0a) & 0xFF00) + psource[ 3]]; light += lightstep;
		prowdest[ 2] = vid.colormap[ ( (light + XY0b) & 0xFF00) + psource[ 2]]; light += lightstep;
		prowdest[ 1] = vid.colormap[ ( (light + XY0a) & 0xFF00) + psource[ 1]]; light += lightstep;
		prowdest[ 0] = vid.colormap[ ( (light + XY0b) & 0xFF00) + psource[ 0]];
		if (padl)
		prowdest[-1] = vid.colormap[ ( (light + XY0a) & 0xFF00) + psource[ (tiled) ? ( (psource - 1 < 0) ? (r_stepback - 1) : -1) : 0]]; // left padding
	}
	// mankrip - end
}

mh · Post by mh » Thu Aug 29, 2013 6:13 pm

I'm wondering at this stage if there's better mileage in just biting the bullet and writing a 32-bit software renderer?

One theoretical way of doing this is by drawing 3 views - one for each of the R, G and B channels - to offscreen buffers, then combining them during write-out to the back buffer. You'd need 3 copies of each colormap, again one for each of R, G and B, but that's a lot more lightweight than lots of huge LUTs. An optimization would be to run the vertex transforms and Z test once only, then only the actual drawing runs 3 times. You could even reuse the original ASM code for much of this.

mankrip · Post by **mankrip** » Thu Aug 29, 2013 7:00 pm

Well, this algorithm is blazing fast. The only change for each texel is using (light + XY0a) (or (light + XY0b)) instead of light. It should be quite slower than the x86 ASM version, but someone with enough skills could port it to Abrash's code.

But the main point for me is that it keeps the original colors. In the screens I've posted, notice how the darker parts of the wall goes a bit cyan, and then goes somewhat between green and brown before going fully black.

To me, this is part of Quake's visual identity. In the software renderer, the darkest spots of the game are actually very rich with all those different and somewhat unexpected colors showing up in the shading. It gives a better sense of the ravaged, desolated places where the player goes, because the darker it is, the more chaotic it gets. This is the same reason why I like this kind of filtering in PrBoom's software renderer, and always wanted to do the same in Quake.

In hardware-accelerated engines, the shading looks too clean, too perfect, and that makes the atmosphere more bland.

However, I often think about how to implement colored lighting, and it's a case where I agree that 24 or 32 bit color should be better. Maps with colored lighting are almost always made for hardware-accelerated engines, so using true color rendering for them is the best to make them look as their authors intended.

About your ideas for optimization, the easiest way for doing that would be to switch the surface cache and virtual screen buffer for each color before calling the function to draw the spans. But since the dithered versions of those functions are in C, it would be more appropriate to convert them to 32-bit, using 32-bit surface caches.

Spike · Post by **Spike** » Thu Aug 29, 2013 10:33 pm

32bit surface caches rock, although depending on surface caches means you're stuck with 'nearest' filtering (including dithered nearest sampling).
you can do linear sampling with or without a surface cache - it just looks terrible in 8bit, and slow in 24+bit.

leileilol · Post by **leileilol** » Thu Aug 29, 2013 10:40 pm

mh wrote:One theoretical way of doing this is by drawing 3 views - one for each of the R, G and B channels - to offscreen buffers, then combining them during write-out to the back buffer. You'd need 3 copies of each colormap, again one for each of R, G and B, but that's a lot more lightweight than lots of huge LUTs. An optimization would be to run the vertex transforms and Z test once only, then only the actual drawing runs 3 times. You could even reuse the original ASM code for much of this.

I was planning to do this same thing (and have error-diffusion dithering similar to what i did for models at the span level combining a lighting surfacecache with a texture surfacecache (along with planned lighting features to go in the buffer similar to deferred rendering)) but I had trouble with multiple buffers arleady with the water. You'd see lagged behind frames of them.

I was also thinking about a lighting-only surfacecache too, but I get enough SHOWRAM spam with just one expanded-for-15bpp-rendering surfacecache anyway

Also I wonder what Makaqu's platform target is anymore. Is it still the Dreamcast? Is it still a 6th generation Win9x computer?

InsideQC Forums

Kernel-based dithering of BSP surface lighting (in software)

Kernel-based dithering of BSP surface lighting (in software)

Re: Kernel-based dithering of BSP surface lighting (in softw

Re: Kernel-based dithering of BSP surface lighting (in softw

Re: Kernel-based dithering of BSP surface lighting (in softw

Re: Kernel-based dithering of BSP surface lighting (in softw