Forum

Kernel-based dithering of BSP surface lighting (in software)

Post tutorials on how to do certain tasks within game or engine code here.

Moderator: InsideQC Admins

Kernel-based dithering of BSP surface lighting (in software)

Postby mankrip » Thu Aug 29, 2013 4:27 am

Before and after screenshots.

Open r_surf.c, and paste where appropriate:
Code: Select all
// mankrip - begin
// Non-zero while dithered lighting is active. Assigned in R_DrawSurface and
// read by R_BuildLightMap and the R_DrawSurfaceBlock8_mip*_C drawers.
static qboolean ditherlight;

// C block drawers with optional lighting dither, one per mip level
void R_DrawSurfaceBlock8_mip0_C (void);
void R_DrawSurfaceBlock8_mip1_C (void);
void R_DrawSurfaceBlock8_mip2_C (void);
void R_DrawSurfaceBlock8_mip3_C (void);
// mankrip - end
void R_DrawSurfaceBlock8_mip0 (void);
void R_DrawSurfaceBlock8_mip1 (void);
void R_DrawSurfaceBlock8_mip2 (void);
void R_DrawSurfaceBlock8_mip3 (void);

// Block drawers in mip order (mip0 first); presumably indexed by the mip
// level of the surface being drawn - confirm against R_DrawSurface.
// The first entry must not be preceded by a comma: an initializer list
// cannot start with one.
static void   (*surfmiptable[4]) (void) =
{
   // mankrip - begin
   R_DrawSurfaceBlock8_mip0_C,
   R_DrawSurfaceBlock8_mip1_C,
   R_DrawSurfaceBlock8_mip2_C,
   R_DrawSurfaceBlock8_mip3_C
   // mankrip - end
};

Code: Select all
void R_BuildLightMap (void)
{
   int         smax, tmax;
   int         t;
   int         i, size;
   byte      *lightmap;
   unsigned   scale;
   int         maps;
   int         count, remainder; // mankrip
   unsigned   *blocks; // mankrip
   msurface_t   *surf;

   surf = r_drawsurf.surf;

   smax = (surf->extents[0]>>4)+1;
   tmax = (surf->extents[1]>>4)+1;
   size = smax*tmax; // in most cases, 64*64

   // mankrip - fullbright BSP textures fix - begin
   if (r_fullbright.value)
   {
      memset (blocklights, 63 << 8, sizeof (unsigned) * size);
      return;
   }
   else if (!cl.worldmodel->lightdata)
   {
   //   memset (blocklights, 31 << 8, sizeof (unsigned) * size);
      memset (blocklights, (unsigned) neutral_light, sizeof (unsigned) * size);
      return;
   }
   // mankrip - fullbright BSP textures fix - end

// clear to ambient
   memset (blocklights, r_refdef.ambientlight << 8, sizeof (unsigned) * size); // mankrip

   lightmap = surf->samples;
   count = size >> 5; // mankrip
   remainder = size % 32; // mankrip

// add all the lightmaps
   if (lightmap)
      for (maps = 0 ; maps < MAXLIGHTMAPS && surf->styles[maps] != 255 ; maps++)
      {
      //   scale = r_drawsurf.lightadj[maps];   // 8.8 fraction
      //   if (!r_externbsp_lit.value || currententity != cl_entities && currententity->model->name[0] != '*') // mankrip
            scale = (unsigned) ( (float)r_drawsurf.lightadj[maps] * currententity->lightlevel); // mankrip
         for (i=0 ; i<size ; i++)
            blocklights[i] += lightmap[i] * scale;
         lightmap += size;   // skip to next lightmap
      }

// add all the dynamic lights
   if (surf->dlightframe == r_framecount)
      R_AddDynamicLights ();

// bound, invert, and shift
   // mankrip - begin
#if 1
   blocks = blocklights;
   if (ditherlight)
   {
      // extra clamping for dithered shading
      for (i = 0 ; i < count ; i++, blocks += 32)
      {
         t = (255*256 - (int)blocks[31]) >> (8 - VID_CBITS); blocks[31] = (t < (2 << 6)) ? (2 << 6) : t;
         t = (255*256 - (int)blocks[30]) >> (8 - VID_CBITS); blocks[30] = (t < (2 << 6)) ? (2 << 6) : t;
         t = (255*256 - (int)blocks[29]) >> (8 - VID_CBITS); blocks[29] = (t < (2 << 6)) ? (2 << 6) : t;
         t = (255*256 - (int)blocks[28]) >> (8 - VID_CBITS); blocks[28] = (t < (2 << 6)) ? (2 << 6) : t;
         t = (255*256 - (int)blocks[27]) >> (8 - VID_CBITS); blocks[27] = (t < (2 << 6)) ? (2 << 6) : t;
         t = (255*256 - (int)blocks[26]) >> (8 - VID_CBITS); blocks[26] = (t < (2 << 6)) ? (2 << 6) : t;
         t = (255*256 - (int)blocks[25]) >> (8 - VID_CBITS); blocks[25] = (t < (2 << 6)) ? (2 << 6) : t;
         t = (255*256 - (int)blocks[24]) >> (8 - VID_CBITS); blocks[24] = (t < (2 << 6)) ? (2 << 6) : t;
         t = (255*256 - (int)blocks[23]) >> (8 - VID_CBITS); blocks[23] = (t < (2 << 6)) ? (2 << 6) : t;
         t = (255*256 - (int)blocks[22]) >> (8 - VID_CBITS); blocks[22] = (t < (2 << 6)) ? (2 << 6) : t;
         t = (255*256 - (int)blocks[21]) >> (8 - VID_CBITS); blocks[21] = (t < (2 << 6)) ? (2 << 6) : t;
         t = (255*256 - (int)blocks[20]) >> (8 - VID_CBITS); blocks[20] = (t < (2 << 6)) ? (2 << 6) : t;
         t = (255*256 - (int)blocks[19]) >> (8 - VID_CBITS); blocks[19] = (t < (2 << 6)) ? (2 << 6) : t;
         t = (255*256 - (int)blocks[18]) >> (8 - VID_CBITS); blocks[18] = (t < (2 << 6)) ? (2 << 6) : t;
         t = (255*256 - (int)blocks[17]) >> (8 - VID_CBITS); blocks[17] = (t < (2 << 6)) ? (2 << 6) : t;
         t = (255*256 - (int)blocks[16]) >> (8 - VID_CBITS); blocks[16] = (t < (2 << 6)) ? (2 << 6) : t;
         t = (255*256 - (int)blocks[15]) >> (8 - VID_CBITS); blocks[15] = (t < (2 << 6)) ? (2 << 6) : t;
         t = (255*256 - (int)blocks[14]) >> (8 - VID_CBITS); blocks[14] = (t < (2 << 6)) ? (2 << 6) : t;
         t = (255*256 - (int)blocks[13]) >> (8 - VID_CBITS); blocks[13] = (t < (2 << 6)) ? (2 << 6) : t;
         t = (255*256 - (int)blocks[12]) >> (8 - VID_CBITS); blocks[12] = (t < (2 << 6)) ? (2 << 6) : t;
         t = (255*256 - (int)blocks[11]) >> (8 - VID_CBITS); blocks[11] = (t < (2 << 6)) ? (2 << 6) : t;
         t = (255*256 - (int)blocks[10]) >> (8 - VID_CBITS); blocks[10] = (t < (2 << 6)) ? (2 << 6) : t;
         t = (255*256 - (int)blocks[ 9]) >> (8 - VID_CBITS); blocks[ 9] = (t < (2 << 6)) ? (2 << 6) : t;
         t = (255*256 - (int)blocks[ 8]) >> (8 - VID_CBITS); blocks[ 8] = (t < (2 << 6)) ? (2 << 6) : t;
         t = (255*256 - (int)blocks[ 7]) >> (8 - VID_CBITS); blocks[ 7] = (t < (2 << 6)) ? (2 << 6) : t;
         t = (255*256 - (int)blocks[ 6]) >> (8 - VID_CBITS); blocks[ 6] = (t < (2 << 6)) ? (2 << 6) : t;
         t = (255*256 - (int)blocks[ 5]) >> (8 - VID_CBITS); blocks[ 5] = (t < (2 << 6)) ? (2 << 6) : t;
         t = (255*256 - (int)blocks[ 4]) >> (8 - VID_CBITS); blocks[ 4] = (t < (2 << 6)) ? (2 << 6) : t;
         t = (255*256 - (int)blocks[ 3]) >> (8 - VID_CBITS); blocks[ 3] = (t < (2 << 6)) ? (2 << 6) : t;
         t = (255*256 - (int)blocks[ 2]) >> (8 - VID_CBITS); blocks[ 2] = (t < (2 << 6)) ? (2 << 6) : t;
         t = (255*256 - (int)blocks[ 1]) >> (8 - VID_CBITS); blocks[ 1] = (t < (2 << 6)) ? (2 << 6) : t;
         t = (255*256 - (int)blocks[ 0]) >> (8 - VID_CBITS); blocks[ 0] = (t < (2 << 6)) ? (2 << 6) : t;
      }
      switch (remainder)
      {
         case 31: t = (255*256 - (int)blocks[31]) >> (8 - VID_CBITS); blocks[31] = (t < (2 << 6)) ? (2 << 6) : t;
         case 30: t = (255*256 - (int)blocks[30]) >> (8 - VID_CBITS); blocks[30] = (t < (2 << 6)) ? (2 << 6) : t;
         case 29: t = (255*256 - (int)blocks[29]) >> (8 - VID_CBITS); blocks[29] = (t < (2 << 6)) ? (2 << 6) : t;
         case 28: t = (255*256 - (int)blocks[28]) >> (8 - VID_CBITS); blocks[28] = (t < (2 << 6)) ? (2 << 6) : t;
         case 27: t = (255*256 - (int)blocks[27]) >> (8 - VID_CBITS); blocks[27] = (t < (2 << 6)) ? (2 << 6) : t;
         case 26: t = (255*256 - (int)blocks[26]) >> (8 - VID_CBITS); blocks[26] = (t < (2 << 6)) ? (2 << 6) : t;
         case 25: t = (255*256 - (int)blocks[25]) >> (8 - VID_CBITS); blocks[25] = (t < (2 << 6)) ? (2 << 6) : t;
         case 24: t = (255*256 - (int)blocks[24]) >> (8 - VID_CBITS); blocks[24] = (t < (2 << 6)) ? (2 << 6) : t;
         case 23: t = (255*256 - (int)blocks[23]) >> (8 - VID_CBITS); blocks[23] = (t < (2 << 6)) ? (2 << 6) : t;
         case 22: t = (255*256 - (int)blocks[22]) >> (8 - VID_CBITS); blocks[22] = (t < (2 << 6)) ? (2 << 6) : t;
         case 21: t = (255*256 - (int)blocks[21]) >> (8 - VID_CBITS); blocks[21] = (t < (2 << 6)) ? (2 << 6) : t;
         case 20: t = (255*256 - (int)blocks[20]) >> (8 - VID_CBITS); blocks[20] = (t < (2 << 6)) ? (2 << 6) : t;
         case 19: t = (255*256 - (int)blocks[19]) >> (8 - VID_CBITS); blocks[19] = (t < (2 << 6)) ? (2 << 6) : t;
         case 18: t = (255*256 - (int)blocks[18]) >> (8 - VID_CBITS); blocks[18] = (t < (2 << 6)) ? (2 << 6) : t;
         case 17: t = (255*256 - (int)blocks[17]) >> (8 - VID_CBITS); blocks[17] = (t < (2 << 6)) ? (2 << 6) : t;
         case 16: t = (255*256 - (int)blocks[16]) >> (8 - VID_CBITS); blocks[16] = (t < (2 << 6)) ? (2 << 6) : t;
         case 15: t = (255*256 - (int)blocks[15]) >> (8 - VID_CBITS); blocks[15] = (t < (2 << 6)) ? (2 << 6) : t;
         case 14: t = (255*256 - (int)blocks[14]) >> (8 - VID_CBITS); blocks[14] = (t < (2 << 6)) ? (2 << 6) : t;
         case 13: t = (255*256 - (int)blocks[13]) >> (8 - VID_CBITS); blocks[13] = (t < (2 << 6)) ? (2 << 6) : t;
         case 12: t = (255*256 - (int)blocks[12]) >> (8 - VID_CBITS); blocks[12] = (t < (2 << 6)) ? (2 << 6) : t;
         case 11: t = (255*256 - (int)blocks[11]) >> (8 - VID_CBITS); blocks[11] = (t < (2 << 6)) ? (2 << 6) : t;
         case 10: t = (255*256 - (int)blocks[10]) >> (8 - VID_CBITS); blocks[10] = (t < (2 << 6)) ? (2 << 6) : t;
         case  9: t = (255*256 - (int)blocks[ 9]) >> (8 - VID_CBITS); blocks[ 9] = (t < (2 << 6)) ? (2 << 6) : t;
         case  8: t = (255*256 - (int)blocks[ 8]) >> (8 - VID_CBITS); blocks[ 8] = (t < (2 << 6)) ? (2 << 6) : t;
         case  7: t = (255*256 - (int)blocks[ 7]) >> (8 - VID_CBITS); blocks[ 7] = (t < (2 << 6)) ? (2 << 6) : t;
         case  6: t = (255*256 - (int)blocks[ 6]) >> (8 - VID_CBITS); blocks[ 6] = (t < (2 << 6)) ? (2 << 6) : t;
         case  5: t = (255*256 - (int)blocks[ 5]) >> (8 - VID_CBITS); blocks[ 5] = (t < (2 << 6)) ? (2 << 6) : t;
         case  4: t = (255*256 - (int)blocks[ 4]) >> (8 - VID_CBITS); blocks[ 4] = (t < (2 << 6)) ? (2 << 6) : t;
         case  3: t = (255*256 - (int)blocks[ 3]) >> (8 - VID_CBITS); blocks[ 3] = (t < (2 << 6)) ? (2 << 6) : t;
         case  2: t = (255*256 - (int)blocks[ 2]) >> (8 - VID_CBITS); blocks[ 2] = (t < (2 << 6)) ? (2 << 6) : t;
         case  1: t = (255*256 - (int)blocks[ 1]) >> (8 - VID_CBITS); blocks[ 1] = (t < (2 << 6)) ? (2 << 6) : t;
         case  0: t = (255*256 - (int)blocks[ 0]) >> (8 - VID_CBITS); blocks[ 0] = (t < (2 << 6)) ? (2 << 6) : t;
         default: break;
      }
   }
   else
   {
      for (i = 0 ; i < count ; i++, blocks += 32)
      {
         t = (255*256 - (int)blocks[31]) >> (8 - VID_CBITS); blocks[31] = (t < (1 << 6)) ? (1 << 6) : t;
         t = (255*256 - (int)blocks[30]) >> (8 - VID_CBITS); blocks[30] = (t < (1 << 6)) ? (1 << 6) : t;
         t = (255*256 - (int)blocks[29]) >> (8 - VID_CBITS); blocks[29] = (t < (1 << 6)) ? (1 << 6) : t;
         t = (255*256 - (int)blocks[28]) >> (8 - VID_CBITS); blocks[28] = (t < (1 << 6)) ? (1 << 6) : t;
         t = (255*256 - (int)blocks[27]) >> (8 - VID_CBITS); blocks[27] = (t < (1 << 6)) ? (1 << 6) : t;
         t = (255*256 - (int)blocks[26]) >> (8 - VID_CBITS); blocks[26] = (t < (1 << 6)) ? (1 << 6) : t;
         t = (255*256 - (int)blocks[25]) >> (8 - VID_CBITS); blocks[25] = (t < (1 << 6)) ? (1 << 6) : t;
         t = (255*256 - (int)blocks[24]) >> (8 - VID_CBITS); blocks[24] = (t < (1 << 6)) ? (1 << 6) : t;
         t = (255*256 - (int)blocks[23]) >> (8 - VID_CBITS); blocks[23] = (t < (1 << 6)) ? (1 << 6) : t;
         t = (255*256 - (int)blocks[22]) >> (8 - VID_CBITS); blocks[22] = (t < (1 << 6)) ? (1 << 6) : t;
         t = (255*256 - (int)blocks[21]) >> (8 - VID_CBITS); blocks[21] = (t < (1 << 6)) ? (1 << 6) : t;
         t = (255*256 - (int)blocks[20]) >> (8 - VID_CBITS); blocks[20] = (t < (1 << 6)) ? (1 << 6) : t;
         t = (255*256 - (int)blocks[19]) >> (8 - VID_CBITS); blocks[19] = (t < (1 << 6)) ? (1 << 6) : t;
         t = (255*256 - (int)blocks[18]) >> (8 - VID_CBITS); blocks[18] = (t < (1 << 6)) ? (1 << 6) : t;
         t = (255*256 - (int)blocks[17]) >> (8 - VID_CBITS); blocks[17] = (t < (1 << 6)) ? (1 << 6) : t;
         t = (255*256 - (int)blocks[16]) >> (8 - VID_CBITS); blocks[16] = (t < (1 << 6)) ? (1 << 6) : t;
         t = (255*256 - (int)blocks[15]) >> (8 - VID_CBITS); blocks[15] = (t < (1 << 6)) ? (1 << 6) : t;
         t = (255*256 - (int)blocks[14]) >> (8 - VID_CBITS); blocks[14] = (t < (1 << 6)) ? (1 << 6) : t;
         t = (255*256 - (int)blocks[13]) >> (8 - VID_CBITS); blocks[13] = (t < (1 << 6)) ? (1 << 6) : t;
         t = (255*256 - (int)blocks[12]) >> (8 - VID_CBITS); blocks[12] = (t < (1 << 6)) ? (1 << 6) : t;
         t = (255*256 - (int)blocks[11]) >> (8 - VID_CBITS); blocks[11] = (t < (1 << 6)) ? (1 << 6) : t;
         t = (255*256 - (int)blocks[10]) >> (8 - VID_CBITS); blocks[10] = (t < (1 << 6)) ? (1 << 6) : t;
         t = (255*256 - (int)blocks[ 9]) >> (8 - VID_CBITS); blocks[ 9] = (t < (1 << 6)) ? (1 << 6) : t;
         t = (255*256 - (int)blocks[ 8]) >> (8 - VID_CBITS); blocks[ 8] = (t < (1 << 6)) ? (1 << 6) : t;
         t = (255*256 - (int)blocks[ 7]) >> (8 - VID_CBITS); blocks[ 7] = (t < (1 << 6)) ? (1 << 6) : t;
         t = (255*256 - (int)blocks[ 6]) >> (8 - VID_CBITS); blocks[ 6] = (t < (1 << 6)) ? (1 << 6) : t;
         t = (255*256 - (int)blocks[ 5]) >> (8 - VID_CBITS); blocks[ 5] = (t < (1 << 6)) ? (1 << 6) : t;
         t = (255*256 - (int)blocks[ 4]) >> (8 - VID_CBITS); blocks[ 4] = (t < (1 << 6)) ? (1 << 6) : t;
         t = (255*256 - (int)blocks[ 3]) >> (8 - VID_CBITS); blocks[ 3] = (t < (1 << 6)) ? (1 << 6) : t;
         t = (255*256 - (int)blocks[ 2]) >> (8 - VID_CBITS); blocks[ 2] = (t < (1 << 6)) ? (1 << 6) : t;
         t = (255*256 - (int)blocks[ 1]) >> (8 - VID_CBITS); blocks[ 1] = (t < (1 << 6)) ? (1 << 6) : t;
         t = (255*256 - (int)blocks[ 0]) >> (8 - VID_CBITS); blocks[ 0] = (t < (1 << 6)) ? (1 << 6) : t;
      }
      switch (remainder)
      {
         case 31: t = (255*256 - (int)blocks[31]) >> (8 - VID_CBITS); blocks[31] = (t < (1 << 6)) ? (1 << 6) : t;
         case 30: t = (255*256 - (int)blocks[30]) >> (8 - VID_CBITS); blocks[30] = (t < (1 << 6)) ? (1 << 6) : t;
         case 29: t = (255*256 - (int)blocks[29]) >> (8 - VID_CBITS); blocks[29] = (t < (1 << 6)) ? (1 << 6) : t;
         case 28: t = (255*256 - (int)blocks[28]) >> (8 - VID_CBITS); blocks[28] = (t < (1 << 6)) ? (1 << 6) : t;
         case 27: t = (255*256 - (int)blocks[27]) >> (8 - VID_CBITS); blocks[27] = (t < (1 << 6)) ? (1 << 6) : t;
         case 26: t = (255*256 - (int)blocks[26]) >> (8 - VID_CBITS); blocks[26] = (t < (1 << 6)) ? (1 << 6) : t;
         case 25: t = (255*256 - (int)blocks[25]) >> (8 - VID_CBITS); blocks[25] = (t < (1 << 6)) ? (1 << 6) : t;
         case 24: t = (255*256 - (int)blocks[24]) >> (8 - VID_CBITS); blocks[24] = (t < (1 << 6)) ? (1 << 6) : t;
         case 23: t = (255*256 - (int)blocks[23]) >> (8 - VID_CBITS); blocks[23] = (t < (1 << 6)) ? (1 << 6) : t;
         case 22: t = (255*256 - (int)blocks[22]) >> (8 - VID_CBITS); blocks[22] = (t < (1 << 6)) ? (1 << 6) : t;
         case 21: t = (255*256 - (int)blocks[21]) >> (8 - VID_CBITS); blocks[21] = (t < (1 << 6)) ? (1 << 6) : t;
         case 20: t = (255*256 - (int)blocks[20]) >> (8 - VID_CBITS); blocks[20] = (t < (1 << 6)) ? (1 << 6) : t;
         case 19: t = (255*256 - (int)blocks[19]) >> (8 - VID_CBITS); blocks[19] = (t < (1 << 6)) ? (1 << 6) : t;
         case 18: t = (255*256 - (int)blocks[18]) >> (8 - VID_CBITS); blocks[18] = (t < (1 << 6)) ? (1 << 6) : t;
         case 17: t = (255*256 - (int)blocks[17]) >> (8 - VID_CBITS); blocks[17] = (t < (1 << 6)) ? (1 << 6) : t;
         case 16: t = (255*256 - (int)blocks[16]) >> (8 - VID_CBITS); blocks[16] = (t < (1 << 6)) ? (1 << 6) : t;
         case 15: t = (255*256 - (int)blocks[15]) >> (8 - VID_CBITS); blocks[15] = (t < (1 << 6)) ? (1 << 6) : t;
         case 14: t = (255*256 - (int)blocks[14]) >> (8 - VID_CBITS); blocks[14] = (t < (1 << 6)) ? (1 << 6) : t;
         case 13: t = (255*256 - (int)blocks[13]) >> (8 - VID_CBITS); blocks[13] = (t < (1 << 6)) ? (1 << 6) : t;
         case 12: t = (255*256 - (int)blocks[12]) >> (8 - VID_CBITS); blocks[12] = (t < (1 << 6)) ? (1 << 6) : t;
         case 11: t = (255*256 - (int)blocks[11]) >> (8 - VID_CBITS); blocks[11] = (t < (1 << 6)) ? (1 << 6) : t;
         case 10: t = (255*256 - (int)blocks[10]) >> (8 - VID_CBITS); blocks[10] = (t < (1 << 6)) ? (1 << 6) : t;
         case  9: t = (255*256 - (int)blocks[ 9]) >> (8 - VID_CBITS); blocks[ 9] = (t < (1 << 6)) ? (1 << 6) : t;
         case  8: t = (255*256 - (int)blocks[ 8]) >> (8 - VID_CBITS); blocks[ 8] = (t < (1 << 6)) ? (1 << 6) : t;
         case  7: t = (255*256 - (int)blocks[ 7]) >> (8 - VID_CBITS); blocks[ 7] = (t < (1 << 6)) ? (1 << 6) : t;
         case  6: t = (255*256 - (int)blocks[ 6]) >> (8 - VID_CBITS); blocks[ 6] = (t < (1 << 6)) ? (1 << 6) : t;
         case  5: t = (255*256 - (int)blocks[ 5]) >> (8 - VID_CBITS); blocks[ 5] = (t < (1 << 6)) ? (1 << 6) : t;
         case  4: t = (255*256 - (int)blocks[ 4]) >> (8 - VID_CBITS); blocks[ 4] = (t < (1 << 6)) ? (1 << 6) : t;
         case  3: t = (255*256 - (int)blocks[ 3]) >> (8 - VID_CBITS); blocks[ 3] = (t < (1 << 6)) ? (1 << 6) : t;
         case  2: t = (255*256 - (int)blocks[ 2]) >> (8 - VID_CBITS); blocks[ 2] = (t < (1 << 6)) ? (1 << 6) : t;
         case  1: t = (255*256 - (int)blocks[ 1]) >> (8 - VID_CBITS); blocks[ 1] = (t < (1 << 6)) ? (1 << 6) : t;
         case  0: t = (255*256 - (int)blocks[ 0]) >> (8 - VID_CBITS); blocks[ 0] = (t < (1 << 6)) ? (1 << 6) : t;
         default: break;
      }
   }
#else
   // mankrip - end
   for (i=0 ; i<size ; i++)
   {
      t = (255*256 - (int)blocklights[i]) >> (8 - VID_CBITS);

      blocklights[i] = (t < (1 << 6)) ? (1 << 6) : t; // mankrip
   }
#endif
}

Code: Select all
// Surface drawing entry point. Only the local declarations and the dithering
// switch are shown in this excerpt; the remainder of the body is elided below.
void R_DrawSurface (void)
{
   unsigned char   *basetptr;
   int            smax, tmax, twidth;
   int            u;
   int            soffset, basetoffset, texwidth;
   int            horzblockstep;
   unsigned char   *pcolumndest;
   void         (*pblockdrawer)(void);
   texture_t      *mt;

   // Lighting dither is active only when the d_ditherlight cvar is non-zero
   // and r_fullbright is off. The result is stored in the file-scope flag
   // that the lightmap builder and the block drawers test.
   ditherlight = (d_ditherlight.value && !r_fullbright.value); // mankrip

[...]

}

Code: Select all
// mankrip - begin
int lightmap_dither_kernel[2][2] =
// same as dither_kernel[X][Y][0] >> 8
// set to >> 8 for more accuracy and less smoothness
//
// Offsets added to a light value before it is masked with 0xFF00 for the
// colormap lookup. The block drawers index it as [column parity][row parity].
// The resulting values are { -32, 96 } and { 32, -96 }.
//
// The negation is applied AFTER the shift: right-shifting a negative value
// is implementation-defined in C, whereas -(n >> 8) is fully defined and
// yields the same numbers here because both magnitudes are multiples of 256.
{
   {
      -(8192 >> 8)
   ,    24576 >> 8
   }
,   {
        8192 >> 8
   ,  -(24576 >> 8) // needs clamping, otherwise makes artifacts on overbright
   }
};
// Scratch state for lighting dither, shared by the block drawers.
static int Y;      // row parity (i & 1) used to pick a kernel column
static int XY0a;   // offset for odd destination columns: kernel[1][Y]
static int XY0b;   // offset for even destination columns: kernel[0][Y]
static int XY0a1;  // for mip3: odd-column offset of the second row
static int XY0b1;  // for mip3: even-column offset of the second row

// Loop counters and the running light value used by the block drawers.
static int b;          // vertical block counter
static int i;          // row counter within a block
static int lightstep;  // light change per pixel along a row
static int light;      // light value of the pixel being written
static byte *psource;   // source texels of the row being drawn
static byte *prowdest;  // destination pixels of the row being drawn
// mankrip - end

Code: Select all
void R_DrawSurfaceBlock8_mip0_C (void)
{
   psource = pbasesource;
   prowdest = prowdestbase;

   for (b=0 ; b<r_numvblocks ; b++)
   {
   // FIXME: make these locals?
   // FIXME: use delta rather than both right and left, like ASM?
      lightleft = r_lightptr[0];
      lightright = r_lightptr[1];
      r_lightptr += r_lightwidth;
      lightleftstep = (r_lightptr[0] - lightleft) >> 4;
      lightrightstep = (r_lightptr[1] - lightright) >> 4;

      for (i=0 ; i<16 ; i++)
      {
         // mankrip - begin
         if (ditherlight)
         {
            Y = i & 1;
            XY0a = lightmap_dither_kernel[1][Y];
            XY0b = lightmap_dither_kernel[0][Y];
         }
         else
         {
            XY0a = 0;
            XY0b = 0;
         }
         // mankrip - end

         lightstep = (lightleft - lightright) >> 4; // mankrip

         light = lightright;

         // mankrip - begin
         prowdest[15] = vid.colormap[ ( (light + XY0a) & 0xFF00) + psource[15]]; light += lightstep;
         prowdest[14] = vid.colormap[ ( (light + XY0b) & 0xFF00) + psource[14]]; light += lightstep;
         prowdest[13] = vid.colormap[ ( (light + XY0a) & 0xFF00) + psource[13]]; light += lightstep;
         prowdest[12] = vid.colormap[ ( (light + XY0b) & 0xFF00) + psource[12]]; light += lightstep;
         prowdest[11] = vid.colormap[ ( (light + XY0a) & 0xFF00) + psource[11]]; light += lightstep;
         prowdest[10] = vid.colormap[ ( (light + XY0b) & 0xFF00) + psource[10]]; light += lightstep;
         prowdest[ 9] = vid.colormap[ ( (light + XY0a) & 0xFF00) + psource[ 9]]; light += lightstep;
         prowdest[ 8] = vid.colormap[ ( (light + XY0b) & 0xFF00) + psource[ 8]]; light += lightstep;
         prowdest[ 7] = vid.colormap[ ( (light + XY0a) & 0xFF00) + psource[ 7]]; light += lightstep;
         prowdest[ 6] = vid.colormap[ ( (light + XY0b) & 0xFF00) + psource[ 6]]; light += lightstep;
         prowdest[ 5] = vid.colormap[ ( (light + XY0a) & 0xFF00) + psource[ 5]]; light += lightstep;
         prowdest[ 4] = vid.colormap[ ( (light + XY0b) & 0xFF00) + psource[ 4]]; light += lightstep;
         prowdest[ 3] = vid.colormap[ ( (light + XY0a) & 0xFF00) + psource[ 3]]; light += lightstep;
         prowdest[ 2] = vid.colormap[ ( (light + XY0b) & 0xFF00) + psource[ 2]]; light += lightstep;
         prowdest[ 1] = vid.colormap[ ( (light + XY0a) & 0xFF00) + psource[ 1]]; light += lightstep;
         prowdest[ 0] = vid.colormap[ ( (light + XY0b) & 0xFF00) + psource[ 0]]; light += lightstep;
         // mankrip - end

         psource += sourcetstep;
         lightright += lightrightstep;
         lightleft += lightleftstep;
         prowdest += surfrowbytes;
      }

      if (psource >= r_sourcemax)
         psource -= r_stepback;
   }
}


void R_DrawSurfaceBlock8_mip1_C (void)
{
   psource = pbasesource;
   prowdest = prowdestbase;

   for (b=0 ; b<r_numvblocks ; b++)
   {
   // FIXME: make these locals?
   // FIXME: use delta rather than both right and left, like ASM?
      lightleft = r_lightptr[0];
      lightright = r_lightptr[1];
      r_lightptr += r_lightwidth;
      lightleftstep = (r_lightptr[0] - lightleft) >> 3;
      lightrightstep = (r_lightptr[1] - lightright) >> 3;

      for (i=0 ; i<8 ; i++)
      {
         // mankrip - begin
         if (ditherlight)
         {
            Y = i & 1;
            XY0a = lightmap_dither_kernel[1][Y];
            XY0b = lightmap_dither_kernel[0][Y];
         }
         else
         {
            XY0a = 0;
            XY0b = 0;
         }
         // mankrip - end

         lightstep = (lightleft - lightright) >> 3; // mankrip

         light = lightright;

         // mankrip - begin
         prowdest[ 7] = vid.colormap[ ( (light + XY0a) & 0xFF00) + psource[ 7]]; light += lightstep;
         prowdest[ 6] = vid.colormap[ ( (light + XY0b) & 0xFF00) + psource[ 6]]; light += lightstep;
         prowdest[ 5] = vid.colormap[ ( (light + XY0a) & 0xFF00) + psource[ 5]]; light += lightstep;
         prowdest[ 4] = vid.colormap[ ( (light + XY0b) & 0xFF00) + psource[ 4]]; light += lightstep;
         prowdest[ 3] = vid.colormap[ ( (light + XY0a) & 0xFF00) + psource[ 3]]; light += lightstep;
         prowdest[ 2] = vid.colormap[ ( (light + XY0b) & 0xFF00) + psource[ 2]]; light += lightstep;
         prowdest[ 1] = vid.colormap[ ( (light + XY0a) & 0xFF00) + psource[ 1]]; light += lightstep;
         prowdest[ 0] = vid.colormap[ ( (light + XY0b) & 0xFF00) + psource[ 0]]; light += lightstep;
         // mankrip - end

         psource += sourcetstep;
         lightright += lightrightstep;
         lightleft += lightleftstep;
         prowdest += surfrowbytes;
      }

      if (psource >= r_sourcemax)
         psource -= r_stepback;
   }
}


void R_DrawSurfaceBlock8_mip2_C (void)
{
   psource = pbasesource;
   prowdest = prowdestbase;

   for (b=0 ; b<r_numvblocks ; b++)
   {
   // FIXME: make these locals?
   // FIXME: use delta rather than both right and left, like ASM?
      lightleft = r_lightptr[0];
      lightright = r_lightptr[1];
      r_lightptr += r_lightwidth;
      lightleftstep = (r_lightptr[0] - lightleft) >> 2;
      lightrightstep = (r_lightptr[1] - lightright) >> 2;

      for (i=0 ; i<4 ; i++)
      {
         // mankrip - begin
         if (ditherlight)
         {
            Y = i & 1;
            XY0a = lightmap_dither_kernel[1][Y];
            XY0b = lightmap_dither_kernel[0][Y];
         }
         else
         {
            XY0a = 0;
            XY0b = 0;
         }
         // mankrip - end

         lightstep = (lightleft - lightright) >> 2; // mankrip

         light = lightright;

         // mankrip - begin
         prowdest[ 3] = vid.colormap[ ( (light + XY0a) & 0xFF00) + psource[ 3]]; light += lightstep;
         prowdest[ 2] = vid.colormap[ ( (light + XY0b) & 0xFF00) + psource[ 2]]; light += lightstep;
         prowdest[ 1] = vid.colormap[ ( (light + XY0a) & 0xFF00) + psource[ 1]]; light += lightstep;
         prowdest[ 0] = vid.colormap[ ( (light + XY0b) & 0xFF00) + psource[ 0]]; light += lightstep;
         // mankrip - end

         psource += sourcetstep;
         lightright += lightrightstep;
         lightleft += lightleftstep;
         prowdest += surfrowbytes;
      }

      if (psource >= r_sourcemax)
         psource -= r_stepback;
   }
}


void R_DrawSurfaceBlock8_mip3_C (void)
{
   psource = pbasesource;
   prowdest = prowdestbase;
   // mankrip - begin
   if (ditherlight)
   {
      XY0a  = lightmap_dither_kernel[1][0];
      XY0b  = lightmap_dither_kernel[0][0];
      XY0a1 = lightmap_dither_kernel[1][1];
      XY0b1 = lightmap_dither_kernel[0][1];
   }
   else
   {
      XY0a  = 0;
      XY0b  = 0;
      XY0a1 = 0;
      XY0b1 = 0;
   }
   // mankrip - end

   for (b=0 ; b<r_numvblocks ; b++)
   {
   // FIXME: make these locals?
   // FIXME: use delta rather than both right and left, like ASM?
      lightleft = r_lightptr[0];
      lightright = r_lightptr[1];
      r_lightptr += r_lightwidth;
      lightleftstep = (r_lightptr[0] - lightleft) >> 1;
      lightrightstep = (r_lightptr[1] - lightright) >> 1;

      lightstep = (lightleft - lightright) >> 1; // mankrip
      light = lightright;
      // mankrip - begin
      prowdest[ 1] = vid.colormap[ ( (light + XY0a) & 0xFF00) + psource[ 1]]; light += lightstep;
      prowdest[ 0] = vid.colormap[ ( (light + XY0b) & 0xFF00) + psource[ 0]]; light += lightstep;
      // mankrip - end
      psource += sourcetstep;
      lightright += lightrightstep;
      lightleft += lightleftstep;
      prowdest += surfrowbytes;

      lightstep = (lightleft - lightright) >> 1; // mankrip
      light = lightright;
      // mankrip - begin
      prowdest[ 1] = vid.colormap[ ( (light + XY0a1) & 0xFF00) + psource[ 1]]; light += lightstep;
      prowdest[ 0] = vid.colormap[ ( (light + XY0b1) & 0xFF00) + psource[ 0]]; light += lightstep;
      // mankrip - end
      psource += sourcetstep;
      prowdest += surfrowbytes;

      if (psource >= r_sourcemax)
         psource -= r_stepback;
   }
}

You also need to create and register a cvar called d_ditherlight.

The theory is quite simple. The variables XY0a and XY0b basically mean that we're filtering dimension 0 (which is the only one) of the lighting along the XY coordinates of the destination. The a and b versions of the variable are just an optimization so we don't have to read the X coordinate on every pixel. Also, due to the nature of the surface cache drawing algorithm, X is always guaranteed to start at zero.
And the shading for each color is unidimensional, so dimension 0 is all we need. For bidimensional sources, like textures, the dimension 1 is also needed.

As you might have guessed, this code doesn't work with colored lighting. But my intention here was just to increase the quality of the original lighting, and not to add more features to it.

Also, don't forget to disable it on BSP entities that use cel-shading, if your engine supports that feature.

This dithering may look too sharp on distant mip0 surfaces, so setting d_mipscale to a higher value, like 1.4, may be recommended to reduce artifacts like this on the lighting.

And here's the code for dithered lighting on the padded mip0 surfaces, which are used in Makaqu's texture dithering:
Code: Select all
void R_DrawSurfaceBlock8_mip0_Padded (void) // mankrip - for dithering
{
   // mankrip - begin
   // in most cases, square textures are tiled, and non-square textures aren't.
   qboolean tiled = (r_drawsurf.texture->width == r_drawsurf.texture->height);
   // mankrip - end
   prowdest = prowdestbase;
   // mankrip - begin
   if (d_dither.value)
   {
      if (tiled)
      {
         // duplicate the last line of texture, wrapping the texture
         psource = pbasesource + r_stepback - r_drawsurf.texture->width;
         if (psource >= r_sourcemax)
            psource -= r_stepback;
      }
      else
         psource = pbasesource;

      prowdest++; // skip the first column, we'll use -1 for that
      r_lightptr_start = r_lightptr; // duplicate the first line of light
      lightleft = r_lightptr_start[0];
      lightright = r_lightptr_start[1];
      r_lightptr_start += r_lightwidth;
      lightleftstep = (r_lightptr_start[0] - lightleft) >> 4;
      lightrightstep = (r_lightptr_start[1] - lightright) >> 4;

      if (ditherlight)
      {
         XY0a = lightmap_dither_kernel[1][1];
         XY0b = lightmap_dither_kernel[0][1];
      }
      else
      {
         XY0a = 0;
         XY0b = 0;
      }

      lightstep = (lightleft - lightright) >> 4;

      light = lightright;

      if (padr)
      prowdest[16] = vid.colormap[ ( (light + XY0b) & 0xFF00) + psource[ (tiled) ? ( (psource + 16 >= r_sourcemax) ? (16 - r_stepback) : 16) : 15]]; // right padding
      prowdest[15] = vid.colormap[ ( (light + XY0a) & 0xFF00) + psource[15]]; light += lightstep;
      prowdest[14] = vid.colormap[ ( (light + XY0b) & 0xFF00) + psource[14]]; light += lightstep;
      prowdest[13] = vid.colormap[ ( (light + XY0a) & 0xFF00) + psource[13]]; light += lightstep;
      prowdest[12] = vid.colormap[ ( (light + XY0b) & 0xFF00) + psource[12]]; light += lightstep;
      prowdest[11] = vid.colormap[ ( (light + XY0a) & 0xFF00) + psource[11]]; light += lightstep;
      prowdest[10] = vid.colormap[ ( (light + XY0b) & 0xFF00) + psource[10]]; light += lightstep;
      prowdest[ 9] = vid.colormap[ ( (light + XY0a) & 0xFF00) + psource[ 9]]; light += lightstep;
      prowdest[ 8] = vid.colormap[ ( (light + XY0b) & 0xFF00) + psource[ 8]]; light += lightstep;
      prowdest[ 7] = vid.colormap[ ( (light + XY0a) & 0xFF00) + psource[ 7]]; light += lightstep;
      prowdest[ 6] = vid.colormap[ ( (light + XY0b) & 0xFF00) + psource[ 6]]; light += lightstep;
      prowdest[ 5] = vid.colormap[ ( (light + XY0a) & 0xFF00) + psource[ 5]]; light += lightstep;
      prowdest[ 4] = vid.colormap[ ( (light + XY0b) & 0xFF00) + psource[ 4]]; light += lightstep;
      prowdest[ 3] = vid.colormap[ ( (light + XY0a) & 0xFF00) + psource[ 3]]; light += lightstep;
      prowdest[ 2] = vid.colormap[ ( (light + XY0b) & 0xFF00) + psource[ 2]]; light += lightstep;
      prowdest[ 1] = vid.colormap[ ( (light + XY0a) & 0xFF00) + psource[ 1]]; light += lightstep;
      prowdest[ 0] = vid.colormap[ ( (light + XY0b) & 0xFF00) + psource[ 0]];
      if (padl)
      prowdest[-1] = vid.colormap[ ( (light + XY0a) & 0xFF00) + psource[ (tiled) ? ( (psource - 1 < 0) ? (r_stepback - 1) : -1) : 0]]; // left padding

      prowdest += surfrowbytes;
   }
   // mankrip - end
   psource = pbasesource;
   #if 0
   for (v=0 ; v<r_numvblocks ; v++)
   {
      for (i=0 ; i<16 ; i++)
      {
         if (padr)
         prowdest[16 + i * surfrowbytes] = 0;
         prowdest[15 + i * surfrowbytes] = 0;
         prowdest[14 + i * surfrowbytes] = 0;
         prowdest[13 + i * surfrowbytes] = 0;
         prowdest[12 + i * surfrowbytes] = 0;
         prowdest[11 + i * surfrowbytes] = 0;
         prowdest[10 + i * surfrowbytes] = 0;
         prowdest[ 9 + i * surfrowbytes] = 0;
         prowdest[ 8 + i * surfrowbytes] = 0;
         prowdest[ 7 + i * surfrowbytes] = 0;
         prowdest[ 6 + i * surfrowbytes] = 0;
         prowdest[ 5 + i * surfrowbytes] = 0;
         prowdest[ 4 + i * surfrowbytes] = 0;
         prowdest[ 3 + i * surfrowbytes] = 0;
         prowdest[ 2 + i * surfrowbytes] = 0;
         prowdest[ 1 + i * surfrowbytes] = 0;
         prowdest[ 0 + i * surfrowbytes] = 0;
         if (padl)
         prowdest[-1 + i * surfrowbytes] = 0;
      }
   }
   #endif

   for (b=0 ; b<r_numvblocks ; b++)
   {
   // FIXME: make these locals?
   // FIXME: use delta rather than both right and left, like ASM?
      lightleft = r_lightptr[0];
      lightright = r_lightptr[1];
      r_lightptr += r_lightwidth;
      lightleftstep = (r_lightptr[0] - lightleft) >> 4;
      lightrightstep = (r_lightptr[1] - lightright) >> 4;

      for (i=0 ; i<16 ; i++)
      {
         // mankrip - begin
         if (ditherlight)
         {
            Y = i & 1;
            XY0a = lightmap_dither_kernel[1][Y];
            XY0b = lightmap_dither_kernel[0][Y];
         }
         else
         {
            XY0a = 0;
            XY0b = 0;
         }

         lightstep = (lightleft - lightright) >> 4;
         // mankrip - end

         light = lightright;

         // mankrip - begin
         if (padr)
         prowdest[16] = vid.colormap[ ( (light + XY0b) & 0xFF00) + psource[ (tiled) ? ( (psource + 16 >= r_sourcemax) ? (16 - r_stepback) : 16) : 15]]; // right padding
         prowdest[15] = vid.colormap[ ( (light + XY0a) & 0xFF00) + psource[15]]; light += lightstep;
         prowdest[14] = vid.colormap[ ( (light + XY0b) & 0xFF00) + psource[14]]; light += lightstep;
         prowdest[13] = vid.colormap[ ( (light + XY0a) & 0xFF00) + psource[13]]; light += lightstep;
         prowdest[12] = vid.colormap[ ( (light + XY0b) & 0xFF00) + psource[12]]; light += lightstep;
         prowdest[11] = vid.colormap[ ( (light + XY0a) & 0xFF00) + psource[11]]; light += lightstep;
         prowdest[10] = vid.colormap[ ( (light + XY0b) & 0xFF00) + psource[10]]; light += lightstep;
         prowdest[ 9] = vid.colormap[ ( (light + XY0a) & 0xFF00) + psource[ 9]]; light += lightstep;
         prowdest[ 8] = vid.colormap[ ( (light + XY0b) & 0xFF00) + psource[ 8]]; light += lightstep;
         prowdest[ 7] = vid.colormap[ ( (light + XY0a) & 0xFF00) + psource[ 7]]; light += lightstep;
         prowdest[ 6] = vid.colormap[ ( (light + XY0b) & 0xFF00) + psource[ 6]]; light += lightstep;
         prowdest[ 5] = vid.colormap[ ( (light + XY0a) & 0xFF00) + psource[ 5]]; light += lightstep;
         prowdest[ 4] = vid.colormap[ ( (light + XY0b) & 0xFF00) + psource[ 4]]; light += lightstep;
         prowdest[ 3] = vid.colormap[ ( (light + XY0a) & 0xFF00) + psource[ 3]]; light += lightstep;
         prowdest[ 2] = vid.colormap[ ( (light + XY0b) & 0xFF00) + psource[ 2]]; light += lightstep;
         prowdest[ 1] = vid.colormap[ ( (light + XY0a) & 0xFF00) + psource[ 1]]; light += lightstep;
         prowdest[ 0] = vid.colormap[ ( (light + XY0b) & 0xFF00) + psource[ 0]];
         if (padl)
         prowdest[-1] = vid.colormap[ ( (light + XY0a) & 0xFF00) + psource[ (tiled) ? ( (psource - 1 < 0) ? (r_stepback - 1) : -1) : 0]]; // left padding
         // mankrip - end

         psource += sourcetstep;
         lightright += lightrightstep;
         lightleft += lightleftstep;
         prowdest += surfrowbytes;
      }

      // mankrip - begin
      if (!tiled && b == r_numvblocks - 1)
         psource -= sourcetstep;
      // mankrip - end
      if (psource >= r_sourcemax)
         psource -= r_stepback;
   }
   // mankrip - begin
   if (d_dither.value)
   {
      if (ditherlight)
      {
         XY0a = lightmap_dither_kernel[1][0];
         XY0b = lightmap_dither_kernel[0][0];
      }
      else
      {
         XY0a = 0;
         XY0b = 0;
      }

      light -= lightstep * 15; // step back to the last line of light

      if (padr)
      prowdest[16] = vid.colormap[ ( (light + XY0b) & 0xFF00) + psource[ (tiled) ? ( (psource + 16 >= r_sourcemax) ? (16 - r_stepback) : 16) : 15]]; // right padding
      prowdest[15] = vid.colormap[ ( (light + XY0a) & 0xFF00) + psource[15]]; light += lightstep;
      prowdest[14] = vid.colormap[ ( (light + XY0b) & 0xFF00) + psource[14]]; light += lightstep;
      prowdest[13] = vid.colormap[ ( (light + XY0a) & 0xFF00) + psource[13]]; light += lightstep;
      prowdest[12] = vid.colormap[ ( (light + XY0b) & 0xFF00) + psource[12]]; light += lightstep;
      prowdest[11] = vid.colormap[ ( (light + XY0a) & 0xFF00) + psource[11]]; light += lightstep;
      prowdest[10] = vid.colormap[ ( (light + XY0b) & 0xFF00) + psource[10]]; light += lightstep;
      prowdest[ 9] = vid.colormap[ ( (light + XY0a) & 0xFF00) + psource[ 9]]; light += lightstep;
      prowdest[ 8] = vid.colormap[ ( (light + XY0b) & 0xFF00) + psource[ 8]]; light += lightstep;
      prowdest[ 7] = vid.colormap[ ( (light + XY0a) & 0xFF00) + psource[ 7]]; light += lightstep;
      prowdest[ 6] = vid.colormap[ ( (light + XY0b) & 0xFF00) + psource[ 6]]; light += lightstep;
      prowdest[ 5] = vid.colormap[ ( (light + XY0a) & 0xFF00) + psource[ 5]]; light += lightstep;
      prowdest[ 4] = vid.colormap[ ( (light + XY0b) & 0xFF00) + psource[ 4]]; light += lightstep;
      prowdest[ 3] = vid.colormap[ ( (light + XY0a) & 0xFF00) + psource[ 3]]; light += lightstep;
      prowdest[ 2] = vid.colormap[ ( (light + XY0b) & 0xFF00) + psource[ 2]]; light += lightstep;
      prowdest[ 1] = vid.colormap[ ( (light + XY0a) & 0xFF00) + psource[ 1]]; light += lightstep;
      prowdest[ 0] = vid.colormap[ ( (light + XY0b) & 0xFF00) + psource[ 0]];
      if (padl)
      prowdest[-1] = vid.colormap[ ( (light + XY0a) & 0xFF00) + psource[ (tiled) ? ( (psource - 1 < 0) ? (r_stepback - 1) : -1) : 0]]; // left padding
   }
   // mankrip - end
}
Ph'nglui mglw'nafh mankrip Hell's end wgah'nagl fhtagn.
==-=-=-=-=-=-=-=-=-=-==
Dev blog / Twitter / YouTube
User avatar
mankrip
 
Posts: 915
Joined: Fri Jul 04, 2008 3:02 am

Re: Kernel-based dithering of BSP surface lighting (in softw

Postby mh » Thu Aug 29, 2013 6:13 pm

I'm wondering at this stage if there's better mileage in just biting the bullet and writing a 32-bit software renderer?

One theoretical way of doing this is by drawing 3 views - one for each of the R, G and B channels - to offscreen buffers, then combining them during write-out to the back buffer. You'd need 3 copies of each colormap, again one for each of R, G and B, but that's a lot more lightweight than lots of huge LUTs. An optimization would be to run the vertex transforms and Z test once only, then only the actual drawing runs 3 times. You could even reuse the original ASM code for much of this.
We had the power, we had the space, we had a sense of time and place
We knew the words, we knew the score, we knew what we were fighting for
User avatar
mh
 
Posts: 2287
Joined: Sat Jan 12, 2008 1:38 am

Re: Kernel-based dithering of BSP surface lighting (in softw

Postby mankrip » Thu Aug 29, 2013 7:00 pm

Well, this algorithm is blazing fast. The only change for each texel is using (light + XY0a) (or (light + XY0b)) instead of light. It should be quite a bit slower than the x86 ASM version, but someone with enough skills could port it to Abrash's code.

But the main point for me is that it keeps the original colors. In the screens I've posted, notice how the darker parts of the wall go a bit cyan, and then go somewhere between green and brown before going fully black.

To me, this is part of Quake's visual identity. In the software renderer, the darkest spots of the game are actually very rich with all those different and somewhat unexpected colors showing up in the shading. It gives a better sense of the ravaged, desolated places where the player goes, because the darker it is, the more chaotic it gets. This is the same reason why I like this kind of filtering in PrBoom's software renderer, and always wanted to do the same in Quake.

In hardware-accelerated engines, the shading looks too clean, too perfect, and that makes the atmosphere more bland.

However, I often think about how to implement colored lighting, and it's a case where I agree that 24 or 32 bit color should be better. Maps with colored lighting are almost always made for hardware-accelerated engines, so using true color rendering for them is the best to make them look as their authors intended.

About your ideas for optimization, the easiest way for doing that would be to switch the surface cache and virtual screen buffer for each color before calling the function to draw the spans. But since the dithered versions of those functions are in C, it would be more appropriate to convert them to 32-bit, using 32-bit surface caches.
Ph'nglui mglw'nafh mankrip Hell's end wgah'nagl fhtagn.
==-=-=-=-=-=-=-=-=-=-==
Dev blog / Twitter / YouTube
User avatar
mankrip
 
Posts: 915
Joined: Fri Jul 04, 2008 3:02 am

Re: Kernel-based dithering of BSP surface lighting (in softw

Postby Spike » Thu Aug 29, 2013 10:33 pm

32bit surface caches rock, although depending on surface caches means you're stuck with 'nearest' filtering (including dithered nearest sampling).
you can do linear sampling with or without a surface cache - it just looks terrible in 8bit, and slow in 24+bit.
Spike
 
Posts: 2883
Joined: Fri Nov 05, 2004 3:12 am
Location: UK

Re: Kernel-based dithering of BSP surface lighting (in softw

Postby leileilol » Thu Aug 29, 2013 10:40 pm

mh wrote:One theoretical way of doing this is by drawing 3 views - one for each of the R, G and B channels - to offscreen buffers, then combining them during write-out to the back buffer. You'd need 3 copies of each colormap, again one for each of R, G and B, but that's a lot more lightweight than lots of huge LUTs. An optimization would be to run the vertex transforms and Z test once only, then only the actual drawing runs 3 times. You could even reuse the original ASM code for much of this.



I was planning to do this same thing (and have error-diffusion dithering similar to what I did for models at the span level combining a lighting surfacecache with a texture surfacecache (along with planned lighting features to go in the buffer similar to deferred rendering)) but I had trouble with multiple buffers already with the water. You'd see lagged behind frames of them.

I was also thinking about a lighting-only surfacecache too, but I get enough SHOWRAM spam with just one expanded-for-15bpp-rendering surfacecache anyway


Also I wonder what Makaqu's platform target is anymore. Is it still the Dreamcast? Is it still a 6th generation Win9x computer?
i should not be here
leileilol
 
Posts: 2783
Joined: Fri Oct 15, 2004 3:23 am


Return to Programming Tutorials

Who is online

Users browsing this forum: No registered users and 1 guest