Several of the maps have a very heavy dynamic/animated light content, with almost every surface in a scene having some kind of dynamic lighting on it, so the performance of dynamic light updates is utterly critical to overall performance of the engine. Most of the time it was perfectly fine, but there was one machine I tested on where things ground down to 10 FPS.
After trying various workarounds (PBOs, updating per-surface vs bulk updating), I finally did what I should have done in the first place and ground out some code to measure actual timings inside glTexSubImage2D. The results were quite surprising.
GLQuake by default uses internal format GL_RGBA, format GL_RGBA and type GL_UNSIGNED_BYTE. However, the fastest mode in all cases (even on machines that didn't slow down) turned out to be internal format GL_RGBA, format GL_BGRA and type GL_UNSIGNED_INT_8_8_8_8_REV. GL_BGRA and the packed pixel types need OpenGL 1.2 support, so I guess 3DFX owners are excluded, but everyone else can join the party.
Measurements:
Code: Select all
mode: 0 320ms [GL_RGBA/GL_UNSIGNED_BYTE] (OK)
mode: 1 317ms [GL_BGRA/GL_UNSIGNED_BYTE] (OK)
mode: 2 377ms [GL_RGBA/GL_UNSIGNED_INT_8_8_8_8] (OK)
mode: 3 375ms [GL_BGRA/GL_UNSIGNED_INT_8_8_8_8] (OK)
mode: 4 376ms [GL_RGBA/GL_UNSIGNED_INT_8_8_8_8_REV] (OK)
mode: 5 12ms [GL_BGRA/GL_UNSIGNED_INT_8_8_8_8_REV] (OK)
Code: Select all
// size in pixels of the SDL window; also used for the GL viewport and the ortho projection
#define WINDOW_WIDTH 800
#define WINDOW_HEIGHT 600
#include "SDL.h"
#include "SDL_opengl.h"
#pragma comment (lib, "SDL.lib")
#pragma comment (lib, "SDLmain.lib")
#pragma comment (lib, "opengl32.lib")
// dimensions in texels of the texture used both for the benchmark and for the on-screen test image
#define TEX_WIDTH 512
#define TEX_HEIGHT 512
// client-side source data handed to glTexSubImage2D; one unsigned int per texel
// (RenderOpenGL writes bytes 0..3 of each element, so a 4-byte unsigned int is assumed)
unsigned int sibuffer[TEX_WIDTH * TEX_HEIGHT];
// GL texture name of the displayed texture; assigned in main once the fastest mode is known
unsigned int teximage = 0;
// incremented once per RenderOpenGL call and mixed into the generated pattern so it changes every frame
unsigned int framecount = 0;
// find the fastest modes to use for glTexSubImage2D
// one candidate format/type pair for glTexSubImage2D together with its benchmark result;
// field order matters because the tsimodes table uses positional initialisers
typedef struct tsitest_s
{
// printable name of the format, used only in the results printout
char formatstr[64];
// printable name of the type, used only in the results printout
char typestr[64];
// value passed as the "format" argument of glTexImage2D/glTexSubImage2D
GLenum format;
// value passed as the "type" argument of glTexImage2D/glTexSubImage2D
GLenum type;
// elapsed SDL_GetTicks difference for the benchmark loop (printed as milliseconds); filled in by R_SetTSIMode
int modespeed;
// true when glGetError reported an error after the benchmark for this mode; filled in by R_SetTSIMode
bool failed;
} tsitest_t;
// the candidate modes that R_SetTSIMode benchmarks, in the order they are reported.
// the trailing "666, true" in every row is only a placeholder (not yet measured, assumed failed);
// R_SetTSIMode overwrites modespeed and failed for every row.
tsitest_t tsimodes[] =
{
{"GL_RGBA", "GL_UNSIGNED_BYTE", GL_RGBA, GL_UNSIGNED_BYTE, 666, true},
{"GL_BGRA", "GL_UNSIGNED_BYTE", GL_BGRA, GL_UNSIGNED_BYTE, 666, true},
{"GL_RGBA", "GL_UNSIGNED_INT_8_8_8_8", GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, 666, true},
{"GL_BGRA", "GL_UNSIGNED_INT_8_8_8_8", GL_BGRA, GL_UNSIGNED_INT_8_8_8_8, 666, true},
{"GL_RGBA", "GL_UNSIGNED_INT_8_8_8_8_REV", GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, 666, true},
{"GL_BGRA", "GL_UNSIGNED_INT_8_8_8_8_REV", GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, 666, true}
};
// index into tsimodes of the quickest working mode; 666 is the "nothing selected yet" sentinel
// that R_SetTSIMode tests for after benchmarking
int fastest = 666;
// best (lowest) modespeed seen so far; starts at a value larger than any timing expected from the benchmark
int bestspeed = 32768;
// Creates a width x height 2D texture with internal format GL_RGBA and leaves it bound
// to GL_TEXTURE_2D.  format and type describe the client data layout passed to
// glTexImage2D; no pixel data is supplied (NULL), so only storage is specified here.
// Returns the new texture name.
GLuint R_MakeMeATexture (int width, int height, GLenum format, GLenum type)
{
	GLuint handle = 0;

	glEnable (GL_TEXTURE_2D);

	// allocate a name and make it the current 2D texture
	glGenTextures (1, &handle);
	glBindTexture (GL_TEXTURE_2D, handle);

	// linear filtering for both magnification and minification; no mipmaps are uploaded
	glTexParameteri (GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
	glTexParameteri (GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);

	// specify level 0 with no initial contents
	glTexImage2D (GL_TEXTURE_2D, 0, GL_RGBA, width, height, 0, format, type, NULL);

	// block until GL has processed everything above so later timings don't include it
	glFinish ();

	return handle;
}
// Benchmarks every entry in tsimodes and records the quickest working one.
//
// For each mode a scratch texture is created, sibuffer is uploaded to it 16 times with
// glTexSubImage2D, and the elapsed SDL_GetTicks time is stored in modespeed; failed is
// set from glGetError.  The results are printed, and the globals fastest/bestspeed are
// set to the quickest mode that did not fail (ties go to the later entry).
// If no mode works an error box is shown and the process exits with a failure status.
// Requires a current GL context.
void R_SetTSIMode (void)
{
	int numtsimodes = sizeof (tsimodes) / sizeof (tsimodes[0]);

	// start from a clean slate so that calling this again (e.g. after a video restart)
	// cannot keep a stale selection from an earlier run
	fastest = 666;
	bestspeed = 32768;

	for (int i = 0; i < numtsimodes; i++)
	{
		// clear any pending errors.  glGetError only reports and resets one flag per call,
		// so keep calling it; the loop is bounded in case the implementation keeps
		// returning an error (as happens when there is no current context)
		for (int n = 0; n < 16 && glGetError () != GL_NO_ERROR; n++)
		{
		}

		// create a new texture object
		GLuint texnum = R_MakeMeATexture (TEX_WIDTH, TEX_HEIGHT, tsimodes[i].format, tsimodes[i].type);

		Uint32 start = SDL_GetTicks ();

		// SDL_GetTicks has insufficient resolution to measure one call so we need to run a few of them
		for (int t = 0; t < 16; t++)
		{
			glTexSubImage2D (GL_TEXTURE_2D, 0, 0, 0, TEX_WIDTH, TEX_HEIGHT, tsimodes[i].format, tsimodes[i].type, sibuffer);
		}

		// commit the uploads BEFORE stopping the clock so that timings are valid; otherwise
		// work the driver has merely queued would not be counted against this mode
		glFinish ();

		Uint32 end = SDL_GetTicks ();

		glDeleteTextures (1, &texnum);

		tsimodes[i].failed = (glGetError () != GL_NO_ERROR);
		tsimodes[i].modespeed = (int) (end - start);
	}

	for (int i = 0; i < numtsimodes; i++)
	{
		printf ("mode: %i %4ims [%s/%s] (%s)\n", i, tsimodes[i].modespeed, tsimodes[i].formatstr,
			tsimodes[i].typestr, tsimodes[i].failed ? "FAILED" : "OK");

		if (tsimodes[i].modespeed <= bestspeed && !tsimodes[i].failed)
		{
			bestspeed = tsimodes[i].modespeed;
			fastest = i;
		}
	}

	if (fastest == 666)
	{
		MessageBox (NULL, "Failed to find a format!", "Error", MB_OK | MB_ICONSTOP);

		// this is a fatal error, so report failure to the caller of the process
		exit (1);
	}
}
void RenderOpenGL (void)
{
framecount++;
for (int i = 0, w = 0; w < TEX_WIDTH; w++)
{
for (int h = 0; h < TEX_HEIGHT; h++, i++)
{
unsigned char *rgba = (unsigned char *) &sibuffer[i];
// 2 == red, 1 == green, 0 == blue
rgba[2] = ((h * i) + framecount) & 255;
rgba[1] = ((w * h) + framecount) & 255;
rgba[0] = ((w * i) + framecount) & 255;
rgba[3] = 255;
}
}
glClear (GL_COLOR_BUFFER_BIT);
glViewport (0, 0, WINDOW_WIDTH, WINDOW_HEIGHT);
glMatrixMode (GL_MODELVIEW);
glLoadIdentity ();
glMatrixMode (GL_PROJECTION);
glLoadIdentity ();
glOrtho (0, WINDOW_WIDTH, WINDOW_HEIGHT, 0, -99999, 99999);
glBindTexture (GL_TEXTURE_2D, teximage);
glTexEnvi (GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE);
glTexSubImage2D (GL_TEXTURE_2D, 0, 0, 0, TEX_WIDTH, TEX_HEIGHT, tsimodes[fastest].format, tsimodes[fastest].type, sibuffer);
glBegin (GL_QUADS);
glTexCoord2f (0, 0);
glVertex2f (0, 0);
glTexCoord2f (1, 0);
glVertex2f (TEX_WIDTH, 0);
glTexCoord2f (1, 1);
glVertex2f (TEX_WIDTH, TEX_HEIGHT);
glTexCoord2f (0, 1);
glVertex2f (0, TEX_HEIGHT);
glEnd ();
}
int main (int argc, char *argv[])
{
if (SDL_Init (SDL_INIT_VIDEO | SDL_INIT_NOPARACHUTE) != 0)
{
printf ("Unable to initialize SDL: %s\n", SDL_GetError ());
return 1;
}
SDL_GL_SetAttribute (SDL_GL_DOUBLEBUFFER, 1);
SDL_Surface *screen = SDL_SetVideoMode (WINDOW_WIDTH, WINDOW_HEIGHT, 32, SDL_OPENGL);
R_SetTSIMode ();
teximage = R_MakeMeATexture (TEX_WIDTH, TEX_HEIGHT, tsimodes[fastest].format, tsimodes[fastest].type);
glClearColor (0, 0, 0, 1);
int done = 0;
SDL_Event evt;
while (!done)
{
while (!done && SDL_PollEvent (&evt))
{
if (evt.type == SDL_QUIT)
{
done = 1;
break;
}
}
// run the screen update here
RenderOpenGL ();
SDL_GL_SwapBuffers ();
}
return 0;
}
It's definitely worthwhile including something like this in your video startup code and adjusting your formats and types.