reckless wrote: Nice one

I'd like a look at the code, unless it's proprietary.

Don't say you weren't warned...! (OpenGL can get really ugly sometimes)
Code: Select all
!!ARBvp1.0
# Vertex program that transforms each vertex by a per-vertex 3x4 matrix supplied
# one row at a time in vertex.texcoord[1], [2] and [3], then applies the
# model-view-projection matrix and emits fog, two texcoord sets and a shade term.
TEMP transnorm, dot, dotlow, dothigh;
TEMP transvert;
# DPH is a 3-component dot product plus the w of the second operand, so each
# row's 4th component is added on as a translation.
DPH transvert.x, vertex.position, vertex.texcoord[1];
DPH transvert.y, vertex.position, vertex.texcoord[2];
DPH transvert.z, vertex.position, vertex.texcoord[3];
# w is passed through untouched
MOV transvert.w, vertex.position.w;
TEMP outtmp;
# standard MVP transform of the matrix-transformed position, done row by row
DP4 outtmp.x, state.matrix.mvp.row[0], transvert;
DP4 outtmp.y, state.matrix.mvp.row[1], transvert;
DP4 outtmp.z, state.matrix.mvp.row[2], transvert;
DP4 outtmp.w, state.matrix.mvp.row[3], transvert;
MOV result.position, outtmp;
# fog coordinate is taken from the clip-space z computed above
MOV result.fogcoord, outtmp.z;
# rotate the normal by the 3x3 part of the same per-vertex matrix (no translation)
DP3 transnorm.x, vertex.normal, vertex.texcoord[1];
DP3 transnorm.y, vertex.normal, vertex.texcoord[2];
DP3 transnorm.z, vertex.normal, vertex.texcoord[3];
# the single input texcoord set is scaled by two different program.local
# parameters to produce two output texcoord sets
MUL result.texcoord[0], vertex.texcoord[0], program.local[0];
MUL result.texcoord[1], vertex.texcoord[0], program.local[1];
# NOTE(review): program.env[0] is presumably a light/shade direction - confirm against the caller
DP3 dot, transnorm, program.env[0];
# the MAX of the two ramps gives (1 + dot) when dot >= 0 and
# (1 + 0.2954545 * dot) when dot < 0, i.e. a shallower falloff on the negative side
ADD dothigh, dot, 1.0;
MAD dotlow, dot, 0.2954545, 1.0;
MAX result.texcoord[2], dotlow, dothigh;
END
Code: Select all
// Expose the array of matrix3x4_t held in scratchbuf to the vertex pipeline as
// three 4-component float texcoord arrays, one per matrix member (a, b, c) on
// texture units 1, 2 and 3.  The stride is sizeof (matrix3x4_t), so successive
// vertices fetch from successive matrices in the buffer.
// unit 1 <- member a
glClientActiveTexture (GL_TEXTURE1);
glEnableClientState (GL_TEXTURE_COORD_ARRAY);
glTexCoordPointer (4, GL_FLOAT, sizeof (matrix3x4_t), ((matrix3x4_t *) scratchbuf)->a);
// unit 2 <- member b
glClientActiveTexture (GL_TEXTURE2);
glEnableClientState (GL_TEXTURE_COORD_ARRAY);
glTexCoordPointer (4, GL_FLOAT, sizeof (matrix3x4_t), ((matrix3x4_t *) scratchbuf)->b);
// unit 3 <- member c
// NOTE(review): the client active texture is left on GL_TEXTURE3 after this - confirm later code resets it
glClientActiveTexture (GL_TEXTURE3);
glEnableClientState (GL_TEXTURE_COORD_ARRAY);
glTexCoordPointer (4, GL_FLOAT, sizeof (matrix3x4_t), ((matrix3x4_t *) scratchbuf)->c);
Code: Select all
// IQM animation CPU saver #1: fused scale-and-accumulate on a 3x4 matrix.
// Writes out = base * scale + add, element by element, across rows a, b and c.
// Each output element depends only on the matching element of base and add,
// so out may be the very same matrix as add (GL_AnimateIQMFrame relies on this).
void Matrix3x4_ScaleAdd (matrix3x4_t *out, matrix3x4_t *base, float scale, matrix3x4_t *add)
{
	int col;

	// row a
	for (col = 0; col < 4; col++)
	{
		out->a[col] = base->a[col] * scale + add->a[col];
	}

	// row b
	for (col = 0; col < 4; col++)
	{
		out->b[col] = base->b[col] * scale + add->b[col];
	}

	// row c
	for (col = 0; col < 4; col++)
	{
		out->c[col] = base->c[col] * scale + add->c[col];
	}
}
// IQM animation CPU saver #2: linear blend between two 3x4 matrices.
// Writes out = base1 + (base2 - base1) * scale2 for every element of rows a, b and c.
// scale1 is accepted but never read: the formula only equals
// base1 * scale1 + base2 * scale2 when scale1 == 1 - scale2, which is how
// GL_AnimateIQMFrame calls it.
void Matrix3x4_ScaleScaleAdd (matrix3x4_t *out, matrix3x4_t *base1, float scale1, matrix3x4_t *base2, float scale2)
{
	int col;

	// row a
	for (col = 0; col < 4; col++)
	{
		out->a[col] = ((base2->a[col] - base1->a[col]) * scale2) + base1->a[col];
	}

	// row b
	for (col = 0; col < 4; col++)
	{
		out->b[col] = ((base2->b[col] - base1->b[col]) * scale2) + base1->b[col];
	}

	// row c
	for (col = 0; col < 4; col++)
	{
		out->c[col] = ((base2->c[col] - base1->c[col]) * scale2) + base1->c[col];
	}
}
// IQM animation CPU saver #3: concatenate two 3x4 matrices through pointers.
// Treating a, b, c as the three rows, this writes out = mat1 * mat2:
// columns 0..2 are the 3x3 product, and column 3 is mat1's 3x3 part applied to
// mat2's 4th column plus mat1's own 4th column.
// out is written while mat1 and mat2 are still being read, so it must not be
// the same matrix as either input.
void Matrix3x4_MultiplyByRef (matrix3x4_t *out, matrix3x4_t *mat1, matrix3x4_t *mat2)
{
	int col;

	// output row a, driven by mat1 row a
	for (col = 0; col < 3; col++)
	{
		out->a[col] = (mat2->a[col] * mat1->a[0]) + (mat2->b[col] * mat1->a[1]) + (mat2->c[col] * mat1->a[2]);
	}

	out->a[3] = (mat2->a[3] * mat1->a[0]) + (mat2->b[3] * mat1->a[1]) + (mat2->c[3] * mat1->a[2]) + mat1->a[3];

	// output row b, driven by mat1 row b
	for (col = 0; col < 3; col++)
	{
		out->b[col] = (mat2->a[col] * mat1->b[0]) + (mat2->b[col] * mat1->b[1]) + (mat2->c[col] * mat1->b[2]);
	}

	out->b[3] = (mat2->a[3] * mat1->b[0]) + (mat2->b[3] * mat1->b[1]) + (mat2->c[3] * mat1->b[2]) + mat1->b[3];

	// output row c, driven by mat1 row c
	for (col = 0; col < 3; col++)
	{
		out->c[col] = (mat2->a[col] * mat1->c[0]) + (mat2->b[col] * mat1->c[1]) + (mat2->c[col] * mat1->c[2]);
	}

	out->c[3] = (mat2->a[3] * mat1->c[0]) + (mat2->b[3] * mat1->c[1]) + (mat2->c[3] * mat1->c[2]) + mat1->c[3];
}
// Builds one skinning matrix per vertex for an IQM model and stores them in scratchbuf.
//
// iqm       - model data; reads num_poses, num_joints, frames, version, jointsv1/jointsv2,
//             blendindexes, blendweights and numvertexes; writes outframe and the cached* fields
// currframe - pose being blended towards (lerp == 1 gives exactly this pose)
// lastframe - pose being blended from (lerp == 0 gives exactly this pose)
// lerp      - blend factor between the two poses; quantised to steps of 0.2 before use
//
// Step 1 blends the two poses per joint and concatenates each joint with its parent
// into iqm->outframe; this is skipped when the frame pair and quantised lerp match
// what was cached on the previous call for this model.
// Step 2 always runs: it combines up to four joint matrices per vertex, by blend weight,
// into the matrix3x4_t array at scratchbuf.
void GL_AnimateIQMFrame (iqmdata_t *iqm, int currframe, int lastframe, float lerp)
{
	int i;
	int frame1 = lastframe;
	int frame2 = currframe;
	matrix3x4_t *destmatrix = (matrix3x4_t *) scratchbuf;

	// with no poses there is nothing to index, and the modulo below would divide by zero
	// NOTE(review): scratchbuf is left untouched in this case - confirm callers never draw such a model
	if (iqm->num_poses <= 0) return;

	// frame sanity - wrap into [0, num_poses) to prevent going out of bounds.
	// C's % keeps the sign of the dividend, so a negative frame needs the extra
	// add to avoid a negative index into iqm->frames.
	frame1 %= iqm->num_poses;
	if (frame1 < 0) frame1 += iqm->num_poses;

	frame2 %= iqm->num_poses;
	if (frame2 < 0) frame2 += iqm->num_poses;

	// test for flipping the frames: blending B->A at (1 - lerp) is the same pose as
	// A->B at lerp, so normalise to the cached ordering to get a cache hit
	if (frame1 == iqm->cachedcurrframe && frame2 == iqm->cachedlastframe)
	{
		int temp = frame1;

		frame1 = frame2;
		frame2 = temp;
		lerp = 1.0f - lerp;
	}

	// coarsen the lerp so that we don't need to recache too often
	// this animates at 50 FPS which should be enough for anyone
	lerp = (float) ((int) (lerp * 5)) / 5;

	// if the cached frames and lerp don't change there is no need to re-animate
	// these are all that it's safe to cache as the transformed bones are stored in the scratchbuf
	// we can only cache per model too as the data set is just too big to cache per entity
	if (frame1 != iqm->cachedlastframe || frame2 != iqm->cachedcurrframe || lerp != iqm->cachedlerp)
	{
		matrix3x4_t *mat1 = &iqm->frames[frame1 * iqm->num_joints];
		matrix3x4_t *mat2 = &iqm->frames[frame2 * iqm->num_joints];
		matrix3x4_t mat;

		for (i = 0; i < iqm->num_joints; i++)
		{
			// the two format versions differ only in where the parent index is stored
			int parent = (iqm->version == IQM_VERSION1) ? iqm->jointsv1[i].parent : iqm->jointsv2[i].parent;

			Matrix3x4_ScaleScaleAdd (&mat, &mat1[i], (1 - lerp), &mat2[i], lerp);

			// a child reads its parent's finished outframe entry
			// NOTE(review): this assumes parents are stored before their children - confirm in the loader
			if (parent >= 0)
				Matrix3x4_MultiplyByRef (&iqm->outframe[i], &iqm->outframe[parent], &mat);
			else Matrix3x4_Copy (&iqm->outframe[i], &mat);
		}
	}

	// cache back the frames
	iqm->cachedlastframe = frame1;
	iqm->cachedcurrframe = frame2;
	iqm->cachedlerp = lerp;

	// The actual vertex generation based on the matrixes follows...
	{
		// blendweights were converted to float on load; this consumes maybe an extra 20k memory
		// for a big model but runs a coupla percent faster - that's the tradeoff
		const unsigned char *index = iqm->blendindexes;
		const float *weight = iqm->blendweights;

		for (i = 0; i < iqm->numvertexes; i++, index += 4, weight += 4)
		{
			Matrix3x4_Scale (&destmatrix[i], &iqm->outframe[index[0]], weight[0]);

			// yet another valiant attempt at clawing back some CPU from IQM animation:
			// a later weight is only looked at when the one before it is non-zero
			// NOTE(review): this assumes non-zero weights are packed at the front of each group of four - confirm in the loader
			if (weight[1])
			{
				Matrix3x4_ScaleAdd (&destmatrix[i], &iqm->outframe[index[1]], weight[1], &destmatrix[i]);

				if (weight[2])
				{
					Matrix3x4_ScaleAdd (&destmatrix[i], &iqm->outframe[index[2]], weight[2], &destmatrix[i]);

					if (weight[3])
					{
						Matrix3x4_ScaleAdd (&destmatrix[i], &iqm->outframe[index[3]], weight[3], &destmatrix[i]);
					}
				}
			}
		}
	}
}