Quake 2 Inline Assembly in r_part.c conversion to GAS

Discuss programming topics for the various GPL'd game engine sources.
Post Reply
Maraakate
Posts: 8
Joined: Sun May 31, 2015 9:37 am

Quake 2 Inline Assembly in r_part.c conversion to GAS

Post by Maraakate »

Hello all,

I've been working on the Q2DOS port for quite a few years with Sezero and one of the TODOs that have stared me in the face the entire time is the inline assembly for the particle drawing/blending code that was MSVC specific in r_part.c. Specifically R_DrawParticle() function. I did some reading on ASM and first moved the 3 blending declspec "naked" functions inside R_DrawParticle, made sure that worked. Then I broke this entire code out to an actual ASM file for use with ML.EXE. Still good. From here I started the conversion to GAS. It seems pretty straightforward, basically from what I understand all src, dst is switched on mov, cmp, etc. Typically things are declared by size for these functions like movl, movb, and so on. No big deal there. A gotcha (but the compiler is good enough to warn on this) is floating point math such as faddp, fsubp, etc. have the registers reversed.

Anyways, I converted it over. The engine didn't blow up (woo-hoo!) but no particles. I looked closer and noticed if I went to q2dm1 looked up at the sky and turned on cl_testparticles 1 I can see part of the test particles. If I fire the gun at the sky and look down quickly I can see part of it. So it looks like the z-clipping plane is wrong and the screen centering. I'm guessing an alignment issue? I have ruled out that it could be the 24-bit floating point mode that is set in some inline ASM because commenting that out in MSVC makes no difference, probably a small speed difference but I can port that part later. I have no idea at this point. But relevant code:

MASM version:

Code: Select all

 .386P
 .model FLAT
;
; r_parta.s
; x86 assembly-language particle code
;

include qasm.inc
include d_if.inc

if	id386

_DATA SEGMENT
_DATA ENDS

CONST SEGMENT
eight_thousand_hex dd 047000000r
PARTICLE_33     equ 0
PARTICLE_66     equ 1
PARTICLE_OPAQUE equ 2
CONST ENDS

_BSS SEGMENT
short_izi DW 01H DUP (?)
align 4
zi DD 01h DUP (?)
u DD 01H DUP (?)
v DD 01H DUP (?)
tmp DD 01H DUP (?)
transformed_vec DD 03H DUP (?)
local_vec DD 03H DUP (?)
ebpsave DD 01H DUP (?)
_BSS ENDS

_TEXT SEGMENT
 align 4
 public _R_DrawParticle
_R_DrawParticle:
;
; save trashed variables
;
 mov  dword ptr [ebpsave], ebp
 push esi
 push edi

;
; transform the particle
;
; VectorSubtract (pparticle->origin, r_origin, local);
 mov  esi, dword ptr [_partparms+partparms_particle]
 fld  dword ptr [esi+0]          ; p_o.x
 fsub dword ptr [_r_origin+0]     ; p_o.x-r_o.x
 fld  dword ptr [esi+4]          ; p_o.y | p_o.x-r_o.x
 fsub dword ptr [_r_origin+4]     ; p_o.y-r_o.y | p_o.x-r_o.x
 fld  dword ptr [esi+8]          ; p_o.z | p_o.y-r_o.y | p_o.x-r_o.x
 fsub dword ptr [_r_origin+8]     ; p_o.z-r_o.z | p_o.y-r_o.y | p_o.x-r_o.x
 fxch st(2)                      ; p_o.x-r_o.x | p_o.y-r_o.y | p_o.z-r_o.z
 fstp dword ptr [local_vec+0]        ; p_o.y-r_o.y | p_o.z-r_o.z
 fstp dword ptr [local_vec+4]        ; p_o.z-r_o.z
 fstp dword ptr [local_vec+8]        ; (empty)

; transformed[0] = DotProduct(local, r_pright);
; transformed[1] = DotProduct(local, r_pup);
; transformed[2] = DotProduct(local, r_ppn);
 fld  dword ptr [local_vec+0]        ; l.x
 fmul dword ptr [_r_pright+0]     ; l.x*pr.x
 fld  dword ptr [local_vec+4]        ; l.y | l.x*pr.x
 fmul dword ptr [_r_pright+4]     ; l.y*pr.y | l.x*pr.x
 fld  dword ptr [local_vec+8]        ; l.z | l.y*pr.y | l.x*pr.x
 fmul dword ptr [_r_pright+8]     ; l.z*pr.z | l.y*pr.y | l.x*pr.x
 fxch st(2)                      ; l.x*pr.x | l.y*pr.y | l.z*pr.z
 faddp st(1), st                 ; l.x*pr.x + l.y*pr.y | l.z*pr.z
 faddp st(1), st                 ; l.x*pr.x + l.y*pr.y + l.z*pr.z
 fstp  dword ptr [transformed_vec+0] ; (empty)

 fld  dword ptr [local_vec+0]        ; l.x
 fmul dword ptr [_r_pup+0]        ; l.x*pr.x
 fld  dword ptr [local_vec+4]        ; l.y | l.x*pr.x
 fmul dword ptr [_r_pup+4]        ; l.y*pr.y | l.x*pr.x
 fld  dword ptr [local_vec+8]        ; l.z | l.y*pr.y | l.x*pr.x
 fmul dword ptr [_r_pup+8]        ; l.z*pr.z | l.y*pr.y | l.x*pr.x
 fxch st(2)                      ; l.x*pr.x | l.y*pr.y | l.z*pr.z
 faddp st(1), st                 ; l.x*pr.x + l.y*pr.y | l.z*pr.z
 faddp st(1), st                 ; l.x*pr.x + l.y*pr.y + l.z*pr.z
 fstp  dword ptr [transformed_vec+4] ; (empty)

 fld  dword ptr [local_vec+0]        ; l.x
 fmul dword ptr [_r_ppn+0]        ; l.x*pr.x
 fld  dword ptr [local_vec+4]        ; l.y | l.x*pr.x
 fmul dword ptr [_r_ppn+4]        ; l.y*pr.y | l.x*pr.x
 fld  dword ptr [local_vec+8]        ; l.z | l.y*pr.y | l.x*pr.x
 fmul dword ptr [_r_ppn+8]        ; l.z*pr.z | l.y*pr.y | l.x*pr.x
 fxch st(2)                      ; l.x*pr.x | l.y*pr.y | l.z*pr.z
 faddp st(1), st(0)                 ; l.x*pr.x + l.y*pr.y | l.z*pr.z
 faddp st(1), st(0)                 ; l.x*pr.x + l.y*pr.y + l.z*pr.z
 fstp  dword ptr [transformed_vec+8] ; (empty)

;
; make sure that the transformed particle is not in front of
; the particle Z clip plane.  We can do the comparison in 
; integer space since we know the sign of one of the inputs
; and can figure out the sign of the other easily enough.
;
;	if (transformed[2] < PARTICLE_Z_CLIP)
;		return;

 mov  eax, dword ptr [transformed_vec+8]
 and  eax, eax
 js   endpartfunc
 cmp  eax, float_particle_z_clip
 jl   endpartfunc

;
; project the point by initiating the 1/z calc
;
;	zi = 1.0 / transformed[2];
 fld   float_1
 fdiv  dword ptr [transformed_vec+8]

; prefetch the next particle
 mov ebp, ds:dword ptr [_s_prefetch_address]
 mov ebp, [ebp]

; finish the above divide
 fstp  dword ptr [zi]

; u = (int)(xcenter + zi * transformed[0] + 0.5);
; v = (int)(ycenter - zi * transformed[1] + 0.5);
 fld   dword ptr [zi]                           ; zi
 fmul  dword ptr [transformed_vec+0]    ; zi * transformed[0]
 fld   dword ptr [zi]                           ; zi | zi * transformed[0]
 fmul  dword ptr [transformed_vec+4]    ; zi * transformed[1] | zi * transformed[0]
 fxch  st(1)                        ; zi * transformed[0] | zi * transformed[1]
 fadd  ds:dword ptr[_xcenter]                      ; xcenter + zi * transformed[0] | zi * transformed[1]
 fxch  st(1)                        ; zi * transformed[1] | xcenter + zi * transformed[0]
 fld   ds:dword ptr[_ycenter]                      ; ycenter | zi * transformed[1] | xcenter + zi * transformed[0]
 fsubrp st(1), st(0)                ; ycenter - zi * transformed[1] | xcenter + zi * transformed[0]
 fxch  st(1)                        ; xcenter + zi * transformed[0] | ycenter + zi * transformed[1]
 fadd  float_point5                   ; xcenter + zi * transformed[0] + 0.5 | ycenter - zi * transformed[1]
 fxch  st(1)                        ; ycenter - zi * transformed[1] | xcenter + zi * transformed[0] + 0.5 
 fadd  float_point5                   ; ycenter - zi * transformed[1] + 0.5 | xcenter + zi * transformed[0] + 0.5 
 fxch  st(1)                        ; u | v
 fistp dword ptr [u]                ; v
 fistp dword ptr [v]                ; (empty)

;
; clip out the particle
;

;	if ((v > d_vrectbottom_particle) || 
;		(u > d_vrectright_particle) ||
;		(v < d_vrecty) ||
;		(u < d_vrectx))
;	{
;		return;
;	}

 mov ebx, u
 mov ecx, v
 cmp ecx, ds:dword ptr [_d_vrectbottom_particle]
 jg  endpartfunc
 cmp ecx, ds:dword ptr [_d_vrecty]
 jl  endpartfunc
 cmp ebx, ds:dword ptr [_d_vrectright_particle]
 jg  endpartfunc
 cmp ebx, ds:dword ptr [_d_vrectx]
 jl  endpartfunc

;
; compute addresses of zbuffer, framebuffer, and 
; compute the Z-buffer reference value.
;
; EBX      = U
; ECX      = V
;
; Outputs:
; ESI = Z-buffer address
; EDI = framebuffer address
;
; ESI = d_pzbuffer + (d_zwidth * v) + u;
 mov esi, ds:dword ptr[_d_pzbuffer]             ; esi = d_pzbuffer
 mov eax, ds:dword ptr[_d_zwidth]               ; eax = d_zwidth
 mul ecx                         ; eax = d_zwidth*v
 add eax, ebx                    ; eax = d_zwidth*v+u
 shl eax, 1                      ; eax = 2*(d_zwidth*v+u)
 add esi, eax                    ; esi = ( short * ) ( d_pzbuffer + ( d_zwidth * v ) + u )

; initiate
; izi = (int)(zi * 0x8000);
 fld  dword ptr [zi]
 fmul eight_thousand_hex

; EDI = pdest = d_viewbuffer + d_scantable[v] + u;
 lea edi, ds:dword ptr _d_scantable[ecx*4]
 mov edi, dword ptr [edi]
 add edi, ds:dword ptr[_d_viewbuffer]
 add edi, ebx

; complete
; izi = (int)(zi * 0x8000);
 fistp dword ptr [tmp]
 mov eax, dword ptr [tmp]
 mov word ptr [short_izi], ax

;
; determine the screen area covered by the particle,
; which also means clamping to a min and max
;
;	pix = izi >> d_pix_shift;
 xor edx, edx
 mov dx, word ptr [short_izi]
 mov ecx, ds:dword ptr[_d_pix_shift]
 shr dx, cl

;	if (pix < d_pix_min)
;		pix = d_pix_min;
 cmp edx, ds:dword ptr[_d_pix_min]
 jge check_pix_max
 mov edx, ds:dword ptr[_d_pix_min]
 jmp skip_pix_clamp

;	else if (pix > d_pix_max)
;		pix = d_pix_max;
check_pix_max:
 cmp edx, ds:dword ptr[_d_pix_max]
 jle skip_pix_clamp
 mov edx, ds:dword ptr[_d_pix_max]

skip_pix_clamp:

;
; render the appropriate pixels
;
; ECX = count (used for inner loop)
; EDX = count (used for outer loop)
; ESI = zbuffer
; EDI = framebuffer
;
 mov ecx, edx

 cmp ecx, 1
 ja  over

over:

;
; at this point:
;
; ECX = count
;
 push ecx
 push edi
 push esi

top_of_pix_vert_loop:

top_of_pix_horiz_loop:

;	for ( ; count ; count--, pz += d_zwidth, pdest += screenwidth)
;	{
;		for (i=0 ; i<pix ; i++)
;		{
;			if (pz[i] <= izi)
;			{
;				pdest[i] = blendparticle( color, pdest[i] );
;			}
;		}
;	}
 xor   eax, eax

 mov   ax, word ptr [esi]

 cmp   ax, word ptr [short_izi]
 jg    end_of_horiz_loop

 mov   eax, ds:dword ptr [_partparms+partparms_color]

 cmp ds:dword ptr [_partparms+partparms_level], PARTICLE_66
 je  blendfunc_66
 jl  blendfunc_33
; BlendParticle100
 mov byte ptr [edi], al
 jmp done_blending
blendfunc_33:
 mov ebp, ds:dword ptr [_vid+vid_alphamap]
 xor ebx, ebx

 mov bl,  byte ptr [edi]
 shl ebx, 8

 add ebp, ebx
 add ebp, eax

 mov al,  byte ptr [ebp]

 mov byte ptr [edi], al
 jmp done_blending
blendfunc_66:
 mov ebp, ds:dword ptr [_vid+vid_alphamap]
 xor ebx, ebx

 shl eax,  8
 mov bl,   byte ptr [edi]

 add ebp, ebx
 add ebp, eax

 mov al,  byte ptr [ebp]

 mov byte ptr [edi], al
done_blending:

 add   edi, 1
 add   esi, 2

end_of_horiz_loop:

 dec   ecx
 jnz   top_of_pix_horiz_loop

 pop   esi
 pop   edi

 mov   ebp, ds:dword ptr[_d_zwidth]
 shl   ebp, 1

 add   esi, ebp
 add   edi, ds:dword ptr [_r_screenwidth]

 pop   ecx
 push  ecx

 push  edi
 push  esi

 dec   edx
 jnz   top_of_pix_vert_loop

 pop   ecx
 pop   ecx
 pop   ecx

endpartfunc:
 pop edi
 pop esi
 mov ebp, dword ptr[ebpsave]
 ret

_TEXT ENDS
endif	;id386
 END
GAS Version:

Code: Select all

//
// r_parta.s
// x86 assembly-language particle code.
//

#include "qasm.h"

#if	id386

	.data
eight_thousand_hex: .long	32768

	.bss
.lcomm	short_izi, 1
	.align 4
.lcomm	zi, 1, 4
.lcomm	u, 1, 4
.lcomm	v, 1, 4
.lcomm	tmp, 1, 4
.lcomm	transformed_vec, 12, 4
.lcomm	local_vec, 12, 4
.lcomm	ebpsave, 1, 4

	.text
#define PARTICLE_33	0
#define PARTICLE_66	1
#define PARTICLE_OPAQUE	2

	.align 4
.globl C(R_DrawParticle)
C(R_DrawParticle):
//
// save trashed variables
//
	movl %ebp, ebpsave
	pushl %esi
	pushl %edi

//
// transform the particle
//
// VectorSubtract (pparticle->origin, r_origin, local);
	movl  C(partparms)+partparms_particle, %esi
	flds  0(%esi)          // p_o.x
	fsubs C(r_origin)     // p_o.x-r_o.x
	flds  4(%esi)          // p_o.y | p_o.x-r_o.x
	fsubs C(r_origin)+4     // p_o.y-r_o.y | p_o.x-r_o.x
	flds  8(%esi)          // p_o.z | p_o.y-r_o.y | p_o.x-r_o.x
	fsubs C(r_origin)+8     // p_o.z-r_o.z | p_o.y-r_o.y | p_o.x-r_o.x
	fxch  %st(2)                      // p_o.x-r_o.x | p_o.y-r_o.y | p_o.z-r_o.z
	fstps local_vec+0        // p_o.y-r_o.y | p_o.z-r_o.z
	fstps local_vec+4        // p_o.z-r_o.z
	fstps local_vec+8        // (empty)

// transformed[0] = DotProduct(local, r_pright);
// transformed[1] = DotProduct(local, r_pup);
// transformed[2] = DotProduct(local, r_ppn);
	flds local_vec+0        // l.x
	fmuls C(r_pright)+0     // l.x*pr.x
	flds local_vec+4        // l.y | l.x*pr.x
	fmuls C(r_pright)+4     // l.y*pr.y | l.x*pr.x
	flds local_vec+8        // l.z | l.y*pr.y | l.x*pr.x
	fmuls C(r_pright)+8     // l.z*pr.z | l.y*pr.y | l.x*pr.x
	fxch %st(2)                      // l.x*pr.x | l.y*pr.y | l.z*pr.z
	faddp %st(0), %st(1)                 // l.x*pr.x + l.y*pr.y | l.z*pr.z
	faddp %st(0), %st(1)                 // l.x*pr.x + l.y*pr.y + l.z*pr.z
	fstps transformed_vec+0 // (empty)

	flds local_vec+0        // l.x
	fmuls C(r_pup)+0        // l.x*pr.x
	flds local_vec+4        // l.y | l.x*pr.x
	fmuls C(r_pup)+4        // l.y*pr.y | l.x*pr.x
	flds local_vec+8        // l.z | l.y*pr.y | l.x*pr.x
	fmuls C(r_pup)+8        // l.z*pr.z | l.y*pr.y | l.x*pr.x
	fxch %st(2)                      // l.x*pr.x | l.y*pr.y | l.z*pr.z
	faddp %st(0), %st(1)                 // l.x*pr.x + l.y*pr.y | l.z*pr.z
	faddp %st(0), %st(1)                 // l.x*pr.x + l.y*pr.y + l.z*pr.z
	fstps transformed_vec+4 // (empty)

	flds local_vec+0        // l.x
	fmuls C(r_ppn)+0        // l.x*pr.x
	flds local_vec+4        // l.y | l.x*pr.x
	fmuls C(r_ppn)+4        // l.y*pr.y | l.x*pr.x
	flds local_vec+8        // l.z | l.y*pr.y | l.x*pr.x
	fmuls C(r_ppn)+8        // l.z*pr.z | l.y*pr.y | l.x*pr.x
	fxch %st(2)                      // l.x*pr.x | l.y*pr.y | l.z*pr.z
	faddp %st(0), %st(1)                 // l.x*pr.x + l.y*pr.y | l.z*pr.z
	faddp %st(0), %st(1)                 // l.x*pr.x + l.y*pr.y + l.z*pr.z
	fstps transformed_vec+8 // (empty)

//
// make sure that the transformed particle is not in front of
// the particle Z clip plane.  We can do the comparison in 
// integer space since we know the sign of one of the inputs
// and can figure out the sign of the other easily enough.
//
//	if (transformed[2] < PARTICLE_Z_CLIP)
//		return;

	movl transformed_vec+8, %eax
	andl  %eax, %eax
	js   endpartfunc
	cmpl  float_particle_z_clip, %eax
	jl   endpartfunc

//
// project the point by initiating the 1/z calc
//
//	zi = 1.0 / transformed[2];
	flds   float_1
	fdiv  transformed_vec+8

// prefetch the next particle
	movl C(s_prefetch_address), %ebp
	movl (%ebp), %ebp

// finish the above divide
	fstps  zi

// u = (int)(xcenter + zi * transformed[0] + 0.5)
// v = (int)(ycenter - zi * transformed[1] + 0.5)
	flds zi                           // zi
	fmuls transformed_vec+0    // zi * transformed[0]
	flds zi                           // zi | zi * transformed[0]
	fmuls transformed_vec+4    // zi * transformed[1] | zi * transformed[0]
	fxch %st(1)                        // zi * transformed[0] | zi * transformed[1]
	fadds C(xcenter)                      // xcenter + zi * transformed[0] | zi * transformed[1]
	fxch %st(1)                        // zi * transformed[1] | xcenter + zi * transformed[0]
	flds C(ycenter)                      // ycenter | zi * transformed[1] | xcenter + zi * transformed[0]
	fsubrp %st(0), %st(1)                // ycenter - zi * transformed[1] | xcenter + zi * transformed[0]
	fxch  %st(1)                        // xcenter + zi * transformed[0] | ycenter + zi * transformed[1]
	fadds  float_point5                   // xcenter + zi * transformed[0] + 0.5 | ycenter - zi * transformed[1]
	fxch  %st(1)                        // ycenter - zi * transformed[1] | xcenter + zi * transformed[0] + 0.5 
	fadds  float_point5                   // ycenter - zi * transformed[1] + 0.5 | xcenter + zi * transformed[0] + 0.5 
	fxch  %st(1)                        // u | v
	fistps u                // v
	fistps v                // (empty)

//
// clip out the particle
//

//	if ((v > d_vrectbottom_particle) || 
//		(u > d_vrectright_particle) ||
//		(v < d_vrecty) ||
//		(u < d_vrectx))
//	{
//		return;
//	}

	movl u, %ebx
	movl v, %ecx
	cmpl C(d_vrectbottom_particle), %ecx
	jg  endpartfunc
	cmpl C(d_vrecty), %ecx
	jl  endpartfunc
	cmpl C(d_vrectright_particle), %ebx
	jg  endpartfunc
	cmpl C(d_vrectx), %ebx
	jl  endpartfunc

//
// compute addresses of zbuffer, framebuffer, and 
// compute the Z-buffer reference value.
//
// EBX      = U
// ECX      = V
//
// Outputs:
// ESI = Z-buffer address
// EDI = framebuffer address
//
// ESI = d_pzbuffer + (d_zwidth * v) + u;
	movl C(d_pzbuffer), %esi           // esi = d_pzbuffer
	movl C(d_zwidth), %eax             // eax = d_zwidth
	mull %ecx                          // eax = d_zwidth*v
	addl %ebx, %eax                    // eax = d_zwidth*v+u
	shll $1, %eax                      // eax = 2*(d_zwidth*v+u)
	addl %eax, %esi                    // esi = ( short * ) ( d_pzbuffer + ( d_zwidth * v ) + u )

// initiate
// izi = (int)(zi * 0x8000);
	flds zi
	fimuls eight_thousand_hex

// EDI = pdest = d_viewbuffer + d_scantable[v] + u;
	leal C(d_scantable)(,%ecx,4),%edi
	movl (%edi), %edi
	addl C(d_viewbuffer), %edi
	addl %ebx, %edi

// complete
// izi = (int)(zi * 0x8000);
	fistps tmp
	movl tmp, %eax
	movw %ax, (short_izi)

//
// determine the screen area covered by the particle,
// which also means clamping to a min and max
//
//	pix = izi >> d_pix_shift;
	xorl %edx, %edx
	movw (short_izi), %dx
	movl C(d_pix_shift), %ecx
	shrw %cl, %dx

//	if (pix < d_pix_min)
//		pix = d_pix_min;
	cmpl C(d_pix_min), %edx
	jge check_pix_max
	movl C(d_pix_min), %edx
	jmp skip_pix_clamp

//	else if (pix > d_pix_max)
//		pix = d_pix_max;
check_pix_max:
	cmpl C(d_pix_max), %edx
	jle skip_pix_clamp
	movl C(d_pix_max), %edx

skip_pix_clamp:

//
// render the appropriate pixels
//
// ECX = count (used for inner loop)
// EDX = count (used for outer loop)
// ESI = zbuffer
// EDI = framebuffer
//
	movl %edx, %ecx

	cmpl $1, %ecx
	ja  over

over:

//
// at this point:
//
// ECX = count
//
	pushl %ecx
	pushl %edi
	pushl %esi

top_of_pix_vert_loop:

top_of_pix_horiz_loop:

//	for ( ; count ; count--, pz += d_zwidth, pdest += screenwidth)
//	{
//		for (i=0 ; i<pix ; i++)
//		{
//			if (pz[i] <= izi)
//			{
//				pdest[i] = blendparticle( color, pdest[i] );
//			}
//		}
//	}
	xorl   %eax, %eax

	movw  (%esi), %ax

	cmpw  (short_izi), %ax
	jg    end_of_horiz_loop

	movl  C(partparms)+partparms_color, %eax

	cmpl $PARTICLE_66, C(partparms)+partparms_level
	je blendfunc_66
	jl blendfunc_33
// BlendParticle100
	movb	%al, (%edi)
	jmp	done_blending
blendfunc_33:
	movl C(vid)+vid_alphamap, %ebp
	xorl %ebx, %ebx

	movb (%edi), %bl
	shll $8, %ebx

	addl %ebx, %ebp
	addl %eax, %ebp

	movb (%ebp), %al

	movb %al, (%edi)
	jmp done_blending
blendfunc_66:
	movl C(vid)+vid_alphamap, %ebp
	xorl %ebx, %ebx

	shll $8, %eax
	movb (%edi), %bl

	addl %ebx, %ebp
	addl %eax, %ebp

	movb (%ebp), %al

	movb %al, (%edi)

done_blending:

	addl	$1, %edi
	addl	$2, %esi

end_of_horiz_loop:
	decl   %ecx
	jnz   top_of_pix_horiz_loop

	popl   %esi
	popl   %edi

	movl   C(d_zwidth), %ebp
	shll   $1, %ebp

	addl   %ebp, %esi
	addl   C(r_screenwidth), %edi

	popl   %ecx
	pushl  %ecx

	pushl  %edi
	pushl  %esi

	decl   %edx
	jnz   top_of_pix_vert_loop

	popl   %ecx
	popl   %ecx
	popl   %ecx

endpartfunc:
	popl %edi
	popl %esi
	movl ebpsave, %ebp
	ret

#endif	// id386
The code looks correct to me, the float_1, float_point5 and friends are defined in qasm.h/qasm.inc. So I don't believe that is the problem. Code is at https://bitbucket.org/neozeed/q2dos/branch/win32_asm.

Huge thanks to someone who can point me in the right direction and tell me what I did wrong so I can learn from it. :)
Maraakate
Posts: 8
Joined: Sun May 31, 2015 9:37 am

Re: Quake 2 Inline Assembly in r_part.c conversion to GAS

Post by Maraakate »

Figured it out. Took objdump from the GCC .o and MSVC .obj and compared both. fsubrp becomes fsubp in MSVC. Changing it to this fixed the alignment issue. :cool:
Post Reply