Forum

Quake 2 Inline Assembly in r_part.c conversion to GAS

Discuss programming topics for the various GPL'd game engine sources.

Moderator: InsideQC Admins

Quake 2 Inline Assembly in r_part.c conversion to GAS

Postby Maraakate » Sat May 18, 2019 3:13 pm

Hello all,

I've been working on the Q2DOS port for quite a few years with Sezero, and one of the TODOs that has stared me in the face the entire time is the inline assembly for the particle drawing/blending code in r_part.c, which was MSVC-specific. Specifically, the R_DrawParticle() function. I did some reading on ASM and first moved the 3 blending declspec "naked" functions inside R_DrawParticle and made sure that worked. Then I broke this entire code out into an actual ASM file for use with ML.EXE. Still good. From here I started the conversion to GAS. It seems pretty straightforward: from what I understand, the src and dst operands are swapped for mov, cmp, etc., and instructions typically carry an explicit operand-size suffix, like movl, movb, and so on. No big deal there. A gotcha (but the assembler is good enough to warn about this) is that floating point math instructions such as faddp, fsubp, etc. have their registers reversed.

Anyway, I converted it over. The engine didn't blow up (woo-hoo!) but there were no particles. I looked closer and noticed that if I went to q2dm1, looked up at the sky and turned on cl_testparticles 1, I could see part of the test particles. If I fire the gun at the sky and look down quickly, I can see part of it. So it looks like the z-clipping plane and the screen centering are wrong. I'm guessing an alignment issue? I have ruled out the 24-bit floating point mode that is set in some inline ASM, because commenting that out in MSVC makes no difference (probably a small speed difference, but I can port that part later). I have no idea at this point. Here is the relevant code:

MASM version:
Code: Select all
 .386P
 .model FLAT
;
; r_parta.s
; x86 assembly-language particle code
;

include qasm.inc
include d_if.inc

if   id386

_DATA SEGMENT
_DATA ENDS

CONST SEGMENT
; 047000000h is the IEEE-754 single-precision encoding of 32768.0 (0x8000);
; it is the scale factor used for izi = (int)(zi * 0x8000).
eight_thousand_hex dd 047000000r

; particle blend levels, compared against partparms_level
PARTICLE_33     equ 0
PARTICLE_66     equ 1
PARTICLE_OPAQUE equ 2
CONST ENDS

_BSS SEGMENT
; 16-bit Z reference value for the particle being drawn
short_izi       DW ?
align 4
; 1/z of the transformed particle
zi              DD ?
; projected screen column and row
u               DD ?
v               DD ?
; scratch slot for the float -> int conversion of izi
tmp             DD ?
; particle origin in view space (right, up, forward)
transformed_vec DD 3 DUP (?)
; particle origin relative to r_origin
local_vec       DD 3 DUP (?)
; caller EBP, parked here while EBP is used as a scratch register
ebpsave         DD ?
_BSS ENDS

_TEXT SEGMENT
 align 4
 public _R_DrawParticle
;-----------------------------------------------------------------------
; _R_DrawParticle
;
; Transforms the particle at [_partparms+partparms_particle] into view
; space, projects it to screen coordinates (u, v), clips it against the
; particle view rectangle and draws a pix x pix square into the
; framebuffer, Z-testing every pixel against the 16-bit Z-buffer.
;
; In:    no stack arguments are read; all inputs come from globals
;        (_partparms, _r_origin, _r_pright, _r_pup, _r_ppn, _xcenter,
;        _ycenter, _d_* and _vid).
; Saves: EBP (in ebpsave), ESI, EDI and EBX (on the stack).
; Clobb: EAX, ECX, EDX, flags.  The x87 stack is empty on exit.
; Note:  all scratch storage is static (_BSS), so the routine is not
;        reentrant.
;-----------------------------------------------------------------------
_R_DrawParticle:
;
; save trashed variables
;
; EBX is callee-saved under the 32-bit C calling convention and is used
; below for u and as a blend-table index, so it is saved with ESI/EDI.
 mov  dword ptr [ebpsave], ebp
 push esi
 push edi
 push ebx

;
; transform the particle
;
; VectorSubtract (pparticle->origin, r_origin, local);
; (stack comments list ST(0) first)
 mov  esi, dword ptr [_partparms+partparms_particle]
 fld  dword ptr [esi+0]          ; p_o.x
 fsub dword ptr [_r_origin+0]     ; p_o.x-r_o.x
 fld  dword ptr [esi+4]          ; p_o.y | p_o.x-r_o.x
 fsub dword ptr [_r_origin+4]     ; p_o.y-r_o.y | p_o.x-r_o.x
 fld  dword ptr [esi+8]          ; p_o.z | p_o.y-r_o.y | p_o.x-r_o.x
 fsub dword ptr [_r_origin+8]     ; p_o.z-r_o.z | p_o.y-r_o.y | p_o.x-r_o.x
 fxch st(2)                      ; p_o.x-r_o.x | p_o.y-r_o.y | p_o.z-r_o.z
 fstp dword ptr [local_vec+0]        ; p_o.y-r_o.y | p_o.z-r_o.z
 fstp dword ptr [local_vec+4]        ; p_o.z-r_o.z
 fstp dword ptr [local_vec+8]        ; (empty)

; transformed[0] = DotProduct(local, r_pright);
; transformed[1] = DotProduct(local, r_pup);
; transformed[2] = DotProduct(local, r_ppn);
 fld  dword ptr [local_vec+0]        ; l.x
 fmul dword ptr [_r_pright+0]     ; l.x*pr.x
 fld  dword ptr [local_vec+4]        ; l.y | l.x*pr.x
 fmul dword ptr [_r_pright+4]     ; l.y*pr.y | l.x*pr.x
 fld  dword ptr [local_vec+8]        ; l.z | l.y*pr.y | l.x*pr.x
 fmul dword ptr [_r_pright+8]     ; l.z*pr.z | l.y*pr.y | l.x*pr.x
 fxch st(2)                      ; l.x*pr.x | l.y*pr.y | l.z*pr.z
 faddp st(1), st                 ; l.x*pr.x + l.y*pr.y | l.z*pr.z
 faddp st(1), st                 ; l.x*pr.x + l.y*pr.y + l.z*pr.z
 fstp  dword ptr [transformed_vec+0] ; (empty)

 fld  dword ptr [local_vec+0]        ; l.x
 fmul dword ptr [_r_pup+0]        ; l.x*pu.x
 fld  dword ptr [local_vec+4]        ; l.y | l.x*pu.x
 fmul dword ptr [_r_pup+4]        ; l.y*pu.y | l.x*pu.x
 fld  dword ptr [local_vec+8]        ; l.z | l.y*pu.y | l.x*pu.x
 fmul dword ptr [_r_pup+8]        ; l.z*pu.z | l.y*pu.y | l.x*pu.x
 fxch st(2)                      ; l.x*pu.x | l.y*pu.y | l.z*pu.z
 faddp st(1), st                 ; l.x*pu.x + l.y*pu.y | l.z*pu.z
 faddp st(1), st                 ; l.x*pu.x + l.y*pu.y + l.z*pu.z
 fstp  dword ptr [transformed_vec+4] ; (empty)

 fld  dword ptr [local_vec+0]        ; l.x
 fmul dword ptr [_r_ppn+0]        ; l.x*pn.x
 fld  dword ptr [local_vec+4]        ; l.y | l.x*pn.x
 fmul dword ptr [_r_ppn+4]        ; l.y*pn.y | l.x*pn.x
 fld  dword ptr [local_vec+8]        ; l.z | l.y*pn.y | l.x*pn.x
 fmul dword ptr [_r_ppn+8]        ; l.z*pn.z | l.y*pn.y | l.x*pn.x
 fxch st(2)                      ; l.x*pn.x | l.y*pn.y | l.z*pn.z
 faddp st(1), st(0)                 ; l.x*pn.x + l.y*pn.y | l.z*pn.z
 faddp st(1), st(0)                 ; l.x*pn.x + l.y*pn.y + l.z*pn.z
 fstp  dword ptr [transformed_vec+8] ; (empty)

;
; make sure that the transformed particle is not in front of
; the particle Z clip plane.  We can do the comparison in
; integer space since we know the sign of one of the inputs
; and can figure out the sign of the other easily enough.
;
;   if (transformed[2] < PARTICLE_Z_CLIP)
;      return;

 mov  eax, dword ptr [transformed_vec+8]
 and  eax, eax                   ; sets SF from the float sign bit
 js   endpartfunc                ; negative z: reject
 cmp  eax, float_particle_z_clip ; both non-negative: compare raw bits
 jl   endpartfunc

;
; project the point by initiating the 1/z calc
;
;   zi = 1.0 / transformed[2];
 fld   float_1
 fdiv  dword ptr [transformed_vec+8]

; prefetch the next particle
; (EBP is only a throwaway destination here; it was parked in ebpsave)
 mov ebp, ds:dword ptr [_s_prefetch_address]
 mov ebp, [ebp]

; finish the above divide
 fstp  dword ptr [zi]

; u = (int)(xcenter + zi * transformed[0] + 0.5);
; v = (int)(ycenter - zi * transformed[1] + 0.5);
 fld   dword ptr [zi]                           ; zi
 fmul  dword ptr [transformed_vec+0]    ; zi * transformed[0]
 fld   dword ptr [zi]                           ; zi | zi * transformed[0]
 fmul  dword ptr [transformed_vec+4]    ; zi * transformed[1] | zi * transformed[0]
 fxch  st(1)                        ; zi * transformed[0] | zi * transformed[1]
 fadd  ds:dword ptr[_xcenter]                      ; xcenter + zi * transformed[0] | zi * transformed[1]
 fxch  st(1)                        ; zi * transformed[1] | xcenter + zi * transformed[0]
 fld   ds:dword ptr[_ycenter]                      ; ycenter | zi * transformed[1] | xcenter + zi * transformed[0]
 fsubrp st(1), st(0)                ; ST(1) = ST(0) - ST(1), pop: ycenter - zi * transformed[1] | xcenter + zi * transformed[0]
 fxch  st(1)                        ; xcenter + zi * transformed[0] | ycenter - zi * transformed[1]
 fadd  float_point5                   ; xcenter + zi * transformed[0] + 0.5 | ycenter - zi * transformed[1]
 fxch  st(1)                        ; ycenter - zi * transformed[1] | xcenter + zi * transformed[0] + 0.5
 fadd  float_point5                   ; ycenter - zi * transformed[1] + 0.5 | xcenter + zi * transformed[0] + 0.5
 fxch  st(1)                        ; u | v
 fistp dword ptr [u]                ; v
 fistp dword ptr [v]                ; (empty)

;
; clip out the particle
;

;   if ((v > d_vrectbottom_particle) ||
;      (u > d_vrectright_particle) ||
;      (v < d_vrecty) ||
;      (u < d_vrectx))
;   {
;      return;
;   }

 mov ebx, u
 mov ecx, v
 cmp ecx, ds:dword ptr [_d_vrectbottom_particle]
 jg  endpartfunc
 cmp ecx, ds:dword ptr [_d_vrecty]
 jl  endpartfunc
 cmp ebx, ds:dword ptr [_d_vrectright_particle]
 jg  endpartfunc
 cmp ebx, ds:dword ptr [_d_vrectx]
 jl  endpartfunc

;
; compute addresses of zbuffer, framebuffer, and
; compute the Z-buffer reference value.
;
; EBX      = U
; ECX      = V
;
; Outputs:
; ESI = Z-buffer address
; EDI = framebuffer address
;
; ESI = d_pzbuffer + (d_zwidth * v) + u;
 mov esi, ds:dword ptr[_d_pzbuffer]             ; esi = d_pzbuffer
 mov eax, ds:dword ptr[_d_zwidth]               ; eax = d_zwidth
 mul ecx                         ; eax = d_zwidth*v (also overwrites edx, which is dead here)
 add eax, ebx                    ; eax = d_zwidth*v+u
 shl eax, 1                      ; eax = 2*(d_zwidth*v+u), Z entries are 16-bit
 add esi, eax                    ; esi = ( short * ) ( d_pzbuffer + ( d_zwidth * v ) + u )

; initiate
; izi = (int)(zi * 0x8000);
 fld  dword ptr [zi]
 fmul eight_thousand_hex

; EDI = pdest = d_viewbuffer + d_scantable[v] + u;
 lea edi, ds:dword ptr _d_scantable[ecx*4]
 mov edi, dword ptr [edi]
 add edi, ds:dword ptr[_d_viewbuffer]
 add edi, ebx

; complete
; izi = (int)(zi * 0x8000);
 fistp dword ptr [tmp]
 mov eax, dword ptr [tmp]
 mov word ptr [short_izi], ax    ; keep only the low 16 bits

;
; determine the screen area covered by the particle,
; which also means clamping to a min and max
;
;   pix = izi >> d_pix_shift;
 xor edx, edx
 mov dx, word ptr [short_izi]
 mov ecx, ds:dword ptr[_d_pix_shift]
 shr dx, cl

;   if (pix < d_pix_min)
;      pix = d_pix_min;
 cmp edx, ds:dword ptr[_d_pix_min]
 jge check_pix_max
 mov edx, ds:dword ptr[_d_pix_min]
 jmp skip_pix_clamp

;   else if (pix > d_pix_max)
;      pix = d_pix_max;
check_pix_max:
 cmp edx, ds:dword ptr[_d_pix_max]
 jle skip_pix_clamp
 mov edx, ds:dword ptr[_d_pix_max]

skip_pix_clamp:

;
; render the appropriate pixels
;
; ECX = count (used for inner loop)
; EDX = count (used for outer loop)
; ESI = zbuffer
; EDI = framebuffer
;
; (the original had a "cmp ecx, 1 / ja over" here whose target was the
; very next instruction; it had no effect and has been dropped)
 mov ecx, edx

;
; at this point:
;
; ECX = count
;
; The three values pushed here stay on the stack for the whole loop:
; [esp] = row start in the Z-buffer, [esp+4] = row start in the
; framebuffer, [esp+8] = pixels per row.
 push ecx
 push edi
 push esi

top_of_pix_vert_loop:

top_of_pix_horiz_loop:

;   for ( ; count ; count--, pz += d_zwidth, pdest += screenwidth)
;   {
;      for (i=0 ; i<pix ; i++)
;      {
;         if (pz[i] <= izi)
;         {
;            pdest[i] = blendparticle( color, pdest[i] );
;         }
;      }
;   }
 xor   eax, eax

 mov   ax, word ptr [esi]

; NOTE(review): a pixel that fails this Z test jumps past the pointer
; increments at done_blending, so the same pixel is re-tested for the
; rest of the row, whereas the C reference above always steps to
; pz[i+1].  Left unchanged to match the existing behaviour; confirm
; whether this is intended.
 cmp   ax, word ptr [short_izi]
 jg    end_of_horiz_loop

 mov   eax, ds:dword ptr [_partparms+partparms_color]

; level == PARTICLE_66 -> blendfunc_66, level < PARTICLE_66 -> blendfunc_33,
; anything above falls through to the opaque write
 cmp ds:dword ptr [_partparms+partparms_level], PARTICLE_66
 je  blendfunc_66
 jl  blendfunc_33
; BlendParticle100
 mov byte ptr [edi], al
 jmp done_blending
blendfunc_33:
; *pdest = alphamap[(*pdest << 8) + color]
 mov ebp, ds:dword ptr [_vid+vid_alphamap]
 xor ebx, ebx

 mov bl,  byte ptr [edi]
 shl ebx, 8

 add ebp, ebx
 add ebp, eax

 mov al,  byte ptr [ebp]

 mov byte ptr [edi], al
 jmp done_blending
blendfunc_66:
; *pdest = alphamap[(color << 8) + *pdest]
 mov ebp, ds:dword ptr [_vid+vid_alphamap]
 xor ebx, ebx

 shl eax,  8
 mov bl,   byte ptr [edi]

 add ebp, ebx
 add ebp, eax

 mov al,  byte ptr [ebp]

 mov byte ptr [edi], al
done_blending:

 add   edi, 1
 add   esi, 2

end_of_horiz_loop:

 dec   ecx
 jnz   top_of_pix_horiz_loop

; step both row pointers down one scanline and reload the row length
 pop   esi
 pop   edi

 mov   ebp, ds:dword ptr[_d_zwidth]
 shl   ebp, 1

 add   esi, ebp
 add   edi, ds:dword ptr [_r_screenwidth]

 pop   ecx
 push  ecx

 push  edi
 push  esi

 dec   edx
 jnz   top_of_pix_vert_loop

; discard the three loop values still on the stack
 pop   ecx
 pop   ecx
 pop   ecx

endpartfunc:
 pop ebx
 pop edi
 pop esi
 mov ebp, dword ptr[ebpsave]
 ret

_TEXT ENDS
endif   ;id386
 END


GAS Version:
Code: Select all
//
// r_parta.s
// x86 assembly-language particle code.
//

#include "qasm.h"

#if   id386

   .data
eight_thousand_hex: .long   32768

   .bss
.lcomm   short_izi, 1
   .align 4
.lcomm   zi, 1, 4
.lcomm   u, 1, 4
.lcomm   v, 1, 4
.lcomm   tmp, 1, 4
.lcomm   transformed_vec, 12, 4
.lcomm   local_vec, 12, 4
.lcomm   ebpsave, 1, 4

   .text
#define PARTICLE_33   0
#define PARTICLE_66   1
#define PARTICLE_OPAQUE   2

   .align 4
.globl C(R_DrawParticle)
C(R_DrawParticle):
//
// save trashed variables
//
   movl %ebp, ebpsave
   pushl %esi
   pushl %edi

//
// transform the particle
//
// VectorSubtract (pparticle->origin, r_origin, local);
   movl  C(partparms)+partparms_particle, %esi
   flds  0(%esi)          // p_o.x
   fsubs C(r_origin)     // p_o.x-r_o.x
   flds  4(%esi)          // p_o.y | p_o.x-r_o.x
   fsubs C(r_origin)+4     // p_o.y-r_o.y | p_o.x-r_o.x
   flds  8(%esi)          // p_o.z | p_o.y-r_o.y | p_o.x-r_o.x
   fsubs C(r_origin)+8     // p_o.z-r_o.z | p_o.y-r_o.y | p_o.x-r_o.x
   fxch  %st(2)                      // p_o.x-r_o.x | p_o.y-r_o.y | p_o.z-r_o.z
   fstps local_vec+0        // p_o.y-r_o.y | p_o.z-r_o.z
   fstps local_vec+4        // p_o.z-r_o.z
   fstps local_vec+8        // (empty)

// transformed[0] = DotProduct(local, r_pright);
// transformed[1] = DotProduct(local, r_pup);
// transformed[2] = DotProduct(local, r_ppn);
   flds local_vec+0        // l.x
   fmuls C(r_pright)+0     // l.x*pr.x
   flds local_vec+4        // l.y | l.x*pr.x
   fmuls C(r_pright)+4     // l.y*pr.y | l.x*pr.x
   flds local_vec+8        // l.z | l.y*pr.y | l.x*pr.x
   fmuls C(r_pright)+8     // l.z*pr.z | l.y*pr.y | l.x*pr.x
   fxch %st(2)                      // l.x*pr.x | l.y*pr.y | l.z*pr.z
   faddp %st(0), %st(1)                 // l.x*pr.x + l.y*pr.y | l.z*pr.z
   faddp %st(0), %st(1)                 // l.x*pr.x + l.y*pr.y + l.z*pr.z
   fstps transformed_vec+0 // (empty)

   flds local_vec+0        // l.x
   fmuls C(r_pup)+0        // l.x*pr.x
   flds local_vec+4        // l.y | l.x*pr.x
   fmuls C(r_pup)+4        // l.y*pr.y | l.x*pr.x
   flds local_vec+8        // l.z | l.y*pr.y | l.x*pr.x
   fmuls C(r_pup)+8        // l.z*pr.z | l.y*pr.y | l.x*pr.x
   fxch %st(2)                      // l.x*pr.x | l.y*pr.y | l.z*pr.z
   faddp %st(0), %st(1)                 // l.x*pr.x + l.y*pr.y | l.z*pr.z
   faddp %st(0), %st(1)                 // l.x*pr.x + l.y*pr.y + l.z*pr.z
   fstps transformed_vec+4 // (empty)

   flds local_vec+0        // l.x
   fmuls C(r_ppn)+0        // l.x*pr.x
   flds local_vec+4        // l.y | l.x*pr.x
   fmuls C(r_ppn)+4        // l.y*pr.y | l.x*pr.x
   flds local_vec+8        // l.z | l.y*pr.y | l.x*pr.x
   fmuls C(r_ppn)+8        // l.z*pr.z | l.y*pr.y | l.x*pr.x
   fxch %st(2)                      // l.x*pr.x | l.y*pr.y | l.z*pr.z
   faddp %st(0), %st(1)                 // l.x*pr.x + l.y*pr.y | l.z*pr.z
   faddp %st(0), %st(1)                 // l.x*pr.x + l.y*pr.y + l.z*pr.z
   fstps transformed_vec+8 // (empty)

//
// make sure that the transformed particle is not in front of
// the particle Z clip plane.  We can do the comparison in
// integer space since we know the sign of one of the inputs
// and can figure out the sign of the other easily enough.
//
//   if (transformed[2] < PARTICLE_Z_CLIP)
//      return;

   movl transformed_vec+8, %eax
   andl  %eax, %eax
   js   endpartfunc
   cmpl  float_particle_z_clip, %eax
   jl   endpartfunc

//
// project the point by initiating the 1/z calc
//
//   zi = 1.0 / transformed[2];
   flds   float_1
   fdiv  transformed_vec+8

// prefetch the next particle
   movl C(s_prefetch_address), %ebp
   movl (%ebp), %ebp

// finish the above divide
   fstps  zi

// u = (int)(xcenter + zi * transformed[0] + 0.5)
// v = (int)(ycenter - zi * transformed[1] + 0.5)
   flds zi                           // zi
   fmuls transformed_vec+0    // zi * transformed[0]
   flds zi                           // zi | zi * transformed[0]
   fmuls transformed_vec+4    // zi * transformed[1] | zi * transformed[0]
   fxch %st(1)                        // zi * transformed[0] | zi * transformed[1]
   fadds C(xcenter)                      // xcenter + zi * transformed[0] | zi * transformed[1]
   fxch %st(1)                        // zi * transformed[1] | xcenter + zi * transformed[0]
   flds C(ycenter)                      // ycenter | zi * transformed[1] | xcenter + zi * transformed[0]
   fsubrp %st(0), %st(1)                // ycenter - zi * transformed[1] | xcenter + zi * transformed[0]
   fxch  %st(1)                        // xcenter + zi * transformed[0] | ycenter + zi * transformed[1]
   fadds  float_point5                   // xcenter + zi * transformed[0] + 0.5 | ycenter - zi * transformed[1]
   fxch  %st(1)                        // ycenter - zi * transformed[1] | xcenter + zi * transformed[0] + 0.5
   fadds  float_point5                   // ycenter - zi * transformed[1] + 0.5 | xcenter + zi * transformed[0] + 0.5
   fxch  %st(1)                        // u | v
   fistps u                // v
   fistps v                // (empty)

//
// clip out the particle
//

//   if ((v > d_vrectbottom_particle) ||
//      (u > d_vrectright_particle) ||
//      (v < d_vrecty) ||
//      (u < d_vrectx))
//   {
//      return;
//   }

   movl u, %ebx
   movl v, %ecx
   cmpl C(d_vrectbottom_particle), %ecx
   jg  endpartfunc
   cmpl C(d_vrecty), %ecx
   jl  endpartfunc
   cmpl C(d_vrectright_particle), %ebx
   jg  endpartfunc
   cmpl C(d_vrectx), %ebx
   jl  endpartfunc

//
// compute addresses of zbuffer, framebuffer, and
// compute the Z-buffer reference value.
//
// EBX      = U
// ECX      = V
//
// Outputs:
// ESI = Z-buffer address
// EDI = framebuffer address
//
// ESI = d_pzbuffer + (d_zwidth * v) + u;
   movl C(d_pzbuffer), %esi           // esi = d_pzbuffer
   movl C(d_zwidth), %eax             // eax = d_zwidth
   mull %ecx                          // eax = d_zwidth*v
   addl %ebx, %eax                    // eax = d_zwidth*v+u
   shll $1, %eax                      // eax = 2*(d_zwidth*v+u)
   addl %eax, %esi                    // esi = ( short * ) ( d_pzbuffer + ( d_zwidth * v ) + u )

// initiate
// izi = (int)(zi * 0x8000);
   flds zi
   fimuls eight_thousand_hex

// EDI = pdest = d_viewbuffer + d_scantable[v] + u;
   leal C(d_scantable)(,%ecx,4),%edi
   movl (%edi), %edi
   addl C(d_viewbuffer), %edi
   addl %ebx, %edi

// complete
// izi = (int)(zi * 0x8000);
   fistps tmp
   movl tmp, %eax
   movw %ax, (short_izi)

//
// determine the screen area covered by the particle,
// which also means clamping to a min and max
//
//   pix = izi >> d_pix_shift;
   xorl %edx, %edx
   movw (short_izi), %dx
   movl C(d_pix_shift), %ecx
   shrw %cl, %dx

//   if (pix < d_pix_min)
//      pix = d_pix_min;
   cmpl C(d_pix_min), %edx
   jge check_pix_max
   movl C(d_pix_min), %edx
   jmp skip_pix_clamp

//   else if (pix > d_pix_max)
//      pix = d_pix_max;
check_pix_max:
   cmpl C(d_pix_max), %edx
   jle skip_pix_clamp
   movl C(d_pix_max), %edx

skip_pix_clamp:

//
// render the appropriate pixels
//
// ECX = count (used for inner loop)
// EDX = count (used for outer loop)
// ESI = zbuffer
// EDI = framebuffer
//
   movl %edx, %ecx

   cmpl $1, %ecx
   ja  over

over:

//
// at this point:
//
// ECX = count
//
   pushl %ecx
   pushl %edi
   pushl %esi

top_of_pix_vert_loop:

top_of_pix_horiz_loop:

//   for ( ; count ; count--, pz += d_zwidth, pdest += screenwidth)
//   {
//      for (i=0 ; i<pix ; i++)
//      {
//         if (pz[i] <= izi)
//         {
//            pdest[i] = blendparticle( color, pdest[i] );
//         }
//      }
//   }
   xorl   %eax, %eax

   movw  (%esi), %ax

   cmpw  (short_izi), %ax
   jg    end_of_horiz_loop

   movl  C(partparms)+partparms_color, %eax

   cmpl $PARTICLE_66, C(partparms)+partparms_level
   je blendfunc_66
   jl blendfunc_33
// BlendParticle100
   movb   %al, (%edi)
   jmp   done_blending
blendfunc_33:
   movl C(vid)+vid_alphamap, %ebp
   xorl %ebx, %ebx

   movb (%edi), %bl
   shll $8, %ebx

   addl %ebx, %ebp
   addl %eax, %ebp

   movb (%ebp), %al

   movb %al, (%edi)
   jmp done_blending
blendfunc_66:
   movl C(vid)+vid_alphamap, %ebp
   xorl %ebx, %ebx

   shll $8, %eax
   movb (%edi), %bl

   addl %ebx, %ebp
   addl %eax, %ebp

   movb (%ebp), %al

   movb %al, (%edi)

done_blending:

   addl   $1, %edi
   addl   $2, %esi

end_of_horiz_loop:
   decl   %ecx
   jnz   top_of_pix_horiz_loop

   popl   %esi
   popl   %edi

   movl   C(d_zwidth), %ebp
   shll   $1, %ebp

   addl   %ebp, %esi
   addl   C(r_screenwidth), %edi

   popl   %ecx
   pushl  %ecx

   pushl  %edi
   pushl  %esi

   decl   %edx
   jnz   top_of_pix_vert_loop

   popl   %ecx
   popl   %ecx
   popl   %ecx

endpartfunc:
   popl %edi
   popl %esi
   movl ebpsave, %ebp
   ret

#endif   // id386


The code looks correct to me. The float_1, float_point5 and friends are defined in qasm.h/qasm.inc, so I don't believe that is the problem. The code is at https://bitbucket.org/neozeed/q2dos/branch/win32_asm.

Huge thanks to someone who can point me in the right direction and tell me what I did wrong so I can learn from it. :)
Maraakate
 
Posts: 8
Joined: Sun May 31, 2015 9:37 am

Re: Quake 2 Inline Assembly in r_part.c conversion to GAS

Postby Maraakate » Sat May 18, 2019 11:40 pm

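Figured it out. Took objdump from the GCC .o and MSVC .obj and compared both. The instruction MASM writes as fsubrp st(1), st(0) has to be written as fsubp %st(0), %st(1) in GAS, because AT&T syntax swaps the fsub/fsubr (and fdiv/fdivr) mnemonics when the destination is %st(i) rather than %st(0). Changing the GAS line from fsubrp to fsubp fixed the alignment issue. :cool: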
Maraakate
 
Posts: 8
Joined: Sun May 31, 2015 9:37 am


Return to Engine Programming

Who is online

Users browsing this forum: Google [Bot] and 2 guests