diff --git a/src/platform/gba/asm/common_asm.inc b/src/platform/gba/asm/common_asm.inc index 46af4800..0ff99981 100644 --- a/src/platform/gba/asm/common_asm.inc +++ b/src/platform/gba/asm/common_asm.inc @@ -121,11 +121,11 @@ .endm .macro scaleUV uv, tmp, tmp2, f - smull \tmp2, \tmp, \uv, \f // u = (f * uv) >> 32 + smull \tmp2, \tmp, \uv, \f // u = (f * uv) >> 32, always run: \tmp feeds the orr even if uv == 0 lsl \uv, #16 - asr \uv, #16 - mul \uv, \f // v = f * int16(uv) + asrs \uv, #16 + mulne \uv, \f // v = f * int16(uv) lsr \uv, #16 orr \uv, \uv, \tmp, lsl #16 // uv = (u & 0xFFFF0000) | (v >> 16) diff --git a/src/platform/gba/asm/rasterizeF.s b/src/platform/gba/asm/rasterizeF.s index 47b74879..f04a29c4 100644 --- a/src/platform/gba/asm/rasterizeF.s +++ b/src/platform/gba/asm/rasterizeF.s @@ -25,8 +25,6 @@ Lxy .req tmp Ly2 .req Lh LMAP .req Lx ptr .req tmp -Ltmp .req N -Rtmp .req N .global rasterizeF_asm rasterizeF_asm: @@ -57,9 +55,9 @@ rasterizeF_asm: divLUT tmp, Lh // tmp = FixedInvU(Lh) - ldrsh Ltmp, [L, #VERTEX_X] - sub Ltmp, Lx, asr #16 - mul Ldx, tmp, Ltmp // Ldx = tmp * (N->v.x - L->v.x) + ldrsh Ldx, [L, #VERTEX_X] + subs Ldx, Lx, asr #16 + mulne Ldx, tmp, Ldx // Ldx = tmp * (N->v.x - L->v.x) .calc_left_end: cmp Rh, #0 @@ -81,9 +79,9 @@ rasterizeF_asm: divLUT tmp, Rh // tmp = FixedInvU(Rh) - ldrsh Rtmp, [R, #VERTEX_X] - sub Rtmp, Rx, asr #16 - mul Rdx, tmp, Rtmp // Rdx = tmp * (N->v.x - Rx) + ldrsh Rdx, [R, #VERTEX_X] + subs Rdx, Rx, asr #16 + mulne Rdx, tmp, Rdx // Rdx = tmp * (N->v.x - Rx) .calc_right_end: cmp Rh, Lh // if (Rh < Lh) diff --git a/src/platform/gba/asm/rasterizeFT.s b/src/platform/gba/asm/rasterizeFT.s index 0e89b0cb..cc18c701 100644 --- a/src/platform/gba/asm/rasterizeFT.s +++ b/src/platform/gba/asm/rasterizeFT.s @@ -95,12 +95,12 @@ rasterizeFT_asm: divLUT tmp, Lh // tmp = FixedInvU(Lh) - ldrsh Ltmp, [L, #VERTEX_X] - sub Ltmp, Lx, asr #16 - mul Ldx, tmp, Ltmp // Ldx = tmp * (N->v.x
- Lx) ldr Ldt, [L, #VERTEX_T] - sub Ldt, Lt // Ldt = N->v.t - Lt + subs Ldt, Lt // Ldt = N->v.t - Lt scaleUV Ldt, Ltmp, Ltmp2, tmp .calc_left_end: @@ -125,12 +125,12 @@ rasterizeFT_asm: divLUT tmp, Rh // tmp = FixedInvU(Rh) - ldrsh Rtmp, [R, #VERTEX_X] - sub Rtmp, Rx, asr #16 - mul Rdx, tmp, Rtmp // Rdx = tmp * (N->v.x - Rx) + ldrsh Rdx, [R, #VERTEX_X] + subs Rdx, Rx, asr #16 + mulne Rdx, tmp, Rdx // Rdx = tmp * (N->v.x - Rx) ldr Rdt, [R, #VERTEX_T] - sub Rdt, Rt // Rdt = N->v.t - Rt + subs Rdt, Rt // Rdt = N->v.t - Rt scaleUV Rdt, Rtmp, Rtmp2, tmp .calc_right_end: @@ -153,7 +153,7 @@ rasterizeFT_asm: divLUT inv, width // inv = FixedInvU(width) - sub dtdx, Rt, Lt // duv = Rt - Lt + subs dtdx, Rt, Lt // duv = Rt - Lt scaleUV dtdx, dtmp, dtmp2, inv mov t, Lt // t = Lt diff --git a/src/platform/gba/asm/rasterizeFTA.s b/src/platform/gba/asm/rasterizeFTA.s index 96e492c8..0fee0170 100644 --- a/src/platform/gba/asm/rasterizeFTA.s +++ b/src/platform/gba/asm/rasterizeFTA.s @@ -96,12 +96,12 @@ rasterizeFTA_asm: divLUT tmp, Lh // tmp = FixedInvU(Lh) - ldrsh Ltmp, [L, #VERTEX_X] - sub Ltmp, Lx, asr #16 - mul Ldx, tmp, Ltmp // Ldx = tmp * (N->v.x - Lx) + ldrsh Ldx, [L, #VERTEX_X] + subs Ldx, Lx, asr #16 + mulne Ldx, tmp, Ldx // Ldx = tmp * (N->v.x - Lx) ldr Ldt, [L, #VERTEX_T] - sub Ldt, Lt // Ldt = N->v.t - Lt + subs Ldt, Lt // Ldt = N->v.t - Lt scaleUV Ldt, Ltmp, Ltmp2, tmp .calc_left_end: @@ -126,12 +126,12 @@ rasterizeFTA_asm: divLUT tmp, Rh // tmp = FixedInvU(Rh) - ldrsh Rtmp, [R, #VERTEX_X] - sub Rtmp, Rx, asr #16 - mul Rdx, tmp, Rtmp // Rdx = tmp * (N->v.x - Rx) + ldrsh Rdx, [R, #VERTEX_X] + subs Rdx, Rx, asr #16 + mulne Rdx, tmp, Rdx // Rdx = tmp * (N->v.x - Rx) ldr Rdt, [R, #VERTEX_T] - sub Rdt, Rt // Rdt = N->v.t - Rt + subs Rdt, Rt // Rdt = N->v.t - Rt scaleUV Rdt, Rtmp, Rtmp2, tmp .calc_right_end: @@ -154,7 +154,7 @@ rasterizeFTA_asm: divLUT inv, width // inv = FixedInvU(width) - sub dtdx, Rt, Lt // duv = Rt - Lt + subs dtdx, Rt, Lt // duv = Rt - Lt scaleUV dtdx, 
dtmp, dtmp2, inv mov t, Lt // t = Lt diff --git a/src/platform/gba/asm/rasterizeGT.s b/src/platform/gba/asm/rasterizeGT.s index f427f2a8..fea32d17 100644 --- a/src/platform/gba/asm/rasterizeGT.s +++ b/src/platform/gba/asm/rasterizeGT.s @@ -107,17 +107,17 @@ rasterizeGT_asm: divLUT tmp, Lh // tmp = FixedInvU(Lh) fiq_on - ldrsh Ltmp, [N, #VERTEX_X] - sub Ltmp, Lx, asr #16 - mul Ldx, tmp, Ltmp // Ldx = tmp * (N->v.x - Lx) + ldrsh Ldx, [N, #VERTEX_X] + subs Ldx, Lx, asr #16 + mulne Ldx, tmp, Ldx // Ldx = tmp * (N->v.x - Lx) - ldrb Ltmp, [N, #VERTEX_G] - sub Ltmp, Lg, lsr #(8 + G_EXTRA) - mul Ldg, tmp, Ltmp // Ldg = tmp * (N->v.g - Lg) + ldrb Ldg, [N, #VERTEX_G] + subs Ldg, Lg, lsr #(8 + G_EXTRA) + mulne Ldg, tmp, Ldg // Ldg = tmp * (N->v.g - Lg) asr Ldg, #(8 - G_EXTRA) // (8 + G_EXTRA)-bit for fractional part ldr Ldt, [N, #VERTEX_T] - sub Ldt, Lt // Ldt = N->v.t - Lt + subs Ldt, Lt // Ldt = N->v.t - Lt scaleUV Ldt, Ltmp, Ltmp2, tmp fiq_off .calc_left_end: @@ -146,17 +146,17 @@ rasterizeGT_asm: divLUT tmp, Rh // tmp = FixedInvU(Rh) fiq_on - ldrsh Rtmp, [N, #VERTEX_X] - sub Rtmp, Rx, asr #16 - mul Rdx, tmp, Rtmp // Rdx = tmp * (N->v.x - Rx) + ldrsh Rdx, [N, #VERTEX_X] + subs Rdx, Rx, asr #16 + mulne Rdx, tmp, Rdx // Rdx = tmp * (N->v.x - Rx) - ldrb Rtmp, [N, #VERTEX_G] - sub Rtmp, Rg, lsr #(8 + G_EXTRA) - mul Rdg, tmp, Rtmp // Rdg = tmp * (N->v.g - Rg) + ldrb Rdg, [N, #VERTEX_G] + subs Rdg, Rg, lsr #(8 + G_EXTRA) + mulne Rdg, tmp, Rdg // Rdg = tmp * (N->v.g - Rg) asr Rdg, #(8 - G_EXTRA) // (8 + G_EXTRA)-bit for fractional part ldr Rdt, [N, #VERTEX_T] - sub Rdt, Rt // Rdt = N->v.t - Rt + subs Rdt, Rt // Rdt = N->v.t - Rt scaleUV Rdt, Rtmp, Rtmp2, tmp fiq_off .calc_right_end: @@ -183,12 +183,12 @@ rasterizeGT_asm: divLUT inv, width // inv = FixedInvU(width) - sub dtdx, Rt, Lt // dtdx = Rt - Lt + subs dtdx, Rt, Lt // dtdx = Rt - Lt scaleUV dtdx, dtmp, dtmp2, inv // t == Lt (alias) - sub dgdx, Rg, Lg // dgdx = Rg - Lg - mul dgdx, inv // dgdx *= FixedInvU(width) + subs dgdx, 
Rg, Lg // dgdx = Rg - Lg + mulne dgdx, inv // dgdx *= FixedInvU(width) asr dgdx, #16 // dgdx >>= 16 // g == Lg (alias) diff --git a/src/platform/gba/asm/rasterizeGTA.s b/src/platform/gba/asm/rasterizeGTA.s index 94587744..a1716966 100644 --- a/src/platform/gba/asm/rasterizeGTA.s +++ b/src/platform/gba/asm/rasterizeGTA.s @@ -106,17 +106,17 @@ rasterizeGTA_asm: divLUT tmp, Lh // tmp = FixedInvU(Lh) fiq_on - ldrsh Ltmp, [N, #VERTEX_X] - sub Ltmp, Lx, asr #16 - mul Ldx, tmp, Ltmp // Ldx = tmp * (N->v.x - Lx) + ldrsh Ldx, [N, #VERTEX_X] + subs Ldx, Lx, asr #16 + mulne Ldx, tmp, Ldx // Ldx = tmp * (N->v.x - Lx) - ldrb Ltmp, [N, #VERTEX_G] - sub Ltmp, Lg, lsr #8 - mul Ldg, tmp, Ltmp // Ldg = tmp * (N->v.g - Lg) + ldrb Ldg, [N, #VERTEX_G] + subs Ldg, Lg, lsr #8 + mulne Ldg, tmp, Ldg // Ldg = tmp * (N->v.g - Lg) asr Ldg, #8 // 8-bit for fractional part ldr Ldt, [N, #VERTEX_T] - sub Ldt, Lt // Ldt = N->v.t - Lt + subs Ldt, Lt // Ldt = N->v.t - Lt scaleUV Ldt, Ltmp, Ltmp2, tmp fiq_off .calc_left_end: @@ -145,17 +145,17 @@ rasterizeGTA_asm: divLUT tmp, Rh // tmp = FixedInvU(Rh) fiq_on - ldrsh Rtmp, [N, #VERTEX_X] - sub Rtmp, Rx, asr #16 - mul Rdx, tmp, Rtmp // Rdx = tmp * (N->v.x - Rx) + ldrsh Rdx, [N, #VERTEX_X] + subs Rdx, Rx, asr #16 + mulne Rdx, tmp, Rdx // Rdx = tmp * (N->v.x - Rx) - ldrb Rtmp, [N, #VERTEX_G] - sub Rtmp, Rg, lsr #8 - mul Rdg, tmp, Rtmp // Rdg = tmp * (N->v.g - Rg) + ldrb Rdg, [N, #VERTEX_G] + subs Rdg, Rg, lsr #8 + mulne Rdg, tmp, Rdg // Rdg = tmp * (N->v.g - Rg) asr Rdg, #8 // 8-bit for fractional part ldr Rdt, [N, #VERTEX_T] - sub Rdt, Rt // Rdt = N->v.t - Rt + subs Rdt, Rt // Rdt = N->v.t - Rt scaleUV Rdt, Rtmp, Rtmp2, tmp fiq_off .calc_right_end: @@ -182,12 +182,12 @@ rasterizeGTA_asm: divLUT inv, width // inv = FixedInvU(width) - sub dtdx, Rt, Lt // dtdx = Rt - Lt + subs dtdx, Rt, Lt // dtdx = Rt - Lt scaleUV dtdx, dtmp, dtmp2, inv // t == Lt (alias) - sub dgdx, Rg, Lg // dgdx = Rg - Lg - mul dgdx, inv // dgdx *= FixedInvU(width) + subs dgdx, Rg, Lg 
// dgdx = Rg - Lg + mulne dgdx, inv // dgdx *= FixedInvU(width) asr dgdx, #16 // dgdx >>= 16 // g == Lg (alias) diff --git a/src/platform/gba/asm/rasterizeS.s b/src/platform/gba/asm/rasterizeS.s index 32e57d1f..227b9867 100644 --- a/src/platform/gba/asm/rasterizeS.s +++ b/src/platform/gba/asm/rasterizeS.s @@ -23,8 +23,6 @@ Ry2 .req Rh Lxy .req tmp Ly2 .req Lh indexB .req pair -Ltmp .req N -Rtmp .req N .global rasterizeS_asm rasterizeS_asm: @@ -52,9 +50,9 @@ rasterizeS_asm: divLUT tmp, Lh // tmp = FixedInvU(Lh) - ldrsh Ltmp, [L, #VERTEX_X] - sub Ltmp, Lx, asr #16 - mul Ldx, tmp, Ltmp // Ldx = tmp * (N->v.x - Lx) + ldrsh Ldx, [L, #VERTEX_X] + subs Ldx, Lx, asr #16 + mulne Ldx, tmp, Ldx // Ldx = tmp * (N->v.x - Lx) .calc_left_end: cmp Rh, #0 @@ -76,9 +74,9 @@ rasterizeS_asm: divLUT tmp, Rh // tmp = FixedInvU(Rh) - ldrsh Rtmp, [R, #VERTEX_X] - sub Rtmp, Rx, asr #16 - mul Rdx, tmp, Rtmp // Rdx = tmp * (N->v.x - Rx) + ldrsh Rdx, [R, #VERTEX_X] + subs Rdx, Rx, asr #16 + mulne Rdx, tmp, Rdx // Rdx = tmp * (N->v.x - Rx) .calc_right_end: cmp Rh, Lh // if (Rh < Lh)