From 3ba5ec38183bec7194729b4e8d09d36f0d5801d9 Mon Sep 17 00:00:00 2001 From: XProger Date: Sun, 4 Dec 2022 03:15:53 +0300 Subject: [PATCH] #368 GBA matrixRotateYXZ fetch sincos address lut once --- src/platform/gba/asm/faceAddMeshQuads.s | 13 +++++------ src/platform/gba/asm/faceAddMeshTriangles.s | 11 +++++----- src/platform/gba/asm/matrixRotate.s | 24 +++++++++++++++------ 3 files changed, 30 insertions(+), 18 deletions(-) diff --git a/src/platform/gba/asm/faceAddMeshQuads.s b/src/platform/gba/asm/faceAddMeshQuads.s index ca035a1a..8d54b55c 100644 --- a/src/platform/gba/asm/faceAddMeshQuads.s +++ b/src/platform/gba/asm/faceAddMeshQuads.s @@ -49,9 +49,10 @@ faceAddMeshQuads_asm: .loop: ldrh vp0, [polys], #2 + ldrh vp2, [polys], #4 // + flags + lsr vp1, vp0, #8 and vp0, #0xFF - ldrh vp2, [polys], #4 // + flags lsr vp3, vp2, #8 and vp2, #0xFF @@ -85,11 +86,11 @@ faceAddMeshQuads_asm: orrne flags, #FACE_CLIPPED // depth = AVG_Z4 - lsl vg0, #16 - add depth, vg0, vg1, lsl #16 - add depth, vg2, lsl #16 - add depth, vg3, lsl #16 - lsr depth, #(16 + 2) + lsl vg0, #16 // clip g part (high half) + add depth, vg0, vg1, lsl #16 // depth = vz0 + vz1 + add depth, vg2, lsl #16 // depth += vz2 + add depth, vg3, lsl #16 // depth += vz3 + lsr depth, #(16 + 2) // depth /= 4 // faceAdd rsb vp0, vertices, vp0, lsr #3 diff --git a/src/platform/gba/asm/faceAddMeshTriangles.s b/src/platform/gba/asm/faceAddMeshTriangles.s index 87c10bd4..38d49a54 100644 --- a/src/platform/gba/asm/faceAddMeshTriangles.s +++ b/src/platform/gba/asm/faceAddMeshTriangles.s @@ -47,9 +47,10 @@ faceAddMeshTriangles_asm: .loop: ldrh vp0, [polys], #2 + ldrh vp2, [polys], #4 // + flags + lsr vp1, vp0, #8 and vp0, #0xFF - ldrh vp2, [polys], #4 // + flags and vp2, #0xFF add vp0, vp, vp0, lsl #3 @@ -77,10 +78,10 @@ faceAddMeshTriangles_asm: orrne flags, #FACE_CLIPPED // depth = AVG_Z3 - lsl vg0, #16 - add depth, vg0, vg1, lsl #16 - add depth, vg2, lsl #17 - lsr depth, #(16 + 2) + lsl vg0, #16 // clip g part (high half) 
+ add depth, vg0, vg1, lsl #16 // depth = vz0 + vz1 + add depth, vg2, lsl #17 // depth += vz2 * 2 + lsr depth, #(16 + 2) // depth /= 4 // faceAdd rsb vp0, vertices, vp0, lsr #3 diff --git a/src/platform/gba/asm/matrixRotate.s b/src/platform/gba/asm/matrixRotate.s index e7776af3..114b4fc0 100644 --- a/src/platform/gba/asm/matrixRotate.s +++ b/src/platform/gba/asm/matrixRotate.s @@ -8,6 +8,13 @@ mov \sin, \sin, asr #16 .endm +.macro sincosLUT lut, angle, sin, cos + ldr \sin, [\lut, \angle, lsl #2] + mov \cos, \sin, lsl #16 + mov \cos, \cos, asr #16 + mov \sin, \sin, asr #16 +.endm + .macro rotxy x, y, sin, cos, t mul \t, \y, \cos mla \t, \x, \sin, \t @@ -126,6 +133,7 @@ e00 .req r3 e01 .req r4 e02 .req r5 e10 .req r6 +scLUT .req r7 // FIQ regs e11 .req r8 e12 .req r9 @@ -151,14 +159,16 @@ matrixRotateYXZ_asm: and angleY, mask, angleY, lsr #4 and angleZ, mask, angleZ, lsr #4 -matrixRotateYXZ_fast_asm: +matrixRotateYXZ_fast_asm: // routine for pre-shifted angles orr mask, angleX, angleY orrs mask, mask, angleZ bxeq lr - stmfd sp!, {r4-r6} + stmfd sp!, {r4-r7} fiq_on + ldr scLUT, =gSinCosTable + ldr mm, =gMatrixPtr ldr mm, [mm] ldmia mm, {e00, e01, e02} @@ -171,7 +181,7 @@ matrixRotateYXZ_fast_asm: cmp angleY, #0 beq .rotX - sincos angleY, sinY, cosY + sincosLUT scLUT, angleY, sinY, cosY rotxy e00, e02, sinY, cosY, tmp rotxy e10, e12, sinY, cosY, tmp @@ -181,7 +191,7 @@ matrixRotateYXZ_fast_asm: cmp angleX, #0 beq .rotZ - sincos angleX, sinX, cosX + sincosLUT scLUT, angleX, sinX, cosX rotxy e02, e01, sinX, cosX, tmp rotxy e12, e11, sinX, cosX, tmp @@ -191,13 +201,13 @@ matrixRotateYXZ_fast_asm: cmp angleZ, #0 beq .done - sincos angleZ, sinZ, cosZ + sincosLUT scLUT, angleZ, sinZ, cosZ rotxy e01, e00, sinZ, cosZ, tmp rotxy e11, e10, sinZ, cosZ, tmp rotxy e21, e20, sinZ, cosZ, tmp -.done: +.done: ldr mm, =gMatrixPtr ldr mm, [mm] @@ -208,7 +218,7 @@ matrixRotateYXZ_fast_asm: stmia mm, {e20, e21, e22} fiq_off - ldmfd sp!, {r4-r6} + ldmfd sp!, {r4-r7} bx lr q .req r0 // arg