Skip to content

Commit

Permalink
#368 GBA delta encoding for room quads and mesh indices, OT depth cal…
Browse files Browse the repository at this point in the history
…c optimization, merge near/far clipping flags (CLIP_PLANE) to clear one of the high bits, revert smull optimization for scaleUV due to overflow issues (low half word affects high)
  • Loading branch information
XProger committed Dec 11, 2022
1 parent 40eb268 commit f4d36d8
Show file tree
Hide file tree
Showing 18 changed files with 369 additions and 232 deletions.
11 changes: 7 additions & 4 deletions src/fixed/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -641,8 +641,9 @@ struct RoomQuad
uint32 flags;
uint16 indices[4];
#else
int8 indices[4];
uint16 flags;
uint16 indices[4];
uint16 padding;
#endif
};

Expand All @@ -652,8 +653,8 @@ struct RoomTriangle
uint32 flags;
uint16 indices[4];
#else
uint16 flags;
uint16 indices[3];
uint16 flags;
#endif
};

Expand All @@ -663,8 +664,9 @@ struct MeshQuad
uint32 flags;
uint32 indices;
#else
int8 indices[4];
uint16 flags;
uint8 indices[4];
uint16 padding;
#endif
};

Expand All @@ -674,8 +676,9 @@ struct MeshTriangle
uint32 flags;
uint32 indices;
#else
int8 indices[4];
uint16 flags;
uint8 indices[4];
uint16 padding;
#endif
};

Expand Down
12 changes: 8 additions & 4 deletions src/fixed/level.h
Original file line number Diff line number Diff line change
Expand Up @@ -187,14 +187,18 @@ void readLevel(const uint8* data)
m->start = spriteSeq->start;
}

#ifdef USE_VRAM_MESH // experimental, should be per level or dynamic
vramMeshesCount = 0;
// experimental
#if defined(USE_VRAM_MESH) || defined(USE_VRAM_ROOM)
vramPtr = (uint8*)0x06014000;
#endif

#ifdef USE_VRAM_MESH // should be per level or dynamic
vramMeshesCount = 0;
vramPtr = pushToVRAM(vramPtr, models + ITEM_LARA);
vramPtr = pushToVRAM(vramPtr, models + ITEM_LARA_PISTOLS);
vramPtr = pushToVRAM(vramPtr, models + ITEM_LARA_SHOTGUN);
vramPtr = pushToVRAM(vramPtr, models + ITEM_LARA_MAGNUMS);
vramPtr = pushToVRAM(vramPtr, models + ITEM_LARA_UZIS);
//vramPtr = pushToVRAM(vramPtr, models + ITEM_LARA_MAGNUMS);
//vramPtr = pushToVRAM(vramPtr, models + ITEM_LARA_UZIS);
vramPtr = pushToVRAM(vramPtr, models + ITEM_WOLF);
vramPtr = pushToVRAM(vramPtr, models + ITEM_BAT);
vramPtr = pushToVRAM(vramPtr, models + ITEM_BRIDGE_FLAT);
Expand Down
25 changes: 10 additions & 15 deletions src/fixed/room.h
Original file line number Diff line number Diff line change
Expand Up @@ -383,10 +383,10 @@ bool Room::checkPortal(const Portal* portal)
return false;
//#endif

int32 x0 = clip.x1;
int32 y0 = clip.y1;
int32 x1 = clip.x0;
int32 y1 = clip.y0;
int32 x0 = clip.x1 - (FRAME_WIDTH >> 1);
int32 y0 = clip.y1 - (FRAME_HEIGHT >> 1);
int32 x1 = clip.x0 - (FRAME_WIDTH >> 1);
int32 y1 = clip.y0 - (FRAME_HEIGHT >> 1);

int32 znear = 0, zfar = 0;

Expand Down Expand Up @@ -419,17 +419,7 @@ bool Room::checkPortal(const Portal* portal)
y >>= FIXED_SHIFT;
z >>= FIXED_SHIFT;

int32 dz = PERSPECTIVE_DZ(z);

if (dz > 0) {
PERSPECTIVE(x, y, z);

x += FRAME_WIDTH >> 1;
y += FRAME_HEIGHT >> 1;
} else {
x = (x < 0) ? viewport.x0 : viewport.x1;
y = (y < 0) ? viewport.y0 : viewport.y1;
}
PERSPECTIVE(x, y, z);

if (x < x0) x0 = x;
if (x > x1) x1 = x;
Expand All @@ -440,6 +430,11 @@ bool Room::checkPortal(const Portal* portal)
if (znear == 4 || zfar == 4)
return false;

x0 += (FRAME_WIDTH >> 1);
y0 += (FRAME_HEIGHT >> 1);
x1 += (FRAME_WIDTH >> 1);
y1 += (FRAME_HEIGHT >> 1);

if (znear)
{
vec3i *a = pv;
Expand Down
23 changes: 12 additions & 11 deletions src/platform/gba/asm/common_asm.inc
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,9 @@
.equ CLIP_RIGHT, ((1 << 1) << 8)
.equ CLIP_TOP, ((1 << 2) << 8)
.equ CLIP_BOTTOM, ((1 << 3) << 8)
.equ CLIP_FAR, ((1 << 4) << 8)
.equ CLIP_NEAR, ((1 << 5) << 8)
.equ CLIP_FRAME, ((1 << 6) << 8)
.equ CLIP_DISCARD, (CLIP_LEFT + CLIP_RIGHT + CLIP_TOP + CLIP_BOTTOM + CLIP_FAR + CLIP_NEAR)
.equ CLIP_PLANE, ((1 << 4) << 8)
.equ CLIP_FRAME, ((1 << 5) << 8)
.equ CLIP_DISCARD, (CLIP_LEFT + CLIP_RIGHT + CLIP_TOP + CLIP_BOTTOM + CLIP_PLANE)
.equ CLIP_MASK, 0xFF00

.equ FACE_TYPE_SHIFT, 14
Expand Down Expand Up @@ -121,15 +120,17 @@
ble \skip
.endm

.macro scaleUV uv, tmp, tmp2, f
smullne \tmp2, \tmp, \uv, \f // u = (f * uv) >> 32
.macro scaleUV uv, tmp, unused, f
asrs \tmp, \uv, #16
mulne \tmp, \f, \tmp // u = f * int16(uv >> 16)
lsrne \tmp, #16
lslne \tmp, #16

lsl \uv, #16
asrs \uv, #16
mulne \uv, \f // v = f * int16(uv)
lsr \uv, #16
lsls \uv, \uv, #16
asrne \uv, #16
mulne \uv, \f, \uv // v = f * int16(uv)

orr \uv, \uv, \tmp, lsl #16 // uv = (u & 0xFFFF0000) | (v >> 16)
orr \uv, \tmp, \uv, lsr #16 // uv = (u & 0xFFFF0000) | (v >> 16)
.endm

.macro tex index, uv
Expand Down
70 changes: 39 additions & 31 deletions src/platform/gba/asm/faceAddMeshQuads.s
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

polys .req r0 // arg
count .req r1 // arg
vp .req r2
mask .req r2
vg0 .req r3
vg1 .req r4
vg2 .req r5
Expand All @@ -26,6 +26,14 @@ vy2 .req vg2

depth .req vg0

i0 .req vp0
i1 .req vp1
i2 .req vp2
i3 .req vg3 // vg to save vp3 value between iterations

vp01 .req vp1
vp23 .req vp2

tmp .req flags
next .req vp0

Expand All @@ -34,8 +42,8 @@ faceAddMeshQuads_asm:
stmfd sp!, {r4-r7}
fiq_on

ldr vp, =gVerticesBase
ldr vp, [vp]
ldr vp3, =gVerticesBase
ldr vp3, [vp3]

ldr vertices, =gVertices
lsr vertices, #3
Expand All @@ -44,26 +52,27 @@ faceAddMeshQuads_asm:
ldr face, [face]

ldr ot, =gOT

add polys, #2 // skip flags
mov mask, #(0xFF << 24)
orr mask, #(3 << 8) // div 4 mul 4 for depth

.loop:
ldrh vp0, [polys], #2
ldrh vp2, [polys], #4 // + flags
// sizeof(MeshQuad) == 8
ldr tmp, [polys], #8 // skip flags

lsr vp1, vp0, #8
and vp0, #0xFF
lsr vp3, vp2, #8
and vp2, #0xFF
// unpack index deltas
and vg0, mask, tmp, lsl #24
and vg1, mask, tmp, lsl #16
and vg2, mask, tmp, lsl #8
and vg3, mask, tmp

add vp0, vp, vp0, lsl #3
add vp1, vp, vp1, lsl #3
add vp2, vp, vp2, lsl #3
// sizeof(Vertex) = (1 << 3)
add vp0, vp3, vg0, asr #(24 - 3)
add vp1, vp0, vg1, asr #(24 - 3)
add vp2, vp1, vg2, asr #(24 - 3)
add vp3, vp2, vg3, asr #(24 - 3)

CCW .skip

add vp3, vp, vp3, lsl #3

// fetch [c, g, zz]
ldr vg0, [vp0, #VERTEX_Z]
ldr vg1, [vp1, #VERTEX_Z]
Expand All @@ -82,28 +91,27 @@ faceAddMeshQuads_asm:
orr tmp, vg2
orr tmp, vg3
tst tmp, #(CLIP_FRAME << 16)
ldrh flags, [polys, #-8]
ldrh flags, [polys, #-4]
orrne flags, #FACE_CLIPPED

// depth = AVG_Z4
lsl vg0, #16 // clip g part (high half)
add depth, vg0, vg1, lsl #16 // depth = vz0 + vz1
add depth, vg2, lsl #16 // depth += vz2
add depth, vg3, lsl #16 // depth += vz3
lsr depth, #(16 + 2) // depth /= 4
add depth, vg0, vg1 // depth = vz0 + vz1
add depth, vg2 // depth += vz2
add depth, vg3 // depth += vz3
bic depth, depth, mask, asr #8 // clear high half (g & clip flags) and low 2 bits

// faceAdd
rsb vp0, vertices, vp0, lsr #3
rsb vp1, vertices, vp1, lsr #3
rsb vp2, vertices, vp2, lsr #3
rsb vp3, vertices, vp3, lsr #3
rsb i0, vertices, vp0, lsr #3
rsb i1, vertices, vp1, lsr #3
rsb i2, vertices, vp2, lsr #3
rsb i3, vertices, vp3, lsr #3

orr vp1, vp0, vp1, lsl #16
orr vp3, vp2, vp3, lsl #16
orr vp01, i0, i1, lsl #16
orr vp23, i2, i3, lsl #16

ldr next, [ot, depth, lsl #2]
str face, [ot, depth, lsl #2]
stmia face!, {flags, next, vp1, vp3}
ldr next, [ot, depth]
str face, [ot, depth]
stmia face!, {flags, next, vp01, vp23}
.skip:
subs count, #1
bne .loop
Expand Down
77 changes: 42 additions & 35 deletions src/platform/gba/asm/faceAddMeshTriangles.s
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,16 @@

polys .req r0 // arg
count .req r1 // arg
vp .req r2
vg0 .req r3
vg1 .req r4
vg2 .req r5
vg3 .req r6
mask .req r2
flags .req r3
vp0 .req r4
vp1 .req r5
vp2 .req r6
// FIQ regs
flags .req r8
vp0 .req r9
vp1 .req r10
vp2 .req r11
vg0 .req r8
vg1 .req r9
vg2 .req r10
vg3 .req r11
vertices .req r12
ot .req r13
face .req r14
Expand All @@ -25,6 +25,12 @@ vy2 .req vg2

depth .req vg0

i0 .req vg1
i1 .req vg2
i2 .req vg3 // vg to save vp2 value between iterations

vp01 .req vp1

tmp .req flags
next .req vp0

Expand All @@ -33,29 +39,31 @@ faceAddMeshTriangles_asm:
stmfd sp!, {r4-r6}
fiq_on

ldr vp, =gVerticesBase
ldr vp, [vp]
ldr vp2, =gVerticesBase
ldr vp2, [vp2]

ldr vertices, =gVertices
lsr vertices, #3

ldr face, =gFacesBase
ldr face, [face]

ldr ot, =gOT
ldr vertices, =gVertices
lsr vertices, #3

add polys, #2 // skip flags
mov mask, #(0xFF << 24)
orr mask, #(3 << 8) // div 4 mul 4 for depth

.loop:
ldrh vp0, [polys], #2
ldrh vp2, [polys], #4 // + flags
// sizeof(MeshTriangle) == 8
ldr tmp, [polys], #8 // skip flags

lsr vp1, vp0, #8
and vp0, #0xFF
and vp2, #0xFF
// unpack index deltas
and vg0, mask, tmp, lsl #24
and vg1, mask, tmp, lsl #16

add vp0, vp, vp0, lsl #3
add vp1, vp, vp1, lsl #3
add vp2, vp, vp2, lsl #3
// sizeof(Vertex) = (1 << 3)
add vp0, vp2, vg0, asr #(24 - 3)
add vp1, vp0, vg1, asr #(24 - 3)
add vp2, vp1, tmp, asr #(24 - 3) // 3rd vertex in 4th byte after zero byte to save one masking op

CCW .skip

Expand All @@ -74,27 +82,26 @@ faceAddMeshTriangles_asm:
orr tmp, vg0, vg1
orr tmp, vg2
tst tmp, #(CLIP_FRAME << 16)
ldrh flags, [polys, #-8]
ldrh flags, [polys, #-4]
orrne flags, #FACE_CLIPPED

// depth = AVG_Z3
lsl vg0, #16 // clip g part (high half)
add depth, vg0, vg1, lsl #16 // depth = vz0 + vz1
add depth, vg2, lsl #17 // depth += vz2 * 2
lsr depth, #(16 + 2) // depth /= 4
add depth, vg0, vg1 // depth = vz0 + vz1
add depth, vg2, lsl #1 // depth += vz2 * 2
bic depth, depth, mask, asr #8 // clear high half (g & clip flags) and low 2 bits

// faceAdd
rsb vp0, vertices, vp0, lsr #3
rsb vp1, vertices, vp1, lsr #3
rsb vp2, vertices, vp2, lsr #3
rsb i0, vertices, vp0, lsr #3
rsb i1, vertices, vp1, lsr #3
rsb i2, vertices, vp2, lsr #3

orr vp1, vp0, vp1, lsl #16
orr vp01, i0, i1, lsl #16

orr flags, #FACE_TRIANGLE

ldr next, [ot, depth, lsl #2]
str face, [ot, depth, lsl #2]
stmia face!, {flags, next, vp1, vp2}
ldr next, [ot, depth]
str face, [ot, depth]
stmia face!, {flags, next, vp01, i2}
.skip:
subs count, #1
bne .loop
Expand Down
Loading

0 comments on commit f4d36d8

Please sign in to comment.