diff --git a/src/fixed/common.h b/src/fixed/common.h
index 5878d9cb..21b679a4 100644
--- a/src/fixed/common.h
+++ b/src/fixed/common.h
@@ -2146,17 +2146,21 @@ X_INLINE Matrix& matrixGet()
     return *matrixPtr;
 }
 
-X_INLINE void matrixPush()
+#ifdef USE_ASM
+    extern "C" void matrixPush_asm();
+    #define matrixPush() matrixPush_asm()
+#else
+    #define matrixPush() matrixPush_c()
+// matrixPush() expands without a trailing ';' so that "matrixPush();" is exactly one statement (safe in un-braced if/else). matrixPush_c copies the top matrix into the next slot and advances matrixPtr.
+X_INLINE void matrixPush_c()
 {
     ASSERT(matrixPtr - matrixStack < MAX_MATRICES);
     memcpy(matrixPtr + 1, matrixPtr, sizeof(Matrix));
     matrixPtr++;
 }
+#endif
 
-X_INLINE void matrixPop()
-{
-    matrixPtr--;
-}
+#define matrixPop() matrixPtr-- /* drop the top matrix by stepping matrixPtr back one slot; no underflow check is performed here */
 
 X_INLINE void matrixSetBasis(Matrix &dst, const Matrix &src)
 {
diff --git a/src/platform/3do/ccbMap3.s b/src/platform/3do/ccbMap3.s
deleted file mode 100644
index b9b10391..00000000
--- a/src/platform/3do/ccbMap3.s
+++ /dev/null
@@ -1,73 +0,0 @@
-    AREA |C$$code|, CODE, READONLY
-|x$codeseg|
-
-    INCLUDE common_asm.inc
-
-    EXPORT ccbMap3_asm
-
-ccbMap3_asm
-
-face     RN r0
-vp0      RN r1
-vp1      RN r2
-vp2      RN r3
-
-vx0      RN vp0
-vy0      RN vp1
-xpos     RN vx0
-ypos     RN vy0
-
-vx1      RN vp2
-vy1      RN r4
-hdx0     RN vx1
-hdy0     RN vy1
-
-vx2      RN r5
-vy2      RN r6
-vdx0     RN vx2
-vdy0     RN vy2
-
-ws       RN r12
-hs       RN lr
-shift    RN hs
-
-hddx     RN ws
-hddy     RN hs
-
-        stmfd sp!, {r4-r6, lr}
-
-        add face, face, #16     ; offset to ccb_XPos
-        ldr shift, [sp, #16]    ; skip 4 regs stored on the stack
-        ldmia vp2, {vx2, vy2}
-        ldmia vp1, {vx1, vy1}
-        ldmia vp0, {vx0, vy0}
-
-        and ws, shift, #0xFF
-        mov hs, shift, lsr #8
-        and hs, hs, #0xFF
-
-        sub hdx0, vx1, vx0
-        sub hdy0, vy1, vy0
-        mov hdx0, hdx0, lsl ws
-        mov hdy0, hdy0, lsl ws
-
-        sub vdx0, vx2, vx0
-        sub vdy0, vy2, vy0
-        mov vdx0, vdx0, lsl hs
-        mov vdy0, vdy0, lsl hs
-
-        rsb hs, hs, #16
-        mov hddx, hdx0, asr hs
-        mov hddy, hdy0, asr hs
-        rsb hddx, hddx, #0
-        rsb hddy, hddy, #0
-
-        mov xpos, vx0, lsl #16
-        mov ypos, vy0, lsl #16
-        add xpos, xpos, #(FRAME_WIDTH << 15)
-        add ypos, ypos, #(FRAME_HEIGHT << 15)
-
-        stmia face, {xpos, ypos, hdx0, hdy0, vdx0, vdy0, hddx, hddy}
-
-        ldmfd sp!, {r4-r6, pc}
-    END
diff --git a/src/platform/3do/ccbMap4.s b/src/platform/3do/ccbMap4.s
deleted file mode 100644
index 500a8747..00000000
--- a/src/platform/3do/ccbMap4.s
+++ /dev/null
@@ -1,85 +0,0 @@
-    AREA |C$$code|, CODE, READONLY
-|x$codeseg|
-
-    INCLUDE common_asm.inc
-
-    EXPORT ccbMap4_asm
-
-ccbMap4_asm
-
-face     RN r0
-vp0      RN r1
-vp1      RN r2
-vp2      RN r3
-vp3      RN r4
-
-vx0      RN vp0
-vy0      RN vp1
-xpos     RN vx0
-ypos     RN vy0
-
-vx1      RN vp2
-vy1      RN vp3
-hdx0     RN vx1
-hdy0     RN vy1
-
-vx3      RN r5
-vy3      RN r6
-vdx0     RN vx3
-vdy0     RN vy3
-
-vx2      RN r7
-vy2      RN r8
-hdx1     RN vx2
-hdy1     RN vy2
-hddx     RN hdx1
-hddy     RN hdy1
-
-ws       RN r12
-hs       RN lr
-shift    RN hs
-
-        stmfd sp!, {r4-r8, lr}
-
-        add face, face, #16     ; offset to ccb_XPos
-        add shift, sp, #24      ; skip 6 regs stored on the stack
-        ldmia shift, {vp3, shift}
-        ldmia vp3, {vx3, vy3}
-        ldmia vp2, {vx2, vy2}
-        ldmia vp1, {vx1, vy1}
-        ldmia vp0, {vx0, vy0}
-
-        and ws, shift, #0xFF
-        mov hs, shift, lsr #8
-        and hs, hs, #0xFF
-
-        sub hdx1, vx2, vx3
-        sub hdy1, vy2, vy3
-        mov hdx1, hdx1, lsl ws
-        mov hdy1, hdy1, lsl ws
-
-        sub hdx0, vx1, vx0
-        sub hdy0, vy1, vy0
-        mov hdx0, hdx0, lsl ws
-        mov hdy0, hdy0, lsl ws
-
-        sub vdx0, vx3, vx0
-        sub vdy0, vy3, vy0
-        mov vdx0, vdx0, lsl hs
-        mov vdy0, vdy0, lsl hs
-
-        rsb hs, hs, #16
-        sub hddx, hdx1, hdx0
-        sub hddy, hdy1, hdy0
-        mov hddx, hddx, asr hs
-        mov hddy, hddy, asr hs
-
-        mov xpos, vx0, lsl #16
-        mov ypos, vy0, lsl #16
-        add xpos, xpos, #(FRAME_WIDTH << 15)
-        add ypos, ypos, #(FRAME_HEIGHT << 15)
-
-        stmia face, {xpos, ypos, hdx0, hdy0, vdx0, vdy0, hddx, hddy}
-
-        ldmfd sp!, {r4-r8, pc}
-    END
diff --git a/src/platform/3do/common_asm.inc b/src/platform/3do/common_asm.inc
index 34866910..63635808 100644
--- a/src/platform/3do/common_asm.inc
+++ b/src/platform/3do/common_asm.inc
@@ -16,6 +16,7 @@ CCB_ACW         EQU 0x00040000
 CCB_ALSC        EQU 0x00080000
 CCB_ACSC        EQU 0x00100000
 CCB_YOXY        EQU 0x00200000
+CCB_CCBPRE      EQU 0x00400000
 CCB_LDPLUT      EQU 0x00800000
 CCB_LDPPMP      EQU 0x01000000
 CCB_LDPRS       EQU 0x02000000
diff --git a/src/platform/3do/faceAddMeshQuads.s b/src/platform/3do/faceAddMeshQuads.s
new file mode 100644
index 00000000..99a15358
--- /dev/null
+++ b/src/platform/3do/faceAddMeshQuads.s
@@ -0,0 +1,249 @@
+    AREA |C$$code|, CODE, READONLY
+|x$codeseg|
+
+    INCLUDE common_asm.inc
+
+    EXPORT faceAddMeshQuads_asm
+
+faceAddMeshQuads_asm
+
+polysArg    RN r0
+countArg    RN r1
+shadeArg    RN r2
+
+flags       RN polysArg
+
+vx0         RN shadeArg
+vy0         RN r3
+
+vx1         RN r4
+vy1         RN r5
+
+vx3         RN r6
+vy3         RN r7
+
+vx2         RN r8
+vy2         RN r9
+
+pixc        RN r10
+tex         RN r11
+
+face        RN r12
+depth       RN lr
+
+mask        RN depth
+
+fPolys      RN countArg
+fLast       RN tex
+fVertices   RN face
+
+spPolys     RN vx0
+spLast      RN vx1
+spVertices  RN vy3
+spOT        RN vx2
+spFaceBase  RN vy2
+spTextures  RN tex
+spPalette   RN face
+
+faceBase    RN vy2
+cross       RN vy2
+
+indices     RN vy0
+
+vz0         RN vy0
+vz1         RN vy1
+vz2         RN vy2
+vz3         RN vy3
+
+vp0         RN vx0
+vp1         RN vx1
+vp2         RN vx2
+vp3         RN vx3
+
+xpos        RN vx0
+ypos        RN vy0
+hdx0        RN vx1
+hdy0        RN vy1
+hdx1        RN vx2
+hdy1        RN vy2
+vdx0        RN vx3
+vdy0        RN vy3
+hddx        RN hdx1
+hddy        RN hdy1
+
+nextPtr     RN vy2
+dataPtr     RN polysArg
+plutPtr     RN countArg
+
+tmp         RN countArg
+ot          RN countArg
+otTail      RN depth
+nextFace    RN depth
+
+plutOffset  RN vy2
+texIndex    RN vy2
+
+ws          RN tex
+hs          RN depth
+shift       RN depth
+
+SP_POLYS    EQU 0
+SP_LAST     EQU 4
+SP_VERTICES EQU 8
+SP_OT       EQU 12
+SP_FACEBASE EQU 16
+SP_TEXTURES EQU 20
+SP_PALETTE  EQU 24
+SP_SIZE     EQU 28
+
+        stmfd sp!, {r4-r11, lr}     ; entry: r0 = polys, r1 = count, r2 = shade; save r4-r11 and the return address
+        sub sp, sp, #SP_SIZE        ; reserve the SP_* scratch slots
+
+        mov pixc, shadeArg          ; r2 is reused as vx0/vp0 below, so keep the shade argument in pixc
+
+        add spLast, polysArg, countArg, lsl #3  ; end pointer = polys + count * 8 (two words per poly)
+        ldr spVertices, =gVertices
+        ldr spOT, =gOT
+        ldr spFaceBase, =gFacesBase
+        ldr spTextures, =level
+        ldr spTextures, [spTextures, #LVL_TEX_OFFSET]
+        ldr spPalette, =gPalette
+        ldr spPalette, [spPalette]
+
+        stmia sp, {polysArg, spLast, spVertices, spOT, spFaceBase, spTextures, spPalette}   ; stored in ascending register order, which matches SP_POLYS .. SP_PALETTE
+
+loop    ldmia sp, {fPolys, fLast, fVertices}    ; reload the loop state that the face setup below overwrites
+skip    cmp fPolys, fLast
+        bhs done                    ; unsigned compare: both operands are addresses
+
+        ldmia fPolys!, {flags, indices}     ; fetch one poly record and advance fPolys
+
+        ; get vertex pointers
+        and vp0, indices, #0xFF
+        mov vp1, indices, lsr #8
+        and vp1, vp1, #0xFF
+        mov vp2, indices, lsr #16
+        and vp2, vp2, #0xFF
+        mov vp3, indices, lsr #24
+
+        add vp0, vp0, vp0, lsl #1   ; index * 3 ...
+        add vp1, vp1, vp1, lsl #1
+        add vp2, vp2, vp2, lsl #1
+        add vp3, vp3, vp3, lsl #1
+
+        add vp0, fVertices, vp0, lsl #2     ; ... * 4 = 12 bytes per vertex
+        add vp1, fVertices, vp1, lsl #2
+        add vp2, fVertices, vp2, lsl #2
+        add vp3, fVertices, vp3, lsl #2
+
+        ; read z value with clip mask
+        ldr vz0, [vp0, #8]
+        ldr vz1, [vp1, #8]
+        ldr vz2, [vp2, #8]
+        ldr vz3, [vp3, #8]
+
+        ; check clipping
+        and mask, vz1, vz0
+        and mask, vz2, mask
+        and mask, vz3, mask
+        tst mask, #CLIP_MASK
+        bne skip                    ; some CLIP_MASK bit is set in all four vertices
+
+        ; depth = (vz0 + vz1 + vz2 + vz3) (DEPTH_Q_AVG)
+        add depth, vz0, vz1
+        add depth, depth, vz2
+        add depth, depth, vz3
+
+        ; (vx1 - vx0) * (vy3 - vy0) <= (vy1 - vy0) * (vx3 - vx0)
+        ldmia vp0, {vx0, vy0}
+        ldmia vp1, {vx1, vy1}
+        ldmia vp3, {vx3, vy3}
+        sub hdx0, vx1, vx0
+        sub hdy0, vy1, vy0
+        sub vdx0, vx3, vx0
+        sub vdy0, vy3, vy0
+        mul cross, hdy0, vdx0
+        rsb cross, cross, #0
+        mla cross, hdx0, vdy0, cross    ; cross = hdx0 * vdy0 - hdy0 * vdx0
+        teq cross, flags            ; N = bit 31 of (cross EOR flags)
+        bmi skip                    ; NOTE(review): sibling routines use "mlas / ble skip", which also rejects cross == 0 - confirm bit 31 of flags is meant to flip this test
+
+        ; poly is visible, store fPolys on the stack to reuse the reg
+        str fPolys, [sp, #SP_POLYS]
+
+        ; depth = max(0, (depth / 4) >> (CLIP_SHIFT + OT_SHIFT))
+        movs depth, depth, asr #(2 + CLIP_SHIFT + OT_SHIFT)     ; asr keeps the sign so the clamp below can fire (lsr by a non-zero amount never sets N)
+        movmi depth, #0
+
+        add tmp, sp, #SP_OT
+        ldmia tmp, {ot, faceBase, tex}      ; reload the SP_OT, SP_FACEBASE and SP_TEXTURES slots
+
+        ; faceAdd
+        cmp depth, #(OT_SIZE - 1)
+        movgt depth, #(OT_SIZE - 1)         ; clamp to the last OT entry
+        add ot, ot, depth, lsl #3   ; mul by size of OT element
+
+        ldr face, [faceBase]                ; take the next free CCB ...
+        add nextFace, face, #SIZE_OF_CCB
+        str nextFace, [faceBase]            ; ... and move the allocation pointer past it
+
+        ; get texture ptr
+        mov texIndex, flags, lsl #(32 - FACE_MIP_SHIFT)
+        add tex, tex, texIndex, lsr #(32 - FACE_MIP_SHIFT - 3)  ; sizeof(Texture) = 2^3
+
+        ; add face to Ordering Table
+        ldmia ot, {nextPtr, otTail}
+        cmp nextPtr, #0
+        moveq otTail, face                  ; first word was 0: the new face is stored as the second word too
+        stmia ot, {face, otTail}            ; new face becomes the first word; the old first word is written into the CCB below
+
+        ; ccb flags
+        ands flags, flags, #(1 << 30)
+        movne flags, #(CCB_BGND)            ; poly flag bit 30 selects CCB_BGND, otherwise start from 0
+        orr flags, flags, #(CCB_NOBLK)
+        orr flags, flags, #(CCB_ACE + CCB_ACCW + CCB_ACW + CCB_ALSC + CCB_ACSC + CCB_YOXY)
+        orr flags, flags, #(CCB_LDPLUT + CCB_LDPPMP + CCB_LDPRS + CCB_LDSIZE + CCB_PPABS + CCB_SPABS + CCB_NPABS)
+
+        ; ccbMap4
+        stmia face!, {flags, nextPtr}       ; first two CCB words; face advances by 8
+        ldmia tex, {dataPtr, shift}
+
+        ; plutPtr = plutOffset + (tex->shift >> 16) * sizeof(PLUT)
+        ldr plutOffset, [sp, #SP_PALETTE]
+        mov plutPtr, shift, lsr #16
+        add plutPtr, plutOffset, plutPtr, lsl #5
+
+        ldmia vp2, {vx2, vy2}
+        sub vx2, vx2, vx0
+        sub vy2, vy2, vy0
+        sub hdx1, vx2, vx3          ; vx3/vy3 hold vdx0/vdy0 by now, so this is (vx2 - vx3) of the loaded vertices
+        sub hdy1, vy2, vy3
+
+        and ws, shift, #0xFF        ; ws = bits 0-7 of shift
+        mov hs, shift, lsr #8
+        and hs, hs, #0xFF           ; hs = bits 8-15 of shift
+
+        mov hdx0, hdx0, lsl ws
+        mov hdy0, hdy0, lsl ws
+
+        mov vdx0, vdx0, lsl hs
+        mov vdy0, vdy0, lsl hs
+
+        rsb hs, hs, #16
+        rsb hddx, hdx0, hdx1, lsl ws    ; hddx = (hdx1 << ws) - hdx0
+        rsb hddy, hdy0, hdy1, lsl ws
+        mov hddx, hddx, asr hs
+        mov hddy, hddy, asr hs
+
+        mov xpos, vx0, lsl #16
+        mov ypos, vy0, lsl #16
+        add xpos, xpos, #(FRAME_WIDTH << 15)    ; add half the frame width, scaled by 2^16
+        add ypos, ypos, #(FRAME_HEIGHT << 15)
+
+        stmia face, {dataPtr, plutPtr, xpos, ypos, hdx0, hdy0, vdx0, vdy0, hddx, hddy, pixc}    ; r0-r10, written in ascending register order
+
+        b loop
+
+done    add sp, sp, #SP_SIZE
+        ldmfd sp!, {r4-r11, pc}     ; restore the saved registers and return
+    END
diff --git a/src/platform/3do/faceAddMeshQuadsFlat.s b/src/platform/3do/faceAddMeshQuadsFlat.s
new file mode 100644
index 00000000..6e9007ec
--- /dev/null
+++ b/src/platform/3do/faceAddMeshQuadsFlat.s
@@ -0,0 +1,229 @@
+    AREA |C$$code|, CODE, READONLY
+|x$codeseg|
+
+    INCLUDE common_asm.inc
+
+    EXPORT faceAddMeshQuadsFlat_asm
+
+faceAddMeshQuadsFlat_asm
+
+polysArg    RN r0
+countArg    RN r1
+shadeArg    RN r2
+
+flags       RN polysArg
+
+vx0         RN shadeArg
+vy0         RN r3
+
+vx1         RN r4
+vy1         RN r5
+
+vx3         RN r6
+vy3         RN r7
+
+vx2         RN r8
+vy2         RN r9
+
+pixc        RN r10
+color       RN r11
+
+face        RN r12
+depth       RN lr
+
+mask        RN depth
+
+fPolys      RN countArg
+fLast       RN color
+fVertices   RN face
+
+spPolys     RN vx0
+spLast      RN vx1
+spVertices  RN vy3
+spFlags     RN vx2
+spOT        RN vy2
+spFaceBase  RN color
+spPalette   RN face
+
+faceBase    RN vy2
+cross       RN vy2
+
+indices     RN vy0
+
+vz0         RN vy0
+vz1         RN vy1
+vz2         RN vy2
+vz3         RN vy3
+
+vp0         RN vx0
+vp1         RN vx1
+vp2         RN vx2
+vp3         RN vx3
+
+xpos        RN vx0
+ypos        RN vy0
+hdx0        RN vx1
+hdy0        RN vy1
+hdx1        RN vx2
+hdy1        RN vy2
+vdx0        RN vx3
+vdy0        RN vy3
+hddx        RN hdx1
+hddy        RN hdy1
+
+nextPtr     RN vy2
+dataPtr     RN color
+plutPtr     RN countArg
+
+tmp         RN countArg
+ot          RN countArg
+otTail      RN depth
+nextFace    RN depth
+
+plutOffset  RN color
+colorIndex  RN face
+
+SP_POLYS    EQU 0
+SP_LAST     EQU 4
+SP_VERTICES EQU 8
+SP_FLAGS    EQU 12
+SP_OT       EQU 16
+SP_FACEBASE EQU 20
+SP_PALETTE  EQU 24
+SP_SIZE     EQU 28
+
+        stmfd sp!, {r4-r11, lr}     ; entry: r0 = polys, r1 = count, r2 = shade; save r4-r11 and the return address
+        sub sp, sp, #SP_SIZE        ; reserve the SP_* scratch slots
+
+        mov pixc, shadeArg          ; r2 is reused as vx0/vp0 below, so keep the shade argument in pixc
+
+        add spLast, polysArg, countArg, lsl #3  ; end pointer = polys + count * 8 (two words per poly)
+        ldr spVertices, =gVertices
+        mov spFlags, #(CCB_NOBLK + CCB_BGND)    ; the CCB flag word is identical for every poly here, so build it once
+        orr spFlags, spFlags, #(CCB_ACE + CCB_ACCW + CCB_ACW + CCB_ALSC + CCB_ACSC + CCB_YOXY)
+        orr spFlags, spFlags, #(CCB_CCBPRE + CCB_LDPPMP + CCB_LDPRS + CCB_LDSIZE + CCB_PPABS + CCB_SPABS + CCB_NPABS)
+        ldr spOT, =gOT
+        ldr spFaceBase, =gFacesBase
+        ldr spPalette, =gPalette
+        ldr spPalette, [spPalette]
+
+        stmia sp, {polysArg, spLast, spVertices, spFlags, spOT, spFaceBase, spPalette}  ; stored in ascending register order, which matches SP_POLYS .. SP_PALETTE
+
+loop    ldmia sp, {fPolys, fLast, fVertices}    ; reload the loop state that the face setup below overwrites
+skip    cmp fPolys, fLast
+        bhs done                    ; unsigned compare: both operands are addresses
+
+        ldmia fPolys!, {flags, indices}     ; fetch one poly record and advance fPolys
+
+        ; get vertex pointers
+        and vp0, indices, #0xFF
+        mov vp1, indices, lsr #8
+        and vp1, vp1, #0xFF
+        mov vp2, indices, lsr #16
+        and vp2, vp2, #0xFF
+        mov vp3, indices, lsr #24
+
+        add vp0, vp0, vp0, lsl #1   ; index * 3 ...
+        add vp1, vp1, vp1, lsl #1
+        add vp2, vp2, vp2, lsl #1
+        add vp3, vp3, vp3, lsl #1
+
+        add vp0, fVertices, vp0, lsl #2     ; ... * 4 = 12 bytes per vertex
+        add vp1, fVertices, vp1, lsl #2
+        add vp2, fVertices, vp2, lsl #2
+        add vp3, fVertices, vp3, lsl #2
+
+        ; read z value with clip mask
+        ldr vz0, [vp0, #8]
+        ldr vz1, [vp1, #8]
+        ldr vz2, [vp2, #8]
+        ldr vz3, [vp3, #8]
+
+        ; check clipping
+        and mask, vz1, vz0
+        and mask, vz2, mask
+        and mask, vz3, mask
+        tst mask, #CLIP_MASK
+        bne skip                    ; some CLIP_MASK bit is set in all four vertices
+
+        ; depth = (vz0 + vz1 + vz2 + vz3) (DEPTH_Q_AVG)
+        add depth, vz0, vz1
+        add depth, depth, vz2
+        add depth, depth, vz3
+
+        ; (vx1 - vx0) * (vy3 - vy0) <= (vy1 - vy0) * (vx3 - vx0)
+        ldmia vp0, {vx0, vy0}
+        ldmia vp1, {vx1, vy1}
+        ldmia vp3, {vx3, vy3}
+        sub hdx0, vx1, vx0
+        sub hdy0, vy1, vy0
+        sub vdx0, vx3, vx0
+        sub vdy0, vy3, vy0
+        mul cross, hdy0, vdx0
+        rsb cross, cross, #0
+        mlas cross, hdx0, vdy0, cross   ; cross = hdx0 * vdy0 - hdy0 * vdx0, flags set from the result
+        ble skip                    ; zero or negative: poly is not drawn
+
+        ; poly is visible, store fPolys on the stack to reuse the reg
+        str fPolys, [sp, #SP_POLYS]
+
+        ; depth = max(0, (depth / 4) >> (CLIP_SHIFT + OT_SHIFT))
+        movs depth, depth, asr #(2 + CLIP_SHIFT + OT_SHIFT)     ; asr keeps the sign so the clamp below can fire (lsr by a non-zero amount never sets N)
+        movmi depth, #0
+
+        ; get color index from flags
+        and colorIndex, flags, #0xFF
+
+        add tmp, sp, #SP_FLAGS
+        ldmia tmp, {flags, ot, faceBase, plutOffset}    ; reload the SP_FLAGS, SP_OT, SP_FACEBASE and SP_PALETTE slots
+
+        ; get color ptr
+        add dataPtr, plutOffset, colorIndex, lsl #1     ; palette + colorIndex * 2
+
+        ; faceAdd
+        cmp depth, #(OT_SIZE - 1)
+        movgt depth, #(OT_SIZE - 1)         ; clamp to the last OT entry
+        add ot, ot, depth, lsl #3   ; mul by size of OT element
+
+        ldr face, [faceBase]                ; take the next free CCB ...
+        add nextFace, face, #SIZE_OF_CCB
+        str nextFace, [faceBase]            ; ... and move the allocation pointer past it
+
+        ; add face to Ordering Table
+        ldmia ot, {nextPtr, otTail}
+        cmp nextPtr, #0
+        moveq otTail, face                  ; first word was 0: the new face is stored as the second word too
+        stmia ot, {face, otTail}            ; new face becomes the first word; the old first word is written into the CCB below
+
+        ; ccbMap4 (colored)
+        stmia face, {flags, nextPtr, dataPtr}   ; CCB words 0-2; no writeback, face still points at the CCB start
+
+        ldmia vp2, {vx2, vy2}
+        sub vx2, vx2, vx0
+        sub vy2, vy2, vy0
+        sub hdx1, vx2, vx3          ; vx3/vy3 hold vdx0/vdy0 by now, so this is (vx2 - vx3) of the loaded vertices
+        sub hdy1, vy2, vy3
+
+        mov hdx0, hdx0, lsl #20
+        mov hdy0, hdy0, lsl #20
+
+        mov vdx0, vdx0, lsl #16
+        mov vdy0, vdy0, lsl #16
+
+        rsb hddx, hdx0, hdx1, lsl #20   ; hddx = (hdx1 << 20) - hdx0
+        rsb hddy, hdy0, hdy1, lsl #20
+
+        mov xpos, vx0, lsl #16
+        mov ypos, vy0, lsl #16
+        add xpos, xpos, #(FRAME_WIDTH << 15)    ; add half the frame width, scaled by 2^16
+        add ypos, ypos, #(FRAME_HEIGHT << 15)
+
+        add face, face, #16    ; skip flags, nextPtr, dataPtr, plutPtr
+
+        stmia face, {xpos, ypos, hdx0, hdy0, vdx0, vdy0, hddx, hddy, pixc}  ; r2-r10, written in ascending register order
+
+        b loop
+
+done    add sp, sp, #SP_SIZE
+        ldmfd sp!, {r4-r11, pc}     ; restore the saved registers and return
+    END
diff --git a/src/platform/3do/faceAddMeshTriangles.s b/src/platform/3do/faceAddMeshTriangles.s
new file mode 100644
index 00000000..9d4b6f5a
--- /dev/null
+++ b/src/platform/3do/faceAddMeshTriangles.s
@@ -0,0 +1,230 @@
+    AREA |C$$code|, CODE, READONLY
+|x$codeseg|
+
+    INCLUDE common_asm.inc
+
+    EXPORT faceAddMeshTriangles_asm
+
+faceAddMeshTriangles_asm
+
+polysArg    RN r0
+countArg    RN r1
+shadeArg    RN r2
+
+flags       RN polysArg
+
+vx0         RN shadeArg
+vy0         RN r3
+
+vx1         RN r4
+vy1         RN r5
+
+vx2         RN r6
+vy2         RN r7
+
+vx3         RN r8
+vy3         RN r9
+
+pixc        RN r10
+tex         RN r11
+
+face        RN r12
+depth       RN lr
+
+mask        RN depth
+
+fPolys      RN countArg
+fLast       RN tex
+fVertices   RN face
+
+spPolys     RN vx0
+spLast      RN vx1
+spVertices  RN vy2
+spOT        RN vx3
+spPalette   RN vy3
+spFaceBase  RN tex
+spTextures  RN face
+
+faceBase    RN vy3
+cross       RN vy3
+
+indices     RN vy0
+
+vz0         RN vy0
+vz1         RN vy1
+vz2         RN vy2
+
+vp0         RN vx0
+vp1         RN vx1
+vp2         RN vx2
+
+xpos        RN vx0
+ypos        RN vy0
+hdx0        RN vx1
+hdy0        RN vy1
+vdx0        RN vx2
+vdy0        RN vy2
+hddx        RN vx3
+hddy        RN vy3
+
+nextPtr     RN vy3
+dataPtr     RN polysArg
+plutPtr     RN countArg
+
+tmp         RN countArg
+ot          RN countArg
+otTail      RN depth
+nextFace    RN depth
+
+plutOffset  RN vx3
+texIndex    RN vy3
+
+ws          RN tex
+hs          RN depth
+shift       RN depth
+
+SP_POLYS    EQU 0
+SP_LAST     EQU 4
+SP_VERTICES EQU 8
+SP_OT       EQU 12
+SP_PALETTE  EQU 16
+SP_FACEBASE EQU 20
+SP_TEXTURES EQU 24
+SP_SIZE     EQU 28
+
+        stmfd sp!, {r4-r11, lr}     ; entry: r0 = polys, r1 = count, r2 = shade; save r4-r11 and the return address
+        sub sp, sp, #SP_SIZE        ; reserve the SP_* scratch slots
+
+        mov pixc, shadeArg          ; r2 is reused as vx0/vp0 below, so keep the shade argument in pixc
+
+        add spLast, polysArg, countArg, lsl #3  ; end pointer = polys + count * 8 (two words per poly)
+        ldr spVertices, =gVertices
+        ldr spOT, =gOT
+        ldr spPalette, =gPalette
+        ldr spPalette, [spPalette]
+        ldr spFaceBase, =gFacesBase
+        ldr spTextures, =level
+        ldr spTextures, [spTextures, #LVL_TEX_OFFSET]
+
+        stmia sp, {polysArg, spLast, spVertices, spOT, spPalette, spFaceBase, spTextures}   ; stored in ascending register order, which matches SP_POLYS .. SP_TEXTURES
+
+loop    ldmia sp, {fPolys, fLast, fVertices}    ; reload the loop state that the face setup below overwrites
+skip    cmp fPolys, fLast
+        bhs done                    ; unsigned compare: both operands are addresses
+
+        ldmia fPolys!, {flags, indices}     ; fetch one poly record and advance fPolys
+
+        ; get vertex pointers
+        and vp0, indices, #0xFF
+        mov vp1, indices, lsr #8
+        and vp1, vp1, #0xFF
+        mov vp2, indices, lsr #16   ; no 0xFF mask here: assumes bits 24-31 of indices are zero - TODO confirm against the mesh data
+
+        add vp0, vp0, vp0, lsl #1   ; index * 3 ...
+        add vp1, vp1, vp1, lsl #1
+        add vp2, vp2, vp2, lsl #1
+
+        add vp0, fVertices, vp0, lsl #2     ; ... * 4 = 12 bytes per vertex
+        add vp1, fVertices, vp1, lsl #2
+        add vp2, fVertices, vp2, lsl #2
+
+        ; read z value with clip mask
+        ldr vz0, [vp0, #8]
+        ldr vz1, [vp1, #8]
+        ldr vz2, [vp2, #8]
+
+        ; check clipping
+        and mask, vz1, vz0
+        and mask, vz2, mask
+        tst mask, #CLIP_MASK
+        bne skip                    ; some CLIP_MASK bit is set in all three vertices
+
+        ; depth = (vz0 + vz1 + vz2 + vz2) (DEPTH_T_AVG)
+        add depth, vz0, vz1
+        add depth, depth, vz2, lsl #1
+
+        ; (vx1 - vx0) * (vy2 - vy0) - (vy1 - vy0) * (vx2 - vx0) <= 0
+        ldmia vp0, {vx0, vy0}
+        ldmia vp1, {vx1, vy1}
+        ldmia vp2, {vx2, vy2}
+        sub hdx0, vx1, vx0
+        sub hdy0, vy1, vy0
+        sub vdx0, vx2, vx0
+        sub vdy0, vy2, vy0
+        mul cross, hdy0, vdx0
+        rsb cross, cross, #0
+        mlas cross, hdx0, vdy0, cross   ; cross = hdx0 * vdy0 - hdy0 * vdx0, flags set from the result
+        ble skip                    ; zero or negative: poly is not drawn
+
+        ; poly is visible, store fPolys on the stack to reuse the reg
+        str fPolys, [sp, #SP_POLYS]
+
+        ; depth = max(0, (depth / 4) >> (CLIP_SHIFT + OT_SHIFT))
+        movs depth, depth, asr #(2 + CLIP_SHIFT + OT_SHIFT)     ; asr keeps the sign so the clamp below can fire (lsr by a non-zero amount never sets N)
+        movmi depth, #0
+
+        add tmp, sp, #SP_OT
+        ldmia tmp, {ot, plutOffset, faceBase, tex}      ; reload the SP_OT, SP_PALETTE, SP_FACEBASE and SP_TEXTURES slots
+
+        ; faceAdd
+        cmp depth, #(OT_SIZE - 1)
+        movgt depth, #(OT_SIZE - 1)         ; clamp to the last OT entry
+        add ot, ot, depth, lsl #3   ; mul by size of OT element
+
+        ldr face, [faceBase]                ; take the next free CCB ...
+        add nextFace, face, #SIZE_OF_CCB
+        str nextFace, [faceBase]            ; ... and move the allocation pointer past it
+
+        ; get texture ptr
+        mov texIndex, flags, lsl #(32 - FACE_MIP_SHIFT)
+        add tex, tex, texIndex, lsr #(32 - FACE_MIP_SHIFT - 3)  ; sizeof(Texture) = 2^3
+
+        ; add face to Ordering Table
+        ldmia ot, {nextPtr, otTail}
+        cmp nextPtr, #0
+        moveq otTail, face                  ; first word was 0: the new face is stored as the second word too
+        stmia ot, {face, otTail}            ; new face becomes the first word; the old first word is written into the CCB below
+
+        ; ccb flags
+        ands flags, flags, #(1 << 30)
+        movne flags, #(CCB_BGND)            ; poly flag bit 30 selects CCB_BGND, otherwise start from 0
+        orr flags, flags, #(CCB_NOBLK)
+        orr flags, flags, #(CCB_ACE + CCB_ACCW + CCB_ACW + CCB_ALSC + CCB_ACSC + CCB_YOXY)
+        orr flags, flags, #(CCB_LDPLUT + CCB_LDPPMP + CCB_LDPRS + CCB_LDSIZE + CCB_PPABS + CCB_SPABS + CCB_NPABS)
+
+        ; ccbMap3
+        stmia face!, {flags, nextPtr}       ; first two CCB words; face advances by 8
+        ldmia tex, {dataPtr, shift}
+
+        ; plutPtr = plutOffset + (tex->shift >> 16) * sizeof(PLUT)
+        mov plutPtr, shift, lsr #16
+        add plutPtr, plutOffset, plutPtr, lsl #5
+
+        and ws, shift, #0xFF        ; ws = bits 0-7 of shift
+        mov hs, shift, lsr #8
+        and hs, hs, #0xFF           ; hs = bits 8-15 of shift
+
+        mov hdx0, hdx0, lsl ws
+        mov hdy0, hdy0, lsl ws
+
+        mov vdx0, vdx0, lsl hs
+        mov vdy0, vdy0, lsl hs
+
+        rsb hs, hs, #16
+        rsb hddx, hdx0, #0          ; hddx = -hdx0
+        rsb hddy, hdy0, #0
+        mov hddx, hddx, asr hs
+        mov hddy, hddy, asr hs
+
+        mov xpos, vx0, lsl #16
+        mov ypos, vy0, lsl #16
+        add xpos, xpos, #(FRAME_WIDTH << 15)    ; add half the frame width, scaled by 2^16
+        add ypos, ypos, #(FRAME_HEIGHT << 15)
+
+        stmia face, {dataPtr, plutPtr, xpos, ypos, hdx0, hdy0, vdx0, vdy0, hddx, hddy, pixc}    ; r0-r10, written in ascending register order
+
+        b loop
+
+done    add sp, sp, #SP_SIZE
+        ldmfd sp!, {r4-r11, pc}     ; restore the saved registers and return
+    END
diff --git a/src/platform/3do/faceAddMeshTrianglesFlat.s b/src/platform/3do/faceAddMeshTrianglesFlat.s
new file mode 100644
index 00000000..584f6260
--- /dev/null
+++ b/src/platform/3do/faceAddMeshTrianglesFlat.s
@@ -0,0 +1,212 @@
+    AREA |C$$code|, CODE, READONLY
+|x$codeseg|
+
+    INCLUDE common_asm.inc
+
+    EXPORT faceAddMeshTrianglesFlat_asm
+
+faceAddMeshTrianglesFlat_asm
+
+polysArg    RN r0
+countArg    RN r1
+shadeArg    RN r2
+
+flags       RN polysArg
+
+vx0         RN shadeArg
+vy0         RN r3
+
+vx1         RN r4
+vy1         RN r5
+
+vx2         RN r6
+vy2         RN r7
+
+vx3         RN r8
+vy3         RN r9
+
+pixc        RN r10
+color       RN r11
+
+face        RN r12
+depth       RN lr
+
+mask        RN depth
+
+fPolys      RN countArg
+fLast       RN color
+fVertices   RN face
+
+spPolys     RN vx0
+spLast      RN vx1
+spVertices  RN vy2
+spFlags     RN vx3
+spOT        RN vy3
+spFaceBase  RN color
+spPalette   RN face
+
+faceBase    RN vy3
+cross       RN vy3
+
+indices     RN vy0
+
+vz0         RN vy0
+vz1         RN vy1
+vz2         RN vy3
+
+vp0         RN vx0
+vp1         RN vx1
+vp2         RN vx3
+
+xpos        RN vx0
+ypos        RN vy0
+hdx0        RN vx1
+hdy0        RN vy1
+vdx0        RN vx2
+vdy0        RN vy2
+hddx        RN vx3
+hddy        RN vy3
+
+nextPtr     RN vy3
+dataPtr     RN color
+plutPtr     RN countArg
+
+tmp         RN countArg
+ot          RN countArg
+otTail      RN depth
+nextFace    RN depth
+
+plutOffset  RN color
+colorIndex  RN face
+
+SP_POLYS    EQU 0
+SP_LAST     EQU 4
+SP_VERTICES EQU 8
+SP_FLAGS    EQU 12
+SP_OT       EQU 16
+SP_FACEBASE EQU 20
+SP_PALETTE  EQU 24
+SP_SIZE     EQU 28
+
+        stmfd sp!, {r4-r11, lr}     ; entry: r0 = polys, r1 = count, r2 = shade; save r4-r11 and the return address
+        sub sp, sp, #SP_SIZE        ; reserve the SP_* scratch slots
+
+        mov pixc, shadeArg          ; r2 is reused as vx0/vp0 below, so keep the shade argument in pixc
+
+        add spLast, polysArg, countArg, lsl #3  ; end pointer = polys + count * 8 (two words per poly)
+        ldr spVertices, =gVertices
+        mov spFlags, #(CCB_NOBLK + CCB_BGND)    ; the CCB flag word is identical for every poly here, so build it once
+        orr spFlags, spFlags, #(CCB_ACE + CCB_ACCW + CCB_ACW + CCB_ALSC + CCB_ACSC + CCB_YOXY)
+        orr spFlags, spFlags, #(CCB_CCBPRE + CCB_LDPPMP + CCB_LDPRS + CCB_LDSIZE + CCB_PPABS + CCB_SPABS + CCB_NPABS)
+        ldr spOT, =gOT
+        ldr spFaceBase, =gFacesBase
+        ldr spPalette, =gPalette
+        ldr spPalette, [spPalette]
+
+        stmia sp, {polysArg, spLast, spVertices, spFlags, spOT, spFaceBase, spPalette}  ; stored in ascending register order, which matches SP_POLYS .. SP_PALETTE
+
+loop    ldmia sp, {fPolys, fLast, fVertices}    ; reload the loop state that the face setup below overwrites
+skip    cmp fPolys, fLast
+        bhs done                    ; unsigned compare: both operands are addresses
+
+        ldmia fPolys!, {flags, indices}     ; fetch one poly record and advance fPolys
+
+        ; get vertex pointers
+        and vp0, indices, #0xFF
+        mov vp1, indices, lsr #8
+        and vp1, vp1, #0xFF
+        mov vp2, indices, lsr #16   ; no 0xFF mask here: assumes bits 24-31 of indices are zero - TODO confirm against the mesh data
+
+        add vp0, vp0, vp0, lsl #1   ; index * 3 ...
+        add vp1, vp1, vp1, lsl #1
+        add vp2, vp2, vp2, lsl #1
+
+        add vp0, fVertices, vp0, lsl #2     ; ... * 4 = 12 bytes per vertex
+        add vp1, fVertices, vp1, lsl #2
+        add vp2, fVertices, vp2, lsl #2
+
+        ; read z value with clip mask
+        ldr vz0, [vp0, #8]
+        ldr vz1, [vp1, #8]
+        ldr vz2, [vp2, #8]
+
+        ; check clipping
+        and mask, vz1, vz0
+        and mask, vz2, mask
+        tst mask, #CLIP_MASK
+        bne skip                    ; some CLIP_MASK bit is set in all three vertices
+
+        ; depth = (vz0 + vz1 + vz2 + vz2) (DEPTH_T_AVG)
+        add depth, vz0, vz1
+        add depth, depth, vz2, lsl #1
+
+        ; (vx1 - vx0) * (vy2 - vy0) - (vy1 - vy0) * (vx2 - vx0) <= 0
+        ldmia vp0, {vx0, vy0}
+        ldmia vp1, {vx1, vy1}
+        ldmia vp2, {vx2, vy2}
+        sub hdx0, vx1, vx0
+        sub hdy0, vy1, vy0
+        sub vdx0, vx2, vx0
+        sub vdy0, vy2, vy0
+        mul cross, hdy0, vdx0
+        rsb cross, cross, #0
+        mlas cross, hdx0, vdy0, cross   ; cross = hdx0 * vdy0 - hdy0 * vdx0, flags set from the result
+        ble skip                    ; zero or negative: poly is not drawn
+
+        ; poly is visible, store fPolys on the stack to reuse the reg
+        str fPolys, [sp, #SP_POLYS]
+
+        ; depth = max(0, (depth / 4) >> (CLIP_SHIFT + OT_SHIFT))
+        movs depth, depth, asr #(2 + CLIP_SHIFT + OT_SHIFT)     ; asr keeps the sign so the clamp below can fire (lsr by a non-zero amount never sets N)
+        movmi depth, #0
+
+        ; get color index from flags
+        and colorIndex, flags, #0xFF
+
+        add tmp, sp, #SP_FLAGS
+        ldmia tmp, {flags, ot, faceBase, plutOffset}    ; reload the SP_FLAGS, SP_OT, SP_FACEBASE and SP_PALETTE slots
+
+        ; get color ptr
+        add dataPtr, plutOffset, colorIndex, lsl #1     ; palette + colorIndex * 2
+
+        ; faceAdd
+        cmp depth, #(OT_SIZE - 1)
+        movgt depth, #(OT_SIZE - 1)         ; clamp to the last OT entry
+        add ot, ot, depth, lsl #3   ; mul by size of OT element
+
+        ldr face, [faceBase]                ; take the next free CCB ...
+        add nextFace, face, #SIZE_OF_CCB
+        str nextFace, [faceBase]            ; ... and move the allocation pointer past it
+
+        ; add face to Ordering Table
+        ldmia ot, {nextPtr, otTail}
+        cmp nextPtr, #0
+        moveq otTail, face                  ; first word was 0: the new face is stored as the second word too
+        stmia ot, {face, otTail}            ; new face becomes the first word; the old first word is written into the CCB below
+
+        ; ccbMap3 (colored)
+        stmia face, {flags, nextPtr, dataPtr}   ; CCB words 0-2; no writeback, face still points at the CCB start
+
+        mov hdx0, hdx0, lsl #20
+        mov hdy0, hdy0, lsl #20
+
+        mov vdx0, vdx0, lsl #16
+        mov vdy0, vdy0, lsl #16
+
+        rsb hddx, hdx0, #0          ; hddx = -hdx0
+        rsb hddy, hdy0, #0
+
+        mov xpos, vx0, lsl #16
+        mov ypos, vy0, lsl #16
+        add xpos, xpos, #(FRAME_WIDTH << 15)    ; add half the frame width, scaled by 2^16
+        add ypos, ypos, #(FRAME_HEIGHT << 15)
+
+        add face, face, #16    ; skip flags, nextPtr, dataPtr, plutPtr
+
+        stmia face, {xpos, ypos, hdx0, hdy0, vdx0, vdy0, hddx, hddy, pixc}  ; r2-r10, written in ascending register order
+
+        b loop
+
+done    add sp, sp, #SP_SIZE
+        ldmfd sp!, {r4-r11, pc}     ; restore the saved registers and return
+    END
diff --git a/src/platform/3do/faceAddRoomQuads.s b/src/platform/3do/faceAddRoomQuads.s
index 54d2f0aa..e1accd64 100644
--- a/src/platform/3do/faceAddRoomQuads.s
+++ b/src/platform/3do/faceAddRoomQuads.s
@@ -7,10 +7,10 @@
 
 faceAddRoomQuads_asm
 
-quadsArg    RN r0
+polysArg    RN r0
 countArg    RN r1
 
-flags       RN quadsArg
+flags       RN polysArg
 
 vx0         RN r2
 vy0         RN r3
@@ -30,11 +30,11 @@ tex         RN r11
 mask        RN r12
 depth       RN lr
 
-fQuads      RN countArg
+fPolys      RN countArg
 fLast       RN pixc
 fVertices   RN tex
 
-spQuads     RN vx0
+spPolys     RN vx0
 spLast      RN vx1
 spVertices  RN vy3
 spOT        RN vx2
@@ -45,8 +45,7 @@ spPalette   RN mask
 
 face        RN mask
 faceBase    RN mask
-hv0         RN mask
-hv1         RN vy2
+cross       RN mask
 
 i0          RN vy0
 i1          RN vy1
@@ -73,7 +72,7 @@ hddx        RN hdx1
 hddy        RN hdy1
 
 nextPtr     RN vy2
-dataPtr     RN quadsArg
+dataPtr     RN polysArg
 plutPtr     RN countArg
 
 tmp         RN countArg
@@ -91,7 +90,7 @@ ws          RN tex
 hs          RN depth
 shift       RN depth
 
-SP_QUADS    EQU 0
+SP_POLYS    EQU 0
 SP_LAST     EQU 4
 SP_VERTICES EQU 8
 SP_OT       EQU 12
@@ -105,7 +104,7 @@ SP_SIZE     EQU 32
         sub sp, sp, #SP_SIZE
 
         add countArg, countArg, countArg, lsl #1
-        add spLast, quadsArg, countArg, lsl #2
+        add spLast, polysArg, countArg, lsl #2
         ldr spVertices, =gVertices
         ldr spOT, =gOT
         ldr spShadeLUT, =shadeTable
@@ -115,13 +114,13 @@ SP_SIZE     EQU 32
         ldr spPalette, =gPalette
         ldr spPalette, [spPalette]
 
-        stmia sp, {quadsArg, spLast, spVertices, spOT, spShadeLUT, spTextures, spFaceBase, spPalette}
+        stmia sp, {polysArg, spLast, spVertices, spOT, spShadeLUT, spTextures, spFaceBase, spPalette}
 
-loop    ldmia sp, {fQuads, fLast, fVertices}
-skip    cmp fQuads, fLast
+loop    ldmia sp, {fPolys, fLast, fVertices}
+skip    cmp fPolys, fLast
         bge done
 
-        ldmia fQuads!, {flags, i0, i1}
+        ldmia fPolys!, {flags, i0, i1}
 
         ; get vertex pointers
         add vp0, fVertices, i0, lsr #16
@@ -162,15 +161,15 @@ skip    cmp fQuads, fLast
         sub hdy0, vy1, vy0
         sub vdx0, vx3, vx0
         sub vdy0, vy3, vy0
-        mul hv0, hdx0, vdy0
-        mul hv1, hdy0, vdx0
-        cmp hv0, hv1
+        mul cross, hdy0, vdx0
+        rsb cross, cross, #0
+        mlas cross, hdx0, vdy0, cross
         ble skip
 
-        ; poly is visible, store fQuads on the stack to reuse the reg
-        str fQuads, [sp, #SP_QUADS]
+        ; poly is visible, store fPolys on the stack to reuse the reg
+        str fPolys, [sp, #SP_POLYS]
 
-        ; depth = max(0, depth) >> (CLIP_SHIFT + OT_SHIFT)
+        ; depth = max(0, depth >> (CLIP_SHIFT + OT_SHIFT))
         movs depth, depth, lsr #(CLIP_SHIFT + OT_SHIFT)
         movmi depth, #0
 
diff --git a/src/platform/3do/faceAddRoomTriangles.s b/src/platform/3do/faceAddRoomTriangles.s
new file mode 100644
index 00000000..85ba2b1a
--- /dev/null
+++ b/src/platform/3do/faceAddRoomTriangles.s
@@ -0,0 +1,249 @@
+    AREA |C$$code|, CODE, READONLY
+|x$codeseg|
+
+    INCLUDE common_asm.inc
+
+    EXPORT faceAddRoomTriangles_asm
+; void faceAddRoomTriangles_asm(const RoomTriangle* polys, int32 count)
+faceAddRoomTriangles_asm
+; Per triangle: skip it if clipped or if cross <= 0, otherwise take a Face from gFacesBase, link it into gOT and fill in its CCB words.
+polysArg    RN r0       ; in: polys
+countArg    RN r1       ; in: count
+
+flags       RN polysArg ; r0 is reused for the face flags once polys has been saved on the stack
+
+vx0         RN r2       ; vx/vy pairs receive the x and y words of each vertex
+vy0         RN r3
+
+vx1         RN r4
+vy1         RN r5
+
+vx2         RN r6
+vy2         RN r7
+
+vx3         RN r8       ; no fourth vertex here: r8/r9 only serve through the aliases below
+vy3         RN r9
+
+pixc        RN r10
+tex         RN r11
+
+mask        RN r12
+depth       RN lr
+
+fPolys      RN countArg ; loop state, reloaded from the stack frame at 'loop'
+fLast       RN pixc
+fVertices   RN tex
+
+spPolys     RN vx0      ; sp* names are only used while the stack frame is being built
+spLast      RN vx1
+spVertices  RN vy2
+spOT        RN vx3
+spPalette   RN vy3
+spShadeLUT  RN pixc
+spTextures  RN tex
+spFaceBase  RN mask
+
+face        RN mask     ; face, faceBase and cross all share r12
+faceBase    RN mask
+cross       RN mask
+
+i0          RN vy0      ; packed index words
+i1          RN vy1
+
+vz0         RN vy0      ; z words, loaded over the index registers
+vz1         RN vy1
+vz2         RN vy2
+
+vp0         RN vx0      ; vertex pointers, later overwritten by the x values they point to
+vp1         RN vx1
+vp2         RN vx2
+
+xpos        RN vx0      ; CCB values, computed in place over the vertex data
+ypos        RN vy0
+hdx0        RN vx1
+hdy0        RN vy1
+vdx0        RN vx2
+vdy0        RN vy2
+hddx        RN vx3
+hddy        RN vy3
+
+nextPtr     RN vy3
+dataPtr     RN polysArg
+plutPtr     RN countArg
+
+tmp         RN countArg
+ot          RN countArg
+otTail      RN depth
+
+shadeLUT    RN pixc
+fog         RN pixc
+
+intensity   RN vy3
+plutOffset  RN vx3
+texIndex    RN vy3
+
+ws          RN tex
+hs          RN depth
+shift       RN depth
+
+SP_POLYS    EQU 0       ; byte offsets of the stack frame slots filled by the stmia below
+SP_LAST     EQU 4
+SP_VERTICES EQU 8
+SP_OT       EQU 12
+SP_PALETTE  EQU 16
+SP_SHADELUT EQU 20
+SP_TEXTURES EQU 24
+SP_FACEBASE EQU 28
+SP_SIZE     EQU 32
+
+        stmfd sp!, {r4-r11, lr}     ; saved here, restored (lr into pc) at 'done'
+        sub sp, sp, #SP_SIZE        ; reserve the local frame
+
+        add countArg, countArg, countArg, lsl #1    ; count * 3
+        add spLast, polysArg, countArg, lsl #2      ; last = polys + count * 12 bytes
+        ldr spVertices, =gVertices
+        ldr spOT, =gOT
+        ldr spPalette, =gPalette
+        ldr spPalette, [spPalette]      ; use the word stored at gPalette, not its address
+        ldr spShadeLUT, =shadeTable
+        ldr spTextures, =level
+        ldr spTextures, [spTextures, #LVL_TEX_OFFSET]   ; presumably level.textures, as used by the C version - confirm against common_asm.inc
+        ldr spFaceBase, =gFacesBase     ; address of gFacesBase; the variable itself is updated per face
+
+        stmia sp, {polysArg, spLast, spVertices, spOT, spPalette, spShadeLUT, spTextures, spFaceBase}   ; ascending registers (r0, r4, r7-r12) land in the SP_* slots in order
+
+loop    ldmia sp, {fPolys, fLast, fVertices}    ; reload SP_POLYS, SP_LAST, SP_VERTICES
+skip    cmp fPolys, fLast       ; 'skip' re-enters without a reload: r1, r10, r11 are untouched up to the visibility test
+        bge done
+
+        ldmia fPolys!, {flags, i0, i1}  ; flags plus two packed index words; fPolys advances 12 bytes
+
+        ; get vertex pointers (each index is added unscaled, i.e. used as a byte offset from gVertices)
+        add vp0, fVertices, i0, lsr #16     ; v0: high half of the first word
+        mov i0, i0, lsl #16                 ; drop the high half
+        add vp1, fVertices, i0, lsr #16     ; v1: low half of the first word
+
+        add vp2, fVertices, i1, lsr #16     ; v2: high half of the second word
+
+        ; read z value with clip mask
+        ldr vz0, [vp0, #8]      ; third word of each vertex
+        ldr vz1, [vp1, #8]
+        ldr vz2, [vp2, #8]
+
+        ; check clipping
+        and mask, vz1, vz0
+        and mask, vz2, mask
+        tst mask, #CLIP_MASK
+        bne skip                ; some CLIP_MASK bit is set in all three vertices
+
+        ; depth = max(vz0, vz1, vz2) (DEPTH_T_MAX)
+        mov depth, vz0
+        cmp depth, vz1
+        movlt depth, vz1        ; LT is a signed comparison
+        cmp depth, vz2
+        movlt depth, vz2
+
+        ; (vx1 - vx0) * (vy2 - vy0) <= (vy1 - vy0) * (vx2 - vx0)
+        ldmia vp0, {vx0, vy0}   ; each load replaces its own pointer register with the x value
+        ldmia vp1, {vx1, vy1}
+        ldmia vp2, {vx2, vy2}
+        sub hdx0, vx1, vx0
+        sub hdy0, vy1, vy0
+        sub vdx0, vx2, vx0
+        sub vdy0, vy2, vy0
+        mul cross, hdy0, vdx0
+        rsb cross, cross, #0            ; cross = -(hdy0 * vdx0)
+        mlas cross, hdx0, vdy0, cross   ; cross = hdx0 * vdy0 - hdy0 * vdx0
+        ble skip                ; cross <= 0: not visible. NOTE(review): LE also reads V, which mlas does not update, so V still comes from the cmp above - confirm this is acceptable
+
+        ; poly is visible, store fPolys on the stack to reuse the reg
+        str fPolys, [sp, #SP_POLYS]
+
+        ; depth = max(0, depth >> (CLIP_SHIFT + OT_SHIFT))
+        movs depth, depth, lsr #(CLIP_SHIFT + OT_SHIFT)
+        movmi depth, #0         ; NOTE(review): a logical shift right by a non-zero amount clears bit 31, so MI looks unreachable - confirm whether asr was intended
+
+        ; fog = max(0, (depth - (FOG_MIN >> OT_SHIFT)) >> 1)
+        sub fog, depth, #(FOG_MIN >> OT_SHIFT)
+        movs fog, fog, asr #1
+        movmi fog, #0
+
+        ; intensity = min(255, fog + ((flags >> (FACE_MIP_SHIFT + FACE_MIP_SHIFT)) & 0xFF)) >> 3
+        mov intensity, flags, lsl #(32 - 8 - FACE_MIP_SHIFT - FACE_MIP_SHIFT)   ; move the 8-bit field to the top byte
+        add intensity, fog, intensity, lsr #(32 - 8)    ; shift it back down and add fog
+        cmp intensity, #255
+        movcs intensity, #255
+        mov intensity, intensity, lsr #3
+
+        add tmp, sp, #SP_OT
+        ldmia tmp, {ot, plutOffset, shadeLUT, tex, faceBase}    ; load slots SP_OT .. SP_FACEBASE (plutOffset receives SP_PALETTE)
+
+        ; pixc = shadeTable[intensity]
+        ldr pixc, [shadeLUT, intensity, lsl #2]     ; 4-byte entries; pixc overwrites shadeLUT (same register)
+
+        ; get texture ptr (base or mip)
+        mov texIndex, flags
+        cmp depth, #(MIP_DIST >> OT_SHIFT)
+        movgt texIndex, texIndex, lsr #FACE_MIP_SHIFT       ; beyond MIP_DIST: use the next index field up
+        mov texIndex, texIndex, lsl #(32 - FACE_MIP_SHIFT)  ; keep only the low FACE_MIP_SHIFT bits
+        add tex, tex, texIndex, lsr #(32 - FACE_MIP_SHIFT - 3)  ; sizeof(Texture) = 2^3
+
+        ; faceAdd
+        cmp depth, #(OT_SIZE - 1)
+        movgt depth, #(OT_SIZE - 1)
+        add ot, ot, depth, lsl #3   ; mul by size of OT element
+
+        mov depth, faceBase     ; use depth reg due to the face vs faceBase reg collision
+
+        ldr face, [depth]               ; face = gFacesBase
+        add nextPtr, face, #SIZE_OF_CCB
+        str nextPtr, [depth]            ; gFacesBase advanced by SIZE_OF_CCB bytes
+
+        ldmia ot, {nextPtr, otTail}     ; the two words of the OT element (head and tail in the C version)
+        cmp nextPtr, #0
+        moveq otTail, face              ; bucket was empty: this face is also the tail
+        stmia ot, {face, otTail}        ; face becomes the new first entry
+
+        ; ccb flags
+        ands flags, flags, #(1 << 30)
+        movne flags, #(CCB_BGND)        ; bit 30 of the face flags selects CCB_BGND
+        orr flags, flags, #(CCB_NOBLK)
+        orr flags, flags, #(CCB_ACE + CCB_ACCW + CCB_ACW + CCB_ALSC + CCB_ACSC + CCB_YOXY)
+        orr flags, flags, #(CCB_LDPLUT + CCB_LDPPMP + CCB_LDPRS + CCB_LDSIZE + CCB_PPABS + CCB_SPABS + CCB_NPABS)
+
+        ; ccbMap3 (done inline, this is the triangle path)
+        stmia face!, {flags, nextPtr}   ; first two CCB words: flags and the previous first entry (0 if none); face advances 8 bytes
+        ldmia tex, {dataPtr, shift}     ; first two words of the texture record
+
+        ; plutPtr = plutOffset + (tex->shift >> 16) * sizeof(PLUT)
+        mov plutPtr, shift, lsr #16
+        add plutPtr, plutOffset, plutPtr, lsl #5
+
+        and ws, shift, #0xFF    ; ws = byte 0 of shift; must be taken before hs overwrites the shared register
+        mov hs, shift, lsr #8
+        and hs, hs, #0xFF       ; hs = byte 1 of shift
+
+        mov hdx0, hdx0, lsl ws
+        mov hdy0, hdy0, lsl ws
+
+        mov vdx0, vdx0, lsl hs
+        mov vdy0, vdy0, lsl hs
+
+        rsb hs, hs, #16
+        rsb hddx, hdx0, #0      ; hddx = -hdx0
+        rsb hddy, hdy0, #0      ; hddy = -hdy0
+        mov hddx, hddx, asr hs  ; >> (16 - hs)
+        mov hddy, hddy, asr hs
+
+        mov xpos, vx0, lsl #16
+        mov ypos, vy0, lsl #16
+        add xpos, xpos, #(FRAME_WIDTH << 15)    ; same as adding (FRAME_WIDTH / 2) << 16
+        add ypos, ypos, #(FRAME_HEIGHT << 15)
+
+        stmia face, {dataPtr, plutPtr, xpos, ypos, hdx0, hdy0, vdx0, vdy0, hddx, hddy, pixc}    ; r0-r10 in order, written right after the two words stored above
+
+        b loop
+
+done    add sp, sp, #SP_SIZE        ; drop the local frame
+        ldmfd sp!, {r4-r11, pc}     ; restore registers and return
+    END
diff --git a/src/platform/3do/matrixPush.s b/src/platform/3do/matrixPush.s
new file mode 100644
index 00000000..4ef993a8
--- /dev/null
+++ b/src/platform/3do/matrixPush.s
@@ -0,0 +1,33 @@
+    AREA |C$$code|, CODE, READONLY
+|x$codeseg|
+
+    INCLUDE common_asm.inc
+
+    EXPORT matrixPush_asm
+
+; void matrixPush_asm(void)
+; Duplicates the 12-word matrix at *matrixPtr into the slot that follows it
+; and leaves matrixPtr pointing at the copy.
+matrixPush_asm
+el0     RN r0           ; el0-el3: the four words in flight between load and store
+el1     RN r1
+el2     RN r2
+el3     RN r3
+pTop    RN r0           ; address of matrixPtr; shares r0 with el0, dead after the first load
+srcPtr  RN r12
+dstPtr  RN lr           ; lr is used as the write cursor, hence the push below
+
+        stmdb sp!, {lr}
+        ldr pTop, =matrixPtr
+        ldr srcPtr, [pTop]
+        add dstPtr, srcPtr, #48         ; 12 words * 4 bytes
+        str dstPtr, [pTop]              ; publish the new top before r0 is overwritten
+
+        ldmia srcPtr!, {el0, el1, el2, el3}     ; words 0-3
+        stmia dstPtr!, {el0, el1, el2, el3}
+        ldmia srcPtr!, {el0, el1, el2, el3}     ; words 4-7
+        stmia dstPtr!, {el0, el1, el2, el3}
+        ldmia srcPtr!, {el0, el1, el2, el3}     ; words 8-11
+        stmia dstPtr!, {el0, el1, el2, el3}
+        ldmia sp!, {pc}                 ; pop the saved lr straight into pc
+    END
diff --git a/src/platform/3do/render_cel.cpp b/src/platform/3do/render_cel.cpp
index 0c6b39cb..51af3e02 100644
--- a/src/platform/3do/render_cel.cpp
+++ b/src/platform/3do/render_cel.cpp
@@ -189,33 +189,97 @@ enum ClipFlags {
 #define DEPTH_T_MAX(z0,z1,z2)    (X_MAX(z0, X_MAX(z1, z2)) >> (CLIP_SHIFT + OT_SHIFT))
 #define DEPTH_Q_MAX(z0,z1,z2,z3) (X_MAX(z0, X_MAX(z1, X_MAX(z2, z3))) >> (CLIP_SHIFT + OT_SHIFT))
 
+X_INLINE Face* faceAdd(int32 depth) // takes the next Face from gFacesBase and makes it the head of ordering-table bucket 'depth'
+{
+    if (depth < 0) depth = 0; // clamp depth into [0, OT_SIZE - 1]
+    if (depth > OT_SIZE - 1) depth = OT_SIZE - 1;
+
+    Face* face = gFacesBase++; // claim the next slot; no capacity check is done here
+
+    if (gOT[depth].head) {
+        face->ccb_NextPtr = gOT[depth].head; // chain in front of the current head
+    } else {
+        gOT[depth].tail = face; // bucket was empty: this face is also its tail, ccb_NextPtr is left as it was
+    }
+
+    gOT[depth].head = face;
+
+    return face; // caller fills in the remaining CCB fields
+}
+
+X_INLINE void ccbSetTexture(uint32 flags, Face* face, const Texture* texture) // textured face: writes ccb_Flags, ccb_SourcePtr and ccb_PLUTPtr
+{
+    face->ccb_Flags = // fixed flag set, includes CCB_LDPLUT because a palette pointer is supplied below
+        CCB_NPABS  |
+        CCB_SPABS  |
+        CCB_PPABS  |
+        CCB_LDSIZE |
+        CCB_LDPRS  |
+        CCB_LDPPMP |
+        CCB_LDPLUT |
+        CCB_YOXY   |
+        CCB_ACSC   | CCB_ALSC |
+        CCB_ACW    | CCB_ACCW |
+        CCB_ACE    |
+        CCB_NOBLK  |
+        (flags >> (8 + FACE_MIP_SHIFT + FACE_MIP_SHIFT) << 5); // set CCB_BGND (0x20 == 1 << 5) from the flag bits above the two index fields and the 8-bit field
+
+    face->ccb_SourcePtr = (CelData*)texture->data;
+    face->ccb_PLUTPtr = gPalette + (texture->shift >> 16) * 16; // upper 16 bits of shift pick the palette, in steps of 16 gPalette elements
+}
+
+X_INLINE void ccbSetColor(uint32 flags, Face* face) // untextured face: writes ccb_Flags and points ccb_SourcePtr at a gPalette entry
+{
+    face->ccb_Flags = // same set as ccbSetTexture except CCB_CCBPRE replaces CCB_LDPLUT and CCB_BGND is always on
+        CCB_NPABS  |
+        CCB_SPABS  |
+        CCB_PPABS  |
+        CCB_LDSIZE |
+        CCB_LDPRS  |
+        CCB_LDPPMP |
+        CCB_CCBPRE | // use the preamble words set in renderInit
+        CCB_YOXY   |
+        CCB_ACSC   | CCB_ALSC |
+        CCB_ACW    | CCB_ACCW |
+        CCB_ACE    |
+        CCB_NOBLK  |
+        CCB_BGND;
+
+    face->ccb_SourcePtr = (CelData*)&gPalette[flags & 0xFF]; // low 8 bits of flags index gPalette; ccb_PLUTPtr is not written here
+}
+
 #ifdef USE_ASM
-    #define unpackRoom unpackRoom_asm
-    #define unpackMesh unpackMesh_asm
-    #define projectVertices projectVertices_asm
-    #define ccbMap4 ccbMap4_asm
-    #define ccbMap3 ccbMap3_asm
-    #define faceAddRoomQuads faceAddRoomQuads_asm
+    #define unpackRoom                  unpackRoom_asm                  // -53%
+    #define unpackMesh                  unpackMesh_asm                  // -48%
+    #define projectVertices             projectVertices_asm             // -32%
+    #define faceAddRoomQuads            faceAddRoomQuads_asm            // -46%
+    #define faceAddRoomTriangles        faceAddRoomTriangles_asm        // -30%
+    #define faceAddMeshQuads            faceAddMeshQuads_asm            // -36%
+    #define faceAddMeshTriangles        faceAddMeshTriangles_asm        // -38%
+    #define faceAddMeshQuadsFlat        faceAddMeshQuadsFlat_asm        // -28%
+    #define faceAddMeshTrianglesFlat    faceAddMeshTrianglesFlat_asm    // -35%
 
     extern "C" {
         void unpackRoom_asm(const RoomVertex* vertices, int32 vCount);
         void unpackMesh_asm(const MeshVertex* vertices, int32 vCount);
         void projectVertices_asm(int32 vCount);
-        void ccbMap4_asm(Face* f, const Vertex* v0, const Vertex* v1, const Vertex* v2, const Vertex* v3, uint32 shift);
-        void ccbMap3_asm(Face* f, const Vertex* v0, const Vertex* v1, const Vertex* v2, uint32 shift);
-        void faceAddRoomQuads_asm(const RoomQuad* quads, int32 count);
+        void faceAddRoomQuads_asm(const RoomQuad* polys, int32 count);
+        void faceAddRoomTriangles_asm(const RoomTriangle* polys, int32 count);
+        void faceAddMeshQuads_asm(const MeshQuad* polys, int32 count, uint32 shade);
+        void faceAddMeshTriangles_asm(const MeshTriangle* polys, int32 count, uint32 shade);
+        void faceAddMeshQuadsFlat_asm(const MeshQuad* polys, int32 count, uint32 shade);
+        void faceAddMeshTrianglesFlat_asm(const MeshTriangle* polys, int32 count, uint32 shade);
     }
 #else
-    #define unpackRoom unpackRoom_c
-    #define unpackMesh unpackMesh_c
-    #define projectVertices projectVertices_c
-    #define ccbMap4 ccbMap4_c
-    #define ccbMap3 ccbMap3_c
-    #define faceAddRoomQuads faceAddRoomQuads_c
-
-    Face* faceAdd(int32 depth);
-    void ccbSetTexture(uint32 flags, Face* face, const Texture* texture);
-    void ccbSetColor(uint32 flags, Face* face);
+    #define unpackRoom                  unpackRoom_c
+    #define unpackMesh                  unpackMesh_c
+    #define projectVertices             projectVertices_c
+    #define faceAddRoomQuads            faceAddRoomQuads_c
+    #define faceAddRoomTriangles        faceAddRoomTriangles_c
+    #define faceAddMeshQuads            faceAddMeshQuads_c
+    #define faceAddMeshTriangles        faceAddMeshTriangles_c
+    #define faceAddMeshQuadsFlat        faceAddMeshQuadsFlat_c
+    #define faceAddMeshTrianglesFlat    faceAddMeshTrianglesFlat_c
 
 void unpackRoom_c(const RoomVertex* vertices, int32 vCount)
 {
@@ -320,7 +384,7 @@ void projectVertices_c(int32 vCount)
     } while (v < last);
 }
 
-X_INLINE void ccbMap4_c(Face* f, const Vertex* v0, const Vertex* v1, const Vertex* v2, const Vertex* v3, uint32 shift)
+X_INLINE void ccbMap4(Face* f, const Vertex* v0, const Vertex* v1, const Vertex* v2, const Vertex* v3, uint32 shift)
 {
     int32 x1 = v1->x;
     int32 y1 = v1->y;
@@ -360,7 +424,7 @@ X_INLINE void ccbMap4_c(Face* f, const Vertex* v0, const Vertex* v1, const Verte
 #endif
 }
 
-X_INLINE void ccbMap3_c(Face* f, const Vertex* v0, const Vertex* v1, const Vertex* v2, uint32 shift)
+X_INLINE void ccbMap3(Face* f, const Vertex* v0, const Vertex* v1, const Vertex* v2, uint32 shift)
 {
     int32 x0 = v0->x;
     int32 y0 = v0->y;
@@ -393,12 +457,12 @@ X_INLINE void ccbMap3_c(Face* f, const Vertex* v0, const Vertex* v1, const Verte
 #endif
 }
 
-void faceAddRoomQuads_c(const RoomQuad* quads, int32 count)
+void faceAddRoomQuads_c(const RoomQuad* polys, int32 count)
 {
-    for (int32 i = 0; i < count; i++, quads++)
+    for (int32 i = 0; i < count; i++, polys++)
     {
-        uint32 flags = quads->flags;
-        uint32* indices = (uint32*)quads->indices;
+        uint32 flags = polys->flags;
+        uint32* indices = (uint32*)polys->indices;
 
         uint32 i01 = indices[0];
         uint32 i23 = indices[1];
@@ -443,313 +507,285 @@ void faceAddRoomQuads_c(const RoomQuad* quads, int32 count)
         ccbMap4(f, v0, v1, v2, v3, texture->shift);
     }
 }
-#endif
 
-bool transformBoxRect(const AABBs* box, RectMinMax* rect)
+void faceAddRoomTriangles_c(const RoomTriangle* polys, int32 count)
 {
-    Matrix &m = matrixGet();
-
-    if ((m.e23 < VIEW_MIN_F) || (m.e23 >= VIEW_MAX_F)) {
-        return false;
-    }
-
-    int32 minX = box->minX << F16_SHIFT;
-    int32 maxX = box->maxX << F16_SHIFT;
-    int32 minY = box->minY << F16_SHIFT;
-    int32 maxY = box->maxY << F16_SHIFT;
-    int32 minZ = box->minZ << F16_SHIFT;
-    int32 maxZ = box->maxZ << F16_SHIFT;
-
-    gVertices[0].x = minX; gVertices[0].y = minY; gVertices[0].z = minZ;
-    gVertices[1].x = maxX; gVertices[1].y = minY; gVertices[1].z = minZ;
-    gVertices[2].x = minX; gVertices[2].y = maxY; gVertices[2].z = minZ;
-    gVertices[3].x = maxX; gVertices[3].y = maxY; gVertices[3].z = minZ;
-    gVertices[4].x = minX; gVertices[4].y = minY; gVertices[4].z = maxZ;
-    gVertices[5].x = maxX; gVertices[5].y = minY; gVertices[5].z = maxZ;
-    gVertices[6].x = minX; gVertices[6].y = maxY; gVertices[6].z = maxZ;
-    gVertices[7].x = maxX; gVertices[7].y = maxY; gVertices[7].z = maxZ;
+    for (int32 i = 0; i < count; i++, polys++)
+    {
+        uint32 flags = polys->flags;
+        uint32* indices = (uint32*)polys->indices;
 
-    projectVertices(8);
+        uint32 i01 = indices[0];
+        uint32 i23 = indices[1];
 
-    *rect = RectMinMax( INT_MAX, INT_MAX, INT_MIN, INT_MIN );
+        uint32 i0 = (i01 >> 16);
+        uint32 i1 = (i01 & 0xFFFF);
+        uint32 i2 = (i23 >> 16);
 
-    const Vertex* v = gVertices;
+        const Vertex* v0 = (Vertex*)((uint8*)gVertices + i0);
+        const Vertex* v1 = (Vertex*)((uint8*)gVertices + i1);
+        const Vertex* v2 = (Vertex*)((uint8*)gVertices + i2);
 
-    for (int32 i = 0; i < 8; i++, v++)
-    {
-        int32 x = v->x;
-        int32 y = v->y;
-        int32 z = v->z;
+        uint32 c0 = v0->z;
+        uint32 c1 = v1->z;
+        uint32 c2 = v2->z;
 
-        if ((z & CLIP_MASK) & (CLIP_NEAR | CLIP_FAR))
+        if ((c0 & c1 & c2) & CLIP_MASK)
             continue;
 
-        if (x < rect->x0) rect->x0 = x;
-        if (x > rect->x1) rect->x1 = x;
-        if (y < rect->y0) rect->y0 = y;
-        if (y > rect->y1) rect->y1 = y;
-    }
-
-    rect->x0 += (FRAME_WIDTH  / 2);
-    rect->y0 += (FRAME_HEIGHT / 2);
-    rect->x1 += (FRAME_WIDTH  / 2);
-    rect->y1 += (FRAME_HEIGHT / 2);
+        int32 depth = DEPTH_T_MAX(c0, c1, c2);
 
-    return true;
-}
+        if (cross(v0, v1, v2) <= 0)
+            continue;
 
-void transformRoom(const Room* room)
-{
-    int32 vCount = room->info->verticesCount;
-    if (vCount <= 0)
-        return;
+        Face* f = faceAdd(depth);
+    
+        uint32 intensity = (flags >> (FACE_MIP_SHIFT + FACE_MIP_SHIFT)) & 0xFF;
+        if (depth > (FOG_MIN >> OT_SHIFT)) {
+            intensity += (depth - (FOG_MIN >> OT_SHIFT)) >> 1;
+            intensity = X_MIN(intensity, 255);
+        }
 
-    unpackRoom(room->data.vertices, vCount);
+        f->ccb_PIXC = shadeTable[intensity >> 3];
 
-    projectVertices(vCount);
+        uint32 texIndex = flags;
+        if (depth > (MIP_DIST >> OT_SHIFT)) {
+            texIndex >>= FACE_MIP_SHIFT;
+        }
+        const Texture* texture = level.textures + (texIndex & FACE_TEXTURE);
+        ccbSetTexture(flags, f, texture);
 
-    gVerticesCount += vCount;
+        ccbMap3(f, v0, v1, v2, texture->shift);
+    }
 }
 
-void transformMesh(const MeshVertex* vertices, int32 vCount, const uint16* vIntensity, const vec3s* vNormal)
+void faceAddMeshQuads_c(const MeshQuad* polys, int32 count, uint32 shade)
 {
-    unpackMesh(vertices, vCount);
-
-    projectVertices(vCount);
-
-    gVerticesCount += vCount;
-}
+    for (int32 i = 0; i < count; i++, polys++)
+    {
+        uint32 indices = polys->indices;
+        uint32 flags = polys->flags;
 
-X_INLINE Face* faceAdd(int32 depth)
-{
-    if (depth < 0) depth = 0;
-    if (depth > OT_SIZE - 1) depth = OT_SIZE - 1;
+        uint32 i0 = indices & 0xFF; indices >>= 8;
+        uint32 i1 = indices & 0xFF; indices >>= 8;
+        uint32 i2 = indices & 0xFF; indices >>= 8;
+        uint32 i3 = indices;
 
-    Face* face = gFacesBase++;
+        const Vertex* v0 = gVertices + i0;
+        const Vertex* v1 = gVertices + i1;
+        const Vertex* v2 = gVertices + i2;
+        const Vertex* v3 = gVertices + i3;
 
-    if (gOT[depth].head) {
-        face->ccb_NextPtr = gOT[depth].head;
-    } else {
-        gOT[depth].tail = face;
-    }
+        uint32 c0 = v0->z;
+        uint32 c1 = v1->z;
+        uint32 c2 = v2->z;
+        uint32 c3 = v3->z;
 
-    gOT[depth].head = face;
+        if ((c0 & c1 & c2 & c3) & CLIP_MASK)
+            continue;
 
-    return face;
-}
+        int32 depth = DEPTH_Q_AVG(c0, c1, c2, c3);
 
-X_INLINE void ccbSetTexture(uint32 flags, Face* face, const Texture* texture)
-{
-    face->ccb_Flags =
-        CCB_NPABS  |
-        CCB_SPABS  |
-        CCB_PPABS  |
-        CCB_LDSIZE |
-        CCB_LDPRS  |
-        CCB_LDPPMP |
-        CCB_LDPLUT |
-        CCB_YOXY   |
-        CCB_ACSC   | CCB_ALSC |
-        CCB_ACW    | CCB_ACCW |
-        CCB_ACE    |
-        CCB_NOBLK  |
-        (flags >> (8 + FACE_MIP_SHIFT + FACE_MIP_SHIFT) << 5); // set CCB_BGND (0x20 == 1 << 5)
+        if ((cross(v0, v1, v3) ^ flags) & FACE_CCW)
+            continue;
 
-    face->ccb_SourcePtr = (CelData*)texture->data;
-    face->ccb_PLUTPtr = gPalette + (texture->shift >> 16) * 16;
-}
+        Face* f = faceAdd(depth);
+        f->ccb_PIXC = shade;
 
-X_INLINE void ccbSetColor(uint32 flags, Face* face)
-{
-    face->ccb_Flags =
-        CCB_NPABS  |
-        CCB_SPABS  |
-        CCB_PPABS  |
-        CCB_LDSIZE |
-        CCB_LDPRS  |
-        CCB_LDPPMP |
-        CCB_CCBPRE | // use the preamble words set in renderInit
-        CCB_YOXY   |
-        CCB_ACSC   | CCB_ALSC |
-        CCB_ACW    | CCB_ACCW |
-        CCB_ACE    |
-        CCB_NOBLK  |
-        CCB_BGND;
+        const Texture* texture = level.textures + (flags & FACE_TEXTURE);
+        ccbSetTexture(flags, f, texture);
 
-    face->ccb_SourcePtr = (CelData*)&gPalette[flags & 0xFF];
+        ccbMap4(f, v0, v1, v2, v3, texture->shift);
+    }
 }
 
-X_INLINE void faceAddRoomTriangle(uint32 flags, const Index* indices)
+void faceAddMeshTriangles_c(const MeshTriangle* polys, int32 count, uint32 shade)
 {
-    uint32 i01 = ((uint32*)indices)[0];
-    uint32 i23 = ((uint32*)indices)[1];
+    for (int32 i = 0; i < count; i++, polys++)
+    {
+        uint32 indices = polys->indices;
+        uint32 flags = polys->flags;
 
-    uint32 i0 = (i01 >> 16);
-    uint32 i1 = (i01 & 0xFFFF);
-    uint32 i2 = (i23 >> 16);
+        uint32 i0 = indices & 0xFF; indices >>= 8;
+        uint32 i1 = indices & 0xFF; indices >>= 8;
+        uint32 i2 = indices;
 
-    const Vertex* v0 = (Vertex*)((uint8*)gVertices + i0);
-    const Vertex* v1 = (Vertex*)((uint8*)gVertices + i1);
-    const Vertex* v2 = (Vertex*)((uint8*)gVertices + i2);
+        const Vertex* v0 = gVertices + i0;
+        const Vertex* v1 = gVertices + i1;
+        const Vertex* v2 = gVertices + i2;
 
-    uint32 c0 = v0->z;
-    uint32 c1 = v1->z;
-    uint32 c2 = v2->z;
+        uint32 c0 = v0->z;
+        uint32 c1 = v1->z;
+        uint32 c2 = v2->z;
 
-    if ((c0 & c1 & c2) & CLIP_MASK)
-        return;
+        if ((c0 & c1 & c2) & CLIP_MASK)
+            continue;
 
-    int32 depth = DEPTH_T_MAX(c0, c1, c2);
+        if (cross(v0, v1, v2) <= 0)
+            continue;
 
-    if (cross(v0, v1, v2) <= 0)
-        return;
+        int32 depth = DEPTH_T_AVG(v0->z, v1->z, v2->z);
 
-    Face* f = faceAdd(depth);
-    
-    uint32 intensity = (flags >> (FACE_MIP_SHIFT + FACE_MIP_SHIFT)) & 0xFF;
-    if (depth > (FOG_MIN >> OT_SHIFT)) {
-        intensity += (depth - (FOG_MIN >> OT_SHIFT)) >> 1;
-        intensity = X_MIN(intensity, 255);
-    }
+        Face* f = faceAdd(depth);
+        f->ccb_PIXC = shade;
 
-    f->ccb_PIXC = shadeTable[intensity >> 3];
+        const Texture* texture = level.textures + (flags & FACE_TEXTURE);
+        ccbSetTexture(flags, f, texture);
 
-    uint32 texIndex = flags;
-    if (depth > (MIP_DIST >> OT_SHIFT)) {
-        texIndex >>= FACE_MIP_SHIFT;
+        ccbMap3(f, v0, v1, v2, texture->shift);
     }
-    const Texture* texture = level.textures + (texIndex & FACE_TEXTURE);
-    ccbSetTexture(flags, f, texture);
-
-    ccbMap3(f, v0, v1, v2, texture->shift);
 }
 
-X_INLINE void faceAddMeshQuad(uint32 flags, uint32 indices, uint32 shade)
+void faceAddMeshQuadsFlat_c(const MeshQuad* polys, int32 count, uint32 shade)
 {
-    uint32 i0 = indices & 0xFF; indices >>= 8;
-    uint32 i1 = indices & 0xFF; indices >>= 8;
-    uint32 i2 = indices & 0xFF; indices >>= 8;
-    uint32 i3 = indices;
+    for (int32 i = 0; i < count; i++, polys++)
+    {
+        uint32 indices = polys->indices;
+        uint32 flags = polys->flags;
 
-    const Vertex* v0 = gVertices + i0;
-    const Vertex* v1 = gVertices + i1;
-    const Vertex* v2 = gVertices + i2;
-    const Vertex* v3 = gVertices + i3;
+        uint32 i0 = indices & 0xFF; indices >>= 8;
+        uint32 i1 = indices & 0xFF; indices >>= 8;
+        uint32 i2 = indices & 0xFF; indices >>= 8;
+        uint32 i3 = indices;
 
-    uint32 c0 = v0->z;
-    uint32 c1 = v1->z;
-    uint32 c2 = v2->z;
-    uint32 c3 = v3->z;
+        const Vertex* v0 = gVertices + i0;
+        const Vertex* v1 = gVertices + i1;
+        const Vertex* v2 = gVertices + i2;
+        const Vertex* v3 = gVertices + i3;
 
-    if ((c0 & c1 & c2 & c3) & CLIP_MASK)
-        return;
+        uint32 c0 = v0->z;
+        uint32 c1 = v1->z;
+        uint32 c2 = v2->z;
+        uint32 c3 = v3->z;
 
-    if ((cross(v0, v1, v3) ^ flags) & FACE_CCW) // TODO (hdx0 * vdy0 - vdx0 * hdy0) <= 0
-        return;
+        if ((c0 & c1 & c2 & c3) & CLIP_MASK)
+            continue;
 
-    int32 depth = DEPTH_Q_AVG(v0->z, v1->z, v2->z, v3->z);
+        if (cross(v0, v1, v3) <= 0)
+            continue;
 
-    Face* f = faceAdd(depth);
-    f->ccb_PIXC = shade;
+        int32 depth = DEPTH_Q_AVG(v0->z, v1->z, v2->z, v3->z);
+
+        Face* f = faceAdd(depth);
+        f->ccb_PIXC = shade;
 
-    const Texture* texture = level.textures + (flags & FACE_TEXTURE);
-    ccbSetTexture(flags, f, texture);
+        ccbSetColor(flags, f);
 
-    ccbMap4(f, v0, v1, v2, v3, texture->shift);
+        ccbMap4(f, v0, v1, v2, v3, 20 | (16 << 8));
+    }
 }
 
-X_INLINE void faceAddMeshTriangle(uint32 flags, uint32 indices, uint32 shade)
+void faceAddMeshTrianglesFlat_c(const MeshTriangle* polys, int32 count, uint32 shade)
 {
-    uint32 i0 = indices & 0xFF; indices >>= 8;
-    uint32 i1 = indices & 0xFF; indices >>= 8;
-    uint32 i2 = indices;
+    for (int32 i = 0; i < count; i++, polys++)
+    {
+        uint32 indices = polys->indices;
+        uint32 flags = polys->flags;
 
-    const Vertex* v0 = gVertices + i0;
-    const Vertex* v1 = gVertices + i1;
-    const Vertex* v2 = gVertices + i2;
+        uint32 i0 = indices & 0xFF; indices >>= 8;
+        uint32 i1 = indices & 0xFF; indices >>= 8;
+        uint32 i2 = indices;
 
-    uint32 c0 = v0->z;
-    uint32 c1 = v1->z;
-    uint32 c2 = v2->z;
+        const Vertex* v0 = gVertices + i0;
+        const Vertex* v1 = gVertices + i1;
+        const Vertex* v2 = gVertices + i2;
 
-    if ((c0 & c1 & c2) & CLIP_MASK)
-        return;
+        uint32 c0 = v0->z;
+        uint32 c1 = v1->z;
+        uint32 c2 = v2->z;
 
-    if (cross(v0, v1, v2) <= 0)
-        return;
+        if ((c0 & c1 & c2) & CLIP_MASK)
+            continue;
+
+        if (cross(v0, v1, v2) <= 0)
+            continue;
 
-    int32 depth = DEPTH_T_AVG(v0->z, v1->z, v2->z);
+        int32 depth = DEPTH_T_AVG(v0->z, v1->z, v2->z);
 
-    Face* f = faceAdd(depth);
-    f->ccb_PIXC = shade;
+        Face* f = faceAdd(depth);
+        f->ccb_PIXC = shade;
 
-    const Texture* texture = level.textures + (flags & FACE_TEXTURE);
-    ccbSetTexture(flags, f, texture);
+        ccbSetColor(flags, f);
 
-    ccbMap3(f, v0, v1, v2, texture->shift);
+        ccbMap3(f, v0, v1, v2, 20 | (16 << 8));
+    }
 }
+#endif
 
-X_INLINE void faceAddMeshQuadFlat(uint32 flags, uint32 indices, uint32 shade)
+bool transformBoxRect(const AABBs* box, RectMinMax* rect)
 {
-    uint32 i0 = indices & 0xFF; indices >>= 8;
-    uint32 i1 = indices & 0xFF; indices >>= 8;
-    uint32 i2 = indices & 0xFF; indices >>= 8;
-    uint32 i3 = indices;
+    Matrix &m = matrixGet();
 
-    const Vertex* v0 = gVertices + i0;
-    const Vertex* v1 = gVertices + i1;
-    const Vertex* v2 = gVertices + i2;
-    const Vertex* v3 = gVertices + i3;
+    if ((m.e23 < VIEW_MIN_F) || (m.e23 >= VIEW_MAX_F)) {
+        return false;
+    }
 
-    uint32 c0 = v0->z;
-    uint32 c1 = v1->z;
-    uint32 c2 = v2->z;
-    uint32 c3 = v3->z;
+    int32 minX = box->minX << F16_SHIFT;
+    int32 maxX = box->maxX << F16_SHIFT;
+    int32 minY = box->minY << F16_SHIFT;
+    int32 maxY = box->maxY << F16_SHIFT;
+    int32 minZ = box->minZ << F16_SHIFT;
+    int32 maxZ = box->maxZ << F16_SHIFT;
 
-    if ((c0 & c1 & c2 & c3) & CLIP_MASK)
-        return;
+    gVertices[0].x = minX; gVertices[0].y = minY; gVertices[0].z = minZ;
+    gVertices[1].x = maxX; gVertices[1].y = minY; gVertices[1].z = minZ;
+    gVertices[2].x = minX; gVertices[2].y = maxY; gVertices[2].z = minZ;
+    gVertices[3].x = maxX; gVertices[3].y = maxY; gVertices[3].z = minZ;
+    gVertices[4].x = minX; gVertices[4].y = minY; gVertices[4].z = maxZ;
+    gVertices[5].x = maxX; gVertices[5].y = minY; gVertices[5].z = maxZ;
+    gVertices[6].x = minX; gVertices[6].y = maxY; gVertices[6].z = maxZ;
+    gVertices[7].x = maxX; gVertices[7].y = maxY; gVertices[7].z = maxZ;
 
-    if (cross(v0, v1, v3) <= 0)
-        return;
+    projectVertices(8);
 
-    int32 depth = DEPTH_Q_AVG(v0->z, v1->z, v2->z, v3->z);
+    *rect = RectMinMax( INT_MAX, INT_MAX, INT_MIN, INT_MIN );
 
-    Face* f = faceAdd(depth);
-    f->ccb_PIXC = shade;
+    const Vertex* v = gVertices;
 
-    ccbSetColor(flags, f);
+    for (int32 i = 0; i < 8; i++, v++)
+    {
+        int32 x = v->x;
+        int32 y = v->y;
+        int32 z = v->z;
 
-    ccbMap4(f, v0, v1, v2, v3, 20 | (16 << 8));
-}
+        if ((z & CLIP_MASK) & (CLIP_NEAR | CLIP_FAR))
+            continue;
 
-X_INLINE void faceAddMeshTriangleFlat(uint32 flags, uint32 indices, uint32 shade)
-{
-    uint32 i0 = indices & 0xFF; indices >>= 8;
-    uint32 i1 = indices & 0xFF; indices >>= 8;
-    uint32 i2 = indices;
+        if (x < rect->x0) rect->x0 = x;
+        if (x > rect->x1) rect->x1 = x;
+        if (y < rect->y0) rect->y0 = y;
+        if (y > rect->y1) rect->y1 = y;
+    }
 
-    const Vertex* v0 = gVertices + i0;
-    const Vertex* v1 = gVertices + i1;
-    const Vertex* v2 = gVertices + i2;
+    rect->x0 += (FRAME_WIDTH  / 2);
+    rect->y0 += (FRAME_HEIGHT / 2);
+    rect->x1 += (FRAME_WIDTH  / 2);
+    rect->y1 += (FRAME_HEIGHT / 2);
 
-    uint32 c0 = v0->z;
-    uint32 c1 = v1->z;
-    uint32 c2 = v2->z;
+    return true;
+}
 
-    if ((c0 & c1 & c2) & CLIP_MASK)
+void transformRoom(const Room* room)
+{
+    int32 vCount = room->info->verticesCount;
+    if (vCount <= 0)
         return;
 
-    if (cross(v0, v1, v2) <= 0)
-        return;
+    unpackRoom(room->data.vertices, vCount);
+    projectVertices(vCount);
 
-    int32 depth = DEPTH_T_AVG(v0->z, v1->z, v2->z);
+    gVerticesCount += vCount;
+}
 
-    Face* f = faceAdd(depth);
-    f->ccb_PIXC = shade;
+void transformMesh(const MeshVertex* vertices, int32 vCount, const uint16* vIntensity, const vec3s* vNormal)
+{
+    if (vCount <= 0)
+        return;
 
-    ccbSetColor(flags, f);
+    unpackMesh(vertices, vCount);
+    projectVertices(vCount);
 
-    ccbMap3(f, v0, v1, v2, 20 | (16 << 8));
+    gVerticesCount += vCount;
 }
 
 void faceAddShadow(int32 x, int32 z, int32 sx, int32 sz)
@@ -779,9 +815,13 @@ void faceAddShadow(int32 x, int32 z, int32 sx, int32 sz)
 
     transformMesh(v, 8, NULL, NULL);
 
-    faceAddMeshQuadFlat(0, (0 | (1 << 8) | (2 << 16) | (7 << 24)), SHADE_SHADOW);
-    faceAddMeshQuadFlat(0, (7 | (2 << 8) | (3 << 16) | (6 << 24)), SHADE_SHADOW);
-    faceAddMeshQuadFlat(0, (6 | (3 << 8) | (4 << 16) | (5 << 24)), SHADE_SHADOW);
+    static const MeshQuad quads[] = {
+        0, (0 | (1 << 8) | (2 << 16) | (7 << 24)),
+        0, (7 | (2 << 8) | (3 << 16) | (6 << 24)),
+        0, (6 | (3 << 8) | (4 << 16) | (5 << 24)),
+    };
+
+    faceAddMeshQuadsFlat(quads, 3, SHADE_SHADOW);
 }
 
 void faceAddSprite(int32 vx, int32 vy, int32 vz, int32 vg, int32 index)
@@ -896,20 +936,25 @@ void faceAddGlyph(int32 vx, int32 vy, int32 index)
 
 void faceAddRoom(const Room* room)
 {
+//        const int SIZE = 10; Face* face = gFacesBase; for (int32 i = 0; i < SIZE; i++) {
+
     if (room->info->quadsCount) {
         faceAddRoomQuads(room->data.quads, room->info->quadsCount);
     }
 
-    const RoomTriangle* triangles = room->data.triangles;
-    for (int32 i = 0; i < room->info->trianglesCount; i++, triangles++) {
-        faceAddRoomTriangle(triangles->flags, triangles->indices);
+    if (room->info->trianglesCount) {
+        faceAddRoomTriangles(room->data.triangles, room->info->trianglesCount);
     }
 
+//        gFacesBase = face; } memset(gOT, 0, sizeof(gOT));
+
     gFacesCount = gFacesBase - gFaces;
 }
 
 void faceAddMesh(const MeshQuad* rFaces, const MeshQuad* crFaces, const MeshTriangle* tFaces, const MeshTriangle* ctFaces, int32 rCount, int32 crCount, int32 tCount, int32 ctCount)
 {
+//        const int SIZE = 10; Face* face = gFacesBase; for (int32 i = 0; i < SIZE; i++) {
+
     uint32 shade;
     if (lightAmbient > 4096) {
         shade = shadeTable[lightAmbient >> 8];
@@ -917,22 +962,24 @@ void faceAddMesh(const MeshQuad* rFaces, const MeshQuad* crFaces, const MeshTria
         shade = SHADE_16;
     }
 
-    for (int32 i = 0; i < rCount; i++) {
-        faceAddMeshQuad(rFaces[i].flags, rFaces[i].indices, shade);
+    if (rCount) {
+        faceAddMeshQuads(rFaces, rCount, shade);
     }
 
-    for (int32 i = 0; i < tCount; i++) {
-        faceAddMeshTriangle(tFaces[i].flags, tFaces[i].indices, shade);
+    if (tCount) {
+        faceAddMeshTriangles(tFaces, tCount, shade);
     }
 
-    for (int32 i = 0; i < crCount; i++) {
-        faceAddMeshQuadFlat(crFaces[i].flags, crFaces[i].indices, shade);
+    if (crCount) {
+        faceAddMeshQuadsFlat(crFaces, crCount, shade);
     }
 
-    for (int32 i = 0; i < ctCount; i++) {
-        faceAddMeshTriangleFlat(ctFaces[i].flags, ctFaces[i].indices, shade);
+    if (ctCount) {
+        faceAddMeshTrianglesFlat(ctFaces, ctCount, shade);
     }
 
+//        gFacesBase = face; } memset(gOT, 0, sizeof(gOT));
+
     gFacesCount = gFacesBase - gFaces;
 }
 
diff --git a/src/platform/3do/unpackMesh.s b/src/platform/3do/unpackMesh.s
index a3c9f616..a574b276 100644
--- a/src/platform/3do/unpackMesh.s
+++ b/src/platform/3do/unpackMesh.s
@@ -7,7 +7,7 @@
 
 unpackMesh_asm
 
-vertices RN r0
+data     RN r0
 vCount   RN r1
 vx0      RN r1
 vy0      RN r2
@@ -18,17 +18,17 @@ vz1      RN r6
 n0       RN vy0
 n1       RN vx1
 n2       RN vz1
-res      RN r12
+vertex   RN r12
 last     RN lr
 
         stmfd sp!, {r4-r6, lr}
-        ldr res, =gVertices
-        ; last = vertices + vCount * 6
+        ldr vertex, =gVertices ; vertex = write cursor, starts at the address of gVertices
+        ; last = data + vCount * 6 (byte offset: the next add makes vCount * 3, the lsl #1 after it doubles that)
         add vCount, vCount, vCount, lsl #1
-        add last, vertices, vCount, lsl #1
+        add last, data, vCount, lsl #1 ; vCount already holds 3x the incoming count here
 
-loop    ldmia vertices!, {n0, n1, n2} ; load two encoded vertices
-        cmp vertices, last
+loop    ldmia data!, {n0, n1, n2} ; load two encoded vertices (3 words) and advance data past them
+        cmp data, last ; end-of-input test; NOTE(review): presumably consumed by the blt after the store - confirm nothing in between sets flags
 
         mov vx0, n0, asr #16 ; x
         mov n0, n0, lsl #16
@@ -42,7 +42,7 @@ loop    ldmia vertices!, {n0, n1, n2} ; load two encoded vertices
         mov n2, n2, lsl #16
         mov vz1, n2, asr #16 ; z
 
-        stmia res!, {vx0, vy0, vz0, vx1, vy1, vz1}
+        stmia vertex!, {vx0, vy0, vz0, vx1, vy1, vz1} ; write x/y/z of both vertices (6 words) and advance vertex
         blt loop
 
         ldmfd sp!, {r4-r6, pc}
diff --git a/src/platform/3do/unpackRoom.s b/src/platform/3do/unpackRoom.s
index 7852c891..7c7dfb7e 100644
--- a/src/platform/3do/unpackRoom.s
+++ b/src/platform/3do/unpackRoom.s
@@ -7,7 +7,7 @@
 
 unpackRoom_asm
 
-vertices RN r0
+data     RN r0
 vCount   RN r1
 vx0      RN r1
 vy0      RN r2
@@ -25,17 +25,17 @@ n0       RN vz1
 n1       RN r7
 maskH    RN r8
 maskV    RN r9
-res      RN r12
+vertex   RN r12
 last     RN lr
 
         stmfd sp!, {r4-r9, lr}
-        ldr res, =gVertices
-        add last, vertices, vCount, lsl #1 ; last = vertices + vCount * 2
+        ldr vertex, =gVertices ; vertex = write cursor, starts at the address of gVertices
+        add last, data, vCount, lsl #1 ; last = data + vCount * 2 (byte offset, i.e. 2 bytes per encoded vertex)
         mov maskH, #0x1F000
-        mov maskV, #0xFC00
+        mov maskV, #0x0FC00 ; bits 10-15 set (a 6-bit field); same value as 0xFC00, padded to line up with maskH
 
-loop    ldmia vertices!, {n0, n1} ; load four encoded vertices
-        cmp vertices, last
+loop    ldmia data!, {n0, n1} ; load four encoded vertices: n0 holds vertices 0-1, n1 holds vertices 2-3
+        cmp data, last ; end-of-input test; NOTE(review): presumably consumed by the blt at the loop tail - confirm nothing in between sets flags
 
         ; n0 = z1:5, y1:6, x1:5, z0:5, y0:6, x0:5
         ; n0 = z3:5, y3:6, x3:5, z2:5, y2:6, x2:5
@@ -51,7 +51,7 @@ loop    ldmia vertices!, {n0, n1} ; load four encoded vertices
         and vz1, maskH, n0, lsr #15     ; decode z1
 
     ; store
-        stmia res!, {vx0, vy0, vz0, vx1, vy1, vz1}
+        stmia vertex!, {vx0, vy0, vz0, vx1, vy1, vz1} ; write vertices 0-1 (6 words) and advance vertex
 
     ; 3rd vertex
         and vx2, maskH, n1, lsl #12     ; decode x2
@@ -64,7 +64,7 @@ loop    ldmia vertices!, {n0, n1} ; load four encoded vertices
         and vz3, maskH, n1, lsr #15     ; decode z3
 
     ; store
-        stmia res!, {vx2, vy2, vz2, vx3, vy3, vz3}
+        stmia vertex!, {vx2, vy2, vz2, vx3, vy3, vz3} ; write vertices 2-3 (6 words) and advance vertex
 
         blt loop