From d5f6139678fe71b021754f9fcca923c60258e3c1 Mon Sep 17 00:00:00 2001 From: XProger Date: Sun, 24 Apr 2022 04:17:27 +0300 Subject: [PATCH] #368 GBA add extra 5 bits (13 in total) of precision for Gouraud shading, to reduce "saw" effect --- src/platform/gba/asm/rasterizeFT.s | 8 +++--- src/platform/gba/asm/rasterizeFTA.s | 8 +++--- src/platform/gba/asm/rasterizeGT.s | 41 +++++++++++++---------------- src/platform/gba/asm/rasterizeGTA.s | 9 ++----- 4 files changed, 27 insertions(+), 39 deletions(-) diff --git a/src/platform/gba/asm/rasterizeFT.s b/src/platform/gba/asm/rasterizeFT.s index d92837cb..8252eae7 100644 --- a/src/platform/gba/asm/rasterizeFT.s +++ b/src/platform/gba/asm/rasterizeFT.s @@ -48,8 +48,6 @@ dtmp .req t Ltmp .req N Rtmp .req N -Rti .req indexB - .macro PUT_PIXELS tex indexA, t lit indexA @@ -176,17 +174,17 @@ rasterizeFT_asm: ldrb indexB, [ptr, #-1]! // read pal index from VRAM (byte) orr indexB, indexA, lsl #8 strh indexB, [ptr], #2 - add t, dtdx subs width, #1 // width-- beq .scanline_end // if (width == 0) + add t, dtdx + .align_right: tst width, #1 beq .align_block_4px - sub Rti, Rt, dtdx - tex indexA, Rti + tex indexA, Rt lit indexA ldrb indexB, [ptr, width] diff --git a/src/platform/gba/asm/rasterizeFTA.s b/src/platform/gba/asm/rasterizeFTA.s index 85c8a309..0d83c6d3 100644 --- a/src/platform/gba/asm/rasterizeFTA.s +++ b/src/platform/gba/asm/rasterizeFTA.s @@ -48,8 +48,6 @@ dtmp .req t Ltmp .req N Rtmp .req N -Rti .req indexB - .macro PUT_PIXELS tex indexA, t add t, dtdx, lsl #1 @@ -179,17 +177,17 @@ rasterizeFTA_asm: orrne indexB, indexA, lsl #8 strneh indexB, [ptr], #2 addeq ptr, #1 - add t, dtdx subs width, #1 // width-- beq .scanline_end // if (width == 0) + add t, dtdx + .align_right: tst width, #1 beq .align_block_4px - sub Rti, Rt, dtdx - tex indexA, Rti + tex indexA, Rt cmp indexA, #0 ldrneb indexA, [LMAP, indexA] diff --git a/src/platform/gba/asm/rasterizeGT.s b/src/platform/gba/asm/rasterizeGT.s index 7d7c3e29..6594228d 100644 --- a/src/platform/gba/asm/rasterizeGT.s +++ b/src/platform/gba/asm/rasterizeGT.s @@ -55,17 +55,17 @@ dtmp .req L Ltmp .req N Rtmp .req N -Rti .req tmp -Rgi .req tmp - SP_TILE = 0 SP_SIZE = 4 -.macro PUT_PIXELS - bic LMAP, g, #255 +G_EXTRA = 5 // extra bits of precision for gouraud shading (8 + G_EXTRA) +.macro PUT_PIXELS tex indexA, t - lit indexA + + mov LMAP, g, lsr #(8 + G_EXTRA) + ldrb indexA, [indexA, LMAP, lsl #8] + strb indexA, [ptr], #2 // writing a byte to GBA VRAM will write a half word for free add g, dgdx, lsl #1 @@ -104,7 +104,7 @@ rasterizeGT_asm: beq .calc_left_start lsl Lx, Lxy, #16 // Lx = L->v.x << 16 - lsl Lg, #8 // Lg <<= 8 + lsl Lg, #(8 + G_EXTRA) // Lg <<= 8 + G_EXTRA cmp Lh, #1 // if (Lh <= 1) skip Ldx calc beq .calc_left_end @@ -116,9 +116,9 @@ rasterizeGT_asm: mul Ldx, tmp // Ldx = tmp * (N->v.x - Lx) ldrb Ldg, [N, #VERTEX_G] - sub Ldg, Lg, lsr #8 + sub Ldg, Lg, lsr #(8 + G_EXTRA) mul Ldg, tmp // Ldg = tmp * (N->v.g - Lg) - asr Ldg, #8 // 8-bit for fractional part + asr Ldg, #(8 - G_EXTRA) // (8 + G_EXTRA)-bit for fractional part ldr Ldt, [N, #VERTEX_T] sub Ldt, Lt // Ldt = N->v.t - Lt @@ -143,7 +143,7 @@ rasterizeGT_asm: beq .calc_right_start lsl Rx, Rxy, #16 // Rx = R->v.x << 16 - lsl Rg, #8 // Rg <<= 8 + lsl Rg, #(8 + G_EXTRA) // Rg <<= 8 + G_EXTRA cmp Rh, #1 // if (Rh <= 1) skip Rdx calc beq .calc_right_end @@ -155,9 +155,9 @@ rasterizeGT_asm: mul Rdx, tmp // Rdx = tmp * (N->v.x - Rx) ldrb Rdg, [N, #VERTEX_G] - sub Rdg, Rg, lsr #8 + sub Rdg, Rg, lsr #(8 + G_EXTRA) mul Rdg, tmp // Rdg = tmp * (N->v.g - Rg) - asr Rdg, #8 // 8-bit for fractional part + asr Rdg, #(8 - G_EXTRA) // (8 + G_EXTRA)-bit for fractional part ldr Rdt, [N, #VERTEX_T] sub Rdt, Rt // Rdt = N->v.t - Rt @@ -165,8 +165,8 @@ rasterizeGT_asm: fiq_off .calc_right_end: - orr Lg, #LMAP_ADDR - orr Rg, #LMAP_ADDR + orr Lg, #(LMAP_ADDR << G_EXTRA) + orr Rg, #(LMAP_ADDR << G_EXTRA) cmp Rh, Lh // if (Rh < Lh) movlt h, Rh // h = Rh @@ -203,9 +203,9 @@ rasterizeGT_asm: tst ptr, #1 // if (ptr & 1) beq .align_right - bic LMAP, g, #255 tex indexA, t - lit indexA + mov LMAP, g, lsr #(8 + G_EXTRA) + ldrb indexA, [indexA, LMAP, lsl #8] ldrb indexB, [ptr, #-1]! // read pal index from VRAM (byte) orr indexB, indexA, lsl #8 @@ -221,12 +221,9 @@ rasterizeGT_asm: tst width, #1 beq .align_block_4px - sub Rti, Rt, dtdx - tex indexA, Rti - - sub Rgi, Rg, dgdx - bic LMAP, Rgi, #255 - lit indexA + tex indexA, Rt + mov LMAP, Rg, lsr #(8 + G_EXTRA) + ldrb indexA, [indexA, LMAP, lsl #8] ldrb indexB, [ptr, width] subs width, #1 // width-- diff --git a/src/platform/gba/asm/rasterizeGTA.s b/src/platform/gba/asm/rasterizeGTA.s index ba438def..4dcb3049 100644 --- a/src/platform/gba/asm/rasterizeGTA.s +++ b/src/platform/gba/asm/rasterizeGTA.s @@ -55,9 +55,6 @@ dtmp .req L Ltmp .req N Rtmp .req N -Rti .req tmp -Rgi .req tmp - SP_TILE = 0 SP_SIZE = 4 @@ -229,15 +226,13 @@ rasterizeGTA_asm: tst width, #1 beq .align_block_4px - sub Rti, Rt, dtdx - tex indexA, Rti + tex indexA, Rt cmp indexA, #0 subeq width, #1 beq .skip_right - sub Rgi, Rg, dgdx, asr #1 - bic LMAP, Rgi, #255 + bic LMAP, Rg, #255 lit indexA ldrb indexB, [ptr, width]