From e7cb40706ec437df8f6dd609574bb86612f47a30 Mon Sep 17 00:00:00 2001 From: XProger Date: Wed, 29 Dec 2021 14:52:34 +0300 Subject: [PATCH] #368 GBA sound samples and music playback optimization --- src/fixed/game.h | 8 --- src/fixed/level.h | 8 +++ src/platform/gba/packer/main.cpp | 29 ++++++++ src/platform/gba/render.iwram.cpp | 108 +++++----------------------- src/platform/gba/sndIMA.s | 64 +++++++++++++++++ src/platform/gba/sndPCM.s | 48 +++++++++++++ src/platform/gba/sndWrite.s | 44 ++++++++++++ src/platform/gba/sound.cpp | 113 +++++++++++++++++++++++------- 8 files changed, 300 insertions(+), 122 deletions(-) create mode 100644 src/platform/gba/sndIMA.s create mode 100644 src/platform/gba/sndPCM.s create mode 100644 src/platform/gba/sndWrite.s diff --git a/src/fixed/game.h b/src/fixed/game.h index 84f3fc78..8f5ba6b6 100644 --- a/src/fixed/game.h +++ b/src/fixed/game.h @@ -130,14 +130,6 @@ struct Game rooms[roomIndex].add(lara); } - int32 getAmbientTrack() // TODO - { - extern int32 gLevelID; - if (gLevelID == 0) - return -1; - return 5; - } - void updateItems() { ItemObj* item = ItemObj::sFirstActive; diff --git a/src/fixed/level.h b/src/fixed/level.h index 55037f2d..0590444b 100644 --- a/src/fixed/level.h +++ b/src/fixed/level.h @@ -170,4 +170,12 @@ void updateLevel(int32 frames) } } +int32 getAmbientTrack() +{ + extern int32 gLevelID; + if (gLevelID == 0) + return -1; + return 5; +} + #endif diff --git a/src/platform/gba/packer/main.cpp b/src/platform/gba/packer/main.cpp index e4ae4d8a..6168e4a8 100644 --- a/src/platform/gba/packer/main.cpp +++ b/src/platform/gba/packer/main.cpp @@ -3079,6 +3079,35 @@ struct LevelPC //f.writeArray(demoData, demoDataSize); + for (int32 i = 0; i < soundOffsetsCount; i++) + { + uint8* ptr = soundData + soundOffsets[i]; + int32 size = *(int32*)(ptr + 40); + uint8* src = ptr + 44; + uint8* dst = ptr; + + while ((dst - soundData) % 4 != 0) { + dst++; + } + dst += 4; + + for (int32 j = 0; j < size; j++) + { + dst[j] = src[j]; + } + + while ((size % 4) != 0) + { + dst[size] = dst[size - 1]; + size++; + } + + dst -= 4; + *(int32*)dst = size; + + soundOffsets[i] = dst - soundData; + } + header.soundMap = f.align4(); f.write(soundMap, 256); diff --git a/src/platform/gba/render.iwram.cpp b/src/platform/gba/render.iwram.cpp index 5f28c03b..29641feb 100644 --- a/src/platform/gba/render.iwram.cpp +++ b/src/platform/gba/render.iwram.cpp @@ -1039,19 +1039,25 @@ void renderShadow(int32 x, int32 z, int32 sx, int32 sz) return; } - int32 sx2 = sx << 1; - int32 sz2 = sz << 1; - - MeshVertex v[8] = { - { x - sx, 0, z + sz2 }, // 0 - { x + sx, 0, z + sz2 }, // 1 - { x + sx2, 0, z + sz }, // 2 - { x + sx2, 0, z - sz }, // 3 - { x + sx, 0, z - sz2 }, // 4 - { x - sx, 0, z - sz2 }, // 5 - { x - sx2, 0, z - sz }, // 6 - { x - sx2, 0, z + sz } // 7 - }; + int16 xns1 = x - sx; + int16 xps1 = x + sx; + int16 xns2 = xns1 - sx; + int16 xps2 = xps1 + sx; + + int16 zns1 = z - sz; + int16 zps1 = z + sz; + int16 zns2 = zns1 - sz; + int16 zps2 = zps1 + sz; + + MeshVertex v[8]; + v[0].x = xns1; v[0].y = 0; v[0].z = zps2; + v[1].x = xps1; v[1].y = 0; v[1].z = zps2; + v[2].x = xps2; v[2].y = 0; v[2].z = zps1; + v[3].x = xps2; v[3].y = 0; v[3].z = zns1; + v[4].x = xps1; v[4].y = 0; v[4].z = zns2; + v[5].x = xns1; v[5].y = 0; v[5].z = zns2; + v[6].x = xns2; v[6].y = 0; v[6].z = zns1; + v[7].x = xns2; v[7].y = 0; v[7].z = zps1; transformMesh(v, 8, 0); faceAddMeshQuads(gShadowQuads, 3); @@ -1153,79 +1159,3 @@ void renderGlyph(int32 vx, int32 vy, int32 index) { // } - -extern int16 IMA_STEP[89]; - -#define DECODE_IMA_4(n)\ - step = IMA_STEP[idx];\ - index = n & 7;\ - step += index * step << 1;\ - if (index < 4) {\ - idx = X_MAX(idx - 1, 0);\ - } else {\ - idx = X_MIN(idx + ((index - 3) << 1), X_COUNT(IMA_STEP) - 1);\ - }\ - if (n & 8) {\ - smp -= step >> 3;\ - } else {\ - smp += step >> 3;\ - }\ - *buffer++ = smp >> (16 - (8 + SND_VOL_SHIFT)); - -void decodeIMA(IMA_STATE &state, const uint8* data, int32* buffer, int32 size) -{ - uint32 step, index; - - int32 idx = state.idx; - int32 smp = state.smp; - - for (int32 i = 0; i < size; i++) - { - uint32 n = *data++; - DECODE_IMA_4(n); - n >>= 4; - DECODE_IMA_4(n); - } - - state.idx = idx; - state.smp = smp; -} - -/* TODO OUT OF IWRAM! -#define DECODE_IMA_4_sample(n)\ - step = IMA_STEP[idx];\ - index = n & 7;\ - step += index * step << 1;\ - if (index < 4) {\ - idx = X_MAX(idx - 1, 0);\ - } else {\ - idx = X_MIN(idx + ((index - 3) << 1), X_COUNT(IMA_STEP) - 1);\ - }\ - if (n & 8) {\ - smp -= step >> 3;\ - } else {\ - smp += step >> 3;\ - }\ - *buffer++ += smp * volume >> (16 - (8 + SND_VOL_SHIFT)); - -void decodeIMA_sample(IMA_STATE &state, const uint8* data, int32* buffer, int32 size, int32 inc, int32 volume) -{ - uint32 step, index; - - int32 idx = state.idx; - int32 smp = state.smp; - - for (int32 i = 0; i < size; i++) - { - uint32 n = *data; - DECODE_IMA_4_sample(n); - n >>= 4; - DECODE_IMA_4_sample(n); - - data += inc; - } - - state.idx = idx; - state.smp = smp; -} -*/ \ No newline at end of file diff --git a/src/platform/gba/sndIMA.s b/src/platform/gba/sndIMA.s new file mode 100644 index 00000000..35e2457b --- /dev/null +++ b/src/platform/gba/sndIMA.s @@ -0,0 +1,64 @@ +#include "common_asm.inc" + +state .req r0 +buffer .req r1 +data .req r2 +size .req r3 +smp .req r4 +idx .req r5 +stepLUT .req r6 +step .req r7 +n .req r8 +index .req r9 +outA .req r12 +outB .req lr +tmp .req outB + +IMA_STEP_SIZE = 88 + +.macro decode4 n, out + ldr step, [stepLUT, idx, lsl #2] + + and index, \n, #7 + mov tmp, step, lsl #1 + mla step, index, tmp, step + tst \n, #8 + subne smp, smp, step, lsr #3 + addeq smp, smp, step, lsr #3 + + subs index, #3 + suble idx, idx, #1 + bicle idx, idx, idx, asr #31 + addgt idx, idx, index, lsl #1 + cmpgt idx, #IMA_STEP_SIZE + movgt idx, #IMA_STEP_SIZE + + mov \out, smp, asr #2 +.endm + +.global sndIMA_asm +sndIMA_asm: + stmfd sp!, {r4-r9, lr} + + ldmia state, {smp, idx} + + ldr stepLUT, =IMA_STEP + +.loop: + ldrb n, [data], #1 + + decode4 n, outA + + mov n, n, lsr #4 + + decode4 n, outB + + stmia buffer!, {outA, outB} + + subs size, #1 + bne .loop + + stmia state, {smp, idx} + + ldmfd sp!, {r4-r9, lr} + bx lr diff --git a/src/platform/gba/sndPCM.s b/src/platform/gba/sndPCM.s new file mode 100644 index 00000000..97cd4021 --- /dev/null +++ b/src/platform/gba/sndPCM.s @@ -0,0 +1,48 @@ +#include "common_asm.inc" + +pos .req r0 +inc .req r1 +size .req r2 +volume .req r3 + +data .req r4 +buffer .req r5 +count .req r6 +ampA .req r7 +ampB .req r8 +outA .req r9 +outB .req r12 +last .req count +tmp .req outB + +.global sndPCM_asm +sndPCM_asm: + mov tmp, sp + stmfd sp!, {r4-r9} + + ldmia tmp, {data, buffer, count} + + mla last, inc, count, pos + cmp last, size + movgt last, size + +.loop: + ldrb ampA, [data, pos, lsr #8] + add pos, pos, inc + ldrb ampB, [data, pos, lsr #8] + add pos, pos, inc + cmp pos, last + + sub ampA, ampA, #128 + sub ampB, ampB, #128 + + ldmia buffer, {outA, outB} + mla outA, volume, ampA, outA + mla outB, volume, ampB, outB + stmia buffer!, {outA, outB} + + blt .loop + +.done: + ldmfd sp!, {r4-r9} + bx lr diff --git a/src/platform/gba/sndWrite.s b/src/platform/gba/sndWrite.s new file mode 100644 index 00000000..a57b6e44 --- /dev/null +++ b/src/platform/gba/sndWrite.s @@ -0,0 +1,44 @@ +#include "common_asm.inc" + +buffer .req r0 +count .req r1 +data .req r2 +vA .req r3 +vB .req r4 +vC .req r5 +vD .req r12 + +SND_VOL_SHIFT = 6 + +.macro encode amp + mov \amp, \amp, asr #SND_VOL_SHIFT + cmp \amp, #-128 + movlt \amp, #-128 + cmp \amp, #127 + movgt \amp, #127 +.endm + +.global sndWrite_asm +sndWrite_asm: + stmfd sp!, {r4-r5} +.loop: + ldmia data!, {vA, vB, vC, vD} + + encode vA + encode vB + encode vC + encode vD + + and vA, vA, #0xFF + and vB, vB, #0xFF + and vC, vC, #0xFF + orr vA, vA, vB, lsl #8 + orr vA, vA, vC, lsl #16 + orr vA, vA, vD, lsl #24 + str vA, [buffer], #4 + + subs count, #4 + bne .loop + + ldmfd sp!, {r4-r5} + bx lr diff --git a/src/platform/gba/sound.cpp b/src/platform/gba/sound.cpp index c3bab4cb..f8fdd160 100644 --- a/src/platform/gba/sound.cpp +++ b/src/platform/gba/sound.cpp @@ -1,6 +1,6 @@ #include "common.h" -int16 IMA_STEP[89] = { // IWRAM ! +int32 IMA_STEP[] = { // IWRAM ! 7, 8, 9, 10, 11, 12, 13, 14, 16, 17, 19, 21, 23, 25, 28, 31, 34, 37, 41, 45, 50, 55, 60, 66, @@ -15,7 +15,82 @@ int16 IMA_STEP[89] = { // IWRAM ! 32767 }; -extern void decodeIMA(IMA_STATE &state, const uint8* data, int32* buffer, int32 size); +#ifdef USE_ASM1 + #define sndIMA sndIMA_asm + #define sndPCM sndPCM_asm + #define sndWrite sndWrite_asm + + extern "C" { + void sndIMA_asm(IMA_STATE &state, int32* buffer, const uint8* data, int32 size); + int32 sndPCM_asm(int32 pos, int32 inc, int32 size, int32 volume, const uint8* data, int32* buffer, int32 count); + void sndWrite_asm(uint8* buffer, int32 count, int32 *data); + } +#else + #define sndIMA sndIMA_c + #define sndPCM sndPCM_c + #define sndWrite sndWrite_c + +#define DECODE_IMA_4(n)\ + step = IMA_STEP[idx];\ + index = n & 7;\ + step += index * step << 1;\ + if (index < 4) {\ + idx = X_MAX(idx - 1, 0);\ + } else {\ + idx = X_MIN(idx + ((index - 3) << 1), X_COUNT(IMA_STEP) - 1);\ + }\ + if (n & 8) {\ + smp -= step >> 3;\ + } else {\ + smp += step >> 3;\ + }\ + *buffer++ = smp >> (16 - (8 + SND_VOL_SHIFT)); + +void sndIMA_c(IMA_STATE &state, int32* buffer, const uint8* data, int32 size) +{ + uint32 step, index; + + int32 smp = state.smp; + int32 idx = state.idx; + + for (int32 i = 0; i < size; i++) + { + uint32 n = *data++; + DECODE_IMA_4(n); + n >>= 4; + DECODE_IMA_4(n); + } + + state.smp = smp; + state.idx = idx; +} + +int32 sndPCM_c(int32 pos, int32 inc, int32 size, int32 volume, const uint8* data, int32* buffer, int32 count) +{ + int32 last = pos + count * inc; + if (last > size) { + last = size; + } + + while (pos < last) + { + *buffer++ += SND_DECODE(data[pos >> SND_FIXED_SHIFT]) * volume; + pos += inc; + } + + return pos; +} + +void sndWrite_c(uint8* buffer, int32 count, int32 *data) +{ + for (int32 i = 0; i < count; i++) + { + int32 samp = X_CLAMP(data[i] >> SND_VOL_SHIFT, SND_MIN, SND_MAX); + buffer[i] = SND_ENCODE(samp); + } +} +#endif + struct Music { @@ -28,7 +103,7 @@ struct Music { int32 len = X_MIN(size - pos, count >> 1); - decodeIMA(state, data + pos, buffer, len); + sndIMA(state, buffer, data + pos, len); pos += len; @@ -42,25 +117,19 @@ struct Music struct Sample { - const uint8* data; - int32 size; int32 pos; int32 inc; + int32 size; int32 volume; + const uint8* data; void fill(int32* buffer, int32 count) { - for (int32 i = 0; i < count; i++) - { - buffer[i] += SND_DECODE(data[pos >> SND_FIXED_SHIFT]) * volume; + pos = sndPCM(pos, inc, size, volume, data, buffer, count); - pos += inc; - if (pos >= size) - { - // TODO LOOP - data = NULL; - return; - } + if (pos >= size) + { + data = NULL; } } }; @@ -95,10 +164,8 @@ void sndFreeSamples() void* sndPlaySample(int32 index, int32 volume, int32 pitch, int32 mode) { const uint8 *data = level.soundData + level.soundOffsets[index]; - - int32 size; - memcpy(&size, data + 40, 4); // TODO preprocess and remove wave header - data += 44; + int32 size = *(int32*)data; + data += 4; if (mode == UNIQUE || mode == REPLAY) { @@ -178,7 +245,7 @@ bool sndTrackIsPlaying() void sndStopSample(int32 index) { - const uint8 *data = level.soundData + level.soundOffsets[index] + 44; + const uint8 *data = level.soundData + level.soundOffsets[index] + 4; int32 i = channelsCount; @@ -230,9 +297,5 @@ void sndFill(uint8* buffer, int32 count) } } - for (int32 i = 0; i < count; i++) - { - int32 samp = X_CLAMP(tmp[i] >> SND_VOL_SHIFT, SND_MIN, SND_MAX); - buffer[i] = SND_ENCODE(samp); - } + sndWrite(buffer, count, tmp); }