diff --git a/src/fixed/common.h b/src/fixed/common.h index 8119669e..cca4fa67 100644 --- a/src/fixed/common.h +++ b/src/fixed/common.h @@ -2839,7 +2839,7 @@ int32 doTutorial(ItemObj* lara, int32 track); void sndInit(); void sndInitSamples(); void sndFreeSamples(); -void sndFill(uint8* buffer, int32 count); +void sndFill(int8* buffer, int32 count); void* sndPlaySample(int32 index, int32 volume, int32 pitch, int32 mode); void sndPlayTrack(int32 track); bool sndTrackIsPlaying(); diff --git a/src/platform/gba/asm/common_asm.inc b/src/platform/gba/asm/common_asm.inc index 3ed7fbc3..fc3bb230 100644 --- a/src/platform/gba/asm/common_asm.inc +++ b/src/platform/gba/asm/common_asm.inc @@ -94,6 +94,9 @@ .equ MIN_INT32, 0x80000000 .equ MAX_INT32, 0x7FFFFFFF +.equ SND_VOL_SHIFT, 6 +.equ SND_FIXED_SHIFT, 8 + // res = divTable[x] (uint16) .macro divLUT res, x add \res, \x, #DIVLUT_ADDR diff --git a/src/platform/gba/asm/sndIMA.s b/src/platform/gba/asm/sndIMA.s index 35e2457b..18ee918a 100644 --- a/src/platform/gba/asm/sndIMA.s +++ b/src/platform/gba/asm/sndIMA.s @@ -10,9 +10,8 @@ stepLUT .req r6 step .req r7 n .req r8 index .req r9 -outA .req r12 -outB .req lr -tmp .req outB +out .req r12 +tmp .req out IMA_STEP_SIZE = 88 @@ -33,12 +32,12 @@ IMA_STEP_SIZE = 88 cmpgt idx, #IMA_STEP_SIZE movgt idx, #IMA_STEP_SIZE - mov \out, smp, asr #2 + mov \out, smp, asr #(2 + SND_VOL_SHIFT) .endm .global sndIMA_asm sndIMA_asm: - stmfd sp!, {r4-r9, lr} + stmfd sp!, {r4-r9} ldmia state, {smp, idx} @@ -47,18 +46,18 @@ sndIMA_asm: .loop: ldrb n, [data], #1 - decode4 n, outA - - mov n, n, lsr #4 + decode4 n, out + strb out, [buffer], #1 - decode4 n, outB + mov n, n, lsr #4 - stmia buffer!, {outA, outB} + decode4 n, out + strb out, [buffer], #1 subs size, #1 bne .loop stmia state, {smp, idx} - ldmfd sp!, {r4-r9, lr} + ldmfd sp!, {r4-r9} bx lr diff --git a/src/platform/gba/asm/sndPCM.s b/src/platform/gba/asm/sndPCM.s index 97cd4021..83bfdf5d 100644 --- a/src/platform/gba/asm/sndPCM.s +++ b/src/platform/gba/asm/sndPCM.s @@ -13,34 +13,53 @@ ampB .req r8 outA .req r9 outB .req r12 last .req count -tmp .req outB +tmpSP .req outB +tmp .req ampA + +.macro clamp amp + // Vanadium's clamp trick (-128..127) + mov tmp, \amp, asr #31 // tmp <- 0xffffffff + cmp tmp, \amp, asr #7 // not equal + eorne \amp, tmp, #0x7F // amp <- 0xffffff80 +.endm .global sndPCM_asm sndPCM_asm: - mov tmp, sp + mov tmpSP, sp stmfd sp!, {r4-r9} - ldmia tmp, {data, buffer, count} + ldmia tmpSP, {data, buffer, count} mla last, inc, count, pos cmp last, size movgt last, size .loop: - ldrb ampA, [data, pos, lsr #8] + ldrb ampA, [data, pos, lsr #SND_FIXED_SHIFT] add pos, pos, inc - ldrb ampB, [data, pos, lsr #8] + ldrb ampB, [data, pos, lsr #SND_FIXED_SHIFT] add pos, pos, inc - cmp pos, last + // can't use signed PCM because of LDRSB restrictions sub ampA, ampA, #128 sub ampB, ampB, #128 - ldmia buffer, {outA, outB} - mla outA, volume, ampA, outA - mla outB, volume, ampB, outB - stmia buffer!, {outA, outB} + mul ampA, volume + mul ampB, volume + + ldrsb outA, [buffer, #0] + ldrsb outB, [buffer, #1] + + add outA, ampA, asr #SND_VOL_SHIFT + add outB, ampB, asr #SND_VOL_SHIFT + clamp outA + clamp outB + + strb outA, [buffer], #1 + strb outB, [buffer], #1 + + cmp pos, last blt .loop .done: diff --git a/src/platform/gba/asm/sndWrite.s b/src/platform/gba/asm/sndWrite.s deleted file mode 100644 index a57b6e44..00000000 --- a/src/platform/gba/asm/sndWrite.s +++ /dev/null @@ -1,44 +0,0 @@ -#include "common_asm.inc" - -buffer .req r0 -count .req r1 -data .req r2 -vA .req r3 -vB .req r4 -vC .req r5 -vD .req r12 - -SND_VOL_SHIFT = 6 - -.macro encode amp - mov \amp, \amp, asr #SND_VOL_SHIFT - cmp \amp, #-128 - movlt \amp, #-128 - cmp \amp, #127 - movgt \amp, #127 -.endm - -.global sndWrite_asm -sndWrite_asm: - stmfd sp!, {r4-r5} -.loop: - ldmia data!, {vA, vB, vC, vD} - - encode vA - encode vB - encode vC - encode vD - - and vA, vA, #0xFF - and vB, vB, #0xFF - and vC, vC, #0xFF - orr vA, vA, vB, lsl #8 - orr vA, vA, vC, lsl #16 - orr vA, vA, vD, lsl #24 - str vA, [buffer], #4 - - subs count, #4 - bne .loop - - ldmfd sp!, {r4-r5} - bx lr diff --git a/src/platform/gba/main.cpp b/src/platform/gba/main.cpp index 48417998..932d4572 100644 --- a/src/platform/gba/main.cpp +++ b/src/platform/gba/main.cpp @@ -95,7 +95,7 @@ bool osLoadGame() void osJoyVibrate(int32 index, int32 L, int32 R) {} -extern uint8 soundBuffer[2 * SND_SAMPLES + 32]; // 32 bytes of silence for DMA overrun while interrupt +extern int8 soundBuffer[2 * SND_SAMPLES + 32]; // 32 bytes of silence for DMA overrun while interrupt HWAVEOUT waveOut; WAVEFORMATEX waveFmt = { WAVE_FORMAT_PCM, 1, SND_OUTPUT_FREQ, SND_OUTPUT_FREQ, 1, 8, sizeof(waveFmt) }; @@ -123,7 +123,7 @@ void soundFill() { WAVEHDR *waveHdr = waveBuf + curSoundBuffer; waveOutUnprepareHeader(waveOut, waveHdr, sizeof(WAVEHDR)); - sndFill((uint8*)waveHdr->lpData, SND_SAMPLES); + sndFill((int8*)waveHdr->lpData, SND_SAMPLES); waveOutPrepareHeader(waveOut, waveHdr, sizeof(WAVEHDR)); waveOutWrite(waveOut, waveHdr, sizeof(WAVEHDR)); curSoundBuffer ^= 1; @@ -482,7 +482,7 @@ void updateInput() if (key_is_down(KEY_SELECT)) keys |= IK_SELECT; } -extern uint8* soundBuffer; +extern int8 soundBuffer[2 * SND_SAMPLES + 32]; void soundInit() { diff --git a/src/platform/gba/sound.cpp b/src/platform/gba/sound.cpp index 46224798..9d7c0b50 100644 --- a/src/platform/gba/sound.cpp +++ b/src/platform/gba/sound.cpp @@ -17,30 +17,23 @@ int32 IMA_STEP[] = { // IWRAM ! #if defined(__GBA__) && defined(USE_ASM) extern const uint8_t TRACKS_IMA[]; - // the sound mixer works during VBlank, this is a great opportunity for exclusive access to VRAM without any perf penalties - // so we use part of offscreen VRAM as sound buffers (704 + 384 = 1088 bytes) - int32* mixerBuffer = (int32*)(MEM_VRAM + VRAM_PAGE_SIZE + FRAME_WIDTH * FRAME_HEIGHT); - uint8* soundBuffer = (uint8*)(MEM_VRAM + VRAM_PAGE_SIZE + FRAME_WIDTH * FRAME_HEIGHT + SND_SAMPLES * sizeof(int32)); // use 2k of VRAM after the first frame buffer as sound buffer #else extern const void* TRACKS_IMA; - int32 mixerBuffer[SND_SAMPLES]; - uint8 soundBuffer[2 * SND_SAMPLES + 32]; // 32 bytes of silence for DMA overrun while interrupt #endif +int8 soundBuffer[2 * SND_SAMPLES + 32]; // 32 bytes of silence for DMA overrun while interrupt + #ifdef USE_ASM #define sndIMA sndIMA_asm #define sndPCM sndPCM_asm - #define sndWrite sndWrite_asm extern "C" { - void sndIMA_asm(IMA_STATE &state, int32* buffer, const uint8* data, int32 size); - int32 sndPCM_asm(int32 pos, int32 inc, int32 size, int32 volume, const uint8* data, int32* buffer, int32 count); - void sndWrite_asm(uint8* buffer, int32 count, int32 *data); + void sndIMA_asm(IMA_STATE &state, int8* buffer, const uint8* data, int32 size); + int32 sndPCM_asm(int32 pos, int32 inc, int32 size, int32 volume, const uint8* data, int8* buffer, int32 count); } #else #define sndIMA sndIMA_c #define sndPCM sndPCM_c - #define sndWrite sndWrite_c #define DECODE_IMA_4(n)\ step = IMA_STEP[idx];\ @@ -56,14 +49,16 @@ int32 IMA_STEP[] = { // IWRAM ! } else {\ smp += step >> 3;\ }\ - *buffer++ = smp >> (16 - (8 + SND_VOL_SHIFT)); + amp = smp >> 8;\ + *buffer++ = SND_ENCODE(X_CLAMP(amp, SND_MIN, SND_MAX)); -void sndIMA_c(IMA_STATE &state, int32* buffer, const uint8* data, int32 size) +void sndIMA_c(IMA_STATE &state, int8* buffer, const uint8* data, int32 size) { uint32 step, index; int32 smp = state.smp; int32 idx = state.idx; + int32 amp; for (int32 i = 0; i < size; i++) { @@ -77,7 +72,7 @@ void sndIMA_c(IMA_STATE &state, int32* buffer, const uint8* data, int32 size) state.idx = idx; } -int32 sndPCM_c(int32 pos, int32 inc, int32 size, int32 volume, const uint8* data, int32* buffer, int32 count) +int32 sndPCM_c(int32 pos, int32 inc, int32 size, int32 volume, const uint8* data, int8* buffer, int32 count) { int32 last = pos + count * inc; if (last > size) { @@ -86,24 +81,15 @@ int32 sndPCM_c(int32 pos, int32 inc, int32 size, int32 volume, const uint8* data while (pos < last) { - *buffer++ += SND_DECODE(data[pos >> SND_FIXED_SHIFT]) * volume; + int32 amp = SND_DECODE(*buffer) + ((SND_DECODE(data[pos >> SND_FIXED_SHIFT]) * volume) >> SND_VOL_SHIFT); + *buffer++ = SND_ENCODE(X_CLAMP(amp, SND_MIN, SND_MAX)); pos += inc; } return pos; } - -void sndWrite_c(uint8* buffer, int32 count, int32 *data) -{ - for (int32 i = 0; i < count; i++) - { - int32 samp = X_CLAMP(data[i] >> SND_VOL_SHIFT, SND_MIN, SND_MAX); - buffer[i] = SND_ENCODE(samp); - } -} #endif - struct Music { const uint8* data; @@ -111,7 +97,7 @@ struct Music int32 pos; IMA_STATE state; - void fill(int32* buffer, int32 count) + void fill(int8* buffer, int32 count) { int32 len = X_MIN(size - pos, count >> 1); @@ -135,7 +121,7 @@ struct Sample int32 volume; const uint8* data; - void fill(int32* buffer, int32 count) + void fill(int8* buffer, int32 count) { pos = sndPCM(pos, inc, size, volume, data, buffer, count); @@ -276,7 +262,7 @@ void sndStop() music.data = NULL; } -void sndFill(uint8* buffer, int32 count) +void sndFill(int8* buffer, int32 count) { #ifdef PROFILE_SOUNDTIME PROFILE_CLEAR(); @@ -290,9 +276,9 @@ void sndFill(uint8* buffer, int32 count) } if (music.data) { - music.fill(mixerBuffer, count); + music.fill(buffer, count); } else { - dmaFill(mixerBuffer, 0, SND_SAMPLES * sizeof(int32)); + dmaFill(buffer, 0, SND_SAMPLES * sizeof(buffer[0])); } int32 ch = channelsCount; @@ -300,12 +286,10 @@ void sndFill(uint8* buffer, int32 count) { Sample* sample = channels + ch; - sample->fill(mixerBuffer, count); + sample->fill(buffer, count); if (!sample->data) { channels[ch] = channels[--channelsCount]; } } - - sndWrite(buffer, count, mixerBuffer); }