Skip to content

Commit

Permalink
#368 GBA sound mixing optimization
Browse files Browse the repository at this point in the history
  • Loading branch information
XProger committed Nov 27, 2022
1 parent b60788e commit 590c7cf
Show file tree
Hide file tree
Showing 7 changed files with 63 additions and 102 deletions.
2 changes: 1 addition & 1 deletion src/fixed/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -2839,7 +2839,7 @@ int32 doTutorial(ItemObj* lara, int32 track);
void sndInit();
void sndInitSamples();
void sndFreeSamples();
void sndFill(uint8* buffer, int32 count);
void sndFill(int8* buffer, int32 count);
void* sndPlaySample(int32 index, int32 volume, int32 pitch, int32 mode);
void sndPlayTrack(int32 track);
bool sndTrackIsPlaying();
Expand Down
3 changes: 3 additions & 0 deletions src/platform/gba/asm/common_asm.inc
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,9 @@
.equ MIN_INT32, 0x80000000
.equ MAX_INT32, 0x7FFFFFFF

// right shift applied to a sample after it has been multiplied by its volume
.equ SND_VOL_SHIFT, 6
// number of fractional bits in the fixed-point sample position (pos >> SND_FIXED_SHIFT = sample index)
.equ SND_FIXED_SHIFT, 8

// res = divTable[x] (uint16)
.macro divLUT res, x
add \res, \x, #DIVLUT_ADDR
Expand Down
21 changes: 10 additions & 11 deletions src/platform/gba/asm/sndIMA.s
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,8 @@ stepLUT .req r6
step .req r7
n .req r8
index .req r9
outA .req r12
outB .req lr
tmp .req outB
out .req r12
tmp .req out

IMA_STEP_SIZE = 88

Expand All @@ -33,12 +32,12 @@ IMA_STEP_SIZE = 88
cmpgt idx, #IMA_STEP_SIZE
movgt idx, #IMA_STEP_SIZE

mov \out, smp, asr #2
mov \out, smp, asr #(2 + SND_VOL_SHIFT)
.endm

.global sndIMA_asm
sndIMA_asm:
stmfd sp!, {r4-r9, lr}
stmfd sp!, {r4-r9}

ldmia state, {smp, idx}

Expand All @@ -47,18 +46,18 @@ sndIMA_asm:
.loop:
ldrb n, [data], #1

decode4 n, outA

mov n, n, lsr #4
decode4 n, out
strb out, [buffer], #1

decode4 n, outB
mov n, n, lsr #4

stmia buffer!, {outA, outB}
decode4 n, out
strb out, [buffer], #1

subs size, #1
bne .loop

stmia state, {smp, idx}

ldmfd sp!, {r4-r9, lr}
ldmfd sp!, {r4-r9}
bx lr
39 changes: 29 additions & 10 deletions src/platform/gba/asm/sndPCM.s
Original file line number Diff line number Diff line change
Expand Up @@ -13,34 +13,53 @@ ampB .req r8
outA .req r9
outB .req r12
last .req count
tmp .req outB
tmpSP .req outB
tmp .req ampA

// clamp: saturate the register passed as amp to the signed 8-bit range (-128..127), in place
// clobbers: tmp, condition flags (the cmp below updates them)
.macro clamp amp
// Vanadium's clamp trick (-128..127)
mov tmp, \amp, asr #31 // tmp <- sign mask: 0 if amp >= 0, 0xffffffff if amp < 0
cmp tmp, \amp, asr #7 // (amp >> 7) equals the sign mask only when amp is already within -128..127
eorne \amp, tmp, #0x7F // out of range: amp <- 0x0000007f (positive) or 0xffffff80 (negative)
.endm

.global sndPCM_asm
sndPCM_asm:
mov tmp, sp
mov tmpSP, sp
stmfd sp!, {r4-r9}

ldmia tmp, {data, buffer, count}
ldmia tmpSP, {data, buffer, count}

mla last, inc, count, pos
cmp last, size
movgt last, size

.loop:
ldrb ampA, [data, pos, lsr #8]
ldrb ampA, [data, pos, lsr #SND_FIXED_SHIFT]
add pos, pos, inc
ldrb ampB, [data, pos, lsr #8]
ldrb ampB, [data, pos, lsr #SND_FIXED_SHIFT]
add pos, pos, inc
cmp pos, last

// can't use signed PCM because of LDRSB restrictions
sub ampA, ampA, #128
sub ampB, ampB, #128

ldmia buffer, {outA, outB}
mla outA, volume, ampA, outA
mla outB, volume, ampB, outB
stmia buffer!, {outA, outB}
mul ampA, volume
mul ampB, volume

ldrsb outA, [buffer, #0]
ldrsb outB, [buffer, #1]

add outA, ampA, asr #SND_VOL_SHIFT
add outB, ampB, asr #SND_VOL_SHIFT

clamp outA
clamp outB

strb outA, [buffer], #1
strb outB, [buffer], #1

cmp pos, last
blt .loop

.done:
Expand Down
44 changes: 0 additions & 44 deletions src/platform/gba/asm/sndWrite.s

This file was deleted.

6 changes: 3 additions & 3 deletions src/platform/gba/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ bool osLoadGame()

void osJoyVibrate(int32 index, int32 L, int32 R) {}

extern uint8 soundBuffer[2 * SND_SAMPLES + 32]; // 32 bytes of silence for DMA overrun while interrupt
extern int8 soundBuffer[2 * SND_SAMPLES + 32]; // 32 bytes of silence for DMA overrun while interrupt

HWAVEOUT waveOut;
WAVEFORMATEX waveFmt = { WAVE_FORMAT_PCM, 1, SND_OUTPUT_FREQ, SND_OUTPUT_FREQ, 1, 8, sizeof(waveFmt) };
Expand Down Expand Up @@ -123,7 +123,7 @@ void soundFill()
{
WAVEHDR *waveHdr = waveBuf + curSoundBuffer;
waveOutUnprepareHeader(waveOut, waveHdr, sizeof(WAVEHDR));
sndFill((uint8*)waveHdr->lpData, SND_SAMPLES);
sndFill((int8*)waveHdr->lpData, SND_SAMPLES);
waveOutPrepareHeader(waveOut, waveHdr, sizeof(WAVEHDR));
waveOutWrite(waveOut, waveHdr, sizeof(WAVEHDR));
curSoundBuffer ^= 1;
Expand Down Expand Up @@ -482,7 +482,7 @@ void updateInput()
if (key_is_down(KEY_SELECT)) keys |= IK_SELECT;
}

extern uint8* soundBuffer;
extern int8 soundBuffer[2 * SND_SAMPLES + 32];

void soundInit()
{
Expand Down
50 changes: 17 additions & 33 deletions src/platform/gba/sound.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,30 +17,23 @@ int32 IMA_STEP[] = { // IWRAM !

#if defined(__GBA__) && defined(USE_ASM)
extern const uint8_t TRACKS_IMA[];
// The sound mixer runs during VBlank, which gives it exclusive access to VRAM without any performance penalty,
// so part of the offscreen VRAM is used for the sound buffers (704 + 384 = 1088 bytes).
int32* mixerBuffer = (int32*)(MEM_VRAM + VRAM_PAGE_SIZE + FRAME_WIDTH * FRAME_HEIGHT);
uint8* soundBuffer = (uint8*)(MEM_VRAM + VRAM_PAGE_SIZE + FRAME_WIDTH * FRAME_HEIGHT + SND_SAMPLES * sizeof(int32)); // use 2k of VRAM after the first frame buffer as sound buffer
#else
extern const void* TRACKS_IMA;
int32 mixerBuffer[SND_SAMPLES];
uint8 soundBuffer[2 * SND_SAMPLES + 32]; // 32 bytes of silence for DMA overrun while interrupt
#endif

int8 soundBuffer[2 * SND_SAMPLES + 32]; // 32 bytes of silence for DMA overrun while interrupt

#ifdef USE_ASM
#define sndIMA sndIMA_asm
#define sndPCM sndPCM_asm
#define sndWrite sndWrite_asm

extern "C" {
void sndIMA_asm(IMA_STATE &state, int32* buffer, const uint8* data, int32 size);
int32 sndPCM_asm(int32 pos, int32 inc, int32 size, int32 volume, const uint8* data, int32* buffer, int32 count);
void sndWrite_asm(uint8* buffer, int32 count, int32 *data);
void sndIMA_asm(IMA_STATE &state, int8* buffer, const uint8* data, int32 size);
int32 sndPCM_asm(int32 pos, int32 inc, int32 size, int32 volume, const uint8* data, int8* buffer, int32 count);
}
#else
#define sndIMA sndIMA_c
#define sndPCM sndPCM_c
#define sndWrite sndWrite_c

#define DECODE_IMA_4(n)\
step = IMA_STEP[idx];\
Expand All @@ -56,14 +49,16 @@ int32 IMA_STEP[] = { // IWRAM !
} else {\
smp += step >> 3;\
}\
*buffer++ = smp >> (16 - (8 + SND_VOL_SHIFT));
amp = smp >> 8;\
*buffer++ = SND_ENCODE(X_CLAMP(amp, SND_MIN, SND_MAX));

void sndIMA_c(IMA_STATE &state, int32* buffer, const uint8* data, int32 size)
void sndIMA_c(IMA_STATE &state, int8* buffer, const uint8* data, int32 size)
{
uint32 step, index;

int32 smp = state.smp;
int32 idx = state.idx;
int32 amp;

for (int32 i = 0; i < size; i++)
{
Expand All @@ -77,7 +72,7 @@ void sndIMA_c(IMA_STATE &state, int32* buffer, const uint8* data, int32 size)
state.idx = idx;
}

int32 sndPCM_c(int32 pos, int32 inc, int32 size, int32 volume, const uint8* data, int32* buffer, int32 count)
int32 sndPCM_c(int32 pos, int32 inc, int32 size, int32 volume, const uint8* data, int8* buffer, int32 count)
{
int32 last = pos + count * inc;
if (last > size) {
Expand All @@ -86,32 +81,23 @@ int32 sndPCM_c(int32 pos, int32 inc, int32 size, int32 volume, const uint8* data

while (pos < last)
{
*buffer++ += SND_DECODE(data[pos >> SND_FIXED_SHIFT]) * volume;
int32 amp = SND_DECODE(*buffer) + ((SND_DECODE(data[pos >> SND_FIXED_SHIFT]) * volume) >> SND_VOL_SHIFT);
*buffer++ = SND_ENCODE(X_CLAMP(amp, SND_MIN, SND_MAX));
pos += inc;
}

return pos;
}

// Converts `count` mixed 32-bit samples from `data` into 8-bit output samples in `buffer`.
// Each input value is shifted right by SND_VOL_SHIFT, clamped to SND_MIN..SND_MAX
// and then passed through SND_ENCODE before being stored.
void sndWrite_c(uint8* buffer, int32 count, int32 *data)
{
    const int32* src = data;
    uint8* dst = buffer;

    for (int32 left = count; left > 0; left--)
    {
        int32 scaled = *src++ >> SND_VOL_SHIFT;
        *dst++ = SND_ENCODE(X_CLAMP(scaled, SND_MIN, SND_MAX));
    }
}
#endif


struct Music
{
const uint8* data;
int32 size;
int32 pos;
IMA_STATE state;

void fill(int32* buffer, int32 count)
void fill(int8* buffer, int32 count)
{
int32 len = X_MIN(size - pos, count >> 1);

Expand All @@ -135,7 +121,7 @@ struct Sample
int32 volume;
const uint8* data;

void fill(int32* buffer, int32 count)
void fill(int8* buffer, int32 count)
{
pos = sndPCM(pos, inc, size, volume, data, buffer, count);

Expand Down Expand Up @@ -276,7 +262,7 @@ void sndStop()
music.data = NULL;
}

void sndFill(uint8* buffer, int32 count)
void sndFill(int8* buffer, int32 count)
{
#ifdef PROFILE_SOUNDTIME
PROFILE_CLEAR();
Expand All @@ -290,22 +276,20 @@ void sndFill(uint8* buffer, int32 count)
}

if (music.data) {
music.fill(mixerBuffer, count);
music.fill(buffer, count);
} else {
dmaFill(mixerBuffer, 0, SND_SAMPLES * sizeof(int32));
dmaFill(buffer, 0, SND_SAMPLES * sizeof(buffer[0]));
}

int32 ch = channelsCount;
while (ch--)
{
Sample* sample = channels + ch;

sample->fill(mixerBuffer, count);
sample->fill(buffer, count);

if (!sample->data) {
channels[ch] = channels[--channelsCount];
}
}

sndWrite(buffer, count, mixerBuffer);
}

0 comments on commit 590c7cf

Please sign in to comment.