Audio pipeline buffers to std::span (#3276)

* remove addAudio
* Spans in audio_engine
* Metronome to span
* AbsoluteValue to span
* Compressor to span
* Stutterer to span
* Granular processor to span
* SampleRecorder to span
* Song to std::span
* ModControllableAudio signatures to std::span
* Refactor ModControllableAudio to use element-wise for loops
* GlobalEffectable parameters to std::span
* ModFX to span (and minor parameter refactoring)
* Output::renderOutput to std::span
* Sound::render parameter to std::span
* Refactor Sound::render() around std::span
* GlobalEffectableForClip::render parameter to std::span
* Refactor GlobalEffectableForClip::renderOutput to utilize std::span
* renderGlobalEffectableForClip parameter to std::span
* Refactor audio_output use of std::span
* AudioClip::render signature to std::span
* Add span include to GranularProcessor.h
* span include for AbsValueFollower
* include span in stutterer
* include span in modfxprocessor
SynthstromAudible · Jan 18, 2025 · f8a9c57 · f8a9c57
1 parent c235c3e
commit f8a9c57
Show file tree

Hide file tree

Showing 39 changed files with 590 additions and 768 deletions.
diff --git a/src/deluge/dsp/compressor/rms_feedback.cpp b/src/deluge/dsp/compressor/rms_feedback.cpp
@@ -17,7 +17,7 @@
 
 #include "dsp/compressor/rms_feedback.h"
 #include "util/fixedpoint.h"
-StereoSample dryBuffer[SSI_TX_BUFFER_NUM_SAMPLES]{0};
+std::array<StereoSample, SSI_TX_BUFFER_NUM_SAMPLES> dryBuffer;
 
 RMSFeedbackCompressor::RMSFeedbackCompressor() {
 	setAttack(5 << 24);
@@ -49,34 +49,34 @@ void RMSFeedbackCompressor::updateER(float numSamples, q31_t finalVolume) {
 	er = runEnvelope(lastER, er, numSamples);
 }
 /// This renders at a 'neutral' volume, so that at threshold zero the volume is unchanged
-void RMSFeedbackCompressor::renderVolNeutral(StereoSample* buffer, uint16_t numSamples, q31_t finalVolume) {
+void RMSFeedbackCompressor::renderVolNeutral(std::span<StereoSample> buffer, q31_t finalVolume) {
 	// this is a bit gross - the compressor can inherently apply volume changes, but in the case of the per clip
 	// compressor that's already been handled by the reverb send, and the logic there is tightly coupled such that
 	// I couldn't extract correct volume levels from it.
-	render(buffer, numSamples, 1 << 27, 1 << 27, finalVolume >> 3);
+	render(buffer, 1 << 27, 1 << 27, finalVolume >> 3); // same 1 << 27 gain for left and right; finalVolume is divided by 8 before being passed on
 }
 constexpr uint8_t saturationAmount = 3;
-void RMSFeedbackCompressor::render(StereoSample* buffer, uint16_t numSamples, q31_t volAdjustL, q31_t volAdjustR,
+void RMSFeedbackCompressor::render(std::span<StereoSample> buffer, q31_t volAdjustL, q31_t volAdjustR,
                                    q31_t finalVolume) {
 	// make a copy for blending if we need to
 	if (wet != ONE_Q31) {
-		memcpy(dryBuffer, buffer, numSamples * sizeof(StereoSample));
+		memcpy(dryBuffer.data(), buffer.data(), buffer.size_bytes());
 	}
 
 	if (!onLastTime) {
 		// sets the "working level" for interpolation and anti aliasing
 		lastSaturationTanHWorkingValue[0] =
-		    (uint32_t)lshiftAndSaturateUnknown(buffer->l, saturationAmount) + 2147483648u;
+		    (uint32_t)lshiftAndSaturateUnknown(buffer[0].l, saturationAmount) + 2147483648u;
 		lastSaturationTanHWorkingValue[1] =
-		    (uint32_t)lshiftAndSaturateUnknown(buffer->r, saturationAmount) + 2147483648u;
+		    (uint32_t)lshiftAndSaturateUnknown(buffer[0].r, saturationAmount) + 2147483648u;
 		onLastTime = true;
 	}
 	// we update this every time since we won't know if the song volume changed
-	updateER(numSamples, finalVolume);
+	updateER(buffer.size(), finalVolume);
 
 	float over = std::max<float>(0, (rms - threshdb));
 
-	state = runEnvelope(state, over, numSamples);
+	state = runEnvelope(state, over, buffer.size());
 
 	float reduction = -state * fraction;
 
@@ -93,40 +93,38 @@ void RMSFeedbackCompressor::render(StereoSample* buffer, uint16_t numSamples, q3
 	float finalVolumeR = gain * float(volAdjustR >> 9);
 
 	// The amount we need to step the current volume per sample so that it reaches the final volume by the end of the rendering window
-	q31_t amplitudeIncrementL = ((int32_t)((finalVolumeL - (currentVolumeL >> 8)) / float(numSamples))) << 8;
-	q31_t amplitudeIncrementR = ((int32_t)((finalVolumeR - (currentVolumeR >> 8)) / float(numSamples))) << 8;
+	q31_t amplitudeIncrementL = ((int32_t)((finalVolumeL - (currentVolumeL >> 8)) / float(buffer.size()))) << 8;
+	q31_t amplitudeIncrementR = ((int32_t)((finalVolumeR - (currentVolumeR >> 8)) / float(buffer.size()))) << 8;
 
-	StereoSample* thisSample = buffer;
-	StereoSample* drySample = dryBuffer;
-	StereoSample* bufferEnd = buffer + numSamples;
-
-	do {
+	auto dry_it = dryBuffer.begin();
+	for (StereoSample& sample : buffer) {
 		currentVolumeL += amplitudeIncrementL;
 		currentVolumeR += amplitudeIncrementR;
+
 		// Need to shift left by 4 because currentVolumeL is a 5.26 signed number rather than a 1.30 signed.
-		thisSample->l = multiply_32x32_rshift32(thisSample->l, currentVolumeL) << 4;
-		thisSample->l = getTanHAntialiased(thisSample->l, &lastSaturationTanHWorkingValue[0], saturationAmount);
+		sample.l = multiply_32x32_rshift32(sample.l, currentVolumeL) << 4;
+		sample.l = getTanHAntialiased(sample.l, &lastSaturationTanHWorkingValue[0], saturationAmount);
 
-		thisSample->r = multiply_32x32_rshift32(thisSample->r, currentVolumeR) << 4;
-		thisSample->r = getTanHAntialiased(thisSample->r, &lastSaturationTanHWorkingValue[1], saturationAmount);
+		sample.r = multiply_32x32_rshift32(sample.r, currentVolumeR) << 4;
+		sample.r = getTanHAntialiased(sample.r, &lastSaturationTanHWorkingValue[1], saturationAmount);
 		// wet/dry blend
 		if (wet != ONE_Q31) {
-			thisSample->l = multiply_32x32_rshift32(thisSample->l, wet);
-			thisSample->l = multiply_accumulate_32x32_rshift32_rounded(thisSample->l, drySample->l, dry);
-			thisSample->l <<= 1; // correct for the two multiplications
+			sample.l = multiply_32x32_rshift32(sample.l, wet);
+			sample.l = multiply_accumulate_32x32_rshift32_rounded(sample.l, dry_it->l, dry);
+			sample.l <<= 1; // correct for the two multiplications
 			// same for r because StereoSample is a dumb class
-			thisSample->r = multiply_32x32_rshift32(thisSample->r, wet);
-			thisSample->r = multiply_accumulate_32x32_rshift32_rounded(thisSample->r, drySample->r, dry);
-			thisSample->r <<= 1;
-			++drySample; // this is a little gross but it's fine
+			sample.r = multiply_32x32_rshift32(sample.r, wet);
+			sample.r = multiply_accumulate_32x32_rshift32_rounded(sample.r, dry_it->r, dry);
+			sample.r <<= 1;
+			++dry_it; // this is a little gross but it's fine
 		}
+	}
 
-	} while (++thisSample != bufferEnd);
 	// for LEDs
 	// 4 converts to dB, then quadrupled for display range since a 30db reduction is basically killing the signal
 	gainReduction = std::clamp<int32_t>(-(reduction) * 4 * 4, 0, 127);
 	// calc compression for next round (feedback compressor)
-	rms = calcRMS(buffer, numSamples);
+	rms = calcRMS(buffer);
 }
 
 float RMSFeedbackCompressor::runEnvelope(float current, float desired, float numSamples) const {
@@ -142,21 +140,19 @@ float RMSFeedbackCompressor::runEnvelope(float current, float desired, float num
 
 // output range is 0-21 (2^31)
 // dac clipping is at 16
-float RMSFeedbackCompressor::calcRMS(StereoSample* buffer, uint16_t numSamples) {
-	StereoSample* thisSample = buffer;
-	StereoSample* bufferEnd = buffer + numSamples;
+float RMSFeedbackCompressor::calcRMS(std::span<StereoSample> buffer) {
 	q31_t sum = 0;
 	q31_t offset = 0; // to remove dc offset
 	float lastMean = mean;
-	do {
-		q31_t l = thisSample->l - hpfL.doFilter(thisSample->l, hpfA_);
-		q31_t r = thisSample->r - hpfL.doFilter(thisSample->r, hpfA_);
+
+	for (StereoSample sample : buffer) {
+		q31_t l = sample.l - hpfL.doFilter(sample.l, hpfA_);
+		q31_t r = sample.r - hpfL.doFilter(sample.r, hpfA_);
 		q31_t s = std::max(std::abs(l), std::abs(r));
 		sum += multiply_32x32_rshift32(s, s);
+	}
 
-	} while (++thisSample != bufferEnd);
-
-	float ns = float(numSamples * 2);
+	float ns = buffer.size() * 2;
 	mean = (float(sum) / ONE_Q31f) / ns;
 	// warning this is not good math but it's pretty close and way cheaper than doing it properly
 	// good math would use a long FIR, this is a one pole IIR instead

diff --git a/src/deluge/dsp/compressor/rms_feedback.h b/src/deluge/dsp/compressor/rms_feedback.h
@@ -21,6 +21,7 @@
 #include "dsp/filter/ladder_components.h"
 #include "dsp/stereo_sample.h"
 #include <cmath>
+#include <span>
 
 class [[gnu::hot]] RMSFeedbackCompressor {
 public:
@@ -54,12 +55,12 @@ class [[gnu::hot]] RMSFeedbackCompressor {
 	/// @param volAdjustL Linear gain to apply to the left channel as a 4.27 signed fixed point number.
 	/// @param volAdjustR Linear gain to apply to the right channel as a 4.27 signed fixed point number.
 	/// @param finalVolume Linear peak-to-peak volume scale, as a 3.29 fixed-point integer.
-	void render(StereoSample* buffer, uint16_t numSamples, q31_t volAdjustL, q31_t volAdjustR, q31_t finalVolume);
+	void render(std::span<StereoSample> buffer, q31_t volAdjustL, q31_t volAdjustR, q31_t finalVolume);
 
 	/// Render the compressor with neutral left/right gain and with the finalVolume tweaked so the compressor applies
 	/// 0db gain change at threshold zero. Used by the per-clip compressors because the clip volume is applied without
 	/// the compressor being involved.
-	void renderVolNeutral(StereoSample* buffer, uint16_t numSamples, q31_t finalVolume);
+	void renderVolNeutral(std::span<StereoSample> buffer, q31_t finalVolume);
 
 	/// Compute an updated envelope value, using the attack time constant if desired > current and the release time
 	/// constant otherwise.
@@ -169,7 +170,7 @@ class [[gnu::hot]] RMSFeedbackCompressor {
 	void updateER(float numSamples, q31_t finalVolume);
 
 	/// Calculate the RMS amplitude, post internal HPF, of the samples.
-	float calcRMS(StereoSample* buffer, uint16_t numSamples);
+	float calcRMS(std::span<StereoSample> buffer);
 
 	/// Amount of gain reduction applied during the last render pass, in 6.2 fixed point decibels
 	uint8_t gainReduction = 0;

diff --git a/src/deluge/dsp/envelope_follower/absolute_value.cpp b/src/deluge/dsp/envelope_follower/absolute_value.cpp
@@ -31,32 +31,30 @@ float AbsValueFollower::runEnvelope(float current, float desired, float numSampl
 
 // output range is 0-21 (2^31)
 // dac clipping is at 16
-StereoFloatSample AbsValueFollower::calcApproxRMS(StereoSample* buffer, uint16_t numSamples) {
-	StereoSample* thisSample = buffer;
-	StereoSample* bufferEnd = buffer + numSamples;
+StereoFloatSample AbsValueFollower::calcApproxRMS(std::span<StereoSample> buffer) {
 	q31_t l = 0;
 	q31_t r = 0;
 	StereoFloatSample logMean;
 
-	do {
-		l += std::abs(thisSample->l);
-		r += std::abs(thisSample->r);
-	} while (++thisSample != bufferEnd);
+	for (StereoSample sample : buffer) {
+		l += std::abs(sample.l);
+		r += std::abs(sample.r);
+	}
 
-	auto ns = float(numSamples);
-	meanL = (float(l)) / ns;
-	meanR = (float(r)) / ns;
+	float ns = buffer.size();
+	meanL = l / ns;
+	meanR = r / ns;
 	// warning this is not good math but it's pretty close and way cheaper than doing it properly
 	// good math would use a long FIR, this is a one pole IIR instead
 	// the more samples we have, the more weight we put on the current mean to avoid response slowing down
 	// at high cpu loads
 	meanL = (meanL * ns + lastMeanL) / (1 + ns);
 	meanR = (meanR * ns + lastMeanR) / (1 + ns);
 
-	lastMeanL = runEnvelope(lastMeanL, meanL, numSamples);
+	lastMeanL = runEnvelope(lastMeanL, meanL, ns);
 	logMean.l = std::log(lastMeanL + 1e-24f);
 
-	lastMeanR = runEnvelope(lastMeanR, meanR, numSamples);
+	lastMeanR = runEnvelope(lastMeanR, meanR, ns);
 	logMean.r = std::log(lastMeanR + 1e-24f);
 
 	return logMean;

diff --git a/src/deluge/dsp/envelope_follower/absolute_value.h b/src/deluge/dsp/envelope_follower/absolute_value.h
@@ -19,6 +19,8 @@
 
 #include "dsp/stereo_sample.h"
 #include <cmath>
+#include <span>
+
 class AbsValueFollower {
 public:
 	AbsValueFollower() = default;
@@ -53,7 +55,7 @@ class AbsValueFollower {
 		return releaseMS;
 	};
 
-	StereoFloatSample calcApproxRMS(StereoSample* buffer, uint16_t numSamples);
+	StereoFloatSample calcApproxRMS(std::span<StereoSample> buffer);
 
 private:
 	float runEnvelope(float current, float desired, float numSamples);

diff --git a/src/deluge/dsp/granular/GranularProcessor.cpp b/src/deluge/dsp/granular/GranularProcessor.cpp
@@ -43,8 +43,8 @@ void GranularProcessor::setWrapsToShutdown() {
 	grainBuffer->inUse = true;
 }
 
-void GranularProcessor::processGrainFX(StereoSample* buffer, int32_t grainRate, int32_t grainMix, int32_t grainDensity,
-                                       int32_t pitchRandomness, int32_t* postFXVolume, const StereoSample* bufferEnd,
+void GranularProcessor::processGrainFX(std::span<StereoSample> buffer, int32_t grainRate, int32_t grainMix,
+                                       int32_t grainDensity, int32_t pitchRandomness, int32_t* postFXVolume,
                                        bool anySoundComingIn, float tempoBPM, q31_t reverbAmount) {
 	if (anySoundComingIn || wrapsToShutdown >= 0) {
 		if (anySoundComingIn) {
@@ -57,23 +57,24 @@ void GranularProcessor::processGrainFX(StereoSample* buffer, int32_t grainRate,
 			}
 		}
 		setupGrainFX(grainRate, grainMix, grainDensity, pitchRandomness, postFXVolume, tempoBPM);
-		StereoSample* currentSample = buffer;
 		int i = 0;
-		do {
-			StereoSample grainWet = processOneGrainSample(currentSample);
+		for (StereoSample& sample : buffer) {
+			StereoSample grainWet = processOneGrainSample(sample);
 			auto wetl = q31_mult(grainWet.l, _grainVol);
 			auto wetr = q31_mult(grainWet.r, _grainVol);
+
 			// filter slightly - one pole at 12ish khz
 			wetl = lpf_l.doFilter(wetl, 1 << 29);
 			wetr = lpf_r.doFilter(wetr, 1 << 29);
+
 			// WET and DRY Vol
-			currentSample->l = add_saturation(q31_mult(currentSample->l, _grainDryVol), wetl);
-			currentSample->r = add_saturation(q31_mult(currentSample->r, _grainDryVol), wetr);
+			sample.l = add_saturation(q31_mult(sample.l, _grainDryVol), wetl);
+			sample.r = add_saturation(q31_mult(sample.r, _grainDryVol), wetr);
+
 			// adding a small amount of extra reverb covers a lot of the granular artifacts
 			AudioEngine::feedReverbBackdoorForGrain(i, q31_mult((wetl + wetr), reverbAmount));
 			i += 1;
-
-		} while (++currentSample != bufferEnd);
+		}
 
 		if (wrapsToShutdown < 0) {
 			grainBuffer->inUse = false;
@@ -123,7 +124,7 @@ void GranularProcessor::setupGrainFX(int32_t grainRate, int32_t grainMix, int32_
 		_grainFeedbackVol = _grainVol >> 1;
 	}
 }
-StereoSample GranularProcessor::processOneGrainSample(StereoSample* currentSample) {
+StereoSample GranularProcessor::processOneGrainSample(StereoSample currentSample) {
 	if (bufferWriteIndex >= kModFXGrainBufferSize) {
 		bufferWriteIndex = 0;
 		wrapsToShutdown -= 1;
@@ -163,9 +164,9 @@ StereoSample GranularProcessor::processOneGrainSample(StereoSample* currentSampl
 	grains_r <<= 3;
 	// Feedback (Below grainFeedbackVol means "grainVol >> 4")
 	(*grainBuffer)[writeIndex].l =
-	    multiply_accumulate_32x32_rshift32_rounded(currentSample->l, grains_l, _grainFeedbackVol);
+	    multiply_accumulate_32x32_rshift32_rounded(currentSample.l, grains_l, _grainFeedbackVol);
 	(*grainBuffer)[writeIndex].r =
-	    multiply_accumulate_32x32_rshift32_rounded(currentSample->r, grains_r, _grainFeedbackVol);
+	    multiply_accumulate_32x32_rshift32_rounded(currentSample.r, grains_r, _grainFeedbackVol);
 
 	bufferWriteIndex++;
 	return StereoSample{grains_l, grains_r};

diff --git a/src/deluge/dsp/granular/GranularProcessor.h b/src/deluge/dsp/granular/GranularProcessor.h
@@ -24,6 +24,7 @@
 #include "dsp/stereo_sample.h"
 #include "memory/stealable.h"
 #include "modulation/lfo.h"
+#include <span>
 
 class UnpatchedParamSet;
 
@@ -53,17 +54,17 @@ class GranularProcessor {
 	void startSkippingRendering();
 
 	/// preset is currently converted from a param to a 0-4 preset inside the grain, which is probably not great
-	void processGrainFX(StereoSample* buffer, int32_t grainRate, int32_t grainMix, int32_t grainDensity,
-	                    int32_t pitchRandomness, int32_t* postFXVolume, const StereoSample* bufferEnd,
-	                    bool anySoundComingIn, float tempoBPM, q31_t reverbAmount);
+	void processGrainFX(std::span<StereoSample> buffer, int32_t grainRate, int32_t grainMix, int32_t grainDensity,
+	                    int32_t pitchRandomness, int32_t* postFXVolume, bool anySoundComingIn, float tempoBPM,
+	                    q31_t reverbAmount);
 
 	void clearGrainFXBuffer();
 	void grainBufferStolen() { grainBuffer = nullptr; }
 
 private:
 	void setupGrainFX(int32_t grainRate, int32_t grainMix, int32_t grainDensity, int32_t pitchRandomness,
 	                  int32_t* postFXVolume, float timePerInternalTick);
-	StereoSample processOneGrainSample(StereoSample* currentSample);
+	StereoSample processOneGrainSample(StereoSample currentSample);
 	void getBuffer();
 	void setWrapsToShutdown();
 	void setupGrainsIfNeeded(int32_t writeIndex);

diff --git a/src/deluge/dsp/stereo_sample.h b/src/deluge/dsp/stereo_sample.h
@@ -23,6 +23,13 @@
 #include "util/functions.h"
 
 struct StereoSample {
+	[[gnu::always_inline]] static constexpr StereoSample fromMono(q31_t sampleValue) { // builds a StereoSample from a single mono value
+		return StereoSample{
+		    .l = sampleValue, // left channel gets the mono value
+		    .r = sampleValue, // right channel gets the same value
+		};
+	}
+
 	inline void addMono(q31_t sampleValue) {
 		l += sampleValue;
 		r += sampleValue;

diff --git a/src/deluge/model/clip/audio_clip.cpp b/src/deluge/model/clip/audio_clip.cpp
@@ -38,6 +38,7 @@
 #include "processing/audio_output.h"
 #include "processing/engines/audio_engine.h"
 #include "storage/storage_manager.h"
+#include "util/fixedpoint.h"
 #include <new>
 
 namespace params = deluge::modulation::params;
@@ -543,8 +544,8 @@ int64_t AudioClip::getNumSamplesTilLoop(ModelStackWithTimelineCounter* modelStac
 	return loopTime - AudioEngine::audioSampleTimer;
 }
 
-void AudioClip::render(ModelStackWithTimelineCounter* modelStack, int32_t* outputBuffer, int32_t numSamples,
-                       int32_t amplitude, int32_t amplitudeIncrement, int32_t pitchAdjust) {
+void AudioClip::render(ModelStackWithTimelineCounter* modelStack, std::span<q31_t> outputBuffer, int32_t amplitude,
+                       int32_t amplitudeIncrement, int32_t pitchAdjust) {
 
 	if (!voiceSample) {
 		return;
@@ -554,7 +555,7 @@ void AudioClip::render(ModelStackWithTimelineCounter* modelStack, int32_t* outpu
 
 	// First, if we're still attempting to do a "late start", see if we can do that (perhaps not if relevant audio data
 	// hasn't loaded yet)
-	if (doingLateStart && ((AudioOutput*)output)->envelope.state < EnvelopeStage::FAST_RELEASE) {
+	if (doingLateStart && static_cast<AudioOutput*>(this->output)->envelope.state < EnvelopeStage::FAST_RELEASE) {
 		uint64_t numSamplesIn = guide.getSyncedNumSamplesIn();
 
 		LateStartAttemptStatus result = voiceSample->attemptLateSampleStart(&guide, sample, numSamplesIn);
@@ -726,7 +727,7 @@ void AudioClip::render(ModelStackWithTimelineCounter* modelStack, int32_t* outpu
 		// forgot?) It's perhaps a little bit surprising, but this even works and sounds perfect (you never hear any of
 		// the margin) when time-stretching is happening! Down to about half speed. Below that, you hear some of the
 		// margin.
-		if (((AudioOutput*)output)->envelope.state < EnvelopeStage::FAST_RELEASE) {
+		if (static_cast<AudioOutput*>(this->output)->envelope.state < EnvelopeStage::FAST_RELEASE) {
 
 			ModelStackWithNoteRow* modelStackWithNoteRow = modelStack->addNoteRow(0, nullptr);
 
@@ -739,7 +740,7 @@ void AudioClip::render(ModelStackWithTimelineCounter* modelStack, int32_t* outpu
 				int32_t timeTilLoop = loopTime - AudioEngine::audioSampleTimer;
 
 				if (timeTilLoop < 1024) {
-					((AudioOutput*)output)
+					static_cast<AudioOutput*>(this->output)
 					    ->envelope.unconditionalRelease(EnvelopeStage::FAST_RELEASE, 8192); // Let's make it extra fast?
 				}
 			}
@@ -760,8 +761,8 @@ void AudioClip::render(ModelStackWithTimelineCounter* modelStack, int32_t* outpu
 	{
 		LoopType loopingType = getLoopingType(modelStack);
 
-		stillActive = voiceSample->render(&guide, outputBuffer, numSamples, sample, sample->numChannels, loopingType,
-		                                  phaseIncrement, timeStretchRatio, amplitude, amplitudeIncrement,
+		stillActive = voiceSample->render(&guide, outputBuffer.data(), outputBuffer.size(), sample, sample->numChannels,
+		                                  loopingType, phaseIncrement, timeStretchRatio, amplitude, amplitudeIncrement,
 		                                  sampleControls.getInterpolationBufferSize(phaseIncrement),
 		                                  sampleControls.interpolationMode, 1);
 	}