From ab5b261d6ebc38135d420c93f9ed14d502141f29 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrgen=20Hock?= Date: Sat, 2 Dec 2023 21:20:19 +0100 Subject: [PATCH] Provide zero padding #42 --- cpp/StftPitchShift/STFT.h | 20 +++++++++------- cpp/StftPitchShift/StftPitchShift.cpp | 16 +++++++++---- cpp/StftPitchShift/StftPitchShift.h | 9 +++++++ cpp/StftPitchShift/StftPitchShiftCore.h | 31 ++++++++++++++++--------- cpp/StftPitchShift/Vocoder.h | 12 +++++----- cpp/StftPitchShift/main.cpp | 1 + examples/realtime.cpp | 8 ++++--- 7 files changed, 64 insertions(+), 33 deletions(-) diff --git a/cpp/StftPitchShift/STFT.h b/cpp/StftPitchShift/STFT.h index 4432cdb..44afd0c 100755 --- a/cpp/StftPitchShift/STFT.h +++ b/cpp/StftPitchShift/STFT.h @@ -25,25 +25,26 @@ namespace stftpitchshift public: - STFT(const size_t framesize, const size_t hopsize, const bool chronometry = false) : - STFT(std::make_shared(), std::make_tuple(framesize, framesize), hopsize, chronometry) + STFT(const size_t framesize, const size_t hopsize, const size_t padsize, const bool chronometry = false) : + STFT(std::make_shared(), std::make_tuple(framesize, framesize), hopsize, padsize, chronometry) { } - STFT(const std::shared_ptr fft, const size_t framesize, const size_t hopsize, const bool chronometry = false) : - STFT(fft, std::make_tuple(framesize, framesize), hopsize, chronometry) + STFT(const std::shared_ptr fft, const size_t framesize, const size_t hopsize, const size_t padsize, const bool chronometry = false) : + STFT(fft, std::make_tuple(framesize, framesize), hopsize, padsize, chronometry) { } - STFT(const std::tuple framesize, const size_t hopsize, const bool chronometry = false) : - STFT(std::make_shared(), framesize, hopsize, chronometry) + STFT(const std::tuple framesize, const size_t hopsize, const size_t padsize, const bool chronometry = false) : + STFT(std::make_shared(), framesize, hopsize, padsize, chronometry) { } - STFT(const std::shared_ptr fft, const std::tuple framesize, const size_t hopsize, const bool chronometry = false) : + STFT(const std::shared_ptr fft, const std::tuple framesize, const size_t hopsize, const size_t padsize, const bool chronometry = false) : fft(fft), framesize(framesize), hopsize(hopsize), + padsize(padsize), chronometry(chronometry) { const auto analysis_window_size = std::get<0>(framesize); @@ -74,8 +75,8 @@ namespace stftpitchshift [unitygain](T value) { return value * unitygain; }); buffer.size = analysis_window_size; - buffer.time.resize(buffer.size); - buffer.freq.resize(buffer.size / 2 + 1); + buffer.time.resize(buffer.size * padsize); + buffer.freq.resize(buffer.size * padsize / 2 + 1); } void operator()(const std::span input, const std::span output, const std::function> dft)> callback) @@ -143,6 +144,7 @@ namespace stftpitchshift const std::shared_ptr fft; const std::tuple framesize; const size_t hopsize; + const size_t padsize; const bool chronometry; struct diff --git a/cpp/StftPitchShift/StftPitchShift.cpp b/cpp/StftPitchShift/StftPitchShift.cpp index 438b0e5..352ff11 100755 --- a/cpp/StftPitchShift/StftPitchShift.cpp +++ b/cpp/StftPitchShift/StftPitchShift.cpp @@ -12,6 +12,7 @@ StftPitchShift::StftPitchShift( const double samplerate, const size_t framesize, const size_t hopsize, + const size_t padsize, const bool normalization, const bool chronometry) : StftPitchShift( @@ -19,6 +20,7 @@ StftPitchShift::StftPitchShift( samplerate, std::make_tuple(framesize, framesize), hopsize, + padsize, normalization, chronometry) { @@ -28,6 +30,7 @@ StftPitchShift::StftPitchShift( const double samplerate, const std::tuple framesize, const size_t hopsize, + const size_t padsize, const bool normalization, const bool chronometry) : StftPitchShift( @@ -35,6 +38,7 @@ StftPitchShift::StftPitchShift( samplerate, framesize, hopsize, + padsize, normalization, chronometry) { @@ -45,6 +49,7 @@ StftPitchShift::StftPitchShift( const double samplerate, const size_t framesize, const size_t hopsize, + const size_t padsize, const bool normalization, const bool chronometry) : StftPitchShift( @@ -52,6 +57,7 @@ StftPitchShift::StftPitchShift( samplerate, std::make_tuple(framesize, framesize), hopsize, + padsize, normalization, chronometry) { @@ -62,12 +68,14 @@ StftPitchShift::StftPitchShift( const double samplerate, const std::tuple framesize, const size_t hopsize, + const size_t padsize, const bool normalization, const bool chronometry) : fft(fft), samplerate(samplerate), framesize(framesize), hopsize(hopsize), + padsize(padsize), normalization(normalization), chronometry(chronometry) { @@ -117,14 +125,14 @@ void StftPitchShift::shiftpitch( // preemptively clear output #30 std::fill(output.begin(), output.end(), float(0)); - StftPitchShiftCore core(fft, samplerate, framesize, hopsize); + StftPitchShiftCore core(fft, samplerate, framesize, hopsize, padsize); core.factors(factors); core.quefrency(quefrency); core.distortion(distortion); core.normalization(normalization); - STFT stft(fft, framesize, hopsize, chronometry); + STFT stft(fft, framesize, hopsize, padsize, chronometry); stft(input, output, [&](std::span> dft) { @@ -142,14 +150,14 @@ void StftPitchShift::shiftpitch( // preemptively clear output #30 std::fill(output.begin(), output.end(), double(0)); - StftPitchShiftCore core(fft, samplerate, framesize, hopsize); + StftPitchShiftCore core(fft, samplerate, framesize, hopsize, padsize); core.factors(factors); core.quefrency(quefrency); core.distortion(distortion); core.normalization(normalization); - STFT stft(fft, framesize, hopsize, chronometry); + STFT stft(fft, framesize, hopsize, padsize, chronometry); stft(input, output, [&](std::span> dft) { diff --git a/cpp/StftPitchShift/StftPitchShift.h b/cpp/StftPitchShift/StftPitchShift.h index be1a37f..112cdf0 100755 --- a/cpp/StftPitchShift/StftPitchShift.h +++ b/cpp/StftPitchShift/StftPitchShift.h @@ -21,6 +21,7 @@ namespace stftpitchshift * @param samplerate The sample rate of the signal in hertz. * @param framesize The STFT frame size in samples (analysis = synthesis). * @param hopsize The STFT hop size in samples. + * @param padsize The FFT zero padding factor. * @param normalization Optionally enable spectral rms normalization. * @param chronometry Optionally enable runtime measurements. */ @@ -28,6 +29,7 @@ namespace stftpitchshift const double samplerate, const size_t framesize, const size_t hopsize, + const size_t padsize = 1, const bool normalization = false, const bool chronometry = false); @@ -35,6 +37,7 @@ namespace stftpitchshift * @param samplerate The sample rate of the signal in hertz. * @param framesize The STFT frame size in samples (analysis >= synthesis). * @param hopsize The STFT hop size in samples. + * @param padsize The FFT zero padding factor. * @param normalization Optionally enable spectral rms normalization. * @param chronometry Optionally enable runtime measurements. */ @@ -42,6 +45,7 @@ namespace stftpitchshift const double samplerate, const std::tuple framesize, const size_t hopsize, + const size_t padsize = 1, const bool normalization = false, const bool chronometry = false); @@ -50,6 +54,7 @@ namespace stftpitchshift * @param samplerate The sample rate of the signal in hertz. * @param framesize The STFT frame size in samples (analysis = synthesis). * @param hopsize The STFT hop size in samples. + * @param padsize The FFT zero padding factor. * @param normalization Optionally enable spectral rms normalization. * @param chronometry Optionally enable runtime measurements. */ @@ -58,6 +63,7 @@ namespace stftpitchshift const double samplerate, const size_t framesize, const size_t hopsize, + const size_t padsize = 1, const bool normalization = false, const bool chronometry = false); @@ -66,6 +72,7 @@ namespace stftpitchshift * @param samplerate The sample rate of the signal in hertz. * @param framesize The STFT frame size in samples (analysis >= synthesis). * @param hopsize The STFT hop size in samples. + * @param padsize The FFT zero padding factor. * @param normalization Optionally enable spectral rms normalization. * @param chronometry Optionally enable runtime measurements. */ @@ -74,6 +81,7 @@ namespace stftpitchshift const double samplerate, const std::tuple framesize, const size_t hopsize, + const size_t padsize = 1, const bool normalization = false, const bool chronometry = false); @@ -139,6 +147,7 @@ namespace stftpitchshift const double samplerate; const std::tuple framesize; const size_t hopsize; + const size_t padsize; const bool normalization; const bool chronometry; diff --git a/cpp/StftPitchShift/StftPitchShiftCore.h b/cpp/StftPitchShift/StftPitchShiftCore.h index 185bf6c..a3db9f6 100755 --- a/cpp/StftPitchShift/StftPitchShiftCore.h +++ b/cpp/StftPitchShift/StftPitchShiftCore.h @@ -20,24 +20,28 @@ namespace stftpitchshift StftPitchShiftCore( const double samplerate, const size_t framesize, - const size_t hopsize) : + const size_t hopsize, + const size_t padsize) : StftPitchShiftCore( std::make_shared(), samplerate, std::make_tuple(framesize, framesize), - hopsize) + hopsize, + padsize) { } StftPitchShiftCore( const double samplerate, const std::tuple framesize, - const size_t hopsize) : + const size_t hopsize, + const size_t padsize) : StftPitchShiftCore( std::make_shared(), samplerate, framesize, - hopsize) + hopsize, + padsize) { } @@ -45,12 +49,14 @@ namespace stftpitchshift const std::shared_ptr fft, const double samplerate, const size_t framesize, - const size_t hopsize) : + const size_t hopsize, + const size_t padsize) : StftPitchShiftCore( fft, samplerate, std::make_tuple(framesize, framesize), - hopsize) + hopsize, + padsize) { } @@ -58,15 +64,17 @@ namespace stftpitchshift const std::shared_ptr fft, const double samplerate, const std::tuple framesize, - const size_t hopsize) : + const size_t hopsize, + const size_t padsize) : fft(fft), samplerate(samplerate), framesize(framesize), hopsize(hopsize), - vocoder(samplerate, framesize, hopsize), - pitcher(samplerate, std::get<0>(framesize)), - cepster(fft, samplerate, std::get<0>(framesize)), - envelope(std::get<0>(framesize) / 2 + 1) + padsize(padsize), + vocoder(samplerate, framesize, hopsize, padsize), + pitcher(samplerate, std::get<0>(framesize) * padsize), + cepster(fft, samplerate, std::get<0>(framesize) * padsize), + envelope(std::get<0>(framesize) * padsize / 2 + 1) { } @@ -175,6 +183,7 @@ namespace stftpitchshift const double samplerate; const std::tuple framesize; const size_t hopsize; + const size_t padsize; Vocoder vocoder; Pitcher pitcher; diff --git a/cpp/StftPitchShift/Vocoder.h b/cpp/StftPitchShift/Vocoder.h index 37f73bf..748bcec 100755 --- a/cpp/StftPitchShift/Vocoder.h +++ b/cpp/StftPitchShift/Vocoder.h @@ -17,19 +17,19 @@ namespace stftpitchshift public: - Vocoder(const double samplerate, const size_t framesize, const size_t hopsize) : - Vocoder(samplerate, std::make_tuple(framesize, framesize), hopsize) + Vocoder(const double samplerate, const size_t framesize, const size_t hopsize, const size_t padsize) : + Vocoder(samplerate, std::make_tuple(framesize, framesize), hopsize, padsize) { } - Vocoder(const double samplerate, const std::tuple framesize, const size_t hopsize) + Vocoder(const double samplerate, const std::tuple framesize, const size_t hopsize, const size_t padsize) { const double pi = 2.0 * std::acos(-1.0); - const size_t dftsize = std::get<0>(framesize) / 2 + 1; + const size_t dftsize = std::get<0>(framesize) * padsize / 2 + 1; - stft_freq_inc = samplerate / std::get<0>(framesize); - stft_phase_inc = pi * hopsize / std::get<0>(framesize); + stft_freq_inc = samplerate / (std::get<0>(framesize) * padsize); + stft_phase_inc = pi * hopsize / (std::get<0>(framesize) * padsize); encode_phase_buffer.resize(dftsize); decode_phase_buffer.resize(dftsize); diff --git a/cpp/StftPitchShift/main.cpp b/cpp/StftPitchShift/main.cpp index 6f728c8..c752cbf 100755 --- a/cpp/StftPitchShift/main.cpp +++ b/cpp/StftPitchShift/main.cpp @@ -67,6 +67,7 @@ int main(int argc, char** argv) samplerate, cli.framesize, std::get<1>(cli.framesize) / cli.hoprate, + 1, // TODO cli.normalization, cli.chronometry); diff --git a/examples/realtime.cpp b/examples/realtime.cpp index 42aaa9e..614c1ea 100644 --- a/examples/realtime.cpp +++ b/examples/realtime.cpp @@ -13,9 +13,11 @@ using namespace stftpitchshift; // basic parameters // - samplerate as required -// - overlap at least 4 +// - STFT overlap factor at least 4 +// - FFT zero padding factor at least 1 const double samplerate = 44100; const size_t overlap = 4; +const size_t padding = 1; // analysis and synthesis window sizes // power of two each of them @@ -48,8 +50,8 @@ int main() buffer.input.resize(total_buffer_size); buffer.output.resize(total_buffer_size); - stft = std::make_shared>(framesize, hopsize); - core = std::make_shared>(samplerate, framesize, hopsize); + stft = std::make_shared>(framesize, hopsize, padding); + core = std::make_shared>(samplerate, framesize, hopsize, padding); // set pitch shifting parameters as required