diff --git a/CMakeLists.txt b/CMakeLists.txt
index b8127df..4bef544 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -22,7 +22,7 @@ option(ANIRA_WITH_TFLITE "Build with TensorFlow Lite backend" ON)
 
 set (PROJECT_NAME anira)
 
-project (${PROJECT_NAME} VERSION 0.0.7)
+project (${PROJECT_NAME} VERSION 0.0.8)
 
 # Sets the minimum macOS version, c++20 is only available from macOS 11.0
 if (APPLE)
diff --git a/extras/benchmark/eval-scripts/anira_benchmark.py b/extras/benchmark/eval-scripts/anira_benchmark.py
index ccabf11..d4c61fa 100644
--- a/extras/benchmark/eval-scripts/anira_benchmark.py
+++ b/extras/benchmark/eval-scripts/anira_benchmark.py
@@ -5,9 +5,9 @@
 
 # Define log file paths
 log_file_paths = []
-log_file_paths.append(os.path.join(os.path.dirname(__file__), "./../logs/Linux_advanced.log"))
-log_file_paths.append(os.path.join(os.path.dirname(__file__), "./../logs/MacOS_advanced.log"))
-log_file_paths.append(os.path.join(os.path.dirname(__file__), "./../logs/Windows_advanced.log"))
+log_file_paths.append(os.path.join(os.path.dirname(__file__), "./../logs/Linux_advanced_0.0.8.log"))
+log_file_paths.append(os.path.join(os.path.dirname(__file__), "./../logs/MacOS_advanced_0.0.8.log"))
+log_file_paths.append(os.path.join(os.path.dirname(__file__), "./../logs/Windows_advanced_0.0.8.log"))
 
 def create_folder(folder_name: str) -> None:
     try:
@@ -161,11 +161,11 @@ def write_list_to_csv(file_path: str, list: list, append: bool=False, top_row_ar
     sequence_std_results = get_sequence_statistics_from_list(listed_results, "sequence_std")
     moving_average_results = moving_average(listed_results, 3)
     cummulativ_average_results = cummulativ_average(listed_results)
-    write_list_to_csv(os.path.join(os.path.dirname(__file__), "./../results/benchmark_single_interation.csv"), listed_results)
-    write_list_to_csv(os.path.join(os.path.dirname(__file__), "./../results/benchmark_sequence_mean.csv"), sequence_mean_results, False, "sequence_mean")
-    write_list_to_csv(os.path.join(os.path.dirname(__file__), "./../results/benchmark_sequence_max.csv"), sequence_max_results, False, "sequence_max")
-    write_list_to_csv(os.path.join(os.path.dirname(__file__), "./../results/benchmark_sequence_min.csv"), sequence_min_results, False, "sequence_min")
-    write_list_to_csv(os.path.join(os.path.dirname(__file__), "./../results/benchmark_sequence_iqr.csv"), sequence_iqr_results, False, "sequence_iqr")
-    write_list_to_csv(os.path.join(os.path.dirname(__file__), "./../results/benchmark_sequence_std.csv"), sequence_std_results, False, "sequence_std")
-    write_list_to_csv(os.path.join(os.path.dirname(__file__), "./../results/benchmark_moving_average.csv"), moving_average_results, False, "moving_average")
-    write_list_to_csv(os.path.join(os.path.dirname(__file__), "./../results/benchmark_cummulativ_average.csv"), cummulativ_average_results, False, "cummulativ_average")
\ No newline at end of file
+    write_list_to_csv(os.path.join(os.path.dirname(__file__), "./../results/benchmark_advanced_0.0.8.csv"), listed_results)
+    # write_list_to_csv(os.path.join(os.path.dirname(__file__), "./../results/benchmark_sequence_mean.csv"), sequence_mean_results, False, "sequence_mean")
+    # write_list_to_csv(os.path.join(os.path.dirname(__file__), "./../results/benchmark_sequence_max.csv"), sequence_max_results, False, "sequence_max")
+    # write_list_to_csv(os.path.join(os.path.dirname(__file__), "./../results/benchmark_sequence_min.csv"), sequence_min_results, False, "sequence_min")
+    # write_list_to_csv(os.path.join(os.path.dirname(__file__), "./../results/benchmark_sequence_iqr.csv"), sequence_iqr_results, False, "sequence_iqr")
+    # write_list_to_csv(os.path.join(os.path.dirname(__file__), "./../results/benchmark_sequence_std.csv"), sequence_std_results, False, "sequence_std")
+    # write_list_to_csv(os.path.join(os.path.dirname(__file__), "./../results/benchmark_moving_average.csv"), moving_average_results, False, "moving_average")
+    # write_list_to_csv(os.path.join(os.path.dirname(__file__), "./../results/benchmark_cummulativ_average.csv"), cummulativ_average_results, False, "cummulativ_average")
\ No newline at end of file
diff --git a/include/anira/benchmark/ProcessBlockFixture.h b/include/anira/benchmark/ProcessBlockFixture.h
index 3453486..75a49ab 100644
--- a/include/anira/benchmark/ProcessBlockFixture.h
+++ b/include/anira/benchmark/ProcessBlockFixture.h
@@ -38,7 +38,6 @@ class ANIRA_API ProcessBlockFixture : public ::benchmark::Fixture {
     bool m_sleep_after_repetition = true;
     int m_iteration = 0;
     std::chrono::duration m_runtime_last_repetition = std::chrono::duration(0);
-    bool m_init = false;
     int m_prev_num_received_samples = 0;
     std::string m_model_name;
     std::string m_inference_backend_name = "libtorch";
diff --git a/include/anira/scheduler/InferenceManager.h b/include/anira/scheduler/InferenceManager.h
index 07a5e0f..84addb7 100644
--- a/include/anira/scheduler/InferenceManager.h
+++ b/include/anira/scheduler/InferenceManager.h
@@ -25,7 +25,6 @@ class ANIRA_API InferenceManager {
 
     // Required for unit test
     size_t getNumReceivedSamples();
-    bool isInitializing() const;
     InferenceThreadPool& getInferenceThreadPool();
     int getMissingBlocks();
 
@@ -35,6 +34,11 @@ class ANIRA_API InferenceManager {
     void processInput(float ** inputBuffer, const size_t inputSamples);
     void processOutput(float ** inputBuffer, const size_t inputSamples);
    void clearBuffer(float ** inputBuffer, const size_t inputSamples);
+    int calculateLatency();
+    int calculateBufferAdaptation(int hostBufferSize, int modelOutputSize);
+    int maxNumberOfInferences(int hostBufferSize, int modelOutputSize);
+    int greatestCommonDivisor(int a, int b);
+    int leatCommonMultiple(int a, int b);
 
 private:
     std::shared_ptr<InferenceThreadPool> inferenceThreadPool;
@@ -43,8 +47,6 @@ class ANIRA_API InferenceManager {
 
     SessionElement& session;
     HostAudioConfig spec;
-    bool init = true;
-    size_t bufferCount = 0;
     size_t initSamples = 0;
     std::atomic<int> inferenceCounter {0};
 };
diff --git a/src/benchmark/ProcessBlockFixture.cpp b/src/benchmark/ProcessBlockFixture.cpp
index 997cebf..c88f7a2 100644
--- a/src/benchmark/ProcessBlockFixture.cpp
+++ b/src/benchmark/ProcessBlockFixture.cpp
@@ -13,7 +13,6 @@ ProcessBlockFixture::~ProcessBlockFixture() {
 }
 
 void ProcessBlockFixture::initializeIteration() {
-    m_init = m_inferenceHandler->getInferenceManager().isInitializing();
     m_prev_num_received_samples = m_inferenceHandler->getInferenceManager().getNumReceivedSamples();
 }
 
@@ -75,18 +74,7 @@ void ProcessBlockFixture::initializeRepetition(const InferenceConfig& inferenceC
 }
 
 bool ProcessBlockFixture::bufferHasBeenProcessed() {
-    if (m_init) {
-        // if we have init the process output does not get called so, we never pop samples out
-        // when asking for the number of received samples, we dont wait since the time bufferInSec time we give to the request is zero
-        return m_inferenceHandler->getInferenceManager().getNumReceivedSamples() >= m_prev_num_received_samples + m_bufferSize;
-    }
-    else {
-        // when init is finished we allready anticipate that we take out samples from the buffer and just wait for the buffer to be filled again
-        // therefore it makes no difference if the buffer gets filled while waiting for the semaphore or in this while loop
-        // TODO: A problem could occur is when init is set to false at start and the wait_for_semaphore time is too short so no samples are returned
-        // At the moment this is not possible since init is allways set to true at the start, but this shall be changed in the future, so we can do realreal time processing
-        return m_inferenceHandler->getInferenceManager().getNumReceivedSamples() >= m_prev_num_received_samples;
-    }
+    return m_inferenceHandler->getInferenceManager().getNumReceivedSamples() >= m_prev_num_received_samples;
 }
 
 void ProcessBlockFixture::pushRandomSamplesInBuffer(anira::HostAudioConfig hostAudioConfig) {
diff --git a/src/scheduler/InferenceManager.cpp b/src/scheduler/InferenceManager.cpp
index 951d470..a1fc8cd 100644
--- a/src/scheduler/InferenceManager.cpp
+++ b/src/scheduler/InferenceManager.cpp
@@ -28,25 +28,11 @@ void InferenceManager::prepare(HostAudioConfig newConfig) {
 
     inferenceCounter = 0;
 
-    bufferCount = 0;
-
-    size_t max_inference_time_in_samples = (size_t) std::ceil(inferenceConfig.m_max_inference_time * spec.hostSampleRate / 1000);
-
-    float divisor = (float) spec.hostBufferSize / ((float) inferenceConfig.m_batch_size * (float) inferenceConfig.m_model_output_size);
-    size_t remainder = spec.hostBufferSize % (inferenceConfig.m_batch_size * inferenceConfig.m_model_output_size);
-
-    if (remainder == 0) {
-        initSamples = (size_t) divisor * max_inference_time_in_samples + (size_t) divisor * inferenceConfig.m_model_latency;
-    } else if (remainder > 0 && remainder < spec.hostBufferSize) {
-        initSamples = ((size_t) divisor + 1) * max_inference_time_in_samples + ((size_t) divisor + 1) * inferenceConfig.m_model_latency + spec.hostBufferSize; //TODO not minimum possible
-    } else {
-        initSamples = max_inference_time_in_samples + (inferenceConfig.m_batch_size * inferenceConfig.m_model_output_size) + inferenceConfig.m_model_latency; //TODO not minimum possible
-    }
-
-    if ((float) initSamples < inferenceConfig.m_wait_in_process_block * (float) spec.hostBufferSize) {
-        init = false;
-    } else {
-        init = true;
+    initSamples = calculateLatency();
+    for (size_t i = 0; i < spec.hostChannels; ++i) {
+        for (size_t j = 0; j < initSamples; ++j) {
+            session.receiveBuffer.pushSample(i, 0.f);
+        }
     }
 }
 
@@ -57,13 +43,7 @@ void InferenceManager::process(float ** inputBuffer, size_t inputSamples) {
     double timeInSec = static_cast<double>(inputSamples) / spec.hostSampleRate;
     inferenceThreadPool->newDataRequest(session, timeInSec);
 
-    if (init) {
-        bufferCount += inputSamples;
-        clearBuffer(inputBuffer, inputSamples);
-        if (bufferCount >= initSamples) init = false;
-    } else {
-        processOutput(inputBuffer, inputSamples);
-    }
+    processOutput(inputBuffer, inputSamples);
 }
 
 void InferenceManager::processInput(float ** inputBuffer, size_t inputSamples) {
@@ -112,8 +92,7 @@ void InferenceManager::clearBuffer(float ** inputBuffer, size_t inputSamples) {
 }
 
 int InferenceManager::getLatency() const {
-    if ((int) initSamples % (int) spec.hostBufferSize == 0) return initSamples;
-    else return ((int) ((float) initSamples / (float) spec.hostBufferSize) + 1) * (int) spec.hostBufferSize;
+    return initSamples;
 }
 
 InferenceThreadPool& InferenceManager::getInferenceThreadPool() {
@@ -125,10 +104,6 @@ size_t InferenceManager::getNumReceivedSamples() {
     return session.receiveBuffer.getAvailableSamples(0);
 }
 
-bool InferenceManager::isInitializing() const {
-    return init;
-}
-
 int InferenceManager::getMissingBlocks() {
     return inferenceCounter.load();
 }
@@ -137,4 +112,57 @@ int InferenceManager::getSessionID() const {
     return session.sessionID;
 }
 
+int InferenceManager::calculateLatency() {
+    // First calculate some universal values
+    int modelOutputSize = inferenceConfig.m_batch_size * inferenceConfig.m_model_output_size;
+    float hostBufferTime = (float) spec.hostBufferSize * 1000.f / (float) spec.hostSampleRate;
+    float waitTime = inferenceConfig.m_wait_in_process_block * hostBufferTime;
+
+    // Then caclulate the different parts of the latency
+    int bufferAdaptation = calculateBufferAdaptation(spec.hostBufferSize, modelOutputSize);
+
+    int maxPossibleInferences = maxNumberOfInferences(spec.hostBufferSize, modelOutputSize);
+    float totalInferenceTimeAfterWait = (maxPossibleInferences * inferenceConfig.m_max_inference_time) - waitTime;
+    int numBuffersForMaxInferences = std::ceil(totalInferenceTimeAfterWait / hostBufferTime);
+    int inferenceCausedLatency = numBuffersForMaxInferences * spec.hostBufferSize;
+
+    int modelCausedLatency = inferenceConfig.m_model_latency;
+
+    // Add it all together
+    return bufferAdaptation + inferenceCausedLatency + modelCausedLatency;
+}
+
+
+int InferenceManager::calculateBufferAdaptation(int hostBufferSize, int modelOutputSize) {
+    int res = 0;
+    for (int i = hostBufferSize; i < leatCommonMultiple(hostBufferSize, modelOutputSize) ; i+=hostBufferSize) {
+        res = std::max(res, i%modelOutputSize);
+    }
+    return res;
+}
+
+int InferenceManager::maxNumberOfInferences(int hostBufferSize, int modelOutputSize) {
+    float samplesInBuffer = hostBufferSize;
+    int res = (int) (samplesInBuffer / (float) modelOutputSize);
+    int numberOfInferences = 0;
+    for (int i = hostBufferSize; i < leatCommonMultiple(hostBufferSize, modelOutputSize) ; i+=hostBufferSize) {
+        numberOfInferences = (int) (samplesInBuffer / (float) modelOutputSize);
+        res = std::max(res, numberOfInferences);
+        samplesInBuffer += hostBufferSize - numberOfInferences * modelOutputSize;
+    }
+    return res;
+}
+
+int InferenceManager::greatestCommonDivisor(int a, int b) {
+    if (b == 0) {
+        return a;
+    } else {
+        return greatestCommonDivisor(b, a % b);
+    }
+}
+
+int InferenceManager::leatCommonMultiple(int a, int b) {
+    return a * b / greatestCommonDivisor(a, b);
+}
+
 } // namespace anira
\ No newline at end of file
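
Note on the latency change (illustration only, not part of the patch): the new InferenceManager::calculateLatency() composes the reported latency from three parts - the buffer adaptation needed when the host buffer size and the model output size do not divide evenly, the extra host buffers required to cover the worst-case inference time that remains after the configured in-block wait, and the model's own latency. prepare() then pre-fills session.receiveBuffer with that many zero samples per channel, so processOutput() runs from the first callback and the old init/bufferCount warm-up phase disappears. The standalone sketch below mirrors this composition outside the class; every name and every number in main() is a hypothetical example, not a value taken from anira.

// latency_sketch.cpp - standalone sketch of how the new latency formula composes.
#include <algorithm>
#include <cmath>
#include <iostream>

static int gcd(int a, int b) { return b == 0 ? a : gcd(b, a % b); }
static int lcm(int a, int b) { return a * b / gcd(a, b); }

// Worst-case number of samples left over when host buffers are chopped into
// model-sized chunks (0 when the two sizes divide evenly).
static int bufferAdaptation(int hostBufferSize, int modelOutputSize) {
    int res = 0;
    for (int i = hostBufferSize; i < lcm(hostBufferSize, modelOutputSize); i += hostBufferSize)
        res = std::max(res, i % modelOutputSize);
    return res;
}

// Maximum number of inferences that can become due within a single host buffer.
static int maxInferences(int hostBufferSize, int modelOutputSize) {
    float samplesInBuffer = (float) hostBufferSize;
    int res = (int) (samplesInBuffer / (float) modelOutputSize);
    for (int i = hostBufferSize; i < lcm(hostBufferSize, modelOutputSize); i += hostBufferSize) {
        int n = (int) (samplesInBuffer / (float) modelOutputSize);
        res = std::max(res, n);
        samplesInBuffer += hostBufferSize - n * modelOutputSize;
    }
    return res;
}

int main() {
    // Example configuration (assumed values, not taken from any shipped InferenceConfig)
    const int hostBufferSize = 512;          // samples per host callback
    const double hostSampleRate = 48000.0;   // Hz
    const int modelOutputSize = 384;         // batch_size * model_output_size
    const double maxInferenceTimeMs = 10.0;  // worst-case inference time
    const double waitFraction = 0.5;         // m_wait_in_process_block
    const int modelLatency = 0;              // inherent model latency in samples

    const double hostBufferTimeMs = hostBufferSize * 1000.0 / hostSampleRate;
    const double waitTimeMs = waitFraction * hostBufferTimeMs;

    const int adaptation = bufferAdaptation(hostBufferSize, modelOutputSize);
    const int inferences = maxInferences(hostBufferSize, modelOutputSize);
    const double inferenceTimeAfterWait = inferences * maxInferenceTimeMs - waitTimeMs;
    const int buffersForInference = (int) std::ceil(inferenceTimeAfterWait / hostBufferTimeMs);
    const int inferenceLatency = buffersForInference * hostBufferSize;

    std::cout << "buffer adaptation: " << adaptation << " samples\n"
              << "inference latency: " << inferenceLatency << " samples\n"
              << "total latency:     " << adaptation + inferenceLatency + modelLatency
              << " samples\n";
    return 0;
}

With these assumed numbers (512-sample host buffer at 48 kHz, 384-sample model output, 10 ms worst-case inference, half a buffer of in-block waiting) the sketch reports 256 samples of buffer adaptation plus one extra 512-sample buffer for inference, 768 samples in total, which is the kind of breakdown the reworked getLatency() now returns directly as initSamples.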