From ea1d81bdfe1674fc4955e6a24df3b0674784745d Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Mon, 22 Jul 2024 16:54:00 +0800 Subject: [PATCH] C api example for sense voice (#1165) --- .github/workflows/c-api.yaml | 147 ++++++++++++++++++ c-api-examples/CMakeLists.txt | 7 +- c-api-examples/sense-voice-c-api.c | 86 ++++++++++ .../{offline-stt-c-api.c => whisper-c-api.c} | 17 +- 4 files changed, 248 insertions(+), 9 deletions(-) create mode 100644 .github/workflows/c-api.yaml create mode 100644 c-api-examples/sense-voice-c-api.c rename c-api-examples/{offline-stt-c-api.c => whisper-c-api.c} (86%) diff --git a/.github/workflows/c-api.yaml b/.github/workflows/c-api.yaml new file mode 100644 index 000000000..3b1cd85a3 --- /dev/null +++ b/.github/workflows/c-api.yaml @@ -0,0 +1,147 @@ +name: c-api + +on: + push: + branches: + - master + tags: + - 'v[0-9]+.[0-9]+.[0-9]+*' + paths: + - '.github/workflows/c-api.yaml' + - 'CMakeLists.txt' + - 'cmake/**' + - 'sherpa-onnx/csrc/*' + - 'sherpa-onnx/c-api/*' + - 'c-api-examples/**' + pull_request: + branches: + - master + paths: + - '.github/workflows/c-api.yaml' + - 'CMakeLists.txt' + - 'cmake/**' + - 'sherpa-onnx/csrc/*' + - 'sherpa-onnx/c-api/*' + - 'c-api-examples/**' + + workflow_dispatch: + +concurrency: + group: c-api-${{ github.ref }} + cancel-in-progress: true + +jobs: + c_api: + name: ${{ matrix.os }} + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, macos-latest] + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2 + with: + key: ${{ matrix.os }}-c-api-shared + + - name: Build sherpa-onnx + shell: bash + run: | + export CMAKE_CXX_COMPILER_LAUNCHER=ccache + export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH" + cmake --version + + mkdir build + cd build + + cmake \ + -D CMAKE_BUILD_TYPE=Release \ + -D BUILD_SHARED_LIBS=ON \ + -D CMAKE_INSTALL_PREFIX=./install \ + -D 
SHERPA_ONNX_ENABLE_BINARY=OFF \ + .. + + make -j2 install + + ls -lh install/lib + ls -lh install/include + + if [[ ${{ matrix.os }} == ubuntu-latest ]]; then + ldd ./install/lib/libsherpa-onnx-c-api.so + echo "---" + readelf -d ./install/lib/libsherpa-onnx-c-api.so + fi + + if [[ ${{ matrix.os }} == macos-latest ]]; then + otool -L ./install/lib/libsherpa-onnx-c-api.dylib + fi + + - name: Test sense-voice + shell: bash + run: | + gcc -o sense-voice-c-api ./c-api-examples/sense-voice-c-api.c \ + -I ./build/install/include \ + -L ./build/install/lib/ \ + -l sherpa-onnx-c-api \ + -l onnxruntime + + ls -lh sense-voice-c-api + + if [[ ${{ matrix.os }} == ubuntu-latest ]]; then + ldd ./sense-voice-c-api + echo "----" + readelf -d ./sense-voice-c-api + fi + + # Now download models + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2 + tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2 + rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2 + + ls -lh sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17 + echo "---" + ls -lh sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/test_wavs + + export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH + export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH + + ./sense-voice-c-api + + rm -rf sherpa-onnx-sense-voice-* + + - name: Test whisper + shell: bash + run: | + gcc -o whisper-c-api ./c-api-examples/whisper-c-api.c \ + -I ./build/install/include \ + -L ./build/install/lib/ \ + -l sherpa-onnx-c-api \ + -l onnxruntime + + ls -lh whisper-c-api + + if [[ ${{ matrix.os }} == ubuntu-latest ]]; then + ldd ./whisper-c-api + echo "----" + readelf -d ./whisper-c-api + fi + + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.tar.bz2 + tar xvf sherpa-onnx-whisper-tiny.tar.bz2 + rm sherpa-onnx-whisper-tiny.tar.bz2 + + ls -lh sherpa-onnx-whisper-tiny + echo 
"---" + ls -lh sherpa-onnx-whisper-tiny/test_wavs + + export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH + export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH + + ./whisper-c-api + + rm -rf sherpa-onnx-whisper-* diff --git a/c-api-examples/CMakeLists.txt b/c-api-examples/CMakeLists.txt index 2922665eb..cb4f7ecb4 100644 --- a/c-api-examples/CMakeLists.txt +++ b/c-api-examples/CMakeLists.txt @@ -24,8 +24,11 @@ target_link_libraries(audio-tagging-c-api sherpa-onnx-c-api) add_executable(add-punctuation-c-api add-punctuation-c-api.c) target_link_libraries(add-punctuation-c-api sherpa-onnx-c-api) -add_executable(offline-stt-c-api offline-stt-c-api.c) -target_link_libraries(offline-stt-c-api sherpa-onnx-c-api) +add_executable(whisper-c-api whisper-c-api.c) +target_link_libraries(whisper-c-api sherpa-onnx-c-api) + +add_executable(sense-voice-c-api sense-voice-c-api.c) +target_link_libraries(sense-voice-c-api sherpa-onnx-c-api) if(SHERPA_ONNX_HAS_ALSA) add_subdirectory(./asr-microphone-example) diff --git a/c-api-examples/sense-voice-c-api.c b/c-api-examples/sense-voice-c-api.c new file mode 100644 index 000000000..d2a470101 --- /dev/null +++ b/c-api-examples/sense-voice-c-api.c @@ -0,0 +1,86 @@ +// c-api-examples/sense-voice-c-api.c +// +// Copyright (c) 2024 Xiaomi Corporation + +// +// This file demonstrates how to use SenseVoice with sherpa-onnx's C API. 
+// clang-format off +// +// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2 +// tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2 +// rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2 +// +// clang-format on + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "sherpa-onnx/c-api/c-api.h" + +int32_t main() { + // You can find more test waves from + // https://hf-mirror.com/spaces/k2-fsa/spoken-language-identification/tree/main/test_wavs + const char *wav_filename = + "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/test_wavs/en.wav"; + const char *model_filename = + "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.int8.onnx"; + const char *tokens_filename = + "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt"; + const char *language = "auto"; + const char *provider = "cpu"; + int32_t use_inverse_text_normalization = 1; + + const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename); + if (wave == NULL) { + fprintf(stderr, "Failed to read %s\n", wav_filename); + return -1; + } + + SherpaOnnxOfflineSenseVoiceModelConfig sense_voice_config; + memset(&sense_voice_config, 0, sizeof(sense_voice_config)); + sense_voice_config.model = model_filename; + sense_voice_config.language = language; + sense_voice_config.use_itn = use_inverse_text_normalization; + + // Offline model config + SherpaOnnxOfflineModelConfig offline_model_config; + memset(&offline_model_config, 0, sizeof(offline_model_config)); + offline_model_config.debug = 1; + offline_model_config.num_threads = 1; + offline_model_config.provider = provider; + offline_model_config.tokens = tokens_filename; + offline_model_config.sense_voice = sense_voice_config; + + // Recognizer config + SherpaOnnxOfflineRecognizerConfig recognizer_config; + memset(&recognizer_config, 0, sizeof(recognizer_config)); + recognizer_config.decoding_method = "greedy_search"; +
recognizer_config.model_config = offline_model_config; + + SherpaOnnxOfflineRecognizer *recognizer = + CreateOfflineRecognizer(&recognizer_config); + + if (recognizer == NULL) { + fprintf(stderr, "Please check your config!\n"); + SherpaOnnxFreeWave(wave); + return -1; + } + + SherpaOnnxOfflineStream *stream = CreateOfflineStream(recognizer); + + AcceptWaveformOffline(stream, wave->sample_rate, wave->samples, + wave->num_samples); + DecodeOfflineStream(recognizer, stream); + const SherpaOnnxOfflineRecognizerResult *result = + GetOfflineStreamResult(stream); + + fprintf(stderr, "Decoded text: %s\n", result->text); + + DestroyOfflineRecognizerResult(result); + DestroyOfflineStream(stream); + DestroyOfflineRecognizer(recognizer); + SherpaOnnxFreeWave(wave); + + return 0; +} diff --git a/c-api-examples/offline-stt-c-api.c b/c-api-examples/whisper-c-api.c similarity index 86% rename from c-api-examples/offline-stt-c-api.c rename to c-api-examples/whisper-c-api.c index 42748c65e..0fb2a32df 100644 --- a/c-api-examples/offline-stt-c-api.c +++ b/c-api-examples/whisper-c-api.c @@ -21,8 +21,6 @@ #include "sherpa-onnx/c-api/c-api.h" int32_t main() { - // You can find more test waves from - // https://hf-mirror.com/spaces/k2-fsa/spoken-language-identification/tree/main/test_wavs const char *wav_filename = "./sherpa-onnx-whisper-tiny/test_wavs/0.wav"; const char *encoder_filename = "sherpa-onnx-whisper-tiny/tiny-encoder.onnx"; const char *decoder_filename = "sherpa-onnx-whisper-tiny/tiny-decoder.onnx"; @@ -48,31 +46,36 @@ int32_t main() { // Offline model config SherpaOnnxOfflineModelConfig offline_model_config; memset(&offline_model_config, 0, sizeof(offline_model_config)); - offline_model_config.bpe_vocab = ""; offline_model_config.debug = 1; offline_model_config.num_threads = 1; offline_model_config.provider = provider; offline_model_config.tokens = tokens_filename; offline_model_config.whisper = whisper_config; - offline_model_config.sense_voice = - 
(SherpaOnnxOfflineSenseVoiceModelConfig){"", "", 0}; // Recognizer config SherpaOnnxOfflineRecognizerConfig recognizer_config; memset(&recognizer_config, 0, sizeof(recognizer_config)); recognizer_config.decoding_method = "greedy_search"; - recognizer_config.feat_config = (SherpaOnnxFeatureConfig){16000, 512}; recognizer_config.model_config = offline_model_config; SherpaOnnxOfflineRecognizer *recognizer = CreateOfflineRecognizer(&recognizer_config); + if (recognizer == NULL) { + fprintf(stderr, "Please check your config!\n"); + + SherpaOnnxFreeWave(wave); + + return -1; + } + SherpaOnnxOfflineStream *stream = CreateOfflineStream(recognizer); AcceptWaveformOffline(stream, wave->sample_rate, wave->samples, wave->num_samples); DecodeOfflineStream(recognizer, stream); - SherpaOnnxOfflineRecognizerResult *result = GetOfflineStreamResult(stream); + const SherpaOnnxOfflineRecognizerResult *result = + GetOfflineStreamResult(stream); fprintf(stderr, "Decoded text: %s\n", result->text);