Merge branch 'master' of https://github.com/k2-fsa/sherpa-onnx

# Conflicts:
#	.gitignore
XiaYucca · Oct 15, 2024 · 4d130e4
2 parents 16eb38d + 77dd5f7
commit 4d130e4
Show file tree

Hide file tree

Showing 306 changed files with 11,432 additions and 314 deletions.
diff --git a/.github/scripts/node-addon/package-optional.json b/.github/scripts/node-addon/package-optional.json
@@ -1,7 +1,7 @@
 {
   "name": "sherpa-onnx-PLATFORM2-ARCH",
   "version": "SHERPA_ONNX_VERSION",
-  "description": "Speech-to-text and text-to-speech using Next-gen Kaldi without internet connection",
+  "description": "Speech-to-text, text-to-speech, and speaker diarization using Next-gen Kaldi without internet connection",
   "main": "index.js",
   "scripts": {
     "test": "echo \"Error: no test specified\" && exit 1"
@@ -16,8 +16,18 @@
     "transcription",
     "real-time speech recognition",
     "without internet connection",
+    "locally",
+    "local",
     "embedded systems",
     "open source",
+    "diarization",
+    "speaker diarization",
+    "speaker recognition",
+    "speaker",
+    "speaker segmentation",
+    "speaker verification",
+    "spoken language identification",
+    "sherpa",
     "zipformer",
     "asr",
     "tts",
@@ -30,13 +40,13 @@
     "offline",
     "privacy",
     "open source",
-    "vad",
-    "speaker id",
-    "language id",
-    "node-addon-api",
     "streaming speech recognition",
     "speech",
-    "recognition"
+    "recognition",
+    "vad",
+    "node-addon-api",
+    "speaker id",
+    "language id"
   ],
   "author": "The next-gen Kaldi team",
   "license": "Apache-2.0",

diff --git a/.github/scripts/node-addon/package.json b/.github/scripts/node-addon/package.json
@@ -1,7 +1,7 @@
 {
   "name": "sherpa-onnx-node",
   "version": "SHERPA_ONNX_VERSION",
-  "description": "Speech-to-text and text-to-speech using Next-gen Kaldi without internet connection",
+  "description": "Speech-to-text, text-to-speech, and speaker diarization using Next-gen Kaldi without internet connection",
   "main": "sherpa-onnx.js",
   "scripts": {
     "test": "echo \"Error: no test specified\" && exit 1"
@@ -16,8 +16,18 @@
     "transcription",
     "real-time speech recognition",
     "without internet connection",
+    "locally",
+    "local",
     "embedded systems",
     "open source",
+    "diarization",
+    "speaker diarization",
+    "speaker recognition",
+    "speaker",
+    "speaker segmentation",
+    "speaker verification",
+    "spoken language identification",
+    "sherpa",
     "zipformer",
     "asr",
     "tts",
@@ -30,13 +40,13 @@
     "offline",
     "privacy",
     "open source",
-    "vad",
-    "speaker id",
-    "language id",
-    "node-addon-api",
     "streaming speech recognition",
     "speech",
-    "recognition"
+    "recognition",
+    "vad",
+    "node-addon-api",
+    "speaker id",
+    "language id"
   ],
   "author": "The next-gen Kaldi team",
   "license": "Apache-2.0",

diff --git a/.github/scripts/test-dart.sh b/.github/scripts/test-dart.sh
@@ -4,6 +4,11 @@ set -ex
 
 cd dart-api-examples
 
+pushd speaker-diarization
+echo '----------speaker diarization----------'
+./run.sh
+popd
+
 pushd speaker-identification
 echo '----------3d speaker----------'
 ./run-3d-speaker.sh

diff --git a/.github/scripts/test-dot-net.sh b/.github/scripts/test-dot-net.sh
@@ -2,7 +2,13 @@
 
 cd dotnet-examples/
 
-cd ./offline-decode-files
+cd ./offline-speaker-diarization
+./run.sh
+rm -rfv *.onnx
+rm -fv *.wav
+rm -rfv sherpa-onnx-pyannote-*
+
+cd ../offline-decode-files
 ./run-sense-voice-ctc.sh
 rm -rf sherpa-onnx-*
 

diff --git a/.github/scripts/test-nodejs-addon-npm.sh b/.github/scripts/test-nodejs-addon-npm.sh
@@ -10,6 +10,20 @@ arch=$(node -p "require('os').arch()")
 platform=$(node -p "require('os').platform()")
 node_version=$(node -p "process.versions.node.split('.')[0]")
 
+echo "----------non-streaming speaker diarization----------"
+
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
+tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
+rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
+
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
+
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav
+
+node ./test_offline_speaker_diarization.js
+
+rm -rfv *.onnx *.wav sherpa-onnx-pyannote-*
+
 echo "----------non-streaming asr + vad----------"
 curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2
 tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2

diff --git a/.github/scripts/test-nodejs-npm.sh b/.github/scripts/test-nodejs-npm.sh
@@ -9,6 +9,18 @@ git status
 ls -lh
 ls -lh node_modules
 
+echo '-----speaker diarization----------'
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
+tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
+rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
+
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
+
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav
+
+node ./test-offline-speaker-diarization.js
+rm -rfv *.wav *.onnx sherpa-onnx-pyannote-*
+
 echo '-----vad+whisper----------'
 
 curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2

diff --git a/.github/scripts/test-online-punctuation.sh b/.github/scripts/test-online-punctuation.sh
@@ -2,6 +2,9 @@
 
 set -ex
 
+echo "TODO(fangjun): Skip this test since the sanitizer test is failed. We need to fix it"
+exit 0
+
 log() {
   # This function is from espnet
   local fname=${BASH_SOURCE[1]##*/}

diff --git a/.github/scripts/test-python.sh b/.github/scripts/test-python.sh
@@ -8,6 +8,33 @@ log() {
   echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
 }
 
+log "test offline speaker diarization"
+
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
+tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
+rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
+
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
+
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav
+
+python3 ./python-api-examples/offline-speaker-diarization.py
+
+rm -rf *.wav *.onnx ./sherpa-onnx-pyannote-segmentation-3-0
+
+
+log "test_clustering"
+pushd /tmp/
+mkdir test-cluster
+cd test-cluster
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
+git clone https://github.com/csukuangfj/sr-data
+popd
+
+python3 ./sherpa-onnx/python/tests/test_fast_clustering.py
+
+rm -rf /tmp/test-cluster
+
 export GIT_CLONE_PROTECTION_ACTIVE=false
 
 log "test offline SenseVoice CTC"

diff --git a/.github/scripts/test-speaker-diarization.sh b/.github/scripts/test-speaker-diarization.sh
@@ -0,0 +1,41 @@
+#!/usr/bin/env bash
+
+set -ex
+
+log() {
+  # This function is from espnet
+  local fname=${BASH_SOURCE[1]##*/}
+  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
+}
+
+echo "EXE is $EXE"
+echo "PATH: $PATH"
+
+which $EXE
+
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
+tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
+rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
+
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
+
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav
+
+log "specify number of clusters"
+$EXE \
+  --clustering.num-clusters=4 \
+  --segmentation.pyannote-model=./sherpa-onnx-pyannote-segmentation-3-0/model.onnx \
+  --embedding.model=./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx \
+  ./0-four-speakers-zh.wav
+
+log "specify threshold for clustering"
+
+$EXE \
+  --clustering.cluster-threshold=0.90 \
+  --segmentation.pyannote-model=./sherpa-onnx-pyannote-segmentation-3-0/model.onnx \
+  --embedding.model=./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx \
+  ./0-four-speakers-zh.wav
+
+rm -rf sherpa-onnx-pyannote-*
+rm -fv *.onnx
+rm -fv *.wav
diff --git a/.github/scripts/test-swift.sh b/.github/scripts/test-swift.sh
@@ -7,6 +7,11 @@ echo "pwd: $PWD"
 cd swift-api-examples
 ls -lh
 
+./run-speaker-diarization.sh
+rm -rf *.onnx
+rm -rf sherpa-onnx-pyannote-segmentation-3-0
+rm -fv *.wav
+
 ./run-add-punctuations.sh
 rm ./add-punctuations
 rm -rf sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12
-Original file line number
+Diff line change
@@ Expand Up / @@ -2,6 +2,9 @@ @@
     set -ex
+    echo "TODO(fangjun): Skip this test since the sanitizer test is failed. We need to fix it"
+    exit 0
     log() {
       # This function is from espnet
       local fname=${BASH_SOURCE[1]##*/}
@@ Expand Down @@