Skip to content

Commit

Permalink
Merge branch 'master' of https://github.com/k2-fsa/sherpa-onnx
Browse files Browse the repository at this point in the history
# Conflicts:
#	.gitignore
  • Loading branch information
XiaYucca committed Oct 15, 2024
2 parents 16eb38d + 77dd5f7 commit 4d130e4
Show file tree
Hide file tree
Showing 306 changed files with 11,432 additions and 314 deletions.
22 changes: 16 additions & 6 deletions .github/scripts/node-addon/package-optional.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"name": "sherpa-onnx-PLATFORM2-ARCH",
"version": "SHERPA_ONNX_VERSION",
"description": "Speech-to-text and text-to-speech using Next-gen Kaldi without internet connection",
"description": "Speech-to-text, text-to-speech, and speaker diarization using Next-gen Kaldi without internet connection",
"main": "index.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
Expand All @@ -16,8 +16,18 @@
"transcription",
"real-time speech recognition",
"without internet connection",
"locally",
"local",
"embedded systems",
"open source",
"diarization",
"speaker diarization",
"speaker recognition",
"speaker",
"speaker segmentation",
"speaker verification",
"spoken language identification",
"sherpa",
"zipformer",
"asr",
"tts",
Expand All @@ -30,13 +40,13 @@
"offline",
"privacy",
"open source",
"vad",
"speaker id",
"language id",
"node-addon-api",
"streaming speech recognition",
"speech",
"recognition"
"recognition",
"vad",
"node-addon-api",
"speaker id",
"language id"
],
"author": "The next-gen Kaldi team",
"license": "Apache-2.0",
Expand Down
22 changes: 16 additions & 6 deletions .github/scripts/node-addon/package.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"name": "sherpa-onnx-node",
"version": "SHERPA_ONNX_VERSION",
"description": "Speech-to-text and text-to-speech using Next-gen Kaldi without internet connection",
"description": "Speech-to-text, text-to-speech, and speaker diarization using Next-gen Kaldi without internet connection",
"main": "sherpa-onnx.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
Expand All @@ -16,8 +16,18 @@
"transcription",
"real-time speech recognition",
"without internet connection",
"locally",
"local",
"embedded systems",
"open source",
"diarization",
"speaker diarization",
"speaker recognition",
"speaker",
"speaker segmentation",
"speaker verification",
"spoken language identification",
"sherpa",
"zipformer",
"asr",
"tts",
Expand All @@ -30,13 +40,13 @@
"offline",
"privacy",
"open source",
"vad",
"speaker id",
"language id",
"node-addon-api",
"streaming speech recognition",
"speech",
"recognition"
"recognition",
"vad",
"node-addon-api",
"speaker id",
"language id"
],
"author": "The next-gen Kaldi team",
"license": "Apache-2.0",
Expand Down
5 changes: 5 additions & 0 deletions .github/scripts/test-dart.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,11 @@ set -ex

cd dart-api-examples

pushd speaker-diarization
echo '----------speaker diarization----------'
./run.sh
popd

pushd speaker-identification
echo '----------3d speaker----------'
./run-3d-speaker.sh
Expand Down
8 changes: 7 additions & 1 deletion .github/scripts/test-dot-net.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,13 @@

cd dotnet-examples/

cd ./offline-decode-files
cd ./offline-speaker-diarization
./run.sh
rm -rfv *.onnx
rm -fv *.wav
rm -rfv sherpa-onnx-pyannote-*

cd ../offline-decode-files
./run-sense-voice-ctc.sh
rm -rf sherpa-onnx-*

Expand Down
14 changes: 14 additions & 0 deletions .github/scripts/test-nodejs-addon-npm.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,20 @@ arch=$(node -p "require('os').arch()")
platform=$(node -p "require('os').platform()")
node_version=$(node -p "process.versions.node.split('.')[0]")

echo "----------non-streaming speaker diarization----------"

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav

node ./test_offline_speaker_diarization.js

rm -rfv *.onnx *.wav sherpa-onnx-pyannote-*

echo "----------non-streaming asr + vad----------"
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2
tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2
Expand Down
12 changes: 12 additions & 0 deletions .github/scripts/test-nodejs-npm.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,18 @@ git status
ls -lh
ls -lh node_modules

echo '-----speaker diarization----------'
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav

node ./test-offline-speaker-diarization.js
rm -rfv *.wav *.onnx sherpa-onnx-pyannote-*

echo '-----vad+whisper----------'

curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2
Expand Down
3 changes: 3 additions & 0 deletions .github/scripts/test-online-punctuation.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@

set -ex

echo "TODO(fangjun): Skip this test since the sanitizer test is failed. We need to fix it"
exit 0

log() {
# This function is from espnet
local fname=${BASH_SOURCE[1]##*/}
Expand Down
27 changes: 27 additions & 0 deletions .github/scripts/test-python.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,33 @@ log() {
echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}

log "test offline speaker diarization"

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx

curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav

python3 ./python-api-examples/offline-speaker-diarization.py

rm -rf *.wav *.onnx ./sherpa-onnx-pyannote-segmentation-3-0


log "test_clustering"
pushd /tmp/
mkdir test-cluster
cd test-cluster
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
git clone https://github.com/csukuangfj/sr-data
popd

python3 ./sherpa-onnx/python/tests/test_fast_clustering.py

rm -rf /tmp/test-cluster

export GIT_CLONE_PROTECTION_ACTIVE=false

log "test offline SenseVoice CTC"
Expand Down
41 changes: 41 additions & 0 deletions .github/scripts/test-speaker-diarization.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#!/usr/bin/env bash

set -ex

log() {
  # Timestamped logger (borrowed from espnet).
  #
  # Writes one line to stdout of the form
  #   YYYY-mm-dd HH:MM:SS (<file>:<line>:<function>) <message>
  # where <file> is the basename of the script that called log(), <line> is
  # the line of that call and <function> is the calling function.
  #
  # Arguments: $* - message words, joined with the first character of IFS.
  #                 Backslash escapes in the message are expanded.
  local caller_src=${BASH_SOURCE[1]}
  # %b expands backslash escapes in its argument the same way `echo -e` does.
  printf '%b\n' "$(date '+%Y-%m-%d %H:%M:%S') (${caller_src##*/}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}

# Smoke test for the speaker-diarization executable named by $EXE: download
# the models and a test wave file, run the executable twice with different
# clustering options, then remove everything that was downloaded.
#
# Required environment: EXE - name (or path) of the executable under test.

# Print the executable under test and the search path, for the CI log.
echo "EXE is $EXE"
echo "PATH: $PATH"

# Fails if $EXE cannot be resolved; with `set -e` enabled at the top of this
# script that aborts the run before anything is downloaded.
# NOTE(review): $EXE is unquoted here and below — assumes it is a single word
# without whitespace; confirm against the workflows that export it.
which $EXE

# Segmentation model: fetch the archive, unpack it, drop the archive.
# curl flags: -S show errors, -L follow redirects, -O keep the remote name.
# NOTE(review): without -f/--fail curl exits 0 on an HTTP error page, so a
# broken URL would only surface later (in tar or in $EXE) — confirm whether
# that is acceptable.
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2

# Speaker embedding model, passed as --embedding.model below.
# NOTE(review): "recongition" is how the path is spelled in this URL; do not
# "correct" it without checking the actual release tag.
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx

# Test recording used as input for both runs.
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/0-four-speakers-zh.wav

# Run 1: the number of clusters is given explicitly (4).
# The model path assumes the archive above unpacked into
# ./sherpa-onnx-pyannote-segmentation-3-0/ — TODO confirm.
log "specify number of clusters"
$EXE \
--clustering.num-clusters=4 \
--segmentation.pyannote-model=./sherpa-onnx-pyannote-segmentation-3-0/model.onnx \
--embedding.model=./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx \
./0-four-speakers-zh.wav

# Run 2: same models and input, but a clustering threshold (0.90) is passed
# instead of a cluster count.
log "specify threshold for clustering"

$EXE \
--clustering.cluster-threshold=0.90 \
--segmentation.pyannote-model=./sherpa-onnx-pyannote-segmentation-3-0/model.onnx \
--embedding.model=./3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx \
./0-four-speakers-zh.wav

# Clean up everything downloaded above. The globs match ANY *.onnx / *.wav in
# the current directory, not only the files fetched by this script.
rm -rf sherpa-onnx-pyannote-*
rm -fv *.onnx
rm -fv *.wav
5 changes: 5 additions & 0 deletions .github/scripts/test-swift.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,11 @@ echo "pwd: $PWD"
cd swift-api-examples
ls -lh

./run-speaker-diarization.sh
rm -rf *.onnx
rm -rf sherpa-onnx-pyannote-segmentation-3-0
rm -fv *.wav

./run-add-punctuations.sh
rm ./add-punctuations
rm -rf sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12
Expand Down
Loading

0 comments on commit 4d130e4

Please sign in to comment.