Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Go API for TTS #377

Merged
merged 4 commits into from
Oct 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
122 changes: 121 additions & 1 deletion .github/workflows/test-go-package.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ jobs:
fetch-depth: 0
- uses: actions/setup-go@v4
with:
go-version: '>=1.20'
go-version: '>=1.12'

- name: Display go version
shell: bash
Expand All @@ -66,6 +66,121 @@ jobs:
run: |
gcc --version

- name: Test non-streaming TTS (Linux/macOS)
  if: matrix.os != 'windows-latest'
  shell: bash
  run: |
    # Directory at the workspace root that receives the generated wave files.
    mkdir tts-waves

    # Build the Go example in place.
    cd go-api-examples/non-streaming-tts
    ls -lh
    go mod tidy
    cat go.mod
    go build
    ls -lh

    git lfs install

    # For each model: clone it, run the matching run-<model>.sh script,
    # then remove the clone again.
    for model in vits-ljs vits-vctk vits-zh-aishell3; do
      echo "Test $model"
      git clone https://huggingface.co/csukuangfj/$model
      ./run-$model.sh
      rm -rf $model
    done

    # Copy everything that was synthesized into the collection directory.
    ls -lh *.wav
    cp *.wav ../../tts-waves/

- name: Test non-streaming TTS (Win64)
  if: matrix.os == 'windows-latest' && matrix.arch == 'x64'
  shell: bash
  run: |
    # Directory at the workspace root that receives the generated wave files.
    mkdir tts-waves

    # Build the Go example in place.
    cd go-api-examples/non-streaming-tts
    ls -lh
    go mod tidy
    cat go.mod
    go build
    ls -lh

    # Copy the x86_64 DLLs from the Go module cache next to the executable.
    echo $PWD
    ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/
    ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/*
    cp -v /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/sherpa-onnx-go-windows*/lib/x86_64-pc-windows-gnu/*.dll .
    ls -lh

    git lfs install

    # For each model: clone it, run the matching run-<model>.sh script,
    # then remove the clone again.
    for model in vits-ljs vits-vctk vits-zh-aishell3; do
      echo "Test $model"
      git clone https://huggingface.co/csukuangfj/$model
      ./run-$model.sh
      rm -rf $model
    done

    # Copy everything that was synthesized into the collection directory.
    ls -lh *.wav
    cp *.wav ../../tts-waves/

- name: Test non-streaming TTS (Win32)
  if: matrix.os == 'windows-latest' && matrix.arch == 'x86'
  shell: bash
  run: |
    # Create the collection directory before changing directory. The final
    # `cp *.wav ../../tts-waves/` needs it to exist, and no other step that
    # runs for this matrix entry creates it.
    mkdir tts-waves

    cd go-api-examples/non-streaming-tts
    ls -lh
    go mod tidy
    cat go.mod
    ls -lh

    # Show the Go environment, switch the target architecture to 386 with
    # cgo enabled, then show the environment again for the build log.
    go env GOARCH
    go env
    echo "------------------------------"
    go env -w GOARCH=386
    go env -w CGO_ENABLED=1
    go env

    go clean
    go build

    # Copy the i686 DLLs from the Go module cache next to the executable.
    echo $PWD
    ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/
    cp -v /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/sherpa-onnx-go-windows*/lib/i686-pc-windows-gnu/*.dll .
    ls -lh

    git lfs install

    echo "Test vits-ljs"
    git clone https://huggingface.co/csukuangfj/vits-ljs
    ./run-vits-ljs.sh
    rm -rf vits-ljs

    echo "Test vits-vctk"
    git clone https://huggingface.co/csukuangfj/vits-vctk
    ./run-vits-vctk.sh
    rm -rf vits-vctk

    echo "Test vits-zh-aishell3"
    git clone https://huggingface.co/csukuangfj/vits-zh-aishell3
    ./run-vits-zh-aishell3.sh
    rm -rf vits-zh-aishell3

    # Copy everything that was synthesized into the collection directory.
    ls -lh *.wav
    cp *.wav ../../tts-waves/

- name: Test non-streaming decoding files (Linux/macOS)
if: matrix.os != 'windows-latest'
shell: bash
Expand Down Expand Up @@ -298,3 +413,8 @@ jobs:
git clone https://huggingface.co/csukuangfj/sherpa-onnx-streaming-paraformer-bilingual-zh-en
./run-paraformer.sh
rm -rf sherpa-onnx-streaming-paraformer-bilingual-zh-en

# Upload the contents of the tts-waves directory as an artifact named "tts-waves".
- uses: actions/upload-artifact@v3
with:
name: tts-waves
path: tts-waves
36 changes: 36 additions & 0 deletions .github/workflows/test-go.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,42 @@ jobs:
go mod tidy
go build

- name: Test non-streaming TTS (macOS)
  shell: bash
  run: |
    # Directory at the workspace root that receives the generated wave files.
    mkdir tts-waves

    # Build the example from the in-repo Go sources.
    cd scripts/go/_internal/non-streaming-tts/
    ls -lh
    go mod tidy
    cat go.mod
    go build
    ls -lh

    git lfs install

    # For each model: clone it, run the matching run-<model>.sh script,
    # then remove the clone again.
    for model in vits-ljs vits-vctk vits-zh-aishell3; do
      echo "Test $model"
      git clone https://huggingface.co/csukuangfj/$model
      ./run-$model.sh
      rm -rf $model
    done

    # Four levels up from the example directory is the workspace root.
    cp *.wav ../../../../tts-waves/

# Upload the contents of the tts-waves directory as an artifact named "tts-waves".
- uses: actions/upload-artifact@v3
with:
name: tts-waves
path: tts-waves

- name: Test non-streaming decoding files (macOS)
shell: bash
run: |
Expand Down
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
cmake_minimum_required(VERSION 3.13 FATAL_ERROR)
project(sherpa-onnx)

set(SHERPA_ONNX_VERSION "1.8.3")
set(SHERPA_ONNX_VERSION "1.8.4")

# Disable warning about
#
Expand Down
2 changes: 1 addition & 1 deletion c-api-examples/offline-tts-c-api.c
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ int32_t main(int32_t argc, char *argv[]) {
const SherpaOnnxGeneratedAudio *audio =
SherpaOnnxOfflineTtsGenerate(tts, text, sid);

SherpaOnnxDestroyOfflineWriteWave(audio, filename);
SherpaOnnxWriteWave(audio->samples, audio->n, audio->sample_rate, filename);

SherpaOnnxDestroyOfflineTtsGeneratedAudio(audio);
SherpaOnnxDestroyOfflineTts(tts);
Expand Down
6 changes: 0 additions & 6 deletions go-api-examples/non-streaming-decode-files/go.mod
Original file line number Diff line number Diff line change
@@ -1,9 +1,3 @@
module non-streaming-decode-files

go 1.12

require (
github.com/k2-fsa/sherpa-onnx-go v1.7.12-alpha
github.com/spf13/pflag v1.0.5
github.com/youpy/go-wav v0.3.2
)
35 changes: 0 additions & 35 deletions go-api-examples/non-streaming-decode-files/go.sum

This file was deleted.

3 changes: 3 additions & 0 deletions go-api-examples/non-streaming-tts/go.mod
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
module non-streaming-tts

go 1.12
61 changes: 61 additions & 0 deletions go-api-examples/non-streaming-tts/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
package main

import (
sherpa "github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx"
flag "github.com/spf13/pflag"
"log"
)

// main is the entry point of the non-streaming TTS example.
//
// It reads the VITS model paths and synthesis options from command-line
// flags, expects exactly one positional argument (the text to synthesize),
// generates audio for that text and saves it to the file given by
// --output-filename (default ./generated.wav). Any failure terminates the
// process through log.Fatalf.
func main() {
	// Timestamps with microseconds make the log lines between the stages
	// below easy to compare.
	log.SetFlags(log.LstdFlags | log.Lmicroseconds)

	config := sherpa.OfflineTtsConfig{}
	sid := 0
	filename := "./generated.wav"

	// Model files.
	flag.StringVar(&config.Model.Vits.Model, "vits-model", "", "Path to the vits ONNX model")
	flag.StringVar(&config.Model.Vits.Lexicon, "vits-lexicon", "", "Path to lexicon.txt")
	flag.StringVar(&config.Model.Vits.Tokens, "vits-tokens", "", "Path to tokens.txt")

	// VITS synthesis parameters.
	flag.Float32Var(&config.Model.Vits.NoiseScale, "vits-noise-scale", 0.667, "noise_scale for VITS")
	flag.Float32Var(&config.Model.Vits.NoiseScaleW, "vits-noise-scale-w", 0.8, "noise_scale_w for VITS")
	flag.Float32Var(&config.Model.Vits.LengthScale, "vits-length-scale", 1.0, "length_scale for VITS. small -> faster in speech speed; large -> slower")

	// Runtime options.
	flag.IntVar(&config.Model.NumThreads, "num-threads", 1, "Number of threads for computing")
	flag.IntVar(&config.Model.Debug, "debug", 0, "Whether to show debug message")
	flag.StringVar(&config.Model.Provider, "provider", "cpu", "Provider to use")

	// Per-invocation options.
	flag.IntVar(&sid, "sid", 0, "Speaker ID. Used only for multi-speaker models")
	flag.StringVar(&filename, "output-filename", "./generated.wav", "Filename to save the generated audio")

	flag.Parse()

	// Exactly one positional argument is required: the text to speak.
	if len(flag.Args()) != 1 {
		log.Fatalf("Please provide the text to generate audios")
	}

	text := flag.Arg(0)

	log.Println("Input text:", text)
	log.Println("Speaker ID:", sid)
	log.Println("Output filename:", filename)

	log.Println("Initializing model (may take several seconds)")

	tts := sherpa.NewOfflineTts(&config)
	// NOTE: log.Fatalf exits the process, so this deferred cleanup only runs
	// when main returns normally.
	defer sherpa.DeleteOfflineTts(tts)

	log.Println("Model created!")

	log.Println("Start generating!")

	audio := tts.Generate(text, sid)

	log.Println("Done!")

	// Save is treated as successful only when it returns 1.
	ok := audio.Save(filename)
	if ok != 1 {
		// Fatalf needs a verb for the filename. Without one the message is
		// rendered as "Failed to write%!(EXTRA string=<filename>)".
		log.Fatalf("Failed to write %s", filename)
	}
}
14 changes: 14 additions & 0 deletions go-api-examples/non-streaming-tts/run-vits-ljs.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#!/usr/bin/env bash

# Runs the non-streaming-tts example with the model files in ./vits-ljs
# and writes the result to ./vits-ljs.wav.
#
# Download the model before running this script; see
# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/vits.html#ljspeech-english-single-speaker

# Options are collected in an array so each one sits on its own line
# without needing a trailing backslash.
tts_args=(
  --vits-model=./vits-ljs/vits-ljs.onnx
  --vits-lexicon=./vits-ljs/lexicon.txt
  --vits-tokens=./vits-ljs/tokens.txt
  --sid=0
  --debug=1
  --output-filename=./vits-ljs.wav
)

./non-streaming-tts "${tts_args[@]}" \
  "Liliana, the most beautiful and lovely assistant of our team!"
16 changes: 16 additions & 0 deletions go-api-examples/non-streaming-tts/run-vits-vctk.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#!/usr/bin/env bash

# Runs the non-streaming-tts example with the model files in ./vits-vctk,
# once per speaker ID in the loop below. Each run writes ./kennedy-<sid>.wav.
#
# Download the model before running this script; see
# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/vits.html#vctk-english-multi-speaker-109-speakers

for sid in 0 10 108; do
  # Pass the loop variable as the speaker ID. A hard-coded --sid=0 would
  # produce three files that all contain speaker 0.
  ./non-streaming-tts \
    --vits-model=./vits-vctk/vits-vctk.onnx \
    --vits-lexicon=./vits-vctk/lexicon.txt \
    --vits-tokens=./vits-vctk/tokens.txt \
    --sid=$sid \
    --debug=1 \
    --output-filename=./kennedy-$sid.wav \
    'Ask not what your country can do for you; ask what you can do for your country.'
done
16 changes: 16 additions & 0 deletions go-api-examples/non-streaming-tts/run-vits-zh-aishell3.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#!/usr/bin/env bash

# Runs the non-streaming-tts example with the model files in
# ./vits-zh-aishell3, once per speaker ID in the loop below. Each run writes
# ./liliana-<sid>.wav.
#
# Download the model before running this script; see
# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/vits.html#aishell3-chinese-multi-speaker-174-speakers

for sid in 10 33 99; do
  # Pass the loop variable as the speaker ID. A hard-coded --sid=10 would
  # produce three files that all contain speaker 10.
  ./non-streaming-tts \
    --vits-model=./vits-zh-aishell3/vits-aishell3.onnx \
    --vits-lexicon=./vits-zh-aishell3/lexicon.txt \
    --vits-tokens=./vits-zh-aishell3/tokens.txt \
    --sid=$sid \
    --debug=1 \
    --output-filename=./liliana-$sid.wav \
    "林美丽最美丽、最漂亮、最可爱!"
done
Original file line number Diff line number Diff line change
@@ -1,9 +1,3 @@
module real-time-speech-recognition-from-microphone

go 1.12

require (
github.com/gordonklaus/portaudio v0.0.0-20230709114228-aafa478834f5
github.com/k2-fsa/sherpa-onnx-go v1.7.12-alpha
github.com/spf13/pflag v1.0.5
)

This file was deleted.

6 changes: 0 additions & 6 deletions go-api-examples/streaming-decode-files/go.mod
Original file line number Diff line number Diff line change
@@ -1,9 +1,3 @@
module streaming-decode-files

go 1.12

require (
github.com/k2-fsa/sherpa-onnx-go v1.7.12-alpha
github.com/spf13/pflag v1.0.5
github.com/youpy/go-wav v0.3.2
)
Loading
Loading