From 3ba9a4932f52b86d668821e0036b44495cf649f9 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Fri, 20 Oct 2023 12:06:30 +0800 Subject: [PATCH] Support printing input text and words after splitting (#376) --- sherpa-onnx/csrc/lexicon.cc | 33 +++++++++++++++++++++++- sherpa-onnx/csrc/lexicon.h | 4 ++- sherpa-onnx/csrc/offline-tts-vits-impl.h | 3 ++- 3 files changed, 37 insertions(+), 3 deletions(-) diff --git a/sherpa-onnx/csrc/lexicon.cc b/sherpa-onnx/csrc/lexicon.cc index 87203f4e1..8dafe7bfe 100644 --- a/sherpa-onnx/csrc/lexicon.cc +++ b/sherpa-onnx/csrc/lexicon.cc @@ -76,7 +76,9 @@ static std::vector ConvertTokensToIds( } Lexicon::Lexicon(const std::string &lexicon, const std::string &tokens, - const std::string &punctuations, const std::string &language) { + const std::string &punctuations, const std::string &language, + bool debug /*= false*/) + : debug_(debug) { InitLanguage(language); InitTokens(tokens); InitLexicon(lexicon); @@ -102,6 +104,20 @@ std::vector Lexicon::ConvertTextToTokenIdsChinese( const std::string &text) const { std::vector words = SplitUtf8(text); + if (debug_) { + fprintf(stderr, "Input text in string: %s\n", text.c_str()); + fprintf(stderr, "Input text in bytes:"); + for (uint8_t c : text) { + fprintf(stderr, " %02x", c); + } + fprintf(stderr, "\n"); + fprintf(stderr, "After splitting to words:"); + for (const auto &w : words) { + fprintf(stderr, " %s", w.c_str()); + } + fprintf(stderr, "\n"); + } + std::vector ans; auto sil = token2id_.at("sil"); @@ -134,6 +150,21 @@ std::vector Lexicon::ConvertTextToTokenIdsEnglish( ToLowerCase(&text); std::vector words = SplitUtf8(text); + + if (debug_) { + fprintf(stderr, "Input text (lowercase) in string: %s\n", text.c_str()); + fprintf(stderr, "Input text in bytes:"); + for (uint8_t c : text) { + fprintf(stderr, " %02x", c); + } + fprintf(stderr, "\n"); + fprintf(stderr, "After splitting to words:"); + for (const auto &w : words) { + fprintf(stderr, " %s", w.c_str()); + } + fprintf(stderr, "\n"); + } + int32_t blank = token2id_.at(" "); std::vector ans; diff --git a/sherpa-onnx/csrc/lexicon.h b/sherpa-onnx/csrc/lexicon.h index 35bace8f8..211eb607f 100644 --- a/sherpa-onnx/csrc/lexicon.h +++ b/sherpa-onnx/csrc/lexicon.h @@ -17,7 +17,8 @@ namespace sherpa_onnx { class Lexicon { public: Lexicon(const std::string &lexicon, const std::string &tokens, - const std::string &punctuations, const std::string &language); + const std::string &punctuations, const std::string &language, + bool debug = false); std::vector ConvertTextToTokenIds(const std::string &text) const; @@ -45,6 +46,7 @@ class Lexicon { std::unordered_set punctuations_; std::unordered_map token2id_; Language language_; + bool debug_; // }; diff --git a/sherpa-onnx/csrc/offline-tts-vits-impl.h b/sherpa-onnx/csrc/offline-tts-vits-impl.h index af1a001eb..6b19024e0 100644 --- a/sherpa-onnx/csrc/offline-tts-vits-impl.h +++ b/sherpa-onnx/csrc/offline-tts-vits-impl.h @@ -21,7 +21,8 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl { explicit OfflineTtsVitsImpl(const OfflineTtsConfig &config) : model_(std::make_unique(config.model)), lexicon_(config.model.vits.lexicon, config.model.vits.tokens, - model_->Punctuations(), model_->Language()) {} + model_->Punctuations(), model_->Language(), + config.model.debug) {} GeneratedAudio Generate(const std::string &text, int64_t sid = 0) const override {