-
Notifications
You must be signed in to change notification settings - Fork 477
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add Python binding for online punctuation models (#1312)
- Loading branch information
Showing
8 changed files
with
133 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
#!/usr/bin/env python3 | ||
|
||
""" | ||
This script shows how to add punctuation to text using the sherpa-onnx Python API. | ||
Please download the model from | ||
https://github.com/k2-fsa/sherpa-onnx/releases/tag/punctuation-models | ||
The following is an example | ||
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-online-punct-en-2024-08-06.tar.bz2 | ||
tar xvf sherpa-onnx-online-punct-en-2024-08-06.tar.bz2 | ||
rm sherpa-onnx-online-punct-en-2024-08-06.tar.bz2 | ||
""" | ||
|
||
from pathlib import Path | ||
|
||
import sherpa_onnx | ||
|
||
|
||
# Demo entry point: builds an online punctuation object from hard-coded model files | ||
# and prints two sample sentences together with the text returned by the model. | ||
# Raises ValueError when the model file or the BPE vocabulary file is missing. | ||
def main(): | ||
# Paths are relative to the current working directory. | ||
model = "./sherpa-onnx-online-punct-en-2024-08-06/model.onnx" | ||
bpe = "./sherpa-onnx-online-punct-en-2024-08-06/bpe.vocab" | ||
# Check the files up front: the model first, then the vocabulary. | ||
if not Path(model).is_file(): | ||
raise ValueError(f"{model} does not exist") | ||
if not Path(bpe).is_file(): | ||
raise ValueError(f"{bpe} does not exist") | ||
|
||
# Configuration is layered: model config -> punctuation config -> punctuation object. | ||
model_config = sherpa_onnx.OnlinePunctuationModelConfig( | ||
cnn_bilstm=model, bpe_vocab=bpe | ||
) | ||
config = sherpa_onnx.OnlinePunctuationConfig(model_config=model_config) | ||
punct = sherpa_onnx.OnlinePunctuation(config) | ||
|
||
# Sample inputs without punctuation. | ||
texts = [ | ||
"how are you i am fine thank you", | ||
"The African blogosphere is rapidly expanding bringing more voices online in the form of commentaries opinions analyses rants and poetry", | ||
] | ||
# Print each input next to the text returned by add_punctuation_with_case. | ||
# NOTE(review): indentation is not visible in this view; confirm whether the | ||
# closing separator print belongs inside the loop or after it. | ||
for text in texts: | ||
text_with_punct = punct.add_punctuation_with_case(text) | ||
print("----------") | ||
print(f"input : {text}") | ||
print(f"output: {text_with_punct}") | ||
print("----------") | ||
|
||
|
||
# Run the demo only when this file is executed as a script. | ||
if __name__ == "__main__": | ||
main() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
// sherpa-onnx/python/csrc/online-punctuation.cc | ||
// | ||
// Copyright (c) 2024 | ||
|
||
#include "sherpa-onnx/python/csrc/online-punctuation.h" | ||
|
||
#include "sherpa-onnx/csrc/online-punctuation.h" | ||
|
||
namespace sherpa_onnx { | ||
|
||
// Registers OnlinePunctuationModelConfig on the given module under the Python
// name "OnlinePunctuationModelConfig".
//
// @param m  Module that receives the class binding.
static void PybindOnlinePunctuationModelConfig(py::module *m) {
  using Config = OnlinePunctuationModelConfig;

  py::class_<Config> binding(*m, "OnlinePunctuationModelConfig");

  // Constructors: a no-argument one, and one accepting the keyword arguments
  // cnn_bilstm, bpe_vocab, num_threads (default 1), debug (default false)
  // and provider (default "cpu").
  binding.def(py::init<>());
  binding.def(py::init<const std::string &, const std::string &, int32_t, bool,
                       const std::string &>(),
              py::arg("cnn_bilstm"), py::arg("bpe_vocab"),
              py::arg("num_threads") = 1, py::arg("debug") = false,
              py::arg("provider") = "cpu");

  // Fields exposed as read/write Python attributes.
  binding.def_readwrite("cnn_bilstm", &Config::cnn_bilstm);
  binding.def_readwrite("bpe_vocab", &Config::bpe_vocab);
  binding.def_readwrite("num_threads", &Config::num_threads);
  binding.def_readwrite("debug", &Config::debug);
  binding.def_readwrite("provider", &Config::provider);

  // Methods: validate() forwards to Validate(), str() forwards to ToString().
  binding.def("validate", &Config::Validate);
  binding.def("__str__", &Config::ToString);
}
|
||
// Registers OnlinePunctuationConfig on the given module under the Python name
// "OnlinePunctuationConfig".
//
// @param m  Module that receives the class binding.
static void PybindOnlinePunctuationConfig(py::module *m) {
  // The model config binding is registered first, as this class's constructor
  // and "model_config" attribute use that type.
  PybindOnlinePunctuationModelConfig(m);

  using Config = OnlinePunctuationConfig;

  py::class_<Config> binding(*m, "OnlinePunctuationConfig");
  binding.def(py::init<>());
  binding.def(py::init<const OnlinePunctuationModelConfig &>(),
              py::arg("model_config"));

  // The C++ member `model` is exposed to Python as `model_config`.
  binding.def_readwrite("model_config", &Config::model);

  binding.def("validate", &Config::Validate);
  binding.def("__str__", &Config::ToString);
}
|
||
// Registers OnlinePunctuation, together with its config classes, on the given
// module.
//
// @param m  Module that receives the class bindings.
void PybindOnlinePunctuation(py::module *m) {
  // Config bindings are registered before the class that consumes them.
  PybindOnlinePunctuationConfig(m);

  using Punctuation = OnlinePunctuation;

  py::class_<Punctuation> binding(*m, "OnlinePunctuation");

  // Both the constructor and add_punctuation_with_case are bound with a
  // gil_scoped_release call guard.
  binding.def(py::init<const OnlinePunctuationConfig &>(), py::arg("config"),
              py::call_guard<py::gil_scoped_release>());
  binding.def("add_punctuation_with_case",
              &Punctuation::AddPunctuationWithCase, py::arg("text"),
              py::call_guard<py::gil_scoped_release>());
}
|
||
} // namespace sherpa_onnx |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
// sherpa-onnx/python/csrc/online-punctuation.h | ||
// | ||
// Copyright (c) 2024 | ||
|
||
#ifndef SHERPA_ONNX_PYTHON_CSRC_ONLINE_PUNCTUATION_H_ | ||
#define SHERPA_ONNX_PYTHON_CSRC_ONLINE_PUNCTUATION_H_ | ||
|
||
#include "sherpa-onnx/python/csrc/sherpa-onnx.h" | ||
|
||
namespace sherpa_onnx { | ||
|
||
// Registers the Python bindings for OnlinePunctuation on module `m`. | ||
// Defined in sherpa-onnx/python/csrc/online-punctuation.cc. | ||
void PybindOnlinePunctuation(py::module *m); | ||
|
||
} | ||
|
||
#endif // SHERPA_ONNX_PYTHON_CSRC_ONLINE_PUNCTUATION_H_ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters