From b2c283fa2b0377281d6b4711c36bc5d4119ef4dd Mon Sep 17 00:00:00 2001
From: Fangjun Kuang
Date: Mon, 15 Jul 2024 15:30:40 +0800
Subject: [PATCH] Add Swift API for adding punctuations to text. (#1132)

---
 .github/scripts/test-swift.sh              |  4 ++
 swift-api-examples/.gitignore              |  1 +
 swift-api-examples/SherpaOnnx.swift        | 66 ++++++++++++++++++++++
 swift-api-examples/add-punctuations.swift  | 31 ++++++++++
 swift-api-examples/run-add-punctuations.sh | 34 +++++++++++
 5 files changed, 136 insertions(+)
 create mode 100644 swift-api-examples/add-punctuations.swift
 create mode 100755 swift-api-examples/run-add-punctuations.sh

diff --git a/.github/scripts/test-swift.sh b/.github/scripts/test-swift.sh
index 875c4fa34..18c9bed41 100755
--- a/.github/scripts/test-swift.sh
+++ b/.github/scripts/test-swift.sh
@@ -7,6 +7,10 @@ echo "pwd: $PWD"
 cd swift-api-examples
 ls -lh
 
+./run-add-punctuations.sh
+rm ./add-punctuations
+rm -rf sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12
+
 ./run-keyword-spotting-from-file.sh
 rm ./keyword-spotting-from-file
 rm -rf sherpa-onnx-kws-*
diff --git a/swift-api-examples/.gitignore b/swift-api-examples/.gitignore
index 794cabec8..97b559df4 100644
--- a/swift-api-examples/.gitignore
+++ b/swift-api-examples/.gitignore
@@ -9,3 +9,4 @@ sherpa-onnx-paraformer-zh-2023-09-14
 *.bak
 streaming-hlg-decode-file
 keyword-spotting-from-file
+add-punctuations
diff --git a/swift-api-examples/SherpaOnnx.swift b/swift-api-examples/SherpaOnnx.swift
index fe9b870e7..eba8d8916 100644
--- a/swift-api-examples/SherpaOnnx.swift
+++ b/swift-api-examples/SherpaOnnx.swift
@@ -957,3 +957,69 @@ class SherpaOnnxKeywordSpotterWrapper {
     InputFinished(stream)
   }
 }
+
+// Punctuation
+
+/// Builds the C model config used by the offline punctuation API.
+///
+/// - Parameters:
+///   - ctTransformer: Stored in `ct_transformer` (the example passes a `model.onnx` path).
+///   - numThreads: Stored in `num_threads`.
+///   - debug: Stored in `debug`.
+///   - provider: Stored in `provider`; defaults to `"cpu"`.
+func sherpaOnnxOfflinePunctuationModelConfig(
+  ctTransformer: String,
+  numThreads: Int = 1,
+  debug: Int = 0,
+  provider: String = "cpu"
+) -> SherpaOnnxOfflinePunctuationModelConfig {
+  return SherpaOnnxOfflinePunctuationModelConfig(
+    ct_transformer: toCPointer(ctTransformer),
+    num_threads: Int32(numThreads),
+    debug: Int32(debug),
+    provider: toCPointer(provider)
+  )
+}
+
+/// Wraps a model config in the top-level offline punctuation config.
+func sherpaOnnxOfflinePunctuationConfig(
+  model: SherpaOnnxOfflinePunctuationModelConfig
+) -> SherpaOnnxOfflinePunctuationConfig {
+  return SherpaOnnxOfflinePunctuationConfig(
+    model: model
+  )
+}
+
+/// Swift wrapper around the C offline punctuation object.
+class SherpaOnnxOfflinePunctuationWrapper {
+  /// A pointer to the underlying counterpart in C
+  let ptr: OpaquePointer!
+
+  /// Constructor taking a model config
+  init(
+    config: UnsafePointer<SherpaOnnxOfflinePunctuationConfig>!
+  ) {
+    ptr = SherpaOnnxCreateOfflinePunctuation(config)
+  }
+
+  deinit {
+    if let ptr {
+      SherpaOnnxDestroyOfflinePunctuation(ptr)
+    }
+  }
+
+  /// Returns `text` with punctuation added by the C API.
+  ///
+  /// Falls back to returning `text` unchanged when the wrapper has no
+  /// underlying object or the C call yields no string, instead of crashing.
+  func addPunct(text: String) -> String {
+    guard let ptr,
+      let cText = SherpaOfflinePunctuationAddPunct(ptr, toCPointer(text))
+    else {
+      return text
+    }
+    // Release the C string only after it has been copied into a Swift String.
+    defer { SherpaOfflinePunctuationFreeText(cText) }
+    return String(cString: cText)
+  }
+}
diff --git a/swift-api-examples/add-punctuations.swift b/swift-api-examples/add-punctuations.swift
new file mode 100644
index 000000000..5c1ee32ca
--- /dev/null
+++ b/swift-api-examples/add-punctuations.swift
@@ -0,0 +1,31 @@
+func run() {
+  let model = "./sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12/model.onnx"
+  let modelConfig = sherpaOnnxOfflinePunctuationModelConfig(
+    ctTransformer: model,
+    numThreads: 1,
+    debug: 1,
+    provider: "cpu"
+  )
+  var config = sherpaOnnxOfflinePunctuationConfig(model: modelConfig)
+
+  let punct = SherpaOnnxOfflinePunctuationWrapper(config: &config)
+
+  let textList = [
+    "这是一个测试你好吗How are you我很好thank you are you ok谢谢你",
+    "我们都是木头人不会说话不会动",
+    "The African blogosphere is rapidly expanding bringing more voices online in the form of commentaries opinions analyses rants and poetry",
+  ]
+
+  for i in 0..