Skip to content

Commit

Permalink
Add C# API for Moonshine models. (#1483)
Browse files Browse the repository at this point in the history
* Also, return timestamps for non-streaming ASR.
  • Loading branch information
csukuangfj authored Oct 27, 2024
1 parent cdd8e1b commit 3622104
Show file tree
Hide file tree
Showing 6 changed files with 143 additions and 8 deletions.
3 changes: 3 additions & 0 deletions .github/scripts/test-dot-net.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@ rm -fv *.wav
rm -rfv sherpa-onnx-pyannote-*

cd ../offline-decode-files
./run-moonshine.sh
rm -rf sherpa-onnx-*

./run-sense-voice-ctc.sh
rm -rf sherpa-onnx-*

Expand Down
44 changes: 37 additions & 7 deletions dotnet-examples/offline-decode-files/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ class Options
{

[Option("sample-rate", Required = false, Default = 16000, HelpText = "Sample rate of the data used to train the model")]
public int SampleRate { get; set; } = 16000;
public int SampleRate { get; set; } = 16000;

[Option("feat-dim", Required = false, Default = 80, HelpText = "Dimension of the features used to train the model")]
public int FeatureDim { get; set; } = 80;
Expand All @@ -31,7 +31,7 @@ class Options
[Option(Required = false, Default = "", HelpText = "Path to transducer decoder.onnx. Used only for transducer models")]
public string Decoder { get; set; } = "";

[Option(Required = false, Default = "",HelpText = "Path to transducer joiner.onnx. Used only for transducer models")]
[Option(Required = false, Default = "", HelpText = "Path to transducer joiner.onnx. Used only for transducer models")]
public string Joiner { get; set; } = "";

[Option("model-type", Required = false, Default = "", HelpText = "model type")]
Expand All @@ -44,10 +44,22 @@ class Options
public string WhisperDecoder { get; set; } = "";

[Option("whisper-language", Required = false, Default = "", HelpText = "Language of the input file. Can be empty")]
public string WhisperLanguage{ get; set; } = "";
public string WhisperLanguage { get; set; } = "";

[Option("whisper-task", Required = false, Default = "transcribe", HelpText = "transcribe or translate")]
public string WhisperTask{ get; set; } = "transcribe";
public string WhisperTask { get; set; } = "transcribe";

[Option("moonshine-preprocessor", Required = false, Default = "", HelpText = "Path to preprocess.onnx. Used only for Moonshine models")]
public string MoonshinePreprocessor { get; set; } = "";

[Option("moonshine-encoder", Required = false, Default = "", HelpText = "Path to encode.onnx. Used only for Moonshine models")]
public string MoonshineEncoder { get; set; } = "";

[Option("moonshine-uncached-decoder", Required = false, Default = "", HelpText = "Path to uncached_decode.onnx. Used only for Moonshine models")]
public string MoonshineUncachedDecoder { get; set; } = "";

[Option("moonshine-cached-decoder", Required = false, Default = "", HelpText = "Path to cached_decode.onnx. Used only for Moonshine models")]
public string MoonshineCachedDecoder { get; set; } = "";

[Option("tdnn-model", Required = false, Default = "", HelpText = "Path to tdnn yesno model")]
public string TdnnModel { get; set; } = "";
Expand Down Expand Up @@ -90,7 +102,7 @@ class Options
public float HotwordsScore { get; set; } = 1.5F;

[Option("files", Required = true, HelpText = "Audio files for decoding")]
public IEnumerable<string> Files { get; set; } = new string[] {};
public IEnumerable<string> Files { get; set; } = new string[] { };
}

static void Main(string[] args)
Expand Down Expand Up @@ -236,6 +248,13 @@ private static void Run(Options options)
config.ModelConfig.SenseVoice.Model = options.SenseVoiceModel;
config.ModelConfig.SenseVoice.UseInverseTextNormalization = options.SenseVoiceUseItn;
}
else if (!String.IsNullOrEmpty(options.MoonshinePreprocessor))
{
config.ModelConfig.Moonshine.Preprocessor = options.MoonshinePreprocessor;
config.ModelConfig.Moonshine.Encoder = options.MoonshineEncoder;
config.ModelConfig.Moonshine.UncachedDecoder = options.MoonshineUncachedDecoder;
config.ModelConfig.Moonshine.CachedDecoder = options.MoonshineCachedDecoder;
}
else
{
Console.WriteLine("Please provide a model");
Expand Down Expand Up @@ -273,10 +292,21 @@ private static void Run(Options options)
// display results
for (int i = 0; i != files.Length; ++i)
{
var text = streams[i].Result.Text;
var r = streams[i].Result;
Console.WriteLine("--------------------");
Console.WriteLine(files[i]);
Console.WriteLine(text);
Console.WriteLine("Text: {0}", r.Text);
Console.WriteLine("Tokens: [{0}]", string.Join(", ", r.Tokens));
if (r.Timestamps != null && r.Timestamps.Length > 0) {
Console.Write("Timestamps: [");
var sep = "";
for (int k = 0; k != r.Timestamps.Length; ++k)
{
Console.Write("{0}{1}", sep, r.Timestamps[k].ToString("0.00"));
sep = ", ";
}
Console.WriteLine("]");
}
}
Console.WriteLine("--------------------");
}
Expand Down
18 changes: 18 additions & 0 deletions dotnet-examples/offline-decode-files/run-moonshine.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#!/usr/bin/env bash
#
# Decodes test_wavs/0.wav from the sherpa-onnx-moonshine-tiny-en-int8 model
# package with `dotnet run`, downloading the package first when needed.

# Stop at the first failing command and echo each command as it is executed.
set -o errexit -o xtrace

# tokens.txt serves as the marker that the model package is already unpacked;
# fetch and extract the archive only when it is missing, then drop the archive.
if [[ ! -f ./sherpa-onnx-moonshine-tiny-en-int8/tokens.txt ]]; then
  curl --show-error --location --remote-name https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
  tar xvf sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
  rm sherpa-onnx-moonshine-tiny-en-int8.tar.bz2
fi

# Pass the four Moonshine model files, the token table and one test wave file.
dotnet run \
  --num-threads=2 \
  --moonshine-preprocessor=./sherpa-onnx-moonshine-tiny-en-int8/preprocess.onnx \
  --moonshine-encoder=./sherpa-onnx-moonshine-tiny-en-int8/encode.int8.onnx \
  --moonshine-uncached-decoder=./sherpa-onnx-moonshine-tiny-en-int8/uncached_decode.int8.onnx \
  --moonshine-cached-decoder=./sherpa-onnx-moonshine-tiny-en-int8/cached_decode.int8.onnx \
  --tokens=./sherpa-onnx-moonshine-tiny-en-int8/tokens.txt \
  --files ./sherpa-onnx-moonshine-tiny-en-int8/test_wavs/0.wav
2 changes: 2 additions & 0 deletions scripts/dotnet/OfflineModelConfig.cs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ public OfflineModelConfig()
BpeVocab = "";
TeleSpeechCtc = "";
SenseVoice = new OfflineSenseVoiceModelConfig();
Moonshine = new OfflineMoonshineModelConfig();
}
public OfflineTransducerModelConfig Transducer;
public OfflineParaformerModelConfig Paraformer;
Expand Down Expand Up @@ -54,5 +55,6 @@ public OfflineModelConfig()
public string TeleSpeechCtc;

public OfflineSenseVoiceModelConfig SenseVoice;
public OfflineMoonshineModelConfig Moonshine;
}
}
29 changes: 29 additions & 0 deletions scripts/dotnet/OfflineMoonshineModelConfig.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
/// Copyright (c) 2024 Xiaomi Corporation (authors: Fangjun Kuang)

using System.Runtime.InteropServices;

namespace SherpaOnnx
{
    /// <summary>
    /// Holds the paths of the four model files used by a Moonshine model:
    /// preprocessor, encoder, uncached decoder and cached decoder.
    /// </summary>
    /// <remarks>
    /// The struct uses sequential layout and every field is marshalled as
    /// <see cref="UnmanagedType.LPStr"/>. NOTE(review): this presumably
    /// mirrors a native struct, so the field order should stay as it is
    /// unless the native side changes too - confirm against the C API.
    /// </remarks>
    [StructLayout(LayoutKind.Sequential)]
    public struct OfflineMoonshineModelConfig
    {
        /// <summary>
        /// Creates a config in which every path is the empty string.
        /// </summary>
        public OfflineMoonshineModelConfig()
        {
            Preprocessor = Encoder = UncachedDecoder = CachedDecoder = "";
        }

        /// <summary>Path of the preprocessor model file.</summary>
        [MarshalAs(UnmanagedType.LPStr)]
        public string Preprocessor;

        /// <summary>Path of the encoder model file.</summary>
        [MarshalAs(UnmanagedType.LPStr)]
        public string Encoder;

        /// <summary>Path of the uncached decoder model file.</summary>
        [MarshalAs(UnmanagedType.LPStr)]
        public string UncachedDecoder;

        /// <summary>Path of the cached decoder model file.</summary>
        [MarshalAs(UnmanagedType.LPStr)]
        public string CachedDecoder;
    }
}
55 changes: 54 additions & 1 deletion scripts/dotnet/OfflineRecognizerResult.cs
Original file line number Diff line number Diff line change
Expand Up @@ -31,17 +31,70 @@ public OfflineRecognizerResult(IntPtr handle)
byte[] stringBuffer = new byte[length];
Marshal.Copy(impl.Text, stringBuffer, 0, length);
_text = Encoding.UTF8.GetString(stringBuffer);

_tokens = new String[impl.Count];

unsafe
{
byte* buf = (byte*)impl.Tokens;
for (int i = 0; i < impl.Count; i++)
{
length = 0;
byte* start = buf;
while (*buf != 0)
{
++buf;
length += 1;
}
++buf;

stringBuffer = new byte[length];
fixed (byte* pTarget = stringBuffer)
{
for (int k = 0; k < length; k++)
{
pTarget[k] = start[k];
}
}

_tokens[i] = Encoding.UTF8.GetString(stringBuffer);
}
}

unsafe
{
if (impl.Timestamps != IntPtr.Zero)
{
float *t = (float*)impl.Timestamps;
_timestamps = new float[impl.Count];
fixed (float* f = _timestamps)
{
for (int k = 0; k < impl.Count; k++)
{
f[k] = t[k];
}
}
}
}

}

// Sequentially laid-out record whose fields the constructor reads to build
// the managed result. NOTE(review): presumably mirrors the native result
// struct, so the field order should not be changed - confirm against the C API.
[StructLayout(LayoutKind.Sequential)]
struct Impl
{
// Pointer to the recognized text; the constructor copies bytes from it and
// decodes them as UTF-8.
public IntPtr Text;
// Pointer read as an array of Count floats. May be IntPtr.Zero, in which
// case the constructor copies no timestamps.
public IntPtr Timestamps;
// Number of tokens; also the number of floats read from Timestamps.
public int Count;
// Pointer to Count consecutive null-terminated byte strings; each one is
// decoded as UTF-8 into one token.
public IntPtr Tokens;
}

// Backing field for Text; the constructor fills it by decoding UTF-8 bytes.
private String _text;
/// <summary>The recognized text.</summary>
public String Text => _text;
}

// Backing field for Tokens; the constructor allocates one entry per token.
private String[] _tokens;
/// <summary>The tokens of the result, each decoded as UTF-8.</summary>
public String[] Tokens => _tokens;

// Backing field for Timestamps. The constructor assigns it only when the
// timestamps pointer of the result is not IntPtr.Zero.
private float[] _timestamps;
/// <summary>
/// One float per token when the result carries timestamps. The visible
/// constructor code leaves this unassigned otherwise, so check for null
/// before use.
/// </summary>
public float[] Timestamps => _timestamps;
}
}

0 comments on commit 3622104

Please sign in to comment.