Skip to content

Commit

Permalink
Handle invalid utf8 sequence from Whisper for Dart API. (#1106)
Browse files Browse the repository at this point in the history
Fixes #1104
  • Loading branch information
csukuangfj authored Jul 10, 2024
1 parent 08c7585 commit 5a2603f
Show file tree
Hide file tree
Showing 4 changed files with 33 additions and 2 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
## 1.10.14 (to-be-released)

* Fix handling of invalid UTF-8 sequences produced by Whisper in the Dart API.

## 1.10.13

* Update onnxruntime from 1.17.1 to 1.18.0
Expand Down
3 changes: 2 additions & 1 deletion flutter/sherpa_onnx/lib/src/offline_recognizer.dart
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import 'package:ffi/ffi.dart';
import './feature_config.dart';
import './offline_stream.dart';
import './sherpa_onnx_bindings.dart';
import './utils.dart';

class OfflineTransducerModelConfig {
const OfflineTransducerModelConfig({
Expand Down Expand Up @@ -287,7 +288,7 @@ class OfflineRecognizer {
return OfflineRecognizerResult(text: '', tokens: [], timestamps: []);
}

final parsedJson = jsonDecode(json.toDartString());
final parsedJson = jsonDecode(toDartString(json));

SherpaOnnxBindings.destroyOfflineStreamResultJson?.call(json);

Expand Down
3 changes: 2 additions & 1 deletion flutter/sherpa_onnx/lib/src/online_recognizer.dart
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import 'package:ffi/ffi.dart';
import './feature_config.dart';
import './online_stream.dart';
import './sherpa_onnx_bindings.dart';
import './utils.dart';

class OnlineTransducerModelConfig {
const OnlineTransducerModelConfig({
Expand Down Expand Up @@ -268,7 +269,7 @@ class OnlineRecognizer {
return OnlineRecognizerResult(text: '', tokens: [], timestamps: []);
}

final parsedJson = jsonDecode(json.toDartString());
final parsedJson = jsonDecode(toDartString(json));

SherpaOnnxBindings.destroyOnlineStreamResultJson?.call(json);

Expand Down
25 changes: 25 additions & 0 deletions flutter/sherpa_onnx/lib/src/utils.dart
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
// Copyright (c) 2024 Xiaomi Corporation
import 'dart:convert';
import 'dart:ffi';
import 'dart:typed_data';

import 'package:ffi/ffi.dart';

/// Returns the number of bytes that precede the first zero byte at
/// [codeUnits].
///
/// Adapted from the `_length` helper in package:ffi:
/// https://github.com/dart-archive/ffi/blob/main/lib/src/utf8.dart#L52
///
/// No upper bound is applied: the scan continues until a zero byte is read,
/// so the caller must pass a pointer to NUL-terminated memory.
int _strLen(Pointer<Uint8> codeUnits) {
  for (var offset = 0;; offset++) {
    // The first zero byte is the terminator; its offset is the length.
    if (codeUnits[offset] == 0) {
      return offset;
    }
  }
}

/// Converts the NUL-terminated byte string at [s] into a Dart [String].
///
/// Adapted from `Utf8Pointer.toDartString` in package:ffi:
/// https://github.com/dart-archive/ffi/blob/main/lib/src/utf8.dart#L41
///
/// The bytes are decoded with `allowMalformed: true`, so an invalid UTF-8
/// sequence does not make decoding fail; per the `dart:convert`
/// documentation, malformed sequences are replaced with U+FFFD.
///
/// Throws an [ArgumentError] if [s] is [nullptr].
String toDartString(Pointer<Utf8> s) {
  // Scanning for the terminator from address 0 would crash the process with
  // a native fault, so reject a null pointer up front with a Dart error.
  if (s == nullptr) {
    throw ArgumentError.value(s, 's', 'must not be nullptr');
  }

  final codeUnits = s.cast<Uint8>();
  final length = _strLen(codeUnits);
  // asTypedList is a view over the native memory; utf8.decode produces the
  // resulting String before this function returns.
  return utf8.decode(codeUnits.asTypedList(length), allowMalformed: true);
}

0 comments on commit 5a2603f

Please sign in to comment.