speechToText method
Transcribes spoken audio bytes into text.
- `audioBytes`: Raw or encoded audio data (e.g. mp3, wav).
- `mimeType`: The MIME type of the audio; used to choose the uploaded file's extension.
- `language`: Optional ISO language code that forces transcription in a specific language (e.g., 'en', 'ar').
Implementation
/// Transcribes [audioBytes] into text using the `whisper-1` model.
///
/// Sends a multipart `POST` to `$_baseUrl/audio/transcriptions` with the
/// audio attached as the `file` part.
///
/// [mimeType] is only used to pick the uploaded file's extension; when it is
/// `null` or not recognized, the upload is named `audio.mp3`. [language], when
/// given, is forwarded as the `language` form field.
///
/// Returns the `text` field of the JSON response.
///
/// Throws whatever [_parseError] produces when the status code is not 200,
/// and a [FormatException] when a 200 response has no string `text` field.
@override
Future<String> speechToText(
  List<int> audioBytes, {
  String? mimeType,
  String? language,
}) async {
  final uri = Uri.parse('$_baseUrl/audio/transcriptions');
  final request = http.MultipartRequest('POST', uri)
    ..headers['Authorization'] = 'Bearer ${config.apiKey}'
    ..fields['model'] = 'whisper-1';
  if (language != null) request.fields['language'] = language;

  // Guess the file extension from the MIME type. Matching is done on the
  // lower-cased value so 'Audio/WAV' behaves like 'audio/wav'. The mp4 > m4a >
  // wav precedence mirrors the previous behavior for overlapping matches.
  var filename = 'audio.mp3';
  final mime = mimeType?.toLowerCase();
  if (mime != null) {
    if (mime.contains('mp4')) {
      filename = 'audio.mp4';
    } else if (mime.contains('m4a')) {
      filename = 'audio.m4a';
    } else if (mime.contains('wav')) {
      filename = 'audio.wav';
    } else if (mime.contains('webm')) {
      filename = 'audio.webm';
    } else if (mime.contains('ogg')) {
      filename = 'audio.ogg';
    } else if (mime.contains('flac')) {
      filename = 'audio.flac';
    }
  }

  request.files.add(
    http.MultipartFile.fromBytes(
      'file',
      audioBytes,
      filename: filename,
    ),
  );

  final streamedResponse = await _httpClient.send(request);
  final response = await http.Response.fromStream(streamedResponse);

  // Decode the bytes as UTF-8 explicitly: JSON is UTF-8, and relying on the
  // response's charset header can garble non-ASCII transcripts.
  final body = utf8.decode(response.bodyBytes, allowMalformed: true);

  // Error responses are not guaranteed to be JSON (e.g. an HTML page from a
  // proxy), so a decode failure must not mask the HTTP status.
  Object? decoded;
  try {
    decoded = jsonDecode(body);
  } on FormatException {
    // Leave `decoded` as null; handled below.
  }
  final json = decoded is Map<String, dynamic> ? decoded : null;

  if (response.statusCode != 200) {
    // NOTE(review): the fallback map assumes _parseError reads
    // `error.message` from the payload — confirm against its implementation.
    throw _parseError(
      response.statusCode,
      json ??
          <String, dynamic>{
            'error': <String, dynamic>{'message': body},
          },
    );
  }

  final text = json?['text'];
  if (text is! String) {
    throw FormatException(
      'Transcription response did not contain a "text" string.',
      body,
    );
  }
  return text;
}