speechToText method

@override
Future<String> speechToText(
  List<int> audioBytes, {
  String? mimeType,
  String? language,
})

Transcribes spoken audio bytes into text.

- `audioBytes`: Raw or encoded audio data (e.g. mp3, wav).
- `mimeType`: The MIME type of the audio.
- `language`: Optional ISO language code that forces transcription in a specific language (e.g. 'en', 'ar').

Implementation

/// Transcribes spoken audio bytes into text.
///
/// Sends [audioBytes] as a multipart `file` upload to
/// `$_baseUrl/audio/transcriptions` using the `whisper-1` model and returns
/// the `text` field of the JSON response.
///
/// * [audioBytes] - raw or encoded audio data (e.g. mp3, wav).
/// * [mimeType] - optional MIME type of the audio; only used to choose the
///   extension of the uploaded filename (`audio.mp3` when absent or
///   unrecognised).
/// * [language] - optional language code forwarded as the `language` form
///   field (e.g. `'en'`, `'ar'`).
///
/// Throws whatever [_parseError] produces when the status code is not 200,
/// and a [FormatException] when a 200 response carries no string `text`
/// field.
@override
Future<String> speechToText(
  List<int> audioBytes, {
  String? mimeType,
  String? language,
}) async {
  final uri = Uri.parse('$_baseUrl/audio/transcriptions');
  final request = http.MultipartRequest('POST', uri)
    ..headers['Authorization'] = 'Bearer ${config.apiKey}'
    ..fields['model'] = 'whisper-1';
  if (language != null) request.fields['language'] = language;

  // Guess the filename extension from the MIME type. MIME types are
  // case-insensitive, so compare in lower case. When several tokens match,
  // the later entry wins, which keeps the original wav < m4a < mp4 precedence.
  var filename = 'audio.mp3';
  if (mimeType != null) {
    final mime = mimeType.toLowerCase();
    for (final ext in const ['flac', 'ogg', 'webm', 'wav', 'm4a', 'mp4']) {
      if (mime.contains(ext)) filename = 'audio.$ext';
    }
  }

  request.files.add(
    http.MultipartFile.fromBytes(
      'file',
      audioBytes,
      filename: filename,
    ),
  );

  final streamedResponse = await _httpClient.send(request);
  final response = await http.Response.fromStream(streamedResponse);

  // Decode the bytes as UTF-8 explicitly: `response.body` may fall back to
  // Latin-1 when the Content-Type header has no charset, which would garble
  // non-ASCII transcriptions.
  final body = utf8.decode(response.bodyBytes, allowMalformed: true);

  // Decode defensively so a non-JSON body (e.g. an HTML error page from a
  // gateway) does not mask the HTTP status with a decoding exception.
  Map<String, dynamic>? json;
  try {
    final decoded = jsonDecode(body);
    if (decoded is Map<String, dynamic>) json = decoded;
  } on FormatException {
    // Not JSON; handled below according to the status code.
  }

  if (response.statusCode != 200) {
    // NOTE(review): the fallback map assumes _parseError reads an
    // `error.message` entry - confirm against its implementation.
    throw _parseError(
      response.statusCode,
      json ??
          <String, dynamic>{
            'error': <String, dynamic>{'message': body},
          },
    );
  }

  final text = json?['text'];
  if (text is! String) {
    throw FormatException(
      'Transcription response does not contain a "text" string',
      body,
    );
  }
  return text;
}