fromCsv static method
- String? path,
- Stream<List<int>>? rowStream,
- Encoding decoding = utf8,
- String fieldDelimiter = ',',
- String textDelimiter = '"',
- @Deprecated('No longer used; csv v8 auto-detects line endings') String eolToken = '\n',
- bool containsHeader = true,
- List<String>? columnNames,
- List<String>? skipColumns,
- int? maxRows,
- Set<Record> parseAsNull = const {},
- bool convertNumeric = true,
- bool convertDates = true,
- String? datePattern,
Build a dataframe from csv data.
Pass either a csv file path or a rowStream, which you may process
beforehand in some way.
fieldDelimiter and textDelimiter are forwarded to the underlying
CSV decoder. Line endings are detected automatically; eolToken
is kept for backwards compatibility but is not used during parsing.
If containsHeader is set to true (default), the first row of the
parsed csv data will be used as column names. Otherwise,
columnNames must be provided.
Passing parseAsNull leads to the specified values being replaced by null.
Upon skipColumns being specified, the corresponding columns will not be
added to the data frame. Likewise, only maxRows rows of csv data,
excluding the optionally included header row, will be read if specified.
convertNumeric enables automatic parsing of numeric values by the CSV
decoder. convertDates attempts to parse values of each column as dates.
This conversion may be parametrized via datePattern.
A datetime like '13.08.2022' could be parsed with 'dd.MM.yyyy'.
Implementation
/// Builds a dataframe from csv data.
///
/// Pass either a csv file [path] or a [rowStream] of raw bytes, which you
/// may process beforehand in some way. Exactly one of the two must be given,
/// otherwise an [ArgumentError] is thrown.
///
/// [fieldDelimiter] and [textDelimiter] are forwarded to the underlying CSV
/// decoder. Line endings are detected automatically; [eolToken] is kept for
/// backwards compatibility but is not used during parsing.
///
/// If [containsHeader] is true (the default), the first row of the parsed
/// csv data is consumed as the header row; it supplies the column names
/// unless [columnNames] is passed explicitly. If [containsHeader] is false,
/// [columnNames] must be provided.
///
/// Records contained in [parseAsNull] are replaced by null. Columns listed
/// in [skipColumns] are removed from the resulting data frame. If [maxRows]
/// is given, only that many rows of csv data — excluding the optional header
/// row — are read.
///
/// [convertNumeric] enables automatic parsing of numeric values by the CSV
/// decoder. [convertDates] attempts to parse the values of each column as
/// dates; this conversion may be parametrized via [datePattern], e.g. a
/// datetime like '13.08.2022' could be parsed with 'dd.MM.yyyy'.
static Future<DataFrame> fromCsv(
    {String? path,
    Stream<List<int>>? rowStream,
    Encoding decoding = utf8,
    String fieldDelimiter = ',',
    String textDelimiter = '"',
    @Deprecated('No longer used; csv v8 auto-detects line endings')
    String eolToken = '\n',
    bool containsHeader = true,
    List<String>? columnNames,
    List<String>? skipColumns,
    int? maxRows,
    Set<Record> parseAsNull = const {},
    bool convertNumeric = true,
    bool convertDates = true,
    String? datePattern}) async {
  // do argument validity checks: exactly one input source must be supplied
  if (!((path == null) ^ (rowStream == null))) {
    throw ArgumentError('Pass either a file path or a row stream');
  }
  StreamTransformer<List<int>, String> decoder;
  try {
    decoder = decoding.decoder as StreamTransformer<List<int>, String>;
  } on TypeError catch (_, s) {
    throw ArgumentError(
        'Pass codec whose .decoder property is of type StreamTransformer<List<int>, String>: $s');
  }
  if (!containsHeader && columnNames == null) {
    throw ArgumentError(
        'Pass column names if the csv does not contain a header row');
  }
  // extract fields
  if (path != null) {
    rowStream = File(path).openRead();
  }
  var csvRowStream = rowStream!.transform(decoder).transform(
        CsvDecoder(
          fieldDelimiter: fieldDelimiter,
          quoteCharacter: textDelimiter,
          dynamicTyping: convertNumeric,
        ),
      );
  // take only {maxRows} rows if passed; the header row, when present, does
  // not count towards the limit
  if (maxRows != null) {
    csvRowStream = csvRowStream.take(maxRows + (containsHeader ? 1 : 0));
  }
  final fields =
      await csvRowStream.map((row) => row.cast<Record>().toList()).toList();
  // consume the header row whenever one is declared; previously the header
  // leaked into the data when [columnNames] was passed alongside
  // containsHeader == true
  if (containsHeader) {
    if (fields.isNotEmpty) {
      final header = fields.removeAt(0).cast<String>();
      // explicitly passed names take precedence over the header row
      columnNames ??= header;
    } else if (columnNames == null) {
      throw ArgumentError(
          'Csv data is empty; no header row to read column names from');
    }
  }
  // instantiate DataFrame; columnNames is non-null here by the checks above
  final df = DataFrame.fromNamesAndData(columnNames!, fields);
  // skip columns if required
  if (skipColumns != null) {
    for (final name in skipColumns) {
      df.removeColumn(name);
    }
  }
  // convert records present in [parseAsNull] to null if required;
  //
  // NOTE: this should really be done by the Csv decoder, however there's no
  // respective parameter to do so. Iterating twice over the entirety of the data
  // introduces a ton of overhead
  if (parseAsNull.isNotEmpty) {
    df.forEachIndexed((i, row) {
      row.forEachIndexed((j, record) {
        if (parseAsNull.contains(record)) {
          df[i][j] = null;
        }
      });
    });
  }
  // attempt to convert dates if required; columns that fail to parse are
  // deliberately left untouched (best-effort conversion)
  if (convertDates) {
    for (final name in df._trackedColumnNames) {
      try {
        df.transformColumn(
            name,
            (element) => element != null
                ? Jiffy.parse(element, pattern: datePattern).dateTime
                : null);
      } catch (_) {}
    }
  }
  return df;
}