fromCsv static method

Future<DataFrame> fromCsv({
  1. String? path,
  2. Stream<List<int>>? rowStream,
  3. Encoding decoding = utf8,
  4. String fieldDelimiter = ',',
  5. String textDelimiter = '"',
  6. @Deprecated('No longer used; csv v8 auto-detects line endings') String eolToken = '\n',
  7. bool containsHeader = true,
  8. List<String>? columnNames,
  9. List<String>? skipColumns,
  10. int? maxRows,
  11. Set<Record> parseAsNull = const {},
  12. bool convertNumeric = true,
  13. bool convertDates = true,
  14. String? datePattern,
})

Build a dataframe from csv data.

Pass either a csv file path or a rowStream, which you may process beforehand in some way.

fieldDelimiter and textDelimiter are forwarded to the underlying CSV decoder. Line endings are detected automatically; eolToken is kept for backwards compatibility but is not used during parsing.

If containsHeader is set to true (default), the first row of the parsed csv data will be used as column names. Otherwise, columnNames must be provided.

Passing parseAsNull leads to the specified values being replaced by null.

Upon skipColumns being specified, the corresponding columns will not be added to the data frame. Likewise, only maxRows rows of csv data, excluding the optionally included header row, will be read if specified.

convertNumeric enables automatic parsing of numeric values by the CSV decoder. convertDates attempts to parse values of each column as dates. This conversion may be parametrized via datePattern. A datetime like '13.08.2022' could be parsed with 'dd.MM.yyyy'.

Implementation

/// Builds a [DataFrame] from CSV data.
///
/// Exactly one of [path] (a CSV file) or [rowStream] (a pre-processed byte
/// stream) must be passed. [decoding] converts the byte stream to text;
/// its `.decoder` must be a `StreamTransformer<List<int>, String>`.
///
/// [fieldDelimiter] and [textDelimiter] are forwarded to the CSV decoder.
/// Line endings are auto-detected; [eolToken] is retained only for backwards
/// compatibility and is not used during parsing.
///
/// If [containsHeader] is true (default), the first parsed row is consumed
/// as the header row; it supplies the column names unless [columnNames] is
/// given explicitly. If [containsHeader] is false, [columnNames] must be
/// provided.
///
/// Values contained in [parseAsNull] are replaced by `null`. Columns listed
/// in [skipColumns] are dropped from the resulting frame. At most [maxRows]
/// data rows (excluding an optional header row) are read when specified.
///
/// [convertNumeric] enables numeric parsing inside the CSV decoder.
/// [convertDates] attempts to parse each column as dates, optionally guided
/// by [datePattern] (e.g. 'dd.MM.yyyy' parses '13.08.2022').
///
/// Throws [ArgumentError] on invalid argument combinations and
/// [FormatException] when a header row is expected but the data is empty.
static Future<DataFrame> fromCsv(
    {String? path,
    Stream<List<int>>? rowStream,
    Encoding decoding = utf8,
    String fieldDelimiter = ',',
    String textDelimiter = '"',
    @Deprecated('No longer used; csv v8 auto-detects line endings')
    String eolToken = '\n',
    bool containsHeader = true,
    List<String>? columnNames,
    List<String>? skipColumns,
    int? maxRows,
    Set<Record> parseAsNull = const {},
    bool convertNumeric = true,
    bool convertDates = true,
    String? datePattern}) async {
  // Exactly one input source must be provided.
  if (!((path == null) ^ (rowStream == null))) {
    throw ArgumentError('Pass either a file path or a row stream');
  }

  StreamTransformer<List<int>, String> decoder;
  try {
    decoder = decoding.decoder as StreamTransformer<List<int>, String>;
  } on TypeError catch (_, s) {
    throw ArgumentError(
        'Pass codec whose .decoder property is of type StreamTransformer<List<int>, String>: $s');
  }

  if (!containsHeader && columnNames == null) {
    throw ArgumentError(
        'Pass column names if the csv does not contain a header row');
  }

  // Resolve a file path into a byte stream.
  if (path != null) {
    rowStream = File(path).openRead();
  }

  var csvRowStream = rowStream!.transform(decoder).transform(
        CsvDecoder(
          fieldDelimiter: fieldDelimiter,
          quoteCharacter: textDelimiter,
          dynamicTyping: convertNumeric,
        ),
      );

  // Limit the number of rows; account for the header row so that exactly
  // {maxRows} data rows are read.
  if (maxRows != null) {
    csvRowStream = csvRowStream.take(maxRows + (containsHeader ? 1 : 0));
  }

  final fields =
      await csvRowStream.map((row) => row.cast<Record>().toList()).toList();

  // FIX: always consume the header row when the csv contains one.
  // Previously the header was only removed when [columnNames] was null, so
  // passing explicit column names alongside containsHeader == true leaked
  // the header row into the data. Also fail with a descriptive error
  // instead of a RangeError when the data is empty.
  if (containsHeader) {
    if (fields.isEmpty) {
      throw const FormatException(
          'CSV data is empty; expected a header row');
    }
    final header = fields.removeAt(0).cast<String>();
    columnNames ??= header;
  }

  // instantiate DataFrame; columnNames is guaranteed non-null here (either
  // passed explicitly or taken from the header row)
  final df = DataFrame.fromNamesAndData(columnNames!, fields);

  // skip columns if required
  if (skipColumns != null) {
    for (final name in skipColumns) {
      df.removeColumn(name);
    }
  }

  // convert records present in [parseAsNull] to null if required;
  //
  // NOTE: this should really be done by the Csv decoder, however there's no
  // respective parameter to do so. Iterating twice over the entirety of the data
  // introduces a ton of overhead
  if (parseAsNull.isNotEmpty) {
    df.forEachIndexed((i, row) {
      row.forEachIndexed((j, record) {
        if (parseAsNull.contains(record)) {
          df[i][j] = null;
        }
      });
    });
  }

  // Best-effort date conversion: a column whose values fail to parse is
  // left unchanged (the thrown error is deliberately swallowed).
  if (convertDates) {
    for (final name in df._trackedColumnNames) {
      try {
        df.transformColumn(
            name,
            (element) => element != null
                ? Jiffy.parse(element, pattern: datePattern).dateTime
                : null);
      } catch (_) {}
    }
  }

  return df;
}