/// <summary>
/// Given a potentially extremely large table, shred it into smaller CSV files based on the values in columnName.
/// This can be very useful for easily building an index for a large file.
/// For each unique value in column, funcCreateStream is invoked with that value to get a TextWriter. The csv is written to that writer.
/// The ordering within each small file is preserved.
/// This stream based overload is useful when you need to avoid writing to the local file system (such as with Azure storage).
/// Every TextWriter obtained from <paramref name="funcCreateStream"/> is closed by this method before it returns,
/// including when an exception is thrown partway through.
/// </summary>
/// <param name="table">original table to shred</param>
/// <param name="columnName">column name to use for shredding. You can use <see cref="GetColumnValueCounts"/>
/// to see the variation in each column to determine a good column to use for shredding.
/// </param>
/// <param name="funcCreateStream">callback function to create a stream for each new table.</param>
/// <exception cref="ArgumentNullException">if <paramref name="table"/>, <paramref name="columnName"/>,
/// or <paramref name="funcCreateStream"/> is null.</exception>
public static void Shred(DataTable table, string columnName, Func<string, TextWriter> funcCreateStream)
{
    // Fail fast with a clear message instead of an obscure NullReferenceException inside the loop.
    if (table == null)
    {
        throw new ArgumentNullException(nameof(table));
    }
    if (columnName == null)
    {
        throw new ArgumentNullException(nameof(columnName));
    }
    if (funcCreateStream == null)
    {
        throw new ArgumentNullException(nameof(funcCreateStream));
    }

    // One writer per unique column value; created lazily on first sight of the value.
    Dictionary<string, TextWriter> dict = new Dictionary<string, TextWriter>();
    try
    {
        foreach (Row row in table.Rows)
        {
            string val = row[columnName];
            // NOTE(review): a null cell value would throw here (Dictionary rejects null keys) —
            // presumably the indexer never returns null for an existing column; confirm upstream.
            TextWriter tw;
            if (!dict.TryGetValue(val, out tw))
            {
                // First time this value is seen: open a new output and emit the header row.
                tw = funcCreateStream(val);
                dict[val] = tw;
                CsvWriter.RawWriteLine(table.ColumnNames, tw); // header
            }
            CsvWriter.RawWriteLine(row.Values, tw);
        }
    }
    finally
    {
        // Close (and thereby flush) every writer we opened, even on partial failure.
        foreach (var kv in dict)
        {
            kv.Value.Close();
        }
    }
}
/// <summary>
/// Writes this row's values as one CSV line to the given writer.
/// </summary>
/// <param name="tw">destination writer; not closed or flushed by this method.</param>
internal void WriteCsv(TextWriter tw) => CsvWriter.RawWriteLine(this.Values, tw);