Beispiel #1
0
        /// <summary>
        /// Given a potentially extremely large table, shred it into smaller CSV files based on the values in columnName.
        /// This can be very useful for easily building an index for a large file.
        /// For each unique value in the column, funcCreateStream is invoked once with that value to get a TextWriter;
        /// the header row and then every row carrying that value are written to that writer.
        /// The ordering within each small file is preserved.
        /// This stream based overload is useful when you need to avoid writing to the local file system (such as with Azure storage).
        /// Every writer obtained from funcCreateStream is closed before this method returns, including when shredding fails part-way.
        /// </summary>
        /// <param name="table">original table to shred</param>
        /// <param name="columnName">column name to use for shredding. You can use <see cref="GetColumnValueCounts"/>
        /// to see the variation in each column to determine a good column to use for shredding.
        /// </param>
        /// <param name="funcCreateStream">callback function to create a stream for each new table.
        /// It receives the distinct column value and must return a non-null writer; this method takes ownership of it and closes it.</param>
        /// <exception cref="ArgumentNullException">table, columnName or funcCreateStream is null.</exception>
        /// <exception cref="InvalidOperationException">funcCreateStream returned null for a column value.</exception>
        public static void Shred(DataTable table, string columnName, Func <string, TextWriter> funcCreateStream)
        {
            // Fail fast with a descriptive exception instead of a NullReferenceException part-way through the work.
            if (table == null)
            {
                throw new ArgumentNullException("table");
            }
            if (columnName == null)
            {
                throw new ArgumentNullException("columnName");
            }
            if (funcCreateStream == null)
            {
                throw new ArgumentNullException("funcCreateStream");
            }

            // One open writer per distinct value seen so far in the shred column.
            Dictionary <string, TextWriter> writers = new Dictionary <string, TextWriter>();

            try
            {
                foreach (Row row in table.Rows)
                {
                    TextWriter writer;
                    string     val = row[columnName];
                    if (!writers.TryGetValue(val, out writer))
                    {
                        // New value: obtain its writer and emit the header exactly once.
                        writer = funcCreateStream(val);
                        if (writer == null)
                        {
                            // Reject before storing, so the cleanup below never sees a null entry.
                            throw new InvalidOperationException("funcCreateStream returned null for column value '" + val + "'.");
                        }

                        // Register before writing so the writer is closed even if the header write fails.
                        writers[val] = writer;
                        CsvWriter.RawWriteLine(table.ColumnNames, writer); // header
                    }
                    CsvWriter.RawWriteLine(row.Values, writer);
                }
            }
            finally
            {
                // Close every writer even if one of them fails to close; otherwise a single
                // bad stream would leave all the remaining ones open.
                Exception firstCloseError = null;
                foreach (TextWriter writer in writers.Values)
                {
                    try
                    {
                        writer.Close();
                    }
                    catch (Exception ex)
                    {
                        if (firstCloseError == null)
                        {
                            firstCloseError = ex;
                        }
                    }
                }

                if (firstCloseError != null)
                {
                    // Rethrow the first close failure with its original type and stack trace.
                    System.Runtime.ExceptionServices.ExceptionDispatchInfo.Capture(firstCloseError).Throw();
                }
            }
        }
Beispiel #2
0
 /// <summary>
 /// Writes this single row as one CSV line to the supplied writer.
 /// </summary>
 /// <param name="tw">writer that receives the row's values</param>
 internal void WriteCsv(TextWriter tw)
 {
     CsvWriter.RawWriteLine(Values, tw);
 }