/// <summary> /// Merge 2 datatables together assuming no common join key. /// This will collapse common columns, but keep all rows. /// This needs to deal with columns being in different orders /// </summary> /// <param name="tables">set of tables to merge together</param> /// <returns>a merged table. The rows may be in a random order.</returns> public static MutableDataTable Join(IEnumerable <DataTable> tables) { var dict = new Dictionary2d <string, string, string>(); int counter = 0; foreach (var dt in tables) { Add(dt, dict, ref counter); } var merge = DataTable.New.From2dDictionary(dict); // remove extra column that Dict2d added, and reorder to more closely match dataset var mutable = DataTable.New.GetMutableCopy(merge); IEnumerable <string> columnNames = new string[0]; foreach (var dt in tables) { columnNames = columnNames.Concat(dt.ColumnNames); } var names = columnNames.Distinct(StringComparer.OrdinalIgnoreCase); var x = names.ToArray(); mutable.KeepColumns(x); return(mutable); }
// Read the excel sheet from the workbook and return as a data table. // Return null if sheet is empty. private static MutableDataTable ReadSheet(WorkbookPart wbPart, Sheet sheet) { string sheetName = sheet.Name.Value; // Retrieve a reference to the worksheet part. WorksheetPart wsPart = (WorksheetPart)(wbPart.GetPartById(sheet.Id)); IEnumerable <Cell> cells = wsPart.Worksheet.Descendants <Cell>(); Dictionary2d <int, int, string> vals = new Dictionary2d <int, int, string>(); foreach (Cell c in cells) { var val = CellToText(wbPart, c); var loc = c.CellReference; var loc2 = ParseRef(loc); int columnId = loc2.Item1; int rowId = loc2.Item2; vals[rowId, columnId] = val; } if (vals.Count > 0) { MutableDataTable dt = ToTable(vals); dt.Name = sheetName; return(dt); } return(null); }
// Convert a 2d dict into a 2d data table. // TKey1 is rows, TKey1 is columns. // Data table column names are obtained from key values. // Column 0 is set of row values. internal static MutableDataTable ToTable <TKey1, TKey2, TValue>(Dictionary2d <TKey1, TKey2, TValue> dict) { // TKey1 is rows, TKey2 is values. MutableDataTable d = new MutableDataTable(); var rows = dict.Key1; int count = rows.Count(); // Set columns var columns = dict.Key2.ToArray(); { Column[] cs = new Column[columns.Length + 1]; cs[0] = new Column("row name", count); for (int ic = 0; ic < columns.Length; ic++) { cs[ic + 1] = new Column(columns[ic].ToString(), count); } d.Columns = cs; } // Add rows int i = 0; foreach (var row in rows) { d.Columns[0].Values[i] = row.ToString(); for (int ic = 0; ic < columns.Length; ic++) { d.Columns[ic + 1].Values[i] = dict[row, columns[ic]].ToString(); } i++; } return(d); }
static void Add(DataTable table, Dictionary2d <string, string, string> dict, ref int counter) { foreach (var row in table.Rows) { int i = 0; foreach (var name in row.ColumnNames) { var value = row.Values[i]; dict[counter.ToString(), name] = value; i++; } counter++; } }
static void Add(DataTable table, Dictionary2d <string, string, string> dict, ref int counter) { var names = (from name in table.ColumnNames select name.ToLowerInvariant()).ToArray(); foreach (var row in table.Rows) { int i = 0; foreach (var name in names) { var value = row.Values[i]; dict[counter.ToString(), name] = value; i++; } counter++; } }
// skip access private static MutableDataTable ToTable <TValue>(Dictionary2d <int, int, TValue> dict) { // TKey1 is rows, TKey2 is values. MutableDataTable d = new MutableDataTable(); var rows = dict.Key1; int count = rows.Count() - 1; // Set columns var columns = dict.Key2.ToArray(); { Column[] cs = new Column[columns.Length]; for (int ic = 0; ic < columns.Length; ic++) { // fix for empty column name string columnName = dict[0, columns[ic]] == null ? string.Empty : dict[0, columns[ic]].ToString();; cs[ic] = new Column(columnName, count); } d.Columns = cs; } // Add rows int i = 0; foreach (var row in rows) { i++; if (i == 1) { continue; // skip 1st row, header } for (int ic = 0; ic < columns.Length; ic++) { var value = dict[row, columns[ic]]; string s = (value == null) ? string.Empty : value.ToString(); d.Columns[ic].Values[i - 2] = s; } } return(d); }
// Read the excel sheet from the workbook and return as a data table. // Return null if sheet is empty. private static MutableDataTable ReadSheet(WorkbookPart wbPart, Sheet sheet) { string sheetName = sheet.Name.Value; // Retrieve a reference to the worksheet part. WorksheetPart wsPart = (WorksheetPart)(wbPart.GetPartById(sheet.Id)); IEnumerable <Cell> cells = wsPart.Worksheet.Descendants <Cell>(); Dictionary2d <int, int, string> vals = new Dictionary2d <int, int, string>(); // Retrieve a cached list of shared strings of this workbook to be used by all cell references IList <OpenXmlElement> sharedStrings = wbPart.GetPartsOfType <SharedStringTablePart>().Select(sharedString => sharedString.SharedStringTable.OfType <OpenXmlElement>().ToList()).FirstOrDefault(); foreach (Cell c in cells) { var val = CellToText(wbPart, c, sharedStrings); var loc = c.CellReference; var loc2 = ParseRef(loc); int columnId = loc2.Item1; int rowId = loc2.Item2; vals[rowId, columnId] = val; } sharedStrings.Clear(); if (vals.Count > 0) { MutableDataTable dt = ToTable(vals); dt.Name = sheetName; return(dt); } return(null); }
/// <summary> /// Copy the 2d-dictionary into a in-memory table. This is ideal for creating a sparse table from a dictionary. /// Column names are inferred from key values. /// </summary> public static MutableDataTable From2dDictionary <TKeyRow, TKeyColumn, TValue>(this DataTableBuilder builder, Dictionary2d <TKeyRow, TKeyColumn, TValue> dict) { Debug.Assert(builder != null); return(Utility.ToTable(dict)); }