/// <summary>
/// Sort a mutable datatable in place by the given column.
/// Rows whose values in the sort column compare equal keep their original relative order.
/// </summary>
/// <param name="dt">data table to sort</param>
/// <param name="columnName">column name to sort on. Throws if missing</param>
/// <param name="comparer">Comparer applied to the values of the sort column</param>
/// <exception cref="ArgumentNullException">dt or comparer is null</exception>
public static void Sort(this MutableDataTable dt, string columnName, IComparer<string> comparer)
{
    if (dt == null)
    {
        throw new ArgumentNullException("dt");
    }
    if (comparer == null)
    {
        throw new ArgumentNullException("comparer");
    }

    var column = dt.GetColumn(columnName, throwOnMissing: true);
    string[] keys = column.Values;
    int len = keys.Length;

    // map[i] is the index of the original row that lands at sorted position i.
    int[] map = new int[len];
    for (int i = 0; i < len; i++)
    {
        map[i] = i;
    }

    // Array.Sort is documented as unstable. Breaking ties on the original row index
    // turns the comparison into a total order, so equal keys keep their input order
    // and the result is deterministic.
    Array.Sort(map, (a, b) =>
    {
        int result = comparer.Compare(keys[a], keys[b]);
        return result != 0 ? result : a.CompareTo(b);
    });

    // Sort other columns for consistency
    foreach (var c in dt.Columns)
    {
        if (c == column)
        {
            continue;
        }

        string[] newVals = new string[len];
        for (int i = 0; i < len; i++)
        {
            newVals[i] = c.Values[map[i]];
        }
        c.Values = newVals;
    }

    // Reorder the key column last (the comparison above reads from it), and write the
    // result back into the existing array so the column is still sorted in place.
    string[] sortedKeys = new string[len];
    for (int i = 0; i < len; i++)
    {
        sortedKeys[i] = keys[map[i]];
    }
    Array.Copy(sortedKeys, keys, len);
}
// $$$ Clarify - multiple joins (inner, outer, etc)
/// <summary>
/// Performs a full outer join on two in-memory tables and returns a new table.
/// The resulting table has one row per distinct join key found in either source table:
/// keys present in both tables produce a single merged row, and keys present in only one
/// table produce a row in which only that table's columns are filled in.
/// The number of columns in the new table is the sum of columns in the source tables minus 1
/// (since the join column is redundant).
/// </summary>
/// <param name="d1">first table; its columns come first in the result</param>
/// <param name="d2">second table; its columns follow those of <paramref name="d1"/></param>
/// <param name="columnName">column name to join on. Both tables must have this column name.</param>
/// <returns>a new table</returns>
/// <exception cref="ArgumentNullException">d1 or d2 is null</exception>
/// <exception cref="InvalidOperationException">either table lacks the join column</exception>
public static MutableDataTable Join(MutableDataTable d1, MutableDataTable d2, string columnName)
{
    if (d1 == null)
    {
        throw new ArgumentNullException("d1");
    }
    if (d2 == null)
    {
        throw new ArgumentNullException("d2");
    }

    Column c1 = d1.GetColumn(columnName);
    if (c1 == null)
    {
        throw new InvalidOperationException("Missing column");
    }
    Column c2 = d2.GetColumn(columnName);
    if (c2 == null)
    {
        throw new InvalidOperationException("Missing column");
    }

    // Place d1 in first set of columns, and d2 in second set.
    int kColumn = d1.Columns.Length;
    int kTotalColumns = kColumn + d2.Columns.Length;

    // Indices into new table where join columns are.
    int joinColumn1 = Utility.GetColumnIndexFromName(d1.ColumnNames, columnName);
    int joinColumn2 = Utility.GetColumnIndexFromName(d2.ColumnNames, columnName) + kColumn;

    // $$$ could really optimize. Sort both on column and then zip.
    // Maps from join-key value to row index in each table.
    Dictionary<string, int> m1 = GetRowIndex(c1);
    Dictionary<string, int> m2 = GetRowIndex(c2);

    // $$$ column names may not be unique.
    // The headers are concatenated rather than unioned so that every source column keeps
    // its own position in the output.
    string[] headers = new string[kTotalColumns];
    Array.Copy(d1.ColumnNames.ToArray(), 0, headers, 0, kColumn);
    Array.Copy(d2.ColumnNames.ToArray(), 0, headers, kColumn, kTotalColumns - kColumn);

    string[] values = new string[headers.Length];

    string path = GetTempFileName();
    MutableDataTable t;
    try
    {
        using (CsvWriter tw = new CsvWriter(path, headers))
        {
            foreach (var kv in m1)
            {
                Clear(values);

                string key = kv.Key; // join column
                int r1 = kv.Value;
                int r2;
                if (m2.TryGetValue(key, out r2))
                {
                    // In both. Fill in d2's half and drop the key from m2 so that only
                    // d2-only keys remain for the second pass.
                    CopyRowIntoArray(values, kColumn, d2, r2);
                    m2.Remove(key);
                }
                // Otherwise the key is only in m1 and d2's half stays as Clear left it.

                CopyRowIntoArray(values, 0, d1, r1);
                values[joinColumn1] = values[joinColumn2] = key;

                tw.WriteRow(values);
            }

            // We remove all of M1's items from m2, so M2 is just unique items now. (possibly 0).
            // Tag those onto the end.
            foreach (var kv in m2)
            {
                int r2 = kv.Value;
                Clear(values);
                CopyRowIntoArray(values, kColumn, d2, r2);
                values[joinColumn1] = values[joinColumn2] = kv.Key;

                tw.WriteRow(values);
            }
        } // close tw

        t = Reader.ReadCSV(path);
    }
    finally
    {
        // Always remove the temp file, including when writing or reading throws.
        // NOTE(review): assumes DeleteLocalFile tolerates a path whose file was never
        // created (e.g. the CsvWriter constructor failed) - confirm.
        DeleteLocalFile(path);
    }

    // Remove duplicate columns.
    t.DeleteColumn(joinColumn2);

    return t;
}