/// <summary>
/// Asking the table for a column name it does not contain must yield null.
/// </summary>
public void GetMissingColumnIsNull()
{
    MutableDataTable table = GetTable();

    Assert.Null(table.GetColumn("missing"));
}
/// <summary>
/// A column fetched by name reports that name and exposes its cell values.
/// </summary>
public void GetColumn()
{
    string[] expectedValues = new string[] { "Bob", "Fred" };

    Column first = GetTable().GetColumn("first");

    Assert.Equal("first", first.Name);
    Assert.Equal(expectedValues, first.Values);
}
/// <summary>
/// Checks the table's column names and that an unknown column name yields null.
/// </summary>
public void Columns()
{
    string[] expectedNames = new string[] { "name", "age", "favorite fruit" };
    MutableDataTable table = GetTable();

    Assert.Equal(expectedNames, table.ColumnNames);

    // A column that is not part of the table is reported as null.
    Assert.Null(table.GetColumn("missing"));
}
/// <summary>
/// Writing a cell through a <c>Row</c> must be visible through the row's indexer,
/// the row's <c>Values</c> array, and a lookup of the same cell via its column.
/// </summary>
public void MutateRow()
{
    MutableDataTable dt = GetTable();
    Row row = dt.GetRow(0);

    row["first"] = "Ed"; // modify by row

    Assert.Equal("Ed", row["first"]);
    Assert.Equal("Ed", row.Values[0]);

    // Verify lookup by column. Expected value goes first so that a failure
    // message labels expected/actual correctly.
    Assert.Equal("Ed", dt.GetColumn("first").Values[0]);
}
// $$$ Clarify - multiple joins (inner, outer, etc)
/// <summary>
/// Performs a full outer join on two in-memory tables and returns a new table.
/// One row is written per join-key entry: a key found in both tables produces a single
/// combined row, while a key found in only one table produces a row in which only that
/// table's columns (and the join column) are filled in.
/// The number of columns in the new table is the sum of columns in the source tables minus 1
/// (since the join column is redundant).
/// </summary>
/// <remarks>
/// The joined rows are staged in a temporary CSV file which is read back to build the result.
/// The temporary file is deleted even when writing or reading fails.
/// NOTE(review): how repeated key values within one table are treated depends on GetRowIndex,
/// which is not visible here - confirm.
/// </remarks>
/// <param name="d1">first table; its columns come first in the result.</param>
/// <param name="d2">second table; its columns follow those of <paramref name="d1"/>.</param>
/// <param name="columnName">column name to join on. Both tables must have this column name.</param>
/// <returns>a new table</returns>
/// <exception cref="InvalidOperationException">either table lacks the join column.</exception>
public static MutableDataTable Join(MutableDataTable d1, MutableDataTable d2, string columnName)
{
    Column c1 = d1.GetColumn(columnName);
    if (c1 == null)
    {
        throw new InvalidOperationException("Missing column");
    }

    Column c2 = d2.GetColumn(columnName);
    if (c2 == null)
    {
        throw new InvalidOperationException("Missing column");
    }

    // Place d1 in first set of columns, and d2 in second set.
    int kColumn = d1.Columns.Length;
    int kTotalColumns = kColumn + d2.Columns.Length;

    // Indices into new table where join columns are.
    int joinColumn1 = Utility.GetColumnIndexFromName(d1.ColumnNames, columnName);
    int joinColumn2 = Utility.GetColumnIndexFromName(d2.ColumnNames, columnName) + kColumn;

    // $$$ could really optimize. Sort both on column and then zip.
    Dictionary<string, int> m1 = GetRowIndex(c1);
    Dictionary<string, int> m2 = GetRowIndex(c2);

    // $$$ column names may not be unique.
    string[] headers = new string[kTotalColumns];
    Array.Copy(d1.ColumnNames.ToArray(), 0, headers, 0, kColumn);
    Array.Copy(d2.ColumnNames.ToArray(), 0, headers, kColumn, kTotalColumns - kColumn);

    // Scratch buffer reused for every output row.
    string[] values = new string[headers.Length];

    string path = GetTempFileName();
    MutableDataTable t;
    try
    {
        using (CsvWriter tw = new CsvWriter(path, headers))
        {
            foreach (var kv in m1)
            {
                Clear(values);

                string key = kv.Key; // join column
                int r1 = kv.Value;
                int r2;
                if (m2.TryGetValue(key, out r2))
                {
                    // In both. Fill in d2's half and drop the key from m2 so that
                    // only d2-exclusive keys remain for the second pass.
                    CopyRowIntoArray(values, kColumn, d2, r2);
                    m2.Remove(key);
                }

                CopyRowIntoArray(values, 0, d1, r1);
                values[joinColumn1] = values[joinColumn2] = key;

                tw.WriteRow(values);
            }

            // We removed all of m1's items from m2, so m2 is just unique items now (possibly 0).
            // Tag those onto the end.
            foreach (var kv in m2)
            {
                int r2 = kv.Value;
                Clear(values);
                CopyRowIntoArray(values, kColumn, d2, r2);
                values[joinColumn1] = values[joinColumn2] = kv.Key;

                tw.WriteRow(values);
            }
        } // close tw

        t = Reader.ReadCSV(path);
    }
    finally
    {
        // Always remove the staging file, including when writing or reading threw.
        DeleteLocalFile(path);
    }

    // Remove duplicate columns.
    t.DeleteColumn(joinColumn2);

    return t;
}
/// <summary>
/// Returns the distinct values stored in the column named <paramref name="header"/>.
/// </summary>
/// <param name="header">name of the column to read.</param>
/// <returns>
/// A new list holding each distinct value of the column once; an empty list when the
/// column lookup yields null.
/// </returns>
public List<string> RetrieveDataFromHeader(string header)
{
    // NOTE(review): assumes csv.GetColumn returns null for an unknown header - confirm
    // against the type of csv. Guarding here avoids a NullReferenceException.
    var column = csv.GetColumn(header);
    if (column == null)
    {
        return new List<string>();
    }

    string[] columnData = column.Values;
    return columnData.Distinct().ToList();
}