// Build an index on a key heading.
// Each row of 'table' contributes one entry to 'dict': the key is a DataRow made
// from the row's values picked out through keyhdg.MakeIndex(table.Heading), and
// the value is the row's Order.
// Note: rows with equal keys overwrite each other, so only the last one is left.
// TODO: index with duplicates for join
void BuildIndex(DataTableLocal table, DataHeading keyhdg, Dictionary<DataRow, int> dict) {
    var columnMap = keyhdg.MakeIndex(table.Heading);
    foreach (var row in table.GetRows()) {
        // One value per key column, in key heading order.
        var keyValues = (from k in Enumerable.Range(0, keyhdg.Degree)
                         select row.Values[columnMap[k]]).ToArray();
        var key = DataRow.Create(keyhdg, keyValues);
        dict[key] = row.Order;
    }
}
// Generalised Set operation using key indexes built by BuildIndex.
// Handles all cases, projecting onto common heading, not necessarily optimal.
//
// Rows of this table (left) are projected onto newheading and kept:
//   UNION     - always
//   MINUS     - when the projected row is absent from the right index
//   SYMDIFF   - when the projected row is absent from the right index
//   INTERSECT - when the projected row is present in the right index
// Rows of other (right) are projected onto newheading and kept:
//   UNION     - always
//   SYMDIFF   - when the projected row is absent from the left index
// Any other joinops value produces an empty table.
DataTableLocal GeneralisedSet(DataTableLocal other, DataHeading newheading, JoinOps joinops) {
    Logger.WriteLine(4, "GenSet L={0} R={1} new={2} j={3}", this.Heading, other.Heading, newheading, joinops);

    // Only build the indexes the chosen operator will actually consult.
    var ldict = new Dictionary<DataRow, int>();
    var rdict = new Dictionary<DataRow, int>();
    switch (joinops) {
    case JoinOps.MINUS:
    case JoinOps.INTERSECT:
        BuildIndex(other, newheading, rdict);
        break;
    case JoinOps.SYMDIFF:
        BuildIndex(this, newheading, ldict);
        BuildIndex(other, newheading, rdict);
        break;
    }

    var newtable = DataTableLocal.Create(newheading);

    // Whether to scan the left rows depends on the operator, not on whether the
    // right index happens to be empty: with an empty right operand MINUS and
    // SYMDIFF must still return every left row. Only INTERSECT can skip the scan
    // when the right index is empty, because nothing could match.
    var scanLeft = joinops == JoinOps.UNION
        || joinops == JoinOps.MINUS
        || joinops == JoinOps.SYMDIFF
        || (joinops == JoinOps.INTERSECT && rdict.Count > 0);
    if (scanLeft) {
        var lmovndx = newheading.MakeIndex(Heading);
        foreach (var row in this.GetRows()) {   //TODO:Enumerable
            var newrow = row.Project(newheading, lmovndx);
            bool keep;
            if (joinops == JoinOps.MINUS || joinops == JoinOps.SYMDIFF) {
                keep = !rdict.ContainsKey(newrow);
            } else if (joinops == JoinOps.INTERSECT) {
                keep = rdict.ContainsKey(newrow);
            } else {
                keep = true;
            }
            if (keep) {
                newtable.AddRow(newrow);
            }
        }
    }

    // Likewise for the right rows: SYMDIFF with an empty left operand must still
    // return every right row, so do not gate this on ldict being non-empty.
    var scanRight = joinops == JoinOps.UNION || joinops == JoinOps.SYMDIFF;
    if (scanRight) {
        var rmovndx = newheading.MakeIndex(other.Heading);
        foreach (var row in other.GetRows()) {  //TODO:Enumerable
            var newrow = row.Project(newheading, rmovndx);
            var keep = (joinops == JoinOps.SYMDIFF) ? !ldict.ContainsKey(newrow) : true;
            if (keep) {
                newtable.AddRow(newrow);
            }
        }
    }

    Logger.WriteLine(4, "[GenSet={0}]", newtable);
    return newtable;
}
// Rows from both tables projected on common heading.
// Each side is projected onto newheading using the move index built from its
// own heading: Heading for rows of this table, other.Heading for rows of other.
private DataTableLocal Union(DataTableLocal other, DataHeading newheading) {
    var lmovndx = newheading.MakeIndex(Heading);
    var rmovndx = newheading.MakeIndex(other.Heading);
    var newtable = DataTableLocal.Create(newheading);

    foreach (var row in this.GetRows()) {   //TODO:Enumerable
        newtable.AddRow(row.Project(newheading, lmovndx));
    }
    foreach (var row in other.GetRows()) {  //TODO:Enumerable
        // Project onto newheading (the heading of newtable), not this.Heading.
        newtable.AddRow(row.Project(newheading, rmovndx));
    }

    Logger.WriteLine(4, "[Union={0}]", newtable);
    return newtable;
}
// Simpler algorithm when both have same heading.
// Rows of this table, then rows of other, are added unchanged to a new table
// created with this table's heading; no projection is performed.
// Throws EvaluatorException when the two headings are not equal.
private DataTableLocal Union(DataTableLocal other) {
    var sameHeading = this.Heading.Equals(other.Heading);
    if (!sameHeading) {
        throw new EvaluatorException("tables have different headings");
    }

    var result = DataTableLocal.Create(this.Heading);
    foreach (var source in new DataTableLocal[] { this, other }) {
        foreach (var row in source.GetRows()) {     //TODO:Enumerable
            result.AddRow(row);
        }
    }

    Logger.WriteLine(4, "[Union={0}]", result);
    return result;
}
///=================================================================
///
/// implementations -- common code
///

// Join via naive cross product and project onto given header.
// Every row of this table is compared with every row of other; each pair
// accepted by Matches produces one merged row with heading newheading, so
// multiple matches generate multiple rows.
// Handles anything, but not necessarily optimal.
// joinhdr is only written to the log here.
DataTableLocal GeneralisedJoin(DataTableLocal other, DataHeading newheading, DataHeading joinhdr) {
    Logger.WriteLine(4, "GenJoin L={0} R={1} new={2} j={3}", this.Heading, other.Heading, newheading, joinhdr);

    var matchMap = Heading.MakeIndex(other.Heading);
    var leftMap = newheading.MakeIndex(Heading);
    var rightMap = newheading.MakeIndex(other.Heading);
    var joined = DataTableLocal.Create(newheading);

    foreach (var left in this.GetRows()) {          //TODO:Enumerable
        foreach (var right in other.GetRows()) {    //TODO:Enumerable
            if (!Matches(left, right, matchMap)) {
                continue;
            }
            var mergedValues = left.MergeValues(leftMap, right, rightMap);
            joined.AddRow(DataRow.Create(newheading, mergedValues));
        }
    }

    Logger.WriteLine(4, "[Join={0}]", joined);
    return joined;
}