// Antijoin via hash index, projected onto the given heading.
// Keeps each row of this table whose projection onto joinhdng is NOT a key
// in the index built over 'other'; kept rows are projected onto newheading.
DataTableLocal GeneralisedAntijoin(DataTableLocal other, DataHeading newheading, DataHeading joinhdng) {
  Logger.WriteLine(4, "GenAntijoin L={0} R={1} new={2} j={3}", this.Heading, other.Heading, newheading, joinhdng);

  // Dictionary built over the other table, keyed via the join heading
  var otherIndex = new Dictionary<DataRow, int>();
  BuildIndex(other, joinhdng, otherIndex);

  // Column maps from this table's heading: one for the lookup key, one for the output row
  var keyMap = joinhdng.MakeIndex(Heading);
  var outputMap = newheading.MakeIndex(Heading);
  var result = DataTableLocal.Create(newheading);
  foreach (var candidate in this.GetRows()) {  //TODO:Enumerable
    var key = candidate.Project(joinhdng, keyMap);
    if (otherIndex.ContainsKey(key)) {
      continue;  // matched in other: excluded from an antijoin
    }
    result.AddRow(candidate.Project(newheading, outputMap));
  }
  Logger.WriteLine(4, "[Antijoin={0}]", result);
  return result;
}
// Read a comma-delimited file into a new table with the given heading.
// Returns null when the file does not exist. The first record (ordinal 0) is
// never added to the table (presumably a header line -- TODO confirm).
// When _hasid is set, the record ordinal is prepended as an extra leading field.
// Any failure adding a row is rethrown as a fatal ProgramError naming row and path.
public override DataTable Read(string file, DataHeading heading) {
  var path = GetPath(file);
  if (!File.Exists(path)) {
    return null;
  }
  var table = DataTableLocal.Create(heading);
  using (var parser = new TextFieldParser(path) {
    TextFieldType = FieldType.Delimited,
    Delimiters = new string[] { "," },
  }) {
    var id = 0;
    while (!parser.EndOfData) {
      var fields = parser.ReadFields();
      if (id > 0) {
        if (_hasid) {
          var idField = new string[] { id.ToString() };
          fields = idField.Concat(fields).ToArray();
        }
        try {
          table.AddRow(fields);
        } catch (Exception ex) {
          throw ProgramError.Fatal("Source Csv", "Error in row {0} of {1}: {2}", id, path, ex.Message);
        }
      }
      ++id;
    }
  }
  return table;
}
// Create a relation by converting a single value.
// A tuple value becomes a one-row table; a user-typed value becomes a one-row
// table built from its component values; a relation value is used as-is
// (the heading argument is not applied in that case).
// Asserts that exactly one value was passed and that it was one of these kinds.
public RelationValue TableC(HeadingValue hdgarg, params TypedValue[] valueargs) {
  Logger.Assert(valueargs.Length == 1, "TableC");
  var heading = hdgarg.AsHeading();
  var source = valueargs[0];
  var sourceType = source.DataType;

  DataTable converted;
  if (sourceType is DataTypeTuple) {
    converted = DataTableLocal.Create(heading, new DataRow[] { source.AsRow() });
  } else if (sourceType is DataTypeUser) {
    var userValue = source as UserValue;
    var onlyRow = DataRow.Create(heading, userValue.Value);
    converted = DataTableLocal.Create(heading, new DataRow[] { onlyRow });
  } else if (sourceType is DataTypeRelation) {
    converted = source.AsTable();
  } else {
    converted = null;  // unsupported kind: caught by the assert below
  }
  Logger.Assert(converted != null, "TableC");
  Logger.WriteLine(3, "[Table={0}]", converted);
  return RelationValue.Create(converted);
}
// Create a new local table with the given heading and add every supplied row
// to it (the rows may come from a different kind of table).
public new static DataTableLocal Create(DataHeading heading, IEnumerable<DataRow> rows) {
  var copy = DataTableLocal.Create(heading);
  foreach (var source in rows) {
    copy.AddRow(source);
  }
  return copy;
}
// Read a single-row table from the console: writes 'file' to standard output
// (used here as the prompt text), reads one line from standard input and
// builds the only row of the result from that line.
public override DataTable Read(string file, DataHeading heading) {
  var result = DataTableLocal.Create(heading);
  Console.WriteLine(file);
  var answer = Console.ReadLine();
  var onlyRow = DataRow.Create(heading, answer);
  result.AddRow(onlyRow);
  return result;
}
// Finish collecting a list: turn each collected item into a row of the holder's
// relation heading, store the resulting relation value in the holder's current
// column slot, and clear the list.
public void SetListEnd() {
  Logger.Assert(_valueholder.DataType is DataTypeRelation);
  var heading = _valueholder.DataType.Heading;
  var collected = _valueholder._list.Select(item => DataRow.Create(heading, item));
  var relation = RelationValue.Create(DataTableLocal.Create(heading, collected));
  _valueholder._values[_valueholder._colidx] = relation;
  _valueholder._list = null;
}
// Create a new local table with the given heading, then evaluate each
// expression in turn and add its result (converted to a row) to the table.
public new static DataTableLocal Create(DataHeading heading, IEnumerable<ExpressionEval> texprs) {
  var result = DataTableLocal.Create(heading);
  foreach (var tupleExpr in texprs) {
    var evaluated = tupleExpr.Evaluate();
    result.AddRow(evaluated.AsRow());
  }
  return result;
}
// Generalised set operation (UNION, MINUS, INTERSECT, SYMDIFF).
// Handles all cases by projecting rows of both operands onto the common
// heading 'newheading'; not necessarily optimal.
//
// Which operand is scanned is decided by the operator itself. Previously the
// scans were gated on the lookup dictionaries being non-empty, so an empty
// operand suppressed the scan of the other side: A MINUS {} and A SYMDIFF {}
// returned nothing instead of A, and {} SYMDIFF B returned nothing instead of B.
// Operators other than the four above still produce an empty table, as before.
DataTableLocal GeneralisedSet(DataTableLocal other, DataHeading newheading, JoinOps joinops) {
  Logger.WriteLine(4, "GenSet L={0} R={1} new={2} j={3}", this.Heading, other.Heading, newheading, joinops);

  // Left rows contribute to every supported operator; right rows only to UNION and SYMDIFF
  var scanleft = joinops == JoinOps.UNION || joinops == JoinOps.MINUS
    || joinops == JoinOps.INTERSECT || joinops == JoinOps.SYMDIFF;
  var scanright = joinops == JoinOps.UNION || joinops == JoinOps.SYMDIFF;

  // Lookup indexes on the projected rows, built only where the operator needs them
  var ldict = new Dictionary<DataRow, int>();
  var rdict = new Dictionary<DataRow, int>();
  switch (joinops) {
  case JoinOps.MINUS:
  case JoinOps.INTERSECT:
    BuildIndex(other, newheading, rdict);
    break;
  case JoinOps.SYMDIFF:
    BuildIndex(this, newheading, ldict);
    BuildIndex(other, newheading, rdict);
    break;
  }

  var newtable = DataTableLocal.Create(newheading);
  if (scanleft) {
    var lmovndx = newheading.MakeIndex(Heading);
    foreach (var row in this.GetRows()) {  //TODO:Enumerable
      var newrow = row.Project(newheading, lmovndx);
      bool ok;
      if (joinops == JoinOps.MINUS || joinops == JoinOps.SYMDIFF) {
        ok = !rdict.ContainsKey(newrow);   // keep left rows absent from right
      } else if (joinops == JoinOps.INTERSECT) {
        ok = rdict.ContainsKey(newrow);    // keep left rows present in right
      } else {
        ok = true;                         // UNION keeps everything
      }
      if (ok) {
        newtable.AddRow(newrow);
      }
    }
  }
  if (scanright) {
    var rmovndx = newheading.MakeIndex(other.Heading);
    foreach (var row in other.GetRows()) {  //TODO:Enumerable
      var newrow = row.Project(newheading, rmovndx);
      // SYMDIFF keeps right rows absent from left; UNION keeps everything
      var ok = (joinops == JoinOps.SYMDIFF) ? !ldict.ContainsKey(newrow) : true;
      if (ok) {
        newtable.AddRow(newrow);
      }
    }
  }
  Logger.WriteLine(4, "[GenSet={0}]", newtable);
  return newtable;
}
// Relation describing a heading: one row per column of the argument's
// heading, giving the column name and the name of its data type.
public RelationValue Schema(RelationValue relarg) {
  var heading = DataHeading.Create("Name:text", "Type:text");
  var description = DataTableLocal.Create(heading);
  foreach (var column in relarg.Value.Heading.Columns) {
    var entry = DataRow.Create(heading, column.Name, column.DataType.Name);
    description.AddRow(entry);
  }
  return RelationValue.Create(description);
}
// Read a table whose heading is already known.
// Note that the heading implies the order of values -- which is critical!
// The stream holds a 32-bit row count followed by that many rows.
DataTable ReadTable(DataHeading heading) {
  //Logger.Assert(_reader.ReadInt32() == Persist.RelationSignature);
  var result = DataTableLocal.Create(heading);
  var rowCount = _reader.ReadInt32();
  for (var read = 0; read < rowCount; ++read) {
    result.AddRow(ReadRow(heading));
  }
  return result;
}
// Project onto named columns.
// The new heading is derived from the expressions; the expressions are then
// reordered against the new table's heading and applied to every row.
public override DataTable Project(ExpressionEval[] exprs) {
  var projectedHeading = DataHeading.Create(exprs);
  var result = DataTableLocal.Create(projectedHeading);
  var orderedExprs = result.Heading.Reorder(exprs);
  foreach (var source in GetRows()) {  //TODO:Enumerable
    var projected = source.Transform(projectedHeading, orderedExprs);
    result.AddRow(projected);
  }
  Logger.WriteLine(4, "[Project={0}]", result);
  return result;
}
// Transform -- new table whose columns are generated by expressions.
// Asserts one expression per column of the new heading; the expressions are
// reordered against the new table's heading and applied to every row.
public override DataTable Transform(DataHeading newheading, ExpressionEval[] exprs) {
  Logger.WriteLine(4, "Transform {0} exprs={1}", newheading, exprs.Count());
  Logger.Assert(exprs.Count() == newheading.Degree, "degree");

  var result = DataTableLocal.Create(newheading);
  var orderedExprs = result.Heading.Reorder(exprs);
  foreach (var source in GetRows()) {  //TODO:Enumerable
    var transformed = source.Transform(newheading, orderedExprs);
    result.AddRow(transformed);
  }
  Logger.WriteLine(4, "[{0}]", result);
  return result;
}
// Sequence of integers: a relation with a single column N holding the values
// 0, 1, ... count-1. The count is truncated to an int; a count of zero or
// less yields an empty relation.
public RelationValue Sequence(NumberValue countarg) {
  var heading = DataHeading.Create("N:number");
  var result = DataTableLocal.Create(heading);
  var limit = (int)countarg.Value;
  for (var next = Decimal.Zero; next < limit; next += 1) {
    var cell = new TypedValue[] { NumberValue.Create(next) };
    result.AddRow(DataRow.Create(heading, cell));
  }
  return RelationValue.Create(result);
}
// Take -- take some rows, discard the rest.
// Copies rows in enumeration order until 'value' rows have been taken.
// A count of zero or less yields an empty table with the same heading.
// Enumeration stops as soon as the count is exhausted, rather than walking
// (and ignoring) every remaining row.
public override DataTable Take(NumberValue value) {
  var newtable = DataTableLocal.Create(Heading);
  var remaining = value.Value;
  foreach (var row in GetRows()) {  //TODO:Enumerable
    if (!(remaining > 0)) {
      break;  // nothing further can be taken
    }
    remaining--;
    newtable.AddRow(row);
  }
  return newtable;
}
// Restrict -- new table (same heading) containing only the rows for which
// the predicate expression evaluates to true.
public override DataTable Restrict(ExpressionEval expr) {
  var result = DataTableLocal.Create(Heading);
  foreach (var candidate in GetRows()) {  //TODO:Enumerable
    var passes = expr.EvalPred(candidate).Value;
    if (!passes) {
      continue;
    }
    result.AddRow(candidate);
  }
  Logger.WriteLine(4, "Restrict {0}", result);
  return result;
}
// Transform with ordered calculations - different algorithm
// 1. Build index
// 2. Read input file using index
// 3. Transform and write output file
//
// newheading: heading of the table returned.
// exprs:      expressions producing the new columns; those with HasFold set
//             use accumulators (numacc is the sum of their AccumCount).
// orderexps:  expressions handed to OrderedIndex.Create to define the order.
// Returns a new table. Each transformed row is held back one step and only
// written when the next row differs in a non-fold column or the index reports
// a break; the final held row is always written.
public override DataTable TransformOrdered(DataHeading newheading, ExpressionEval[] exprs, ExpressionEval[] orderexps) {
  Logger.WriteLine(4, "TransformOrdered {0} exprs={1},{2}", newheading, exprs.Count(), orderexps.Count());

  var numacc = exprs.Where(e => e.HasFold).Sum(e => e.AccumCount);
  var newtable = DataTableLocal.Create(newheading);
  var newexprs = newtable.Heading.Reorder(exprs);
  var ordidx = OrderedIndex.Create(orderexps, Heading);
  // list of indexes of not-folded columns
  var notfold = exprs.Where(e => !e.HasFold)
    .Select(e => newheading.FindIndex(e.Name)).ToArray();

  // Build index
  for (var ord = 0; ord < Cardinality; ++ord) //TODO:Enumerable
  {
    ordidx.Add(GetRow(ord), ord);
  }
  // NOTE(review): accblk stays null until IsBreak is first seen true; presumably
  // the index reports a break on the first row -- confirm against OrderedIndex.
  AccumulatorBlock accblk = null;

  // Read in index order, with access to ordering info
  DataRow lastrow = null;
  foreach (var ord in ordidx.RowOrdinals) {
    var oldrow = _rows[ord];
    oldrow.OrderedIndex = ordidx; // so row functions can access it
    // if there is a group break, reset the accumulators
    if (ordidx.IsBreak) {
      accblk = AccumulatorBlock.Create(numacc);
    }
    DataRow newrow = oldrow.TransformAggregate(newheading, accblk, newexprs);

    // save the current row, output it on group break or when any non-fold column has changed
    // any rows not output will have identical non-fold cols so only running sums are lost
    var nfchg = (lastrow != null && !notfold.All(x => newrow.Values[x].Equals(lastrow.Values[x])));
    if (nfchg || (lastrow != null && ordidx.IsBreak)) {
      newtable.AddRaw(lastrow);
    }
    lastrow = newrow; // guaranteed to be different!
  }
  // flush the row still being held back, if any input was read
  if (lastrow != null) {
    newtable.AddRaw(lastrow);
  }
  Logger.WriteLine(4, "[{0}]", newtable);
  return(newtable);
}
// Semijoin: keep the rows of this table (heading unchanged) for which the
// other table reports a match on the compared columns.
private DataTableLocal Semijoin(DataTableLocal other) {
  var compareMap = other.Heading.MakeIndex(Heading);
  var result = DataTableLocal.Create(Heading);
  foreach (var candidate in GetRows()) {  //TODO:Enumerable
    if (!other.HasMatch(candidate, compareMap)) {
      continue;
    }
    result.AddRow(candidate);
  }
  Logger.WriteLine(4, "[Matching={0}]", result);
  return result;
}
// Rename some columns, data unchanged.
// It can be possible to copy and graft on a new heading, but for now every
// row is rebuilt through the rename expressions instead.
// Asserts that there is exactly one expression per existing column.
public override DataTable Rename(ExpressionEval[] exprs) {
  Logger.Assert(exprs.Length == Degree, "reorder mismatch");

  // note: this is an explicit heading. Order matters.
  var renamedHeading = Heading.Rename(exprs);
  var result = DataTableLocal.Create(renamedHeading);
  var orderedExprs = result.Heading.Reorder(exprs);
  foreach (var source in GetRows()) {
    var renamed = source.Transform(renamedHeading, orderedExprs);
    result.AddRow(renamed);
  }
  Logger.WriteLine(4, "[Rename={0}]", result);
  return result;
}
// Divide: for each row of this table that the other table reports a match for
// on the compared columns, add its projection onto newheading to the result.
// (The trace line is labelled "Matching", the same as Semijoin.)
private DataTableLocal Divide(DataTableLocal other, DataHeading newheading) {
  var compareMap = other.Heading.MakeIndex(Heading);
  var outputMap = newheading.MakeIndex(Heading);
  var result = DataTableLocal.Create(newheading);
  foreach (var candidate in GetRows()) {  //TODO:Enumerable
    if (!other.HasMatch(candidate, compareMap)) {
      continue;
    }
    result.AddRow(candidate.Project(newheading, outputMap));
  }
  Logger.WriteLine(4, "[Matching={0}]", result);
  return result;
}
// Make connection to database based on available flags and current status.
// Does nothing if the catalog is already past the Started state.
// Does not return on error: throws a fatal ProgramError if the SQL database
// cannot be opened, or if loading is requested and the database directory
// does not exist.
void ConnectDatabase() {
  if (_status > CatalogStatus.Started) {
    return;   // just the once
  }
  Logger.Assert(_status == CatalogStatus.Started, _status);
  Logger.WriteLine(2, $"Catalog Connect database {this}");

  // Register an empty catalog table as a public system value, then flag it as database
  var emptyCatalog = DataTableLocal.Create(_catalogtableheading);
  GlobalVars.AddEntry(CatalogTableName, emptyCatalog.DataType, EntryKinds.Value,
    EntryFlags.Public | EntryFlags.System, RelationValue.Create(emptyCatalog));
  GlobalVars.FindEntry(CatalogTableName).Flags |= EntryFlags.Database;

  // Sql or not? A path without extension gets the default one for the current mode.
  var extension = Path.GetExtension(DatabasePath);
  if (extension == "") {
    DatabasePath = Path.ChangeExtension(DatabasePath,
      SqlFlag ? DefaultSqlDatabaseExtension : DefaultDatabaseExtension);
  }
  // The original extension, or any non-memory database kind, also selects Sql
  SqlFlag |= (extension == DefaultSqlDatabaseExtension || DatabaseKind != DatabaseKinds.Memory);
  DatabaseName = Path.GetFileNameWithoutExtension(DatabasePath);

  if (!SqlFlag) {
    if (LoadFlag && !Directory.Exists(DatabasePath)) {
      throw ProgramError.Fatal("Catalog", "Database does not exist: {0}", DatabasePath);
    }
  } else {
    if (DatabaseKind == DatabaseKinds.Memory) {
      DatabaseKind = DatabaseKinds.Sqlite;
    }
    Logger.WriteLine(3, "Catalog database={0} kind={1}", DatabasePath, DatabaseKind);
    if (!SqlTarget.Open(DatabasePath, DatabaseKind)) {
      throw ProgramError.Fatal("Catalog", "Cannot open database: {0} ({1})", DatabasePath, DatabaseKind);
    }
  }
  _status = CatalogStatus.Connected;
  Logger.WriteLine(3, "[CC {0}]", this);
}
// Read the schema table (from GetSchema) into a new local table.
// For every schema row, each schema column is converted to the data type of
// the heading column at the same position; database nulls are passed to the
// conversion as null.
DataTableLocal Read(DataHeading heading) {
  var schema = GetSchema();
  var schemaColumns = schema.Columns;
  var schemaRows = schema.Rows;
  var result = DataTableLocal.Create(heading);
  foreach (System.Data.DataRow source in schemaRows) {
    var positions = Enumerable.Range(0, schemaColumns.Count);
    var values = positions
      .Select(x => TypedValue.Convert(heading.Columns[x].DataType, source.IsNull(x) ? null : source[x]))
      .ToArray();
    result.AddRow(DataRow.Create(heading, values));
  }
  return result;
}
// Rows from both tables projected on common heading.
// Each side is projected onto newheading using the column map built from its
// OWN heading. Previously the two maps were swapped (rows of this table were
// projected with the map built from other.Heading) and rows of the other table
// were projected onto this.Heading instead of newheading, which is only
// correct when all three headings coincide.
private DataTableLocal Union(DataTableLocal other, DataHeading newheading) {
  var lmovendx = newheading.MakeIndex(Heading);         // map for rows of this table
  var rmovendx = newheading.MakeIndex(other.Heading);   // map for rows of the other table
  var newtable = DataTableLocal.Create(newheading);
  foreach (var row in this.GetRows()) {  //TODO:Enumerable
    newtable.AddRow(row.Project(newheading, lmovendx));
  }
  foreach (var row in other.GetRows()) {  //TODO:Enumerable
    newtable.AddRow(row.Project(newheading, rmovendx));
  }
  Logger.WriteLine(4, "[Union={0}]", newtable);
  return newtable;
}
// Persist a catalog entry as a byte array: name, kind, flags and data type,
// followed by a value. Database entries are written with an empty table of
// the same heading in place of their value; type entries have no value
// written; everything else has its actual value written.
public byte[] ToBinary() {
  using (var output = PersistWriter.Create()) {
    output.Write(Name);
    output.Write((byte)Kind);
    output.Write((byte)Flags);
    output.Write(DataType);
    if (IsDatabase) {
      var placeholder = DataTableLocal.Create(Value.Heading);
      output.WriteValue(RelationValue.Create(placeholder));
    } else if (Kind != EntryKinds.Type) {
      output.WriteValue(Value);
    }
    return output.ToArray();
  }
}
// Read an SQL table into a new local table with the given heading.
// The special name "*" reads the schema instead (see Read(heading)).
// For every record, one value is made per heading column, by column name and
// data type. Close() is now called even when reading or converting a record
// throws, so a failed read no longer skips the close.
public override DataTable Read(string table, DataHeading heading) {
  Logger.WriteLine(2, "Sql Read '{0}'", table);
  if (table == "*") {
    return Read(heading);
  }
  var tabnew = DataTableLocal.Create(heading);
  var reader = Open(table);
  try {
    while (reader.Read()) {
      var values = heading.Columns.Select(c => MakeValue(reader, c.Name, c.DataType)).ToArray();
      var row = DataRow.Create(heading, values);
      tabnew.AddRow(row);
    }
  } finally {
    Close();
  }
  return tabnew;
}
// Read a text file into a new table with the given heading, one row per line.
// Returns null when the file does not exist.
public override DataTable Read(string file, DataHeading heading) {
  var path = GetPath(file);
  if (!File.Exists(path)) {
    return null;
  }
  var result = DataTableLocal.Create(heading);
  using (var reader = File.OpenText(path)) {
    string line;
    while ((line = reader.ReadLine()) != null) {
      result.AddRow(DataRow.Create(heading, line));
    }
  }
  return result;
}
// Set difference for tables with equal headings: the rows of this table that
// the other table does not contain.
// Throws EvaluatorException if the headings differ.
private DataTableLocal Minus(DataTableLocal other) {
  if (!this.Heading.Equals(other.Heading)) {
    throw new EvaluatorException("tables have different headings");
  }
  var result = DataTableLocal.Create(this.Heading);
  foreach (var candidate in this.GetRows()) {  //TODO:Enumerable
    if (other.Contains(candidate)) {
      continue;
    }
    result.AddRow(candidate);
  }
  Logger.WriteLine(4, "[Minus={0}]", result);
  return result;
}
// Union, simpler algorithm for when both tables have the same heading:
// add every row of this table, then every row of the other, to a new table.
// Throws EvaluatorException if the headings differ.
private DataTableLocal Union(DataTableLocal other) {
  if (!this.Heading.Equals(other.Heading)) {
    throw new EvaluatorException("tables have different headings");
  }
  var result = DataTableLocal.Create(this.Heading);
  foreach (var mine in this.GetRows()) {  //TODO:Enumerable
    result.AddRow(mine);
  }
  foreach (var theirs in other.GetRows()) {  //TODO:Enumerable
    result.AddRow(theirs);
  }
  Logger.WriteLine(4, "[Union={0}]", result);
  return result;
}
///=================================================================
///
/// implementations -- common code
///

// Join via naive cross product and project onto given heading.
// Handles anything, but not necessarily optimal: every row of this table is
// compared with every row of the other, and each matching pair yields one
// merged output row (so multiple matches generate extra rows).
// joinhdr is only used in the trace line here.
DataTableLocal GeneralisedJoin(DataTableLocal other, DataHeading newheading, DataHeading joinhdr) {
  Logger.WriteLine(4, "GenJoin L={0} R={1} new={2} j={3}", this.Heading, other.Heading, newheading, joinhdr);

  var matchMap = Heading.MakeIndex(other.Heading);
  var leftMap = newheading.MakeIndex(Heading);
  var rightMap = newheading.MakeIndex(other.Heading);
  var result = DataTableLocal.Create(newheading);
  foreach (var left in this.GetRows()) {  //TODO:Enumerable
    foreach (var right in other.GetRows()) {  //TODO:Enumerable
      if (!Matches(left, right, matchMap)) {
        continue;
      }
      var merged = left.MergeValues(leftMap, right, rightMap);
      result.AddRow(DataRow.Create(newheading, merged));
    }
  }
  Logger.WriteLine(4, "[Join={0}]", result);
  return result;
}
// Recursive expansion.
// Creates a new table seeded with the rows of this one, then walks it by
// ordinal: the expression is evaluated against each row and the rows of the
// resulting table are added (only union for now). The row count is re-read on
// every step, so rows appended during the walk are themselves expanded.
// 'flags' is only used in the trace line here.
public override DataTable Recurse(int flags, ExpressionEval expr) {
  Logger.WriteLine(4, "Recurse {0} {1}", flags, expr);
  Logger.Assert(expr.ReturnType == DataType);

  var result = DataTableLocal.Create(Heading);
  foreach (var seed in _rows) {
    result.AddRaw(seed);
  }
  // by ordinal, to maintain position and notice new rows
  var ord = 0;
  while (ord < result._rows.Count) {
    var expansion = expr.EvalOpen(result._rows[ord]).AsTable();
    foreach (var produced in expansion.GetRows()) {
      result.AddRow(produced);
    }
    ++ord;
  }
  return result;
}
// Transform with Aggregation, maintaining an index on the output.
// Each input row is first transformed without accumulators to obtain a lookup
// key. The first time a key is seen, a fresh accumulator block is created and
// the aggregated row is appended to the output; on later sightings the output
// row at the recorded ordinal is replaced by a re-aggregated row using the
// same accumulator block. accblks is indexed by output ordinal.
public override DataTable TransformAggregate(DataHeading newheading, ExpressionEval[] exprs) {
  Logger.WriteLine(4, "TransformAggregate {0} exprs={1}", newheading, exprs.Length);

  var numacc = exprs.Where(e => e.HasFold).Sum(e => e.AccumCount);
  var result = DataTableLocal.Create(newheading);

  // key row -> ordinal of its output row; accumulator blocks in the same ordinal order
  var ordinals = new Dictionary<DataRow, int>();
  var accblks = new List<AccumulatorBlock>();
  var orderedExprs = result.Heading.Reorder(exprs);

  foreach (var oldrow in this.GetRows()) {  //TODO:Enumerable
    var key = oldrow.Transform(newheading, orderedExprs);
    int ord;
    if (ordinals.TryGetValue(key, out ord)) {
      // Subsequent time row seen, update output by index
      // TODO: only need to update each row once at end from accumulators
      var current = result._rows[ord];
      var accblk = accblks[ord];
      result.Replace(current, oldrow.TransformAggregate(newheading, accblk, orderedExprs));
    } else {
      // First time this new row seen, add to output and index it
      var accblk = AccumulatorBlock.Create(numacc);
      var added = oldrow.TransformAggregate(newheading, accblk, orderedExprs);
      result.AddRaw(added);
      Logger.Assert(result._dict[result._rows[result.Cardinality - 1]] == result.Cardinality - 1);
      ordinals.Add(key, result.Cardinality - 1);
      accblks.Add(accblk);
    }
  }
  Logger.WriteLine(4, "[{0}]", result);
  return result;
}