/// <summary>
/// Stores the writer and the output node set after checking that both have the same width.
/// </summary>
/// <param name="Parent">Node passed to the base constructor.</param>
/// <param name="Writer">Record writer kept by this node.</param>
/// <param name="Output">Node set kept by this node; its count must equal the writer's column count.</param>
/// <exception cref="Exception">The writer's column count differs from the node set's count.</exception>
public TNodeAppendTo(TNode Parent, RecordWriter Writer, FNodeSet Output)
    : base(Parent)
{

    // Only the number of columns has to line up; the schemas themselves are not compared //
    bool sameWidth = (Writer.SourceSchema.Count == Output.Count);
    if (!sameWidth)
    {
        throw new Exception("Attempting to write a different number of recors to a stream");
    }

    this._writer = Writer;
    this._output = Output;

}
/// <summary>
/// Captures everything the aggregate plan needs: source, filter, keys, aggregates, output and registers.
/// </summary>
/// <param name="Output">Writer stored as the plan's output.</param>
/// <param name="Source">Data set stored as the plan's source.</param>
/// <param name="Filter">Predicate stored as the plan's filter.</param>
/// <param name="Keys">Grouping keys; a null value is replaced by an empty node set.</param>
/// <param name="Aggregates">Aggregate set stored by the plan.</param>
/// <param name="ReturnSet">Node set stored as the plan's return set.</param>
/// <param name="BaseMem">Register stored as the base memory.</param>
/// <param name="ReturnMem">Register stored as the return memory.</param>
/// <param name="TempDir">Sink directory; a null value falls back to the source's directory.</param>
public AggregatePlan(RecordWriter Output, DataSet Source, Predicate Filter, FNodeSet Keys, AggregateSet Aggregates, FNodeSet ReturnSet,
    StaticRegister BaseMem, StaticRegister ReturnMem, string TempDir)
    : base()
{

    // Input side //
    this._writer = Output;
    this._source = Source;
    this._filter = Filter;

    // A missing key set means "no grouping keys" rather than a null reference //
    if (object.ReferenceEquals(Keys, null))
    {
        this._keys = new FNodeSet();
    }
    else
    {
        this._keys = Keys;
    }

    // Aggregation and projection //
    this._aggregates = Aggregates;
    this._returnset = ReturnSet;

    // Registers //
    this._basememory = BaseMem;
    this._returnmemory = ReturnMem;

    // Source.Directory is only touched when no directory was supplied //
    if (TempDir != null)
    {
        this._sink = TempDir;
    }
    else
    {
        this._sink = Source.Directory;
    }

    this.Name = "AGGREGATE";

}
/// <summary>
/// Reads a delimited text file and inserts one record per line into the writer.
/// </summary>
/// <param name="FullPath">Path of the text file to read.</param>
/// <param name="W">Writer that receives the records; its source schema is passed to the splitter.</param>
/// <param name="Skip">Number of leading lines to read and discard.</param>
/// <param name="Delim">Delimiters passed to the splitter.</param>
/// <param name="Escape">Escape character passed to the splitter.</param>
public static void BufferText(string FullPath, RecordWriter W, int Skip, char[] Delim, char Escape)
{

    using (StreamReader reader = new StreamReader(FullPath))
    {

        // Throw away the header lines //
        for (int skipped = 0; skipped < Skip; skipped++)
        {
            reader.ReadLine();
        }

        // Convert every remaining line into a record and hand it to the writer //
        while (!reader.EndOfStream)
        {
            string line = reader.ReadLine();
            W.Insert(Splitter.ToRecord(line, W.SourceSchema, Delim, Escape));
        }

    }

}
/// <summary>
/// Evaluates <paramref name="Fields"/> once per cached key/value pair and inserts each result into the writer.
/// </summary>
/// <param name="Writter">Writer that receives the evaluated records.</param>
/// <param name="Fields">Output expressions; their columns are compared with the writer's source schema.</param>
/// <exception cref="Exception">The writer's source schema and the field columns compare as different.</exception>
public void WriteToFinal(RecordWriter Writter, FNodeSet Fields)
{

    if (Writter.SourceSchema != Fields.Columns)
    {
        throw new Exception("Base stream and output schema are different");
    }

    // Bind the output expressions to a fresh register they will read from //
    StaticRegister memory = new StaticRegister(null);
    Fields.AssignRegister(memory);

    foreach (KeyValuePair<Record, CompoundRecord> entry in this._cache)
    {

        // The register holds the key joined with the evaluated reducers for that key //
        memory.Assign(Record.Join(entry.Key, this._Reducers.Evaluate(entry.Value)));

        // Evaluate against the register and write the result //
        Writter.Insert(Fields.Evaluate());

    }

}
/// <summary>
/// Builds a select over <paramref name="Data"/> made of the kept columns plus a "CLUSTER_ID" expression.
/// </summary>
/// <param name="Output">Writer passed to the read plan.</param>
/// <param name="Data">Data set passed to the read plan.</param>
/// <param name="ClusterVariables">Expressions fed to the cluster function; the count must match the internal field count.</param>
/// <param name="OtherKeepers">Additional expressions; a clone of this set is extended with "CLUSTER_ID".</param>
/// <param name="Where">Predicate passed to the read plan.</param>
/// <exception cref="ArgumentException">The cluster variable count differs from the internal field count.</exception>
public override void Extend(RecordWriter Output, DataSet Data, FNodeSet ClusterVariables, FNodeSet OtherKeepers, Predicate Where)
{

    if (ClusterVariables.Count != this._fields.Count)
    {
        throw new ArgumentException("The cluster variable count passed does not match the internal cluster variable count");
    }

    // Work on a copy so the caller's node set is not modified //
    FNodeSet selectors = OtherKeepers.CloneOfMe();

    // The cluster id expression takes a clone of every cluster variable as a child //
    FNode clusterId = new FNodeResult(null, new RowClusterCellFunction(this._rule, this._means));
    foreach (FNode variable in ClusterVariables.Nodes)
    {
        clusterId.AddChildNode(variable.CloneOfMe());
    }
    selectors.Add("CLUSTER_ID", clusterId);

    // NOTE(review): the plan is only constructed here, nothing visible executes it - confirm the constructor does the work //
    new FastReadPlan(Data, Where, selectors, Output);

}
/// <summary>
/// Writes an aggregate structure to <paramref name="Output"/> and returns the number of groups counted.
/// </summary>
/// <param name="Data">Structure whose headers, or cache when there are no headers, are written.</param>
/// <param name="Output">Writer passed to each WriteToFinal call.</param>
/// <param name="Fields">Output expressions passed to each WriteToFinal call.</param>
/// <returns>The cache count when there are no headers; otherwise the sum of the counts of every opened set.</returns>
public static long Render(AggregateStructure Data, RecordWriter Output, FNodeSet Fields)
{

    // With headers present, every header is opened, counted and written in turn //
    // NOTE(review): the cache itself is not written on this path - confirm callers expect that //
    if (Data._Headers.Count != 0)
    {

        long total = 0;
        foreach (Header header in Data._Headers)
        {
            KeyValueSet partition = KeyValueSet.Open(header, Data._keys, Data._aggregates);
            total += (long)partition.Count;
            partition.WriteToFinal(Output, Fields);
        }
        return total;

    }

    // No headers: the cache is all there is //
    Data._Cache.WriteToFinal(Output, Fields);
    return (long)Data._Cache.Count;

}
/// <summary>
/// Writes the aggregated sets to <paramref name="Output"/> and returns the total count of the sets written.
/// </summary>
/// <param name="Output">Writer passed to each WriteToFinal call.</param>
/// <returns>
/// The cache count when there are no headers; otherwise the sum of the counts of every set
/// that was passed to WriteToFinal in the union loop.
/// </returns>
public long Render(RecordWriter Output)
{

    // If the header list is empty, just write the cache and return its count //
    if (this._Headers.Count == 0)
    {
        this._Cache.WriteToFinal(Output);
        return (long)this._Cache.Count;
    }

    // Save the current cache and add the returned header to the list, so it takes part in the union below //
    this._Headers.Add(KeyValueSet.Save(this._TempDir, this._Cache));

    // Create a table //
    // NOTE(review): 't' is never read again in this method - confirm whether the constructor's side effects are needed //
    Table t = new Table(this._TempDir, TableHeader.TempName(), this._Cache.OutputSchema);

    // Otherwise, we need to union all the headers: set i is unioned with every later set j //
    long Counter = 0;
    for (int i = 0; i < this._Headers.Count - 1; i++)
    {

        KeyValueSet gbs1 = KeyValueSet.Open(this._Headers[i], this._Cache.BaseMappers, this._Cache.BaseReducers);

        for (int j = i + 1; j < this._Headers.Count; j++)
        {

            KeyValueSet gbs2 = KeyValueSet.Open(this._Headers[j], this._Cache.BaseMappers, this._Cache.BaseReducers);

            // Union, then persist gbs2 back under its own header //
            // presumably Union moves matching keys out of gbs2 into gbs1 - TODO confirm against KeyValueSet.Union //
            KeyValueSet.Union(gbs1, gbs2);
            KeyValueSet.Save(this._Headers[j], gbs2);

        }

        // Write the set that has now been unioned with every later set, and count it //
        gbs1.WriteToFinal(Output);
        Counter += (long)gbs1.Count;

    }

    // NOTE(review): the set at the last index is only ever opened as gbs2 and is never passed to WriteToFinal here - //
    // confirm Union leaves nothing in it that still has to be written //

    // Drop all headers //
    DataSetManager.DropRecordSet(this._Headers);

    return Counter;

}
// Nest Loop Generic Joins //
/// <summary>
/// Inner join by brute force: every row of the first set is paired with every row of the second set,
/// and each pair that passes <paramref name="Where"/> is evaluated and written.
/// </summary>
/// <param name="Output">Writer that receives the evaluated records.</param>
/// <param name="Fields">Output expressions evaluated for each passing pair.</param>
/// <param name="Where">Predicate rendered for each pair.</param>
/// <param name="Data1">Set scanned by the outer loop.</param>
/// <param name="Data2">Set re-opened and scanned for every outer row.</param>
/// <param name="Memory1">Register assigned the current outer row.</param>
/// <param name="Memory2">Register assigned the current inner row.</param>
private static void NestedLoopInnerJoin(RecordWriter Output, FNodeSet Fields, Predicate Where, DataSet Data1, DataSet Data2,
    StaticRegister Memory1, StaticRegister Memory2)
{

    RecordReader outer = Data1.OpenReader();

    while (!outer.EndOfData)
    {

        Memory1.Assign(outer.ReadNext());

        // A fresh reader restarts the second table for each outer row //
        for (RecordReader inner = Data2.OpenReader(); !inner.EndOfData; )
        {

            Memory2.Assign(inner.ReadNext());

            if (!Where.Render())
            {
                continue;
            }

            Output.Insert(Fields.Evaluate());

        }

    }

}
// Main Join Functions //
/// <summary>
/// Joins two data sets on the equality keys AND the supplied predicate, dispatching to the
/// sort-merge, nested-loop or hash-table implementation.
/// </summary>
/// <param name="JM">Join method, forwarded to the chosen implementation.</param>
/// <param name="JA">Requested algorithm; forced to nested loop when there are no equality keys.</param>
/// <param name="Output">Writer forwarded to the chosen implementation.</param>
/// <param name="Fields">Output expressions forwarded to the chosen implementation.</param>
/// <param name="Where">Additional predicate; null is replaced by Predicate.TrueForAll.</param>
/// <param name="Data1">First data set.</param>
/// <param name="Data2">Second data set.</param>
/// <param name="Equality1">Key of the first data set.</param>
/// <param name="Equality2">Key of the second data set; must be as long as <paramref name="Equality1"/>.</param>
/// <param name="Memory1">Register the first set's key field references read from.</param>
/// <param name="Memory2">Register the second set's key field references read from.</param>
/// <exception cref="Exception">The two keys have different lengths.</exception>
public static void Join(MergeMethod JM, MergeAlgorithm JA, RecordWriter Output, FNodeSet Fields, Predicate Where, DataSet Data1, DataSet Data2,
    Key Equality1, Key Equality2, StaticRegister Memory1, StaticRegister Memory2)
{

    // Normalise the arguments //
    if (Where == null)
    {
        Where = Predicate.TrueForAll;
    }
    if (Equality1.Count != Equality2.Count)
    {
        throw new Exception("Both join keys must have the same length");
    }
    if (Equality1.Count == 0)
    {
        JA = MergeAlgorithm.NestedLoop;
    }

    // The nested loop takes no keys, so the key equalities are folded into one predicate: //
    // Where AND (key1[0] == key2[0]) AND (key1[1] == key2[1]) ... //
    FNodeResult conjunction = new FNodeResult(null, new AndMany());
    conjunction.AddChildNode(Where.Node.CloneOfMe());
    for (int k = 0; k < Equality1.Count; k++)
    {

        FNodeFieldRef lhs = new FNodeFieldRef(null, Equality1[k], Data1.Columns.ColumnAffinity(Equality1[k]),
            Data1.Columns.ColumnSize(Equality1[k]), Memory1);
        FNodeFieldRef rhs = new FNodeFieldRef(null, Equality2[k], Data2.Columns.ColumnAffinity(Equality2[k]),
            Data2.Columns.ColumnSize(Equality2[k]), Memory2);

        FNodeResult keyMatch = new FNodeResult(null, new CellBoolEQ());
        keyMatch.AddChildren(lhs, rhs);
        conjunction.AddChildNode(keyMatch);

    }

    // Without keys the caller's predicate is used unchanged //
    Predicate loopPredicate = Where;
    if (Equality1.Count != 0)
    {
        loopPredicate = new Predicate(conjunction);
    }

    // Dispatch; anything that is neither sort-merge nor nested-loop goes to the hash table //
    if (JA == MergeAlgorithm.SortMerge)
    {
        SortMerge(JM, Output, Fields, Where, Data1, Data2, Equality1, Equality2, Memory1, Memory2);
    }
    else if (JA == MergeAlgorithm.NestedLoop)
    {
        NestedLoop(JM, Output, Fields, loopPredicate, Data1, Data2, Memory1, Memory2);
    }
    else
    {
        HashTable(JM, Output, Fields, Where, Data1, Data2, Equality1, Equality2, Memory1, Memory2);
    }

}
/// <summary>
/// Joins two data sets through temporary key indexes: an index is built per side, the two are combined
/// into a table of (set id, record id) pairs, and each pair is materialised, filtered and written.
/// The temporary data sets are dropped even when the join fails part-way through.
/// </summary>
/// <param name="JM">Join method passed to the helper that combines the two indexes.</param>
/// <param name="Output">Writer that receives the evaluated records.</param>
/// <param name="Fields">Output expressions evaluated for every pair.</param>
/// <param name="Where">Predicate rendered for every pair; only passing pairs are written.</param>
/// <param name="Data1">First data set.</param>
/// <param name="Data2">Second data set.</param>
/// <param name="Equality1">Key the first index is built on.</param>
/// <param name="Equality2">Key the second index is built on.</param>
/// <param name="Memory1">Register assigned the first side's record (or its null record).</param>
/// <param name="Memory2">Register assigned the second side's record (or its null record).</param>
public static void HashTable(MergeMethod JM, RecordWriter Output, FNodeSet Fields, Predicate Where, DataSet Data1, DataSet Data2,
    Key Equality1, Key Equality2, StaticRegister Memory1, StaticRegister Memory2)
{

    DataSet h1 = null;
    DataSet h2 = null;
    DataSet hash = null;

    try
    {

        // Build temp hash tables //
        h1 = IndexBuilder.Build(Data1, Equality1, Data1.Directory);
        h2 = IndexBuilder.Build(Data2, Equality2, Data2.Directory);

        // Combine hash tables //
        hash = BuildJoinHelper(h1, h2, JM);

        // Nothing to join; the finally block takes care of the temp data //
        if (hash.IsEmpty)
        {
            return;
        }

        // Sort the table by the first and second set ids, keys 0 and 2 //
        hash.Sort(new Key(0, 2));

        // Open a reader //
        RecordReader ac = hash.OpenReader();

        // Peek at the first row to pre-load one set from each side //
        // NOTE(review): if the first row carries a null set id this still calls PopAt with it - confirm that is safe //
        int sid1 = (int)ac.Read()[0].INT;
        int sid2 = (int)ac.Read()[2].INT;
        RecordSet ts1 = Data1.PopAt(sid1);
        RecordSet ts2 = Data2.PopAt(sid2);

        // Main loop //
        while (!ac.EndOfData)
        {

            // Row layout: [0] set id 1, [1] record id 1, [2] set id 2, [3] record id 2 //
            Record dr = ac.ReadNext();
            sid1 = (int)dr[0].INT;
            int rid1 = (int)dr[1].INT;
            sid2 = (int)dr[2].INT;
            int rid2 = (int)dr[3].INT;
            bool isnull1 = dr[0].IsNull;
            bool isnull2 = dr[2].IsNull;

            // Re-buffer a set only when the row points at a different, non-null set id //
            if (ts1.ID != sid1 && !isnull1)
                ts1 = Data1.PopAt(sid1);
            if (ts2.ID != sid2 && !isnull2)
                ts2 = Data2.PopAt(sid2);

            // Table one: the referenced record, or the null record when this side has no match //
            if (!isnull1)
                Memory1.Assign(ts1[rid1]);
            else
                Memory1.Assign(ts1.Columns.NullRecord);

            // Table two: same rule //
            if (!isnull2)
                Memory2.Assign(ts2[rid2]);
            else
                Memory2.Assign(ts2.Columns.NullRecord);

            // Evaluate first, then filter: kept in the original order on purpose //
            Record t = Fields.Evaluate();
            if (Where.Render())
                Output.Insert(t);

        }

    }
    finally
    {

        // Drop whatever temp data was created, on success and on failure alike //
        if (!object.ReferenceEquals(h1, null))
            DataSetManager.DropData(h1);
        if (!object.ReferenceEquals(h2, null))
            DataSetManager.DropData(h2);
        if (!object.ReferenceEquals(hash, null))
            DataSetManager.DropData(hash);

    }

}
/// <summary>
/// Routes a sort-merge join to the implementation(s) that match the join method.
/// Methods not listed below do nothing.
/// </summary>
/// <param name="JM">Join method to perform.</param>
/// <param name="Output">Writer forwarded to the implementation.</param>
/// <param name="Fields">Output expressions forwarded to the implementation.</param>
/// <param name="Where">Predicate forwarded to the implementation.</param>
/// <param name="Data1">First data set.</param>
/// <param name="Data2">Second data set.</param>
/// <param name="Equality1">Key of the first data set (not used by the cross join).</param>
/// <param name="Equality2">Key of the second data set (not used by the cross join).</param>
/// <param name="Memory1">Register for the first data set.</param>
/// <param name="Memory2">Register for the second data set.</param>
public static void SortMerge(MergeMethod JM, RecordWriter Output, FNodeSet Fields, Predicate Where, DataSet Data1, DataSet Data2,
    Key Equality1, Key Equality2, StaticRegister Memory1, StaticRegister Memory2)
{

    if (JM == MergeMethod.Cross)
    {
        CrossJoin(Output, Fields, Where, Data1, Data2, Memory1, Memory2);
    }
    else if (JM == MergeMethod.Inner)
    {
        SortMergeInnerJoin(Output, Fields, Where, Data1, Data2, Equality1, Equality2, Memory1, Memory2);
    }
    else if (JM == MergeMethod.Left)
    {
        SortMergeLeftJoin(Output, Fields, Where, Data1, Data2, Equality1, Equality2, Memory1, Memory2, false);
    }
    else if (JM == MergeMethod.Right)
    {
        SortMergeRightJoin(Output, Fields, Where, Data1, Data2, Equality1, Equality2, Memory1, Memory2, false);
    }
    else if (JM == MergeMethod.Full)
    {
        // Full = left join, followed by the right side run with the anti flag set //
        SortMergeLeftJoin(Output, Fields, Where, Data1, Data2, Equality1, Equality2, Memory1, Memory2, false);
        SortMergeRightJoin(Output, Fields, Where, Data1, Data2, Equality1, Equality2, Memory1, Memory2, true);
    }
    else if (JM == MergeMethod.AntiLeft)
    {
        SortMergeLeftJoin(Output, Fields, Where, Data1, Data2, Equality1, Equality2, Memory1, Memory2, true);
    }
    else if (JM == MergeMethod.AntiRight)
    {
        SortMergeRightJoin(Output, Fields, Where, Data1, Data2, Equality1, Equality2, Memory1, Memory2, true);
    }
    else if (JM == MergeMethod.AntiInner)
    {
        // Anti inner = both sides run with the anti flag set //
        SortMergeLeftJoin(Output, Fields, Where, Data1, Data2, Equality1, Equality2, Memory1, Memory2, true);
        SortMergeRightJoin(Output, Fields, Where, Data1, Data2, Equality1, Equality2, Memory1, Memory2, true);
    }

}
/// <summary>
/// Right-side sort-merge join: walks both readers in key order, writes matched pairs (unless
/// <paramref name="AntiJoin"/> is set) and writes right-side rows with the left register set to the null record.
/// </summary>
/// <param name="Output">Writer that receives the evaluated records.</param>
/// <param name="Fields">Output expressions evaluated for each written row.</param>
/// <param name="Where">Predicate rendered before writing an unmatched right-side row.</param>
/// <param name="Data1">First (left) data set.</param>
/// <param name="Data2">Second (right) data set.</param>
/// <param name="Equality1">Key used to compare records of the first data set.</param>
/// <param name="Equality2">Key used to compare records of the second data set.</param>
/// <param name="Memory1">Register assigned the current left record or the left null record.</param>
/// <param name="Memory2">Register assigned the current right record.</param>
/// <param name="AntiJoin">When true, rows with equal keys are skipped instead of written.</param>
private static void SortMergeRightJoin(RecordWriter Output, FNodeSet Fields, Predicate Where, DataSet Data1, DataSet Data2, Key Equality1, Key Equality2, StaticRegister Memory1, StaticRegister Memory2, bool AntiJoin)
{

    // Check sort //
    // presumably verifies both sets are ordered by their keys - TODO confirm against CheckSort //
    CheckSort(Data1, Equality1, Data2, Equality2);

    // function variables: c holds the latest key comparison, c1/c2 are the left/right cursors //
    int c = 0;
    RecordReader c1 = Data1.OpenReader();
    RecordReader c2 = Data2.OpenReader();

    // main loop: runs while both cursors still have data //
    while (!c1.EndOfData && !c2.EndOfData)
    {

        // get the compare, and load both current records into their registers //
        Record r1 = c1.Read();
        Record r2 = c2.Read();
        c = Record.Compare(r1, Equality1, r2, Equality2);
        Memory1.Assign(r1);
        Memory2.Assign(r2);

        // RS1 < RS2: move the left cursor forward //
        if (c < 0)
        {
            c1.Advance();
        }
        // RS1 > RS2: the right row is written with a null left side //
        else if (c > 0)
        {
            // Note: Where is rendered while Memory1 still holds r1; the null record is assigned only afterwards //
            if (Where.Render())
            {
                Memory1.Assign(Data1.Columns.NullRecord);
                Output.Insert(Fields.Evaluate());
            }
            c2.Advance();
        }
        // RS1 == RS2 and AntiJoin: matched rows are skipped //
        else if (AntiJoin)
        {
            c2.Advance();
        }
        // RS1 == RS2: write the current right row against every left row with an equal key //
        else
        {

            // k counts how far the left cursor is advanced inside this run //
            int k = 0;
            while (c == 0)
            {

                // Add the record; Where is not rendered on this path //
                Output.Insert(Fields.Evaluate());

                // Advance the left cursor //
                k++;
                c1.Advance();
                if (c1.EndOfData) break;
                r1 = c1.Read();
                Memory1.Assign(r1);

                // Break if the new c != 0 //
                c = Record.Compare(r1, Equality1, r2, Equality2);
                if (c != 0) break;

            }

            // presumably Revert(k) moves the left cursor back k positions so the next right row sees the same run - TODO confirm //
            c1.Revert(k);
            c2.Advance();

        }

    }

    // Any right-side rows still unread are filtered and written with a null left side //
    Memory1.Assign(Data1.Columns.NullRecord);
    while (!c2.EndOfData)
    {
        Memory2.Assign(c2.ReadNext());
        if (Where.Render())
            Output.Insert(Fields.Evaluate());
    }

}
/// <summary>
/// Routes a nested-loop join to the implementation(s) that match the join method.
/// Methods not listed below do nothing.
/// </summary>
/// <param name="JM">Join method to perform.</param>
/// <param name="Output">Writer forwarded to the implementation.</param>
/// <param name="Fields">Output expressions forwarded to the implementation.</param>
/// <param name="Where">Predicate forwarded to the implementation.</param>
/// <param name="Data1">First data set.</param>
/// <param name="Data2">Second data set.</param>
/// <param name="Memory1">Register for the first data set.</param>
/// <param name="Memory2">Register for the second data set.</param>
public static void NestedLoop(MergeMethod JM, RecordWriter Output, FNodeSet Fields, Predicate Where, DataSet Data1, DataSet Data2,
    StaticRegister Memory1, StaticRegister Memory2)
{

    if (JM == MergeMethod.Cross)
    {
        CrossJoin(Output, Fields, Where, Data1, Data2, Memory1, Memory2);
    }
    else if (JM == MergeMethod.Inner)
    {
        NestedLoopInnerJoin(Output, Fields, Where, Data1, Data2, Memory1, Memory2);
    }
    else if (JM == MergeMethod.Left)
    {
        NestedLoopLeftJoin(Output, Fields, Where, Data1, Data2, Memory1, Memory2, false);
    }
    else if (JM == MergeMethod.Right)
    {
        NestedLoopRightJoin(Output, Fields, Where, Data1, Data2, Memory1, Memory2, false);
    }
    else if (JM == MergeMethod.Full)
    {
        // Full = left join, followed by the right side run with the anti flag set //
        NestedLoopLeftJoin(Output, Fields, Where, Data1, Data2, Memory1, Memory2, false);
        NestedLoopRightJoin(Output, Fields, Where, Data1, Data2, Memory1, Memory2, true);
    }
    else if (JM == MergeMethod.AntiLeft)
    {
        NestedLoopLeftJoin(Output, Fields, Where, Data1, Data2, Memory1, Memory2, true);
    }
    else if (JM == MergeMethod.AntiRight)
    {
        NestedLoopRightJoin(Output, Fields, Where, Data1, Data2, Memory1, Memory2, true);
    }
    else if (JM == MergeMethod.AntiInner)
    {
        // Anti inner = both sides run with the anti flag set //
        NestedLoopLeftJoin(Output, Fields, Where, Data1, Data2, Memory1, Memory2, true);
        NestedLoopRightJoin(Output, Fields, Where, Data1, Data2, Memory1, Memory2, true);
    }

}
/// <summary>
/// Right join by brute force: for every row of the second set the whole first set is scanned.
/// Matching pairs are written unless <paramref name="AntiJoin"/> is set; a row of the second set
/// without any match is written once with the first register set to the null record.
/// </summary>
/// <param name="Output">Writer that receives the evaluated records.</param>
/// <param name="Fields">Output expressions evaluated for each written row.</param>
/// <param name="Where">Predicate rendered for each pair.</param>
/// <param name="Data1">Set re-opened and scanned for every row of the second set.</param>
/// <param name="Data2">Set scanned by the outer loop.</param>
/// <param name="Memory1">Register assigned the current first-set row, then the null record.</param>
/// <param name="Memory2">Register assigned the current second-set row.</param>
/// <param name="AntiJoin">When true, matching pairs are not written, only unmatched rows.</param>
private static void NestedLoopRightJoin(RecordWriter Output, FNodeSet Fields, Predicate Where, DataSet Data1, DataSet Data2,
    StaticRegister Memory1, StaticRegister Memory2, bool AntiJoin)
{

    RecordReader outer = Data2.OpenReader();

    while (!outer.EndOfData)
    {

        Memory2.Assign(outer.ReadNext());

        // Scan the first table from the start, remembering whether anything matched //
        RecordReader inner = Data1.OpenReader();
        bool matched = false;
        while (!inner.EndOfData)
        {

            Memory1.Assign(inner.ReadNext());

            if (!Where.Render())
            {
                continue;
            }

            if (!AntiJoin)
            {
                Output.Insert(Fields.Evaluate());
            }
            matched = true;

        }

        // The first register is always reset to the null record after the scan //
        Memory1.Assign(inner.SourceSchema.NullRecord);
        if (matched)
        {
            continue;
        }

        Output.Insert(Fields.Evaluate());

    }

}
/// <summary>
/// Writes the set using a node set built from the map columns joined with the reducer schema.
/// </summary>
/// <param name="Writter">Writer passed on to the two-argument overload.</param>
public void WriteToFinal(RecordWriter Writter)
{

    // Output layout = map columns followed by the reducer schema //
    FNodeSet leafs = new FNodeSet(Schema.Join(this._Maps.Columns, this._Reducers.GetSchema));

    this.WriteToFinal(Writter, leafs);

}
/// <summary>
/// Captures every argument of a merge (join) so the plan can use them later, and names the plan "MERGE".
/// </summary>
/// <param name="JM">Join method stored by the plan.</param>
/// <param name="JA">Join algorithm stored by the plan.</param>
/// <param name="Output">Writer stored as the plan's output.</param>
/// <param name="Fields">Output expressions stored by the plan.</param>
/// <param name="Where">Predicate stored by the plan.</param>
/// <param name="Data1">First data set.</param>
/// <param name="Data2">Second data set.</param>
/// <param name="Equality1">Key of the first data set.</param>
/// <param name="Equality2">Key of the second data set.</param>
/// <param name="Memory1">Register of the first data set.</param>
/// <param name="Memory2">Register of the second data set.</param>
public MergePlan(MergeMethod JM, MergeAlgorithm JA, RecordWriter Output, FNodeSet Fields, Predicate Where, DataSet Data1, DataSet Data2,
    Key Equality1, Key Equality2, StaticRegister Memory1, StaticRegister Memory2)
    : base()
{

    // How to join //
    this._use_method = JM;
    this._use_algorithm = JA;

    // First side //
    this._data1 = Data1;
    this._key1 = Equality1;
    this._mem1 = Memory1;

    // Second side //
    this._data2 = Data2;
    this._key2 = Equality2;
    this._mem2 = Memory2;

    // What to emit and where //
    this._where = Where;
    this._fields = Fields;
    this._output = Output;

    this.Name = "MERGE";

}
/// <summary>
/// Implemented by derived types. Only the signature is defined here.
/// NOTE(review): the parameter descriptions below are inferred from the names - confirm against the implementers.
/// </summary>
/// <param name="Output">Writer, presumably the destination of the extended records.</param>
/// <param name="Data">Data set, presumably the records being extended.</param>
/// <param name="Inputs">Node set, presumably the expressions the implementer consumes as its inputs.</param>
/// <param name="OtherKeepValues">Node set, presumably additional expressions to carry through to the output.</param>
/// <param name="Where">Predicate, presumably a filter on <paramref name="Data"/>.</param>
public abstract void Extend(RecordWriter Output, DataSet Data, FNodeSet Inputs, FNodeSet OtherKeepValues, Predicate Where);