/// <summary>
/// Builds an aggregate execution plan: reads 'Source' through 'Filter', groups rows
/// by 'Keys', folds them through 'Aggregates', and projects 'ReturnSet' into 'Output'.
/// 'BaseMem' is the register bound to source rows; 'ReturnMem' to output rows.
/// </summary>
public AggregatePlan(RecordWriter Output, DataSet Source, Predicate Filter, FNodeSet Keys, AggregateSet Aggregates, FNodeSet ReturnSet, StaticRegister BaseMem, StaticRegister ReturnMem, string TempDir)
    : base()
{
    this.Name = "AGGREGATE";
    this._writer = Output;
    this._source = Source;
    this._filter = Filter;
    // A null key set means "aggregate everything into a single group".
    this._keys = (Keys == null) ? new FNodeSet() : Keys;
    this._aggregates = Aggregates;
    this._returnset = ReturnSet;
    this._basememory = BaseMem;
    this._returnmemory = ReturnMem;
    // Spill directory for interim data; default to the source's own directory.
    this._sink = (TempDir == null) ? Source.Directory : TempDir;
}
/// <summary>
/// Rehydrates a KeyValueSet from an interim record set previously flushed to disk
/// at the location described by 'h'.
/// </summary>
public static KeyValueSet Open(Header h, FNodeSet Fields, AggregateSet CR)
{
    KeyValueSet result = new KeyValueSet(Fields, CR);
    // Buffer the interim file back into memory and replay it into the cache.
    result.ImportFromInterim(BinarySerializer.BufferRecordSet(h.Path));
    return result;
}
// Constructor //
// Maps each evaluated group-by key record to its running aggregate state.
// NOTE(review): Fields.Columns.NullRecord is passed as the Dictionary constructor
// argument — presumably it supplies record equality/hashing (IEqualityComparer<Record>
// or a conversion to one); confirm against the Record/Schema declarations.
public KeyValueSet(FNodeSet Fields, AggregateSet Aggregates) { this._Maps = Fields; this._Reducers = Aggregates; this._cache = new Dictionary<Record, CompoundRecord>(Fields.Columns.NullRecord); }
/// <summary>
/// Seeds the cluster-means table by spreading 'Clusters' seed records evenly between
/// the per-field minimum (cluster 0) and the per-field maximum (cluster Clusters - 1).
/// Output schema: key int, count double, followed by the field columns.
/// </summary>
public override RecordSet Initialize(DataSet Data, Predicate Where, FNodeSet Fields, int Clusters)
{
    // Get the min of each field //
    AggregateSet set1 = new AggregateSet();
    for (int i = 0; i < Fields.Count; i++)
    {
        set1.Add(new AggregateMin(Fields[i].CloneOfMe()), Fields.Alias(i));
    }

    // Get the max of each field //
    AggregateSet set2 = new AggregateSet();
    for (int i = 0; i < Fields.Count; i++)
    {
        set2.Add(new AggregateMax(Fields[i].CloneOfMe()), Fields.Alias(i));
    }

    // Render the min and max (no keys -> a single record each) //
    RecordSet rs1 = AggregatePlan.Render(Data, Where, new FNodeSet(), set1);
    RecordSet rs2 = AggregatePlan.Render(Data, Where, new FNodeSet(), set2);

    // Create the output means table //
    RecordSet rs = new RecordSet(Schema.Join(new Schema("key int, count double"), rs1.Columns));

    // Fill in the seed records //
    for (int i = 0; i < Clusters; i++)
    {
        RecordBuilder rb = new RecordBuilder();
        rb.Add(i);
        rb.Add(0D);
        if (i == 0)
        {
            // First cluster sits at the per-field minimum.
            rb.Add(rs1[0]);
        }
        else if (i == Clusters - 1)
        {
            // Last cluster sits at the per-field maximum.
            rb.Add(rs2[0]);
        }
        else
        {
            // Interior clusters are linearly interpolated between min and max.
            // BUG FIX: the interpolation factor must be the cluster index (i),
            // not the field index (j); with j every interior cluster received
            // an identical seed record, collapsing the initial means.
            for (int j = 0; j < rs1.Columns.Count; j++)
            {
                double lo = rs1[0][j].DOUBLE;
                double hi = rs2[0][j].DOUBLE;
                rb.Add(lo + (hi - lo) / (double)Clusters * (double)i);
            }
        }
        rs.Add(rb.ToRecord());
    }
    return rs;
}
/// <summary>
/// Seeds the cluster means by randomly assigning each record to one of 'Clusters'
/// buckets (seeded random int in [0, Clusters)) and averaging the fields per bucket.
/// </summary>
public override RecordSet Initialize(DataSet Data, Predicate Where, FNodeSet Fields, int Clusters)
{
    // Per-bucket element count plus the running average of every field.
    AggregateSet aggs = new AggregateSet();
    aggs.Add(new AggregateSum(FNodeFactory.Value(1D)), "CLUSTER_ELEMENT_COUNT");
    for (int idx = 0; idx < Fields.Count; idx++)
    {
        aggs.Add(new AggregateAverage(Fields[idx].CloneOfMe()), Fields.Alias(idx));
    }

    // Grouping key: seeded random integer in [0, Clusters) picks the bucket.
    FNode bucket = new FNodeResult(null, new CellRandomInt());
    bucket.AddChildNode(new FNodeValue(bucket, new Cell(this.Seed)));
    bucket.AddChildNode(new FNodeValue(bucket, new Cell(0)));
    bucket.AddChildNode(new FNodeValue(bucket, new Cell(Clusters)));
    FNodeSet keys = new FNodeSet();
    keys.Add(bucket);

    return AggregatePlan.Render(Data, Where, keys, aggs);
}
// Runs one k-means iteration: maps every record to its nearest mean via a
// RowClusterCellFunction key, recomputes per-cluster count/mean/variance in a single
// aggregate pass, splits the result into new means (leading columns) and stats
// (id, count, variances), repairs clusters that received no records, and returns
// true when the change between the old and new means is below _exit_condition.
// NOTE(review): the column split relies on AggregatePlan.Render emitting key columns
// first and aggregate columns after (see GetInterimSchema) — confirm if that changes.
private bool ItterateOnce() { // Create the cluster mapping FNode; this node does the nearest neighbor test // FNodeSet keys = new FNodeSet(); FNode n = new FNodeResult(null, new RowClusterCellFunction(this._rule, this._means)); foreach (FNode t in this._fields.Nodes) { n.AddChildNode(t.CloneOfMe()); } keys.Add("CLUSTER_ID", n); // Create the aggregate// AggregateSet set = new AggregateSet(); // Add a counter to the aggregate // set.Add(new AggregateSum(FNodeFactory.Value(1D)), "CLUSTER_ELEMENT_COUNT"); // load the aggregate with the mean aggregates // for (int i = 0; i < this._fields.Count; i++) { set.Add(new AggregateAverage(this._fields[i].CloneOfMe()), this._fields.Alias(i)); } // Load the aggregate with the variance aggregates // for (int i = 0; i < this._fields.Count; i++) { set.Add(new AggregateVarianceP(this._fields[i].CloneOfMe()), "VAR_" + this._fields.Alias(i)); } // Run the aggregate; this is basically a horse aggregate step with the cluster node mapping as the key, and averaging as the value RecordSet rs = AggregatePlan.Render(this._data, this._where, keys, set); // Need to chop up the recordset we just created // Key mean_keeper = Key.Build(this._means.Columns.Count); RecordSet means = FastReadPlan.Render(rs, Predicate.TrueForAll, mean_keeper, long.MaxValue); Key stat_keeper = new Key(0,1); // keep the id and the count for (int i = mean_keeper.Count; i < rs.Columns.Count; i++) { stat_keeper.Add(i); } this._stats = FastReadPlan.Render(rs, Predicate.TrueForAll, stat_keeper, long.MaxValue); // Check for cluster misses; cluster misses occur when no node maps to a cluster correctly // if (means.Count != this._means.Count) { this.HandleNullCluster(means); } // Compare the changes between itterations double change = this.CompareChanges(this._means, means); // Set the means to the newly calculated means // this._means = means; // Return a boolean indicating if we failed or not return change < this._exit_condition; }
// Convenience overload: delegates to the main constructor with an empty interim-header list.
public AggregateStructure(string TempDir, FNodeSet Fields, AggregateSet Aggregates) :this(TempDir, Fields, Aggregates, new List<Header>()) { }
// Constructor //
/// <summary>
/// Builds an aggregate structure that accumulates into an in-memory KeyValueSet,
/// spilling interim results into 'TempDir'; 'Headers' tracks the interim files.
/// </summary>
public AggregateStructure(string TempDir, FNodeSet Fields, AggregateSet Aggregates, List<Header> Headers)
{
    this._TempDir = TempDir;
    this._Cache = new KeyValueSet(Fields, Aggregates);
    // BUG FIX: the supplied header list was discarded and replaced with a fresh
    // empty list, so any pre-existing interim headers passed by the caller were
    // silently lost. Keep the caller's list (empty list if null).
    this._Headers = Headers ?? new List<Header>();
    this._keys = Fields;
    this._aggregates = Aggregates;
}
/// <summary>
/// Aggregates 'Source' into an in-memory RecordSet: rows passing 'Filter' are grouped
/// by 'Keys' and folded through 'Aggregates'. Output schema is key columns followed
/// by aggregate columns.
/// </summary>
public static RecordSet Render(DataSet Source, Predicate Filter, FNodeSet Keys,
    AggregateSet Aggregates)
{
    // Output schema: key columns first, then the aggregate columns.
    Schema s = Schema.Join(Keys.Columns, Aggregates.GetSchema);
    RecordSet rs = new RecordSet(s);
    RecordWriter w = rs.OpenWriter();

    // Register bound to source rows feeds both the keys and the aggregates.
    StaticRegister mem1 = new StaticRegister(Source.Columns);
    Keys.AssignRegister(mem1);
    Aggregates.AssignRegister(mem1);

    // Register bound to output rows feeds the return projection.
    StaticRegister mem2 = new StaticRegister(rs.Columns);
    FNodeSet out_nodes = new FNodeSet(rs.Columns);
    out_nodes.AssignRegister(mem2);

    // BUG FIX: the plan previously received a fresh 'new FNodeSet(s)' that was never
    // bound to mem2, while the register-bound 'out_nodes' was built and then discarded;
    // the return projection would therefore not read from ReturnMem. Pass the bound set.
    AggregatePlan plan = new AggregatePlan(w, Source, Filter, Keys, Aggregates, out_nodes, mem1, mem2, Source.Directory);
    plan.Execute();
    w.Close();

    return rs;
}
/// <summary>
/// Schema of the interim aggregation table: key columns followed by aggregate columns.
/// </summary>
public static Schema GetInterimSchema(FNodeSet Keys, AggregateSet Aggregates)
{
    Schema keyColumns = Keys.Columns;
    Schema aggColumns = Aggregates.GetSchema;
    return Schema.Join(keyColumns, aggColumns);
}
/// <summary>
/// Deep-copies this aggregate set, cloning each aggregate and preserving its alias.
/// </summary>
public AggregateSet CloneOfMe()
{
    AggregateSet copy = new AggregateSet();
    int n = this.Count;
    for (int idx = 0; idx < n; idx++)
    {
        copy.Add(this._cache[idx].CloneOfMe(), this._alias[idx]);
    }
    return copy;
}