// Return the accumulator block stored in _accumstore under haccum, or, when
// no entry exists for that key, a block made by AccumulatorBlock.Create(naccum).
// NOTE(review): a newly created block is not added to _accumstore here --
// confirm that storing it is the caller's responsibility.
AccumulatorBlock GetAccum(int haccum, int naccum)
{
    // One lookup instead of ContainsKey followed by the indexer.
    AccumulatorBlock stored;
    if (_accumstore.TryGetValue(haccum, out stored))
    {
        return stored;
    }
    return AccumulatorBlock.Create(naccum);
}
// Transform with ordered calculations - different algorithm
//  1. Build an ordered index over the existing rows
//  2. Read the rows back in index order
//  3. Transform each row and add the results to a new table
// newheading: heading of the table that is returned
// exprs:      expressions evaluated per row; those with HasFold use accumulators
// orderexps:  expressions used to create the ordered index
// Returns a new table created from newheading.
public override DataTable TransformOrdered(DataHeading newheading, ExpressionEval[] exprs, ExpressionEval[] orderexps)
{
    // Arrays expose Length directly; no need for LINQ Count() (matches TransformAggregate)
    Logger.WriteLine(4, "TransformOrdered {0} exprs={1},{2}", newheading, exprs.Length, orderexps.Length);

    // total accumulators needed by all folding expressions
    var numacc = exprs.Where(e => e.HasFold).Sum(e => e.AccumCount);
    var newtable = DataTableLocal.Create(newheading);
    var newexprs = newtable.Heading.Reorder(exprs);
    var ordidx = OrderedIndex.Create(orderexps, Heading);

    // list of indexes of not-folded columns
    var notfold = exprs.Where(e => !e.HasFold)
                       .Select(e => newheading.FindIndex(e.Name))
                       .ToArray();

    // Build index
    for (var ord = 0; ord < Cardinality; ++ord)  //TODO:Enumerable
    {
        ordidx.Add(GetRow(ord), ord);
    }

    // NOTE(review): accblk stays null until the first row for which
    // ordidx.IsBreak is true -- presumably that is the first row; confirm.
    AccumulatorBlock accblk = null;

    // Read in index order, with access to ordering info
    DataRow lastrow = null;
    foreach (var ord in ordidx.RowOrdinals)
    {
        var oldrow = _rows[ord];
        oldrow.OrderedIndex = ordidx;  // so row functions can access it

        // if there is a group break, reset the accumulators
        if (ordidx.IsBreak)
        {
            accblk = AccumulatorBlock.Create(numacc);
        }
        DataRow newrow = oldrow.TransformAggregate(newheading, accblk, newexprs);

        // save the current row, output it on group break or when any non-fold column has changed
        // any rows not output will have identical non-fold cols so only running sums are lost
        if (lastrow != null
            && (!notfold.All(x => newrow.Values[x].Equals(lastrow.Values[x])) || ordidx.IsBreak))
        {
            newtable.AddRaw(lastrow);
        }
        lastrow = newrow;  // guaranteed to be different!
    }

    // the final pending row has not been output yet
    if (lastrow != null)
    {
        newtable.AddRaw(lastrow);
    }
    Logger.WriteLine(4, "[{0}]", newtable);
    return newtable;
}
// Read an accumulator block. The fields are consumed in this order:
// index base (ReadByte), result (ReadValue), accumulator count (ReadByte),
// then one ReadValue per accumulator.
public AccumulatorBlock ReadAccum()
{
    // header fields -- the read sequence must not be reordered
    var indexBase = ReadByte();
    var resultValue = ReadValue();
    var count = ReadByte();

    var block = AccumulatorBlock.Create(count);
    block.IndexBase = indexBase;
    block.Result = resultValue;

    // fill each accumulator slot from the following values
    var slot = 0;
    while (slot < count)
    {
        block.Accumulators[slot] = ReadValue();
        slot++;
    }
    return block;
}
// Transform with Aggregation
// Maintain index on output
// newheading: heading of the table that is returned
// exprs:      expressions evaluated per row; those with HasFold use accumulators
// Returns a new table created from newheading, holding one output row per
// distinct result of the plain (non-aggregating) Transform of the input rows.
public override DataTable TransformAggregate(DataHeading newheading, ExpressionEval[] exprs)
{
    Logger.WriteLine(4, "TransformAggregate {0} exprs={1}", newheading, exprs.Length);

    // total accumulators needed by all folding expressions
    var numacc = exprs.Where(e => e.HasFold).Sum(e => e.AccumCount);
    var newtable = DataTableLocal.Create(newheading);

    // create a dictionary for output records: transformed row -> ordinal in newtable
    var dict = new Dictionary<DataRow, int>();
    // accumulator blocks, looked up by the same ordinal
    var accblks = new List<AccumulatorBlock>();
    var newexprs = newtable.Heading.Reorder(exprs);

    foreach (var oldrow in this.GetRows())  //TODO:Enumerable
    {
        var temprow = oldrow.Transform(newheading, newexprs);

        // Single lookup instead of ContainsKey followed by the indexer
        int ord;
        if (dict.TryGetValue(temprow, out ord))
        {
            // Subsequent time row seen, update output by index
            // TODO: only need to update each row once at end from accumulators
            var newrow = newtable._rows[ord];
            var accblk = accblks[ord];
            newtable.Replace(newrow, oldrow.TransformAggregate(newheading, accblk, newexprs));
        }
        else
        {
            // First time this new row seen, add to output and index it
            var accblk = AccumulatorBlock.Create(numacc);
            var newrow = oldrow.TransformAggregate(newheading, accblk, newexprs);
            newtable.AddRaw(newrow);
            // the row just added must be indexed at the last ordinal of newtable
            Logger.Assert(newtable._dict[newtable._rows[newtable.Cardinality - 1]] == newtable.Cardinality - 1);
            dict.Add(temprow, newtable.Cardinality - 1);
            accblks.Add(accblk);
        }
    }
    Logger.WriteLine(4, "[{0}]", newtable);
    return newtable;
}