/// <summary>
/// The body executed by each thread when grouping.
/// For every result row in the job's range, the row's group is fetched from (or inserted into)
/// the shared dictionary via GetOrAdd and each aggregate is applied to the buckets of that group.
/// Aggregate results are kept in buckets (an array of value holders stored as the value of a key).
/// </summary>
/// <param name="job"> A group by job class; it is cast to GroupByJobBuckets. </param>
protected override void SingleThreadGroupByWork(object job)
{
    var groupByJob = (GroupByJobBuckets)job;
    var table = groupByJob.resTable;
    var sharedGroups = groupByJob.groups;
    var aggs = groupByJob.aggregates;

    // A pre-built bucket array that is offered to the dictionary on every lookup.
    AggregateBucketResult[] spare = AggregateBucketResult.CreateBucketResults(aggs);

    for (int rowIndex = groupByJob.start; rowIndex < groupByJob.end; rowIndex++)
    {
        TableResults.RowProxy row = table[rowIndex];
        AggregateBucketResult[] groupBuckets = sharedGroups.GetOrAdd(rowIndex, spare);

        // Getting the spare array back means it was the one inserted,
        // so a brand-new spare is prepared in advance for the next insertion.
        if (spare != null && object.ReferenceEquals(spare, groupBuckets))
        {
            spare = AggregateBucketResult.CreateBucketResults(aggs);
        }

        for (int aggIndex = 0; aggIndex < aggs.Length; aggIndex++)
        {
            aggs[aggIndex].ApplyThreadSafe(in row, groupBuckets[aggIndex]);
        }
    }
}
/// <summary>
/// Groups the entire result table on the calling thread.
/// Each row is hashed, looked up in a local Dictionary (a new bucket array is created and added
/// when the key is missing) and every aggregate is applied to the buckets of the row's group.
/// </summary>
/// <param name="hasher"> Computes the hash of a row; its cache is set to the comparers of the given comparer. </param>
/// <param name="comparer"> The equality comparer used by the Dictionary of groups. </param>
/// <param name="resTable"> The table of results to group. </param>
/// <returns> The groups together with the result table, wrapped in DictGroupDictKeyBucket. </returns>
private GroupByResults SingleThreadGroupBy(RowHasher hasher, RowEqualityComparerGroupKey comparer, ITableResults resTable)
{
    hasher.SetCache(comparer.comparers);
    var groupedBuckets = new Dictionary<GroupDictKey, AggregateBucketResult[]>(comparer);

    for (int rowIndex = 0; rowIndex < resTable.NumberOfMatchedElements; rowIndex++)
    {
        TableResults.RowProxy row = resTable[rowIndex];

        // GroupDictKey is a struct, it carries the row hash and the row position.
        var groupKey = new GroupDictKey(hasher.Hash(in row), rowIndex);

        if (!groupedBuckets.TryGetValue(groupKey, out AggregateBucketResult[] groupBuckets))
        {
            groupBuckets = AggregateBucketResult.CreateBucketResults(this.aggregates);
            groupedBuckets.Add(groupKey, groupBuckets);
        }

        for (int aggIndex = 0; aggIndex < this.aggregates.Length; aggIndex++)
        {
            this.aggregates[aggIndex].Apply(in row, groupBuckets[aggIndex]);
        }
    }

    return new DictGroupDictKeyBucket(groupedBuckets, resTable);
}
/// <summary>
/// The body executed by a thread when grouping.
/// Every result row in the job's range is looked up in the job's Dictionary (and added when missing)
/// and the aggregate functions are applied to the buckets of the row's group.
/// Note that the comparer cache is set when the hash is computed,
/// so the insertion does not have to compute the values for comparison again.
/// Aggregate results are kept in buckets (an array of value holders stored as the value of a key).
/// </summary>
/// <param name="job"> A group by job class; it is cast to GroupByJobBuckets. </param>
protected override void SingleThreadGroupByWork(object job)
{
    var groupByJob = (GroupByJobBuckets)job;
    var rowHasher = groupByJob.hasher;
    var aggs = groupByJob.aggregates;
    var table = groupByJob.resTable;
    var jobGroups = groupByJob.groups;

    for (int rowIndex = groupByJob.start; rowIndex < groupByJob.end; rowIndex++)
    {
        TableResults.RowProxy row = table[rowIndex];

        // GroupDictKey is a struct, it carries the row hash and the row position.
        var groupKey = new GroupDictKey(rowHasher.Hash(in row), rowIndex);

        if (!jobGroups.TryGetValue(groupKey, out AggregateBucketResult[] groupBuckets))
        {
            groupBuckets = AggregateBucketResult.CreateBucketResults(aggs);
            jobGroups.Add(groupKey, groupBuckets);
        }

        for (int aggIndex = 0; aggIndex < aggs.Length; aggIndex++)
        {
            aggs[aggIndex].Apply(in row, groupBuckets[aggIndex]);
        }
    }
}
/// <summary>
/// Creates the final result buckets for all aggregates and lets
/// Aggregate.ExtractNonAstAggsAndResults fill the non-asterix aggregates and their results.
/// When the number of extracted results differs from the number of final results,
/// the containsAst flag is set.
/// </summary>
public SingleGroupGroupByStreamed(QueryExpressionInfo expressionInfo, IGroupByExecutionHelper executionHelper, int columnCount, int[] usedVars)
    : base(expressionInfo, executionHelper, columnCount, usedVars)
{
    this.finalResults = AggregateBucketResult.CreateBucketResults(this.aggregates);
    Aggregate.ExtractNonAstAggsAndResults(
        this.aggregates,
        this.finalResults,
        out this.nonAsterixAggregates,
        out this.nonAsterixResults);

    bool resultCountsDiffer = this.finalResults.Length != this.nonAsterixResults.Length;
    if (resultCountsDiffer)
    {
        this.containsAst = true;
    }
}
/// <summary>
/// Builds one result bucket per aggregate, in the same order as the given aggregates.
/// Each bucket is produced by AggregateBucketResult.Factory from the aggregate's return type and function name.
/// </summary>
/// <param name="aggregates"> The aggregates to create buckets for. </param>
/// <returns> An array of buckets, or null if there are no aggregates. </returns>
public static AggregateBucketResult[] CreateBucketResults(Aggregate[] aggregates)
{
    int aggregateCount = aggregates.Length;
    if (aggregateCount == 0)
    {
        // Callers receive null (not an empty array) when there is nothing to aggregate.
        return null;
    }

    var createdBuckets = new AggregateBucketResult[aggregateCount];
    int slot = 0;
    foreach (Aggregate aggregate in aggregates)
    {
        createdBuckets[slot] = AggregateBucketResult.Factory(aggregate.GetAggregateReturnType(), aggregate.GetFuncName());
        slot++;
    }

    return createdBuckets;
}
/// <summary>
/// A method that is used only in the context of the fully streamed version of group by.
/// The keys put in the concurrent Dictionary are of the base type AggregateBucketResult(T), since
/// the keys and the aggregate values are stored in the same array to save a bit of memory.
/// Both buckets are cast to the streamed bucket of the given type and compared through the typed overload.
/// Note that the values must always be set beforehand.
/// </summary>
/// <param name="type"> The type of the stored values; only int and string are supported. </param>
/// <param name="x"> The first bucket to compare. </param>
/// <param name="y"> The second bucket to compare. </param>
/// <returns> The result of the typed comparison. </returns>
/// <exception cref="ArgumentException"> Thrown when the type is neither int nor string. </exception>
public static bool Equals(Type type, AggregateBucketResult x, AggregateBucketResult y)
{
    if (type == typeof(int))
    {
        var leftInt = (AggregateBucketResultStreamed<int>)x;
        var rightInt = (AggregateBucketResultStreamed<int>)y;
        return Equals(leftInt, rightInt);
    }

    if (type == typeof(string))
    {
        var leftString = (AggregateBucketResultStreamed<string>)x;
        var rightString = (AggregateBucketResultStreamed<string>)y;
        return Equals(leftString, rightString);
    }

    throw new ArgumentException($"Aggregate bucket result compare, unkown type to compare. Type = {type}.");
}
/// <summary>
/// The body executed by each thread when grouping, done in two steps.
/// In the first (local) step, every result row in the job's range is grouped with a simple Dictionary
/// whose value is an index of the group; the aggregate results are stored at that index
/// in the job's aggResults.
/// In the second (global) step, the locally computed groups are merged into the global Dictionary,
/// where the aggregate results are stored in buckets.
/// Notice that the local part uses the hash cache with comparers when inserting into the Dictionary,
/// and when inserting into the global Dictionary, the hash values are the ones stored in the GroupDictKey.
/// </summary>
/// <param name="job"> A group by job class; it is cast to GroupByJobMixListsBuckets. </param>
protected override void SingleThreadGroupByWork(object job)
{
    var mixJob = (GroupByJobMixListsBuckets)job;
    var table = mixJob.resTable;
    var localGroups = mixJob.groups;
    var aggs = mixJob.aggregates;
    var rowHasher = mixJob.hasher;
    var localResults = mixJob.aggResults;

    // Local part with Lists.
    for (int rowIndex = mixJob.start; rowIndex < mixJob.end; rowIndex++)
    {
        TableResults.RowProxy row = table[rowIndex];

        // GroupDictKey is a struct, it carries the row hash and the row position.
        var groupKey = new GroupDictKey(rowHasher.Hash(in row), rowIndex);

        if (!localGroups.TryGetValue(groupKey, out int groupPosition))
        {
            // A new group receives the next free index.
            groupPosition = localGroups.Count;
            localGroups.Add(groupKey, groupPosition);
        }

        for (int aggIndex = 0; aggIndex < aggs.Length; aggIndex++)
        {
            aggs[aggIndex].Apply(in row, localResults[aggIndex], groupPosition);
        }
    }

    // Global part with buckets.
    var sharedGroups = mixJob.globalGroups;
    AggregateBucketResult[] spare = AggregateBucketResult.CreateBucketResults(aggs);

    foreach (var localGroup in localGroups)
    {
        AggregateBucketResult[] sharedBuckets = sharedGroups.GetOrAdd(localGroup.Key, spare);

        // Getting the spare array back means it was the one inserted, so a new spare is prepared.
        if (spare != null && object.ReferenceEquals(spare, sharedBuckets))
        {
            spare = AggregateBucketResult.CreateBucketResults(aggs);
        }

        for (int aggIndex = 0; aggIndex < aggs.Length; aggIndex++)
        {
            aggs[aggIndex].MergeThreadSafe(sharedBuckets[aggIndex], localResults[aggIndex], localGroup.Value);
        }
    }
}
/// <summary>
/// Creates the jobs for the parallel group by, one per thread.
/// Every job except the last one covers a range of (number of matched elements / thread count) rows
/// and receives freshly created result buckets.
/// The last job runs from the end of the previous range up to the end of the result table.
///
/// Note that the passed aggregate results are the ones the rest will be merged into.
/// They are placed into the last job of the array.
/// </summary>
/// <param name="resTable"> The result table whose rows are divided among the jobs. </param>
/// <param name="aggs"> Aggregation functions. </param>
/// <param name="aggResults"> The results of the merge are stored in these instances. They are placed into the last job. </param>
/// <returns> The created jobs, ordered by the row ranges they cover. </returns>
private GroupByJob[] CreateJobs(ITableResults resTable, Aggregate[] aggs, AggregateBucketResult[] aggResults)
{
    var jobs = new GroupByJob[this.ThreadCount];
    int lastJobIndex = jobs.Length - 1;

    // Note that this is never <= 0 because it was checked when picking the implementation.
    int rowsPerJob = resTable.NumberOfMatchedElements / this.ThreadCount;

    int rangeStart = 0;
    for (int jobIndex = 0; jobIndex < lastJobIndex; jobIndex++)
    {
        int rangeEnd = rangeStart + rowsPerJob;
        jobs[jobIndex] = new GroupByJob(aggs, AggregateBucketResult.CreateBucketResults(aggs), rangeStart, rangeEnd, resTable);
        rangeStart = rangeEnd;
    }

    // The last job takes everything that is left and writes into the passed results.
    jobs[lastJobIndex] = new GroupByJob(aggs, aggResults, rangeStart, resTable.NumberOfMatchedElements, resTable);
    return jobs;
}
/// <summary>
/// Creates the final result buckets for all aggregates and lets
/// Aggregate.ExtractNonAstAggsAndResults fill the non-asterix aggregates and their final results.
/// When the number of extracted results differs from the number of final results,
/// the containsAst flag is set.
/// Also allocates, per thread (executionHelper.ThreadCount), a counter of matched elements
/// and an array of result buckets for the non-asterix aggregates.
/// </summary>
public SingleGroupGroupByHalfStreamed(QueryExpressionInfo expressionInfo, IGroupByExecutionHelper helper, int columnCount, int[] usedVars)
    : base(expressionInfo, helper, columnCount, usedVars)
{
    this.matcherNonAsterixResults = new AggregateBucketResult[this.executionHelper.ThreadCount][];

    this.finalResults = AggregateBucketResult.CreateBucketResults(this.aggregates);
    Aggregate.ExtractNonAstAggsAndResults(
        this.aggregates,
        this.finalResults,
        out this.nonAsterixAggregates,
        out this.finalNonAsterixResults);

    bool resultCountsDiffer = this.finalResults.Length != this.finalNonAsterixResults.Length;
    if (resultCountsDiffer)
    {
        this.containsAst = true;
    }

    this.numberOfMatchedElements = new int[this.executionHelper.ThreadCount];

    // Every matcher gets its own buckets for the non-asterix aggregates.
    int matcher = 0;
    while (matcher < this.matcherNonAsterixResults.Length)
    {
        this.matcherNonAsterixResults[matcher] = AggregateBucketResult.CreateBucketResults(this.nonAsterixAggregates);
        matcher++;
    }
}
/// <summary>
/// The body executed by each thread when grouping, done in two steps.
/// In the first (local) step, every result row in the job's range is grouped with a simple Dictionary
/// that stores the buckets of each group as the value on its key.
/// In the second (global) step, the locally computed groups are merged into the global Dictionary.
/// Notice that the local part uses the hash cache with comparers when inserting into the Dictionary,
/// and when inserting into the global Dictionary, the hash values are the ones stored in the GroupDictKey.
/// </summary>
/// <param name="job"> A group by job class; it is cast to GroupByJobBuckets. </param>
protected override void SingleThreadGroupByWork(object job)
{
    var groupByJob = (GroupByJobBuckets)job;
    var table = groupByJob.resTable;
    var localGroups = groupByJob.groups;
    var aggs = groupByJob.aggregates;
    var rowHasher = groupByJob.hasher;

    // Local part.
    for (int rowIndex = groupByJob.start; rowIndex < groupByJob.end; rowIndex++)
    {
        TableResults.RowProxy row = table[rowIndex];

        // GroupDictKey is a struct, it carries the row hash and the row position.
        var groupKey = new GroupDictKey(rowHasher.Hash(in row), rowIndex);

        if (!localGroups.TryGetValue(groupKey, out AggregateBucketResult[] groupBuckets))
        {
            groupBuckets = AggregateBucketResult.CreateBucketResults(aggs);
            localGroups.Add(groupKey, groupBuckets);
        }

        for (int aggIndex = 0; aggIndex < aggs.Length; aggIndex++)
        {
            aggs[aggIndex].Apply(in row, groupBuckets[aggIndex]);
        }
    }

    // Global part.
    var sharedGroups = groupByJob.globalGroups;
    foreach (var localGroup in localGroups)
    {
        AggregateBucketResult[] localBuckets = localGroup.Value;
        AggregateBucketResult[] sharedBuckets = sharedGroups.GetOrAdd(localGroup.Key, localBuckets);

        // Getting the very same array back means that the local group was inserted as is,
        // so there is nothing to merge. The same holds when there are no local buckets at all.
        if (localBuckets == null || object.ReferenceEquals(sharedBuckets, localBuckets))
        {
            continue;
        }

        // Otherwise the group already existed; merge the local results into the returned buckets.
        for (int aggIndex = 0; aggIndex < aggs.Length; aggIndex++)
        {
            aggs[aggIndex].MergeThreadSafe(sharedBuckets[aggIndex], localBuckets[aggIndex]);
        }
    }
}
/// <summary>
/// Called only if the grouping runs in parallel.
/// Merges the local group results of the given job into the global results.
/// Afterwards the job's slot in groupJobs is set to null.
/// </summary>
/// <param name="job"> The job whose local groups are merged. </param>
/// <param name="matcherID"> The index of the job inside groupJobs. </param>
private void MergeResults(GroupJob job, int matcherID)
{
    foreach (var localGroup in job.groups)
    {
        // The global key is built from the stored hash and the row at the stored position.
        var localKey = localGroup.Key;
        var globalKey = new GroupDictKeyFull(localKey.hash, job.resTable[localKey.position]);

        AggregateBucketResult[] sharedBuckets = this.globalGroups.GetOrAdd(globalKey, job.spareBuckets);

        // Getting the spare array back means it was the one inserted, so a new spare is prepared.
        if (job.spareBuckets != null && object.ReferenceEquals(job.spareBuckets, sharedBuckets))
        {
            job.spareBuckets = AggregateBucketResult.CreateBucketResults(this.aggregates);
        }

        for (int aggIndex = 0; aggIndex < this.aggregates.Length; aggIndex++)
        {
            this.aggregates[aggIndex].MergeThreadSafe(sharedBuckets[aggIndex], job.aggResults[aggIndex], localGroup.Value);
        }
    }

    this.groupJobs[matcherID] = null;
}
/// <summary>
/// Creates one group job per thread (executionHelper.ThreadCount) and the global Dictionary of groups.
/// The first job uses the initially created comparer and hasher,
/// every other job receives their clones made by CloneHasherAndComparer.
/// Each job obtains its own bucket results and its own table of results.
/// </summary>
public TwoStepHalfStreamedListBucket(QueryExpressionInfo expressionInfo, IGroupByExecutionHelper executionHelper, int columnCount, int[] usedVars)
    : base(expressionInfo, executionHelper, columnCount, usedVars)
{
    this.groupJobs = new GroupJob[this.executionHelper.ThreadCount];

    // Create the initial comparer and hasher; the hasher caches into the comparers.
    this.CreateHashersAndComparers(out var comparers, out var hashers);
    var initialComparer = RowEqualityComparerGroupKey.Factory(null, comparers, true);
    var initialHasher = new RowHasher(hashers);
    initialHasher.SetCache(initialComparer.comparers);

    // The initial job.
    this.groupJobs[0] = new GroupJob(
        this.aggregates,
        initialComparer,
        initialHasher,
        AggregateBucketResult.CreateBucketResults(this.aggregates),
        new TableResults(this.ColumnCount, this.executionHelper.FixedArraySize, this.usedVars));

    // The remaining jobs work with clones of the initial comparer and hasher.
    for (int jobIndex = 1; jobIndex < this.executionHelper.ThreadCount; jobIndex++)
    {
        CloneHasherAndComparer(initialComparer, initialHasher, out var clonedComparer, out var clonedHasher);
        this.groupJobs[jobIndex] = new GroupJob(
            this.aggregates,
            clonedComparer,
            clonedHasher,
            AggregateBucketResult.CreateBucketResults(this.aggregates),
            new TableResults(this.ColumnCount, this.executionHelper.FixedArraySize, this.usedVars));
    }

    var globalComparer = RowEqualityComparerGroupDickKeyFull.Factory(comparers, false);
    this.globalGroups = new ConcurrentDictionary<GroupDictKeyFull, AggregateBucketResult[]>(globalComparer);
}
/// <summary>
/// Provides an array of buckets that is used as a key/value in a Dictionary inside the streamed version
/// of group by. The first keysCount slots hold the key buckets, the rest hold the aggregate result buckets.
/// If the last array was inserted into the Dictionary (lastWasInserted), a brand-new array is created
/// together with new aggregate result buckets.
/// Otherwise the last created array is reused and only its key buckets are actualised.
/// </summary>
/// <param name="result"> The elements that are passed to the key bucket factories. </param>
/// <returns> The array stored in lastBucketsKeyValue. </returns>
public AggregateBucketResult[] Create(Element[] result)
{
    if (this.lastWasInserted)
    {
        int aggregateCount = this.aggregates.Length;
        var freshBuckets = new AggregateBucketResult[this.keysCount + aggregateCount];
        this.lastBucketsKeyValue = freshBuckets;

        // Init the aggregate result buckets; they are placed right behind the keys.
        for (int aggIndex = 0; aggIndex < aggregateCount; aggIndex++)
        {
            var aggregate = this.aggregates[aggIndex];
            freshBuckets[this.keysCount + aggIndex] = AggregateBucketResult.Factory(aggregate.GetAggregateReturnType(), aggregate.GetFuncName());
        }
    }

    // Init the key buckets.
    for (int keyIndex = 0; keyIndex < this.keysCount; keyIndex++)
    {
        this.lastBucketsKeyValue[keyIndex] = this.factories[keyIndex].Create(this.lastWasInserted, result);
    }

    return this.lastBucketsKeyValue;
}
/// <summary>
/// Processes one result of the matcher with the given ID.
/// A non-null result is grouped inside the matcher's job: the result is set as the temporary row of
/// the job's table, its group is looked up (and created when missing) and the aggregates are applied.
/// A null result triggers merging of the job's results into the global ones,
/// but only if there is more than one job.
/// </summary>
/// <param name="matcherID"> The index of the job inside groupJobs. </param>
/// <param name="result"> The result to process, or null. </param>
public override void Process(int matcherID, Element[] result)
{
    var job = this.groupJobs[matcherID];

    if (result == null)
    {
        // If it runs in a single thread, there is no need to merge the results.
        if (this.groupJobs.Length > 1)
        {
            this.MergeResults(job, matcherID);
        }
        return;
    }

    // Create a temporary row; its proxy is positioned right behind the stored rows.
    var table = job.resTable;
    table.temporaryRow = result;
    int rowPosition = table.RowCount;
    TableResults.RowProxy row = table[rowPosition];

    // GroupDictKey is a struct, it carries the row hash and the row position.
    var groupKey = new GroupDictKey(job.hasher.Hash(in row), rowPosition);

    if (!job.groups.TryGetValue(groupKey, out AggregateBucketResult[] groupBuckets))
    {
        groupBuckets = AggregateBucketResult.CreateBucketResults(this.aggregates);
        job.groups.Add(groupKey, groupBuckets);

        // Store the temporary row in the table. This causes copying of the row to the actual Lists of the table.
        // While the position of the stored row proxy remains the same, next time someone tries to access it,
        // it returns the elements from the actual table and not from the temporary row.
        table.StoreTemporaryRow();
        table.temporaryRow = null;
    }

    for (int aggIndex = 0; aggIndex < this.aggregates.Length; aggIndex++)
    {
        this.aggregates[aggIndex].Apply(in row, groupBuckets[aggIndex]);
    }
}
/// <summary>
/// Sets the values of the Count(*) aggregates directly, because there is nothing to be computed for them.
/// The remaining (non Count(*)) aggregates, if there are any, are passed together with their
/// result buckets into the grouping, which runs in parallel or in a single thread.
/// Note that direct references to the aggregates and their results are passed. Thus it is assumed
/// that the called methods merge the data only into the passed aggregate results.
/// </summary>
/// <param name="resTable"> The table of results to group. </param>
/// <returns> The group by results created from the aggregate results and the result table. </returns>
public override GroupByResults Group(ITableResults resTable)
{
    var aggResults = AggregateBucketResult.CreateBucketResults(this.aggregates);
    var aggregatesToCompute = new List<Aggregate>();
    var resultsToCompute = new List<AggregateBucketResult>();

    for (int aggIndex = 0; aggIndex < this.aggregates.Length; aggIndex++)
    {
        var aggregate = this.aggregates[aggIndex];
        if (!aggregate.IsAstCount)
        {
            // Non asterix counts are further passed into the computation functions.
            aggregatesToCompute.Add(aggregate);
            resultsToCompute.Add(aggResults[aggIndex]);
            continue;
        }

        // Actualise Count(*).
        ((Count<int>)aggregate).IncBy(resTable.NumberOfMatchedElements, aggResults[aggIndex]);
    }

    // Note that the results will reside in the aggResults variable after the computation is finished.
    if (aggregatesToCompute.Count != 0)
    {
        // The parallel solution is used only if the work can be split equally,
        // meaning that each thread receives at least one row of the result table.
        bool runInParallel = this.InParallel && (resTable.NumberOfMatchedElements / this.ThreadCount > 0);
        if (runInParallel)
        {
            this.ParallelGroupBy(resTable, aggregatesToCompute.ToArray(), resultsToCompute.ToArray());
        }
        else
        {
            this.SingleThreadGroupBy(resTable, aggregatesToCompute.ToArray(), resultsToCompute.ToArray());
        }
    }

    return CreateGroupByResults(aggResults, resTable);
}
/// <summary>
/// Casts the bucket to IGetFinal(T) and returns the value of its GetFinal method called with 0.
/// </summary>
/// <typeparam name="T"> The type of the returned value. </typeparam>
/// <param name="bucket"> The bucket to read from; it must implement IGetFinal(T). </param>
/// <returns> The value returned by GetFinal(0). </returns>
public static T GetFinalValue<T>(AggregateBucketResult bucket)
{
    var finalValueSource = (IGetFinal<T>)bucket;
    return finalValueSource.GetFinal(0);
}
/// <summary>
/// Adds the value to the result stored in the bucket using Interlocked.Add.
/// </summary>
/// <param name="value"> The value to add. </param>
/// <param name="bucket"> The bucket holding the result; it is cast to AggregateBucketResult(int). </param>
public void IncByThreadSafe(int value, AggregateBucketResult bucket)
{
    Interlocked.Add(ref ((AggregateBucketResult<int>)bucket).aggResult, value);
}
/// <summary>
/// Adds the value to the result stored in the bucket.
/// </summary>
/// <param name="value"> The value to add. </param>
/// <param name="bucket"> The bucket holding the result; it is cast to AggregateBucketResult(int). </param>
public void IncBy(int value, AggregateBucketResult bucket)
{
    ((AggregateBucketResult<int>)bucket).aggResult += value;
}