예제 #1
0
        /// <summary>
        /// Per-thread grouping routine that works directly against the job's shared group map.
        /// Each row index in the job's [start, end) range is handed to <c>GetOrAdd</c> on that map,
        /// and every aggregate is then applied to the bucket array returned for the row.
        /// Aggregate results live in buckets (an array of value holders stored as the value of a key).
        /// </summary>
        /// <param name="job"> The work item; it is cast to <c>GroupByJobBuckets</c>. </param>
        protected override void SingleThreadGroupByWork(object job)
        {
            var bucketJob    = (GroupByJobBuckets)job;
            var table        = bucketJob.resTable;
            var sharedGroups = bucketJob.groups;
            var aggFuncs     = bucketJob.aggregates;

            // A ready-made bucket array offered to GetOrAdd on every row.
            // CreateBucketResults may hand back null, hence the null test below.
            AggregateBucketResult[] candidate = AggregateBucketResult.CreateBucketResults(aggFuncs);

            for (int rowIndex = bucketJob.start; rowIndex < bucketJob.end; rowIndex++)
            {
                TableResults.RowProxy   currentRow   = table[rowIndex];
                AggregateBucketResult[] groupBuckets = sharedGroups.GetOrAdd(rowIndex, candidate);

                // Receiving our own candidate back means the map now owns it,
                // so a fresh one is prepared for the next row.
                bool candidateConsumed = candidate != null && object.ReferenceEquals(candidate, groupBuckets);
                if (candidateConsumed)
                {
                    candidate = AggregateBucketResult.CreateBucketResults(aggFuncs);
                }

                for (int a = 0; a < aggFuncs.Length; a++)
                {
                    aggFuncs[a].ApplyThreadSafe(in currentRow, groupBuckets[a]);
                }
            }
        }
예제 #2
0
        /// <summary>
        /// Groups the entire result table on the calling thread.
        /// Each row is hashed and looked up in a local Dictionary keyed by (hash, row position);
        /// a missing group gets a new bucket array, and every aggregate is applied to the group's buckets.
        /// </summary>
        /// <param name="hasher"> Row hasher; its cache is set to the comparer's comparers before any row is hashed. </param>
        /// <param name="comparer"> Equality comparer given to the group Dictionary. </param>
        /// <param name="resTable"> The rows to group. </param>
        /// <returns> The groups wrapped, together with the table, in a <c>DictGroupDictKeyBucket</c>. </returns>
        private GroupByResults SingleThreadGroupBy(RowHasher hasher, RowEqualityComparerGroupKey comparer, ITableResults resTable)
        {
            hasher.SetCache(comparer.comparers);
            var groupMap = new Dictionary <GroupDictKey, AggregateBucketResult[]>(comparer);

            for (int rowIndex = 0; rowIndex < resTable.NumberOfMatchedElements; rowIndex++)
            {
                TableResults.RowProxy currentRow = resTable[rowIndex];
                var groupKey = new GroupDictKey(hasher.Hash(in currentRow), rowIndex); // A struct key.

                // First row of a group: allocate its buckets and register them.
                if (!groupMap.TryGetValue(groupKey, out AggregateBucketResult[] groupBuckets))
                {
                    groupBuckets = AggregateBucketResult.CreateBucketResults(this.aggregates);
                    groupMap.Add(groupKey, groupBuckets);
                }

                for (int a = 0; a < this.aggregates.Length; a++)
                {
                    this.aggregates[a].Apply(in currentRow, groupBuckets[a]);
                }
            }

            return new DictGroupDictKeyBucket(groupMap, resTable);
        }
        /// <summary>
        /// Per-thread grouping routine.
        /// Each row in the job's [start, end) range is hashed, looked up in the job's group map
        /// (a new bucket array is added when the key is missing), and every aggregate is applied to
        /// the group's buckets.
        /// Aggregate results live in buckets (an array of value holders stored as the value of a key).
        /// NOTE(review): the original comment states that hashing also fills the comparer cache so the
        /// insertion does not recompute the compared values; that is not visible in this method.
        /// </summary>
        /// <param name="job"> The work item; it is cast to <c>GroupByJobBuckets</c>. </param>
        protected override void SingleThreadGroupByWork(object job)
        {
            var bucketJob = (GroupByJobBuckets)job;
            var rowHasher = bucketJob.hasher;
            var aggFuncs  = bucketJob.aggregates;
            var table     = bucketJob.resTable;
            var groupMap  = bucketJob.groups;

            for (int rowIndex = bucketJob.start; rowIndex < bucketJob.end; rowIndex++)
            {
                TableResults.RowProxy currentRow = table[rowIndex];
                var groupKey = new GroupDictKey(rowHasher.Hash(in currentRow), rowIndex); // A struct key.

                // Unknown group: create its buckets and register them under the key.
                if (!groupMap.TryGetValue(groupKey, out AggregateBucketResult[] groupBuckets))
                {
                    groupBuckets = AggregateBucketResult.CreateBucketResults(aggFuncs);
                    groupMap.Add(groupKey, groupBuckets);
                }

                for (int a = 0; a < aggFuncs.Length; a++)
                {
                    aggFuncs[a].Apply(in currentRow, groupBuckets[a]);
                }
            }
        }
 /// <summary>
 /// Creates the result buckets for all aggregates and extracts the non-asterisk aggregates
 /// together with their results. When the extraction yields fewer results than there are
 /// buckets in total, <c>containsAst</c> is set.
 /// </summary>
 public SingleGroupGroupByStreamed(QueryExpressionInfo expressionInfo, IGroupByExecutionHelper executionHelper, int columnCount, int[] usedVars) : base(expressionInfo, executionHelper, columnCount, usedVars)
 {
     this.finalResults = AggregateBucketResult.CreateBucketResults(this.aggregates);
     Aggregate.ExtractNonAstAggsAndResults(
         this.aggregates,
         this.finalResults,
         out nonAsterixAggregates,
         out nonAsterixResults);

     // A difference in length means at least one aggregate was left out by the extraction.
     int totalCount       = this.finalResults.Length;
     int nonAsteriskCount = this.nonAsterixResults.Length;
     if (totalCount != nonAsteriskCount)
     {
         this.containsAst = true;
     }
 }
        /// <summary>
        /// Builds one result bucket per aggregate, in the same order as the given aggregates.
        /// </summary>
        /// <param name="aggregates"> Aggregates to create buckets for. </param>
        /// <returns>
        /// An array with one bucket for each aggregate, or null when <paramref name="aggregates"/> is empty.
        /// </returns>
        public static AggregateBucketResult[] CreateBucketResults(Aggregate[] aggregates)
        {
            int count = aggregates.Length;
            if (count == 0)
            {
                // No aggregates means no buckets; callers test for null.
                return null;
            }

            var created = new AggregateBucketResult[count];
            int slot    = 0;
            foreach (var aggregate in aggregates)
            {
                created[slot++] = AggregateBucketResult.Factory(aggregate.GetAggregateReturnType(), aggregate.GetFuncName());
            }

            return created;
        }
예제 #6
0
 /// <summary>
 /// Compares two buckets after casting both to the streamed bucket type that matches <paramref name="type"/>.
 /// According to the original notes it is used only by the fully streamed group by, where keys and
 /// aggregate values share one array, and the compared values must be set beforehand.
 /// </summary>
 /// <param name="type"> The value type of the buckets; only int and string are handled. </param>
 /// <param name="x"> The first bucket. </param>
 /// <param name="y"> The second bucket. </param>
 /// <returns> The result of the typed Equals call on the two cast buckets. </returns>
 /// <exception cref="ArgumentException"> Thrown when <paramref name="type"/> is neither int nor string. </exception>
 public static bool Equals(Type type, AggregateBucketResult x, AggregateBucketResult y)
 {
     if (type == typeof(int))
     {
         var left  = (AggregateBucketResultStreamed <int>)x;
         var right = (AggregateBucketResultStreamed <int>)y;
         return Equals(left, right);
     }

     if (type == typeof(string))
     {
         var left  = (AggregateBucketResultStreamed <string>)x;
         var right = (AggregateBucketResultStreamed <string>)y;
         return Equals(left, right);
     }

     throw new ArgumentException($"Aggregate bucket result compare, unkown type to compare. Type = {type}.");
 }
        /// <summary>
        /// Per-thread grouping routine that runs in two steps.
        /// Step 1 (local): each row in the job's [start, end) range is hashed and looked up in the job's
        /// local group map, which stores a position per group; aggregates are applied to the job's
        /// <c>aggResults</c> at that position.
        /// Step 2 (global): every local group is offered to the global group map via <c>GetOrAdd</c>
        /// and its locally computed results are merged into the bucket array returned for the key.
        /// </summary>
        /// <param name="job"> The work item; it is cast to <c>GroupByJobMixListsBuckets</c>. </param>
        protected override void SingleThreadGroupByWork(object job)
        {
            var mixJob       = (GroupByJobMixListsBuckets)job;
            var table        = mixJob.resTable;
            var localGroups  = mixJob.groups;
            var aggFuncs     = mixJob.aggregates;
            var rowHasher    = mixJob.hasher;
            var localResults = mixJob.aggResults;

            // Step 1: local grouping, results addressed by the group's position.
            for (int rowIndex = mixJob.start; rowIndex < mixJob.end; rowIndex++)
            {
                TableResults.RowProxy currentRow = table[rowIndex];
                var groupKey = new GroupDictKey(rowHasher.Hash(in currentRow), rowIndex); // A struct key.

                if (!localGroups.TryGetValue(groupKey, out int slot))
                {
                    // A new group takes the next free position, i.e. the current group count.
                    slot = localGroups.Count;
                    localGroups.Add(groupKey, slot);
                }

                for (int a = 0; a < aggFuncs.Length; a++)
                {
                    aggFuncs[a].Apply(in currentRow, localResults[a], slot);
                }
            }

            // Step 2: merge the local groups into the global map, which stores buckets.
            var sharedGroups = mixJob.globalGroups;
            AggregateBucketResult[] candidate = AggregateBucketResult.CreateBucketResults(aggFuncs);

            foreach (var localGroup in localGroups)
            {
                AggregateBucketResult[] targetBuckets = sharedGroups.GetOrAdd(localGroup.Key, candidate);

                // Getting the candidate back means the map kept it; prepare a new one for the next group.
                bool candidateConsumed = candidate != null && object.ReferenceEquals(candidate, targetBuckets);
                if (candidateConsumed)
                {
                    candidate = AggregateBucketResult.CreateBucketResults(aggFuncs);
                }

                for (int a = 0; a < aggFuncs.Length; a++)
                {
                    aggFuncs[a].MergeThreadSafe(targetBuckets[a], localResults[a], localGroup.Value);
                }
            }
        }
예제 #8
0
        /// <summary>
        /// Splits the result table into one job per thread for the parallel group by.
        /// Every job except the last covers <c>NumberOfMatchedElements / ThreadCount</c> rows and gets
        /// its own newly created result buckets. The last job runs to the end of the table and receives
        /// the passed-in <paramref name="aggResults"/>.
        /// </summary>
        /// <param name="resTable"> The result table whose rows are divided among the jobs. </param>
        /// <param name="aggs"> Aggregation functions. </param>
        /// <param name="aggResults"> Results placed into the last job; per the original notes, the other jobs' results are merged into these. </param>
        /// <returns> An array of <c>ThreadCount</c> jobs. </returns>
        private GroupByJob[] CreateJobs(ITableResults resTable, Aggregate[] aggs, AggregateBucketResult[] aggResults)
        {
            var jobs = new GroupByJob[this.ThreadCount];

            // Note that, per the original comment, the chunk size is never <= 0 because that was
            // checked when the implementation was picked.
            int chunkSize = resTable.NumberOfMatchedElements / this.ThreadCount;
            int lastIndex = jobs.Length - 1;
            int rangeStart = 0;

            for (int i = 0; i < lastIndex; i++, rangeStart += chunkSize)
            {
                var ownResults = AggregateBucketResult.CreateBucketResults(aggs);
                jobs[i] = new GroupByJob(aggs, ownResults, rangeStart, rangeStart + chunkSize, resTable);
            }

            // The last job takes whatever remains, including the division remainder.
            jobs[lastIndex] = new GroupByJob(aggs, aggResults, rangeStart, resTable.NumberOfMatchedElements, resTable);
            return jobs;
        }
예제 #9
0
        /// <summary>
        /// Allocates the per-thread result storage, creates the final result buckets and extracts the
        /// non-asterisk aggregates with their final results. <c>containsAst</c> is set when the
        /// extraction yields fewer results than there are final buckets. Finally, each thread slot
        /// receives its own buckets for the non-asterisk aggregates.
        /// </summary>
        public SingleGroupGroupByHalfStreamed(QueryExpressionInfo expressionInfo, IGroupByExecutionHelper helper, int columnCount, int[] usedVars) : base(expressionInfo, helper, columnCount, usedVars)
        {
            this.matcherNonAsterixResults = new AggregateBucketResult[this.executionHelper.ThreadCount][];
            this.finalResults = AggregateBucketResult.CreateBucketResults(this.aggregates);
            Aggregate.ExtractNonAstAggsAndResults(
                this.aggregates,
                this.finalResults,
                out nonAsterixAggregates,
                out finalNonAsterixResults);

            // A difference in length means at least one aggregate was left out by the extraction.
            int totalCount       = this.finalResults.Length;
            int nonAsteriskCount = this.finalNonAsterixResults.Length;
            if (totalCount != nonAsteriskCount)
            {
                this.containsAst = true;
            }

            this.numberOfMatchedElements = new int[this.executionHelper.ThreadCount];

            // One bucket array per thread slot.
            for (int slot = 0; slot < this.matcherNonAsterixResults.Length; slot++)
            {
                this.matcherNonAsterixResults[slot] = AggregateBucketResult.CreateBucketResults(this.nonAsterixAggregates);
            }
        }
예제 #10
0
        /// <summary>
        /// Per-thread grouping routine that runs in two steps, storing results in buckets in both.
        /// Step 1 (local): each row in the job's [start, end) range is hashed and grouped in the job's
        /// local group map; aggregates are applied to the group's bucket array.
        /// Step 2 (global): each local group is offered to the global group map via <c>GetOrAdd</c>.
        /// If the map already held buckets for the key, the local buckets are merged into them.
        /// </summary>
        /// <param name="job"> The work item; it is cast to <c>GroupByJobBuckets</c>. </param>
        protected override void SingleThreadGroupByWork(object job)
        {
            var bucketJob   = (GroupByJobBuckets)job;
            var table       = bucketJob.resTable;
            var localGroups = bucketJob.groups;
            var aggFuncs    = bucketJob.aggregates;
            var rowHasher   = bucketJob.hasher;

            // Step 1: local grouping.
            for (int rowIndex = bucketJob.start; rowIndex < bucketJob.end; rowIndex++)
            {
                TableResults.RowProxy currentRow = table[rowIndex];
                var groupKey = new GroupDictKey(rowHasher.Hash(in currentRow), rowIndex); // A struct key.

                if (!localGroups.TryGetValue(groupKey, out AggregateBucketResult[] groupBuckets))
                {
                    groupBuckets = AggregateBucketResult.CreateBucketResults(aggFuncs);
                    localGroups.Add(groupKey, groupBuckets);
                }

                for (int a = 0; a < aggFuncs.Length; a++)
                {
                    aggFuncs[a].Apply(in currentRow, groupBuckets[a]);
                }
            }

            // Step 2: publish the local groups to the global map.
            var sharedGroups = bucketJob.globalGroups;

            foreach (var localGroup in localGroups)
            {
                AggregateBucketResult[] stored = sharedGroups.GetOrAdd(localGroup.Key, localGroup.Value);

                // Nothing to merge when there are no local buckets, or when the map returned the very
                // array we passed in (meaning this group's buckets were the ones inserted).
                if (localGroup.Value == null || object.ReferenceEquals(stored, localGroup.Value))
                {
                    continue;
                }

                for (int a = 0; a < aggFuncs.Length; a++)
                {
                    aggFuncs[a].MergeThreadSafe(stored[a], localGroup.Value[a]);
                }
            }
        }
 /// <summary>
 /// Merges the groups of one job into the global groups.
 /// Per the original notes, it is called only when the grouping runs in parallel.
 /// Each local key is expanded into a full key (hash plus the row at the key's position), offered
 /// to the global map via <c>GetOrAdd</c> together with the job's spare buckets, and the job's
 /// results for the group are merged into the buckets the map returns.
 /// Afterwards the job's slot in <c>groupJobs</c> is cleared.
 /// </summary>
 /// <param name="job"> The job whose local groups are merged. </param>
 /// <param name="matcherID"> Index of the slot in <c>groupJobs</c> that is set to null at the end. </param>
 private void MergeResults(GroupJob job, int matcherID)
 {
     foreach (var localGroup in job.groups)
     {
         var fullKey       = new GroupDictKeyFull(localGroup.Key.hash, job.resTable[localGroup.Key.position]);
         var targetBuckets = this.globalGroups.GetOrAdd(fullKey, job.spareBuckets);

         // Getting the spare back means the map kept it; the job needs a new spare for the next group.
         bool spareConsumed = job.spareBuckets != null && object.ReferenceEquals(job.spareBuckets, targetBuckets);
         if (spareConsumed)
         {
             job.spareBuckets = AggregateBucketResult.CreateBucketResults(this.aggregates);
         }

         for (int a = 0; a < this.aggregates.Length; a++)
         {
             this.aggregates[a].MergeThreadSafe(targetBuckets[a], job.aggResults[a], localGroup.Value);
         }
     }

     this.groupJobs[matcherID] = null;
 }
        /// <summary>
        /// Prepares one group job per thread and the global group map.
        /// The first job uses a newly created comparer and hasher (the hasher's cache is set to the
        /// comparer's comparers); every further job gets clones of that pair. Each job also receives
        /// its own spare buckets and its own result table.
        /// </summary>
        public TwoStepHalfStreamedListBucket(QueryExpressionInfo expressionInfo, IGroupByExecutionHelper executionHelper, int columnCount, int[] usedVars) : base(expressionInfo, executionHelper, columnCount, usedVars)
        {
            this.groupJobs = new GroupJob[this.executionHelper.ThreadCount];

            // Prototype comparer/hasher pair used by the first job and cloned for the rest.
            this.CreateHashersAndComparers(out ExpressionComparer[] keyComparers, out ExpressionHasher[] keyHashers);
            var prototypeComparer = RowEqualityComparerGroupKey.Factory(null, keyComparers, true);
            var prototypeHasher   = new RowHasher(keyHashers);
            prototypeHasher.SetCache(prototypeComparer.comparers);

            this.groupJobs[0] = new GroupJob(
                this.aggregates,
                prototypeComparer,
                prototypeHasher,
                AggregateBucketResult.CreateBucketResults(this.aggregates),
                new TableResults(this.ColumnCount, this.executionHelper.FixedArraySize, this.usedVars));

            for (int matcher = 1; matcher < this.executionHelper.ThreadCount; matcher++)
            {
                CloneHasherAndComparer(prototypeComparer, prototypeHasher, out RowEqualityComparerGroupKey clonedComparer, out RowHasher clonedHasher);
                this.groupJobs[matcher] = new GroupJob(
                    this.aggregates,
                    clonedComparer,
                    clonedHasher,
                    AggregateBucketResult.CreateBucketResults(this.aggregates),
                    new TableResults(this.ColumnCount, this.executionHelper.FixedArraySize, this.usedVars));
            }

            this.globalGroups = new ConcurrentDictionary <GroupDictKeyFull, AggregateBucketResult[]>(RowEqualityComparerGroupDickKeyFull.Factory(keyComparers, false));
        }
        /// <summary>
        /// Returns the array of buckets that serves as key/value in a Dictionary of the streamed group by.
        /// The first <c>keysCount</c> slots hold key buckets; the remaining slots hold one bucket per aggregate.
        /// When the previous array was inserted (<c>lastWasInserted</c>), a new array with new aggregate
        /// buckets is allocated; otherwise the previous array is reused. In both cases the key slots
        /// are (re)assigned from the key factories.
        /// </summary>
        /// <param name="result"> The elements passed on to the key factories. </param>
        /// <returns> The array held in <c>lastBucketsKeyValue</c>. </returns>
        public AggregateBucketResult[] Create(Element[] result)
        {
            if (this.lastWasInserted)
            {
                var fresh = new AggregateBucketResult[this.keysCount + this.aggregates.Length];
                this.lastBucketsKeyValue = fresh;

                // Aggregate buckets occupy the slots after the keys.
                for (int a = 0; a < this.aggregates.Length; a++)
                {
                    var agg = this.aggregates[a];
                    fresh[this.keysCount + a] = AggregateBucketResult.Factory(agg.GetAggregateReturnType(), agg.GetFuncName());
                }
            }

            // Key buckets occupy the leading slots; each factory is told whether the last array was inserted.
            int keySlot = 0;
            while (keySlot < keysCount)
            {
                this.lastBucketsKeyValue[keySlot] = factories[keySlot].Create(this.lastWasInserted, result);
                keySlot++;
            }

            return this.lastBucketsKeyValue;
        }
        /// <summary>
        /// Handles one result delivered by a matcher.
        /// A non-null result is placed into the job's table as a temporary row, hashed and looked up in
        /// the job's groups. A new group gets new buckets and the temporary row is stored in the table.
        /// Aggregates are then applied to the group's buckets.
        /// A null result triggers merging of the job's groups, but only when there is more than one job.
        /// </summary>
        /// <param name="matcherID"> Index of the job in <c>groupJobs</c>. </param>
        /// <param name="result"> The result row, or null. </param>
        public override void Process(int matcherID, Element[] result)
        {
            var job = this.groupJobs[matcherID];

            if (result == null)
            {
                // With a single job there is nothing to merge.
                if (this.groupJobs.Length > 1)
                {
                    this.MergeResults(job, matcherID);
                }
                return;
            }

            // Expose the incoming result as a temporary row at the next row position.
            job.resTable.temporaryRow = result;
            int nextPosition = job.resTable.RowCount;
            TableResults.RowProxy currentRow = job.resTable[nextPosition];
            var groupKey = new GroupDictKey(job.hasher.Hash(in currentRow), nextPosition); // A struct key.

            if (!job.groups.TryGetValue(groupKey, out AggregateBucketResult[] groupBuckets))
            {
                groupBuckets = AggregateBucketResult.CreateBucketResults(this.aggregates);
                job.groups.Add(groupKey, groupBuckets);

                // Per the original notes: storing copies the temporary row into the table itself, so the
                // row proxy at the same position subsequently reads from the table, not the temporary row.
                job.resTable.StoreTemporaryRow();
                job.resTable.temporaryRow = null;
            }

            for (int a = 0; a < this.aggregates.Length; a++)
            {
                this.aggregates[a].Apply(in currentRow, groupBuckets[a]);
            }
        }
예제 #15
0
        /// <summary>
        /// Computes the aggregates over the whole result table.
        /// Count(*) aggregates are set directly by incrementing their result by the number of matched
        /// elements. All other aggregates are collected, along with their result buckets, and passed
        /// to the parallel or the single-thread grouping.
        /// The collected buckets are the same instances as in the full results array, so the grouping
        /// methods are expected to write into the buckets they are given.
        /// </summary>
        /// <param name="resTable"> The result table to aggregate. </param>
        /// <returns> The value of <c>CreateGroupByResults</c> for the full results array and the table. </returns>
        public override GroupByResults Group(ITableResults resTable)
        {
            var remainingAggs    = new List <Aggregate>();
            var remainingResults = new List <AggregateBucketResult>();
            var allResults       = AggregateBucketResult.CreateBucketResults(this.aggregates);

            for (int i = 0; i < this.aggregates.Length; i++)
            {
                if (!this.aggregates[i].IsAstCount)
                {
                    // Everything except Count(*) must be computed over the rows.
                    remainingAggs.Add(this.aggregates[i]);
                    remainingResults.Add(allResults[i]);
                    continue;
                }

                // Count(*) needs no row access; the row count is its value.
                ((Count <int>)aggregates[i]).IncBy(resTable.NumberOfMatchedElements, allResults[i]);
            }

            if (remainingAggs.Count == 0)
            {
                return CreateGroupByResults(allResults, resTable);
            }

            // The parallel path is taken only if every thread would receive at least one row.
            bool runParallel = this.InParallel && (resTable.NumberOfMatchedElements / this.ThreadCount > 0);
            if (runParallel)
            {
                this.ParallelGroupBy(resTable, remainingAggs.ToArray(), remainingResults.ToArray());
            }
            else
            {
                this.SingleThreadGroupBy(resTable, remainingAggs.ToArray(), remainingResults.ToArray());
            }

            // The computed values now reside in allResults.
            return CreateGroupByResults(allResults, resTable);
        }
예제 #16
0
 /// <summary>
 /// Casts the bucket to <c>IGetFinal&lt;T&gt;</c> and returns the result of <c>GetFinal(0)</c>.
 /// </summary>
 /// <param name="bucket"> The bucket to read; it must implement <c>IGetFinal&lt;T&gt;</c>. </param>
 public static T GetFinalValue <T>(AggregateBucketResult bucket)
 {
     var finalSource = (IGetFinal <T>)bucket;
     return finalSource.GetFinal(0);
 }
예제 #17
0
        /// <summary>
        /// Atomically adds <paramref name="value"/> to the int result held in the bucket.
        /// </summary>
        /// <param name="value"> The amount to add. </param>
        /// <param name="bucket"> The bucket to update; it is cast to <c>AggregateBucketResult&lt;int&gt;</c>. </param>
        public void IncByThreadSafe(int value, AggregateBucketResult bucket)
        {
            Interlocked.Add(ref ((AggregateBucketResult <int>)bucket).aggResult, value);
        }
예제 #18
0
        /// <summary>
        /// Adds <paramref name="value"/> to the int result held in the bucket (plain, non-atomic addition).
        /// </summary>
        /// <param name="value"> The amount to add. </param>
        /// <param name="bucket"> The bucket to update; it is cast to <c>AggregateBucketResult&lt;int&gt;</c>. </param>
        public void IncBy(int value, AggregateBucketResult bucket)
        {
            ((AggregateBucketResult <int>)bucket).aggResult += value;
        }