Пример #1
0
        // Computes what the bucket formed by merging h1 with h2 would look like and
        // stores the result in h1's merge* fields. h2 and h1's non-merge fields are
        // only read.
        //
        // h1 : supplies the start index; receives mergestart, mergeend, mergehsElemM,
        //      mergeerrorList, mergeslope, mergeintercept and mergespread.
        // h2 : supplies the end index, range_high_key and equal_rows of the merged bucket.
        //
        // The slope/intercept come from an ordinary least-squares line fitted through
        // the points (rank, q-error), rank = 1..k, over the ascending-sorted q-errors
        // of the keys at indices h1.start..h2.end. With fewer than two keys the
        // denominator is zero and the results are NaN/Infinity.
        static void MergeTwoHeapElem(HeapElem h1, HeapElem h2)
        {
            List<float> keyEl = new List<float>(dictCol.Keys);

            var mergedStart  = h1.start;
            var mergedEnd    = h2.end;
            var mergedSpread = h1.spread + h2.spread;

            // Statistics of the merged bucket: the upper boundary is taken from h2.
            HeapStatElem hse = new HeapStatElem();
            hse.range_high_key = h2.hsElem.range_high_key;
            hse.equal_rows     = h2.hsElem.equal_rows;

            // The "+ 1" and the added h1 equal_rows fold h1's own boundary key into
            // the range part of the merged bucket.
            hse.distint_range_rows = h1.hsElem.distint_range_rows + 1 + h2.hsElem.distint_range_rows;
            hse.range_rows         = h1.hsElem.range_rows + h1.hsElem.equal_rows + h2.hsElem.range_rows;
            hse.average_range_rows = hse.range_rows / hse.distint_range_rows;

            // q-error of every key in the merged index range, using the merged
            // average as the estimate; values below 1 are inverted so each is >= 1.
            float       estimate = hse.average_range_rows;
            List<float> lisError = new List<float>();
            for (int j = mergedStart; j <= mergedEnd; j++)
            {
                float actual = dictCol[keyEl[j]];
                float err    = (estimate + 1) / (actual + 1);
                if (err < 1)
                {
                    err = 1 / err;
                }

                lisError.Add(err);
            }

            lisError.Sort();

            // Accumulate sum(rank * err) and sum(err) over the sorted errors.
            int   k     = lisError.Count;
            float prod1 = 0, sum1 = 0;
            for (int rank = 0; rank < k; rank++)
            {
                prod1 += (rank + 1) * lisError[rank];
                sum1  += lisError[rank];
            }

            // Closed forms for sum(i) and sum(i^2), i = 1..k. Evaluated in 64-bit
            // because k * (k + 1) * (2k + 1) exceeds Int32 for k a little over 1000.
            long  n     = k;
            float sumn  = n * (n + 1) / 2;
            float sumns = n * (n + 1) * (2 * n + 1) / 6;

            float denominator = k * sumns - sumn * sumn;
            float slope       = (k * prod1 - sumn * sum1) / denominator;
            float intercept   = (sum1 * sumns - prod1 * sumn) / denominator;

            h1.mergestart     = mergedStart;
            h1.mergeend       = mergedEnd;
            h1.mergehsElemM   = hse;
            h1.mergeerrorList = lisError;
            h1.mergeslope     = slope;
            h1.mergeintercept = intercept;
            h1.mergespread    = mergedSpread;
        }
Пример #2
0
        // Builds histogram steps from the key -> row-count map dictCol.
        //
        // Phases:
        //   1. Group the keys (in dictCol.Keys order) into initial buckets of num keys;
        //      the last bucket also absorbs any keys left over.
        //   2. Compute a q-error for every key against CalculateEMQEsitmateFromHist.
        //   3. For each bucket, sort its q-errors and fit a least-squares line through
        //      (rank, q-error); record slope, intercept and GetMergeNorm().
        //   4. Pre-compute the merge of every bucket with its right neighbour and put
        //      the left buckets in a priority queue keyed by (mergeslope, mergeintercept).
        //   5. Dequeue while the queue holds more than numSteps entries, updating the
        //      working bucket list on each dequeue.
        //   6. Append one StatStep per entry of the working list to histLs.
        //
        // colVal : not read by this method; kept so existing callers compile.
        // histLs : output list; steps are appended, existing entries are left alone.
        // num    : number of keys per initial bucket; must be at least 1.
        //
        // Side effects: resets the static maxalpha / maxbeta and raises them to the
        // largest intercept / slope among the initial buckets.
        // Throws ArgumentOutOfRangeException when num < 1. Returns without touching
        // histLs, maxalpha or maxbeta when dictCol is empty.
        static void CreateHistogramFromAlgorithm(List <float> colVal, List <StatStep> histLs, int num)
        {
            if (num < 1)
            {
                throw new ArgumentOutOfRangeException("num", "num must be at least 1.");
            }

            List<float> keyEl         = new List<float>(dictCol.Keys);
            int         countHistInit = keyEl.Count;

            if (countHistInit == 0)
            {
                // No keys, so there is nothing to bucket.
                return;
            }

            int fullBuckets = countHistInit / num;

            // ---- Phase 1: initial bucket statistics -------------------------------
            List<HeapStatElem> lhse = new List<HeapStatElem>();

            // Every full bucket except the last: the first num - 1 keys are range
            // rows, the final key is the bucket's high key.
            for (int i = 0; i < fullBuckets - 1; i++)
            {
                HeapStatElem hs = new HeapStatElem();

                hs.range_high_key = keyEl[i * num + num - 1];

                for (int j = 0; j < num - 1; j++)
                {
                    hs.range_rows += dictCol[keyEl[i * num + j]];
                }
                hs.equal_rows         = dictCol[keyEl[i * num + (num - 1)]];
                hs.distint_range_rows = num - 1;
                hs.average_range_rows = hs.range_rows / hs.distint_range_rows;
                lhse.Add(hs);
            }

            // Last bucket: starts where the last full bucket starts and runs to the
            // final key, so it also takes the countHistInit % num leftover keys.
            HeapStatElem hsElem = new HeapStatElem();

            hsElem.equal_rows     = dictCol[keyEl[countHistInit - 1]];
            hsElem.range_high_key = keyEl[countHistInit - 1];

            // Clamp to 0 so that fewer than num keys yields one bucket instead of a
            // negative index.
            int lastBucketStart = (fullBuckets - 1) * num;
            if (lastBucketStart < 0)
            {
                lastBucketStart = 0;
            }

            int tot = 0;
            int d   = 0;
            for (int i = lastBucketStart; i < countHistInit - 1; i++)
            {
                tot += dictCol[keyEl[i]];
                d++;
            }

            hsElem.range_rows         = tot;
            hsElem.distint_range_rows = d;
            // Divide the stored fields (same expression as the full buckets above)
            // rather than int / int, and avoid dividing by zero when the bucket has
            // no range keys.
            hsElem.average_range_rows = d > 0 ? hsElem.range_rows / hsElem.distint_range_rows : 0;

            lhse.Add(hsElem);

            // ---- Phase 2: q-error of every key ------------------------------------
            List<float>    qeList     = new List<float>();
            List<HeapElem> heElemList = new List<HeapElem>();

            maxalpha = float.MinValue;
            maxbeta  = float.MinValue;

            foreach (var v in keyEl)
            {
                float estimate = CalculateEMQEsitmateFromHist(ref lhse, v);
                float actual   = dictCol[v];

                // q-error: ratio of (estimate + 1) to (actual + 1), inverted when
                // below 1 so the stored value is always >= 1.
                float qe = (estimate + 1) / (actual + 1);
                if (qe < 1)
                {
                    qe = 1 / qe;
                }

                qeList.Add(qe);
            }

            // ---- Phase 3: one heap element per full bucket ------------------------
            for (int i = 0; i < fullBuckets; i++)
            {
                HeapElem he = new HeapElem();
                he.start     = i * num;
                he.end       = (i + 1) * num - 1;
                he.hsElem    = lhse[i];
                he.spread    = (int)lhse[i].range_rows + (int)lhse[i].equal_rows;
                he.errorList = qeList.GetRange(i * num, num);
                he.errorList.Sort();

                float beta, alpha;
                FitSortedErrorLine(he.errorList, out beta, out alpha);

                he.intercept = alpha;
                he.slope     = beta;

                if (alpha > maxalpha)
                {
                    maxalpha = alpha;
                }

                if (beta > maxbeta)
                {
                    maxbeta = beta;
                }

                // Called after maxalpha / maxbeta are updated, as in the original order.
                he.mergenorm = he.GetMergeNorm();
                heElemList.Add(he);
            }

            // Tail element: the keys after the last full bucket. It is empty when
            // countHistInit is a multiple of num, in which case its slope and
            // intercept are NaN.
            // NOTE(review): it shares its hsElem with the last full bucket's element,
            // so the same statistics object appears twice in heElemList. Confirm
            // whether that is intended.
            HeapElem heElem    = new HeapElem();
            int      tailStart = fullBuckets * num;

            heElem.start     = tailStart;
            heElem.end       = countHistInit - 1;
            heElem.hsElem    = lhse[lhse.Count - 1];
            heElem.errorList = qeList.GetRange(tailStart, countHistInit - tailStart);
            heElem.errorList.Sort();
            heElem.spread    = heElem.errorList.Count;

            float betao, alphao;
            FitSortedErrorLine(heElem.errorList, out betao, out alphao);

            if (alphao > maxalpha)
            {
                maxalpha = alphao;
            }
            if (betao > maxbeta)
            {
                maxbeta = betao;
            }

            heElem.intercept = alphao;
            heElem.slope     = betao;
            heElem.mergenorm = heElem.GetMergeNorm();
            heElemList.Add(heElem);

            // ---- Phase 4: pre-compute neighbour merges and fill the queue ---------
            int heapElemCount = heElemList.Count;

            for (int i = 0; i < heapElemCount - 1; i++)
            {
                // Stores the merged-with-right-neighbour values in heElemList[i].
                MergeTwoHeapElem(heElemList[i], heElemList[i + 1]);
            }

            // The last element has no right neighbour and is therefore not enqueued.
            Heap.PriorityQueue<HeapElem, Tuple<float, float>> heapForWork = new Heap.PriorityQueue<HeapElem, Tuple<float, float>>();
            for (int i = 0; i < heElemList.Count - 1; i++)
            {
                heapForWork.Enqueue(heElemList[i], new Tuple<float, float>(heElemList[i].mergeslope, heElemList[i].mergeintercept));
            }

            // ---- Phase 5: dequeue until at most numSteps entries remain -----------
            while (heapForWork.Count > numSteps)
            {
                HeapElem he = heapForWork.Dequeue().Key;

                // Materialise the pre-computed merge of he with its right neighbour.
                HeapElem heNew = new HeapElem();
                heNew.hsElem    = he.mergehsElemM;
                heNew.intercept = he.mergeintercept;
                heNew.slope     = he.mergeslope;
                heNew.errorList = he.mergeerrorList;
                heNew.spread    = he.mergespread;
                heNew.end       = he.mergeend;
                heNew.start     = he.mergestart;

                // Replace the first list entry that starts after he.end with the
                // merged element. k stays 0 when no such entry exists.
                // NOTE(review): he itself remains in heElemList, and heNew is never
                // enqueued in heapForWork. Confirm against the intended algorithm.
                int k = 0;
                for (int i = 0; i < heElemList.Count; i++)
                {
                    if (heElemList[i].start > he.end)
                    {
                        k             = i;
                        heElemList[i] = heNew;
                        break;
                    }
                }

                if (k < heElemList.Count - 2)
                {
                    // Pre-compute heNew's merge with the entry two positions on,
                    // then drop the entry directly after heNew.
                    // NOTE(review): the dropped entry is not the one merged with.
                    // Verify the indices.
                    MergeTwoHeapElem(heNew, heElemList[k + 2]);
                    heElemList.RemoveAt(k + 1);
                }

                // Refresh the pre-computed merge of the queued element that ends
                // right before he. Its queue priority is not updated here.
                foreach (var v in heapForWork)
                {
                    if (v.Key.end + 1 == he.start)
                    {
                        MergeTwoHeapElem(v.Key, heNew);
                        break;
                    }
                }
            }

            // ---- Phase 6: emit one step per remaining list entry ------------------
            foreach (var v in heElemList)
            {
                StatStep ss = new StatStep();
                ss.average_range_rows = v.hsElem.average_range_rows;
                ss.distint_range_rows = v.hsElem.distint_range_rows;
                ss.equal_rows         = v.hsElem.equal_rows;
                ss.range_high_key     = v.hsElem.range_high_key;
                ss.range_rows         = v.hsElem.range_rows;
                histLs.Add(ss);
            }
        }

        // Fits an ordinary least-squares line through the points (rank, error),
        // rank = 1..sortedErrors.Count, and returns its slope and intercept.
        // With fewer than two values the denominator is zero, so both outputs are
        // NaN or Infinity.
        static void FitSortedErrorLine(List<float> sortedErrors, out float slope, out float intercept)
        {
            int   count       = sortedErrors.Count;
            float weightedSum = 0, plainSum = 0;

            for (int rank = 0; rank < count; rank++)
            {
                weightedSum += (rank + 1) * sortedErrors[rank];
                plainSum    += sortedErrors[rank];
            }

            // Closed forms for sum(i) and sum(i^2), evaluated in 64-bit because
            // count * (count + 1) * (2 * count + 1) exceeds Int32 for counts a
            // little over 1000.
            long  n              = count;
            float sumRanks       = n * (n + 1) / 2;
            float sumRankSquares = n * (n + 1) * (2 * n + 1) / 6;

            float denominator = count * sumRankSquares - sumRanks * sumRanks;

            slope     = (count * weightedSum - sumRanks * plainSum) / denominator;
            intercept = (plainSum * sumRankSquares - weightedSum * sumRanks) / denominator;
        }