// Computes the bucket that would result from merging h1 with h2 (h1 first,
// h2 second) and records that candidate on h1's merge* fields: mergestart,
// mergeend, mergespread, mergehsElemM, mergeerrorList, mergeslope and
// mergeintercept. h2 and the non-merge fields of h1 are only read.
//
// The merged statistics keep h2's high key and equal_rows; h1's high key is
// folded into the range (hence the "+ 1" distinct value and the added
// h1.equal_rows). For every key index in [h1.start, h2.end] an error ratio
// (estimate + 1) / (actual + 1), inverted when below 1 so it is always >= 1,
// is computed against the row counts in the static dictCol. The sorted ratios
// are then fitted with an ordinary least-squares line y = intercept + slope * x
// over x = 1..n.
//
// NOTE(review): key indices are resolved through a fresh copy of dictCol.Keys
// on every call; this assumes the enumeration order of dictCol is the order
// that start/end were assigned against - confirm against the dictCol type.
// NOTE(review): with fewer than two keys in the range the regression
// denominator is 0 and slope/intercept are not finite.
static void MergeTwoHeapElem(HeapElem h1, HeapElem h2)
{
    List<float> keys = new List<float>(dictCol.Keys);

    var mergedStart = h1.start;
    var mergedEnd = h2.end;
    var mergedSpread = h1.spread + h2.spread;

    // Statistics of the combined bucket.
    HeapStatElem merged = new HeapStatElem();
    merged.range_high_key = h2.hsElem.range_high_key;
    merged.equal_rows = h2.hsElem.equal_rows;
    merged.distint_range_rows = h1.hsElem.distint_range_rows + 1 + h2.hsElem.distint_range_rows;
    merged.range_rows = h1.hsElem.range_rows + h1.hsElem.equal_rows + h2.hsElem.range_rows;
    merged.average_range_rows = merged.range_rows / merged.distint_range_rows;

    // Error ratio of the merged bucket's average against each actual count.
    List<float> errors = new List<float>();
    float estimate = merged.average_range_rows;
    for (int j = mergedStart; j <= mergedEnd; j++)
    {
        float actual = dictCol[keys[j]];
        float ratio = (estimate + 1) / (actual + 1);
        if (ratio < 1)
        {
            ratio = 1 / ratio;
        }
        errors.Add(ratio);
    }
    errors.Sort();

    // Least-squares fit of the sorted ratios against their 1-based rank.
    int n = errors.Count;
    float weightedSum = 0, plainSum = 0;
    for (int i = 0; i < n; i++)
    {
        weightedSum += (i + 1) * errors[i];
        plainSum += errors[i];
    }

    // Closed forms for sum(x) and sum(x^2), x = 1..n. Evaluated in long: in
    // int the sum-of-squares product overflows once n exceeds roughly 1023.
    long count = n;
    float sumRanks = count * (count + 1) / 2;
    float sumRankSquares = count * (count + 1) * (2 * count + 1) / 6;

    float denominator = n * sumRankSquares - sumRanks * sumRanks;
    float slope = (n * weightedSum - sumRanks * plainSum) / denominator;
    float intercept = (plainSum * sumRankSquares - weightedSum * sumRanks) / denominator;

    h1.mergestart = mergedStart;
    h1.mergeend = mergedEnd;
    h1.mergespread = mergedSpread;
    h1.mergehsElemM = merged;
    h1.mergeerrorList = errors;
    // The original stored these two the wrong way round (slope in
    // mergeintercept, intercept in mergeslope).
    h1.mergeslope = slope;
    h1.mergeintercept = intercept;
}
// Builds histogram steps from the per-key row counts in the static dictCol and
// appends one StatStep per surviving bucket to histLs.
//
//   colVal - not used by this method.
//   histLs - output list; steps are appended, existing entries are kept.
//   num    - number of consecutive keys placed in each initial bucket.
//
// Phases:
//   1. Cut the key list into initial buckets of num keys; the final bucket
//      also absorbs whatever keys are left over.
//   2. Compute an error ratio per key from CalculateEMQEsitmateFromHist and
//      fit a least-squares line through each bucket's sorted ratios (the
//      static maxalpha / maxbeta track the largest intercept / slope seen).
//   3. Pre-compute the merge candidate of every adjacent bucket pair, queue
//      the buckets by (mergeslope, mergeintercept) and keep merging while the
//      queue holds more than the static numSteps entries.
//
// NOTE(review): dictCol with fewer than num keys (including an empty dictCol)
// indexes the key list out of range, and num == 0 divides by zero; callers
// presumably guarantee num >= 1 and at least num keys - confirm.
static void CreateHistogramFromAlgorithm(List<float> colVal, List<StatStep> histLs, int num)
{
    int countHistInit = dictCol.Count();
    int fullBuckets = countHistInit / num;
    List<float> keyEl = new List<float>(dictCol.Keys);
    List<HeapStatElem> lhse = new List<HeapStatElem>();

    // ---- Phase 1: initial buckets -------------------------------------
    // Every full bucket except the last one: the first num - 1 keys form the
    // range, the last key is the bucket's high key.
    for (int i = 0; i < fullBuckets - 1; i++)
    {
        HeapStatElem hs = new HeapStatElem();
        hs.range_high_key = keyEl[i * num + num - 1];
        for (int j = 0; j < num - 1; j++)
        {
            hs.range_rows += dictCol[keyEl[i * num + j]];
        }
        hs.equal_rows = dictCol[keyEl[i * num + (num - 1)]];
        hs.distint_range_rows = num - 1;
        // num == 1 leaves the range empty; report 0 instead of dividing by zero.
        hs.average_range_rows = num > 1 ? hs.range_rows / hs.distint_range_rows : 0;
        lhse.Add(hs);
    }

    // Final bucket: the last full chunk plus any leftover keys, with the very
    // last key as its high key.
    HeapStatElem hsElem = new HeapStatElem();
    hsElem.equal_rows = dictCol[keyEl[countHistInit - 1]];
    hsElem.range_high_key = keyEl[countHistInit - 1];
    int tot = 0;
    int d = 0;
    for (int i = (fullBuckets - 1) * num; i < countHistInit - 1; i++)
    {
        tot += dictCol[keyEl[i]];
        d++;
    }
    hsElem.range_rows = tot;
    // Floating-point division: the original int / int truncated the average
    // and threw DivideByZeroException when the range was empty (num == 1).
    hsElem.average_range_rows = d > 0 ? (float)tot / d : 0;
    hsElem.distint_range_rows = d;
    lhse.Add(hsElem);

    // ---- Phase 2: per-key error ratios and per-bucket line fit --------
    List<float> qeList = new List<float>();
    List<HeapElem> heElemList = new List<HeapElem>();
    maxalpha = float.MinValue;
    maxbeta = float.MinValue;
    foreach (var v in keyEl)
    {
        float estimate = CalculateEMQEsitmateFromHist(ref lhse, v);
        float actual = dictCol[v];
        // Ratio of estimate to actual (both shifted by 1), kept >= 1.
        float qe = (estimate + 1) / (actual + 1);
        if (qe < 1)
        {
            qe = 1 / qe;
        }
        qeList.Add(qe);
    }

    // Closed forms for sum(x) and sum(x^2), x = 1..num. Evaluated in long so
    // the sum-of-squares product cannot overflow int for large num.
    long size = num;
    float sumn = size * (size + 1) / 2;
    float sumns = size * (size + 1) * (2 * size + 1) / 6;
    float betaden = num * sumns - sumn * sumn;

    for (int i = 0; i < fullBuckets; i++)
    {
        HeapElem he = new HeapElem();
        he.start = i * num;
        he.end = (i + 1) * num - 1;
        he.hsElem = lhse[i];
        he.spread = (int)lhse[i].range_rows + (int)lhse[i].equal_rows;
        he.errorList = new List<float>();
        for (int j = 0; j < num; j++)
        {
            he.errorList.Add(qeList[i * num + j]);
        }
        he.errorList.Sort();

        float prod1 = 0, sum2 = 0;
        for (int j = 0; j < num; j++)
        {
            prod1 += (j + 1) * he.errorList[j];
            sum2 += he.errorList[j];
        }

        float beta = (num * prod1 - sumn * sum2) / betaden;
        // Intercept numerator is sum(y) * sum(x^2) - sum(x) * sum(xy). The
        // original had sum(y) and sum(xy) swapped here, unlike the remainder
        // bucket below and MergeTwoHeapElem.
        float alpha = (sum2 * sumns - prod1 * sumn) / betaden;
        he.intercept = alpha;
        he.slope = beta;
        if (alpha > maxalpha)
        {
            maxalpha = alpha;
        }
        if (beta > maxbeta)
        {
            maxbeta = beta;
        }
        he.mergenorm = he.GetMergeNorm();
        heElemList.Add(he);
    }

    // Remainder element: the keys past the last full chunk.
    // NOTE(review): it shares lhse[fullBuckets - 1] with the last full-size
    // element above, and when countHistInit is a multiple of num it is empty
    // (start > end, non-finite slope/intercept) yet still appended - confirm
    // this is intended.
    HeapElem heElem = new HeapElem();
    heElem.start = fullBuckets * num;
    heElem.end = countHistInit - 1;
    heElem.hsElem = lhse[fullBuckets - 1];
    heElem.errorList = new List<float>();
    for (int i = fullBuckets * num; i < countHistInit; i++)
    {
        heElem.errorList.Add(qeList[i]);
    }
    heElem.errorList.Sort();

    int c = heElem.errorList.Count;
    float prod1o = 0, sum2o = 0;
    for (int k = 0; k < c; k++)
    {
        prod1o += (k + 1) * heElem.errorList[k];
        sum2o += heElem.errorList[k];
    }
    heElem.spread = c;

    long rest = c;
    float sumno = rest * (rest + 1) / 2;
    float sumnso = rest * (rest + 1) * (2 * rest + 1) / 6;
    float betadeno = c * sumnso - sumno * sumno;
    float betao = (c * prod1o - sumno * sum2o) / betadeno;
    float alphao = (sum2o * sumnso - prod1o * sumno) / betadeno;
    if (alphao > maxalpha)
    {
        maxalpha = alphao;
    }
    if (betao > maxbeta)
    {
        maxbeta = betao;
    }
    heElem.intercept = alphao;
    heElem.slope = betao;
    heElem.mergenorm = heElem.GetMergeNorm();
    heElemList.Add(heElem);

    // ---- Phase 3: merge adjacent buckets ------------------------------
    // Give every element the description of its merge with the right neighbour.
    int heapElemCount = heElemList.Count;
    for (int i = 0; i < heapElemCount - 1; i++)
    {
        MergeTwoHeapElem(heElemList[i], heElemList[i + 1]);
    }

    // Queue every element that has a right neighbour, prioritised by the
    // fitted line of its merge candidate.
    Heap.PriorityQueue<HeapElem, Tuple<float, float>> heapForWork =
        new Heap.PriorityQueue<HeapElem, Tuple<float, float>>();
    for (int i = 0; i < heElemList.Count - 1; i++)
    {
        heapForWork.Enqueue(
            heElemList[i],
            new Tuple<float, float>(heElemList[i].mergeslope, heElemList[i].mergeintercept));
    }

    // NOTE(review): the bookkeeping in this loop is kept exactly as it was.
    // The merged element is never enqueued, priorities of re-merged
    // neighbours are not refreshed, and the replace/remove indices (k, k + 1,
    // k + 2) do not obviously remove the dequeued element - confirm against
    // the intended algorithm before relying on the result.
    while (heapForWork.Count > numSteps)
    {
        HeapElem he = heapForWork.Dequeue().Key;

        // Materialise the merge candidate stored on the dequeued element.
        HeapElem heNew = new HeapElem();
        heNew.hsElem = he.mergehsElemM;
        heNew.intercept = he.mergeintercept;
        heNew.slope = he.mergeslope;
        heNew.errorList = he.mergeerrorList;
        // Take the merged spread, like every other field copied here; the
        // original copied the un-merged he.spread.
        heNew.spread = he.mergespread;
        heNew.end = he.mergeend;
        heNew.start = he.mergestart;

        // Replace the first element that starts after the dequeued one.
        int k = 0;
        for (int i = 0; i < heElemList.Count; i++)
        {
            if (heElemList[i].start > he.end)
            {
                k = i;
                heElemList[i] = heNew;
                break;
            }
        }

        if (k < heElemList.Count - 2)
        {
            // Prepare the merged element's own merge candidate, then drop
            // the element that follows it.
            MergeTwoHeapElem(heNew, heElemList[k + 2]);
            heElemList.RemoveAt(k + 1);
        }

        // Refresh the merge candidate of the queued element that ends right
        // before the dequeued one.
        foreach (var v in heapForWork)
        {
            if (v.Key.end + 1 == he.start)
            {
                MergeTwoHeapElem(v.Key, heNew);
                break;
            }
        }
    }

    // ---- Emit one histogram step per remaining element -----------------
    foreach (var v in heElemList)
    {
        StatStep ss = new StatStep();
        ss.average_range_rows = v.hsElem.average_range_rows;
        ss.distint_range_rows = v.hsElem.distint_range_rows;
        ss.equal_rows = v.hsElem.equal_rows;
        ss.range_high_key = v.hsElem.range_high_key;
        ss.range_rows = v.hsElem.range_rows;
        histLs.Add(ss);
    }
}