protected static long[] known_N_compute_B_and_K_slow(long N, double epsilon, double delta, int quantiles, double[] returnSamplingRate) { int maxBuffers = 50; int maxHeight = 50; double N_double = N; // One possibility is to use one buffer of size N // long ret_b = 1; long ret_k = N; double sampling_rate = 1.0; long memory = N; // Otherwise, there are at least two buffers (b >= 2) // and the height of the tree is at least three (h >= 3) // // We restrict the search for b and h to MAX_BINOM, a large enough value for // practical values of epsilon >= 0.001 and delta >= 0.00001 // double logarithm = System.Math.Log(2.0 * quantiles / delta); double c = 2.0 * epsilon * N_double; for (long b = 2; b < maxBuffers; b++) { for (long h = 3; h < maxHeight; h++) { double binomial = Arithmetic.Binomial(b + h - 2, h - 1); long tmp = (long)System.Math.Ceiling(N_double / binomial); if ((b * tmp < memory) && ((h - 2) * binomial - Arithmetic.Binomial(b + h - 3, h - 3) + Arithmetic.Binomial(b + h - 3, h - 2) <= c)) { ret_k = tmp; ret_b = b; memory = ret_k * b; sampling_rate = 1.0; } if (delta > 0.0) { double t = (h - 2) * Arithmetic.Binomial(b + h - 2, h - 1) - Arithmetic.Binomial(b + h - 3, h - 3) + Arithmetic.Binomial(b + h - 3, h - 2); double u = logarithm / epsilon; double v = Arithmetic.Binomial(b + h - 2, h - 1); double w = logarithm / (2.0 * epsilon * epsilon); // From our SIGMOD 98 paper, we have two equantions to satisfy: // t <= u * alpha/(1-alpha)^2 // kv >= w/(1-alpha)^2 // // Denoting 1/(1-alpha) by x, // we see that the first inequality is equivalent to // t/u <= x^2 - x // which is satisfied by x >= 0.5 + 0.5 * sqrt (1 + 4t/u) // Plugging in this value into second equation yields // k >= wx^2/v double x = 0.5 + 0.5 * System.Math.Sqrt(1.0 + 4.0 * t / u); long k = (long)System.Math.Ceiling(w * x * x / v); if (b * k < memory) { ret_k = k; ret_b = b; memory = b * k; sampling_rate = N_double * 2.0 * epsilon * epsilon / logarithm; } } } } long[] result = new long[2]; result[0] = ret_b; result[1] = ret_k; returnSamplingRate[0] = sampling_rate; return(result); }
protected static long[] unknown_N_compute_B_and_K_raw(double epsilon, double delta, int quantiles) { long[] result; // delta can be set to zero, i.ed, all quantiles should be approximate with probability 1 if (epsilon <= 0.0) { result = new long[4]; result[0] = 1; result[1] = long.MaxValue; result[2] = long.MaxValue; result[3] = 0; return(result); } if (epsilon >= 1.0 || delta >= 1.0) { // can make any error we wish result = new long[4]; result[0] = 2; result[1] = 1; result[2] = 3; result[3] = 0; return(result); } if (delta <= 0.0) { // no way around exact quantile search result = new long[4]; result[0] = 1; result[1] = long.MaxValue; result[2] = long.MaxValue; result[3] = 0; return(result); } int max_b = 50; int max_h = 50; int max_H = 50; int max_Iterations = 2; long best_b = long.MaxValue; long best_k = long.MaxValue; long best_h = long.MaxValue; long best_memory = long.MaxValue; double pow = System.Math.Pow(2.0, max_H); double logDelta = System.Math.Log(2.0 / (delta / quantiles)) / (2.0 * epsilon * epsilon); //double logDelta = System.Math.Log(2.0/(quantiles*delta)) / (2.0*epsilon*epsilon); while (best_b == long.MaxValue && max_Iterations-- > 0) { //until we find a solution // identify that combination of b and h that minimizes b*k. // exhaustive search. for (int b = 2; b <= max_b; b++) { for (int h = 2; h <= max_h; h++) { double Ld = Arithmetic.Binomial(b + h - 2, h - 1); double Ls = Arithmetic.Binomial(b + h - 3, h - 1); // now we have k>=c*(1-alpha)^-2. // let's compute c. //double c = System.Math.Log(2.0/(delta/quantiles)) / (2.0*epsilon*epsilon*System.Math.Min(Ld, 8.0*Ls/3.0)); double c = logDelta / System.Math.Min(Ld, 8.0 * Ls / 3.0); // now we have k>=d/alpha. // let's compute d. double beta = Ld / Ls; double cc = (beta - 2.0) * (max_H - 2.0) / (beta + pow - 2.0); double d = (h + 3 + cc) / (2.0 * epsilon); /* * double d = (Ld*(h+max_H-1.0) + Ls*((h+1)*pow - 2.0*(h+max_H))) / (Ld + Ls*(pow-2.0)); * d = (d + 2.0) / (2.0*epsilon); */ // now we have c*(1-alpha)^-2 == d/alpha. // we solve this equation for alpha yielding two solutions // alpha_1,2 = (c + 2*d +- Sqrt(c*c + 4*c*d))/(2*d) double f = c * c + 4.0 * c * d; if (f < 0.0) { continue; // non real solution to equation } double root = System.Math.Sqrt(f); double alpha_one = (c + 2.0 * d + root) / (2.0 * d); double alpha_two = (c + 2.0 * d - root) / (2.0 * d); // any alpha must satisfy 0<alpha<1 to yield valid solutions Boolean alpha_one_OK = false; Boolean alpha_two_OK = false; if (0.0 < alpha_one && alpha_one < 1.0) { alpha_one_OK = true; } if (0.0 < alpha_two && alpha_two < 1.0) { alpha_two_OK = true; } if (alpha_one_OK || alpha_two_OK) { double alpha = alpha_one; if (alpha_one_OK && alpha_two_OK) { // take the alpha that minimizes d/alpha alpha = System.Math.Max(alpha_one, alpha_two); } else if (alpha_two_OK) { alpha = alpha_two; } // now we have k=Ceiling(Max(d/alpha, (h+1)/(2*epsilon))) long k = (long)System.Math.Ceiling(System.Math.Max(d / alpha, (h + 1) / (2.0 * epsilon))); if (k > 0) { // valid solution? long memory = b * k; if (memory < best_memory) { // found a solution requiring less memory best_k = k; best_b = b; best_h = h; best_memory = memory; } } } } //end for h } //end for b if (best_b == long.MaxValue) { Console.WriteLine("Warning: Computing b and k looks like a lot of work!"); // no solution found so fard very unlikelyd Anyway, try again. max_b *= 2; max_h *= 2; max_H *= 2; } } //end while result = new long[4]; result[3] = 0; if (best_b == long.MaxValue) { // no solution found. // no way around exact quantile search. result[0] = 1; result[1] = long.MaxValue; result[2] = long.MaxValue; } else { result[0] = best_b; result[1] = best_k; result[2] = best_h; result[3] = 1; } return(result); }
protected static long[] known_N_compute_B_and_K_quick(long N, double epsilon) { int maxBuffers = 50; int maxHeight = 50; double N_double = (double)N; double c = N_double * epsilon * 2.0; int[] heightMaximums = new int[maxBuffers - 1]; // for each b, determine maximum height, i.ed the height for which x<=0 and x is a maximum // with x = binomial(b+h-2, h-1) - binomial(b+h-3, h-3) + binomial(b+h-3, h-2) - N * epsilon * 2.0 for (int b1 = 2; b1 <= maxBuffers; b1++) { int h = 3; while (h <= maxHeight && // skip heights until x<=0 (h - 2) * (Arithmetic.Binomial(b1 + h - 2, h - 1)) - (Arithmetic.Binomial(b1 + h - 3, h - 3)) + (Arithmetic.Binomial(b1 + h - 3, h - 2)) - c > 0.0 ) { h++; } //from now on x is monotonically growing... while (h <= maxHeight && // skip heights until x>0 (h - 2) * (Arithmetic.Binomial(b1 + h - 2, h - 1)) - (Arithmetic.Binomial(b1 + h - 3, h - 3)) + (Arithmetic.Binomial(b1 + h - 3, h - 2)) - c <= 0.0 ) { h++; } h--; //go back to last height // was x>0 or did we loop without finding anything? int hMax; if (h >= maxHeight && (h - 2) * (Arithmetic.Binomial(b1 + h - 2, h - 1)) - (Arithmetic.Binomial(b1 + h - 3, h - 3)) + (Arithmetic.Binomial(b1 + h - 3, h - 2)) - c > 0.0) { hMax = int.MinValue; } else { hMax = h; } heightMaximums[b1 - 2] = hMax; //safe some space } //end for // for each b, determine the smallest k satisfying the constraints, i.e. // for each b, determine kMin, with kMin = N/binomial(b+hMax-2,hMax-1) long[] kMinimums = new long[maxBuffers - 1]; for (int b2 = 2; b2 <= maxBuffers; b2++) { int h = heightMaximums[b2 - 2]; long kMin = long.MaxValue; if (h > int.MinValue) { double value = (Arithmetic.Binomial(b2 + h - 2, h - 1)); long tmpK = (long)(System.Math.Ceiling(N_double / value)); if (tmpK <= long.MaxValue) { kMin = tmpK; } } kMinimums[b2 - 2] = kMin; } // from all b's, determine b that minimizes b*kMin long multMin = long.MaxValue; int minB = -1; for (int b3 = 2; b3 <= maxBuffers; b3++) { if (kMinimums[b3 - 2] < long.MaxValue) { long mult = ((long)b3) * ((long)kMinimums[b3 - 2]); if (mult < multMin) { multMin = mult; minB = b3; } } } long b, k; if (minB != -1) { // epsilon large enough? b = minB; k = kMinimums[minB - 2]; } else { // epsilon is very small or zero. b = 1; // the only possible solution without violating the k = N; // approximation guarantees is exact quantile search. } long[] result = new long[2]; result[0] = b; result[1] = k; return(result); }
/// <summary> /// Returns the probability distribution function. /// </summary> /// <param name="k"></param> /// <returns></returns> public double ProbabilityDistributionFunction(int k) { return(Arithmetic.Binomial(my_s, k) * Arithmetic.Binomial(my_N - my_s, my_n - k) / Arithmetic.Binomial(my_N, my_n)); }