/// <summary>
/// Describe the distribution parameters together with an abbreviated list of the
/// interpolation control-point ranks.
/// </summary>
/// <returns>
/// A single-line description. The leading run of consecutive ranks (1, 2, 3, ...) is collapsed
/// to "1-n", followed by up to 100 further ranks, and then "..." plus the final rank if more remain.
/// </returns>
public override string ToString()
{
    const int maxIndividualRanks = 100;

    // Measure the leading run of consecutive ranks 1, 2, 3, ...
    // The bounds check must come first, otherwise the indexer throws when every
    // control point is consecutive (or when the list is empty).
    var i = 0;
    while (i < InterpolationCDFRanks.Count && InterpolationCDFRanks[i] == i + 1)
    {
        i++;
    }

    var ranks = new System.Text.StringBuilder("[");

    // Positions 0..i-1 hold ranks 1..i, so the run ends at rank i (not i - 1).
    if (i == 1)
    {
        ranks.Append('1');
    }
    else if (i > 1)
    {
        ranks.Append("1-").Append(i);
    }

    var ranksToShow = maxIndividualRanks + i;
    for (; i < ranksToShow && i < InterpolationSize; i++)
    {
        // Only separate from a previous entry; avoids a leading comma when there was no run.
        if (ranks.Length > 1)
        {
            ranks.Append(',');
        }
        ranks.Append(InterpolationCDFRanks[i]);
    }

    if (InterpolationSize > ranksToShow)
    {
        ranks.Append("...").Append(InterpolationCDFRanks.Last());
    }
    ranks.Append(']');

    return $"Zipf for N = {N}, α = {Alpha}, K = {K}, C = {C}, ϵ = {Epsilon}. Size = {InterpolationSize}. Ranks = {ranks}";
}
/// <summary>
/// Build the CDF control points by greedily extending each interpolation segment until the
/// relative interpolation error measured at the segment's midpoint becomes too large.
/// </summary>
/// <param name="maxPermittedError">
/// Requested bound on the relative error of the interpolated CDF. The midpoint error is tested
/// against a fraction of this value (see the safety divisor below).
/// </param>
private void InitInterpolationWithBoundedError(double maxPermittedError)
{
    // It is not guaranteed that the midpoint of the segment is where the worst error would be found.
    // Thus permit less error than requested in the hope that this will keep the true maximum error
    // below maxPermittedError as well.
    const double midpointSafetyDivisor = 7;
    var midpointErrorLimit = maxPermittedError / midpointSafetyDivisor;

    var startRank = 1;
    var midPointRank = 1;
    var prevCdf = PDF(1);
    var midPointCdf = prevCdf;

    InterpolationCDFRanks.Add(1);
    InterpolationCDFValues.Add(prevCdf);

    // Advance three pointers, one at the start of the segment, one at the midpoint, and one at the end.
    // The midpoint advances at half the speed of the end of the segment.
    for (var rank = 2; rank <= N - 1; rank++)
    {
        var currCdf = Min(1.0, prevCdf + PDF(rank));

        if ((rank - startRank) % 2 == 0)
        {
            midPointRank++;
            midPointCdf += PDF(midPointRank);
        }

        // No need to interpolate if the segment has no gap that needs interpolating.
        if (rank - startRank >= 2)
        {
            // Add a provisional interpolation point so the interpolator can see the candidate segment.
            InterpolationCDFRanks.Add(rank);
            InterpolationCDFValues.Add(currCdf);

            // Calculate the interpolation error for the midpoint of the candidate segment.
            var endPointPosition = InterpolationSize - 1;
            var cdfInterpolator = CDFInterpolator(endPointPosition);
            var estimatedMidpointCdf = Min(1.0, cdfInterpolator.Y(midPointRank));
            var relativeError = Abs(midPointCdf - estimatedMidpointCdf) / midPointCdf;

            // The provisional point is always removed again; if the error is too large,
            // the previous rank becomes a permanent control point instead.
            InterpolationCDFRanks.RemoveAt(endPointPosition);
            InterpolationCDFValues.RemoveAt(endPointPosition);

            if (relativeError >= midpointErrorLimit)
            {
                // We decided we need an interpolation point, thus are starting a new segment.
                midPointRank = startRank = rank - 1;
                midPointCdf = prevCdf;
                InterpolationCDFRanks.Add(rank - 1);
                InterpolationCDFValues.Add(prevCdf);
            }
        }

        prevCdf = currCdf;
    }

    // Close out the last segment with CDF(N) = 1.
    // When N == 1 the only control point already has rank N; pin its value rather than
    // appending a second control point with the same rank.
    var lastPosition = InterpolationCDFRanks.Count - 1;
    if (InterpolationCDFRanks[lastPosition] == N)
    {
        InterpolationCDFValues[lastPosition] = 1;
    }
    else
    {
        InterpolationCDFRanks.Add(N);
        InterpolationCDFValues.Add(1);
    }
}
/// <summary>
/// Perform no interpolation; store CDF values for all ranks, which may consume much memory.
/// </summary>
/// <remarks>
/// The running sum is clamped to one and the value stored for rank N is exactly one, matching
/// the bounded-error initializer, so accumulated rounding cannot leave the table ending
/// slightly above or below one.
/// </remarks>
private void InitInterpolationWithZeroError()
{
    var cdf = 0.0;
    for (var rank = 1; rank <= N; rank++)
    {
        cdf = Min(1.0, cdf + PDF(rank));
        InterpolationCDFRanks.Add(rank);

        // The final rank closes the distribution: CDF(N) is one by definition.
        InterpolationCDFValues.Add(rank == N ? 1.0 : cdf);
    }
}
/// <summary>
/// Interpolate the CDF using the control points.
/// </summary>
/// <param name="rank">Rank for which the CDF is sought.</param>
/// <returns>The cumulative density value for the given rank, which varies from a small value at rank = 1
/// to one for rank = N.</returns>
public double ApproximateCDF(int rank)
{
    // NOTE(review): the original comment here said "This is expensive: execution time proportional
    // to rank/2". The visible code does a binary search followed by a single interpolator
    // evaluation, so that note may be stale - confirm against the Interpolator implementation.
    if (rank >= N)
    {
        return 1.0;
    }

    if (rank <= 1)
    {
        return PDF(1);
    }

    var position = InterpolationCDFRanks.BinarySearch(rank);
    if (position >= 0)
    {
        // Exact match - no interpolation needed. A hit at index 0 is a valid match too.
        return InterpolationCDFValues[position];
    }

    // No exact match: the bitwise complement of the result is the index of the first control
    // point whose rank exceeds the requested one, which is passed to CDFInterpolator as the
    // segment position (presumably its end point - verify against CDFInterpolator).
    position = ~position;
    Interpolator<double> interpolator = CDFInterpolator(position);
    return Min(1, interpolator.Y(rank));
}