/// <summary>
/// Sums, for u = 1 .. max - 1, the quotient
/// (coef*(u + c1) - ceiling(sqrt((u + c1)^2 - 2*denom*n))) / denom
/// and returns that total minus (max - 1)*c2.
/// </summary>
/// <remarks>
/// Per the original notes this counts lattice points under the hyperbola
/// (x0 - b1*v + b2*u)*(y0 + a1*v - a2*u) = n along one axis, taking the
/// smaller of the two positive solutions (the one nearest the axis):
///   v = ((a1*b2+a2*b1)*(u+c1) - sqrt((u+c1)^2 - 4*a1*b1*n))/(2*a1*b1) - c2
///   u = ((a1*b2+a2*b1)*(v+c2) - sqrt((v+c2)^2 - 4*a2*b2*n))/(2*a2*b2) - c1
/// Integer arithmetic is possible because, for a, b at least 0 and c above 0,
///   floor((b - sqrt(a))/c) = floor((b - ceiling(sqrt(a)))/c).
/// </remarks>
private Integer CountPoints(Integer max, Integer c1, Integer c2, Integer coef, Integer denom)
{
    var total = (Integer)0;

    // State for "step 0"; each iteration advances it to the next point so
    // that only additions are needed to refresh the radicand and numerator.
    var radicand = c1 * c1 - 2 * denom * n;
    var numerator = c1 * coef;
    var radicandStep = 2 * c1 - 1;

    for (var step = (Integer)1; step < max; step++)
    {
        // (k + 1)^2 - k^2 = 2k + 1: the step itself grows by 2 each time.
        radicandStep += 2;
        radicand += radicandStep;
        numerator += coef;
        total += (numerator - IntegerMath.CeilingSquareRoot(radicand)) / denom;
    }

    // The "- c2" term of every summand is applied once, in bulk.
    return total - (max - 1) * c2;
}
/// <summary>
/// Computes two consecutive quotients of the form
/// (numerator - ceiling(sqrt(radicand))) / (2*ab2) in one pass:
/// <paramref name="v1"/> for <paramref name="u1"/> and
/// <paramref name="v2"/> for u1 + 1.
/// </summary>
/// <remarks>
/// NOTE(review): presumably abba = a1*b2 + a2*b1 and ab2 = 2*a1*b1, in which
/// case v1 matches the closed form used by VFloor - confirm against callers.
/// </remarks>
public void VFloor2(BigInteger u1, BigInteger a1, BigInteger b1, BigInteger c1, BigInteger c2, BigInteger abba, BigInteger ab2, out BigInteger v1, out BigInteger v2)
{
    var doubledShift = (u1 + c1) << 1;
    var divisor = ab2 << 1;
    var numerator = abba * doubledShift - a1 + b1 - divisor * c2;
    var rootBase = doubledShift - a1 - b1;
    var radicand = IntegerMath.Square(rootBase) - divisor * n;

    v1 = (numerator - IntegerMath.CeilingSquareRoot(radicand)) / divisor;

    // Moving from u1 to u1 + 1 raises rootBase by 2, so the square grows by
    // (rootBase + 2)^2 - rootBase^2 = 4*(rootBase + 1); the numerator grows
    // by 2*abba.
    var nextNumerator = numerator + (abba << 1);
    var nextRadicand = radicand + ((rootBase + 1) << 2);
    v2 = (nextNumerator - IntegerMath.CeilingSquareRoot(nextRadicand)) / divisor;
}
/// <summary>
/// Sums, for i = 1 .. max, the quotient
/// (b_i - ceiling(sqrt(a_i))) / (2*a1*b1) where
/// a_i = (a1*x0 + b1*y0 + i)^2 - 4*a1*b1*n and
/// b_i = +/-(a1*x0 - b1*y0) + i*(b2*a1 + a2*b1); the sign is positive when
/// <paramref name="horizontal"/> is true and negative otherwise.
/// </summary>
/// <remarks>
/// Per the original notes this counts lattice points under the hyperbola
/// (x0 - b1*v + b2*u)*(y0 + a1*v - a2*u) = n, horizontally (v in terms of u)
/// or vertically (u in terms of v), taking the smaller of the two positive
/// solutions. The identity
///   floor((b - sqrt(a))/c) = floor((b - ceiling(sqrt(a)))/c)
/// (a, b at least 0, c above 0) keeps everything in integer arithmetic.
/// </remarks>
private Integer CountPoints(bool horizontal, Integer max, Integer a2, Integer b2, Integer a1, Integer b1, Integer x0, Integer y0)
{
    var total = (Integer)0;

    var xTerm = a1 * x0;
    var yTerm = b1 * y0;
    var offset = xTerm + yTerm;
    var numeratorStep = b2 * a1 + a2 * b1;
    var divisor = 2 * a1 * b1;

    // Values for "step 0"; the loop advances them incrementally.
    var radicand = offset * offset - 2 * divisor * n;
    var numerator = horizontal ? xTerm - yTerm : yTerm - xTerm;
    var radicandStep = 2 * offset - 1;

    var last = (long)max;
    for (long step = 1; step <= last; step++)
    {
        // Consecutive squares differ by successive odd numbers.
        radicandStep += 2;
        radicand += radicandStep;
        numerator += numeratorStep;
        total += (numerator - IntegerMath.CeilingSquareRoot(radicand)) / divisor;
    }

    return total;
}
/// <summary>
/// Sums, for u = 1 .. w - 1, the quotient
/// (2*(a1*b2+b1*a2)*(u+c1) - a1 + b1 - 4*a1*b1*c2
///   - ceiling(sqrt((2*(u+c1) - a1 - b1)^2 - 4*a1*b1*n))) / (4*a1*b1),
/// updating numerator and radicand incrementally. Returns 0 when
/// <paramref name="w"/> is at most 1.
/// </summary>
/// <remarks>
/// <paramref name="thread"/> is not read by this method. Debug builds
/// cross-check every term against VFloor and the final total against
/// ProcessRegionHorizontal.
/// </remarks>
public ulong ProcessRegionManual(int thread, ulong w, ulong a1, ulong b1, ulong c1, ulong a2, ulong b2, ulong c2)
{
    // Nothing to sum when the range u = 1 .. w - 1 is empty.
    if (w <= 1)
    {
        return 0;
    }

    var lastU = w - 1;
    var numeratorStep = (a1 * b2 + b1 * a2) << 1;
    var rootBase = (c1 << 1) - a1 - b1;

    // (rootBase + 4)^2 - (rootBase + 2)^2 = 4*rootBase + 12; after that the
    // first difference of the squares grows by 8 per step.
    var radicandStep = (rootBase << 2) + 12;
    var divisor = (a1 * b1) << 2;

    // Numerator and radicand for u = 1.
    var numerator = numeratorStep * (1 + c1) - a1 + b1 - divisor * c2;
    var radicand = IntegerMath.Square(rootBase + 2) - divisor * n;
    Debug.Assert(radicand == UInt128.Square(rootBase + 2) - divisor * (UInt128)n);

    var total = (ulong)0;
    for (var u = (ulong)1; ; ++u)
    {
        Debug.Assert((numerator - IntegerMath.CeilingSquareRoot(radicand)) / divisor == VFloor(u, a1, b1, c1, a2, b2, c2));
        total += (numerator - IntegerMath.CeilingSquareRoot(radicand)) / divisor;

        if (u >= lastU)
        {
            break;
        }

        // Advance the running values to u + 1.
        numerator += numeratorStep;
        radicand += radicandStep;
        radicandStep += 8;
    }

    Debug.Assert(total == ProcessRegionHorizontal(w, 0, a1, b1, c1, a2, b2, c2));
#if DIAG
    Console.WriteLine("ProcessRegionManual: s = {0}", total);
#endif
    return total;
}
/// <summary>
/// Evaluates the function implemented by this class at <paramref name="n"/>;
/// returns 0 for non-positive input, otherwise the value left in mx[1]
/// after all batches are processed and ComputeMx() has run.
/// </summary>
/// <remarks>
/// NOTE(review): presumably this is the Mertens function M(n), given the
/// Moebius-sum helper used here - confirm against the enclosing class.
/// </remarks>
public long Evaluate(long n)
{
    if (n <= 0)
    {
        return 0;
    }

    this.n = n;

    // Sieve limit: the larger of floor(n^(2/3))*C1/C2 and ceiling(sqrt(n)).
    var powerBound = (long)IntegerMath.FloorPower((BigInteger)n, 2, 3) * C1 / C2;
    var rootBound = IntegerMath.CeilingSquareRoot(n);
    u = Math.Max(powerBound, rootBound);
    imax = n / u;

    this.mobius = new MobiusRange(u + 1, threads);
    var batchSize = Math.Min(u, maximumBatchSize);
    m = new int[batchSize];
    mx = new long[imax + 1];

    // Walk [1, u] in batches; m0 carries the running sum from one batch
    // into the next.
    var carry = 0;
    for (long first = 1; first <= u; first += maximumBatchSize)
    {
        var last = Math.Min(first + maximumBatchSize - 1, u);
        carry = mobius.GetSums(first, last + 1, m, carry);
        ProcessBatch(first, last);
    }

    ComputeMx();
    return mx[1];
}
/// <summary>
/// 128-bit evaluation entry point: returns 0 for n == 0, otherwise the value
/// left in mx[1] after all batches are processed and ComputeMx() has run.
/// </summary>
/// <remarks>
/// Besides sizing the work arrays, this method selects the wheel-included
/// odd indices i up to imax, records n / i for each, and distributes the
/// indices over per-thread buckets (separately for "large" and "small"
/// quotients) with a greedy least-loaded assignment.
/// NOTE(review): presumably this computes the Mertens function - confirm
/// against the enclosing class.
/// </remarks>
public Int128 Evaluate(UInt128 n)
{
    if (n == 0)
    {
        return 0;
    }

    this.n = n;

    // Sieve limit: the larger of floor(n^(2/3))*C1/C2 and ceiling(sqrt(n)).
    u = (long)IntegerMath.Max(IntegerMath.FloorPower(n, 2, 3) * C1 / C2, IntegerMath.CeilingSquareRoot(n));
    imax = (int)(n / (ulong)u);
    mobius = new MobiusRangeAdditive(u + 1, threads);

    var batchSize = Math.Min(u, maximumBatchSize);
    mu = new sbyte[maximumSmallBatchSize];
    m = new int[batchSize];
    mx = new Int128[imax + 1];

    // Collect the odd indices accepted by the wheel, then trim the array.
    r = new int[imax + 1];
    var wheelCount = 0;
    for (var candidate = 1; candidate <= imax; candidate += 2)
    {
        if (wheelInclude[(candidate % wheelSize) >> 1])
        {
            r[wheelCount++] = candidate;
        }
    }
    Array.Resize(ref r, wheelCount);

    niLarge = new UInt128[imax + 1];
    niSmall = new long[imax + 1];

    // Greedy load balancing: each index goes to the currently cheapest bucket.
    var bucketCount = Math.Max(1, threads);
    var load = new double[bucketCount];
    var largeLists = Enumerable.Range(0, bucketCount).Select(unused => new List<int>()).ToArray();
    var smallLists = Enumerable.Range(0, bucketCount).Select(unused => new List<int>()).ToArray();
    for (var position = 0; position < wheelCount; position++)
    {
        var i = r[position];
        var ni = n / (uint)i;
        var large = ni > largeLimit;
        var cost = Math.Sqrt((double)n / i) * (large ? C7 : 1);

        // First bucket with the strictly smallest accumulated cost.
        var target = 0;
        var lightest = load[0];
        for (var bucket = 1; bucket < bucketCount; bucket++)
        {
            if (load[bucket] < lightest)
            {
                lightest = load[bucket];
                target = bucket;
            }
        }

        niLarge[i] = ni;
        if (large)
        {
            largeLists[target].Add(i);
        }
        else
        {
            niSmall[i] = (long)ni;
            smallLists[target].Add(i);
        }
        load[target] += cost;
    }
    bucketsLarge = largeLists.Select(list => list.ToArray()).ToArray();
    bucketsSmall = smallLists.Select(list => list.ToArray()).ToArray();

    // Phase 1: [1, xmed] in small batches that need both values and sums.
    var carry = 0;
    var xmed = Math.Min((long)IntegerMath.FloorRoot(n, 2) * C5 / C6, u);
    for (long first = 1; first <= xmed; first += maximumSmallBatchSize)
    {
        var last = Math.Min(first + maximumSmallBatchSize - 1, xmed);
        carry = mobius.GetValuesAndSums(first, last + 1, mu, m, carry);
        ProcessBatch(first, last);
    }

    // Phase 2: (xmed, u] in regular batches that need only the sums.
    for (var first = xmed + 1; first <= u; first += maximumBatchSize)
    {
        var last = Math.Min(first + maximumBatchSize - 1, u);
        carry = mobius.GetSums(first, last + 1, m, carry);
        ProcessBatch(first, last);
    }

    ComputeMx();
    return mx[1];
}
// Counts lattice points in a parallelogram-shaped region next to the
// hyperbola x*y = n and returns the count. Sub-regions are handled with an
// explicit stack instead of recursion: the "right" sub-region is processed
// in place by updating the arguments, the "left" one is pushed onto `stack`.
private Integer ProcessRegion(Integer w, Integer h, Integer a1, Integer b1, Integer a2, Integer b2, Integer x0, Integer y0)
{
    // The hyperbola is defined by H(x, y): x*y = n.
    // Line L1 has -slope m1 = a1/b1.
    // Line L2 has -slope m2 = a2/b2.
    // Both lines pass through P0 = (x0, y0).
    // The region is a parallelogram with the left side bounded by L1,
    // the bottom bounded by L2, with width w (along L2) and height h
    // (along L1). The lower-left corner is P0 (the intersection of
    // L2 and L1) and represents (u, v) = (0, 0).
    // Both w and h are counted in terms of lattice points, not length.

    // For the purposes of counting, the lattice points on lines L1 and L2
    // have already been counted.

    // Note that a1*b2 - b1*a2 = 1 because
    // m2 and m1 are Farey neighbors, e.g. 1 & 2 or 3/2 & 2 or 8/5 & 5/3.

    // The equations that define (u, v) in terms of (x, y) are:
    // u = b1*(y-y0)+a1*(x-x0)
    // v = b2*(y-y0)+a2*(x-x0)

    // And therefore the equations that define (x, y) in terms of (u, v) are:
    // x = x0-b1*v+b2*u
    // y = y0+a1*v-a2*u

    // Since all parameters are integers and a1*b2 - b1*a2 = 1,
    // every lattice point in (x, y) is a lattice point in (u, v)
    // and vice-versa.

    // Geometrically, the UV coordinate system is the composition
    // of a translation and two shear mappings. The UV-based hyperbola
    // is essentially a "mini" hyperbola that resembles the full
    // hyperbola in that:
    // - The equation is still a hyperbola (although it is now a quadratic in two variables)
    // - The endpoints of the curve are roughly tangent to the axes

    // We process the region by "lopping off" the maximal isosceles
    // right triangle in the lower-left corner and then process
    // the two remaining "slivers" in the upper-left and lower-right,
    // which creates two smaller "micro" hyperbolas, which we then
    // process recursively.

    // When we are in the region of the original hyperbola where
    // the curvature is roughly constant, the deformed hyperbola
    // will in fact resemble a circular arc.

    // A line with -slope = 1 in UV-space has -slope = (a1+a2)/(b1+b2)
    // in XY-space. We call this m3 and the line defining the third side
    // of the triangle L3, containing point P3 tangent to the hyperbola.

    // This is all slightly complicated by the fact that the diagonal that
    // defines the region that we "lop off" may be broken and shifted
    // up or down near the tangent point. As a result we actually have
    // P3 and P4 and L3 and L4.

    // We can measure work in units of X because it is the short
    // axis and it ranges from cbrt(n) to sqrt(n). If we did one
    // unit of work for each X coordinate we would have an O(sqrt(n))
    // algorithm. But because there is only one lattice point on a
    // line with slope m per the denominator of m in X and because
    // the denominator of m roughly doubles for each subdivision,
    // there will be less than one unit of work for each unit of X.

    // As a result, each iteration reduces the work by about
    // a factor of two resulting in 1 + 2 + 4 + ... + sqrt(r) steps
    // or O(sqrt(r)). Since the sum of the sizes of the top-level
    // regions is O(sqrt(n)), this gives an O(n^(1/4)) algorithm for
    // nearly constant curvature.

    // However, since the hyperbola is increasingly non-circular for small
    // values of x, the subdivision is not nearly as beneficial (and
    // also not symmetric) so it is only worthwhile to use region
    // subdivision on regions where cbrt(n) < x < sqrt(n).

    // The sqrt(n) bound comes from symmetry and the Dirichlet
    // hyperbola method (which we also use). The cbrt(n)
    // bound comes from the fact that the second derivative H''(x)
    // exceeds one at (2n)^(1/3) ~= 1.26*cbrt(n). Since we process
    // regions with adjacent integral slopes at the top level, by the
    // time we get to cbrt(n), the size of the region is at most
    // one, so we might as well process those values using the
    // naive approach of summing y = n/x.

    // Finally, at some point the region becomes small enough and we
    // can just count points under the hyperbola using whichever axis
    // is shorter. This is quite a bit harder than computing y = n/x
    // because the transformations we are using result in a general
    // quadratic in two variables. Nevertheless, with some
    // preliminary calculations, each value can be calculated with
    // a few additions, a square root and a division.

    // Sum the lattice points.
    var sum = (Integer)0;

    // Process regions on the stack.
    while (true)
    {
        // Process regions iteratively.
        while (true)
        {
            // Nothing left to process.
            if (w <= 0 || h <= 0)
            {
                break;
            }

            // Check whether the point at (w, 1) is inside the hyperbola.
            if ((b2 * w - b1 + x0) * (a1 - a2 * w + y0) <= n)
            {
                // Remove the first row: count its w points and move the
                // origin one step along v.
                sum += w;
                x0 -= b1;
                y0 += a1;
                --h;
                if (h == 0)
                {
                    break;
                }
            }

            // Check whether the point at (1, h) is inside the hyperbola.
            if ((b2 - b1 * h + x0) * (a1 * h - a2 + y0) <= n)
            {
                // Remove the first column: count its h points and move the
                // origin one step along u.
                sum += h;
                x0 += b2;
                y0 -= a2;
                --w;
                if (w == 0)
                {
                    break;
                }
            }

            // Invariants for the remainder of the processing of the region:
            // H(u,v) at v=h, 0 <= u < 1
            // H(u,v) at u=w, 0 <= v < 1
            // -du/dv at v=h >= 0
            // -dv/du at u=w >= 0
            // In other words: the hyperbola is less than one unit away
            // from the axis at P1 and P2 and the distance from the axis
            // to the hyperbola increases monotonically as you approach
            // (u, v) = (0, 0).
            Debug.Assert((b2 - b1 * h + x0) * (a1 * h - a2 + y0) > n);
            Debug.Assert((b2 * w - b1 + x0) * (a1 - a2 * w + y0) > n);
            Debug.Assert(b2 * a1 - a2 * b1 == 1);

            // Find the pair of points (u3, v3) and (u4, v4) below H(u,v) where:
            // -dv/du at u=u3 >= 1
            // -dv/du at u=u4 <= 1
            // u4 = u3 + 1
            // Specifically, solve:
            // (a1*(v+c2)-a2*(u+c1))*(b2*(u+c1)-b1*(v+c2)) = n at dv/du = -1
            // Then u3 = floor(u) and u4 = u3 + 1.
            // Note that there are two solutions, one negative and one positive.
            // We take the positive solution.

            // We use the identity (a >= 0, b >= 0; a, b elements of Z):
            // floor(b*sqrt(a/c)) = floor(sqrt(floor(b^2*a/c)))
            // to enable using integer arithmetic.

            // Formula:
            // u = (a1*b2+a2*b1+2*a1*b1)*sqrt(n/(a3*b3))-c1
            var c1 = a1 * x0 + b1 * y0;
            var c2 = a2 * x0 + b2 * y0;
            var a3 = a1 + a2;
            var b3 = b1 + b2;
            var coef = a1 * b2 + b1 * a2;
            var denom = 2 * a1 * b1;
            var sqrtcoef = coef + denom;
            var u3 = IntegerMath.FloorSquareRoot(sqrtcoef * sqrtcoef * n / (a3 * b3)) - c1;
            var u4 = u3 + 1;

            // Finally compute v3 and v4 from u3 and u4 by solving
            // the hyperbola for v.
            // Note that there are two solutions, both positive.
            // We take the smaller solution (nearest the u axis).

            // Formulas:
            // v = ((a1*b2+a2*b1)*(u+c1)-sqrt((u+c1)^2-4*a1*b1*n))/(2*a1*b1)-c2
            // u = ((a1*b2+a2*b1)*(v+c2)-sqrt((v+c2)^2-4*a2*b2*n))/(2*a2*b2)-c1
            var uc1 = u3 + c1;
            var a = uc1 * uc1 - 2 * denom * n;
            var b = uc1 * coef;
            // When u3 is 0 the formula is skipped and v3 is taken to be h.
            var v3 = u3 != 0 ? (b - IntegerMath.CeilingSquareRoot(a)) / denom - c2 : h;
            // (uc1 + 1)^2 = uc1^2 + 2*uc1 + 1, so the radicand for u4 is
            // derived from the one already computed for u3.
            var v4 = (b + coef - IntegerMath.CeilingSquareRoot(a + 2 * uc1 + 1)) / denom - c2;
            Debug.Assert(u3 < w);

            // Compute the V intercept of L3 and L4. Since the lines are diagonal the intercept
            // is the same on both U and V axes and v13 = u03 and v14 = u04.
            var r3 = u3 + v3;
            var r4 = u4 + v4;
            Debug.Assert(IntegerMath.Abs(r3 - r4) <= 1);

            // Count points horizontally or vertically if one axis collapses (or is below our cutoff)
            // or if the triangle exceeds the bounds of the rectangle.
            if (u3 <= smallRegionCutoff || v4 <= smallRegionCutoff || r3 > h || r4 > w)
            {
                // Iterate along the shorter side of the region.
                if (h > w)
                {
                    sum += CountPoints(w, c1, c2, coef, denom);
                }
                else
                {
                    sum += CountPoints(h, c2, c1, coef, 2 * a2 * b2);
                }
                break;
            }

            // Add the triangle defined by L1, L2, and the smaller of L3 and L4.
            var size = IntegerMath.Min(r3, r4);
            sum += size * (size - 1) / 2;

            // Adjust for the difference (if any) between L3 and L4.
            if (r3 != r4)
            {
                sum += r3 > r4 ? u3 : v4;
            }

            // Push left region onto the stack.
            stack.Push(new Region(u3, h - r3, a1, b1, a3, b3, x0 - b1 * r3, y0 + a1 * r3));

            // Process right region iteratively (no change to a2 and b2).
            w -= r4;
            h = v4;
            a1 = a3;
            b1 = b3;
            x0 = x0 + b2 * r4;
            y0 = y0 - a2 * r4;
        }

        // Any more regions to process?
        if (stack.Count == 0)
        {
            break;
        }

        // Pop a region off the stack for processing.
        var region = stack.Pop();
        w = region.w;
        h = region.h;
        a1 = region.a1;
        b1 = region.b1;
        a2 = region.a2;
        b2 = region.b2;
        x0 = region.x0;
        y0 = region.y0;
    }

    // Return the sum of lattice points in this region.
    return(sum);
}
/// <summary>
/// Stores the supplied Moebius range and upper bound, derives the limit u
/// from <paramref name="nmax"/>, and precomputes the sums for [1, ulo] into
/// mlo, where ulo is u capped at maximumBatchSize.
/// </summary>
public MertensRangeBasic(MobiusRange mobius, long nmax)
{
    this.mobius = mobius;
    this.nmax = nmax;
    threads = mobius.Threads;

    // u is the larger of floor(nmax^(2/3))*C1/C2 and ceiling(sqrt(nmax)).
    var powerBound = (long)IntegerMath.FloorPower((BigInteger)nmax, 2, 3) * C1 / C2;
    var rootBound = IntegerMath.CeilingSquareRoot(nmax);
    u = Math.Max(powerBound, rootBound);

    ulo = Math.Min(u, maximumBatchSize);
    mlo = new int[ulo];
    mobius.GetSums(1, ulo + 1, mlo, 0);
}
/// <summary>
/// Evaluates the function implemented by this class at <paramref name="n"/>;
/// returns 0 for non-positive input, otherwise mi1 - sum once every batch
/// has been processed.
/// </summary>
/// <remarks>
/// The limit u is forced through DownToOdd and the batches are fetched from
/// a MobiusOddRange, so each batch array holds half as many entries as the
/// span it covers.
/// NOTE(review): presumably this computes the Mertens function - confirm
/// against the enclosing class.
/// </remarks>
public long Evaluate(long n)
{
    if (n <= 0)
    {
        return 0;
    }

    this.n = n;

    // Limit: the larger of floor(n^(2/3))*C1/C2 and ceiling(sqrt(n)),
    // then adjusted by DownToOdd.
    var powerBound = (long)IntegerMath.FloorPower((BigInteger)n, 2, 3) * C1 / C2;
    var rootBound = IntegerMath.CeilingSquareRoot(n);
    u = Math.Max(powerBound, rootBound);
    u = DownToOdd(u);
    imax = n / u;

    this.mobius = new MobiusRange(imax + 1, threads);
    this.mobiusOdd = new MobiusOddRange(u + 2, threads);

    var batchSize = Math.Min(u + 1, maximumBatchSize);
    mu = new sbyte[imax];
    m = new int[batchSize >> 1];
    sum = 0;

    // Values for [1, imax] are needed up front.
    mobius.GetValues(1, imax + 1, mu);

    // Walk [1, u] in batches; the running sum is carried across batches.
    var carry = 0;
    for (long first = 1; first <= u; first += maximumBatchSize)
    {
        var last = Math.Min(first + maximumBatchSize - 2, u);
        carry = mobiusOdd.GetSums(first, last + 2, m, carry);
        ProcessBatch(first, last);
    }

    return mi1 - sum;
}
/// <summary>
/// Evaluates the function implemented by this class at <paramref name="n"/>.
/// Returns 0 for non-positive input, delegates to MertensFunctionDR when the
/// limit u does not exceed wheelSize, and otherwise returns mx[1] after all
/// batches are processed and ComputeMx() has run.
/// </summary>
/// <remarks>
/// When more than one thread is configured, the wheel-included odd indices
/// are spread over per-thread buckets with a greedy least-loaded assignment.
/// </remarks>
public long Evaluate(long n)
{
    if (n <= 0)
    {
        return 0;
    }

    this.n = n;

    // Limit: the larger of floor(n^(2/3))*C1/C2 and ceiling(sqrt(n)).
    var powerBound = (long)IntegerMath.FloorPower((BigInteger)n, 2, 3) * C1 / C2;
    var rootBound = IntegerMath.CeilingSquareRoot(n);
    u = Math.Max(powerBound, rootBound);

    // Inputs this small are handed to the other implementation.
    if (u <= wheelSize)
    {
        return new MertensFunctionDR(threads).Evaluate(n);
    }

    imax = (int)(n / u);
    mobius = new MobiusRange(u + 1, threads);
    var batchSize = Math.Min(u, maximumBatchSize);
    m = new int[batchSize];
    mx = new long[imax + 1];

    // Collect the odd indices accepted by the wheel, then trim the array.
    r = new int[imax + 1];
    lmax = 0;
    for (var candidate = 1; candidate <= imax; candidate += 2)
    {
        if (wheelInclude[(candidate % wheelSize) >> 1])
        {
            r[lmax++] = candidate;
        }
    }
    Array.Resize(ref r, lmax);

    if (threads > 1)
    {
        var load = new double[threads];
        var pending = new List<int>[threads];
        for (var slot = 0; slot < threads; slot++)
        {
            pending[slot] = new List<int>();
        }

        for (var position = 0; position < lmax; position++)
        {
            var i = r[position];
            var cost = Math.Sqrt(n / i);

            // First slot with the strictly smallest accumulated cost.
            var target = 0;
            var lightest = load[0];
            for (var slot = 1; slot < threads; slot++)
            {
                if (load[slot] < lightest)
                {
                    lightest = load[slot];
                    target = slot;
                }
            }

            pending[target].Add(i);
            load[target] += cost;
        }

        buckets = new int[threads][];
        for (var slot = 0; slot < threads; slot++)
        {
            buckets[slot] = pending[slot].ToArray();
        }
    }

    // Walk [1, u] in batches; the running sum is carried across batches.
    var carry = 0;
    for (long first = 1; first <= u; first += maximumBatchSize)
    {
        var last = Math.Min(first + maximumBatchSize - 1, u);
        carry = mobius.GetSums(first, last + 1, m, carry);
        ProcessBatch(first, last);
    }

    ComputeMx();
    return mx[1];
}
/// <summary>
/// Returns
/// (2*(a1*b2+b1*a2)*(u+c1) - a1 + b1
///   - ceiling(sqrt((2*(u+c1) - a1 - b1)^2 - 4*a1*b1*n))) / (4*a1*b1) - c2.
/// </summary>
public BigInteger VFloor(BigInteger u, BigInteger a1, BigInteger b1, BigInteger c1, BigInteger a2, BigInteger b2, BigInteger c2)
{
    var shifted = u + c1;
    var divisor = 4 * a1 * b1;
    var linearPart = 2 * (a1 * b2 + b1 * a2) * shifted - a1 + b1;
    var radicand = IntegerMath.Square(2 * shifted - a1 - b1) - divisor * n;
    var root = IntegerMath.CeilingSquareRoot(radicand);
    return (linearPart - root) / divisor - c2;
}
/// <summary>
/// Returns
/// (2*(a1*b2+b1*a2)*(v+c2) + a2 - b2
///   - ceiling(sqrt((2*(v+c2) - a2 - b2)^2 - 4*a2*b2*n))) / (4*a2*b2) - c1.
/// </summary>
public BigInteger UFloor(BigInteger v, BigInteger a1, BigInteger b1, BigInteger c1, BigInteger a2, BigInteger b2, BigInteger c2)
{
    var shifted = v + c2;
    var divisor = 4 * a2 * b2;
    var linearPart = 2 * (a1 * b2 + b1 * a2) * shifted + a2 - b2;
    var radicand = IntegerMath.Square(2 * shifted - a2 - b2) - divisor * n;
    var root = IntegerMath.CeilingSquareRoot(radicand);
    return (linearPart - root) / divisor - c1;
}
/// <summary>
/// Stores the supplied Moebius range and upper bound, derives the limit u
/// from <paramref name="nmax"/>, and precomputes both values and sums for
/// [1, ulo], where ulo is u capped at maximumBatchSize but never smaller
/// than minimumLowSize.
/// </summary>
public MertensRangeDR(MobiusRange mobius, long nmax)
{
    this.mobius = mobius;
    this.nmax = nmax;
    threads = mobius.Threads;

    // u is the larger of floor(nmax^(2/3))*C1/C2 and ceiling(sqrt(nmax)).
    var powerBound = (long)IntegerMath.FloorPower((BigInteger)nmax, 2, 3) * C1 / C2;
    var rootBound = IntegerMath.CeilingSquareRoot(nmax);
    u = Math.Max(powerBound, rootBound);

    var capped = Math.Min(u, maximumBatchSize);
    ulo = Math.Max(capped, minimumLowSize);
    mlo = new int[ulo];
    values = new sbyte[ulo];
    mobius.GetValuesAndSums(1, ulo + 1, values, mlo, 0);
}