/// <summary>
/// For every index listed in <paramref name="r"/>, subtracts from <c>mx</c>
/// the J- and K-sum contributions of the range x1..x2, using the
/// precomputed quotients in <c>niSmall</c>.
/// </summary>
/// <param name="x1">First value of the range being applied.</param>
/// <param name="x2">Last value of the range being applied.</param>
/// <param name="r">Indices into <c>niSmall</c> and <c>mx</c> to update.</param>
private void UpdateMxSmall(long x1, long x2, int[] r)
{
    foreach (var index in r)
    {
        var value = niSmall[index];
        var root = IntegerMath.FloorSquareRoot(value);

        // Crossover between the j-range and the k-range, moved to the
        // largest argument that produces the same quotient.
        var crossover = Math.Min(root * C3 / C4, value);
        crossover = value / (value / crossover);

        var jFirst = UpToOdd(Math.Max(imax / index + 1, value / (x2 + 1) + 1));
        var jLast = DownToOdd(Math.Min(crossover, value / x1));
        var kFirst = Math.Max(1, x1);
        var kLast = Math.Min(value / crossover - 1, x2);

        long delta = 0;
        delta += JSumSmall2(value, jFirst, jLast, x1);
        delta += KSumSmall1Mu(value, kFirst, ref kLast, x1);
        delta += KSumSmall2(value, kFirst, kLast, x1);

        mx[index] -= delta;
    }
}
/// <summary>
/// Accumulates, for every index i = imin, imin + increment, ... up to
/// <c>imax</c> whose <c>mu[i]</c> is non-zero, the J- and K-sums of the
/// range x1..x2 weighted by <c>mu[i]</c>, and adds the total atomically
/// to the <c>sum</c> field.
/// </summary>
/// <param name="x1">First value of the range being applied.</param>
/// <param name="x2">Last value of the range being applied.</param>
/// <param name="imin">First index handled by this call.</param>
/// <param name="increment">Stride between handled indices.</param>
private void UpdateValue(long x1, long x2, long imin, long increment)
{
    long total = 0;
    for (var i = imin; i <= imax; i += increment)
    {
        var sign = mu[i];
        if (sign != 0)
        {
            var x = n / i;
            var root = IntegerMath.FloorSquareRoot(x);

            // 7/5 ~= sqrt(2)
            var crossover = Math.Min(root * 7 / 5, x);
            crossover = x / (x / crossover);

            long partial = 0;

            var jFirst = UpToOdd(Math.Max(imax / i + 1, x / (x2 + 1) + 1));
            var jLast = DownToOdd(Math.Min(crossover, x / x1));
            partial += JSum1(x, jFirst, ref jLast, x1);
            partial += JSum2(x, jFirst, jLast, x1);

            var kFirst = Math.Max(1, x1);
            var kLast = Math.Min(x / crossover - 1, x2);
            partial += KSum1(x, kFirst, ref kLast, x1);
            partial += KSum2(x, kFirst, kLast, x1);

            total += sign * partial;
        }
    }

    // Several callers may run concurrently; publish the partial total atomically.
    Interlocked.Add(ref sum, total);
}
/// <summary>
/// Evaluates the function for <paramref name="n"/> by feeding the low
/// batch of sums (<c>mlo</c>) and then successive high batches obtained
/// from <c>mobius.GetSums</c> through <c>ProcessBatch</c>, and finally
/// combining the per-index results with <c>ComputeMx</c>.
/// </summary>
/// <param name="n">The argument; values less than or equal to zero yield 0.</param>
/// <returns>The value produced by <c>ComputeMx</c>, or 0 when n &lt;= 0.</returns>
/// <exception cref="ArgumentException">
/// <paramref name="n"/> is greater than the <c>nmax</c> this instance was built for.
/// </exception>
public long Evaluate(long n)
{
    if (n <= 0)
    {
        return 0;
    }
    if (n > nmax)
    {
        // Use the (message, paramName) overload: the single-argument
        // constructor would treat "n" as the message and leave ParamName null.
        throw new ArgumentException("n exceeds the maximum value supported by this instance", "n");
    }

    // Note: this stores the square root in a field read by other members.
    sqrt = IntegerMath.FloorSquareRoot(n);
    var imax = Math.Max(1, n / u);
    var mx = new long[imax + 1];

    // Low range [1, ulo] uses the sums precomputed in mlo.
    ProcessBatch(mx, n, imax, mlo, 1, ulo);

    if (ulo < u)
    {
        // Remaining range (ulo, u] is generated batch by batch, carrying the
        // running sum m0 from one batch into the next.
        var mhi = new int[maximumBatchSize];
        var m0 = mlo[ulo - 1];
        for (var x = ulo + 1; x <= u; x += maximumBatchSize)
        {
            var xstart = x;
            var xend = Math.Min(xstart + maximumBatchSize - 1, u);
            m0 = mobius.GetSums(xstart, xend + 1, mhi, m0);
            ProcessBatch(mx, n, imax, mhi, xstart, xend);
        }
    }

    return ComputeMx(mx, imax);
}
/// <summary>
/// Adds the contributions of the values in <paramref name="m"/> (indexed
/// relative to <paramref name="x1"/>) to every <c>mx[i]</c>, i = 1..imax.
/// Each entry is updated atomically.
/// </summary>
/// <param name="m">Values for the range x1..x2, stored at offset (x - x1).</param>
/// <param name="x1">First value of the range.</param>
/// <param name="x2">Last value of the range.</param>
private void UpdateMx(long[] m, long x1, long x2)
{
    for (var row = 1; row <= imax; row++)
    {
        var x = xi[row];
        var root = IntegerMath.FloorSquareRoot(x);

        var kFirst = Math.Max(1, x1);
        var kLast = Math.Min(x2, x / root - 1);
        var jFirst = UpToOdd(Math.Max(3, x / (x2 + 1) + 1));
        var jLast = DownToOdd(Math.Min(root, x / x1));

        long delta = 0;

        // J part: JSum may lower jLast; the remaining odd j are summed directly.
        delta += JSum(x, jFirst, ref jLast, m, x1);
        var j = jFirst;
        while (j <= jLast)
        {
            delta += m[x / j - x1];
            j += 2;
        }

        // K part: KSum may lower kLast; the remaining k are weighted by
        // consecutive differences of T1Odd(x / k).
        delta += KSum(x, kFirst, ref kLast, m, x1);
        var previous = T1Odd(x / kFirst);
        var k = kFirst;
        while (k <= kLast)
        {
            var following = T1Odd(x / (k + 1));
            delta += (previous - following) * m[k - x1];
            previous = following;
            k++;
        }

        Interlocked.Add(ref mx[row], -delta);
    }
}
/// <summary>
/// Sums <c>T2Isolated(n / x^2)</c>, signed by <c>values[(x - x1) >> 1]</c>,
/// over the odd x in [max(1, x1), min(floor(sqrt(n)), x2)], walking x
/// downwards, and returns the result reduced to its low two bits.
/// </summary>
/// <param name="n">The argument of the outer computation.</param>
/// <param name="x1">First x of the current batch; also the base of <c>values</c>.</param>
/// <param name="x2">Last x of the current batch.</param>
/// <returns>The signed sum masked with 3.</returns>
private int F2Small(UInt128 n, long x1, long x2)
{
    var lowest = UpToOdd(Math.Max(1, x1));
    var highest = DownToOdd(Math.Min((long)IntegerMath.FloorSquareRoot(n), x2));

    var total = 0;

    // square tracks x^2 incrementally: x^2 - (x-2)^2 = 4x - 4, and that
    // difference shrinks by 8 each time x drops by 2.
    var square = (ulong)highest * (ulong)highest;
    var step = 4 * (ulong)highest - 4;

    for (var x = highest; x >= lowest; x -= 2)
    {
        Debug.Assert(square == (ulong)x * (ulong)x);

        var sign = values[(x - x1) >> 1];
        if (sign < 0)
        {
            total -= T2Isolated(n / square);
        }
        else if (sign > 0)
        {
            total += T2Isolated(n / square);
        }

        square -= step;
        step -= 8;
    }

    return total & 3;
}
/// <summary>
/// Evaluates the full sum for <paramref name="n"/> from the partial sum
/// over x = 1..floor(sqrt(n)): twice the partial sum minus the square of
/// that root.
/// </summary>
/// <param name="n">The argument.</param>
/// <returns>2 * Evaluate(n, 1, root) - root * root.</returns>
public Integer Evaluate(Integer n)
{
    var root = IntegerMath.FloorSquareRoot(n);
    var partial = Evaluate(n, 1, (long)root);
    var square = root * root;
    return 2 * partial - square;
}
/// <summary>
/// Computes the F2 contribution of the odd x in
/// [max(1, x1), min(floor(sqrt(n)), x2)], reduced to its low two bits.
/// With at most one thread the whole range goes to the worker overload;
/// otherwise x below the small cutoff are handled inline with
/// <c>T2Parallel</c> and the rest are interleaved across one task per thread.
/// </summary>
/// <param name="x1">First x of the current batch.</param>
/// <param name="x2">Last x of the current batch.</param>
/// <returns>The combined sum masked with 3.</returns>
private int F2SmallParallel(long x1, long x2)
{
    var lowest = UpToOdd(Math.Max(1, x1));
    var highest = DownToOdd(Math.Min((long)IntegerMath.FloorSquareRoot(n), x2));

    if (threads <= 1)
    {
        return F2SmallParallel(0, lowest, highest, x1, 2);
    }

    var boundary = DownToOdd(Math.Max(lowest, Math.Min(smallCutoff, highest)));

    // Small x: each term is itself evaluated in parallel.
    var total = 0;
    for (var x = lowest; x < boundary; x += 2)
    {
        total += IntegerMath.Mobius(x) * T2Parallel(n / ((UInt128)x * (UInt128)x));
    }

    // Remaining x: worker t takes boundary + 2t, then strides by 2 * threads.
    var stride = 2 * threads;
    var workers = new Task<int>[threads];
    for (var t = 0; t < threads; t++)
    {
        var worker = t;
        var start = boundary + 2 * t;
        workers[t] = Task.Factory.StartNew(() => F2SmallParallel(worker, start, highest, x1, stride));
    }
    Task.WaitAll(workers);

    total += workers.Sum(task => task.Result);
    return total & 3;
}
/// <summary>
/// Evaluates the sum for <paramref name="n"/>. Resets the <c>sum</c> and
/// <c>modsum</c> accumulators, runs the (serial or parallel) evaluation
/// over 1..floor(sqrt(n)), and combines the accumulated value with the
/// appropriate square term depending on the <c>odd</c> and <c>mod2</c> flags.
/// </summary>
/// <param name="n">The argument; stored in the <c>n</c> field.</param>
/// <returns>
/// When <c>odd</c> and <c>mod2</c> are both set, a value masked with 3;
/// otherwise twice the accumulated sum minus a square term.
/// </returns>
public BigInteger Evaluate(BigInteger n)
{
    this.n = n;
    sum = 0;
    modsum = 0;

    var root = (long)IntegerMath.FloorSquareRoot(n);
    if (threads > 1)
    {
        EvaluateParallel(1, root);
    }
    else
    {
        Evaluate(1, root);
    }

    if (!odd)
    {
        return 2 * (BigInteger)sum - (BigInteger)root * root;
    }

    // Odd-only variant: the square term uses (root + 1) / 2.
    var half = (root + 1) / 2;
    if (!mod2)
    {
        return 2 * (BigInteger)sum - (BigInteger)half * half;
    }

    return (2 * (int)(modsum & 1) - (int)(half & 1)) & 3;
}
/// <summary>
/// Computes the inner tau sum for <paramref name="y"/> modulo 2 using a
/// producer/consumer arrangement: consumer tasks drain work items from a
/// blocking queue and accumulate into a shared total, while this thread
/// enqueues the work for [1, slowLimit) and [slowLimit, sqrt + 1).
/// </summary>
/// <param name="y">The argument.</param>
/// <param name="sqrt">Receives floor(sqrt(y)).</param>
/// <returns>The accumulated sum masked with 1.</returns>
public int TauSumInnerParallel(UInt128 y, out ulong sqrt)
{
    sqrt = (ulong)IntegerMath.FloorSquareRoot((BigInteger)y);
    var sum = 0;

    // BlockingCollection is IDisposable; release it once all consumers are done.
    using (var queue = new BlockingCollection<WorkItem>())
    {
        // Create consumers.
        var consumers = Math.Max(1, threads);
        var tasks = new Task[consumers];
        for (var consumer = 0; consumer < consumers; consumer++)
        {
            var thread = consumer;
            tasks[consumer] = Task.Factory.StartNew(() => ConsumeTauInnerSumItems(thread, queue, y, ref sum));
        }

        try
        {
            // Produce work items.
            var slowLimit = (ulong)Math.Pow(sqrt, 0.8);
            TauSumInnerParallel(queue, 1, slowLimit);
            TauSumInnerParallel(queue, slowLimit, sqrt + 1);
        }
        finally
        {
            // Always signal completion and wait, even if producing failed,
            // so consumers are never left blocked on an open queue and the
            // queue is not disposed while they are still running.
            queue.CompleteAdding();
            Task.WaitAll(tasks);
        }
    }

    return sum & 1;
}
/// <summary>
/// Adds the contributions of the current batch of <c>m</c> values (stored
/// for odd arguments at index (x - x1) >> 1) to <c>mx[i]</c> for
/// i = offset, offset + increment, ... up to <c>imax</c>.
/// </summary>
/// <param name="x1">First value of the batch; base of the <c>m</c> array.</param>
/// <param name="x2">Last value of the batch.</param>
/// <param name="offset">First index handled by this call.</param>
/// <param name="increment">Stride between handled indices.</param>
private void UpdateMx(long x1, long x2, long offset, long increment)
{
    for (var row = offset; row <= imax; row += increment)
    {
        var x = xi[row];
        var root = IntegerMath.FloorSquareRoot(x);
        long delta = 0;

        // J part: JSum may lower jLast; the remaining odd j are summed directly.
        var jFirst = UpToOdd(Math.Max(3, x / (x2 + 2) + 1));
        var jLast = DownToOdd(Math.Min(root, x / x1));
        delta += JSum(x, jFirst, ref jLast, x1);
        var j = jFirst;
        while (j <= jLast)
        {
            delta += m[(x / j - x1) >> 1];
            j += 2;
        }

        // K part: KSum may lower kLast; the remaining k are weighted by
        // consecutive differences of T1Odd(x / k).
        var kFirst = Math.Max(1, x1);
        var kLast = Math.Min(x / root - 1, x2 + 1);
        delta += KSum(x, kFirst, ref kLast, x1);
        var previous = T1Odd(x / kFirst);
        var k = kFirst;
        while (k <= kLast)
        {
            var following = T1Odd(x / (k + 1));
            delta += (previous - following) * m[(k - x1) >> 1];
            previous = following;
            k++;
        }

        mx[row] -= delta;
    }
}
/// <summary>
/// For every index listed in <paramref name="r"/>, subtracts from <c>mx</c>
/// the J- and K-sum contributions of the range x1..x2, using x = n / i.
/// When the TIMER symbol is defined, the elapsed time is written to the console.
/// </summary>
/// <param name="x1">First value of the range being applied.</param>
/// <param name="x2">Last value of the range being applied.</param>
/// <param name="r">Indices of the <c>mx</c> entries to update.</param>
private void UpdateMx(long x1, long x2, int[] r)
{
#if TIMER
    var timer = new ThreadStopwatch();
    timer.Restart();
#endif
    foreach (var index in r)
    {
        var x = n / index;
        var root = IntegerMath.FloorSquareRoot(x);

        // Crossover between the j-range and the k-range, moved to the
        // largest argument that produces the same quotient.
        var crossover = Math.Min(root * C3 / C4, x);
        crossover = x / (x / crossover);

        var jFirst = UpToOdd(Math.Max(imax / index + 1, x / (x2 + 1) + 1));
        var jLast = DownToOdd(Math.Min(crossover, x / x1));
        var kFirst = Math.Max(1, x1);
        var kLast = Math.Min(x / crossover - 1, x2);

        long delta = 0;
        delta += JSum2(x, jFirst, jLast, x1);
        delta += KSum1(x, kFirst, ref kLast, x1);
        delta += KSum2(x, kFirst, kLast, x1);

        mx[index] -= delta;
    }
#if TIMER
    Console.WriteLine("x1 = {0:F3}, length = {1:F3}, elapsed = {2:F3} msec", (double)x1, (double)(x2 - x1 + 1), (double)timer.ElapsedTicks / ThreadStopwatch.Frequency * 1000);
#endif
}
/// <summary>
/// For i = imin, imin + increment, ... up to <paramref name="imax"/>,
/// skipping indices whose <c>values[i - 1]</c> is zero, subtracts from
/// <paramref name="mx"/>[i] the J- and K-sum contributions of the batch
/// <paramref name="m"/> covering x1..x2.
/// </summary>
/// <param name="mx">Accumulator array updated in place.</param>
/// <param name="n">The argument of the outer computation.</param>
/// <param name="m">Batch of sums for the range x1..x2.</param>
/// <param name="x1">First value of the batch.</param>
/// <param name="x2">Last value of the batch.</param>
/// <param name="imin">First index handled by this call.</param>
/// <param name="imax">Largest index to handle.</param>
/// <param name="increment">Stride between handled indices.</param>
private void UpdateMx(long[] mx, long n, int[] m, long x1, long x2, long imin, long imax, long increment)
{
    for (var i = imin; i <= imax; i += increment)
    {
        if (values[i - 1] != 0)
        {
            var x = n / i;
            var root = IntegerMath.FloorSquareRoot(x);
            long delta = 0;

            var jFirst = UpToOdd(Math.Max(imax / i + 1, x / (x2 + 1) + 1));
            var jLast = DownToOdd(Math.Min(root, x / x1));
            delta += JSum1(x, jFirst, ref jLast, m, x1);
            delta += JSum2(x, jFirst, jLast, m, x1);

            var kFirst = Math.Max(1, x1);
            var kLast = Math.Min(x / root - 1, x2);
            delta += KSum1(x, kFirst, ref kLast, m, x1);
            delta += KSum2(x, kFirst, kLast, m, x1);

            mx[i] -= delta;
        }
    }
}
/// <summary>
/// Evaluates the full sum for <paramref name="n"/> from the partial sum
/// over 1..floor(sqrt(n)): twice the partial sum minus the square of
/// <c>T1(root)</c>.
/// </summary>
/// <param name="n">The argument.</param>
/// <returns>2 * Evaluate(n, 1, root) - T1(root)^2.</returns>
public ulong Evaluate(ulong n)
{
    var root = IntegerMath.FloorSquareRoot(n);
    var partial = Evaluate(n, 1, root);
    var side = (ulong)T1(root);
    return 2 * partial - side * side;
}
/// <summary>
/// Dispatches to the fixed-width overload when floor(sqrt(x)) fits in a
/// <see cref="ulong"/>; larger arguments are not supported.
/// </summary>
/// <param name="x">The upper limit of the sum.</param>
/// <returns>The result of the (UInt128, ulong) overload.</returns>
/// <exception cref="NotImplementedException">floor(sqrt(x)) exceeds ulong.MaxValue.</exception>
private int SumTwoToTheOmega(BigInteger x)
{
    // sum(2^w(d), d=[1,x]) mod 4 = sum(mu(d)TauSum(x/d^2), d=[1,floor(sqrt(x))]) mod 4
    var limit = IntegerMath.FloorSquareRoot(x);
    if (limit > ulong.MaxValue)
    {
        throw new NotImplementedException();
    }

    return SumTwoToTheOmega((UInt128)x, (ulong)limit);
}
/// <summary>
/// Computes one worker's share of the T3 sum: z starts at 1 + 2 * worker
/// and advances by 2 * workers while z &lt;= root3, using this worker's own
/// <c>hyperbolicSum</c> instance for each term.
/// </summary>
/// <param name="n">The argument.</param>
/// <param name="root3">Upper bound for z.</param>
/// <param name="worker">Zero-based index of this worker.</param>
/// <param name="workers">Total number of workers.</param>
/// <returns>The partial sum for this worker.</returns>
private BigInteger T3Worker(BigInteger n, BigInteger root3, int worker, int workers)
{
    var total = BigInteger.Zero;
    var z = (BigInteger)1 + 2 * worker;
    while (z <= root3)
    {
        var quotient = n / z;
        var root = IntegerMath.FloorSquareRoot(quotient);
        var pairs = hyperbolicSum[worker].Evaluate(quotient, (long)z + 2, (long)root);
        total += 2 * pairs - IntegerMath.Power(T1(root), 2) + T1(quotient / z);
        z += 2 * workers;
    }

    return total;
}
/// <summary>
/// Creates an instance able to evaluate arguments up to
/// <paramref name="nmax"/>. Derives the limits <c>u</c> and <c>ulo</c>
/// and precomputes the values and running sums for 1..ulo.
/// </summary>
/// <param name="mobius">Source of the values and sums; also supplies the thread count.</param>
/// <param name="nmax">Largest argument this instance accepts.</param>
public MertensRangeInverted(MobiusRange mobius, long nmax)
{
    this.mobius = mobius;
    this.nmax = nmax;
    threads = mobius.Threads;
    sum2 = 0;

    // u is floor(nmax^(2/3)) scaled by C1 / C2, but never below sqrt(nmax) + 1.
    var root = IntegerMath.FloorSquareRoot(nmax);
    var scaledPower = (long)IntegerMath.FloorPower((BigInteger)nmax, 2, 3) * C1 / C2;
    u = Math.Max(scaledPower, root + 1);

    // The low range is one batch at most, but at least minimumLowSize.
    ulo = Math.Max(Math.Min(u, maximumBatchSize), minimumLowSize);

    values = new sbyte[ulo];
    mlo = new int[ulo];
    mobius.GetValuesAndSums(1, ulo + 1, values, mlo, 0);
}
/// <summary>
/// Computes T3(n) by summing, for z = 1..floor(cbrt(n)), a hyperbolic-sum
/// term for n / z, and then combining as 3 * sum + root3^3. Runs inline
/// when <c>threads</c> is zero; otherwise z values are interleaved across
/// <c>threads</c> tasks, each using its own <c>hyperbolicSum</c> instance.
/// </summary>
/// <param name="n">The argument.</param>
/// <returns>3 * (sum of per-z terms) + floor(cbrt(n))^3.</returns>
public BigInteger T3Slow(BigInteger n)
{
    var sum = (BigInteger)0;
    var root3 = IntegerMath.FloorRoot(n, 3);
    if (threads == 0)
    {
        for (var z = (BigInteger)1; z <= root3; z++)
        {
            var nz = n / z;
            var sqrtnz = IntegerMath.FloorSquareRoot(nz);
            var t = hyperbolicSum[0].Evaluate(nz, (long)z + 1, (long)sqrtnz);
            sum += 2 * t - sqrtnz * sqrtnz + nz / z;
        }
    }
    else
    {
        // Guard the shared total with a private object rather than `this`,
        // so code outside this method that locks on the instance can neither
        // block nor deadlock with the workers.
        var gate = new object();
        var tasks = new Task[threads];
        for (var i = 0; i < threads; i++)
        {
            var thread = i;
            tasks[i] = Task.Factory.StartNew(() =>
            {
                // Each task accumulates privately and publishes once.
                var s = (BigInteger)0;
                for (var z = (BigInteger)1 + thread; z <= root3; z += threads)
                {
                    var nz = n / z;
                    var sqrtnz = IntegerMath.FloorSquareRoot(nz);
                    var t = hyperbolicSum[thread].Evaluate(nz, (long)z + 1, (long)sqrtnz);
                    s += 2 * t - sqrtnz * sqrtnz + nz / z;
                }
                lock (gate)
                {
                    sum += s;
                }
            });
        }
        Task.WaitAll(tasks);
    }

    sum = 3 * sum + root3 * root3 * root3;
    return sum;
}
/// <summary>
/// For k = max(1, x1) + min, stepping by <paramref name="increment"/> up to
/// min(sqrt, x2), accumulates m[k - x1] times the I-sums over the odd
/// indices up to a per-k limit, and adds the total atomically to <c>sum2</c>.
/// </summary>
/// <param name="mx">Not read by this method.</param>
/// <param name="n">The argument of the outer computation.</param>
/// <param name="m">Batch of sums for the range x1..x2, indexed by (k - x1).</param>
/// <param name="x1">First value of the batch.</param>
/// <param name="x2">Last value of the batch.</param>
/// <param name="imax">Largest index considered.</param>
/// <param name="min">Offset of the first k handled by this call.</param>
/// <param name="increment">Stride between handled k values.</param>
private void UpdateMxSmall(long[] mx, long n, int[] m, long x1, long x2, long imax, long min, long increment)
{
    var kFirst = Math.Max(1, x1) + min;
    var kLast = Math.Min(sqrt, x2);

    long total = 0;
    for (var k = kFirst; k <= kLast; k += increment)
    {
        var upper = n / k;
        var lower = n / (k + 1);

        // Largest index to include: start from min(imax, n / k^2) and back
        // off while n / (k + 1) / i is still below floor(sqrt(n / i)).
        var last = IntegerMath.Min(imax, n / (k * k));
        while (last > 0 && lower / last < IntegerMath.FloorSquareRoot(n / last))
        {
            --last;
        }
        last = DownToOdd(last);

        long partial = 0;
        partial += ISum1(upper, lower, 1, ref last);
        partial += ISum2(upper, lower, 1, last);

        total += m[k - x1] * partial;
    }

    Interlocked.Add(ref sum2, total);
}
/// <summary>
/// Returns <c>T1</c> applied to floor(sqrt(n / a)), where <c>n</c> is the
/// instance field.
/// </summary>
/// <param name="a">The divisor applied to <c>n</c>.</param>
private BigInteger YTan(BigInteger a)
{
    var quotient = n / a;
    var root = IntegerMath.FloorSquareRoot(quotient);
    return T1(root);
}
/// <summary>
/// Evaluates the parity result for <paramref name="n"/>. Initialises the
/// working fields (limits, sieves, <c>xi</c>/<c>mx</c> tables and batch
/// buffers), processes the small and medium x ranges batch by batch,
/// adds the large-x contribution, and reduces the total to a single bit.
/// </summary>
/// <param name="n">The argument; stored in the <c>n</c> field.</param>
/// <returns>0 or 1.</returns>
public int Evaluate(UInt128 n)
{
    this.n = n;
    var total = 0;

    // Limits. The order matters: Xi reads the n field, xmax depends on
    // imax and sqrtn, and xmed depends on xmax.
    sqrtn = (long)IntegerMath.FloorSquareRoot(n);
    kmax = (int)IntegerMath.FloorLog(n, 2);
    imax = (long)IntegerMath.FloorPower(n, 1, 5) * C1 / C2;
    xmax = DownToOdd(imax != 0 ? Xi(imax) : sqrtn);
    xmed = DownToOdd(Math.Min((long)(IntegerMath.FloorPower(n, 2, 7) * C3 / C4), xmax));
    var dmax = (long)IntegerMath.Min(n / IntegerMath.Square((UInt128)xmed) + 1, n);

    // Sieves and per-index tables.
    mobius = new MobiusOddRangeAdditive((xmax + 2) | 1, threads);
    divisors = new DivisorOddRangeAdditive((dmax + 2) | 1, threads);
    xi = new long[imax + 1];
    mx = new long[imax + 1];

    // Initialize xi.
    for (var i = 1; i <= imax; i++)
    {
        xi[i] = Xi(i);
    }

    // Batch buffers hold odd arguments only, hence half the batch size.
    values = new sbyte[mobiusBatchSize >> 1];
    m = new int[mobiusBatchSize >> 1];
    m0 = 0;
    dsums = new ulong[divisorBatchSize >> 1];
    d1 = d2 = 1;

    // Process small x values.
    for (long first = 1; first <= xmed; first += mobiusBatchSize)
    {
        var last = Math.Min(xmed, first + mobiusBatchSize - 2);
        m0 = mobius.GetValuesAndSums(first, last + 2, values, m, m0);
        total += Pi2Small(first, last);
        UpdateMx(first, last);
    }

    // Process medium x values.
#if true
    for (var first = xmed + 2; first <= xmax; first += mobiusBatchSize)
    {
        var last = Math.Min(xmax, first + mobiusBatchSize - 2);
        m0 = mobius.GetValuesAndSums(first, last + 2, values, m, m0);
        total += Pi2Medium(first, last);
        UpdateMx(first, last);
    }
#else
    for (var last = xmax; last > xmed; last -= mobiusBatchSize)
    {
        var first = Math.Max(xmed + 2, last - mobiusBatchSize + 2);
        m0 = mobius.GetValuesAndSums(first, last + 2, values, m, m0);
        total += Pi2Medium(first, last);
        UpdateMx(first, last);
    }
#endif

    // Process large x values.
    total += Pi2Large();

    // Adjust for final parity of F2.
    total -= IntegerMath.Mertens(kmax);

    // Compute final result.
    total &= 3;
    Debug.Assert((total & 1) == 0);
    var half = total >> 1;
    var adjustment = n >= 2 ? 1 : 0;
    return (half + adjustment) % 2;
}
/// <summary>
/// Returns floor(sqrt(n / i)) for the instance field <c>n</c>.
/// </summary>
/// <param name="i">The divisor applied to <c>n</c>.</param>
private long Xi(long i)
{
    var quotient = n / (ulong)i;
    var root = IntegerMath.FloorSquareRoot(quotient);
    return (long)root;
}
/// <summary>
/// Sums <c>T2Sequential(n / x^2)</c>, weighted by <c>values[(x - x1) >> 1]</c>,
/// over the odd x in [max(1, x1), min(floor(sqrt(n)), x2)], walking x
/// downwards, and returns the result reduced to its low two bits.
/// </summary>
/// <remarks>
/// The first loop avoids divisions by updating the quotients incrementally
/// (see the assertions for the exact invariants) and groups consecutive x
/// that share the same n / x^2. It bails out as soon as an update would
/// need more than two correction steps; the second loop then finishes the
/// remaining x with one explicit division each.
/// </remarks>
/// <param name="n">The argument of the outer computation.</param>
/// <param name="x1">First x of the current batch; also the base of <c>values</c>.</param>
/// <param name="x2">Last x of the current batch.</param>
/// <returns>The weighted sum masked with 3.</returns>
private int F2Medium(UInt128 n, long x1, long x2)
{
    var xmin = UpToOdd(Math.Max(1, x1));
    var xmax = DownToOdd(Math.Min((long)IntegerMath.FloorSquareRoot(n), x2));
    var s = 0;
    var x = xmax;

    // Seed the incremental state as if the previous step had been x + 2.
    var beta = (long)(n / ((ulong)x + 2));
    var eps = (long)(n % ((ulong)x + 2));
    var delta = (long)(n / (ulong)x - (ulong)beta);
    var gamma = (long)(2 * (UInt128)beta - (UInt128)x * (UInt128)delta);
    var alpha = beta / (x + 2);
    var alphax = (alpha + 1) * (x + 2);
    var lastalpha = (long)-1;
    var count = 0;
    while (x >= xmin)
    {
        // Advance the remainder; correct delta by at most two steps upward
        // or one step downward, otherwise give up on the fast path.
        eps += gamma;
        if (eps >= x)
        {
            ++delta;
            gamma -= x;
            eps -= x;
            if (eps >= x)
            {
                ++delta;
                gamma -= x;
                eps -= x;
                if (eps >= x)
                {
                    break;
                }
            }
        }
        else if (eps < 0)
        {
            --delta;
            gamma += x;
            eps += x;
        }
        beta += delta;
        gamma += delta << 2;

        // Advance alpha by at most two steps, otherwise give up.
        alphax -= 2 * alpha + 2;
        if (alphax <= beta)
        {
            ++alpha;
            alphax += x;
            if (alphax <= beta)
            {
                ++alpha;
                alphax += x;
                if (alphax <= beta)
                {
                    break;
                }
            }
        }

        // Invariants maintained by the updates above.
        Debug.Assert(eps == (BigInteger)n % x);
        Debug.Assert(beta == (BigInteger)n / x);
        Debug.Assert(delta == beta - (BigInteger)n / (x + 2));
        Debug.Assert(gamma == 2 * beta - (BigInteger)(x - 2) * delta);
        Debug.Assert(alpha == n / ((BigInteger)x * x));

        var mu = values[(x - x1) >> 1];
        if (mu != 0)
        {
            // Flush the pending group whenever n / x^2 changes; only the
            // weight modulo 4 matters because the result is masked with 3.
            if (alpha != lastalpha)
            {
                count &= 3;
                if (count != 0)
                {
                    s += count * T2Sequential(lastalpha);
                    count = 0;
                }
                lastalpha = alpha;
            }
            count += mu;
        }
        x -= 2;
    }

    // Flush the last pending group.
    count &= 3;
    if (count != 0)
    {
        s += count * T2Sequential(lastalpha);
    }

    // Finish any remaining x directly; xx tracks x^2 incrementally
    // (x^2 - (x-2)^2 = 4x - 4, shrinking by 8 per step).
    var xx = (ulong)x * (ulong)x;
    var dx = 4 * (ulong)x - 4;
    while (x >= xmin)
    {
        Debug.Assert(xx == (ulong)x * (ulong)x);
        var mu = values[(x - x1) >> 1];
        if (mu > 0)
        {
            s += T2Sequential((long)(n / xx));
        }
        else if (mu < 0)
        {
            s -= T2Sequential((long)(n / xx));
        }
        xx -= dx;
        dx -= 8;
        x -= 2;
    }
    return(s & 3);
}
/// <summary>
/// Computes the inner tau sum for <paramref name="y"/> by running the
/// large-value worker over the range starting at 1 with bound sqrt + 1.
/// </summary>
/// <param name="y">The argument.</param>
/// <param name="sqrt">Receives floor(sqrt(y)).</param>
/// <returns>The value returned by <c>TauSumInnerWorkerLarge</c>.</returns>
public int TauSumInnerLarge(UInt128 y, out ulong sqrt)
{
    var root = (ulong)IntegerMath.FloorSquareRoot((BigInteger)y);
    sqrt = root;
    return TauSumInnerWorkerLarge(y, 1, root + 1);
}
/// <summary>
/// Counts the lattice points inside the given parallelogram region that
/// lie on or below the hyperbola x*y = n, repeatedly lopping off a
/// triangle and subdividing the remainder. Sub-regions are kept on the
/// <c>stack</c> field instead of recursing.
/// </summary>
/// <param name="w">Width of the region along L0, in lattice points.</param>
/// <param name="h">Height of the region along L1, in lattice points.</param>
/// <param name="a1">Numerator of the -slope of L1.</param>
/// <param name="b1">Denominator of the -slope of L1.</param>
/// <param name="a2">Numerator of the -slope of L0.</param>
/// <param name="b2">Denominator of the -slope of L0.</param>
/// <param name="x0">X coordinate of the intersection of L0 and L1.</param>
/// <param name="y0">Y coordinate of the intersection of L0 and L1.</param>
/// <returns>The number of lattice points counted for the region.</returns>
private Integer ProcessRegion(Integer w, Integer h, Integer a1, Integer b1, Integer a2, Integer b2, Integer x0, Integer y0)
{
    // The hyperbola is defined by H(x, y): x*y = n.
    // Line L0 has slope m0 = -a2/b2.
    // Line L1 has slope m1 = -a1/b1.
    // Both lines pass through P01 = (x0, y0).
    // The region is a parallelogram with the left side bounded by L1,
    // the bottom bounded by L0, with width w (along L0) and height h
    // (along L1).  The lower-left corner is P01 (the intersection of
    // L0 and L1) and represents (u, v) = (0, 0).
    // Both w and h are counted in terms of lattice points, not length.

    // For the purposes of counting, the lattice points on lines L0 and L1
    // have already been counted.

    // Note that b2*a1 - a2*b1 = 1 because
    // m0 and m1 are Farey neighbors, e.g. 1 & 2 or 3/2 & 2 or 8/5 & 5/3

    // The equations that define (u, v) in terms of (x, y) are:
    // u = b1*(y-y0)+a1*(x-x0)
    // v = b2*(y-y0)+a2*(x-x0)

    // And therefore the equations that define (x, y) in terms of (u, v) are:
    // x = x0-b1*v+b2*u
    // y = y0+a1*v-a2*u

    // Since all parameters are integers and b2*a1 - a2*b1 = 1,
    // every lattice point in (x, y) is a lattice point in (u, v)
    // and vice-versa.

    // Geometrically, the UV coordinate system is the composition
    // of a translation and two shear mappings.  The UV-based hyperbola
    // is essentially a "mini" hyperbola that resembles the full
    // hyperbola in that:
    // - The equation is still a hyperbola (although it is now a quadratic in two variables)
    // - The endpoints of the curve are roughly tangent to the axes

    // We process the region by "lopping off" the maximal isosceles
    // right triangle in the lower-left corner and then processing
    // the two remaining "slivers" in the upper-left and lower-right,
    // which creates two smaller "micro" hyperbolas, which we then
    // process recursively.

    // When we are in the region of the original hyperbola where
    // the curvature is roughly constant, the deformed hyperbola
    // will in fact resemble a circular arc.

    // A line with -slope = 1 in UV-space has -slope = (a2+a1)/(b2+b1)
    // in XY-space.  We call this m2, and the line defining the third side
    // of the triangle L2, containing the point P2 tangent to the hyperbola.

    // This is all slightly complicated by the fact that the diagonal that
    // defines the region that we "lop off" may be broken and shifted
    // up or down near the tangent point.  As a result we actually have
    // P2a and P2b and L2a and L2b.

    // We can measure work in units of X because it is the short
    // axis and it ranges from cbrt(n) to sqrt(n).  If we did one
    // unit of work for each X coordinate we would have an O(sqrt(n))
    // algorithm.  But because there is only one lattice point on a
    // line with slope m per the denominator of m in X and because
    // the denominator of m roughly doubles for each subdivision,
    // there will be less than one unit of work for each unit of X.

    // As a result, each iteration reduces the work by about
    // a factor of two resulting in 1 + 2 + 4 + ... + sqrt(r) steps
    // or O(sqrt(r)).  Since the sum of the sizes of the top-level
    // regions is O(sqrt(n)), this gives a O(n^(1/4)) algorithm for
    // nearly constant curvature.

    // However, since the hyperbola is increasingly non-circular for small
    // values of x, the subdivision is not nearly as beneficial (and
    // also not symmetric) so it is only worthwhile to use region
    // subdivision on regions where cbrt(n) < x < sqrt(n).

    // The sqrt(n) bound comes from symmetry and the Dirichlet
    // hyperbola method, which we also use.  The cbrt(n)
    // bound comes from the fact that the second derivative H''(x)
    // exceeds one at (2n)^(1/3) ~= 1.26*cbrt(n).  Since we process
    // regions with adjacent integral slopes at the top level, by the
    // time we get to cbrt(n), the size of the region is at most
    // one, so we might as well process those values using the
    // naive approach of summing y = n/x.

    // Finally, at some point the region becomes small enough and we
    // can just count points under the hyperbola using whichever axis
    // is shorter.  This is quite a bit harder than computing y = n/x
    // because the transformations we are using result in a general
    // quadratic in two variables.  Nevertheless, with some
    // preliminary calculations, each value can be calculated with
    // a few additions, a square root and a division.

    // Sum the lattice points.
    var sum = (Integer)0;

    // Process regions on the stack.
    // NOTE(review): the loop runs until the stack field is empty, so it
    // presumably expects the stack to be empty on entry - confirm.
    while (true)
    {
        // Process regions iteratively.
        while (true)
        {
            // Nothing left to process.
            if (w <= 0 || h <= 0)
            {
                break;
            }

            // Check whether the point at (w, 1) is inside the hyperbola.
            if ((b2 * w - b1 + x0) * (a1 - a2 * w + y0) <= n)
            {
                // Remove the first row.
                sum += w;
                x0 -= b1;
                y0 += a1;
                --h;
                if (h == 0)
                {
                    break;
                }
            }

            // Check whether the point at (1, h) is inside the hyperbola.
            if ((b2 - b1 * h + x0) * (a1 * h - a2 + y0) <= n)
            {
                // Remove the first column.
                sum += h;
                x0 += b2;
                y0 -= a2;
                --w;
                if (w == 0)
                {
                    break;
                }
            }

            // Invariants for the remainder of the processing of the region:
            // H(u,v) at v=h, 0 <= u < 1
            // H(u,v) at u=w, 0 <= v < 1
            // -du/dv at v=h >= 0
            // -dv/du at u=w >= 0
            // In other words: the hyperbola is less than one unit away
            // from the axis at P0 and P1 and the distance from the axis
            // to the hyperbola increases monotonically as you approach
            // (u, v) = (0, 0).
            Debug.Assert((b2 - b1 * h + x0) * (a1 * h - a2 + y0) > n);
            Debug.Assert((b2 * w - b1 + x0) * (a1 - a2 * w + y0) > n);
            Debug.Assert(b2 * a1 - a2 * b1 == 1);

            // Find the pair of points (u2a, v2a) and (u2b, v2b) below H(u,v) where:
            // -dv/du at u=u2a >= 1
            // -dv/du at u=u2b <= 1
            // u2b = u2a + 1
            // Specifically, solve:
            // (x0 - b1*v + b2*u)*(y0 + a1*v - a2*u) = n at dv/du = -1
            // and solve for the line tan = u + v tangent passing through that point.
            // Then u2a = floor(u) and u2b = u2a + 1.
            // Finally compute v2a and v2b from u2a and u2b using the tangent line
            // which may result in a value too small by at most one.
            // Note that there are two solutions, one negative and one positive.
            // We take the positive solution.

            // We use the identities (a >= 0, b >= 0, c > 0; a, b, c elements of Z):
            // floor(b*sqrt(a)/c) = floor(floor(sqrt(b^2*a))/c)
            // floor(b*sqrt(a*c)/c) = floor(sqrt(b^2*a/c))
            // to enable using integer arithmetic.

            // Formulas:
            // a3b3 = b3*a3, mxy1 = b1*y0+a1*x0, mxy2 = b3*y0+a3*x0
            // u = floor((2*b1*a3+1)*sqrt(a3b3*n)/a3b3-mxy1)
            // v = floor(-u+2*sqrt(a3b3*n)-mxy2)
            var a3 = a1 + a2;
            var b3 = b1 + b2;
            var a3b3 = a3 * b3;
            var mxy1 = a1 * x0 + b1 * y0;
            var mxy2 = a3 * x0 + b3 * y0;
            var sqrtcoef = 2 * b1 * a3 + 1;
            var tan = IntegerMath.FloorSquareRoot(2 * 2 * a3b3 * n) - mxy2;
            var u2a = IntegerMath.FloorSquareRoot(sqrtcoef * sqrtcoef * n / a3b3) - mxy1;
            var v2a = u2a != 0 ? tan - u2a : h;
            var u2b = u2a < w ? u2a + 1 : w;
            var v2b = tan - u2b;

            // Check for under-estimate of v2a and/or v2b.
            if (u2a != 0)
            {
                var v2aplus = v2a + 1;
                if ((b2 * u2a - b1 * v2aplus + x0) * (a1 * v2aplus - a2 * u2a + y0) <= n)
                {
                    ++v2a;
                }
            }
            var v2bplus = v2b + 1;
            if ((b2 * u2b - b1 * v2bplus + x0) * (a1 * v2bplus - a2 * u2b + y0) <= n)
            {
                ++v2b;
            }

            // Compute the V intercept of L2a and L2b.  Since the lines are diagonal the intercept
            // is the same on both U and V axes and v12a = u02a and v12b = u02b.
            var v12a = u2a + v2a;
            var v12b = u2b + v2b;
            Debug.Assert(IntegerMath.Abs(v12a - v12b) >= 0 && IntegerMath.Abs(v12a - v12b) <= 1);

            // Count points horizontally or vertically if one axis collapses (or is below our cutoff)
            // or if the triangle exceeds the bounds of the rectangle.
            // NOTE(review): below, v12a is subtracted from h and v12b from w, and
            // another ProcessRegion variant in this source tests the corresponding
            // intercepts against h and w respectively; here v12a is tested against
            // w and v12b against h - confirm the pairing is intended.
            if (u2a <= smallRegionCutoff || v2b <= smallRegionCutoff || v12a > w || v12b > h)
            {
                if (h > w)
                {
                    sum += CountPoints(true, w, a2, b2, a1, b1, x0, y0);
                }
                else
                {
                    sum += CountPoints(false, h, a1, b1, a2, b2, x0, y0);
                }
                break;
            }

            // Add the triangle defined by L0, L1, and the smaller of L2a and L2b.
            var v12 = IntegerMath.Min(v12a, v12b);
            sum += v12 * (v12 - 1) / 2;

            // Adjust for the difference (if any) between L2a and L2b.
            if (v12a != v12b)
            {
                sum += v12a > v12b ? u2a : v2b;
            }

            // Push left region onto the stack.
            stack.Push(new Region(u2a, h - v12a, a1, b1, a3, b3, x0 - b1 * v12a, y0 + a1 * v12a));

            // Process right region iteratively (no change to a2 and b2).
            w -= v12b;
            h = v2b;
            a1 = a3;
            b1 = b3;
            x0 = x0 + b2 * v12b;
            y0 = y0 - a2 * v12b;
        }

        // Any more regions to process?
        if (stack.Count == 0)
        {
            break;
        }

        // Pop a region off the stack for processing.
        var region = stack.Pop();
        w = region.w;
        h = region.h;
        a1 = region.a1;
        b1 = region.b1;
        a2 = region.a2;
        b2 = region.b2;
        x0 = region.x0;
        y0 = region.y0;
    }

    // Return the sum of lattice points in this region.
    return(sum);
}
/// <summary>
/// Counts lattice points under the hyperbola x*y = n for x from
/// <paramref name="xfirst"/> to <paramref name="xlast"/>. The upper part of
/// the range is approximated by tangent lines of successive integral
/// slopes, with the gaps between those lines and the hyperbola counted by
/// <c>ProcessRegion</c>; the part below the final <c>xmin</c> is delegated
/// to <c>manualAlgorithm</c>.
/// </summary>
/// <remarks>
/// Overwrites the <c>n</c>, <c>xmax</c> and <c>xmin</c> fields.
/// </remarks>
/// <param name="n">The hyperbola parameter.</param>
/// <param name="xfirst">First x of the range.</param>
/// <param name="xlast">Last x of the range.</param>
/// <returns>The accumulated lattice point count.</returns>
public Integer Evaluate(Integer n, BigInteger xfirst, BigInteger xlast)
{
    this.n = n;

    // Count lattice points under the hyperbola x*y = n.
    var sum = (Integer)0;

    // Compute the range of values over which we will apply the
    // geometric algorithm.
    xmax = (Integer)xlast;
    xmin = IntegerMath.Max(xfirst, IntegerMath.Min(IntegerMath.FloorRoot(n, 3) * minimumMultiplier, xmax));

    // Calculate the line tangent to the hyperbola at x = xmax
    // (sqrt(n) when called for the full range).
    var m0 = (Integer)1;
    var x0 = xmax;
    var y0 = n / x0;
    var r0 = y0 + m0 * x0;
    Debug.Assert(r0 - m0 * x0 == y0);

    // Add the bottom rectangle.
    var width = x0 - xfirst;
    sum += (width + 1) * y0;

    // Add the isosceles right triangle corresponding to the initial
    // line L0 with -slope = 1.
    sum += width * (width + 1) / 2;

    // Process regions between tangent lines with integral slopes 1 & 2,
    // 2 & 3, etc. until we reach xmin.  This provides a first
    // approximation to the hyperbola and accounts for the majority
    // of the lattice points between xmin and xmax.  The remainder of
    // the points are computed by processing the regions bounded
    // by the two tangent lines and the hyperbola itself.
    while (true)
    {
        // Find the largest point (x1a, y1a) where -H'(X) >= the new slope.
        var m1 = m0 + 1;
        var x1a = IntegerMath.FloorSquareRoot(n / m1);
        var y1a = n / x1a;
        var r1a = y1a + m1 * x1a;
        var x1b = x1a + 1;
        var y1b = n / x1b;
        var r1b = y1b + m1 * x1b;
        Debug.Assert(r1a - m1 * x1a == y1a);
        Debug.Assert(r1b - m1 * x1b == y1b);

        // Handle left-overs.
        if (x1a < xmin)
        {
            // Remove all the points we added between xfirst and x0.
            var rest = x0 - xfirst;
            sum -= (r0 - m0 * x0) * rest + m0 * rest * (rest + 1) / 2;

            // Everything below x0 is left to the manual algorithm.
            xmin = x0;
            break;
        }

        // Invariants:
        // The value before x1a along L1a is on or below the hyperbola.
        // The value after x1b along L1b is on or below the hyperbola.
        // The new slope is one greater than the old slope.
        Debug.Assert((x1a - 1) * (r1a - m1 * (x1a - 1)) <= n);
        Debug.Assert((x1b + 1) * (r1b - m1 * (x1b + 1)) <= n);
        Debug.Assert(m1 - m0 == 1);

        // Add the triangular wedge above the previous slope and below the new one
        // and bounded on the left by xfirst.
        var x0a = r1a - r0;
        width = x0a - xfirst;
        sum += width * (width + 1) / 2;

        // Account for a drop or rise from L1a to L1b.
        if (r1a != r1b && x1a < x0a)
        {
            // Remove the old triangle and add the new triangle.
            // The formula is (ow+dr)*(ow+dr+1)/2 - ow*(ow+1)/2.
            var ow = x1a - x0a;
            var dr = r1a - r1b;
            sum += dr * (2 * ow + dr + 1) / 2;
        }

        // Determine intersection of L0 and L1b.
        var x0b = r1b - r0;
        var y0b = r0 - m0 * x0b;
        Debug.Assert(r0 - m0 * x0b == r1b - m1 * x0b);

        // Calculate width and height of parallelogram counting only lattice points.
        var w = (y0 - y0b) + m1 * (x0 - x0b);
        var h = (y1b - y0b) + m0 * (x1b - x0b);

        // Process the hyperbolic region bounded by L1b and L0.
        sum += ProcessRegion(w, h, m1, 1, m0, 1, x0b, y0b);

        // Advance to the next region.
        m0 = m1;
        x0 = x1a;
        y0 = y1a;
        r0 = r1a;
    }

    // Process values from xfirst up to xmin.
    sum += manualAlgorithm.Evaluate(n, xfirst, xmin - 1);

    return(sum);
}
/// <summary>
/// Counts the lattice points inside the given parallelogram region that
/// lie on or below the hyperbola x*y = n, repeatedly lopping off a
/// triangle and subdividing the remainder. Sub-regions are kept on the
/// <c>stack</c> field instead of recursing.
/// </summary>
/// <param name="w">Width of the region along L2, in lattice points.</param>
/// <param name="h">Height of the region along L1, in lattice points.</param>
/// <param name="a1">Numerator of the -slope of L1.</param>
/// <param name="b1">Denominator of the -slope of L1.</param>
/// <param name="a2">Numerator of the -slope of L2.</param>
/// <param name="b2">Denominator of the -slope of L2.</param>
/// <param name="x0">X coordinate of the intersection of L1 and L2.</param>
/// <param name="y0">Y coordinate of the intersection of L1 and L2.</param>
/// <returns>The number of lattice points counted for the region.</returns>
private Integer ProcessRegion(Integer w, Integer h, Integer a1, Integer b1, Integer a2, Integer b2, Integer x0, Integer y0)
{
    // The hyperbola is defined by H(x, y): x*y = n.
    // Line L1 has -slope m1 = a1/b1.
    // Line L2 has -slope m2 = a2/b2.
    // Both lines pass through P0 = (x0, y0).
    // The region is a parallelogram with the left side bounded by L1,
    // the bottom bounded by L2, with width w (along L2) and height h
    // (along L1).  The lower-left corner is P0 (the intersection of
    // L2 and L1) and represents (u, v) = (0, 0).
    // Both w and h are counted in terms of lattice points, not length.

    // For the purposes of counting, the lattice points on lines L1 and L2
    // have already been counted.

    // Note that a1*b2 - b1*a2 = 1 because
    // m2 and m1 are Farey neighbors, e.g. 1 & 2 or 3/2 & 2 or 8/5 & 5/3

    // The equations that define (u, v) in terms of (x, y) are:
    // u = b1*(y-y0)+a1*(x-x0)
    // v = b2*(y-y0)+a2*(x-x0)

    // And therefore the equations that define (x, y) in terms of (u, v) are:
    // x = x0-b1*v+b2*u
    // y = y0+a1*v-a2*u

    // Since all parameters are integers and a1*b2 - b1*a2 = 1,
    // every lattice point in (x, y) is a lattice point in (u, v)
    // and vice-versa.

    // Geometrically, the UV coordinate system is the composition
    // of a translation and two shear mappings.  The UV-based hyperbola
    // is essentially a "mini" hyperbola that resembles the full
    // hyperbola in that:
    // - The equation is still a hyperbola (although it is now a quadratic in two variables)
    // - The endpoints of the curve are roughly tangent to the axes

    // We process the region by "lopping off" the maximal isosceles
    // right triangle in the lower-left corner and then process
    // the two remaining "slivers" in the upper-left and lower-right,
    // which creates two smaller "micro" hyperbolas, which we then
    // process recursively.

    // When we are in the region of the original hyperbola where
    // the curvature is roughly constant, the deformed hyperbola
    // will in fact resemble a circular arc.

    // A line with -slope = 1 in UV-space has -slope = (a1+a2)/(b1+b2)
    // in XY-space.  We call this m3, and the line defining the third side
    // of the triangle L3, containing the point P3 tangent to the hyperbola.

    // This is all slightly complicated by the fact that the diagonal that
    // defines the region that we "lop off" may be broken and shifted
    // up or down near the tangent point.  As a result we actually have
    // P3 and P4 and L3 and L4.

    // We can measure work in units of X because it is the short
    // axis and it ranges from cbrt(n) to sqrt(n).  If we did one
    // unit of work for each X coordinate we would have an O(sqrt(n))
    // algorithm.  But because there is only one lattice point on a
    // line with slope m per the denominator of m in X and because
    // the denominator of m roughly doubles for each subdivision,
    // there will be less than one unit of work for each unit of X.

    // As a result, each iteration reduces the work by about
    // a factor of two resulting in 1 + 2 + 4 + ... + sqrt(r) steps
    // or O(sqrt(r)).  Since the sum of the sizes of the top-level
    // regions is O(sqrt(n)), this gives a O(n^(1/4)) algorithm for
    // nearly constant curvature.

    // However, since the hyperbola is increasingly non-circular for small
    // values of x, the subdivision is not nearly as beneficial (and
    // also not symmetric) so it is only worthwhile to use region
    // subdivision on regions where cbrt(n) < x < sqrt(n).

    // The sqrt(n) bound comes from symmetry and the Dirichlet
    // hyperbola method (which we also use).  The cbrt(n)
    // bound comes from the fact that the second derivative H''(x)
    // exceeds one at (2n)^(1/3) ~= 1.26*cbrt(n).  Since we process
    // regions with adjacent integral slopes at the top level, by the
    // time we get to cbrt(n), the size of the region is at most
    // one, so we might as well process those values using the
    // naive approach of summing y = n/x.

    // Finally, at some point the region becomes small enough and we
    // can just count points under the hyperbola using whichever axis
    // is shorter.  This is quite a bit harder than computing y = n/x
    // because the transformations we are using result in a general
    // quadratic in two variables.  Nevertheless, with some
    // preliminary calculations, each value can be calculated with
    // a few additions, a square root and a division.

    // Sum the lattice points.
    var sum = (Integer)0;

    // Process regions on the stack.
    // NOTE(review): the loop runs until the stack field is empty, so it
    // presumably expects the stack to be empty on entry - confirm.
    while (true)
    {
        // Process regions iteratively.
        while (true)
        {
            // Nothing left to process.
            if (w <= 0 || h <= 0)
            {
                break;
            }

            // Check whether the point at (w, 1) is inside the hyperbola.
            if ((b2 * w - b1 + x0) * (a1 - a2 * w + y0) <= n)
            {
                // Remove the first row.
                sum += w;
                x0 -= b1;
                y0 += a1;
                --h;
                if (h == 0)
                {
                    break;
                }
            }

            // Check whether the point at (1, h) is inside the hyperbola.
            if ((b2 - b1 * h + x0) * (a1 * h - a2 + y0) <= n)
            {
                // Remove the first column.
                sum += h;
                x0 += b2;
                y0 -= a2;
                --w;
                if (w == 0)
                {
                    break;
                }
            }

            // Invariants for the remainder of the processing of the region:
            // H(u,v) at v=h, 0 <= u < 1
            // H(u,v) at u=w, 0 <= v < 1
            // -du/dv at v=h >= 0
            // -dv/du at u=w >= 0
            // In other words: the hyperbola is less than one unit away
            // from the axis at P1 and P2 and the distance from the axis
            // to the hyperbola increases monotonically as you approach
            // (u, v) = (0, 0).
            Debug.Assert((b2 - b1 * h + x0) * (a1 * h - a2 + y0) > n);
            Debug.Assert((b2 * w - b1 + x0) * (a1 - a2 * w + y0) > n);
            Debug.Assert(b2 * a1 - a2 * b1 == 1);

            // Find the pair of points (u3, v3) and (u4, v4) below H(u,v) where:
            // -dv/du at u=u3 >= 1
            // -dv/du at u=u4 <= 1
            // u4 = u3 + 1
            // Specifically, solve:
            // (a1*(v+c2)-a2*(u+c1))*(b2*(u+c1)-b1*(v+c2)) = n at dv/du = -1
            // Then u3 = floor(u) and u4 = u3 + 1.
            // Note that there are two solutions, one negative and one positive.
            // We take the positive solution.

            // We use the identity (a >= 0, b >= 0; a, b, elements of Z):
            // floor(b*sqrt(a/c)) = floor(sqrt(floor(b^2*a/c)))
            // to enable using integer arithmetic.

            // Formula:
            // u = (a1*b2+a2*b1+2*a1*b1)*sqrt(n/(a3*b3))-c1
            var c1 = a1 * x0 + b1 * y0;
            var c2 = a2 * x0 + b2 * y0;
            var a3 = a1 + a2;
            var b3 = b1 + b2;
            var coef = a1 * b2 + b1 * a2;
            var denom = 2 * a1 * b1;
            var sqrtcoef = coef + denom;
            var u3 = IntegerMath.FloorSquareRoot(sqrtcoef * sqrtcoef * n / (a3 * b3)) - c1;
            var u4 = u3 + 1;

            // Finally compute v3 and v4 from u3 and u4 by solving
            // the hyperbola for v.
            // Note that there are two solutions, both positive.
            // We take the smaller solution (nearest the u axis).

            // Formulas:
            // v = ((a1*b2+a2*b1)*(u+c1)-sqrt((u+c1)^2-4*a1*b1*n))/(2*a1*b1)-c2
            // u = ((a1*b2+a2*b1)*(v+c2)-sqrt((v+c2)^2-4*a2*b2*n))/(2*a2*b2)-c1
            var uc1 = u3 + c1;
            var a = uc1 * uc1 - 2 * denom * n;
            var b = uc1 * coef;
            var v3 = u3 != 0 ? (b - IntegerMath.CeilingSquareRoot(a)) / denom - c2 : h;
            // For u4 = u3 + 1 the radicand grows by 2*uc1 + 1 and b grows by coef.
            var v4 = (b + coef - IntegerMath.CeilingSquareRoot(a + 2 * uc1 + 1)) / denom - c2;
            Debug.Assert(u3 < w);

            // Compute the V intercept of L3 and L4.  Since the lines are diagonal the intercept
            // is the same on both U and V axes and v13 = u03 and v14 = u04.
            var r3 = u3 + v3;
            var r4 = u4 + v4;
            Debug.Assert(IntegerMath.Abs(r3 - r4) <= 1);

            // Count points horizontally or vertically if one axis collapses (or is below our cutoff)
            // or if the triangle exceeds the bounds of the rectangle.
            if (u3 <= smallRegionCutoff || v4 <= smallRegionCutoff || r3 > h || r4 > w)
            {
                if (h > w)
                {
                    sum += CountPoints(w, c1, c2, coef, denom);
                }
                else
                {
                    sum += CountPoints(h, c2, c1, coef, 2 * a2 * b2);
                }
                break;
            }

            // Add the triangle defined by L1, L2, and the smaller of L3 and L4.
            var size = IntegerMath.Min(r3, r4);
            sum += size * (size - 1) / 2;

            // Adjust for the difference (if any) between L3 and L4.
            if (r3 != r4)
            {
                sum += r3 > r4 ? u3 : v4;
            }

            // Push left region onto the stack.
            stack.Push(new Region(u3, h - r3, a1, b1, a3, b3, x0 - b1 * r3, y0 + a1 * r3));

            // Process right region iteratively (no change to a2 and b2).
            w -= r4;
            h = v4;
            a1 = a3;
            b1 = b3;
            x0 = x0 + b2 * r4;
            y0 = y0 - a2 * r4;
        }

        // Any more regions to process?
        if (stack.Count == 0)
        {
            break;
        }

        // Pop a region off the stack for processing.
        var region = stack.Pop();
        w = region.w;
        h = region.h;
        a1 = region.a1;
        b1 = region.b1;
        a2 = region.a2;
        b2 = region.b2;
        x0 = region.x0;
        y0 = region.y0;
    }

    // Return the sum of lattice points in this region.
    return(sum);
}
/// <summary>
/// Applies <c>T1</c> to the floor of the square root of <c>n / a</c>,
/// where <c>n</c> is the instance field holding the current argument.
/// </summary>
/// <param name="a">Divisor applied to <c>n</c> before taking the root.</param>
/// <returns>The value of <c>T1(floor(sqrt(n / a)))</c>.</returns>
private ulong YTan(ulong a)
{
    // Quotient first, then its integer square root, then the T1 transform.
    var quotient = n / a;
    var root = IntegerMath.FloorSquareRoot(quotient);
    return T1(root);
}
/// <summary>
/// Counts the lattice points under the hyperbola x*y = n. Points with
/// x &lt;= sqrt(n) are counted directly (geometrically between xmin and
/// xmax, naively below xmin) and the remainder are accounted for with the
/// Dirichlet hyperbola method.
/// </summary>
/// <param name="n">The hyperbola parameter. Stored in <c>this.n</c>.</param>
/// <returns>
/// The number of lattice points, or zero when <paramref name="n"/> is not positive.
/// </returns>
/// <remarks>
/// Overwrites the <c>n</c>, <c>xmax</c> and <c>xmin</c> fields as a side effect.
/// </remarks>
public Integer Evaluate(Integer n)
{
    this.n = n;

    // Nothing to count for non-positive n; returning here also avoids
    // dividing by xmax == 0 further down.
    if (n <= 0)
    {
        return (Integer)0;
    }

    // Count lattice points under the hyperbola x*y = n.
    var sum = (Integer)0;

    // Compute the range of values over which we will apply the
    // geometric algorithm.
    xmax = IntegerMath.FloorRoot(n, 2);
    xmin = IntegerMath.Min(IntegerMath.FloorRoot(n, 3) * minimumMultiplier, xmax);

    // Calculate the line tangent to the hyperbola at x = sqrt(n).
    var m2 = (Integer)1;
    var x2 = xmax;
    var y2 = n / x2;
    var r2 = y2 + m2 * x2;
    var width = x2 - xmin;
    Debug.Assert(r2 - m2 * x2 == y2);

    // Add the bottom rectangle.
    sum += (width + 1) * y2;

    // Add the isosceles right triangle corresponding to the initial
    // line L2 with -slope = 1.
    sum += width * (width + 1) / 2;

    // Process regions between tangent lines with integral slopes 1 & 2,
    // 2 & 3, etc. until we reach xmin.  This provides a first
    // approximation to the hyperbola and accounts for the majority
    // of the lattice points between xmin and xmax.  The remainder of
    // the points are computed by processing the regions bounded
    // by the two tangent lines and the hyperbola itself.
    while (true)
    {
        // Find the pair of points (x3, y3) and (x1, y1) where:
        // -H'(x3) >= the new slope
        // -H'(x1) <= the new slope
        // x1 = x3 + 1
        var m1 = m2 + 1;
        var x3 = IntegerMath.FloorSquareRoot(n / m1);

        // Handle left-overs.  This check must come before n / x3 is
        // evaluated: for small n the slope m1 can exceed n, making
        // x3 == 0, and the division would throw.
        // NOTE(review): relies on xmin >= 1 (i.e. minimumMultiplier >= 1) - confirm.
        if (x3 < xmin)
        {
            // Process the last few values above xmin as the number of
            // points above the last L2.
            for (var x = xmin; x < x2; x++)
            {
                sum += n / x - (r2 - m2 * x);
            }
            break;
        }

        var y3 = n / x3;
        var r3 = y3 + m1 * x3;
        var x1 = x3 + 1;
        var y1 = n / x1;
        var r1 = y1 + m1 * x1;
        Debug.Assert(r3 - m1 * x3 == y3);
        Debug.Assert(r1 - m1 * x1 == y1);

        // Invariants:
        // The value before x3 along L3 is on or below the hyperbola.
        // The value after x1 along L1 is on or below the hyperbola.
        // The new slope is one greater than the old slope.
        Debug.Assert((x3 - 1) * (r3 - m1 * (x3 - 1)) <= n);
        Debug.Assert((x1 + 1) * (r1 - m1 * (x1 + 1)) <= n);
        Debug.Assert(m1 - m2 == 1);

        // Add the triangular wedge above the previous slope and below the new one
        // and bounded on the left by xmin.
        var x0 = r3 - r2;
        width = x0 - xmin;
        sum += width * (width + 1) / 2;

        // Account for a drop or rise from L3 to L1.
        if (r3 != r1 && x3 < x0)
        {
            // Remove the old triangle and add the new triangle.
            // The formula is (ow+dr)*(ow+dr+1)/2 - ow*(ow+1)/2.
            var ow = x3 - x0;
            var dr = r3 - r1;
            sum += dr * (2 * ow + dr + 1) / 2;
        }

        // Determine intersection of L2 and L1.
        x0 = r1 - r2;
        var y0 = r2 - m2 * x0;
        Debug.Assert(r2 - m2 * x0 == r1 - m1 * x0);

        // Calculate width and height of parallelogram counting only lattice points.
        var w = (y2 - y0) + m1 * (x2 - x0);
        var h = (y1 - y0) + m2 * (x1 - x0);

        // Process the hyperbolic region bounded by L1 and L2.
        sum += ProcessRegion(w, h, m1, 1, m2, 1, x0, y0);

        // Advance to the next region.
        m2 = m1;
        x2 = x3;
        y2 = y3;
        r2 = r3;
    }

    // Process values one up to xmin.
    for (var x = (Integer)1; x < xmin; x++)
    {
        sum += n / x;
    }

    // Account for sqrt(n) < x <= n using the Dirichlet hyperbola method.
    sum = 2 * sum - xmax * xmax;

    return sum;
}
/// <summary>
/// Evaluates <c>(ab1 + floor(sqrt((abba + ab2)^2 * n / a3b3)) - 2*c1) / 2</c>
/// using integer arithmetic, where <c>n</c> is the instance field.
/// </summary>
/// <param name="ab1">Term added to the square root.</param>
/// <param name="abba">First addend of the value that is squared.</param>
/// <param name="ab2">Second addend of the value that is squared.</param>
/// <param name="a3b3">Divisor applied to the scaled square before taking the root.</param>
/// <param name="c1">Offset; twice its value is subtracted before halving.</param>
/// <returns>The integer quotient of the expression above.</returns>
public BigInteger UTan(BigInteger ab1, BigInteger abba, BigInteger ab2, BigInteger a3b3, BigInteger c1)
{
    // Multiply by n before dividing so the truncation happens only once.
    var radicand = IntegerMath.Square(abba + ab2) * n / a3b3;
    var root = IntegerMath.FloorSquareRoot(radicand);
    var doubledOffset = c1 << 1;
    return (ab1 + root - doubledOffset) / 2;
}