private static void PowCore(uint power, ref FastReducer reducer,
                                    ref BitsBuffer value, ref BitsBuffer result,
                                    ref BitsBuffer temp)
        {
            // The big modulus pow algorithm for the last or
            // the only power limb using square-and-multiply.

            // NOTE: we're using a special reducer here,
            // since it's additional overhead does pay off.

            while (power != 0)
            {
                if ((power & 1) == 1)
                {
                    result.MultiplySelf(ref value, ref temp);
                    result.Reduce(ref reducer);
                }
                if (power != 1)
                {
                    value.SquareSelf(ref temp);
                    value.Reduce(ref reducer);
                }
                power = power >> 1;
            }
        }
        private static void PowCore(uint[] power, ref FastReducer reducer,
                                    ref BitsBuffer value, ref BitsBuffer result,
                                    ref BitsBuffer temp)
        {
            // The big modulus pow algorithm for all but
            // the last power limb using square-and-multiply.

            // NOTE: we're using a special reducer here,
            // since it's additional overhead does pay off.

            for (int i = 0; i < power.Length - 1; i++)
            {
                uint p = power[i];
                for (int j = 0; j < 32; j++)
                {
                    if ((p & 1) == 1)
                    {
                        result.MultiplySelf(ref value, ref temp);
                        result.Reduce(ref reducer);
                    }
                    value.SquareSelf(ref temp);
                    value.Reduce(ref reducer);
                    p = p >> 1;
                }
            }

            PowCore(power[power.Length - 1], ref reducer, ref value, ref result,
                    ref temp);
        }
        private static void PowCore(uint power, uint[] modulus,
                                    ref BitsBuffer value, ref BitsBuffer result,
                                    ref BitsBuffer temp)
        {
            // The big modulus pow algorithm for the last or
            // the only power limb using square-and-multiply.

            // NOTE: we're using an ordinary remainder here,
            // since the reducer overhead doesn't pay off.

            while (power != 0)
            {
                if ((power & 1) == 1)
                {
                    result.MultiplySelf(ref value, ref temp);
                    result.Reduce(modulus);
                }
                if (power != 1)
                {
                    value.SquareSelf(ref temp);
                    value.Reduce(modulus);
                }
                power = power >> 1;
            }
        }
        private static void Gcd(ref BitsBuffer left, ref BitsBuffer right)
        {
            Debug.Assert(left.GetLength() >= 2);
            Debug.Assert(right.GetLength() >= 2);
            Debug.Assert(left.GetLength() >= right.GetLength());

            // Executes Lehmer's gcd algorithm, but uses the most 
            // significant bits to work with 64-bit (not 32-bit) values.
            // Furthermore we're using an optimized version due to Jebelean.

            // http://cacr.uwaterloo.ca/hac/about/chap14.pdf (see 14.4.2)
            // ftp://ftp.risc.uni-linz.ac.at/pub/techreports/1992/92-69.ps.gz

            while (right.GetLength() > 2)
            {
                ulong x, y;

                ExtractDigits(ref left, ref right, out x, out y);

                uint a = 1U, b = 0U;
                uint c = 0U, d = 1U;

                int iteration = 0;

                // Lehmer's guessing
                while (y != 0)
                {
                    ulong q, r, s, t;

                    // odd iteration
                    q = x / y;

                    if (q > 0xFFFFFFFF)
                        break;

                    r = a + q * c;
                    s = b + q * d;
                    t = x - q * y;

                    if (r > 0x7FFFFFFF || s > 0x7FFFFFFF)
                        break;
                    if (t < s || t + r > y - c)
                        break;

                    a = (uint)r;
                    b = (uint)s;
                    x = t;

                    ++iteration;
                    if (x == b)
                        break;

                    // even iteration
                    q = y / x;

                    if (q > 0xFFFFFFFF)
                        break;

                    r = d + q * b;
                    s = c + q * a;
                    t = y - q * x;

                    if (r > 0x7FFFFFFF || s > 0x7FFFFFFF)
                        break;
                    if (t < s || t + r > x - b)
                        break;

                    d = (uint)r;
                    c = (uint)s;
                    y = t;

                    ++iteration;
                    if (y == c)
                        break;
                }

                if (b == 0)
                {
                    // Euclid's step
                    left.Reduce(ref right);

                    BitsBuffer temp = left;
                    left = right;
                    right = temp;
                }
                else
                {
                    // Lehmer's step
                    LehmerCore(ref left, ref right, a, b, c, d);

                    if (iteration % 2 == 1)
                    {
                        // ensure left is larger than right
                        BitsBuffer temp = left;
                        left = right;
                        right = temp;
                    }
                }
            }

            if (right.GetLength() > 0)
            {
                // Euclid's step
                left.Reduce(ref right);

                uint[] xBits = right.GetBits();
                uint[] yBits = left.GetBits();

                ulong x = ((ulong)xBits[1] << 32) | xBits[0];
                ulong y = ((ulong)yBits[1] << 32) | yBits[0];

                left.Overwrite(Gcd(x, y));
                right.Overwrite(0);
            }
        }
        private static void Gcd(ref BitsBuffer left, ref BitsBuffer right)
        {
            Debug.Assert(left.GetLength() >= 2);
            Debug.Assert(right.GetLength() >= 2);
            Debug.Assert(left.GetLength() >= right.GetLength());

            // Executes Lehmer's gcd algorithm, but uses the most
            // significant bits to work with 64-bit (not 32-bit) values.
            // Furthermore we're using an optimized version due to Jebelean.

            // http://cacr.uwaterloo.ca/hac/about/chap14.pdf (see 14.4.2)
            // ftp://ftp.risc.uni-linz.ac.at/pub/techreports/1992/92-69.ps.gz

            while (right.GetLength() > 2)
            {
                ulong x, y;

                ExtractDigits(ref left, ref right, out x, out y);

                uint a = 1U, b = 0U;
                uint c = 0U, d = 1U;

                int iteration = 0;

                // Lehmer's guessing
                while (y != 0)
                {
                    ulong q, r, s, t;

                    // Odd iteration
                    q = x / y;

                    if (q > 0xFFFFFFFF)
                    {
                        break;
                    }

                    r = a + q * c;
                    s = b + q * d;
                    t = x - q * y;

                    if (r > 0x7FFFFFFF || s > 0x7FFFFFFF)
                    {
                        break;
                    }
                    if (t < s || t + r > y - c)
                    {
                        break;
                    }

                    a = (uint)r;
                    b = (uint)s;
                    x = t;

                    ++iteration;
                    if (x == b)
                    {
                        break;
                    }

                    // Even iteration
                    q = y / x;

                    if (q > 0xFFFFFFFF)
                    {
                        break;
                    }

                    r = d + q * b;
                    s = c + q * a;
                    t = y - q * x;

                    if (r > 0x7FFFFFFF || s > 0x7FFFFFFF)
                    {
                        break;
                    }
                    if (t < s || t + r > x - b)
                    {
                        break;
                    }

                    d = (uint)r;
                    c = (uint)s;
                    y = t;

                    ++iteration;
                    if (y == c)
                    {
                        break;
                    }
                }

                if (b == 0)
                {
                    // Euclid's step
                    left.Reduce(ref right);

                    BitsBuffer temp = left;
                    left  = right;
                    right = temp;
                }
                else
                {
                    // Lehmer's step
                    LehmerCore(ref left, ref right, a, b, c, d);

                    if (iteration % 2 == 1)
                    {
                        // Ensure left is larger than right
                        BitsBuffer temp = left;
                        left  = right;
                        right = temp;
                    }
                }
            }

            if (right.GetLength() > 0)
            {
                // Euclid's step
                left.Reduce(ref right);

                uint[] xBits = right.GetBits();
                uint[] yBits = left.GetBits();

                ulong x = ((ulong)xBits[1] << 32) | xBits[0];
                ulong y = ((ulong)yBits[1] << 32) | yBits[0];

                left.Overwrite(Gcd(x, y));
                right.Overwrite(0);
            }
        }
        private static void PowCore(uint power, ref FastReducer reducer,
                                    ref BitsBuffer value, ref BitsBuffer result,
                                    ref BitsBuffer temp)
        {
            // The big modulus pow algorithm for the last or
            // the only power limb using square-and-multiply.

            // NOTE: we're using a special reducer here,
            // since it's additional overhead does pay off.

            while (power != 0)
            {
                if ((power & 1) == 1)
                {
                    result.MultiplySelf(ref value, ref temp);
                    result.Reduce(ref reducer);
                }
                if (power != 1)
                {
                    value.SquareSelf(ref temp);
                    value.Reduce(ref reducer);
                }
                power = power >> 1;
            }
        }
        private static void PowCore(uint[] power, ref FastReducer reducer,
                                    ref BitsBuffer value, ref BitsBuffer result,
                                    ref BitsBuffer temp)
        {
            // The big modulus pow algorithm for all but
            // the last power limb using square-and-multiply.

            // NOTE: we're using a special reducer here,
            // since it's additional overhead does pay off.

            for (int i = 0; i < power.Length - 1; i++)
            {
                uint p = power[i];
                for (int j = 0; j < 32; j++)
                {
                    if ((p & 1) == 1)
                    {
                        result.MultiplySelf(ref value, ref temp);
                        result.Reduce(ref reducer);
                    }
                    value.SquareSelf(ref temp);
                    value.Reduce(ref reducer);
                    p = p >> 1;
                }
            }

            PowCore(power[power.Length - 1], ref reducer, ref value, ref result,
                ref temp);
        }
        private static void PowCore(uint power, uint[] modulus,
                                    ref BitsBuffer value, ref BitsBuffer result,
                                    ref BitsBuffer temp)
        {
            // The big modulus pow algorithm for the last or
            // the only power limb using square-and-multiply.

            // NOTE: we're using an ordinary remainder here,
            // since the reducer overhead doesn't pay off.

            while (power != 0)
            {
                if ((power & 1) == 1)
                {
                    result.MultiplySelf(ref value, ref temp);
                    result.Reduce(modulus);
                }
                if (power != 1)
                {
                    value.SquareSelf(ref temp);
                    value.Reduce(modulus);
                }
                power = power >> 1;
            }
        }