private static void Gcd(ref BitsBuffer left, ref BitsBuffer right)
        {
            Debug.Assert(left.GetLength() >= 2);
            Debug.Assert(right.GetLength() >= 2);
            Debug.Assert(left.GetLength() >= right.GetLength());

            // Lehmer's gcd algorithm, driven by 64-bit (rather than 32-bit)
            // leading values of both operands, in the optimized variant
            // due to Jebelean.
            //
            // http://cacr.uwaterloo.ca/hac/about/chap14.pdf (see 14.4.2)
            // ftp://ftp.risc.uni-linz.ac.at/pub/techreports/1992/92-69.ps.gz
            //
            // Once the loop below has finished and right is still non-empty,
            // left is overwritten with the result of the 64-bit Gcd and
            // right is overwritten with 0.

            // A quotient above this value ends the guessing phase.
            const ulong maxQuotient = 0xFFFFFFFF;

            // A cofactor candidate above this value ends the guessing phase.
            const ulong maxCofactor = 0x7FFFFFFF;

            while (right.GetLength() > 2)
            {
                ulong leftHead, rightHead;
                ExtractDigits(ref left, ref right, out leftHead, out rightHead);

                // Cofactors, starting out as the identity (a, b, c, d) = (1, 0, 0, 1).
                uint a = 1U, b = 0U;
                uint c = 0U, d = 1U;

                int steps = 0;

                // Lehmer's guessing
                while (rightHead != 0)
                {
                    // odd step: updates (a, b) and leftHead
                    ulong quotient = leftHead / rightHead;
                    if (quotient > maxQuotient)
                    {
                        break;
                    }

                    ulong nextA = a + quotient * c;
                    ulong nextB = b + quotient * d;
                    ulong remainder = leftHead - quotient * rightHead;

                    // The exit tests are evaluated left to right and stop at
                    // the first one that holds.
                    if (nextA > maxCofactor || nextB > maxCofactor ||
                        remainder < nextB || remainder + nextA > rightHead - c)
                    {
                        break;
                    }

                    a = (uint)nextA;
                    b = (uint)nextB;
                    leftHead = remainder;

                    ++steps;
                    if (leftHead == b)
                    {
                        break;
                    }

                    // even step: updates (d, c) and rightHead
                    quotient = rightHead / leftHead;
                    if (quotient > maxQuotient)
                    {
                        break;
                    }

                    ulong nextD = d + quotient * b;
                    ulong nextC = c + quotient * a;
                    remainder = rightHead - quotient * leftHead;

                    if (nextD > maxCofactor || nextC > maxCofactor ||
                        remainder < nextC || remainder + nextD > leftHead - b)
                    {
                        break;
                    }

                    d = (uint)nextD;
                    c = (uint)nextC;
                    rightHead = remainder;

                    ++steps;
                    if (rightHead == c)
                    {
                        break;
                    }
                }

                bool swapOperands;

                if (b == 0)
                {
                    // Euclid's step: the guessing phase never completed an
                    // odd step, so reduce directly; the operands are then
                    // always exchanged.
                    left.Reduce(ref right);
                    swapOperands = true;
                }
                else
                {
                    // Lehmer's step; after an odd number of guessing steps
                    // the operands are exchanged to ensure left is larger
                    // than right.
                    LehmerCore(ref left, ref right, a, b, c, d);
                    swapOperands = steps % 2 == 1;
                }

                if (swapOperands)
                {
                    BitsBuffer previousLeft = left;
                    left = right;
                    right = previousLeft;
                }
            }

            if (right.GetLength() <= 0)
            {
                return;
            }

            // Euclid's step
            left.Reduce(ref right);

            uint[] rightBits = right.GetBits();
            uint[] leftBits = left.GetBits();

            // Combine the two lowest 32-bit elements of each buffer into one 64-bit value.
            ulong rightValue = ((ulong)rightBits[1] << 32) | rightBits[0];
            ulong leftValue = ((ulong)leftBits[1] << 32) | leftBits[0];

            left.Overwrite(Gcd(rightValue, leftValue));
            right.Overwrite(0);
        }
        private static void Gcd(ref BitsBuffer left, ref BitsBuffer right)
        {
            Debug.Assert(left.GetLength() >= 2);
            Debug.Assert(right.GetLength() >= 2);
            Debug.Assert(left.GetLength() >= right.GetLength());

            // Runs Lehmer's gcd algorithm on 64-bit (not 32-bit) values
            // taken from the most significant bits of the operands, using
            // the optimized version due to Jebelean.
            //
            // http://cacr.uwaterloo.ca/hac/about/chap14.pdf (see 14.4.2)
            // ftp://ftp.risc.uni-linz.ac.at/pub/techreports/1992/92-69.ps.gz

            // Largest quotient accepted by the guessing loop.
            const ulong quotientLimit = 0xFFFFFFFF;

            // Largest cofactor candidate accepted by the guessing loop.
            const ulong cofactorLimit = 0x7FFFFFFF;

            while (right.GetLength() > 2)
            {
                ulong u, v;
                ExtractDigits(ref left, ref right, out u, out v);

                uint a = 1U, b = 0U;
                uint c = 0U, d = 1U;

                int rounds = 0;

                // Lehmer's guessing
                while (v != 0)
                {
                    // Odd iteration: refines a, b and u.
                    ulong q = u / v;
                    if (q > quotientLimit)
                    {
                        break;
                    }

                    ulong candA = a + q * c;
                    ulong candB = b + q * d;
                    ulong rest = u - q * v;

                    // Conditions are checked in order; the first one that
                    // holds leaves the guessing loop.
                    if (candA > cofactorLimit || candB > cofactorLimit
                        || rest < candB || rest + candA > v - c)
                    {
                        break;
                    }

                    a = (uint)candA;
                    b = (uint)candB;
                    u = rest;

                    ++rounds;
                    if (u == b)
                    {
                        break;
                    }

                    // Even iteration: refines d, c and v.
                    q = v / u;
                    if (q > quotientLimit)
                    {
                        break;
                    }

                    ulong candD = d + q * b;
                    ulong candC = c + q * a;
                    rest = v - q * u;

                    if (candD > cofactorLimit || candC > cofactorLimit
                        || rest < candC || rest + candD > u - b)
                    {
                        break;
                    }

                    d = (uint)candD;
                    c = (uint)candC;
                    v = rest;

                    ++rounds;
                    if (v == c)
                    {
                        break;
                    }
                }

                if (b != 0)
                {
                    // Lehmer's step
                    LehmerCore(ref left, ref right, a, b, c, d);

                    if (rounds % 2 == 1)
                    {
                        // Ensure left is larger than right
                        BitsBuffer held = left;
                        left = right;
                        right = held;
                    }
                }
                else
                {
                    // Euclid's step: no odd iteration was completed
                    // (b is still 0), so reduce left by right and exchange
                    // the two operands.
                    left.Reduce(ref right);

                    BitsBuffer held = left;
                    left = right;
                    right = held;
                }
            }

            if (right.GetLength() > 0)
            {
                // Euclid's step
                left.Reduce(ref right);

                uint[] bitsOfRight = right.GetBits();
                uint[] bitsOfLeft = left.GetBits();

                // Join elements [1] and [0] of each buffer into a 64-bit value.
                ulong fromRight = ((ulong)bitsOfRight[1] << 32) | bitsOfRight[0];
                ulong fromLeft = ((ulong)bitsOfLeft[1] << 32) | bitsOfLeft[0];

                // Finish with the 64-bit Gcd overload and store the result
                // in left; right is overwritten with 0.
                left.Overwrite(Gcd(fromRight, fromLeft));
                right.Overwrite(0);
            }
        }