Beispiel #1
0
        /// <summary>
        /// Assembles a 16-word block from the four Salsa constants, eight key words and
        /// four nonce words, runs <c>SalsaCore.HSalsa</c> over it with 20 rounds, and
        /// stores words 0, 5, 10, 15, 6, 7, 8 and 9 of the result into <paramref name="output"/>.
        /// </summary>
        /// <param name="output">Receives eight 32-bit words at byte offsets 0..28 from <paramref name="outputOffset"/>.</param>
        /// <param name="outputOffset">Index in <paramref name="output"/> of the first stored word.</param>
        /// <param name="key">Source of eight 32-bit words read at byte offsets 0..28 from <paramref name="keyOffset"/>.</param>
        /// <param name="keyOffset">Index in <paramref name="key"/> of the first key word.</param>
        /// <param name="nonce">Source of four 32-bit words read at byte offsets 0..12 from <paramref name="nonceOffset"/>.</param>
        /// <param name="nonceOffset">Index in <paramref name="nonce"/> of the first nonce word.</param>
        public static void HSalsa20(byte[] output, int outputOffset, byte[] key, int keyOffset, byte[] nonce, int nonceOffset)
        {
            // Members are initialised in word order, so key and nonce are read in the
            // same sequence as a field-by-field fill would read them.
            var block = new Array16 <UInt32>
            {
                x0  = SalsaConst0,
                x1  = ByteIntegerConverter.LoadLittleEndian32(key, keyOffset),
                x2  = ByteIntegerConverter.LoadLittleEndian32(key, keyOffset + 4),
                x3  = ByteIntegerConverter.LoadLittleEndian32(key, keyOffset + 8),
                x4  = ByteIntegerConverter.LoadLittleEndian32(key, keyOffset + 12),
                x5  = SalsaConst1,
                x6  = ByteIntegerConverter.LoadLittleEndian32(nonce, nonceOffset),
                x7  = ByteIntegerConverter.LoadLittleEndian32(nonce, nonceOffset + 4),
                x8  = ByteIntegerConverter.LoadLittleEndian32(nonce, nonceOffset + 8),
                x9  = ByteIntegerConverter.LoadLittleEndian32(nonce, nonceOffset + 12),
                x10 = SalsaConst2,
                x11 = ByteIntegerConverter.LoadLittleEndian32(key, keyOffset + 16),
                x12 = ByteIntegerConverter.LoadLittleEndian32(key, keyOffset + 20),
                x13 = ByteIntegerConverter.LoadLittleEndian32(key, keyOffset + 24),
                x14 = ByteIntegerConverter.LoadLittleEndian32(key, keyOffset + 28),
                x15 = SalsaConst3
            };

            // The core reads and overwrites the same block.
            SalsaCore.HSalsa(out block, ref block, 20);

            // First the four diagonal words (the slots that held the constants) ...
            ByteIntegerConverter.StoreLittleEndian32(output, outputOffset, block.x0);
            ByteIntegerConverter.StoreLittleEndian32(output, outputOffset + 4, block.x5);
            ByteIntegerConverter.StoreLittleEndian32(output, outputOffset + 8, block.x10);
            ByteIntegerConverter.StoreLittleEndian32(output, outputOffset + 12, block.x15);

            // ... then the four words from the slots that held the nonce.
            ByteIntegerConverter.StoreLittleEndian32(output, outputOffset + 16, block.x6);
            ByteIntegerConverter.StoreLittleEndian32(output, outputOffset + 20, block.x7);
            ByteIntegerConverter.StoreLittleEndian32(output, outputOffset + 24, block.x8);
            ByteIntegerConverter.StoreLittleEndian32(output, outputOffset + 28, block.x9);
        }
Beispiel #2
0
        /// <summary>
        /// Reads eight consecutive 32-bit words from the start of <paramref name="source"/>
        /// (byte offsets 0, 4, ..., 28) via <c>ByteIntegerConverter.LoadLittleEndian32</c>
        /// and returns them as an <c>Array8</c>.
        /// </summary>
        /// <param name="source">Byte array the eight words are read from.</param>
        /// <returns>An <c>Array8</c> whose members x0..x7 hold the words in source order.</returns>
        public static Array8 <uint> ToArray8(this byte[] source)
        {
            var words = new Array8 <uint>();

            // lower four words
            words.x0 = ByteIntegerConverter.LoadLittleEndian32(source, 0);
            words.x1 = ByteIntegerConverter.LoadLittleEndian32(source, 4);
            words.x2 = ByteIntegerConverter.LoadLittleEndian32(source, 8);
            words.x3 = ByteIntegerConverter.LoadLittleEndian32(source, 12);

            // upper four words
            words.x4 = ByteIntegerConverter.LoadLittleEndian32(source, 16);
            words.x5 = ByteIntegerConverter.LoadLittleEndian32(source, 20);
            words.x6 = ByteIntegerConverter.LoadLittleEndian32(source, 24);
            words.x7 = ByteIntegerConverter.LoadLittleEndian32(source, 28);

            return words;
        }
        /// <summary>
        /// Fills <paramref name="internalKey"/> in two stages: first an HSalsa pass
        /// (20 rounds) over the constants, eight key words and the first four nonce
        /// words; then the pass output is rearranged in place into a new block whose
        /// key slots hold the derived words, whose words 6 and 7 hold the nonce words
        /// at byte offsets 16 and 20, and whose words 8 and 9 are zero.
        /// </summary>
        /// <param name="internalKey">Block that is built and returned to the caller.</param>
        /// <param name="key">Source of eight 32-bit words read from <paramref name="keyOffset"/>.</param>
        /// <param name="keyOffset">Index in <paramref name="key"/> of the first key word.</param>
        /// <param name="nonce">Source of six 32-bit words (byte offsets 0..20) read from <paramref name="nonceOffset"/>.</param>
        /// <param name="nonceOffset">Index in <paramref name="nonce"/> of the first nonce word.</param>
        private static void PrepareInternalKey(out Array16 <UInt32> internalKey, byte[] key, int keyOffset, byte[] nonce, int nonceOffset)
        {
            // ---- stage 1: input block for the HSalsa pass ----
            internalKey.x0  = Salsa20.SalsaConst0;
            internalKey.x1  = ByteIntegerConverter.LoadLittleEndian32(key, keyOffset);
            internalKey.x2  = ByteIntegerConverter.LoadLittleEndian32(key, keyOffset + 4);
            internalKey.x3  = ByteIntegerConverter.LoadLittleEndian32(key, keyOffset + 8);
            internalKey.x4  = ByteIntegerConverter.LoadLittleEndian32(key, keyOffset + 12);
            internalKey.x5  = Salsa20.SalsaConst1;
            internalKey.x6  = ByteIntegerConverter.LoadLittleEndian32(nonce, nonceOffset);
            internalKey.x7  = ByteIntegerConverter.LoadLittleEndian32(nonce, nonceOffset + 4);
            internalKey.x8  = ByteIntegerConverter.LoadLittleEndian32(nonce, nonceOffset + 8);
            internalKey.x9  = ByteIntegerConverter.LoadLittleEndian32(nonce, nonceOffset + 12);
            internalKey.x10 = Salsa20.SalsaConst2;
            internalKey.x11 = ByteIntegerConverter.LoadLittleEndian32(key, keyOffset + 16);
            internalKey.x12 = ByteIntegerConverter.LoadLittleEndian32(key, keyOffset + 20);
            internalKey.x13 = ByteIntegerConverter.LoadLittleEndian32(key, keyOffset + 24);
            internalKey.x14 = ByteIntegerConverter.LoadLittleEndian32(key, keyOffset + 28);
            internalKey.x15 = Salsa20.SalsaConst3;

            SalsaCore.HSalsa(out internalKey, ref internalKey, 20);

            // ---- stage 2: rearrange in place ----
            // Each diagonal word is copied to its key slot first; only then is the
            // constant written back over it, so no value is lost.
            internalKey.x1  = internalKey.x0;
            internalKey.x0  = Salsa20.SalsaConst0;

            internalKey.x2  = internalKey.x5;
            internalKey.x5  = Salsa20.SalsaConst1;

            internalKey.x3  = internalKey.x10;
            internalKey.x10 = Salsa20.SalsaConst2;

            internalKey.x4  = internalKey.x15;
            internalKey.x15 = Salsa20.SalsaConst3;

            // Words 6..9 become the upper key slots; this must precede the overwrites below.
            internalKey.x11 = internalKey.x6;
            internalKey.x12 = internalKey.x7;
            internalKey.x13 = internalKey.x8;
            internalKey.x14 = internalKey.x9;

            // Remaining nonce words (byte offsets 16 and 20).
            internalKey.x6 = ByteIntegerConverter.LoadLittleEndian32(nonce, nonceOffset + 16);
            internalKey.x7 = ByteIntegerConverter.LoadLittleEndian32(nonce, nonceOffset + 20);

            // The two "offset" words start at zero.
            internalKey.x8 = 0;
            internalKey.x9 = 0;
        }
Beispiel #4
0
        // NOTE(review): this listing looks damaged by extraction. The generic arguments of
        // ArraySegment are missing from the signature, and the for-loop below breaks off
        // mid-condition and runs straight into statements that use sharedKey, offset and
        // c0..c3, none of which are declared in this method. Recover the original text
        // before relying on or compiling this block.
        public static void GetPublicKey(ArraySegment\\ publicKey, ArraySegment\\ privateKey)
        {
            // Both segments must be backed by an array.
            if (publicKey.Array == null)
                throw new ArgumentNullException("publicKey.Array");
            if (privateKey.Array == null)
                throw new ArgumentNullException("privateKey.Array");
            // NOTE(review): this guard tests publicKey.Count, yet the message names
            // privateKey.Count - looks like a copy/paste slip; confirm the intended text.
            if (publicKey.Count != PublicKeySizeInBytes)
                throw new ArgumentException("privateKey.Count must be 32");
            if (privateKey.Count != PrivateKeySizeInBytes)
                throw new ArgumentException("privateKey.Count must be 32");

            // hack: abusing publicKey as temporary storage
            // todo: remove hack
            for (int i = 0; i \\ salsaState;
            // NOTE(review): from here on the statements fill a 16-word Salsa state from
            // c0..c3 and sharedKey, run SalsaCore.Salsa with 20 rounds and store words
            // x0..x7 back into sharedKey; they presumably belong to a different method.
            salsaState.x0 = c0;
            salsaState.x1 = ByteIntegerConverter.LoadLittleEndian32(sharedKey, offset + 0);
            salsaState.x2 = 0;
            salsaState.x3 = ByteIntegerConverter.LoadLittleEndian32(sharedKey, offset + 4);
            salsaState.x4 = ByteIntegerConverter.LoadLittleEndian32(sharedKey, offset + 8);
            salsaState.x5 = c1;
            salsaState.x6 = ByteIntegerConverter.LoadLittleEndian32(sharedKey, offset + 12);
            salsaState.x7 = 0;
            salsaState.x8 = 0;
            salsaState.x9 = ByteIntegerConverter.LoadLittleEndian32(sharedKey, offset + 16);
            salsaState.x10 = c2;
            salsaState.x11 = ByteIntegerConverter.LoadLittleEndian32(sharedKey, offset + 20);
            salsaState.x12 = ByteIntegerConverter.LoadLittleEndian32(sharedKey, offset + 24);
            salsaState.x13 = 0;
            salsaState.x14 = ByteIntegerConverter.LoadLittleEndian32(sharedKey, offset + 28);
            salsaState.x15 = c3;
            SalsaCore.Salsa(out salsaState, ref salsaState, 20);

            ByteIntegerConverter.StoreLittleEndian32(sharedKey, offset + 0, salsaState.x0);
            ByteIntegerConverter.StoreLittleEndian32(sharedKey, offset + 4, salsaState.x1);
            ByteIntegerConverter.StoreLittleEndian32(sharedKey, offset + 8, salsaState.x2);
            ByteIntegerConverter.StoreLittleEndian32(sharedKey, offset + 12, salsaState.x3);
            ByteIntegerConverter.StoreLittleEndian32(sharedKey, offset + 16, salsaState.x4);
            ByteIntegerConverter.StoreLittleEndian32(sharedKey, offset + 20, salsaState.x5);
            ByteIntegerConverter.StoreLittleEndian32(sharedKey, offset + 24, salsaState.x6);
            ByteIntegerConverter.StoreLittleEndian32(sharedKey, offset + 28, salsaState.x7);
        }
Beispiel #5
0
        /// <summary>
        /// Hashes the 32 bytes at <paramref name="sharedKey"/>[<paramref name="offset"/>..]
        /// in place, in the way the Curve25519 paper describes: the bytes are spread
        /// over a Salsa block tagged with the ASCII text "Curve25519output",
        /// <c>SalsaCore.Salsa</c> is run with 20 rounds, and the first eight output
        /// words replace the input.
        /// </summary>
        /// <param name="sharedKey">Buffer holding the 32 input bytes; overwritten with the result.</param>
        /// <param name="offset">Index in <paramref name="sharedKey"/> of the first byte.</param>
        internal static void KeyExchangeOutputHashCurve25519Paper(byte[] sharedKey, int offset)
        {
            // "Curve25519output", four characters per word, first character in the low byte.
            const UInt32 tag0 = (UInt32)'C' | ((UInt32)'u' << 8) | ((UInt32)'r' << 16) | ((UInt32)'v' << 24);
            const UInt32 tag1 = (UInt32)'e' | ((UInt32)'2' << 8) | ((UInt32)'5' << 16) | ((UInt32)'5' << 24);
            const UInt32 tag2 = (UInt32)'1' | ((UInt32)'9' << 8) | ((UInt32)'o' << 16) | ((UInt32)'u' << 24);
            const UInt32 tag3 = (UInt32)'t' | ((UInt32)'p' << 8) | ((UInt32)'u' << 16) | ((UInt32)'t' << 24);

            // Tag words sit on the diagonal (0, 5, 10, 15); words 2, 7, 8 and 13 stay
            // zero; the eight input words fill the remaining slots in ascending order.
            var block = new Array16 <UInt32>
            {
                x0  = tag0,
                x1  = ByteIntegerConverter.LoadLittleEndian32(sharedKey, offset),
                x2  = 0,
                x3  = ByteIntegerConverter.LoadLittleEndian32(sharedKey, offset + 4),
                x4  = ByteIntegerConverter.LoadLittleEndian32(sharedKey, offset + 8),
                x5  = tag1,
                x6  = ByteIntegerConverter.LoadLittleEndian32(sharedKey, offset + 12),
                x7  = 0,
                x8  = 0,
                x9  = ByteIntegerConverter.LoadLittleEndian32(sharedKey, offset + 16),
                x10 = tag2,
                x11 = ByteIntegerConverter.LoadLittleEndian32(sharedKey, offset + 20),
                x12 = ByteIntegerConverter.LoadLittleEndian32(sharedKey, offset + 24),
                x13 = 0,
                x14 = ByteIntegerConverter.LoadLittleEndian32(sharedKey, offset + 28),
                x15 = tag3
            };

            SalsaCore.Salsa(out block, ref block, 20);

            // Words 0..7 of the result overwrite the 32 input bytes.
            ByteIntegerConverter.StoreLittleEndian32(sharedKey, offset, block.x0);
            ByteIntegerConverter.StoreLittleEndian32(sharedKey, offset + 4, block.x1);
            ByteIntegerConverter.StoreLittleEndian32(sharedKey, offset + 8, block.x2);
            ByteIntegerConverter.StoreLittleEndian32(sharedKey, offset + 12, block.x3);
            ByteIntegerConverter.StoreLittleEndian32(sharedKey, offset + 16, block.x4);
            ByteIntegerConverter.StoreLittleEndian32(sharedKey, offset + 20, block.x5);
            ByteIntegerConverter.StoreLittleEndian32(sharedKey, offset + 24, block.x6);
            ByteIntegerConverter.StoreLittleEndian32(sharedKey, offset + 28, block.x7);
        }
Beispiel #6
0
        // written by floodyberry (Andrew M.)
        // original license: MIT or PUBLIC DOMAIN
        // https://github.com/floodyberry/poly1305-donna/blob/master/poly1305-donna-unrolled.c
        /// <summary>
        /// Port of poly1305-donna's unrolled <c>poly1305_auth</c>. Absorbs
        /// <paramref name="mLength"/> bytes of <paramref name="m"/> in 16-byte blocks
        /// into a five-limb (26 bits per limb) accumulator, multiplying by the clamped
        /// value taken from key words x0..x3 after every block, then adds key words
        /// x4..x7 and stores four 32-bit words to <paramref name="output"/>.
        /// </summary>
        /// <param name="output">Receives four 32-bit words at byte offsets 0..12 from <paramref name="outputOffset"/>.</param>
        /// <param name="outputOffset">Index in <paramref name="output"/> of the first stored word.</param>
        /// <param name="m">Message buffer.</param>
        /// <param name="mStart">Index in <paramref name="m"/> of the first message byte.</param>
        /// <param name="mLength">Number of message bytes to absorb.</param>
        /// <param name="key">Eight key words; only read, never written, by this method.</param>
        public static void poly1305_auth(byte[] output, int outputOffset, byte[] m, int mStart, int mLength,
                                         ref Array8 <uint> key)
        {
            const uint Mask26 = 0x3ffffff;

            /* clamp key */
            var t0 = key.x0;
            var t1 = key.x1;
            var t2 = key.x2;
            var t3 = key.x3;

            /* precompute multipliers: split into 26-bit limbs, clamp masks folded in */
            var r0 = t0 & 0x3ffffff;

            t0 >>= 26;
            t0  |= t1 << 6;
            var r1 = t0 & 0x3ffff03;

            t1 >>= 20;
            t1  |= t2 << 12;
            var r2 = t1 & 0x3ffc0ff;

            t2 >>= 14;
            t2  |= t3 << 18;
            var r3 = t2 & 0x3f03fff;

            t3 >>= 8;
            var r4 = t3 & 0x00fffff;

            var s1 = r1 * 5;
            var s2 = r2 * 5;
            var s3 = r3 * 5;
            var s4 = r4 * 5;

            /* init state */
            uint h0 = 0;
            uint h1 = 0;
            uint h2 = 0;
            uint h3 = 0;
            uint h4 = 0;

            // One iteration per block. While at least 16 bytes remain a full block is
            // taken; any other non-zero remainder is padded into one last block and the
            // remaining length is forced to 0, which ends the loop. (A negative length
            // copies nothing and absorbs a block that holds only the 0x01 pad byte.)
            while (mLength != 0)
            {
                uint highBit;

                if (mLength >= 16)
                {
                    /* full block */
                    mStart  += 16;
                    mLength -= 16;

                    t0 = ByteIntegerConverter.LoadLittleEndian32(m, mStart - 16);
                    t1 = ByteIntegerConverter.LoadLittleEndian32(m, mStart - 12);
                    t2 = ByteIntegerConverter.LoadLittleEndian32(m, mStart - 8);
                    t3 = ByteIntegerConverter.LoadLittleEndian32(m, mStart - 4);

                    // Full blocks get an extra bit set just above their 128 message bits.
                    highBit = 1u << 24;
                }
                else
                {
                    /* final bytes: copy the tail, append 0x01, zero the rest */
                    var padded = new byte[16];
                    var used   = 0;

                    for (; used < mLength; used++)
                    {
                        padded[used] = m[mStart + used];
                    }
                    padded[used++] = 1;
                    for (; used < 16; used++)
                    {
                        padded[used] = 0;
                    }
                    mLength = 0;

                    t0 = ByteIntegerConverter.LoadLittleEndian32(padded, 0);
                    t1 = ByteIntegerConverter.LoadLittleEndian32(padded, 4);
                    t2 = ByteIntegerConverter.LoadLittleEndian32(padded, 8);
                    t3 = ByteIntegerConverter.LoadLittleEndian32(padded, 12);
                    CryptoBytes.Wipe(padded);

                    // The 0x01 byte already inside the block takes the place of the extra bit.
                    highBit = 0;
                }

                /* h += block, sliced into 26-bit limbs */
                h0 += t0 & Mask26;
                h1 += (uint)(((((ulong)t1 << 32) | t0) >> 26) & Mask26);
                h2 += (uint)(((((ulong)t2 << 32) | t1) >> 20) & Mask26);
                h3 += (uint)(((((ulong)t3 << 32) | t2) >> 14) & Mask26);
                h4 += (t3 >> 8) | highBit;

                /* h *= r; s1..s4 (5 * r) fold the limbs that wrap around */
                var tt0 = (ulong)h0 * r0 + (ulong)h1 * s4 + (ulong)h2 * s3 + (ulong)h3 * s2 + (ulong)h4 * s1;
                var tt1 = (ulong)h0 * r1 + (ulong)h1 * r0 + (ulong)h2 * s4 + (ulong)h3 * s3 + (ulong)h4 * s2;
                var tt2 = (ulong)h0 * r2 + (ulong)h1 * r1 + (ulong)h2 * r0 + (ulong)h3 * s4 + (ulong)h4 * s3;
                var tt3 = (ulong)h0 * r3 + (ulong)h1 * r2 + (ulong)h2 * r1 + (ulong)h3 * r0 + (ulong)h4 * s4;
                var tt4 = (ulong)h0 * r4 + (ulong)h1 * r3 + (ulong)h2 * r2 + (ulong)h3 * r1 + (ulong)h4 * r0;

                uint carry;

                // The narrowing casts below deliberately drop high bits.
                unchecked
                {
                    h0   = (uint)tt0 & Mask26;
                    tt1 += tt0 >> 26;
                    h1   = (uint)tt1 & Mask26;
                    carry = (uint)(tt1 >> 26);
                    tt2 += carry;
                    h2   = (uint)tt2 & Mask26;
                    carry = (uint)(tt2 >> 26);
                    tt3 += carry;
                    h3   = (uint)tt3 & Mask26;
                    carry = (uint)(tt3 >> 26);
                    tt4 += carry;
                    h4   = (uint)tt4 & Mask26;
                    carry = (uint)(tt4 >> 26);
                }

                // Whatever is carried out of the top limb re-enters at the bottom times 5.
                h0 += carry * 5;
            }

            /* finish: propagate carries through all limbs */
            uint b = h0 >> 26;
            h0 &= Mask26;
            h1 += b;
            b   = h1 >> 26;
            h1 &= Mask26;
            h2 += b;
            b   = h2 >> 26;
            h2 &= Mask26;
            h3 += b;
            b   = h3 >> 26;
            h3 &= Mask26;
            h4 += b;
            b   = h4 >> 26;
            h4 &= Mask26;
            h0 += b * 5;

            /* g = h + 5 - 2^130 */
            var g0 = h0 + 5;
            b   = g0 >> 26;
            g0 &= Mask26;

            var g1 = h1 + b;
            b   = g1 >> 26;
            g1 &= Mask26;

            var g2 = h2 + b;
            b   = g2 >> 26;
            g2 &= Mask26;

            var g3 = h3 + b;
            b   = g3 >> 26;
            g3 &= Mask26;

            var g4 = unchecked (h4 + b - (1 << 26));

            // b becomes all ones when the subtraction did not borrow (top bit of g4
            // clear) and 0 otherwise. The all-ones case is produced by wrapping 0 - 1,
            // so it must be unchecked like the line above; otherwise a build with
            // overflow checking enabled throws OverflowException on this path.
            b = unchecked ((g4 >> 31) - 1);
            var nb = ~b;

            // Branch-free select: keep h where b is 0, take g where b is all ones.
            h0 = (h0 & nb) | (g0 & b);
            h1 = (h1 & nb) | (g1 & b);
            h2 = (h2 & nb) | (g2 & b);
            h3 = (h3 & nb) | (g3 & b);
            h4 = (h4 & nb) | (g4 & b);

            /* repack into 32-bit words and add key words x4..x7 */
            var f0 = (h0 | (h1 << 26)) + (ulong)key.x4;
            var f1 = ((h1 >> 6) | (h2 << 20)) + (ulong)key.x5;
            var f2 = ((h2 >> 12) | (h3 << 14)) + (ulong)key.x6;
            var f3 = ((h3 >> 18) | (h4 << 8)) + (ulong)key.x7;

            // Each stored word is the low half of its sum; the high half carries onward.
            unchecked
            {
                ByteIntegerConverter.StoreLittleEndian32(output, outputOffset + 0, (uint)f0);
                f1 += f0 >> 32;
                ByteIntegerConverter.StoreLittleEndian32(output, outputOffset + 4, (uint)f1);
                f2 += f1 >> 32;
                ByteIntegerConverter.StoreLittleEndian32(output, outputOffset + 8, (uint)f2);
                f3 += f2 >> 32;
                ByteIntegerConverter.StoreLittleEndian32(output, outputOffset + 12, (uint)f3);
            }
        }
        // written by floodyberry (Andrew M.)
        // original license: MIT or PUBLIC DOMAIN
        // https://github.com/floodyberry/poly1305-donna/blob/master/poly1305-donna-unrolled.c
        /// <summary>
        /// Port of poly1305-donna's unrolled <c>poly1305_auth</c>. Processes
        /// <paramref name="mLength"/> message bytes in 16-byte blocks: each block is
        /// added to a five-limb accumulator (26 bits per limb) which is then multiplied
        /// by the clamped value derived from key words x0..x3. After the last block the
        /// accumulator is reduced, key words x4..x7 are added, and four 32-bit words
        /// are stored to <paramref name="output"/>.
        /// </summary>
        /// <param name="output">Receives four 32-bit words at byte offsets 0..12 from <paramref name="outputOffset"/>.</param>
        /// <param name="outputOffset">Index in <paramref name="output"/> of the first stored word.</param>
        /// <param name="m">Message buffer.</param>
        /// <param name="mStart">Index in <paramref name="m"/> of the first message byte.</param>
        /// <param name="mLength">Number of message bytes to process.</param>
        /// <param name="key">Eight key words; this method only reads them.</param>
        public static void poly1305_auth(byte[] output, int outputOffset, byte[] m, int mStart, int mLength, ref Array8 <UInt32> key)
        {
            UInt32 t0, t1, t2, t3;
            UInt32 h0, h1, h2, h3, h4;
            UInt32 r0, r1, r2, r3, r4;
            UInt32 s1, s2, s3, s4;
            UInt32 b, nb;
            UInt32 hibit;
            int    j;
            UInt64 tt0, tt1, tt2, tt3, tt4;
            UInt64 f0, f1, f2, f3;
            UInt32 g0, g1, g2, g3, g4;
            UInt64 c;

            /* clamp key */
            t0 = key.x0;
            t1 = key.x1;
            t2 = key.x2;
            t3 = key.x3;

            /* precompute multipliers */
            r0 = t0 & 0x3ffffff; t0 >>= 26; t0 |= t1 << 6;
            r1 = t0 & 0x3ffff03; t1 >>= 20; t1 |= t2 << 12;
            r2 = t1 & 0x3ffc0ff; t2 >>= 14; t2 |= t3 << 18;
            r3 = t2 & 0x3f03fff; t3 >>= 8;
            r4 = t3 & 0x00fffff;

            s1 = r1 * 5;
            s2 = r2 * 5;
            s3 = r3 * 5;
            s4 = r4 * 5;

            /* init state */
            h0 = 0;
            h1 = 0;
            h2 = 0;
            h3 = 0;
            h4 = 0;

            // Block loop. Lengths of 16 or more take a full block; every other
            // non-zero length takes the padded final block, which sets mLength to 0
            // and thereby terminates the loop. (With a negative length nothing is
            // copied and the final block contains only the 0x01 pad byte.)
            while (mLength != 0)
            {
                if (mLength >= 16)
                {
                    /* full blocks */
                    mStart  += 16;
                    mLength -= 16;

                    t0 = ByteIntegerConverter.LoadLittleEndian32(m, mStart - 16);
                    t1 = ByteIntegerConverter.LoadLittleEndian32(m, mStart - 12);
                    t2 = ByteIntegerConverter.LoadLittleEndian32(m, mStart - 8);
                    t3 = ByteIntegerConverter.LoadLittleEndian32(m, mStart - 4);

                    // Full blocks get one extra bit set above their 128 message bits.
                    hibit = 1u << 24;
                }
                else
                {
                    /* final bytes */
                    byte[] mp = new byte[16];//todo remove allocation

                    for (j = 0; j < mLength; j++)
                    {
                        mp[j] = m[mStart + j];
                    }
                    mp[j++] = 1;
                    for (; j < 16; j++)
                    {
                        mp[j] = 0;
                    }
                    mLength = 0;

                    t0 = ByteIntegerConverter.LoadLittleEndian32(mp, 0);
                    t1 = ByteIntegerConverter.LoadLittleEndian32(mp, 4);
                    t2 = ByteIntegerConverter.LoadLittleEndian32(mp, 8);
                    t3 = ByteIntegerConverter.LoadLittleEndian32(mp, 12);
                    CryptoExtensions.Wipe(mp);

                    // The 0x01 byte appended above replaces the extra bit.
                    hibit = 0;
                }

                //todo: looks like these can be simplified a bit
                h0 += t0 & 0x3ffffff;
                h1 += (uint)(((((UInt64)t1 << 32) | t0) >> 26) & 0x3ffffff);
                h2 += (uint)(((((UInt64)t2 << 32) | t1) >> 20) & 0x3ffffff);
                h3 += (uint)(((((UInt64)t3 << 32) | t2) >> 14) & 0x3ffffff);
                h4 += (t3 >> 8) | hibit;

                /* multiply by r; s1..s4 (5 * r) cover the limbs that wrap around */
                tt0 = (ulong)h0 * r0 + (ulong)h1 * s4 + (ulong)h2 * s3 + (ulong)h3 * s2 + (ulong)h4 * s1;
                tt1 = (ulong)h0 * r1 + (ulong)h1 * r0 + (ulong)h2 * s4 + (ulong)h3 * s3 + (ulong)h4 * s2;
                tt2 = (ulong)h0 * r2 + (ulong)h1 * r1 + (ulong)h2 * r0 + (ulong)h3 * s4 + (ulong)h4 * s3;
                tt3 = (ulong)h0 * r3 + (ulong)h1 * r2 + (ulong)h2 * r1 + (ulong)h3 * r0 + (ulong)h4 * s4;
                tt4 = (ulong)h0 * r4 + (ulong)h1 * r3 + (ulong)h2 * r2 + (ulong)h3 * r1 + (ulong)h4 * r0;

                // Narrowing casts here intentionally discard the upper bits.
                unchecked
                {
                    h0   = (UInt32)tt0 & 0x3ffffff; c = (tt0 >> 26);
                    tt1 += c; h1 = (UInt32)tt1 & 0x3ffffff; b = (UInt32)(tt1 >> 26);
                    tt2 += b; h2 = (UInt32)tt2 & 0x3ffffff; b = (UInt32)(tt2 >> 26);
                    tt3 += b; h3 = (UInt32)tt3 & 0x3ffffff; b = (UInt32)(tt3 >> 26);
                    tt4 += b; h4 = (UInt32)tt4 & 0x3ffffff; b = (UInt32)(tt4 >> 26);
                }
                // The carry out of the top limb re-enters the bottom limb times 5.
                h0 += b * 5;
            }

            /* finish: full carry propagation */
            b   = h0 >> 26; h0 = h0 & 0x3ffffff;
            h1 += b; b = h1 >> 26; h1 = h1 & 0x3ffffff;
            h2 += b; b = h2 >> 26; h2 = h2 & 0x3ffffff;
            h3 += b; b = h3 >> 26; h3 = h3 & 0x3ffffff;
            h4 += b; b = h4 >> 26; h4 = h4 & 0x3ffffff;
            h0 += b * 5;

            /* g = h + 5 - 2^130 */
            g0 = h0 + 5; b = g0 >> 26; g0 &= 0x3ffffff;
            g1 = h1 + b; b = g1 >> 26; g1 &= 0x3ffffff;
            g2 = h2 + b; b = g2 >> 26; g2 &= 0x3ffffff;
            g3 = h3 + b; b = g3 >> 26; g3 &= 0x3ffffff;
            g4 = unchecked (h4 + b - (1 << 26));

            // Mask is all ones when g4's top bit is clear (no borrow), else 0. The
            // all-ones value comes from wrapping 0 - 1, so this needs the same
            // unchecked guard as the line above; without it a build compiled with
            // overflow checking throws OverflowException on that path.
            b  = unchecked ((g4 >> 31) - 1);
            nb = ~b;
            // Branch-free select between h (mask 0) and g (mask all ones).
            h0 = (h0 & nb) | (g0 & b);
            h1 = (h1 & nb) | (g1 & b);
            h2 = (h2 & nb) | (g2 & b);
            h3 = (h3 & nb) | (g3 & b);
            h4 = (h4 & nb) | (g4 & b);

            /* repack into 32-bit words and add key words x4..x7 */
            f0 = ((h0) | (h1 << 26)) + (UInt64)key.x4;
            f1 = ((h1 >> 6) | (h2 << 20)) + (UInt64)key.x5;
            f2 = ((h2 >> 12) | (h3 << 14)) + (UInt64)key.x6;
            f3 = ((h3 >> 18) | (h4 << 8)) + (UInt64)key.x7;

            // The low half of each sum is stored; the high half carries into the next.
            unchecked
            {
                ByteIntegerConverter.StoreLittleEndian32(output, outputOffset + 0, (uint)f0); f1 += (f0 >> 32);
                ByteIntegerConverter.StoreLittleEndian32(output, outputOffset + 4, (uint)f1); f2 += (f1 >> 32);
                ByteIntegerConverter.StoreLittleEndian32(output, outputOffset + 8, (uint)f2); f3 += (f2 >> 32);
                ByteIntegerConverter.StoreLittleEndian32(output, outputOffset + 12, (uint)f3);
            }
        }