예제 #1
0
        /*public static void XorLittleEndian32(byte[] buf, int offset, UInt32 value)
         * {
         *  buf[offset + 0] ^= (byte)value;
         *  buf[offset + 1] ^= (byte)(value >> 8);
         *  buf[offset + 2] ^= (byte)(value >> 16);
         *  buf[offset + 3] ^= (byte)(value >> 24);
         * }*/

        /*public static void XorLittleEndian32(byte[] output, int outputOffset, byte[] input, int inputOffset, UInt32 value)
         * {
         *  output[outputOffset + 0] = (byte)(input[inputOffset + 0] ^ value);
         *  output[outputOffset + 1] = (byte)(input[inputOffset + 1] ^ (value >> 8));
         *  output[outputOffset + 2] = (byte)(input[inputOffset + 2] ^ (value >> 16));
         *  output[outputOffset + 3] = (byte)(input[inputOffset + 3] ^ (value >> 24));
         * }*/

        #endregion

        #region Array8

        public static void Array8LoadLittleEndian32(out Array8 <UInt32> output, byte[] input, int inputOffset)
        {
            output.x0 = LoadLittleEndian32(input, inputOffset + 0);
            output.x1 = LoadLittleEndian32(input, inputOffset + 4);
            output.x2 = LoadLittleEndian32(input, inputOffset + 8);
            output.x3 = LoadLittleEndian32(input, inputOffset + 12);
            output.x4 = LoadLittleEndian32(input, inputOffset + 16);
            output.x5 = LoadLittleEndian32(input, inputOffset + 20);
            output.x6 = LoadLittleEndian32(input, inputOffset + 24);
            output.x7 = LoadLittleEndian32(input, inputOffset + 28);
        }
예제 #2
0
 internal static void Sha512Init(out Array8 <UInt64> state)
 {
     state.x0 = 0x6a09e667f3bcc908;
     state.x1 = 0xbb67ae8584caa73b;
     state.x2 = 0x3c6ef372fe94f82b;
     state.x3 = 0xa54ff53a5f1d36f1;
     state.x4 = 0x510e527fade682d1;
     state.x5 = 0x9b05688c2b3e6c1f;
     state.x6 = 0x1f83d9abfb41bd6b;
     state.x7 = 0x5be0cd19137e2179;
 }
예제 #3
0
        internal static void Core(out Array8 <UInt64> outputState, ref Array8 <UInt64> inputState, ref Array16 <UInt64> input)
        {
            unchecked
            {
                UInt64 a = inputState.x0;
                UInt64 b = inputState.x1;
                UInt64 c = inputState.x2;
                UInt64 d = inputState.x3;
                UInt64 e = inputState.x4;
                UInt64 f = inputState.x5;
                UInt64 g = inputState.x6;
                UInt64 h = inputState.x7;

                UInt64 w0  = input.x0;
                UInt64 w1  = input.x1;
                UInt64 w2  = input.x2;
                UInt64 w3  = input.x3;
                UInt64 w4  = input.x4;
                UInt64 w5  = input.x5;
                UInt64 w6  = input.x6;
                UInt64 w7  = input.x7;
                UInt64 w8  = input.x8;
                UInt64 w9  = input.x9;
                UInt64 w10 = input.x10;
                UInt64 w11 = input.x11;
                UInt64 w12 = input.x12;
                UInt64 w13 = input.x13;
                UInt64 w14 = input.x14;
                UInt64 w15 = input.x15;

                int t = 0;
                while (true)
                {
                    ulong t1, t2;

                    {//0
                        t1 = h +
                             ((e >> 14) ^ (e << (64 - 14)) ^ (e >> 18) ^ (e << (64 - 18)) ^ (e >> 41) ^ (e << (64 - 41))) +
                             //Sigma1(e)
                             ((e & f) ^ (~e & g)) + //Ch(e,f,g)
                             K[t] + w0;
                        t2 = ((a >> 28) ^ (a << (64 - 28)) ^ (a >> 34) ^ (a << (64 - 34)) ^ (a >> 39) ^ (a << (64 - 39))) +
                             //Sigma0(a)
                             ((a & b) ^ (a & c) ^ (b & c)); //Maj(a,b,c)
                        h = g;
                        g = f;
                        f = e;
                        e = d + t1;
                        d = c;
                        c = b;
                        b = a;
                        a = t1 + t2;
                        t++;
                    }
                    {//1
                        t1 = h +
                             ((e >> 14) ^ (e << (64 - 14)) ^ (e >> 18) ^ (e << (64 - 18)) ^ (e >> 41) ^ (e << (64 - 41))) +
                             //Sigma1(e)
                             ((e & f) ^ (~e & g)) + //Ch(e,f,g)
                             K[t] + w1;
                        t2 = ((a >> 28) ^ (a << (64 - 28)) ^ (a >> 34) ^ (a << (64 - 34)) ^ (a >> 39) ^ (a << (64 - 39))) +
                             //Sigma0(a)
                             ((a & b) ^ (a & c) ^ (b & c)); //Maj(a,b,c)
                        h = g;
                        g = f;
                        f = e;
                        e = d + t1;
                        d = c;
                        c = b;
                        b = a;
                        a = t1 + t2;
                        t++;
                    }
                    {//2
                        t1 = h +
                             ((e >> 14) ^ (e << (64 - 14)) ^ (e >> 18) ^ (e << (64 - 18)) ^ (e >> 41) ^ (e << (64 - 41))) +
                             //Sigma1(e)
                             ((e & f) ^ (~e & g)) + //Ch(e,f,g)
                             K[t] + w2;
                        t2 = ((a >> 28) ^ (a << (64 - 28)) ^ (a >> 34) ^ (a << (64 - 34)) ^ (a >> 39) ^ (a << (64 - 39))) +
                             //Sigma0(a)
                             ((a & b) ^ (a & c) ^ (b & c)); //Maj(a,b,c)
                        h = g;
                        g = f;
                        f = e;
                        e = d + t1;
                        d = c;
                        c = b;
                        b = a;
                        a = t1 + t2;
                        t++;
                    }
                    {//3
                        t1 = h +
                             ((e >> 14) ^ (e << (64 - 14)) ^ (e >> 18) ^ (e << (64 - 18)) ^ (e >> 41) ^ (e << (64 - 41))) +
                             //Sigma1(e)
                             ((e & f) ^ (~e & g)) + //Ch(e,f,g)
                             K[t] + w3;
                        t2 = ((a >> 28) ^ (a << (64 - 28)) ^ (a >> 34) ^ (a << (64 - 34)) ^ (a >> 39) ^ (a << (64 - 39))) +
                             //Sigma0(a)
                             ((a & b) ^ (a & c) ^ (b & c)); //Maj(a,b,c)
                        h = g;
                        g = f;
                        f = e;
                        e = d + t1;
                        d = c;
                        c = b;
                        b = a;
                        a = t1 + t2;
                        t++;
                    }
                    {//4
                        t1 = h +
                             ((e >> 14) ^ (e << (64 - 14)) ^ (e >> 18) ^ (e << (64 - 18)) ^ (e >> 41) ^ (e << (64 - 41))) +
                             //Sigma1(e)
                             ((e & f) ^ (~e & g)) + //Ch(e,f,g)
                             K[t] + w4;
                        t2 = ((a >> 28) ^ (a << (64 - 28)) ^ (a >> 34) ^ (a << (64 - 34)) ^ (a >> 39) ^ (a << (64 - 39))) +
                             //Sigma0(a)
                             ((a & b) ^ (a & c) ^ (b & c)); //Maj(a,b,c)
                        h = g;
                        g = f;
                        f = e;
                        e = d + t1;
                        d = c;
                        c = b;
                        b = a;
                        a = t1 + t2;
                        t++;
                    }
                    {//5
                        t1 = h +
                             ((e >> 14) ^ (e << (64 - 14)) ^ (e >> 18) ^ (e << (64 - 18)) ^ (e >> 41) ^ (e << (64 - 41))) +
                             //Sigma1(e)
                             ((e & f) ^ (~e & g)) + //Ch(e,f,g)
                             K[t] + w5;
                        t2 = ((a >> 28) ^ (a << (64 - 28)) ^ (a >> 34) ^ (a << (64 - 34)) ^ (a >> 39) ^ (a << (64 - 39))) +
                             //Sigma0(a)
                             ((a & b) ^ (a & c) ^ (b & c)); //Maj(a,b,c)
                        h = g;
                        g = f;
                        f = e;
                        e = d + t1;
                        d = c;
                        c = b;
                        b = a;
                        a = t1 + t2;
                        t++;
                    }
                    {//6
                        t1 = h +
                             ((e >> 14) ^ (e << (64 - 14)) ^ (e >> 18) ^ (e << (64 - 18)) ^ (e >> 41) ^ (e << (64 - 41))) +
                             //Sigma1(e)
                             ((e & f) ^ (~e & g)) + //Ch(e,f,g)
                             K[t] + w6;
                        t2 = ((a >> 28) ^ (a << (64 - 28)) ^ (a >> 34) ^ (a << (64 - 34)) ^ (a >> 39) ^ (a << (64 - 39))) +
                             //Sigma0(a)
                             ((a & b) ^ (a & c) ^ (b & c)); //Maj(a,b,c)
                        h = g;
                        g = f;
                        f = e;
                        e = d + t1;
                        d = c;
                        c = b;
                        b = a;
                        a = t1 + t2;
                        t++;
                    }
                    {//7
                        t1 = h +
                             ((e >> 14) ^ (e << (64 - 14)) ^ (e >> 18) ^ (e << (64 - 18)) ^ (e >> 41) ^ (e << (64 - 41))) +
                             //Sigma1(e)
                             ((e & f) ^ (~e & g)) + //Ch(e,f,g)
                             K[t] + w7;
                        t2 = ((a >> 28) ^ (a << (64 - 28)) ^ (a >> 34) ^ (a << (64 - 34)) ^ (a >> 39) ^ (a << (64 - 39))) +
                             //Sigma0(a)
                             ((a & b) ^ (a & c) ^ (b & c)); //Maj(a,b,c)
                        h = g;
                        g = f;
                        f = e;
                        e = d + t1;
                        d = c;
                        c = b;
                        b = a;
                        a = t1 + t2;
                        t++;
                    }
                    {//8
                        t1 = h +
                             ((e >> 14) ^ (e << (64 - 14)) ^ (e >> 18) ^ (e << (64 - 18)) ^ (e >> 41) ^ (e << (64 - 41))) +
                             //Sigma1(e)
                             ((e & f) ^ (~e & g)) + //Ch(e,f,g)
                             K[t] + w8;
                        t2 = ((a >> 28) ^ (a << (64 - 28)) ^ (a >> 34) ^ (a << (64 - 34)) ^ (a >> 39) ^ (a << (64 - 39))) +
                             //Sigma0(a)
                             ((a & b) ^ (a & c) ^ (b & c)); //Maj(a,b,c)
                        h = g;
                        g = f;
                        f = e;
                        e = d + t1;
                        d = c;
                        c = b;
                        b = a;
                        a = t1 + t2;
                        t++;
                    }
                    {//9
                        t1 = h +
                             ((e >> 14) ^ (e << (64 - 14)) ^ (e >> 18) ^ (e << (64 - 18)) ^ (e >> 41) ^ (e << (64 - 41))) +
                             //Sigma1(e)
                             ((e & f) ^ (~e & g)) + //Ch(e,f,g)
                             K[t] + w9;
                        t2 = ((a >> 28) ^ (a << (64 - 28)) ^ (a >> 34) ^ (a << (64 - 34)) ^ (a >> 39) ^ (a << (64 - 39))) +
                             //Sigma0(a)
                             ((a & b) ^ (a & c) ^ (b & c)); //Maj(a,b,c)
                        h = g;
                        g = f;
                        f = e;
                        e = d + t1;
                        d = c;
                        c = b;
                        b = a;
                        a = t1 + t2;
                        t++;
                    }
                    {//10
                        t1 = h +
                             ((e >> 14) ^ (e << (64 - 14)) ^ (e >> 18) ^ (e << (64 - 18)) ^ (e >> 41) ^ (e << (64 - 41))) +
                             //Sigma1(e)
                             ((e & f) ^ (~e & g)) + //Ch(e,f,g)
                             K[t] + w10;
                        t2 = ((a >> 28) ^ (a << (64 - 28)) ^ (a >> 34) ^ (a << (64 - 34)) ^ (a >> 39) ^ (a << (64 - 39))) +
                             //Sigma0(a)
                             ((a & b) ^ (a & c) ^ (b & c)); //Maj(a,b,c)
                        h = g;
                        g = f;
                        f = e;
                        e = d + t1;
                        d = c;
                        c = b;
                        b = a;
                        a = t1 + t2;
                        t++;
                    }
                    {//11
                        t1 = h +
                             ((e >> 14) ^ (e << (64 - 14)) ^ (e >> 18) ^ (e << (64 - 18)) ^ (e >> 41) ^ (e << (64 - 41))) +
                             //Sigma1(e)
                             ((e & f) ^ (~e & g)) + //Ch(e,f,g)
                             K[t] + w11;
                        t2 = ((a >> 28) ^ (a << (64 - 28)) ^ (a >> 34) ^ (a << (64 - 34)) ^ (a >> 39) ^ (a << (64 - 39))) +
                             //Sigma0(a)
                             ((a & b) ^ (a & c) ^ (b & c)); //Maj(a,b,c)
                        h = g;
                        g = f;
                        f = e;
                        e = d + t1;
                        d = c;
                        c = b;
                        b = a;
                        a = t1 + t2;
                        t++;
                    }
                    {//12
                        t1 = h +
                             ((e >> 14) ^ (e << (64 - 14)) ^ (e >> 18) ^ (e << (64 - 18)) ^ (e >> 41) ^ (e << (64 - 41))) +
                             //Sigma1(e)
                             ((e & f) ^ (~e & g)) + //Ch(e,f,g)
                             K[t] + w12;
                        t2 = ((a >> 28) ^ (a << (64 - 28)) ^ (a >> 34) ^ (a << (64 - 34)) ^ (a >> 39) ^ (a << (64 - 39))) +
                             //Sigma0(a)
                             ((a & b) ^ (a & c) ^ (b & c)); //Maj(a,b,c)
                        h = g;
                        g = f;
                        f = e;
                        e = d + t1;
                        d = c;
                        c = b;
                        b = a;
                        a = t1 + t2;
                        t++;
                    }
                    {//13
                        t1 = h +
                             ((e >> 14) ^ (e << (64 - 14)) ^ (e >> 18) ^ (e << (64 - 18)) ^ (e >> 41) ^ (e << (64 - 41))) +
                             //Sigma1(e)
                             ((e & f) ^ (~e & g)) + //Ch(e,f,g)
                             K[t] + w13;
                        t2 = ((a >> 28) ^ (a << (64 - 28)) ^ (a >> 34) ^ (a << (64 - 34)) ^ (a >> 39) ^ (a << (64 - 39))) +
                             //Sigma0(a)
                             ((a & b) ^ (a & c) ^ (b & c)); //Maj(a,b,c)
                        h = g;
                        g = f;
                        f = e;
                        e = d + t1;
                        d = c;
                        c = b;
                        b = a;
                        a = t1 + t2;
                        t++;
                    }
                    {//14
                        t1 = h +
                             ((e >> 14) ^ (e << (64 - 14)) ^ (e >> 18) ^ (e << (64 - 18)) ^ (e >> 41) ^ (e << (64 - 41))) +
                             //Sigma1(e)
                             ((e & f) ^ (~e & g)) + //Ch(e,f,g)
                             K[t] + w14;
                        t2 = ((a >> 28) ^ (a << (64 - 28)) ^ (a >> 34) ^ (a << (64 - 34)) ^ (a >> 39) ^ (a << (64 - 39))) +
                             //Sigma0(a)
                             ((a & b) ^ (a & c) ^ (b & c)); //Maj(a,b,c)
                        h = g;
                        g = f;
                        f = e;
                        e = d + t1;
                        d = c;
                        c = b;
                        b = a;
                        a = t1 + t2;
                        t++;
                    }
                    {//15
                        t1 = h +
                             ((e >> 14) ^ (e << (64 - 14)) ^ (e >> 18) ^ (e << (64 - 18)) ^ (e >> 41) ^ (e << (64 - 41))) +
                             //Sigma1(e)
                             ((e & f) ^ (~e & g)) + //Ch(e,f,g)
                             K[t] + w15;
                        t2 = ((a >> 28) ^ (a << (64 - 28)) ^ (a >> 34) ^ (a << (64 - 34)) ^ (a >> 39) ^ (a << (64 - 39))) +
                             //Sigma0(a)
                             ((a & b) ^ (a & c) ^ (b & c)); //Maj(a,b,c)
                        h = g;
                        g = f;
                        f = e;
                        e = d + t1;
                        d = c;
                        c = b;
                        b = a;
                        a = t1 + t2;
                        t++;
                    }
                    if (t == 80)
                    {
                        break;
                    }

                    w0 += ((w14 >> 19) ^ (w14 << (64 - 19)) ^ (w14 >> 61) ^ (w14 << (64 - 61)) ^ (w14 >> 6)) +
                          w9 +
                          ((w1 >> 1) ^ (w1 << (64 - 1)) ^ (w1 >> 8) ^ (w1 << (64 - 8)) ^ (w1 >> 7));
                    w1 += ((w15 >> 19) ^ (w15 << (64 - 19)) ^ (w15 >> 61) ^ (w15 << (64 - 61)) ^ (w15 >> 6)) +
                          w10 +
                          ((w2 >> 1) ^ (w2 << (64 - 1)) ^ (w2 >> 8) ^ (w2 << (64 - 8)) ^ (w2 >> 7));
                    w2 += ((w0 >> 19) ^ (w0 << (64 - 19)) ^ (w0 >> 61) ^ (w0 << (64 - 61)) ^ (w0 >> 6)) +
                          w11 +
                          ((w3 >> 1) ^ (w3 << (64 - 1)) ^ (w3 >> 8) ^ (w3 << (64 - 8)) ^ (w3 >> 7));
                    w3 += ((w1 >> 19) ^ (w1 << (64 - 19)) ^ (w1 >> 61) ^ (w1 << (64 - 61)) ^ (w1 >> 6)) +
                          w12 +
                          ((w4 >> 1) ^ (w4 << (64 - 1)) ^ (w4 >> 8) ^ (w4 << (64 - 8)) ^ (w4 >> 7));
                    w4 += ((w2 >> 19) ^ (w2 << (64 - 19)) ^ (w2 >> 61) ^ (w2 << (64 - 61)) ^ (w2 >> 6)) +
                          w13 +
                          ((w5 >> 1) ^ (w5 << (64 - 1)) ^ (w5 >> 8) ^ (w5 << (64 - 8)) ^ (w5 >> 7));
                    w5 += ((w3 >> 19) ^ (w3 << (64 - 19)) ^ (w3 >> 61) ^ (w3 << (64 - 61)) ^ (w3 >> 6)) +
                          w14 +
                          ((w6 >> 1) ^ (w6 << (64 - 1)) ^ (w6 >> 8) ^ (w6 << (64 - 8)) ^ (w6 >> 7));
                    w6 += ((w4 >> 19) ^ (w4 << (64 - 19)) ^ (w4 >> 61) ^ (w4 << (64 - 61)) ^ (w4 >> 6)) +
                          w15 +
                          ((w7 >> 1) ^ (w7 << (64 - 1)) ^ (w7 >> 8) ^ (w7 << (64 - 8)) ^ (w7 >> 7));
                    w7 += ((w5 >> 19) ^ (w5 << (64 - 19)) ^ (w5 >> 61) ^ (w5 << (64 - 61)) ^ (w5 >> 6)) +
                          w0 +
                          ((w8 >> 1) ^ (w8 << (64 - 1)) ^ (w8 >> 8) ^ (w8 << (64 - 8)) ^ (w8 >> 7));
                    w8 += ((w6 >> 19) ^ (w6 << (64 - 19)) ^ (w6 >> 61) ^ (w6 << (64 - 61)) ^ (w6 >> 6)) +
                          w1 +
                          ((w9 >> 1) ^ (w9 << (64 - 1)) ^ (w9 >> 8) ^ (w9 << (64 - 8)) ^ (w9 >> 7));
                    w9 += ((w7 >> 19) ^ (w7 << (64 - 19)) ^ (w7 >> 61) ^ (w7 << (64 - 61)) ^ (w7 >> 6)) +
                          w2 +
                          ((w10 >> 1) ^ (w10 << (64 - 1)) ^ (w10 >> 8) ^ (w10 << (64 - 8)) ^ (w10 >> 7));
                    w10 += ((w8 >> 19) ^ (w8 << (64 - 19)) ^ (w8 >> 61) ^ (w8 << (64 - 61)) ^ (w8 >> 6)) +
                           w3 +
                           ((w11 >> 1) ^ (w11 << (64 - 1)) ^ (w11 >> 8) ^ (w11 << (64 - 8)) ^ (w11 >> 7));
                    w11 += ((w9 >> 19) ^ (w9 << (64 - 19)) ^ (w9 >> 61) ^ (w9 << (64 - 61)) ^ (w9 >> 6)) +
                           w4 +
                           ((w12 >> 1) ^ (w12 << (64 - 1)) ^ (w12 >> 8) ^ (w12 << (64 - 8)) ^ (w12 >> 7));
                    w12 += ((w10 >> 19) ^ (w10 << (64 - 19)) ^ (w10 >> 61) ^ (w10 << (64 - 61)) ^ (w10 >> 6)) +
                           w5 +
                           ((w13 >> 1) ^ (w13 << (64 - 1)) ^ (w13 >> 8) ^ (w13 << (64 - 8)) ^ (w13 >> 7));
                    w13 += ((w11 >> 19) ^ (w11 << (64 - 19)) ^ (w11 >> 61) ^ (w11 << (64 - 61)) ^ (w11 >> 6)) +
                           w6 +
                           ((w14 >> 1) ^ (w14 << (64 - 1)) ^ (w14 >> 8) ^ (w14 << (64 - 8)) ^ (w14 >> 7));
                    w14 += ((w12 >> 19) ^ (w12 << (64 - 19)) ^ (w12 >> 61) ^ (w12 << (64 - 61)) ^ (w12 >> 6)) +
                           w7 +
                           ((w15 >> 1) ^ (w15 << (64 - 1)) ^ (w15 >> 8) ^ (w15 << (64 - 8)) ^ (w15 >> 7));
                    w15 += ((w13 >> 19) ^ (w13 << (64 - 19)) ^ (w13 >> 61) ^ (w13 << (64 - 61)) ^ (w13 >> 6)) +
                           w8 +
                           ((w0 >> 1) ^ (w0 << (64 - 1)) ^ (w0 >> 8) ^ (w0 << (64 - 8)) ^ (w0 >> 7));
                }

                outputState.x0 = inputState.x0 + a;
                outputState.x1 = inputState.x1 + b;
                outputState.x2 = inputState.x2 + c;
                outputState.x3 = inputState.x3 + d;
                outputState.x4 = inputState.x4 + e;
                outputState.x5 = inputState.x5 + f;
                outputState.x6 = inputState.x6 + g;
                outputState.x7 = inputState.x7 + h;
            }
        }
예제 #4
0
        // written by floodyberry (Andrew M.)
        // original license: MIT or PUBLIC DOMAIN
        // https://github.com/floodyberry/poly1305-donna/blob/master/poly1305-donna-unrolled.c
        public static void poly1305_auth(byte[] output, int outputOffset, byte[] m, int mStart, int mLength, ref Array8 <UInt32> key)
        {
            UInt32 t0, t1, t2, t3;
            UInt32 h0, h1, h2, h3, h4;
            UInt32 r0, r1, r2, r3, r4;
            UInt32 s1, s2, s3, s4;
            UInt32 b, nb;
            int    j;
            UInt64 tt0, tt1, tt2, tt3, tt4;
            UInt64 f0, f1, f2, f3;
            UInt32 g0, g1, g2, g3, g4;
            UInt64 c;

            /* clamp key */
            t0 = key.x0;
            t1 = key.x1;
            t2 = key.x2;
            t3 = key.x3;

            /* precompute multipliers */
            r0 = t0 & 0x3ffffff; t0 >>= 26; t0 |= t1 << 6;
            r1 = t0 & 0x3ffff03; t1 >>= 20; t1 |= t2 << 12;
            r2 = t1 & 0x3ffc0ff; t2 >>= 14; t2 |= t3 << 18;
            r3 = t2 & 0x3f03fff; t3 >>= 8;
            r4 = t3 & 0x00fffff;

            s1 = r1 * 5;
            s2 = r2 * 5;
            s3 = r3 * 5;
            s4 = r4 * 5;

            /* init state */
            h0 = 0;
            h1 = 0;
            h2 = 0;
            h3 = 0;
            h4 = 0;

            /* full blocks */
            if (mLength < 16)
            {
                goto poly1305_donna_atmost15bytes;
            }

poly1305_donna_16bytes:
            mStart  += 16;
            mLength -= 16;

            t0 = ByteIntegerConverter.LoadLittleEndian32(m, mStart - 16);
            t1 = ByteIntegerConverter.LoadLittleEndian32(m, mStart - 12);
            t2 = ByteIntegerConverter.LoadLittleEndian32(m, mStart - 8);
            t3 = ByteIntegerConverter.LoadLittleEndian32(m, mStart - 4);

            //todo: looks like these can be simplified a bit
            h0 += t0 & 0x3ffffff;
            h1 += (uint)(((((UInt64)t1 << 32) | t0) >> 26) & 0x3ffffff);
            h2 += (uint)(((((UInt64)t2 << 32) | t1) >> 20) & 0x3ffffff);
            h3 += (uint)(((((UInt64)t3 << 32) | t2) >> 14) & 0x3ffffff);
            h4 += (t3 >> 8) | (1 << 24);


poly1305_donna_mul:
            tt0 = (ulong)h0 * r0 + (ulong)h1 * s4 + (ulong)h2 * s3 + (ulong)h3 * s2 + (ulong)h4 * s1;
            tt1 = (ulong)h0 * r1 + (ulong)h1 * r0 + (ulong)h2 * s4 + (ulong)h3 * s3 + (ulong)h4 * s2;
            tt2 = (ulong)h0 * r2 + (ulong)h1 * r1 + (ulong)h2 * r0 + (ulong)h3 * s4 + (ulong)h4 * s3;
            tt3 = (ulong)h0 * r3 + (ulong)h1 * r2 + (ulong)h2 * r1 + (ulong)h3 * r0 + (ulong)h4 * s4;
            tt4 = (ulong)h0 * r4 + (ulong)h1 * r3 + (ulong)h2 * r2 + (ulong)h3 * r1 + (ulong)h4 * r0;

            unchecked
            {
                h0   = (UInt32)tt0 & 0x3ffffff; c = (tt0 >> 26);
                tt1 += c; h1 = (UInt32)tt1 & 0x3ffffff; b = (UInt32)(tt1 >> 26);
                tt2 += b; h2 = (UInt32)tt2 & 0x3ffffff; b = (UInt32)(tt2 >> 26);
                tt3 += b; h3 = (UInt32)tt3 & 0x3ffffff; b = (UInt32)(tt3 >> 26);
                tt4 += b; h4 = (UInt32)tt4 & 0x3ffffff; b = (UInt32)(tt4 >> 26);
            }
            h0 += b * 5;

            if (mLength >= 16)
            {
                goto poly1305_donna_16bytes;
            }

            /* final bytes */
poly1305_donna_atmost15bytes:
            if (mLength == 0)
            {
                goto poly1305_donna_finish;
            }

            byte[] mp = new byte[16];//todo remove allocation

            for (j = 0; j < mLength; j++)
            {
                mp[j] = m[mStart + j];
            }
            mp[j++] = 1;
            for (; j < 16; j++)
            {
                mp[j] = 0;
            }
            mLength = 0;

            t0 = ByteIntegerConverter.LoadLittleEndian32(mp, 0);
            t1 = ByteIntegerConverter.LoadLittleEndian32(mp, 4);
            t2 = ByteIntegerConverter.LoadLittleEndian32(mp, 8);
            t3 = ByteIntegerConverter.LoadLittleEndian32(mp, 12);
            CryptoBytes.Wipe(mp);

            h0 += t0 & 0x3ffffff;
            h1 += (uint)(((((UInt64)t1 << 32) | t0) >> 26) & 0x3ffffff);
            h2 += (uint)(((((UInt64)t2 << 32) | t1) >> 20) & 0x3ffffff);
            h3 += (uint)(((((UInt64)t3 << 32) | t2) >> 14) & 0x3ffffff);
            h4 += t3 >> 8;

            goto poly1305_donna_mul;

poly1305_donna_finish:
            b   = h0 >> 26; h0 = h0 & 0x3ffffff;
            h1 += b; b = h1 >> 26; h1 = h1 & 0x3ffffff;
            h2 += b; b = h2 >> 26; h2 = h2 & 0x3ffffff;
            h3 += b; b = h3 >> 26; h3 = h3 & 0x3ffffff;
            h4 += b; b = h4 >> 26; h4 = h4 & 0x3ffffff;
            h0 += b * 5;

            g0 = h0 + 5; b = g0 >> 26; g0 &= 0x3ffffff;
            g1 = h1 + b; b = g1 >> 26; g1 &= 0x3ffffff;
            g2 = h2 + b; b = g2 >> 26; g2 &= 0x3ffffff;
            g3 = h3 + b; b = g3 >> 26; g3 &= 0x3ffffff;
            g4 = unchecked (h4 + b - (1 << 26));

            b  = (g4 >> 31) - 1;
            nb = ~b;
            h0 = (h0 & nb) | (g0 & b);
            h1 = (h1 & nb) | (g1 & b);
            h2 = (h2 & nb) | (g2 & b);
            h3 = (h3 & nb) | (g3 & b);
            h4 = (h4 & nb) | (g4 & b);

            f0 = ((h0) | (h1 << 26)) + (UInt64)key.x4;
            f1 = ((h1 >> 6) | (h2 << 20)) + (UInt64)key.x5;
            f2 = ((h2 >> 12) | (h3 << 14)) + (UInt64)key.x6;
            f3 = ((h3 >> 18) | (h4 << 8)) + (UInt64)key.x7;

            unchecked
            {
                ByteIntegerConverter.StoreLittleEndian32(output, outputOffset + 0, (uint)f0); f1 += (f0 >> 32);
                ByteIntegerConverter.StoreLittleEndian32(output, outputOffset + 4, (uint)f1); f2 += (f1 >> 32);
                ByteIntegerConverter.StoreLittleEndian32(output, outputOffset + 8, (uint)f2); f3 += (f2 >> 32);
                ByteIntegerConverter.StoreLittleEndian32(output, outputOffset + 12, (uint)f3);
            }
        }