// ToDo: Only used in tests. Remove?
 /// <summary>
 /// Fills the sixteen fields x0..x15 of <paramref name="output"/> by calling
 /// LoadLittleEndian32 once per field, at byte offsets inputOffset + 0, + 4, ... + 60.
 /// </summary>
 /// <param name="output">Receives the sixteen loaded values; every field is assigned.</param>
 /// <param name="input">Source byte buffer handed unchanged to LoadLittleEndian32.</param>
 /// <param name="inputOffset">Offset in <paramref name="input"/> used for field x0.</param>
 /// <remarks>
 /// No bounds checking is done here. Presumably the buffer must hold at least
 /// 64 bytes from inputOffset onwards - confirm against LoadLittleEndian32.
 /// </remarks>
 public static void Array16LoadLittleEndian32(out Array16 <UInt32> output, byte[] input, int inputOffset)
 {
     // Distance in bytes between the offsets of two consecutive fields.
     const int WordSize = sizeof(UInt32);

     // Fields are written one by one, in ascending order, directly into the out parameter.
     output.x0  = LoadLittleEndian32(input, inputOffset +  0 * WordSize);
     output.x1  = LoadLittleEndian32(input, inputOffset +  1 * WordSize);
     output.x2  = LoadLittleEndian32(input, inputOffset +  2 * WordSize);
     output.x3  = LoadLittleEndian32(input, inputOffset +  3 * WordSize);
     output.x4  = LoadLittleEndian32(input, inputOffset +  4 * WordSize);
     output.x5  = LoadLittleEndian32(input, inputOffset +  5 * WordSize);
     output.x6  = LoadLittleEndian32(input, inputOffset +  6 * WordSize);
     output.x7  = LoadLittleEndian32(input, inputOffset +  7 * WordSize);
     output.x8  = LoadLittleEndian32(input, inputOffset +  8 * WordSize);
     output.x9  = LoadLittleEndian32(input, inputOffset +  9 * WordSize);
     output.x10 = LoadLittleEndian32(input, inputOffset + 10 * WordSize);
     output.x11 = LoadLittleEndian32(input, inputOffset + 11 * WordSize);
     output.x12 = LoadLittleEndian32(input, inputOffset + 12 * WordSize);
     output.x13 = LoadLittleEndian32(input, inputOffset + 13 * WordSize);
     output.x14 = LoadLittleEndian32(input, inputOffset + 14 * WordSize);
     output.x15 = LoadLittleEndian32(input, inputOffset + 15 * WordSize);
 }
        /*        public static void Array8LoadLittleEndian32(out Array8<uint> output, byte[] input, int inputOffset, int inputLength)
         *      {
         #if DEBUG
         *          if (inputLength <= 0)
         *              throw new ArgumentException();
         #endif
         *          int inputEnd = inputOffset + inputLength;
         *          UInt32 highestInt;
         *          switch (inputLength & 3)
         *          {
         *              case 1:
         *                  highestInt = input[inputEnd - 1];
         *                  break;
         *              case 2:
         *                  highestInt = (uint)(
         *                      (input[inputEnd - 1] << 8) |
         *                      (input[inputEnd - 2]));
         *                  break;
         *              case 3:
         *                  highestInt = (uint)(
         *                      (input[inputEnd - 1] << 16) |
         *                      (input[inputEnd - 2] << 8) |
         *                      (input[inputEnd - 3]));
         *                  break;
         *              case 0:
         *                  highestInt = (uint)(
         *                      (input[inputEnd - 1] << 24) |
         *                      (input[inputEnd - 2] << 16) |
         *                      (input[inputEnd - 3] << 8) |
         *                      (input[inputEnd - 4]));
         *                  break;
         *              default:
         *                  throw new InvalidOperationException();
         *          }
         *          switch ((inputLength - 1) >> 2)
         *          {
         *              case 7:
         *                  output.x7 = highestInt;
         *                  output.x6 = LoadLittleEndian32(input, inputOffset + 6 * 4);
         *                  output.x5 = LoadLittleEndian32(input, inputOffset + 5 * 4);
         *                  output.x4 = LoadLittleEndian32(input, inputOffset + 4 * 4);
         *                  output.x3 = LoadLittleEndian32(input, inputOffset + 3 * 4);
         *                  output.x2 = LoadLittleEndian32(input, inputOffset + 2 * 4);
         *                  output.x1 = LoadLittleEndian32(input, inputOffset + 1 * 4);
         *                  output.x0 = LoadLittleEndian32(input, inputOffset + 0 * 4);
         *                  return;
         *              case 6:
         *                  output.x7 = 0;
         *                  output.x6 = highestInt;
         *                  output.x5 = LoadLittleEndian32(input, inputOffset + 5 * 4);
         *                  output.x4 = LoadLittleEndian32(input, inputOffset + 4 * 4);
         *                  output.x3 = LoadLittleEndian32(input, inputOffset + 3 * 4);
         *                  output.x2 = LoadLittleEndian32(input, inputOffset + 2 * 4);
         *                  output.x1 = LoadLittleEndian32(input, inputOffset + 1 * 4);
         *                  output.x0 = LoadLittleEndian32(input, inputOffset + 0 * 4);
         *                  return;
         *              case 5:
         *                  output.x7 = 0;
         *                  output.x6 = 0;
         *                  output.x5 = highestInt;
         *                  output.x4 = LoadLittleEndian32(input, inputOffset + 4 * 4);
         *                  output.x3 = LoadLittleEndian32(input, inputOffset + 3 * 4);
         *                  output.x2 = LoadLittleEndian32(input, inputOffset + 2 * 4);
         *                  output.x1 = LoadLittleEndian32(input, inputOffset + 1 * 4);
         *                  output.x0 = LoadLittleEndian32(input, inputOffset + 0 * 4);
         *                  return;
         *              case 4:
         *                  output.x7 = 0;
         *                  output.x6 = 0;
         *                  output.x5 = 0;
         *                  output.x4 = highestInt;
         *                  output.x3 = LoadLittleEndian32(input, inputOffset + 3 * 4);
         *                  output.x2 = LoadLittleEndian32(input, inputOffset + 2 * 4);
         *                  output.x1 = LoadLittleEndian32(input, inputOffset + 1 * 4);
         *                  output.x0 = LoadLittleEndian32(input, inputOffset + 0 * 4);
         *                  return;
         *              case 3:
         *                  output.x7 = 0;
         *                  output.x6 = 0;
         *                  output.x5 = 0;
         *                  output.x4 = 0;
         *                  output.x3 = highestInt;
         *                  output.x2 = LoadLittleEndian32(input, inputOffset + 2 * 4);
         *                  output.x1 = LoadLittleEndian32(input, inputOffset + 1 * 4);
         *                  output.x0 = LoadLittleEndian32(input, inputOffset + 0 * 4);
         *                  return;
         *              case 2:
         *                  output.x7 = 0;
         *                  output.x6 = 0;
         *                  output.x5 = 0;
         *                  output.x4 = 0;
         *                  output.x3 = 0;
         *                  output.x2 = highestInt;
         *                  output.x1 = LoadLittleEndian32(input, inputOffset + 1 * 4);
         *                  output.x0 = LoadLittleEndian32(input, inputOffset + 0 * 4);
         *                  return;
         *              case 1:
         *                  output.x7 = 0;
         *                  output.x6 = 0;
         *                  output.x5 = 0;
         *                  output.x4 = 0;
         *                  output.x3 = 0;
         *                  output.x2 = 0;
         *                  output.x1 = highestInt;
         *                  output.x0 = LoadLittleEndian32(input, inputOffset + 0 * 4);
         *                  return;
         *              case 0:
         *                  output.x7 = 0;
         *                  output.x6 = 0;
         *                  output.x5 = 0;
         *                  output.x4 = 0;
         *                  output.x3 = 0;
         *                  output.x2 = 0;
         *                  output.x1 = 0;
         *                  output.x0 = highestInt;
         *                  return;
         *              default:
         *                  throw new InvalidOperationException();
         *          }
         *      }*/

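        /* NOTE(review): disabled code. Every XorLittleEndian32 call below passes inputOffset + 6 * 4,
         * regardless of the output offset it is paired with - this looks like a copy-paste slip.
         * Verify and correct the input offsets before re-enabling this method.
         *
         * public static void Array8XorLittleEndian(byte[] output, int outputOffset, byte[] input, int inputOffset, ref Array8<uint> keyStream, int length)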
         * {
         #if DEBUG
         *  InternalAssert(length > 0);
         #endif
         *  int outputEnd = outputOffset + length;
         *  UInt32 highestInt;
         *  switch ((length - 1) >> 2)
         *  {
         *      case 7:
         *          highestInt = keyStream.x7;
         *          XorLittleEndian32(output, outputOffset + 6 * 4, input, inputOffset + 6 * 4, keyStream.x6);
         *          XorLittleEndian32(output, outputOffset + 5 * 4, input, inputOffset + 6 * 4, keyStream.x5);
         *          XorLittleEndian32(output, outputOffset + 4 * 4, input, inputOffset + 6 * 4, keyStream.x4);
         *          XorLittleEndian32(output, outputOffset + 3 * 4, input, inputOffset + 6 * 4, keyStream.x3);
         *          XorLittleEndian32(output, outputOffset + 2 * 4, input, inputOffset + 6 * 4, keyStream.x2);
         *          XorLittleEndian32(output, outputOffset + 1 * 4, input, inputOffset + 6 * 4, keyStream.x1);
         *          XorLittleEndian32(output, outputOffset + 0 * 4, input, inputOffset + 6 * 4, keyStream.x0);
         *          break;
         *      case 6:
         *          highestInt = keyStream.x6;
         *          XorLittleEndian32(output, outputOffset + 5 * 4, input, inputOffset + 6 * 4, keyStream.x5);
         *          XorLittleEndian32(output, outputOffset + 4 * 4, input, inputOffset + 6 * 4, keyStream.x4);
         *          XorLittleEndian32(output, outputOffset + 3 * 4, input, inputOffset + 6 * 4, keyStream.x3);
         *          XorLittleEndian32(output, outputOffset + 2 * 4, input, inputOffset + 6 * 4, keyStream.x2);
         *          XorLittleEndian32(output, outputOffset + 1 * 4, input, inputOffset + 6 * 4, keyStream.x1);
         *          XorLittleEndian32(output, outputOffset + 0 * 4, input, inputOffset + 6 * 4, keyStream.x0);
         *          break;
         *      case 5:
         *          highestInt = keyStream.x5;
         *          XorLittleEndian32(output, outputOffset + 4 * 4, input, inputOffset + 6 * 4, keyStream.x4);
         *          XorLittleEndian32(output, outputOffset + 3 * 4, input, inputOffset + 6 * 4, keyStream.x3);
         *          XorLittleEndian32(output, outputOffset + 2 * 4, input, inputOffset + 6 * 4, keyStream.x2);
         *          XorLittleEndian32(output, outputOffset + 1 * 4, input, inputOffset + 6 * 4, keyStream.x1);
         *          XorLittleEndian32(output, outputOffset + 0 * 4, input, inputOffset + 6 * 4, keyStream.x0);
         *          break;
         *      case 4:
         *          highestInt = keyStream.x4;
         *          XorLittleEndian32(output, outputOffset + 3 * 4, input, inputOffset + 6 * 4, keyStream.x3);
         *          XorLittleEndian32(output, outputOffset + 2 * 4, input, inputOffset + 6 * 4, keyStream.x2);
         *          XorLittleEndian32(output, outputOffset + 1 * 4, input, inputOffset + 6 * 4, keyStream.x1);
         *          XorLittleEndian32(output, outputOffset + 0 * 4, input, inputOffset + 6 * 4, keyStream.x0);
         *          break;
         *      case 3:
         *          highestInt = keyStream.x3;
         *          XorLittleEndian32(output, outputOffset + 2 * 4, input, inputOffset + 6 * 4, keyStream.x2);
         *          XorLittleEndian32(output, outputOffset + 1 * 4, input, inputOffset + 6 * 4, keyStream.x1);
         *          XorLittleEndian32(output, outputOffset + 0 * 4, input, inputOffset + 6 * 4, keyStream.x0);
         *          break;
         *      case 2:
         *          highestInt = keyStream.x2;
         *          XorLittleEndian32(output, outputOffset + 1 * 4, input, inputOffset + 6 * 4, keyStream.x1);
         *          XorLittleEndian32(output, outputOffset + 0 * 4, input, inputOffset + 6 * 4, keyStream.x0);
         *          break;
         *      case 1:
         *          highestInt = keyStream.x1;
         *          XorLittleEndian32(output, outputOffset + 0 * 4, input, inputOffset + 6 * 4, keyStream.x0);
         *          break;
         *      case 0:
         *          highestInt = keyStream.x0;
         *          break;
         *      default:
         *          throw new InvalidOperationException();
         *  }
         *  switch (length & 3)
         *  {
         *      case 1:
         *          output[outputEnd - 1] ^= (byte)highestInt;
         *          break;
         *      case 2:
         *          output[outputEnd - 1] ^= (byte)(highestInt >> 8);
         *          output[outputEnd - 2] ^= (byte)highestInt;
         *          break;
         *      case 3:
         *          output[outputEnd - 1] ^= (byte)(highestInt >> 16);
         *          output[outputEnd - 2] ^= (byte)(highestInt >> 8);
         *          output[outputEnd - 3] ^= (byte)highestInt;
         *          break;
         *      case 0:
         *          output[outputEnd - 1] ^= (byte)(highestInt >> 24);
         *          output[outputEnd - 2] ^= (byte)(highestInt >> 16);
         *          output[outputEnd - 3] ^= (byte)(highestInt >> 8);
         *          output[outputEnd - 4] ^= (byte)highestInt;
         *          break;
         *      default:
         *          throw new InvalidOperationException();
         *  }
         * }*/

        /*public static void Array8StoreLittleEndian32(byte[] output, int outputOffset, ref Array8<uint> input)
         * {
         *  StoreLittleEndian32(output, outputOffset + 0, input.x0);
         *  StoreLittleEndian32(output, outputOffset + 4, input.x1);
         *  StoreLittleEndian32(output, outputOffset + 8, input.x2);
         *  StoreLittleEndian32(output, outputOffset + 12, input.x3);
         *  StoreLittleEndian32(output, outputOffset + 16, input.x4);
         *  StoreLittleEndian32(output, outputOffset + 20, input.x5);
         *  StoreLittleEndian32(output, outputOffset + 24, input.x6);
         *  StoreLittleEndian32(output, outputOffset + 28, input.x7);
         * }*/
        #endregion

        /// <summary>
        /// Fills the sixteen fields x0..x15 of <paramref name="output"/> by calling
        /// LoadBigEndian64 once per field, at byte offsets inputOffset + 0, + 8, ... + 120.
        /// </summary>
        /// <param name="output">Receives the sixteen loaded values; every field is assigned.</param>
        /// <param name="input">Source byte buffer handed unchanged to LoadBigEndian64.</param>
        /// <param name="inputOffset">Offset in <paramref name="input"/> used for field x0.</param>
        /// <remarks>
        /// No bounds checking is done here. Presumably the buffer must hold at least
        /// 128 bytes from inputOffset onwards - confirm against LoadBigEndian64.
        /// </remarks>
        public static void Array16LoadBigEndian64(out Array16 <UInt64> output, byte[] input, int inputOffset)
        {
            // Distance in bytes between the offsets of two consecutive fields.
            const int WordSize = sizeof(UInt64);

            // Fields are written one by one, in ascending order, directly into the out parameter.
            output.x0  = LoadBigEndian64(input, inputOffset +  0 * WordSize);
            output.x1  = LoadBigEndian64(input, inputOffset +  1 * WordSize);
            output.x2  = LoadBigEndian64(input, inputOffset +  2 * WordSize);
            output.x3  = LoadBigEndian64(input, inputOffset +  3 * WordSize);
            output.x4  = LoadBigEndian64(input, inputOffset +  4 * WordSize);
            output.x5  = LoadBigEndian64(input, inputOffset +  5 * WordSize);
            output.x6  = LoadBigEndian64(input, inputOffset +  6 * WordSize);
            output.x7  = LoadBigEndian64(input, inputOffset +  7 * WordSize);
            output.x8  = LoadBigEndian64(input, inputOffset +  8 * WordSize);
            output.x9  = LoadBigEndian64(input, inputOffset +  9 * WordSize);
            output.x10 = LoadBigEndian64(input, inputOffset + 10 * WordSize);
            output.x11 = LoadBigEndian64(input, inputOffset + 11 * WordSize);
            output.x12 = LoadBigEndian64(input, inputOffset + 12 * WordSize);
            output.x13 = LoadBigEndian64(input, inputOffset + 13 * WordSize);
            output.x14 = LoadBigEndian64(input, inputOffset + 14 * WordSize);
            output.x15 = LoadBigEndian64(input, inputOffset + 15 * WordSize);
        }
        internal static void Core(out Array8 <UInt64> outputState, ref Array8 <UInt64> inputState, ref Array16 <UInt64> input)
        {
            unchecked
            {
                UInt64 a = inputState.x0;
                UInt64 b = inputState.x1;
                UInt64 c = inputState.x2;
                UInt64 d = inputState.x3;
                UInt64 e = inputState.x4;
                UInt64 f = inputState.x5;
                UInt64 g = inputState.x6;
                UInt64 h = inputState.x7;

                UInt64 w0  = input.x0;
                UInt64 w1  = input.x1;
                UInt64 w2  = input.x2;
                UInt64 w3  = input.x3;
                UInt64 w4  = input.x4;
                UInt64 w5  = input.x5;
                UInt64 w6  = input.x6;
                UInt64 w7  = input.x7;
                UInt64 w8  = input.x8;
                UInt64 w9  = input.x9;
                UInt64 w10 = input.x10;
                UInt64 w11 = input.x11;
                UInt64 w12 = input.x12;
                UInt64 w13 = input.x13;
                UInt64 w14 = input.x14;
                UInt64 w15 = input.x15;

                int t = 0;
                while (true)
                {
                    ulong t1, t2;

                    {//0
                        t1 = h +
                             ((e >> 14) ^ (e << (64 - 14)) ^ (e >> 18) ^ (e << (64 - 18)) ^ (e >> 41) ^ (e << (64 - 41))) +
                             //Sigma1(e)
                             ((e & f) ^ (~e & g)) + //Ch(e,f,g)
                             K[t] + w0;
                        t2 = ((a >> 28) ^ (a << (64 - 28)) ^ (a >> 34) ^ (a << (64 - 34)) ^ (a >> 39) ^ (a << (64 - 39))) +
                             //Sigma0(a)
                             ((a & b) ^ (a & c) ^ (b & c)); //Maj(a,b,c)
                        h = g;
                        g = f;
                        f = e;
                        e = d + t1;
                        d = c;
                        c = b;
                        b = a;
                        a = t1 + t2;
                        t++;
                    }
                    {//1
                        t1 = h +
                             ((e >> 14) ^ (e << (64 - 14)) ^ (e >> 18) ^ (e << (64 - 18)) ^ (e >> 41) ^ (e << (64 - 41))) +
                             //Sigma1(e)
                             ((e & f) ^ (~e & g)) + //Ch(e,f,g)
                             K[t] + w1;
                        t2 = ((a >> 28) ^ (a << (64 - 28)) ^ (a >> 34) ^ (a << (64 - 34)) ^ (a >> 39) ^ (a << (64 - 39))) +
                             //Sigma0(a)
                             ((a & b) ^ (a & c) ^ (b & c)); //Maj(a,b,c)
                        h = g;
                        g = f;
                        f = e;
                        e = d + t1;
                        d = c;
                        c = b;
                        b = a;
                        a = t1 + t2;
                        t++;
                    }
                    {//2
                        t1 = h +
                             ((e >> 14) ^ (e << (64 - 14)) ^ (e >> 18) ^ (e << (64 - 18)) ^ (e >> 41) ^ (e << (64 - 41))) +
                             //Sigma1(e)
                             ((e & f) ^ (~e & g)) + //Ch(e,f,g)
                             K[t] + w2;
                        t2 = ((a >> 28) ^ (a << (64 - 28)) ^ (a >> 34) ^ (a << (64 - 34)) ^ (a >> 39) ^ (a << (64 - 39))) +
                             //Sigma0(a)
                             ((a & b) ^ (a & c) ^ (b & c)); //Maj(a,b,c)
                        h = g;
                        g = f;
                        f = e;
                        e = d + t1;
                        d = c;
                        c = b;
                        b = a;
                        a = t1 + t2;
                        t++;
                    }
                    {//3
                        t1 = h +
                             ((e >> 14) ^ (e << (64 - 14)) ^ (e >> 18) ^ (e << (64 - 18)) ^ (e >> 41) ^ (e << (64 - 41))) +
                             //Sigma1(e)
                             ((e & f) ^ (~e & g)) + //Ch(e,f,g)
                             K[t] + w3;
                        t2 = ((a >> 28) ^ (a << (64 - 28)) ^ (a >> 34) ^ (a << (64 - 34)) ^ (a >> 39) ^ (a << (64 - 39))) +
                             //Sigma0(a)
                             ((a & b) ^ (a & c) ^ (b & c)); //Maj(a,b,c)
                        h = g;
                        g = f;
                        f = e;
                        e = d + t1;
                        d = c;
                        c = b;
                        b = a;
                        a = t1 + t2;
                        t++;
                    }
                    {//4
                        t1 = h +
                             ((e >> 14) ^ (e << (64 - 14)) ^ (e >> 18) ^ (e << (64 - 18)) ^ (e >> 41) ^ (e << (64 - 41))) +
                             //Sigma1(e)
                             ((e & f) ^ (~e & g)) + //Ch(e,f,g)
                             K[t] + w4;
                        t2 = ((a >> 28) ^ (a << (64 - 28)) ^ (a >> 34) ^ (a << (64 - 34)) ^ (a >> 39) ^ (a << (64 - 39))) +
                             //Sigma0(a)
                             ((a & b) ^ (a & c) ^ (b & c)); //Maj(a,b,c)
                        h = g;
                        g = f;
                        f = e;
                        e = d + t1;
                        d = c;
                        c = b;
                        b = a;
                        a = t1 + t2;
                        t++;
                    }
                    {//5
                        t1 = h +
                             ((e >> 14) ^ (e << (64 - 14)) ^ (e >> 18) ^ (e << (64 - 18)) ^ (e >> 41) ^ (e << (64 - 41))) +
                             //Sigma1(e)
                             ((e & f) ^ (~e & g)) + //Ch(e,f,g)
                             K[t] + w5;
                        t2 = ((a >> 28) ^ (a << (64 - 28)) ^ (a >> 34) ^ (a << (64 - 34)) ^ (a >> 39) ^ (a << (64 - 39))) +
                             //Sigma0(a)
                             ((a & b) ^ (a & c) ^ (b & c)); //Maj(a,b,c)
                        h = g;
                        g = f;
                        f = e;
                        e = d + t1;
                        d = c;
                        c = b;
                        b = a;
                        a = t1 + t2;
                        t++;
                    }
                    {//6
                        t1 = h +
                             ((e >> 14) ^ (e << (64 - 14)) ^ (e >> 18) ^ (e << (64 - 18)) ^ (e >> 41) ^ (e << (64 - 41))) +
                             //Sigma1(e)
                             ((e & f) ^ (~e & g)) + //Ch(e,f,g)
                             K[t] + w6;
                        t2 = ((a >> 28) ^ (a << (64 - 28)) ^ (a >> 34) ^ (a << (64 - 34)) ^ (a >> 39) ^ (a << (64 - 39))) +
                             //Sigma0(a)
                             ((a & b) ^ (a & c) ^ (b & c)); //Maj(a,b,c)
                        h = g;
                        g = f;
                        f = e;
                        e = d + t1;
                        d = c;
                        c = b;
                        b = a;
                        a = t1 + t2;
                        t++;
                    }
                    {//7
                        t1 = h +
                             ((e >> 14) ^ (e << (64 - 14)) ^ (e >> 18) ^ (e << (64 - 18)) ^ (e >> 41) ^ (e << (64 - 41))) +
                             //Sigma1(e)
                             ((e & f) ^ (~e & g)) + //Ch(e,f,g)
                             K[t] + w7;
                        t2 = ((a >> 28) ^ (a << (64 - 28)) ^ (a >> 34) ^ (a << (64 - 34)) ^ (a >> 39) ^ (a << (64 - 39))) +
                             //Sigma0(a)
                             ((a & b) ^ (a & c) ^ (b & c)); //Maj(a,b,c)
                        h = g;
                        g = f;
                        f = e;
                        e = d + t1;
                        d = c;
                        c = b;
                        b = a;
                        a = t1 + t2;
                        t++;
                    }
                    {//8
                        t1 = h +
                             ((e >> 14) ^ (e << (64 - 14)) ^ (e >> 18) ^ (e << (64 - 18)) ^ (e >> 41) ^ (e << (64 - 41))) +
                             //Sigma1(e)
                             ((e & f) ^ (~e & g)) + //Ch(e,f,g)
                             K[t] + w8;
                        t2 = ((a >> 28) ^ (a << (64 - 28)) ^ (a >> 34) ^ (a << (64 - 34)) ^ (a >> 39) ^ (a << (64 - 39))) +
                             //Sigma0(a)
                             ((a & b) ^ (a & c) ^ (b & c)); //Maj(a,b,c)
                        h = g;
                        g = f;
                        f = e;
                        e = d + t1;
                        d = c;
                        c = b;
                        b = a;
                        a = t1 + t2;
                        t++;
                    }
                    {//9
                        t1 = h +
                             ((e >> 14) ^ (e << (64 - 14)) ^ (e >> 18) ^ (e << (64 - 18)) ^ (e >> 41) ^ (e << (64 - 41))) +
                             //Sigma1(e)
                             ((e & f) ^ (~e & g)) + //Ch(e,f,g)
                             K[t] + w9;
                        t2 = ((a >> 28) ^ (a << (64 - 28)) ^ (a >> 34) ^ (a << (64 - 34)) ^ (a >> 39) ^ (a << (64 - 39))) +
                             //Sigma0(a)
                             ((a & b) ^ (a & c) ^ (b & c)); //Maj(a,b,c)
                        h = g;
                        g = f;
                        f = e;
                        e = d + t1;
                        d = c;
                        c = b;
                        b = a;
                        a = t1 + t2;
                        t++;
                    }
                    {//10
                        t1 = h +
                             ((e >> 14) ^ (e << (64 - 14)) ^ (e >> 18) ^ (e << (64 - 18)) ^ (e >> 41) ^ (e << (64 - 41))) +
                             //Sigma1(e)
                             ((e & f) ^ (~e & g)) + //Ch(e,f,g)
                             K[t] + w10;
                        t2 = ((a >> 28) ^ (a << (64 - 28)) ^ (a >> 34) ^ (a << (64 - 34)) ^ (a >> 39) ^ (a << (64 - 39))) +
                             //Sigma0(a)
                             ((a & b) ^ (a & c) ^ (b & c)); //Maj(a,b,c)
                        h = g;
                        g = f;
                        f = e;
                        e = d + t1;
                        d = c;
                        c = b;
                        b = a;
                        a = t1 + t2;
                        t++;
                    }
                    {//11
                        t1 = h +
                             ((e >> 14) ^ (e << (64 - 14)) ^ (e >> 18) ^ (e << (64 - 18)) ^ (e >> 41) ^ (e << (64 - 41))) +
                             //Sigma1(e)
                             ((e & f) ^ (~e & g)) + //Ch(e,f,g)
                             K[t] + w11;
                        t2 = ((a >> 28) ^ (a << (64 - 28)) ^ (a >> 34) ^ (a << (64 - 34)) ^ (a >> 39) ^ (a << (64 - 39))) +
                             //Sigma0(a)
                             ((a & b) ^ (a & c) ^ (b & c)); //Maj(a,b,c)
                        h = g;
                        g = f;
                        f = e;
                        e = d + t1;
                        d = c;
                        c = b;
                        b = a;
                        a = t1 + t2;
                        t++;
                    }
                    {//12
                        t1 = h +
                             ((e >> 14) ^ (e << (64 - 14)) ^ (e >> 18) ^ (e << (64 - 18)) ^ (e >> 41) ^ (e << (64 - 41))) +
                             //Sigma1(e)
                             ((e & f) ^ (~e & g)) + //Ch(e,f,g)
                             K[t] + w12;
                        t2 = ((a >> 28) ^ (a << (64 - 28)) ^ (a >> 34) ^ (a << (64 - 34)) ^ (a >> 39) ^ (a << (64 - 39))) +
                             //Sigma0(a)
                             ((a & b) ^ (a & c) ^ (b & c)); //Maj(a,b,c)
                        h = g;
                        g = f;
                        f = e;
                        e = d + t1;
                        d = c;
                        c = b;
                        b = a;
                        a = t1 + t2;
                        t++;
                    }
                    {//13
                        t1 = h +
                             ((e >> 14) ^ (e << (64 - 14)) ^ (e >> 18) ^ (e << (64 - 18)) ^ (e >> 41) ^ (e << (64 - 41))) +
                             //Sigma1(e)
                             ((e & f) ^ (~e & g)) + //Ch(e,f,g)
                             K[t] + w13;
                        t2 = ((a >> 28) ^ (a << (64 - 28)) ^ (a >> 34) ^ (a << (64 - 34)) ^ (a >> 39) ^ (a << (64 - 39))) +
                             //Sigma0(a)
                             ((a & b) ^ (a & c) ^ (b & c)); //Maj(a,b,c)
                        h = g;
                        g = f;
                        f = e;
                        e = d + t1;
                        d = c;
                        c = b;
                        b = a;
                        a = t1 + t2;
                        t++;
                    }
                    {//14
                        t1 = h +
                             ((e >> 14) ^ (e << (64 - 14)) ^ (e >> 18) ^ (e << (64 - 18)) ^ (e >> 41) ^ (e << (64 - 41))) +
                             //Sigma1(e)
                             ((e & f) ^ (~e & g)) + //Ch(e,f,g)
                             K[t] + w14;
                        t2 = ((a >> 28) ^ (a << (64 - 28)) ^ (a >> 34) ^ (a << (64 - 34)) ^ (a >> 39) ^ (a << (64 - 39))) +
                             //Sigma0(a)
                             ((a & b) ^ (a & c) ^ (b & c)); //Maj(a,b,c)
                        h = g;
                        g = f;
                        f = e;
                        e = d + t1;
                        d = c;
                        c = b;
                        b = a;
                        a = t1 + t2;
                        t++;
                    }
                    {//15
                        t1 = h +
                             ((e >> 14) ^ (e << (64 - 14)) ^ (e >> 18) ^ (e << (64 - 18)) ^ (e >> 41) ^ (e << (64 - 41))) +
                             //Sigma1(e)
                             ((e & f) ^ (~e & g)) + //Ch(e,f,g)
                             K[t] + w15;
                        t2 = ((a >> 28) ^ (a << (64 - 28)) ^ (a >> 34) ^ (a << (64 - 34)) ^ (a >> 39) ^ (a << (64 - 39))) +
                             //Sigma0(a)
                             ((a & b) ^ (a & c) ^ (b & c)); //Maj(a,b,c)
                        h = g;
                        g = f;
                        f = e;
                        e = d + t1;
                        d = c;
                        c = b;
                        b = a;
                        a = t1 + t2;
                        t++;
                    }
                    if (t == 80)
                    {
                        break;
                    }

                    w0 += ((w14 >> 19) ^ (w14 << (64 - 19)) ^ (w14 >> 61) ^ (w14 << (64 - 61)) ^ (w14 >> 6)) +
                          w9 +
                          ((w1 >> 1) ^ (w1 << (64 - 1)) ^ (w1 >> 8) ^ (w1 << (64 - 8)) ^ (w1 >> 7));
                    w1 += ((w15 >> 19) ^ (w15 << (64 - 19)) ^ (w15 >> 61) ^ (w15 << (64 - 61)) ^ (w15 >> 6)) +
                          w10 +
                          ((w2 >> 1) ^ (w2 << (64 - 1)) ^ (w2 >> 8) ^ (w2 << (64 - 8)) ^ (w2 >> 7));
                    w2 += ((w0 >> 19) ^ (w0 << (64 - 19)) ^ (w0 >> 61) ^ (w0 << (64 - 61)) ^ (w0 >> 6)) +
                          w11 +
                          ((w3 >> 1) ^ (w3 << (64 - 1)) ^ (w3 >> 8) ^ (w3 << (64 - 8)) ^ (w3 >> 7));
                    w3 += ((w1 >> 19) ^ (w1 << (64 - 19)) ^ (w1 >> 61) ^ (w1 << (64 - 61)) ^ (w1 >> 6)) +
                          w12 +
                          ((w4 >> 1) ^ (w4 << (64 - 1)) ^ (w4 >> 8) ^ (w4 << (64 - 8)) ^ (w4 >> 7));
                    w4 += ((w2 >> 19) ^ (w2 << (64 - 19)) ^ (w2 >> 61) ^ (w2 << (64 - 61)) ^ (w2 >> 6)) +
                          w13 +
                          ((w5 >> 1) ^ (w5 << (64 - 1)) ^ (w5 >> 8) ^ (w5 << (64 - 8)) ^ (w5 >> 7));
                    w5 += ((w3 >> 19) ^ (w3 << (64 - 19)) ^ (w3 >> 61) ^ (w3 << (64 - 61)) ^ (w3 >> 6)) +
                          w14 +
                          ((w6 >> 1) ^ (w6 << (64 - 1)) ^ (w6 >> 8) ^ (w6 << (64 - 8)) ^ (w6 >> 7));
                    w6 += ((w4 >> 19) ^ (w4 << (64 - 19)) ^ (w4 >> 61) ^ (w4 << (64 - 61)) ^ (w4 >> 6)) +
                          w15 +
                          ((w7 >> 1) ^ (w7 << (64 - 1)) ^ (w7 >> 8) ^ (w7 << (64 - 8)) ^ (w7 >> 7));
                    w7 += ((w5 >> 19) ^ (w5 << (64 - 19)) ^ (w5 >> 61) ^ (w5 << (64 - 61)) ^ (w5 >> 6)) +
                          w0 +
                          ((w8 >> 1) ^ (w8 << (64 - 1)) ^ (w8 >> 8) ^ (w8 << (64 - 8)) ^ (w8 >> 7));
                    w8 += ((w6 >> 19) ^ (w6 << (64 - 19)) ^ (w6 >> 61) ^ (w6 << (64 - 61)) ^ (w6 >> 6)) +
                          w1 +
                          ((w9 >> 1) ^ (w9 << (64 - 1)) ^ (w9 >> 8) ^ (w9 << (64 - 8)) ^ (w9 >> 7));
                    w9 += ((w7 >> 19) ^ (w7 << (64 - 19)) ^ (w7 >> 61) ^ (w7 << (64 - 61)) ^ (w7 >> 6)) +
                          w2 +
                          ((w10 >> 1) ^ (w10 << (64 - 1)) ^ (w10 >> 8) ^ (w10 << (64 - 8)) ^ (w10 >> 7));
                    w10 += ((w8 >> 19) ^ (w8 << (64 - 19)) ^ (w8 >> 61) ^ (w8 << (64 - 61)) ^ (w8 >> 6)) +
                           w3 +
                           ((w11 >> 1) ^ (w11 << (64 - 1)) ^ (w11 >> 8) ^ (w11 << (64 - 8)) ^ (w11 >> 7));
                    w11 += ((w9 >> 19) ^ (w9 << (64 - 19)) ^ (w9 >> 61) ^ (w9 << (64 - 61)) ^ (w9 >> 6)) +
                           w4 +
                           ((w12 >> 1) ^ (w12 << (64 - 1)) ^ (w12 >> 8) ^ (w12 << (64 - 8)) ^ (w12 >> 7));
                    w12 += ((w10 >> 19) ^ (w10 << (64 - 19)) ^ (w10 >> 61) ^ (w10 << (64 - 61)) ^ (w10 >> 6)) +
                           w5 +
                           ((w13 >> 1) ^ (w13 << (64 - 1)) ^ (w13 >> 8) ^ (w13 << (64 - 8)) ^ (w13 >> 7));
                    w13 += ((w11 >> 19) ^ (w11 << (64 - 19)) ^ (w11 >> 61) ^ (w11 << (64 - 61)) ^ (w11 >> 6)) +
                           w6 +
                           ((w14 >> 1) ^ (w14 << (64 - 1)) ^ (w14 >> 8) ^ (w14 << (64 - 8)) ^ (w14 >> 7));
                    w14 += ((w12 >> 19) ^ (w12 << (64 - 19)) ^ (w12 >> 61) ^ (w12 << (64 - 61)) ^ (w12 >> 6)) +
                           w7 +
                           ((w15 >> 1) ^ (w15 << (64 - 1)) ^ (w15 >> 8) ^ (w15 << (64 - 8)) ^ (w15 >> 7));
                    w15 += ((w13 >> 19) ^ (w13 << (64 - 19)) ^ (w13 >> 61) ^ (w13 << (64 - 61)) ^ (w13 >> 6)) +
                           w8 +
                           ((w0 >> 1) ^ (w0 << (64 - 1)) ^ (w0 >> 8) ^ (w0 << (64 - 8)) ^ (w0 >> 7));
                }

                outputState.x0 = inputState.x0 + a;
                outputState.x1 = inputState.x1 + b;
                outputState.x2 = inputState.x2 + c;
                outputState.x3 = inputState.x3 + d;
                outputState.x4 = inputState.x4 + e;
                outputState.x5 = inputState.x5 + f;
                outputState.x6 = inputState.x6 + g;
                outputState.x7 = inputState.x7 + h;
            }
        }
        /*public static void Array16LoadLittleEndian32(out Array16<UInt32> output, byte[] input, int inputOffset, int inputLength)
         * {
         *  Array8<UInt32> temp;
         *  if (inputLength > 32)
         *  {
         *      output.x0 = LoadLittleEndian32(input, inputOffset + 0);
         *      output.x1 = LoadLittleEndian32(input, inputOffset + 4);
         *      output.x2 = LoadLittleEndian32(input, inputOffset + 8);
         *      output.x3 = LoadLittleEndian32(input, inputOffset + 12);
         *      output.x4 = LoadLittleEndian32(input, inputOffset + 16);
         *      output.x5 = LoadLittleEndian32(input, inputOffset + 20);
         *      output.x6 = LoadLittleEndian32(input, inputOffset + 24);
         *      output.x7 = LoadLittleEndian32(input, inputOffset + 28);
         *      Array8LoadLittleEndian32(out temp, input, inputOffset + 32, inputLength - 32);
         *      output.x8 = temp.x0;
         *      output.x9 = temp.x1;
         *      output.x10 = temp.x2;
         *      output.x11 = temp.x3;
         *      output.x12 = temp.x4;
         *      output.x13 = temp.x5;
         *      output.x14 = temp.x6;
         *      output.x15 = temp.x7;
         *  }
         *  else
         *  {
         *      Array8LoadLittleEndian32(out temp, input, inputOffset, inputLength);
         *      output.x0 = temp.x0;
         *      output.x1 = temp.x1;
         *      output.x2 = temp.x2;
         *      output.x3 = temp.x3;
         *      output.x4 = temp.x4;
         *      output.x5 = temp.x5;
         *      output.x6 = temp.x6;
         *      output.x7 = temp.x7;
         *      output.x8 = 0;
         *      output.x9 = 0;
         *      output.x10 = 0;
         *      output.x11 = 0;
         *      output.x12 = 0;
         *      output.x13 = 0;
         *      output.x14 = 0;
         *      output.x15 = 0;
         *  }
         * }*/

        /// <summary>
        /// Writes the sixteen 32-bit words of <paramref name="input"/> (fields x0 through x15,
        /// in that order) into <paramref name="output"/>, one word every 4 bytes, for a total
        /// span of 64 bytes beginning at <paramref name="outputOffset"/>.
        /// </summary>
        /// <param name="output">Destination buffer; each word is written via StoreLittleEndian32.</param>
        /// <param name="outputOffset">Index in <paramref name="output"/> where x0 is written.</param>
        /// <param name="input">Source words; passed by reference and only read here.</param>
        public static void Array16StoreLittleEndian32(byte[] output, int outputOffset, ref Array16 <UInt32> input)
        {
            // Running write position: advanced by one 32-bit word (4 bytes) after each store,
            // so it takes the values outputOffset + 0, + 4, ..., + 60.
            int cursor = outputOffset;

            StoreLittleEndian32(output, cursor, input.x0);
            cursor += 4;
            StoreLittleEndian32(output, cursor, input.x1);
            cursor += 4;
            StoreLittleEndian32(output, cursor, input.x2);
            cursor += 4;
            StoreLittleEndian32(output, cursor, input.x3);
            cursor += 4;
            StoreLittleEndian32(output, cursor, input.x4);
            cursor += 4;
            StoreLittleEndian32(output, cursor, input.x5);
            cursor += 4;
            StoreLittleEndian32(output, cursor, input.x6);
            cursor += 4;
            StoreLittleEndian32(output, cursor, input.x7);
            cursor += 4;
            StoreLittleEndian32(output, cursor, input.x8);
            cursor += 4;
            StoreLittleEndian32(output, cursor, input.x9);
            cursor += 4;
            StoreLittleEndian32(output, cursor, input.x10);
            cursor += 4;
            StoreLittleEndian32(output, cursor, input.x11);
            cursor += 4;
            StoreLittleEndian32(output, cursor, input.x12);
            cursor += 4;
            StoreLittleEndian32(output, cursor, input.x13);
            cursor += 4;
            StoreLittleEndian32(output, cursor, input.x14);
            cursor += 4;
            StoreLittleEndian32(output, cursor, input.x15);
        }