// Compresses one message block pointed to by `input` into the BLAKE2b state `s`,
// dispatching to the fastest mixing implementation compiled in and supported at runtime.
unsafe private static void compress(Blake2bContext *s, byte *input) {
    // Reinterpret the input block as 64-bit message words (valid as-is on little-endian hosts).
    ulong *m = (ulong *)input;
#if FAST_SPAN
    // Big-endian host: decode each word as little-endian into the context's
    // scratch buffer s->b and mix from there instead of the raw input.
    if (!BitConverter.IsLittleEndian) {
        var span = new ReadOnlySpan <byte>(input, BlockBytes);
        m = (ulong *)s->b;
        for (int i = 0; i < BlockWords; i++) {
            m[i] = BinaryPrimitives.ReadUInt64LittleEndian(span.Slice(i * WordSize, WordSize));
        }
    }
#endif
#if USE_INTRINSICS
#if USE_AVX2
    // Runtime dispatch: AVX2 > SSE4.1 > scalar. The dangling `else` before each
    // #endif deliberately chains across the preprocessor blocks.
    if (Avx2.IsSupported) {
        mixAvx2(s, m);
    } else
#endif
    if (Sse41.IsSupported) {
        mixSse41(s, m);
    } else
#endif
    mixScalar(s, m);
}
// Compresses one message block pointed to by `input` into the BLAKE2b state `s`.
// On .NET Core 3.0 builds, uses the SSE4.1 implementation when the CPU supports
// it; otherwise falls back to the portable scalar implementation.
unsafe private static void compress(Blake2bContext *s, byte *input) {
    // Reinterpret the input block as 64-bit message words.
    ulong *m = (ulong *)input;
#if NETCOREAPP3_0
    // Fixed: the condition was `false && Sse41.IsSupported`, a debugging
    // leftover that made the SSE4.1 branch unreachable dead code and forced
    // every caller through the scalar path.
    if (Sse41.IsSupported) {
        mixSse41(s, m);
    } else
#endif
    {
        mixScalar(s, m);
    }
}
// Folds one 16-word message block into the chain value s->h using a single
// BLAKE2b-shaped round over a 16-word working matrix.
// NOTE(review): this is NOT the full BLAKE2b F function — the G steps apply no
// rotations, only the even-indexed message words are mixed in, and only one
// round runs instead of twelve. Presumably a deliberately reduced/test variant;
// confirm it is not expected to be cryptographically equivalent to BLAKE2b.
private static void mixSimplified(Blake2bContext *s, ulong *m) {
    // Load the 16 message words.
    // NOTE(review): odd-indexed words (m01, m03, ..., m15) are loaded but never
    // used below — verify whether the second message-add per G was dropped on purpose.
    ulong m00 = m[00]; ulong m01 = m[01]; ulong m02 = m[02]; ulong m03 = m[03];
    ulong m04 = m[04]; ulong m05 = m[05]; ulong m06 = m[06]; ulong m07 = m[07];
    ulong m08 = m[08]; ulong m09 = m[09]; ulong m10 = m[10]; ulong m11 = m[11];
    ulong m12 = m[12]; ulong m13 = m[13]; ulong m14 = m[14]; ulong m15 = m[15];
    // v00..v07: current chain value h[0..7].
    ulong v00 = s->h[0]; ulong v01 = s->h[1]; ulong v02 = s->h[2]; ulong v03 = s->h[3];
    ulong v04 = s->h[4]; ulong v05 = s->h[5]; ulong v06 = s->h[6]; ulong v07 = s->h[7];
    // v08..v15: the BLAKE2b IV constants.
    ulong v08 = 0x6A09E667F3BCC908ul;
    ulong v09 = 0xBB67AE8584CAA73Bul;
    ulong v10 = 0x3C6EF372FE94F82Bul;
    ulong v11 = 0xA54FF53A5F1D36F1ul;
    ulong v12 = 0x510E527FADE682D1ul;
    ulong v13 = 0x9B05688C2B3E6C1Ful;
    ulong v14 = 0x1F83D9ABFB41BD6Bul;
    ulong v15 = 0x5BE0CD19137E2179ul;
    // Fold in the 128-bit message counter t[] and the finalization flag f[0].
    v12 ^= s->t[0];
    v13 ^= s->t[1];
    v14 ^= s->f[0];
    //ROUND 1 (first half)
    // Column step: rotation-free G over columns (0,4,8,12) .. (3,7,11,15).
    v00 += m00; v00 += v04; v12 ^= v00; v08 += v12; v04 ^= v08;
    v01 += m02; v01 += v05; v13 ^= v01; v09 += v13; v05 ^= v09;
    v02 += m04; v02 += v06; v14 ^= v02; v10 += v14; v06 ^= v10;
    v03 += m06; v03 += v07; v15 ^= v03; v11 += v15; v07 ^= v11;
    // Diagonal step: rotation-free G over the diagonals (0,5,10,15) etc.
    v00 += m08; v00 += v05; v15 ^= v00; v10 += v15; v05 ^= v10;
    v01 += m10; v01 += v06; v12 ^= v01; v11 += v12; v06 ^= v11;
    v02 += m12; v02 += v07; v13 ^= v02; v08 += v13; v07 ^= v08;
    v03 += m14; v03 += v04; v14 ^= v03; v09 += v14; v04 ^= v09;
    // Feed-forward: h[i] ^= v[i] ^ v[i + 8].
    s->h[0] ^= v00 ^ v08;
    s->h[1] ^= v01 ^ v09;
    s->h[2] ^= v02 ^ v10;
    s->h[3] ^= v03 ^ v11;
    s->h[4] ^= v04 ^ v12;
    s->h[5] ^= v05 ^ v13;
    s->h[6] ^= v06 ^ v14;
    s->h[7] ^= v07 ^ v15;
}
// Feeds one raw input block to the simplified mixing routine, reinterpreting
// the bytes as native-endian 64-bit message words.
private void compress(Blake2bContext *s, byte *data) {
    mixSimplified(s, (ulong *)data);
}
// Mixes one 128-byte message block (16 little-endian ulongs at *m) into the
// BLAKE2b state s->htf using 128-bit SSE intrinsics: 12 rounds of the BLAKE2b
// F function followed by the feed-forward into h[]. The 4x4 ulong working
// matrix is held as low/high Vector128 pairs (row1l/row1h .. row4l/row4h);
// each round is G1/G2 over the columns, diagonalize, G1/G2 over the diagonals,
// undiagonalize. The b0/b1 shuffles before each G realize the SIGMA message
// schedule for that round (rounds 11 and 12 repeat rounds 1 and 2).
unsafe private static void mixSse41(Blake2bContext *s, ulong *m) {
    var hptr = s->htf.h;
    // Rows 1-2: current chain value h[0..7]; rows 3-4: the BLAKE2b IV from V.iv.
    var row1l = Unsafe.As<ulong, Vector128<ulong>>(ref hptr[0]);
    var row1h = Unsafe.As<ulong, Vector128<ulong>>(ref hptr[2]);
    var row2l = Unsafe.As<ulong, Vector128<ulong>>(ref hptr[4]);
    var row2h = Unsafe.As<ulong, Vector128<ulong>>(ref hptr[6]);
    var row3l = Unsafe.As<ulong, Vector128<ulong>>(ref V.iv[0]);
    var row3h = Unsafe.As<ulong, Vector128<ulong>>(ref V.iv[2]);
    var row4l = Unsafe.As<ulong, Vector128<ulong>>(ref V.iv[4]);
    var row4h = Unsafe.As<ulong, Vector128<ulong>>(ref V.iv[6]);
    // Fold the message counter t[] and finalization flags f[] into row 4.
    row4l = Sse2.Xor(row4l, Sse2.LoadVector128(s->htf.t));
    row4h = Sse2.Xor(row4h, Sse2.LoadVector128(s->htf.f));
    //ROUND 1
    var m0 = Sse2.LoadVector128(m);
    var m1 = Sse2.LoadVector128(m + 2);
    var m2 = Sse2.LoadVector128(m + 4);
    var m3 = Sse2.LoadVector128(m + 6);
    var b0 = Sse2.UnpackLow(m0, m1);
    var b1 = Sse2.UnpackLow(m2, m3);
    // Byte-shuffle masks used by g1/g2 for the word rotations, loaded once
    // from the V.rm table.
    // NOTE(review): r16 reads offset 0 and r24 reads offset 16 — confirm the
    // mask table layout in V.rm matches the rotate-16/rotate-24 naming.
    var r16 = Sse2.LoadVector128((sbyte *)V.rm);
    var r24 = Sse2.LoadVector128((sbyte *)V.rm + 16);
    g1(ref row1l, ref row2l, ref row3l, ref row4l, ref row1h, ref row2h, ref row3h, ref row4h, ref b0, ref b1, ref r24);
    b0 = Sse2.UnpackHigh(m0, m1);
    b1 = Sse2.UnpackHigh(m2, m3);
    g2(ref row1l, ref row2l, ref row3l, ref row4l, ref row1h, ref row2h, ref row3h, ref row4h, ref b0, ref b1, ref r16);
    diagonalize(ref row1l, ref row2l, ref row3l, ref row4l, ref row1h, ref row2h, ref row3h, ref row4h, ref b0);
    var m4 = Sse2.LoadVector128(m + 8);
    var m5 = Sse2.LoadVector128(m + 10);
    var m6 = Sse2.LoadVector128(m + 12);
    var m7 = Sse2.LoadVector128(m + 14);
    b0 = Sse2.UnpackLow(m4, m5);
    b1 = Sse2.UnpackLow(m6, m7);
    g1(ref row1l, ref row2l, ref row3l, ref row4l, ref row1h, ref row2h, ref row3h, ref row4h, ref b0, ref b1, ref r24);
    b0 = Sse2.UnpackHigh(m4, m5);
    b1 = Sse2.UnpackHigh(m6, m7);
    g2(ref row1l, ref row2l, ref row3l, ref row4l, ref row1h, ref row2h, ref row3h, ref row4h, ref b0, ref b1, ref r16);
    undiagonalize(ref row1l, ref row2l, ref row3l, ref row4l, ref row1h, ref row2h, ref row3h, ref row4h, ref b0);
    //ROUND 2
    b0 = Sse2.UnpackLow(m7, m2);
    b1 = Sse2.UnpackHigh(m4, m6);
    g1(ref row1l, ref row2l, ref row3l, ref row4l, ref row1h, ref row2h, ref row3h, ref row4h, ref b0, ref b1, ref r24);
    b0 = Sse2.UnpackLow(m5, m4);
    b1 = alignr_ulong(ref m3, ref m7, 8);
    g2(ref row1l, ref row2l, ref row3l, ref row4l, ref row1h, ref row2h, ref row3h, ref row4h, ref b0, ref b1, ref r16);
    diagonalize(ref row1l, ref row2l, ref row3l, ref row4l, ref row1h, ref row2h, ref row3h, ref row4h, ref b0);
    b0 = shuffle_ulong(ref m0, 0b_01_00_11_10);
    b1 = Sse2.UnpackHigh(m5, m2);
    g1(ref row1l, ref row2l, ref row3l, ref row4l, ref row1h, ref row2h, ref row3h, ref row4h, ref b0, ref b1, ref r24);
    b0 = Sse2.UnpackLow(m6, m1);
    b1 = Sse2.UnpackHigh(m3, m1);
    g2(ref row1l, ref row2l, ref row3l, ref row4l, ref row1h, ref row2h, ref row3h, ref row4h, ref b0, ref b1, ref r16);
    undiagonalize(ref row1l, ref row2l, ref row3l, ref row4l, ref row1h, ref row2h, ref row3h, ref row4h, ref b0);
    //ROUND 3
    b0 = alignr_ulong(ref m6, ref m5, 8);
    b1 = Sse2.UnpackHigh(m2, m7);
    g1(ref row1l, ref row2l, ref row3l, ref row4l, ref row1h, ref row2h, ref row3h, ref row4h, ref b0, ref b1, ref r24);
    b0 = Sse2.UnpackLow(m4, m0);
    b1 = blend_ulong(ref m1, ref m6, 0b_1111_0000);
    g2(ref row1l, ref row2l, ref row3l, ref row4l, ref row1h, ref row2h, ref row3h, ref row4h, ref b0, ref b1, ref r16);
    diagonalize(ref row1l, ref row2l, ref row3l, ref row4l, ref row1h, ref row2h, ref row3h, ref row4h, ref b0);
    b0 = blend_ulong(ref m5, ref m1, 0b_1111_0000);
    b1 = Sse2.UnpackHigh(m3, m4);
    g1(ref row1l, ref row2l, ref row3l, ref row4l, ref row1h, ref row2h, ref row3h, ref row4h, ref b0, ref b1, ref r24);
    b0 = Sse2.UnpackLow(m7, m3);
    b1 = alignr_ulong(ref m2, ref m0, 8);
    g2(ref row1l, ref row2l, ref row3l, ref row4l, ref row1h, ref row2h, ref row3h, ref row4h, ref b0, ref b1, ref r16);
    undiagonalize(ref row1l, ref row2l, ref row3l, ref row4l, ref row1h, ref row2h, ref row3h, ref row4h, ref b0);
    //ROUND 4
    b0 = Sse2.UnpackHigh(m3, m1);
    b1 = Sse2.UnpackHigh(m6, m5);
    g1(ref row1l, ref row2l, ref row3l, ref row4l, ref row1h, ref row2h, ref row3h, ref row4h, ref b0, ref b1, ref r24);
    b0 = Sse2.UnpackHigh(m4, m0);
    b1 = Sse2.UnpackLow(m6, m7);
    g2(ref row1l, ref row2l, ref row3l, ref row4l, ref row1h, ref row2h, ref row3h, ref row4h, ref b0, ref b1, ref r16);
    diagonalize(ref row1l, ref row2l, ref row3l, ref row4l, ref row1h, ref row2h, ref row3h, ref row4h, ref b0);
    b0 = blend_ulong(ref m1, ref m2, 0b_1111_0000);
    b1 = blend_ulong(ref m2, ref m7, 0b_1111_0000);
    g1(ref row1l, ref row2l, ref row3l, ref row4l, ref row1h, ref row2h, ref row3h, ref row4h, ref b0, ref b1, ref r24);
    b0 = Sse2.UnpackLow(m3, m5);
    b1 = Sse2.UnpackLow(m0, m4);
    g2(ref row1l, ref row2l, ref row3l, ref row4l, ref row1h, ref row2h, ref row3h, ref row4h, ref b0, ref b1, ref r16);
    undiagonalize(ref row1l, ref row2l, ref row3l, ref row4l, ref row1h, ref row2h, ref row3h, ref row4h, ref b0);
    //ROUND 5
    b0 = Sse2.UnpackHigh(m4, m2);
    b1 = Sse2.UnpackLow(m1, m5);
    g1(ref row1l, ref row2l, ref row3l, ref row4l, ref row1h, ref row2h, ref row3h, ref row4h, ref b0, ref b1, ref r24);
    b0 = blend_ulong(ref m0, ref m3, 0b_1111_0000);
    b1 = blend_ulong(ref m2, ref m7, 0b_1111_0000);
    g2(ref row1l, ref row2l, ref row3l, ref row4l, ref row1h, ref row2h, ref row3h, ref row4h, ref b0, ref b1, ref r16);
    diagonalize(ref row1l, ref row2l, ref row3l, ref row4l, ref row1h, ref row2h, ref row3h, ref row4h, ref b0);
    b0 = blend_ulong(ref m7, ref m5, 0b_1111_0000);
    b1 = blend_ulong(ref m3, ref m1, 0b_1111_0000);
    g1(ref row1l, ref row2l, ref row3l, ref row4l, ref row1h, ref row2h, ref row3h, ref row4h, ref b0, ref b1, ref r24);
    b0 = alignr_ulong(ref m6, ref m0, 8);
    b1 = blend_ulong(ref m4, ref m6, 0b_1111_0000);
    g2(ref row1l, ref row2l, ref row3l, ref row4l, ref row1h, ref row2h, ref row3h, ref row4h, ref b0, ref b1, ref r16);
    undiagonalize(ref row1l, ref row2l, ref row3l, ref row4l, ref row1h, ref row2h, ref row3h, ref row4h, ref b0);
    //ROUND 6
    b0 = Sse2.UnpackLow(m1, m3);
    b1 = Sse2.UnpackLow(m0, m4);
    g1(ref row1l, ref row2l, ref row3l, ref row4l, ref row1h, ref row2h, ref row3h, ref row4h, ref b0, ref b1, ref r24);
    b0 = Sse2.UnpackLow(m6, m5);
    b1 = Sse2.UnpackHigh(m5, m1);
    g2(ref row1l, ref row2l, ref row3l, ref row4l, ref row1h, ref row2h, ref row3h, ref row4h, ref b0, ref b1, ref r16);
    diagonalize(ref row1l, ref row2l, ref row3l, ref row4l, ref row1h, ref row2h, ref row3h, ref row4h, ref b0);
    b0 = blend_ulong(ref m2, ref m3, 0b_1111_0000);
    b1 = Sse2.UnpackHigh(m7, m0);
    g1(ref row1l, ref row2l, ref row3l, ref row4l, ref row1h, ref row2h, ref row3h, ref row4h, ref b0, ref b1, ref r24);
    b0 = Sse2.UnpackHigh(m6, m2);
    b1 = blend_ulong(ref m7, ref m4, 0b_1111_0000);
    g2(ref row1l, ref row2l, ref row3l, ref row4l, ref row1h, ref row2h, ref row3h, ref row4h, ref b0, ref b1, ref r16);
    undiagonalize(ref row1l, ref row2l, ref row3l, ref row4l, ref row1h, ref row2h, ref row3h, ref row4h, ref b0);
    //ROUND 7
    b0 = blend_ulong(ref m6, ref m0, 0b_1111_0000);
    b1 = Sse2.UnpackLow(m7, m2);
    g1(ref row1l, ref row2l, ref row3l, ref row4l, ref row1h, ref row2h, ref row3h, ref row4h, ref b0, ref b1, ref r24);
    b0 = Sse2.UnpackHigh(m2, m7);
    b1 = alignr_ulong(ref m5, ref m6, 8);
    g2(ref row1l, ref row2l, ref row3l, ref row4l, ref row1h, ref row2h, ref row3h, ref row4h, ref b0, ref b1, ref r16);
    diagonalize(ref row1l, ref row2l, ref row3l, ref row4l, ref row1h, ref row2h, ref row3h, ref row4h, ref b0);
    b0 = Sse2.UnpackLow(m0, m3);
    b1 = shuffle_ulong(ref m4, 0b_01_00_11_10);
    g1(ref row1l, ref row2l, ref row3l, ref row4l, ref row1h, ref row2h, ref row3h, ref row4h, ref b0, ref b1, ref r24);
    b0 = Sse2.UnpackHigh(m3, m1);
    b1 = blend_ulong(ref m1, ref m5, 0b_1111_0000);
    g2(ref row1l, ref row2l, ref row3l, ref row4l, ref row1h, ref row2h, ref row3h, ref row4h, ref b0, ref b1, ref r16);
    undiagonalize(ref row1l, ref row2l, ref row3l, ref row4l, ref row1h, ref row2h, ref row3h, ref row4h, ref b0);
    //ROUND 8
    b0 = Sse2.UnpackHigh(m6, m3);
    b1 = blend_ulong(ref m6, ref m1, 0b_1111_0000);
    g1(ref row1l, ref row2l, ref row3l, ref row4l, ref row1h, ref row2h, ref row3h, ref row4h, ref b0, ref b1, ref r24);
    b0 = alignr_ulong(ref m7, ref m5, 8);
    b1 = Sse2.UnpackHigh(m0, m4);
    g2(ref row1l, ref row2l, ref row3l, ref row4l, ref row1h, ref row2h, ref row3h, ref row4h, ref b0, ref b1, ref r16);
    diagonalize(ref row1l, ref row2l, ref row3l, ref row4l, ref row1h, ref row2h, ref row3h, ref row4h, ref b0);
    b0 = Sse2.UnpackHigh(m2, m7);
    b1 = Sse2.UnpackLow(m4, m1);
    g1(ref row1l, ref row2l, ref row3l, ref row4l, ref row1h, ref row2h, ref row3h, ref row4h, ref b0, ref b1, ref r24);
    b0 = Sse2.UnpackLow(m0, m2);
    b1 = Sse2.UnpackLow(m3, m5);
    g2(ref row1l, ref row2l, ref row3l, ref row4l, ref row1h, ref row2h, ref row3h, ref row4h, ref b0, ref b1, ref r16);
    undiagonalize(ref row1l, ref row2l, ref row3l, ref row4l, ref row1h, ref row2h, ref row3h, ref row4h, ref b0);
    //ROUND 9
    b0 = Sse2.UnpackLow(m3, m7);
    b1 = alignr_ulong(ref m0, ref m5, 8);
    g1(ref row1l, ref row2l, ref row3l, ref row4l, ref row1h, ref row2h, ref row3h, ref row4h, ref b0, ref b1, ref r24);
    b0 = Sse2.UnpackHigh(m7, m4);
    b1 = alignr_ulong(ref m4, ref m1, 8);
    g2(ref row1l, ref row2l, ref row3l, ref row4l, ref row1h, ref row2h, ref row3h, ref row4h, ref b0, ref b1, ref r16);
    diagonalize(ref row1l, ref row2l, ref row3l, ref row4l, ref row1h, ref row2h, ref row3h, ref row4h, ref b0);
    b0 = m6;
    b1 = alignr_ulong(ref m5, ref m0, 8);
    g1(ref row1l, ref row2l, ref row3l, ref row4l, ref row1h, ref row2h, ref row3h, ref row4h, ref b0, ref b1, ref r24);
    b0 = blend_ulong(ref m1, ref m3, 0b_1111_0000);
    b1 = m2;
    g2(ref row1l, ref row2l, ref row3l, ref row4l, ref row1h, ref row2h, ref row3h, ref row4h, ref b0, ref b1, ref r16);
    undiagonalize(ref row1l, ref row2l, ref row3l, ref row4l, ref row1h, ref row2h, ref row3h, ref row4h, ref b0);
    //ROUND 10
    b0 = Sse2.UnpackLow(m5, m4);
    b1 = Sse2.UnpackHigh(m3, m0);
    g1(ref row1l, ref row2l, ref row3l, ref row4l, ref row1h, ref row2h, ref row3h, ref row4h, ref b0, ref b1, ref r24);
    b0 = Sse2.UnpackLow(m1, m2);
    b1 = blend_ulong(ref m3, ref m2, 0b_1111_0000);
    g2(ref row1l, ref row2l, ref row3l, ref row4l, ref row1h, ref row2h, ref row3h, ref row4h, ref b0, ref b1, ref r16);
    diagonalize(ref row1l, ref row2l, ref row3l, ref row4l, ref row1h, ref row2h, ref row3h, ref row4h, ref b0);
    b0 = Sse2.UnpackHigh(m7, m4);
    b1 = Sse2.UnpackHigh(m1, m6);
    g1(ref row1l, ref row2l, ref row3l, ref row4l, ref row1h, ref row2h, ref row3h, ref row4h, ref b0, ref b1, ref r24);
    b0 = alignr_ulong(ref m7, ref m5, 8);
    b1 = Sse2.UnpackLow(m6, m0);
    g2(ref row1l, ref row2l, ref row3l, ref row4l, ref row1h, ref row2h, ref row3h, ref row4h, ref b0, ref b1, ref r16);
    undiagonalize(ref row1l, ref row2l, ref row3l, ref row4l, ref row1h, ref row2h, ref row3h, ref row4h, ref b0);
    //ROUND 11
    // Rounds 11 and 12 repeat the schedules of rounds 1 and 2 (SIGMA[r % 10]).
    b0 = Sse2.UnpackLow(m0, m1);
    b1 = Sse2.UnpackLow(m2, m3);
    g1(ref row1l, ref row2l, ref row3l, ref row4l, ref row1h, ref row2h, ref row3h, ref row4h, ref b0, ref b1, ref r24);
    b0 = Sse2.UnpackHigh(m0, m1);
    b1 = Sse2.UnpackHigh(m2, m3);
    g2(ref row1l, ref row2l, ref row3l, ref row4l, ref row1h, ref row2h, ref row3h, ref row4h, ref b0, ref b1, ref r16);
    diagonalize(ref row1l, ref row2l, ref row3l, ref row4l, ref row1h, ref row2h, ref row3h, ref row4h, ref b0);
    b0 = Sse2.UnpackLow(m4, m5);
    b1 = Sse2.UnpackLow(m6, m7);
    g1(ref row1l, ref row2l, ref row3l, ref row4l, ref row1h, ref row2h, ref row3h, ref row4h, ref b0, ref b1, ref r24);
    b0 = Sse2.UnpackHigh(m4, m5);
    b1 = Sse2.UnpackHigh(m6, m7);
    g2(ref row1l, ref row2l, ref row3l, ref row4l, ref row1h, ref row2h, ref row3h, ref row4h, ref b0, ref b1, ref r16);
    undiagonalize(ref row1l, ref row2l, ref row3l, ref row4l, ref row1h, ref row2h, ref row3h, ref row4h, ref b0);
    //ROUND 12
    b0 = Sse2.UnpackLow(m7, m2);
    b1 = Sse2.UnpackHigh(m4, m6);
    g1(ref row1l, ref row2l, ref row3l, ref row4l, ref row1h, ref row2h, ref row3h, ref row4h, ref b0, ref b1, ref r24);
    b0 = Sse2.UnpackLow(m5, m4);
    b1 = alignr_ulong(ref m3, ref m7, 8);
    g2(ref row1l, ref row2l, ref row3l, ref row4l, ref row1h, ref row2h, ref row3h, ref row4h, ref b0, ref b1, ref r16);
    diagonalize(ref row1l, ref row2l, ref row3l, ref row4l, ref row1h, ref row2h, ref row3h, ref row4h, ref b0);
    b0 = shuffle_ulong(ref m0, 0b_01_00_11_10);
    b1 = Sse2.UnpackHigh(m5, m2);
    g1(ref row1l, ref row2l, ref row3l, ref row4l, ref row1h, ref row2h, ref row3h, ref row4h, ref b0, ref b1, ref r24);
    b0 = Sse2.UnpackLow(m6, m1);
    b1 = Sse2.UnpackHigh(m3, m1);
    g2(ref row1l, ref row2l, ref row3l, ref row4l, ref row1h, ref row2h, ref row3h, ref row4h, ref b0, ref b1, ref r16);
    undiagonalize(ref row1l, ref row2l, ref row3l, ref row4l, ref row1h, ref row2h, ref row3h, ref row4h, ref b0);
    // Feed-forward: h[i] ^= v[i] ^ v[i + 8], done 128 bits at a time.
    row1l = Sse2.Xor(row1l, row3l);
    row1h = Sse2.Xor(row1h, row3h);
    row1l = Sse2.Xor(row1l, Sse2.LoadVector128(hptr));
    row1h = Sse2.Xor(row1h, Sse2.LoadVector128(hptr + 2));
    Sse2.Store(hptr, row1l);
    Sse2.Store(hptr + 2, row1h);
    row2l = Sse2.Xor(row2l, row4l);
    row2h = Sse2.Xor(row2h, row4h);
    row2l = Sse2.Xor(row2l, Sse2.LoadVector128(hptr + 4));
    row2h = Sse2.Xor(row2h, Sse2.LoadVector128(hptr + 6));
    Sse2.Store(hptr + 4, row2l);
    Sse2.Store(hptr + 6, row2h);
}
unsafe private static void mixAvx2(Blake2bContext *s, ulong *m) { var row1 = Avx.LoadVector256(s->h); var row2 = Avx.LoadVector256(s->h + 4); var row3 = v256iv0; var row4 = v256iv1; row4 = Avx2.Xor(row4, Avx.LoadVector256(s->t)); // reads into f[] as well //ROUND 1 var m0 = Avx2.BroadcastVector128ToVector256(m); var m1 = Avx2.BroadcastVector128ToVector256(m + 2); var m2 = Avx2.BroadcastVector128ToVector256(m + 4); var m3 = Avx2.BroadcastVector128ToVector256(m + 6); var r24 = v256rm0; var r16 = v256rm1; var t0 = Avx2.UnpackLow(m0, m1); var t1 = Avx2.UnpackLow(m2, m3); var b0 = Avx2.Blend(t0.AsUInt32(), t1.AsUInt32(), 0b_1111_0000).AsUInt64(); //G1 row1 = Avx2.Add(Avx2.Add(row1, b0), row2); row4 = Avx2.Xor(row4, row1); row4 = Avx2.Shuffle(row4.AsUInt32(), 0b_10_11_00_01).AsUInt64(); row3 = Avx2.Add(row3, row4); row2 = Avx2.Xor(row2, row3); row2 = Avx2.Shuffle(row2.AsSByte(), r24).AsUInt64(); t0 = Avx2.UnpackHigh(m0, m1); t1 = Avx2.UnpackHigh(m2, m3); b0 = Avx2.Blend(t0.AsUInt32(), t1.AsUInt32(), 0b_1111_0000).AsUInt64(); //G2 row1 = Avx2.Add(Avx2.Add(row1, b0), row2); row4 = Avx2.Xor(row4, row1); row4 = Avx2.Shuffle(row4.AsSByte(), r16).AsUInt64(); row3 = Avx2.Add(row3, row4); row2 = Avx2.Xor(row2, row3); row2 = Avx2.Xor(Avx2.ShiftRightLogical(row2, 63), Avx2.Add(row2, row2)); //DIAGONALIZE row4 = Avx2.Permute4x64(row4, 0b_10_01_00_11); row3 = Avx2.Permute4x64(row3, 0b_01_00_11_10); row2 = Avx2.Permute4x64(row2, 0b_00_11_10_01); var m4 = Avx2.BroadcastVector128ToVector256(m + 8); var m5 = Avx2.BroadcastVector128ToVector256(m + 10); var m6 = Avx2.BroadcastVector128ToVector256(m + 12); var m7 = Avx2.BroadcastVector128ToVector256(m + 14); t0 = Avx2.UnpackLow(m4, m5); t1 = Avx2.UnpackLow(m6, m7); b0 = Avx2.Blend(t0.AsUInt32(), t1.AsUInt32(), 0b_1111_0000).AsUInt64(); //G1 row1 = Avx2.Add(Avx2.Add(row1, b0), row2); row4 = Avx2.Xor(row4, row1); row4 = Avx2.Shuffle(row4.AsUInt32(), 0b_10_11_00_01).AsUInt64(); row3 = Avx2.Add(row3, row4); row2 = Avx2.Xor(row2, row3); row2 = 
Avx2.Shuffle(row2.AsSByte(), r24).AsUInt64(); t0 = Avx2.UnpackHigh(m4, m5); t1 = Avx2.UnpackHigh(m6, m7); b0 = Avx2.Blend(t0.AsUInt32(), t1.AsUInt32(), 0b_1111_0000).AsUInt64(); //G2 row1 = Avx2.Add(Avx2.Add(row1, b0), row2); row4 = Avx2.Xor(row4, row1); row4 = Avx2.Shuffle(row4.AsSByte(), r16).AsUInt64(); row3 = Avx2.Add(row3, row4); row2 = Avx2.Xor(row2, row3); row2 = Avx2.Xor(Avx2.ShiftRightLogical(row2, 63), Avx2.Add(row2, row2)); //UNDIAGONALIZE row4 = Avx2.Permute4x64(row4, 0b_00_11_10_01); row3 = Avx2.Permute4x64(row3, 0b_01_00_11_10); row2 = Avx2.Permute4x64(row2, 0b_10_01_00_11); //ROUND 2 t0 = Avx2.UnpackLow(m7, m2); t1 = Avx2.UnpackHigh(m4, m6); b0 = Avx2.Blend(t0.AsUInt32(), t1.AsUInt32(), 0b_1111_0000).AsUInt64(); //G1 row1 = Avx2.Add(Avx2.Add(row1, b0), row2); row4 = Avx2.Xor(row4, row1); row4 = Avx2.Shuffle(row4.AsUInt32(), 0b_10_11_00_01).AsUInt64(); row3 = Avx2.Add(row3, row4); row2 = Avx2.Xor(row2, row3); row2 = Avx2.Shuffle(row2.AsSByte(), r24).AsUInt64(); t0 = Avx2.UnpackLow(m5, m4); t1 = Avx2.AlignRight(m3, m7, 8); b0 = Avx2.Blend(t0.AsUInt32(), t1.AsUInt32(), 0b_1111_0000).AsUInt64(); //G2 row1 = Avx2.Add(Avx2.Add(row1, b0), row2); row4 = Avx2.Xor(row4, row1); row4 = Avx2.Shuffle(row4.AsSByte(), r16).AsUInt64(); row3 = Avx2.Add(row3, row4); row2 = Avx2.Xor(row2, row3); row2 = Avx2.Xor(Avx2.ShiftRightLogical(row2, 63), Avx2.Add(row2, row2)); //DIAGONALIZE row4 = Avx2.Permute4x64(row4, 0b_10_01_00_11); row3 = Avx2.Permute4x64(row3, 0b_01_00_11_10); row2 = Avx2.Permute4x64(row2, 0b_00_11_10_01); t0 = Avx2.Shuffle(m0.AsUInt32(), 0b_01_00_11_10).AsUInt64(); t1 = Avx2.UnpackHigh(m5, m2); b0 = Avx2.Blend(t0.AsUInt32(), t1.AsUInt32(), 0b_1111_0000).AsUInt64(); //G1 row1 = Avx2.Add(Avx2.Add(row1, b0), row2); row4 = Avx2.Xor(row4, row1); row4 = Avx2.Shuffle(row4.AsUInt32(), 0b_10_11_00_01).AsUInt64(); row3 = Avx2.Add(row3, row4); row2 = Avx2.Xor(row2, row3); row2 = Avx2.Shuffle(row2.AsSByte(), r24).AsUInt64(); t0 = Avx2.UnpackLow(m6, m1); t1 = 
Avx2.UnpackHigh(m3, m1); b0 = Avx2.Blend(t0.AsUInt32(), t1.AsUInt32(), 0b_1111_0000).AsUInt64(); //G2 row1 = Avx2.Add(Avx2.Add(row1, b0), row2); row4 = Avx2.Xor(row4, row1); row4 = Avx2.Shuffle(row4.AsSByte(), r16).AsUInt64(); row3 = Avx2.Add(row3, row4); row2 = Avx2.Xor(row2, row3); row2 = Avx2.Xor(Avx2.ShiftRightLogical(row2, 63), Avx2.Add(row2, row2)); //UNDIAGONALIZE row4 = Avx2.Permute4x64(row4, 0b_00_11_10_01); row3 = Avx2.Permute4x64(row3, 0b_01_00_11_10); row2 = Avx2.Permute4x64(row2, 0b_10_01_00_11); //ROUND 3 t0 = Avx2.AlignRight(m6, m5, 8); t1 = Avx2.UnpackHigh(m2, m7); b0 = Avx2.Blend(t0.AsUInt32(), t1.AsUInt32(), 0b_1111_0000).AsUInt64(); //G1 row1 = Avx2.Add(Avx2.Add(row1, b0), row2); row4 = Avx2.Xor(row4, row1); row4 = Avx2.Shuffle(row4.AsUInt32(), 0b_10_11_00_01).AsUInt64(); row3 = Avx2.Add(row3, row4); row2 = Avx2.Xor(row2, row3); row2 = Avx2.Shuffle(row2.AsSByte(), r24).AsUInt64(); t0 = Avx2.UnpackLow(m4, m0); t1 = Avx2.Blend(m1.AsUInt32(), m6.AsUInt32(), 0b_1100_1100).AsUInt64(); b0 = Avx2.Blend(t0.AsUInt32(), t1.AsUInt32(), 0b_1111_0000).AsUInt64(); //G2 row1 = Avx2.Add(Avx2.Add(row1, b0), row2); row4 = Avx2.Xor(row4, row1); row4 = Avx2.Shuffle(row4.AsSByte(), r16).AsUInt64(); row3 = Avx2.Add(row3, row4); row2 = Avx2.Xor(row2, row3); row2 = Avx2.Xor(Avx2.ShiftRightLogical(row2, 63), Avx2.Add(row2, row2)); //DIAGONALIZE row4 = Avx2.Permute4x64(row4, 0b_10_01_00_11); row3 = Avx2.Permute4x64(row3, 0b_01_00_11_10); row2 = Avx2.Permute4x64(row2, 0b_00_11_10_01); t0 = Avx2.Blend(m5.AsUInt32(), m1.AsUInt32(), 0b_1100_1100).AsUInt64(); t1 = Avx2.UnpackHigh(m3, m4); b0 = Avx2.Blend(t0.AsUInt32(), t1.AsUInt32(), 0b_1111_0000).AsUInt64(); //G1 row1 = Avx2.Add(Avx2.Add(row1, b0), row2); row4 = Avx2.Xor(row4, row1); row4 = Avx2.Shuffle(row4.AsUInt32(), 0b_10_11_00_01).AsUInt64(); row3 = Avx2.Add(row3, row4); row2 = Avx2.Xor(row2, row3); row2 = Avx2.Shuffle(row2.AsSByte(), r24).AsUInt64(); t0 = Avx2.UnpackLow(m7, m3); t1 = Avx2.AlignRight(m2, m0, 8); b0 = 
Avx2.Blend(t0.AsUInt32(), t1.AsUInt32(), 0b_1111_0000).AsUInt64(); //G2 row1 = Avx2.Add(Avx2.Add(row1, b0), row2); row4 = Avx2.Xor(row4, row1); row4 = Avx2.Shuffle(row4.AsSByte(), r16).AsUInt64(); row3 = Avx2.Add(row3, row4); row2 = Avx2.Xor(row2, row3); row2 = Avx2.Xor(Avx2.ShiftRightLogical(row2, 63), Avx2.Add(row2, row2)); //UNDIAGONALIZE row4 = Avx2.Permute4x64(row4, 0b_00_11_10_01); row3 = Avx2.Permute4x64(row3, 0b_01_00_11_10); row2 = Avx2.Permute4x64(row2, 0b_10_01_00_11); //ROUND 4 t0 = Avx2.UnpackHigh(m3, m1); t1 = Avx2.UnpackHigh(m6, m5); b0 = Avx2.Blend(t0.AsUInt32(), t1.AsUInt32(), 0b_1111_0000).AsUInt64(); //G1 row1 = Avx2.Add(Avx2.Add(row1, b0), row2); row4 = Avx2.Xor(row4, row1); row4 = Avx2.Shuffle(row4.AsUInt32(), 0b_10_11_00_01).AsUInt64(); row3 = Avx2.Add(row3, row4); row2 = Avx2.Xor(row2, row3); row2 = Avx2.Shuffle(row2.AsSByte(), r24).AsUInt64(); t0 = Avx2.UnpackHigh(m4, m0); t1 = Avx2.UnpackLow(m6, m7); b0 = Avx2.Blend(t0.AsUInt32(), t1.AsUInt32(), 0b_1111_0000).AsUInt64(); //G2 row1 = Avx2.Add(Avx2.Add(row1, b0), row2); row4 = Avx2.Xor(row4, row1); row4 = Avx2.Shuffle(row4.AsSByte(), r16).AsUInt64(); row3 = Avx2.Add(row3, row4); row2 = Avx2.Xor(row2, row3); row2 = Avx2.Xor(Avx2.ShiftRightLogical(row2, 63), Avx2.Add(row2, row2)); //DIAGONALIZE row4 = Avx2.Permute4x64(row4, 0b_10_01_00_11); row3 = Avx2.Permute4x64(row3, 0b_01_00_11_10); row2 = Avx2.Permute4x64(row2, 0b_00_11_10_01); t0 = Avx2.Blend(m1.AsUInt32(), m2.AsUInt32(), 0b_1100_1100).AsUInt64(); t1 = Avx2.Blend(m2.AsUInt32(), m7.AsUInt32(), 0b_1100_1100).AsUInt64(); b0 = Avx2.Blend(t0.AsUInt32(), t1.AsUInt32(), 0b_1111_0000).AsUInt64(); //G1 row1 = Avx2.Add(Avx2.Add(row1, b0), row2); row4 = Avx2.Xor(row4, row1); row4 = Avx2.Shuffle(row4.AsUInt32(), 0b_10_11_00_01).AsUInt64(); row3 = Avx2.Add(row3, row4); row2 = Avx2.Xor(row2, row3); row2 = Avx2.Shuffle(row2.AsSByte(), r24).AsUInt64(); t0 = Avx2.UnpackLow(m3, m5); t1 = Avx2.UnpackLow(m0, m4); b0 = Avx2.Blend(t0.AsUInt32(), t1.AsUInt32(), 
0b_1111_0000).AsUInt64(); //G2 row1 = Avx2.Add(Avx2.Add(row1, b0), row2); row4 = Avx2.Xor(row4, row1); row4 = Avx2.Shuffle(row4.AsSByte(), r16).AsUInt64(); row3 = Avx2.Add(row3, row4); row2 = Avx2.Xor(row2, row3); row2 = Avx2.Xor(Avx2.ShiftRightLogical(row2, 63), Avx2.Add(row2, row2)); //UNDIAGONALIZE row4 = Avx2.Permute4x64(row4, 0b_00_11_10_01); row3 = Avx2.Permute4x64(row3, 0b_01_00_11_10); row2 = Avx2.Permute4x64(row2, 0b_10_01_00_11); //ROUND 5 t0 = Avx2.UnpackHigh(m4, m2); t1 = Avx2.UnpackLow(m1, m5); b0 = Avx2.Blend(t0.AsUInt32(), t1.AsUInt32(), 0b_1111_0000).AsUInt64(); //G1 row1 = Avx2.Add(Avx2.Add(row1, b0), row2); row4 = Avx2.Xor(row4, row1); row4 = Avx2.Shuffle(row4.AsUInt32(), 0b_10_11_00_01).AsUInt64(); row3 = Avx2.Add(row3, row4); row2 = Avx2.Xor(row2, row3); row2 = Avx2.Shuffle(row2.AsSByte(), r24).AsUInt64(); t0 = Avx2.Blend(m0.AsUInt32(), m3.AsUInt32(), 0b_1100_1100).AsUInt64(); t1 = Avx2.Blend(m2.AsUInt32(), m7.AsUInt32(), 0b_1100_1100).AsUInt64(); b0 = Avx2.Blend(t0.AsUInt32(), t1.AsUInt32(), 0b_1111_0000).AsUInt64(); //G2 row1 = Avx2.Add(Avx2.Add(row1, b0), row2); row4 = Avx2.Xor(row4, row1); row4 = Avx2.Shuffle(row4.AsSByte(), r16).AsUInt64(); row3 = Avx2.Add(row3, row4); row2 = Avx2.Xor(row2, row3); row2 = Avx2.Xor(Avx2.ShiftRightLogical(row2, 63), Avx2.Add(row2, row2)); //DIAGONALIZE row4 = Avx2.Permute4x64(row4, 0b_10_01_00_11); row3 = Avx2.Permute4x64(row3, 0b_01_00_11_10); row2 = Avx2.Permute4x64(row2, 0b_00_11_10_01); t0 = Avx2.Blend(m7.AsUInt32(), m5.AsUInt32(), 0b_1100_1100).AsUInt64(); t1 = Avx2.Blend(m3.AsUInt32(), m1.AsUInt32(), 0b_1100_1100).AsUInt64(); b0 = Avx2.Blend(t0.AsUInt32(), t1.AsUInt32(), 0b_1111_0000).AsUInt64(); //G1 row1 = Avx2.Add(Avx2.Add(row1, b0), row2); row4 = Avx2.Xor(row4, row1); row4 = Avx2.Shuffle(row4.AsUInt32(), 0b_10_11_00_01).AsUInt64(); row3 = Avx2.Add(row3, row4); row2 = Avx2.Xor(row2, row3); row2 = Avx2.Shuffle(row2.AsSByte(), r24).AsUInt64(); t0 = Avx2.AlignRight(m6, m0, 8); t1 = 
Avx2.Blend(m4.AsUInt32(), m6.AsUInt32(), 0b_1100_1100).AsUInt64(); b0 = Avx2.Blend(t0.AsUInt32(), t1.AsUInt32(), 0b_1111_0000).AsUInt64(); //G2 row1 = Avx2.Add(Avx2.Add(row1, b0), row2); row4 = Avx2.Xor(row4, row1); row4 = Avx2.Shuffle(row4.AsSByte(), r16).AsUInt64(); row3 = Avx2.Add(row3, row4); row2 = Avx2.Xor(row2, row3); row2 = Avx2.Xor(Avx2.ShiftRightLogical(row2, 63), Avx2.Add(row2, row2)); //UNDIAGONALIZE row4 = Avx2.Permute4x64(row4, 0b_00_11_10_01); row3 = Avx2.Permute4x64(row3, 0b_01_00_11_10); row2 = Avx2.Permute4x64(row2, 0b_10_01_00_11); //ROUND 6 t0 = Avx2.UnpackLow(m1, m3); t1 = Avx2.UnpackLow(m0, m4); b0 = Avx2.Blend(t0.AsUInt32(), t1.AsUInt32(), 0b_1111_0000).AsUInt64(); //G1 row1 = Avx2.Add(Avx2.Add(row1, b0), row2); row4 = Avx2.Xor(row4, row1); row4 = Avx2.Shuffle(row4.AsUInt32(), 0b_10_11_00_01).AsUInt64(); row3 = Avx2.Add(row3, row4); row2 = Avx2.Xor(row2, row3); row2 = Avx2.Shuffle(row2.AsSByte(), r24).AsUInt64(); t0 = Avx2.UnpackLow(m6, m5); t1 = Avx2.UnpackHigh(m5, m1); b0 = Avx2.Blend(t0.AsUInt32(), t1.AsUInt32(), 0b_1111_0000).AsUInt64(); //G2 row1 = Avx2.Add(Avx2.Add(row1, b0), row2); row4 = Avx2.Xor(row4, row1); row4 = Avx2.Shuffle(row4.AsSByte(), r16).AsUInt64(); row3 = Avx2.Add(row3, row4); row2 = Avx2.Xor(row2, row3); row2 = Avx2.Xor(Avx2.ShiftRightLogical(row2, 63), Avx2.Add(row2, row2)); //DIAGONALIZE row4 = Avx2.Permute4x64(row4, 0b_10_01_00_11); row3 = Avx2.Permute4x64(row3, 0b_01_00_11_10); row2 = Avx2.Permute4x64(row2, 0b_00_11_10_01); t0 = Avx2.Blend(m2.AsUInt32(), m3.AsUInt32(), 0b_1100_1100).AsUInt64(); t1 = Avx2.UnpackHigh(m7, m0); b0 = Avx2.Blend(t0.AsUInt32(), t1.AsUInt32(), 0b_1111_0000).AsUInt64(); //G1 row1 = Avx2.Add(Avx2.Add(row1, b0), row2); row4 = Avx2.Xor(row4, row1); row4 = Avx2.Shuffle(row4.AsUInt32(), 0b_10_11_00_01).AsUInt64(); row3 = Avx2.Add(row3, row4); row2 = Avx2.Xor(row2, row3); row2 = Avx2.Shuffle(row2.AsSByte(), r24).AsUInt64(); t0 = Avx2.UnpackHigh(m6, m2); t1 = Avx2.Blend(m7.AsUInt32(), m4.AsUInt32(), 
0b_1100_1100).AsUInt64(); b0 = Avx2.Blend(t0.AsUInt32(), t1.AsUInt32(), 0b_1111_0000).AsUInt64(); //G2 row1 = Avx2.Add(Avx2.Add(row1, b0), row2); row4 = Avx2.Xor(row4, row1); row4 = Avx2.Shuffle(row4.AsSByte(), r16).AsUInt64(); row3 = Avx2.Add(row3, row4); row2 = Avx2.Xor(row2, row3); row2 = Avx2.Xor(Avx2.ShiftRightLogical(row2, 63), Avx2.Add(row2, row2)); //UNDIAGONALIZE row4 = Avx2.Permute4x64(row4, 0b_00_11_10_01); row3 = Avx2.Permute4x64(row3, 0b_01_00_11_10); row2 = Avx2.Permute4x64(row2, 0b_10_01_00_11); //ROUND 7 t0 = Avx2.Blend(m6.AsUInt32(), m0.AsUInt32(), 0b_1100_1100).AsUInt64(); t1 = Avx2.UnpackLow(m7, m2); b0 = Avx2.Blend(t0.AsUInt32(), t1.AsUInt32(), 0b_1111_0000).AsUInt64(); //G1 row1 = Avx2.Add(Avx2.Add(row1, b0), row2); row4 = Avx2.Xor(row4, row1); row4 = Avx2.Shuffle(row4.AsUInt32(), 0b_10_11_00_01).AsUInt64(); row3 = Avx2.Add(row3, row4); row2 = Avx2.Xor(row2, row3); row2 = Avx2.Shuffle(row2.AsSByte(), r24).AsUInt64(); t0 = Avx2.UnpackHigh(m2, m7); t1 = Avx2.AlignRight(m5, m6, 8); b0 = Avx2.Blend(t0.AsUInt32(), t1.AsUInt32(), 0b_1111_0000).AsUInt64(); //G2 row1 = Avx2.Add(Avx2.Add(row1, b0), row2); row4 = Avx2.Xor(row4, row1); row4 = Avx2.Shuffle(row4.AsSByte(), r16).AsUInt64(); row3 = Avx2.Add(row3, row4); row2 = Avx2.Xor(row2, row3); row2 = Avx2.Xor(Avx2.ShiftRightLogical(row2, 63), Avx2.Add(row2, row2)); //DIAGONALIZE row4 = Avx2.Permute4x64(row4, 0b_10_01_00_11); row3 = Avx2.Permute4x64(row3, 0b_01_00_11_10); row2 = Avx2.Permute4x64(row2, 0b_00_11_10_01); t0 = Avx2.UnpackLow(m0, m3); t1 = Avx2.Shuffle(m4.AsUInt32(), 0b_01_00_11_10).AsUInt64(); b0 = Avx2.Blend(t0.AsUInt32(), t1.AsUInt32(), 0b_1111_0000).AsUInt64(); //G1 row1 = Avx2.Add(Avx2.Add(row1, b0), row2); row4 = Avx2.Xor(row4, row1); row4 = Avx2.Shuffle(row4.AsUInt32(), 0b_10_11_00_01).AsUInt64(); row3 = Avx2.Add(row3, row4); row2 = Avx2.Xor(row2, row3); row2 = Avx2.Shuffle(row2.AsSByte(), r24).AsUInt64(); t0 = Avx2.UnpackHigh(m3, m1); t1 = Avx2.Blend(m1.AsUInt32(), m5.AsUInt32(), 
0b_1100_1100).AsUInt64(); b0 = Avx2.Blend(t0.AsUInt32(), t1.AsUInt32(), 0b_1111_0000).AsUInt64(); //G2 row1 = Avx2.Add(Avx2.Add(row1, b0), row2); row4 = Avx2.Xor(row4, row1); row4 = Avx2.Shuffle(row4.AsSByte(), r16).AsUInt64(); row3 = Avx2.Add(row3, row4); row2 = Avx2.Xor(row2, row3); row2 = Avx2.Xor(Avx2.ShiftRightLogical(row2, 63), Avx2.Add(row2, row2)); //UNDIAGONALIZE row4 = Avx2.Permute4x64(row4, 0b_00_11_10_01); row3 = Avx2.Permute4x64(row3, 0b_01_00_11_10); row2 = Avx2.Permute4x64(row2, 0b_10_01_00_11); //ROUND 8 t0 = Avx2.UnpackHigh(m6, m3); t1 = Avx2.Blend(m6.AsUInt32(), m1.AsUInt32(), 0b_1100_1100).AsUInt64(); b0 = Avx2.Blend(t0.AsUInt32(), t1.AsUInt32(), 0b_1111_0000).AsUInt64(); //G1 row1 = Avx2.Add(Avx2.Add(row1, b0), row2); row4 = Avx2.Xor(row4, row1); row4 = Avx2.Shuffle(row4.AsUInt32(), 0b_10_11_00_01).AsUInt64(); row3 = Avx2.Add(row3, row4); row2 = Avx2.Xor(row2, row3); row2 = Avx2.Shuffle(row2.AsSByte(), r24).AsUInt64(); t0 = Avx2.AlignRight(m7, m5, 8); t1 = Avx2.UnpackHigh(m0, m4); b0 = Avx2.Blend(t0.AsUInt32(), t1.AsUInt32(), 0b_1111_0000).AsUInt64(); //G2 row1 = Avx2.Add(Avx2.Add(row1, b0), row2); row4 = Avx2.Xor(row4, row1); row4 = Avx2.Shuffle(row4.AsSByte(), r16).AsUInt64(); row3 = Avx2.Add(row3, row4); row2 = Avx2.Xor(row2, row3); row2 = Avx2.Xor(Avx2.ShiftRightLogical(row2, 63), Avx2.Add(row2, row2)); //DIAGONALIZE row4 = Avx2.Permute4x64(row4, 0b_10_01_00_11); row3 = Avx2.Permute4x64(row3, 0b_01_00_11_10); row2 = Avx2.Permute4x64(row2, 0b_00_11_10_01); t0 = Avx2.UnpackHigh(m2, m7); t1 = Avx2.UnpackLow(m4, m1); b0 = Avx2.Blend(t0.AsUInt32(), t1.AsUInt32(), 0b_1111_0000).AsUInt64(); //G1 row1 = Avx2.Add(Avx2.Add(row1, b0), row2); row4 = Avx2.Xor(row4, row1); row4 = Avx2.Shuffle(row4.AsUInt32(), 0b_10_11_00_01).AsUInt64(); row3 = Avx2.Add(row3, row4); row2 = Avx2.Xor(row2, row3); row2 = Avx2.Shuffle(row2.AsSByte(), r24).AsUInt64(); t0 = Avx2.UnpackLow(m0, m2); t1 = Avx2.UnpackLow(m3, m5); b0 = Avx2.Blend(t0.AsUInt32(), t1.AsUInt32(), 
0b_1111_0000).AsUInt64(); //G2 row1 = Avx2.Add(Avx2.Add(row1, b0), row2); row4 = Avx2.Xor(row4, row1); row4 = Avx2.Shuffle(row4.AsSByte(), r16).AsUInt64(); row3 = Avx2.Add(row3, row4); row2 = Avx2.Xor(row2, row3); row2 = Avx2.Xor(Avx2.ShiftRightLogical(row2, 63), Avx2.Add(row2, row2)); //UNDIAGONALIZE row4 = Avx2.Permute4x64(row4, 0b_00_11_10_01); row3 = Avx2.Permute4x64(row3, 0b_01_00_11_10); row2 = Avx2.Permute4x64(row2, 0b_10_01_00_11); //ROUND 9 t0 = Avx2.UnpackLow(m3, m7); t1 = Avx2.AlignRight(m0, m5, 8); b0 = Avx2.Blend(t0.AsUInt32(), t1.AsUInt32(), 0b_1111_0000).AsUInt64(); //G1 row1 = Avx2.Add(Avx2.Add(row1, b0), row2); row4 = Avx2.Xor(row4, row1); row4 = Avx2.Shuffle(row4.AsUInt32(), 0b_10_11_00_01).AsUInt64(); row3 = Avx2.Add(row3, row4); row2 = Avx2.Xor(row2, row3); row2 = Avx2.Shuffle(row2.AsSByte(), r24).AsUInt64(); t0 = Avx2.UnpackHigh(m7, m4); t1 = Avx2.AlignRight(m4, m1, 8); b0 = Avx2.Blend(t0.AsUInt32(), t1.AsUInt32(), 0b_1111_0000).AsUInt64(); //G2 row1 = Avx2.Add(Avx2.Add(row1, b0), row2); row4 = Avx2.Xor(row4, row1); row4 = Avx2.Shuffle(row4.AsSByte(), r16).AsUInt64(); row3 = Avx2.Add(row3, row4); row2 = Avx2.Xor(row2, row3); row2 = Avx2.Xor(Avx2.ShiftRightLogical(row2, 63), Avx2.Add(row2, row2)); //DIAGONALIZE row4 = Avx2.Permute4x64(row4, 0b_10_01_00_11); row3 = Avx2.Permute4x64(row3, 0b_01_00_11_10); row2 = Avx2.Permute4x64(row2, 0b_00_11_10_01); t0 = m6; t1 = Avx2.AlignRight(m5, m0, 8); b0 = Avx2.Blend(t0.AsUInt32(), t1.AsUInt32(), 0b_1111_0000).AsUInt64(); //G1 row1 = Avx2.Add(Avx2.Add(row1, b0), row2); row4 = Avx2.Xor(row4, row1); row4 = Avx2.Shuffle(row4.AsUInt32(), 0b_10_11_00_01).AsUInt64(); row3 = Avx2.Add(row3, row4); row2 = Avx2.Xor(row2, row3); row2 = Avx2.Shuffle(row2.AsSByte(), r24).AsUInt64(); t0 = Avx2.Blend(m1.AsUInt32(), m3.AsUInt32(), 0b_1100_1100).AsUInt64(); t1 = m2; b0 = Avx2.Blend(t0.AsUInt32(), t1.AsUInt32(), 0b_1111_0000).AsUInt64(); //G2 row1 = Avx2.Add(Avx2.Add(row1, b0), row2); row4 = Avx2.Xor(row4, row1); row4 = 
Avx2.Shuffle(row4.AsSByte(), r16).AsUInt64(); row3 = Avx2.Add(row3, row4); row2 = Avx2.Xor(row2, row3); row2 = Avx2.Xor(Avx2.ShiftRightLogical(row2, 63), Avx2.Add(row2, row2)); //UNDIAGONALIZE row4 = Avx2.Permute4x64(row4, 0b_00_11_10_01); row3 = Avx2.Permute4x64(row3, 0b_01_00_11_10); row2 = Avx2.Permute4x64(row2, 0b_10_01_00_11); //ROUND 10 t0 = Avx2.UnpackLow(m5, m4); t1 = Avx2.UnpackHigh(m3, m0); b0 = Avx2.Blend(t0.AsUInt32(), t1.AsUInt32(), 0b_1111_0000).AsUInt64(); //G1 row1 = Avx2.Add(Avx2.Add(row1, b0), row2); row4 = Avx2.Xor(row4, row1); row4 = Avx2.Shuffle(row4.AsUInt32(), 0b_10_11_00_01).AsUInt64(); row3 = Avx2.Add(row3, row4); row2 = Avx2.Xor(row2, row3); row2 = Avx2.Shuffle(row2.AsSByte(), r24).AsUInt64(); t0 = Avx2.UnpackLow(m1, m2); t1 = Avx2.Blend(m3.AsUInt32(), m2.AsUInt32(), 0b_1100_1100).AsUInt64(); b0 = Avx2.Blend(t0.AsUInt32(), t1.AsUInt32(), 0b_1111_0000).AsUInt64(); //G2 row1 = Avx2.Add(Avx2.Add(row1, b0), row2); row4 = Avx2.Xor(row4, row1); row4 = Avx2.Shuffle(row4.AsSByte(), r16).AsUInt64(); row3 = Avx2.Add(row3, row4); row2 = Avx2.Xor(row2, row3); row2 = Avx2.Xor(Avx2.ShiftRightLogical(row2, 63), Avx2.Add(row2, row2)); //DIAGONALIZE row4 = Avx2.Permute4x64(row4, 0b_10_01_00_11); row3 = Avx2.Permute4x64(row3, 0b_01_00_11_10); row2 = Avx2.Permute4x64(row2, 0b_00_11_10_01); t0 = Avx2.UnpackHigh(m7, m4); t1 = Avx2.UnpackHigh(m1, m6); b0 = Avx2.Blend(t0.AsUInt32(), t1.AsUInt32(), 0b_1111_0000).AsUInt64(); //G1 row1 = Avx2.Add(Avx2.Add(row1, b0), row2); row4 = Avx2.Xor(row4, row1); row4 = Avx2.Shuffle(row4.AsUInt32(), 0b_10_11_00_01).AsUInt64(); row3 = Avx2.Add(row3, row4); row2 = Avx2.Xor(row2, row3); row2 = Avx2.Shuffle(row2.AsSByte(), r24).AsUInt64(); t0 = Avx2.AlignRight(m7, m5, 8); t1 = Avx2.UnpackLow(m6, m0); b0 = Avx2.Blend(t0.AsUInt32(), t1.AsUInt32(), 0b_1111_0000).AsUInt64(); //G2 row1 = Avx2.Add(Avx2.Add(row1, b0), row2); row4 = Avx2.Xor(row4, row1); row4 = Avx2.Shuffle(row4.AsSByte(), r16).AsUInt64(); row3 = Avx2.Add(row3, row4); 
row2 = Avx2.Xor(row2, row3); row2 = Avx2.Xor(Avx2.ShiftRightLogical(row2, 63), Avx2.Add(row2, row2)); //UNDIAGONALIZE row4 = Avx2.Permute4x64(row4, 0b_00_11_10_01); row3 = Avx2.Permute4x64(row3, 0b_01_00_11_10); row2 = Avx2.Permute4x64(row2, 0b_10_01_00_11); //ROUND 11 t0 = Avx2.UnpackLow(m0, m1); t1 = Avx2.UnpackLow(m2, m3); b0 = Avx2.Blend(t0.AsUInt32(), t1.AsUInt32(), 0b_1111_0000).AsUInt64(); //G1 row1 = Avx2.Add(Avx2.Add(row1, b0), row2); row4 = Avx2.Xor(row4, row1); row4 = Avx2.Shuffle(row4.AsUInt32(), 0b_10_11_00_01).AsUInt64(); row3 = Avx2.Add(row3, row4); row2 = Avx2.Xor(row2, row3); row2 = Avx2.Shuffle(row2.AsSByte(), r24).AsUInt64(); t0 = Avx2.UnpackHigh(m0, m1); t1 = Avx2.UnpackHigh(m2, m3); b0 = Avx2.Blend(t0.AsUInt32(), t1.AsUInt32(), 0b_1111_0000).AsUInt64(); //G2 row1 = Avx2.Add(Avx2.Add(row1, b0), row2); row4 = Avx2.Xor(row4, row1); row4 = Avx2.Shuffle(row4.AsSByte(), r16).AsUInt64(); row3 = Avx2.Add(row3, row4); row2 = Avx2.Xor(row2, row3); row2 = Avx2.Xor(Avx2.ShiftRightLogical(row2, 63), Avx2.Add(row2, row2)); //DIAGONALIZE row4 = Avx2.Permute4x64(row4, 0b_10_01_00_11); row3 = Avx2.Permute4x64(row3, 0b_01_00_11_10); row2 = Avx2.Permute4x64(row2, 0b_00_11_10_01); t0 = Avx2.UnpackLow(m4, m5); t1 = Avx2.UnpackLow(m6, m7); b0 = Avx2.Blend(t0.AsUInt32(), t1.AsUInt32(), 0b_1111_0000).AsUInt64(); //G1 row1 = Avx2.Add(Avx2.Add(row1, b0), row2); row4 = Avx2.Xor(row4, row1); row4 = Avx2.Shuffle(row4.AsUInt32(), 0b_10_11_00_01).AsUInt64(); row3 = Avx2.Add(row3, row4); row2 = Avx2.Xor(row2, row3); row2 = Avx2.Shuffle(row2.AsSByte(), r24).AsUInt64(); t0 = Avx2.UnpackHigh(m4, m5); t1 = Avx2.UnpackHigh(m6, m7); b0 = Avx2.Blend(t0.AsUInt32(), t1.AsUInt32(), 0b_1111_0000).AsUInt64(); //G2 row1 = Avx2.Add(Avx2.Add(row1, b0), row2); row4 = Avx2.Xor(row4, row1); row4 = Avx2.Shuffle(row4.AsSByte(), r16).AsUInt64(); row3 = Avx2.Add(row3, row4); row2 = Avx2.Xor(row2, row3); row2 = Avx2.Xor(Avx2.ShiftRightLogical(row2, 63), Avx2.Add(row2, row2)); //UNDIAGONALIZE row4 = 
Avx2.Permute4x64(row4, 0b_00_11_10_01); row3 = Avx2.Permute4x64(row3, 0b_01_00_11_10); row2 = Avx2.Permute4x64(row2, 0b_10_01_00_11); //ROUND 12 t0 = Avx2.UnpackLow(m7, m2); t1 = Avx2.UnpackHigh(m4, m6); b0 = Avx2.Blend(t0.AsUInt32(), t1.AsUInt32(), 0b_1111_0000).AsUInt64(); //G1 row1 = Avx2.Add(Avx2.Add(row1, b0), row2); row4 = Avx2.Xor(row4, row1); row4 = Avx2.Shuffle(row4.AsUInt32(), 0b_10_11_00_01).AsUInt64(); row3 = Avx2.Add(row3, row4); row2 = Avx2.Xor(row2, row3); row2 = Avx2.Shuffle(row2.AsSByte(), r24).AsUInt64(); t0 = Avx2.UnpackLow(m5, m4); t1 = Avx2.AlignRight(m3, m7, 8); b0 = Avx2.Blend(t0.AsUInt32(), t1.AsUInt32(), 0b_1111_0000).AsUInt64(); //G2 row1 = Avx2.Add(Avx2.Add(row1, b0), row2); row4 = Avx2.Xor(row4, row1); row4 = Avx2.Shuffle(row4.AsSByte(), r16).AsUInt64(); row3 = Avx2.Add(row3, row4); row2 = Avx2.Xor(row2, row3); row2 = Avx2.Xor(Avx2.ShiftRightLogical(row2, 63), Avx2.Add(row2, row2)); //DIAGONALIZE row4 = Avx2.Permute4x64(row4, 0b_10_01_00_11); row3 = Avx2.Permute4x64(row3, 0b_01_00_11_10); row2 = Avx2.Permute4x64(row2, 0b_00_11_10_01); t0 = Avx2.Shuffle(m0.AsUInt32(), 0b_01_00_11_10).AsUInt64(); t1 = Avx2.UnpackHigh(m5, m2); b0 = Avx2.Blend(t0.AsUInt32(), t1.AsUInt32(), 0b_1111_0000).AsUInt64(); //G1 row1 = Avx2.Add(Avx2.Add(row1, b0), row2); row4 = Avx2.Xor(row4, row1); row4 = Avx2.Shuffle(row4.AsUInt32(), 0b_10_11_00_01).AsUInt64(); row3 = Avx2.Add(row3, row4); row2 = Avx2.Xor(row2, row3); row2 = Avx2.Shuffle(row2.AsSByte(), r24).AsUInt64(); t0 = Avx2.UnpackLow(m6, m1); t1 = Avx2.UnpackHigh(m3, m1); b0 = Avx2.Blend(t0.AsUInt32(), t1.AsUInt32(), 0b_1111_0000).AsUInt64(); //G2 row1 = Avx2.Add(Avx2.Add(row1, b0), row2); row4 = Avx2.Xor(row4, row1); row4 = Avx2.Shuffle(row4.AsSByte(), r16).AsUInt64(); row3 = Avx2.Add(row3, row4); row2 = Avx2.Xor(row2, row3); row2 = Avx2.Xor(Avx2.ShiftRightLogical(row2, 63), Avx2.Add(row2, row2)); //UNDIAGONALIZE row4 = Avx2.Permute4x64(row4, 0b_00_11_10_01); row3 = Avx2.Permute4x64(row3, 0b_01_00_11_10); 
row2 = Avx2.Permute4x64(row2, 0b_10_01_00_11); row1 = Avx2.Xor(row1, row3); row2 = Avx2.Xor(row2, row4); row1 = Avx2.Xor(row1, Avx2.LoadVector256(s->h)); row2 = Avx2.Xor(row2, Avx2.LoadVector256(s->h + 4)); Avx2.Store(s->h, row1); Avx2.Store(s->h + 4, row2); }
/// <summary>
/// Scalar (non-SIMD) BLAKE2b compression step: runs all 12 rounds of the G
/// mixing function, fully unrolled, over the 16-word working vector v00..v15.
/// v00..v07 are seeded from the hash state h[0..7]; v08..v15 from the BLAKE2b
/// IV constants, with the 128-bit byte counter t[0..1] and the finalization
/// flag f[0] xored into v12/v13/v14. The rotation amounts (32, 24, 16, 63)
/// and the per-round message-word selection (sigma schedule; rounds 11 and 12
/// repeat rounds 1 and 2) follow the BLAKE2b specification (RFC 7693).
/// On completion the feed-forward h[i] ^= v[i] ^ v[i+8] updates the state
/// in place. The caller (compress) guarantees m points at a full 128-byte
/// message block already in little-endian word order.
/// NOTE(review): `ror` is assumed to be a 64-bit rotate-right helper defined
/// elsewhere in this file — confirm; the sibling scalar variant uses the
/// equivalent explicit (v &gt;&gt; r) ^ (v &lt;&lt; (64 - r)) form.
/// NOTE(review): statement order deliberately interleaves the two halves of
/// the four column G mixes (and likewise the diagonals) for instruction-level
/// parallelism; do not reorder.
/// </summary>
unsafe private static void mixScalar(Blake2bContext *s, ulong *m) {
    unchecked {
        // Load the 16 message words into locals once so they can stay
        // enregistered across all 12 rounds.
        ulong m00 = m[00]; ulong m01 = m[01]; ulong m02 = m[02]; ulong m03 = m[03];
        ulong m04 = m[04]; ulong m05 = m[05]; ulong m06 = m[06]; ulong m07 = m[07];
        ulong m08 = m[08]; ulong m09 = m[09]; ulong m10 = m[10]; ulong m11 = m[11];
        ulong m12 = m[12]; ulong m13 = m[13]; ulong m14 = m[14]; ulong m15 = m[15];

        // Working vector: first half from the chained hash state...
        ulong v00 = s->htf.h[0]; ulong v01 = s->htf.h[1]; ulong v02 = s->htf.h[2]; ulong v03 = s->htf.h[3];
        ulong v04 = s->htf.h[4]; ulong v05 = s->htf.h[5]; ulong v06 = s->htf.h[6]; ulong v07 = s->htf.h[7];
        // ...second half from the BLAKE2b IV constants.
        ulong v08 = 0x6A09E667F3BCC908ul; ulong v09 = 0xBB67AE8584CAA73Bul;
        ulong v10 = 0x3C6EF372FE94F82Bul; ulong v11 = 0xA54FF53A5F1D36F1ul;
        ulong v12 = 0x510E527FADE682D1ul; ulong v13 = 0x9B05688C2B3E6C1Ful;
        ulong v14 = 0x1F83D9ABFB41BD6Bul; ulong v15 = 0x5BE0CD19137E2179ul;

        // Fold in the byte counter (t) and the finalization flag (f).
        v12 ^= s->htf.t[0]; v13 ^= s->htf.t[1]; v14 ^= s->htf.f[0];

        // Each round below: four column G mixes (v0..v3 with v4..v7, v8..v11,
        // v12..v15), then four diagonal G mixes. Each line is one half of a G.
        //ROUND 1
        v00 += m00; v00 += v04; v12 ^= v00; v12 = ror(v12, 32); v08 += v12; v04 ^= v08; v04 = ror(v04, 24);
        v01 += m02; v01 += v05; v13 ^= v01; v13 = ror(v13, 32); v09 += v13; v05 ^= v09; v05 = ror(v05, 24);
        v02 += m04; v02 += v06; v14 ^= v02; v14 = ror(v14, 32); v10 += v14; v06 ^= v10; v06 = ror(v06, 24);
        v03 += m06; v03 += v07; v15 ^= v03; v15 = ror(v15, 32); v11 += v15; v07 ^= v11; v07 = ror(v07, 24);
        v02 += m05; v02 += v06; v14 ^= v02; v14 = ror(v14, 16); v10 += v14; v06 ^= v10; v06 = ror(v06, 63);
        v03 += m07; v03 += v07; v15 ^= v03; v15 = ror(v15, 16); v11 += v15; v07 ^= v11; v07 = ror(v07, 63);
        v00 += m01; v00 += v04; v12 ^= v00; v12 = ror(v12, 16); v08 += v12; v04 ^= v08; v04 = ror(v04, 63);
        v01 += m03; v01 += v05; v13 ^= v01; v13 = ror(v13, 16); v09 += v13; v05 ^= v09; v05 = ror(v05, 63);
        v00 += m08; v00 += v05; v15 ^= v00; v15 = ror(v15, 32); v10 += v15; v05 ^= v10; v05 = ror(v05, 24);
        v01 += m10; v01 += v06; v12 ^= v01; v12 = ror(v12, 32); v11 += v12; v06 ^= v11; v06 = ror(v06, 24);
        v02 += m12; v02 += v07; v13 ^= v02; v13 = ror(v13, 32); v08 += v13; v07 ^= v08; v07 = ror(v07, 24);
        v03 += m14; v03 += v04; v14 ^= v03; v14 = ror(v14, 32); v09 += v14; v04 ^= v09; v04 = ror(v04, 24);
        v02 += m13; v02 += v07; v13 ^= v02; v13 = ror(v13, 16); v08 += v13; v07 ^= v08; v07 = ror(v07, 63);
        v03 += m15; v03 += v04; v14 ^= v03; v14 = ror(v14, 16); v09 += v14; v04 ^= v09; v04 = ror(v04, 63);
        v00 += m09; v00 += v05; v15 ^= v00; v15 = ror(v15, 16); v10 += v15; v05 ^= v10; v05 = ror(v05, 63);
        v01 += m11; v01 += v06; v12 ^= v01; v12 = ror(v12, 16); v11 += v12; v06 ^= v11; v06 = ror(v06, 63);

        //ROUND 2
        v00 += m14; v00 += v04; v12 ^= v00; v12 = ror(v12, 32); v08 += v12; v04 ^= v08; v04 = ror(v04, 24);
        v01 += m04; v01 += v05; v13 ^= v01; v13 = ror(v13, 32); v09 += v13; v05 ^= v09; v05 = ror(v05, 24);
        v02 += m09; v02 += v06; v14 ^= v02; v14 = ror(v14, 32); v10 += v14; v06 ^= v10; v06 = ror(v06, 24);
        v03 += m13; v03 += v07; v15 ^= v03; v15 = ror(v15, 32); v11 += v15; v07 ^= v11; v07 = ror(v07, 24);
        v02 += m15; v02 += v06; v14 ^= v02; v14 = ror(v14, 16); v10 += v14; v06 ^= v10; v06 = ror(v06, 63);
        v03 += m06; v03 += v07; v15 ^= v03; v15 = ror(v15, 16); v11 += v15; v07 ^= v11; v07 = ror(v07, 63);
        v00 += m10; v00 += v04; v12 ^= v00; v12 = ror(v12, 16); v08 += v12; v04 ^= v08; v04 = ror(v04, 63);
        v01 += m08; v01 += v05; v13 ^= v01; v13 = ror(v13, 16); v09 += v13; v05 ^= v09; v05 = ror(v05, 63);
        v00 += m01; v00 += v05; v15 ^= v00; v15 = ror(v15, 32); v10 += v15; v05 ^= v10; v05 = ror(v05, 24);
        v01 += m00; v01 += v06; v12 ^= v01; v12 = ror(v12, 32); v11 += v12; v06 ^= v11; v06 = ror(v06, 24);
        v02 += m11; v02 += v07; v13 ^= v02; v13 = ror(v13, 32); v08 += v13; v07 ^= v08; v07 = ror(v07, 24);
        v03 += m05; v03 += v04; v14 ^= v03; v14 = ror(v14, 32); v09 += v14; v04 ^= v09; v04 = ror(v04, 24);
        v02 += m07; v02 += v07; v13 ^= v02; v13 = ror(v13, 16); v08 += v13; v07 ^= v08; v07 = ror(v07, 63);
        v03 += m03; v03 += v04; v14 ^= v03; v14 = ror(v14, 16); v09 += v14; v04 ^= v09; v04 = ror(v04, 63);
        v00 += m12; v00 += v05; v15 ^= v00; v15 = ror(v15, 16); v10 += v15; v05 ^= v10; v05 = ror(v05, 63);
        v01 += m02; v01 += v06; v12 ^= v01; v12 = ror(v12, 16); v11 += v12; v06 ^= v11; v06 = ror(v06, 63);

        //ROUND 3
        v00 += m11; v00 += v04; v12 ^= v00; v12 = ror(v12, 32); v08 += v12; v04 ^= v08; v04 = ror(v04, 24);
        v01 += m12; v01 += v05; v13 ^= v01; v13 = ror(v13, 32); v09 += v13; v05 ^= v09; v05 = ror(v05, 24);
        v02 += m05; v02 += v06; v14 ^= v02; v14 = ror(v14, 32); v10 += v14; v06 ^= v10; v06 = ror(v06, 24);
        v03 += m15; v03 += v07; v15 ^= v03; v15 = ror(v15, 32); v11 += v15; v07 ^= v11; v07 = ror(v07, 24);
        v02 += m02; v02 += v06; v14 ^= v02; v14 = ror(v14, 16); v10 += v14; v06 ^= v10; v06 = ror(v06, 63);
        v03 += m13; v03 += v07; v15 ^= v03; v15 = ror(v15, 16); v11 += v15; v07 ^= v11; v07 = ror(v07, 63);
        v00 += m08; v00 += v04; v12 ^= v00; v12 = ror(v12, 16); v08 += v12; v04 ^= v08; v04 = ror(v04, 63);
        v01 += m00; v01 += v05; v13 ^= v01; v13 = ror(v13, 16); v09 += v13; v05 ^= v09; v05 = ror(v05, 63);
        v00 += m10; v00 += v05; v15 ^= v00; v15 = ror(v15, 32); v10 += v15; v05 ^= v10; v05 = ror(v05, 24);
        v01 += m03; v01 += v06; v12 ^= v01; v12 = ror(v12, 32); v11 += v12; v06 ^= v11; v06 = ror(v06, 24);
        v02 += m07; v02 += v07; v13 ^= v02; v13 = ror(v13, 32); v08 += v13; v07 ^= v08; v07 = ror(v07, 24);
        v03 += m09; v03 += v04; v14 ^= v03; v14 = ror(v14, 32); v09 += v14; v04 ^= v09; v04 = ror(v04, 24);
        v02 += m01; v02 += v07; v13 ^= v02; v13 = ror(v13, 16); v08 += v13; v07 ^= v08; v07 = ror(v07, 63);
        v03 += m04; v03 += v04; v14 ^= v03; v14 = ror(v14, 16); v09 += v14; v04 ^= v09; v04 = ror(v04, 63);
        v00 += m14; v00 += v05; v15 ^= v00; v15 = ror(v15, 16); v10 += v15; v05 ^= v10; v05 = ror(v05, 63);
        v01 += m06; v01 += v06; v12 ^= v01; v12 = ror(v12, 16); v11 += v12; v06 ^= v11; v06 = ror(v06, 63);

        //ROUND 4
        v00 += m07; v00 += v04; v12 ^= v00; v12 = ror(v12, 32); v08 += v12; v04 ^= v08; v04 = ror(v04, 24);
        v01 += m03; v01 += v05; v13 ^= v01; v13 = ror(v13, 32); v09 += v13; v05 ^= v09; v05 = ror(v05, 24);
        v02 += m13; v02 += v06; v14 ^= v02; v14 = ror(v14, 32); v10 += v14; v06 ^= v10; v06 = ror(v06, 24);
        v03 += m11; v03 += v07; v15 ^= v03; v15 = ror(v15, 32); v11 += v15; v07 ^= v11; v07 = ror(v07, 24);
        v02 += m12; v02 += v06; v14 ^= v02; v14 = ror(v14, 16); v10 += v14; v06 ^= v10; v06 = ror(v06, 63);
        v03 += m14; v03 += v07; v15 ^= v03; v15 = ror(v15, 16); v11 += v15; v07 ^= v11; v07 = ror(v07, 63);
        v00 += m09; v00 += v04; v12 ^= v00; v12 = ror(v12, 16); v08 += v12; v04 ^= v08; v04 = ror(v04, 63);
        v01 += m01; v01 += v05; v13 ^= v01; v13 = ror(v13, 16); v09 += v13; v05 ^= v09; v05 = ror(v05, 63);
        v00 += m02; v00 += v05; v15 ^= v00; v15 = ror(v15, 32); v10 += v15; v05 ^= v10; v05 = ror(v05, 24);
        v01 += m05; v01 += v06; v12 ^= v01; v12 = ror(v12, 32); v11 += v12; v06 ^= v11; v06 = ror(v06, 24);
        v02 += m04; v02 += v07; v13 ^= v02; v13 = ror(v13, 32); v08 += v13; v07 ^= v08; v07 = ror(v07, 24);
        v03 += m15; v03 += v04; v14 ^= v03; v14 = ror(v14, 32); v09 += v14; v04 ^= v09; v04 = ror(v04, 24);
        v02 += m00; v02 += v07; v13 ^= v02; v13 = ror(v13, 16); v08 += v13; v07 ^= v08; v07 = ror(v07, 63);
        v03 += m08; v03 += v04; v14 ^= v03; v14 = ror(v14, 16); v09 += v14; v04 ^= v09; v04 = ror(v04, 63);
        v00 += m06; v00 += v05; v15 ^= v00; v15 = ror(v15, 16); v10 += v15; v05 ^= v10; v05 = ror(v05, 63);
        v01 += m10; v01 += v06; v12 ^= v01; v12 = ror(v12, 16); v11 += v12; v06 ^= v11; v06 = ror(v06, 63);

        //ROUND 5
        v00 += m09; v00 += v04; v12 ^= v00; v12 = ror(v12, 32); v08 += v12; v04 ^= v08; v04 = ror(v04, 24);
        v01 += m05; v01 += v05; v13 ^= v01; v13 = ror(v13, 32); v09 += v13; v05 ^= v09; v05 = ror(v05, 24);
        v02 += m02; v02 += v06; v14 ^= v02; v14 = ror(v14, 32); v10 += v14; v06 ^= v10; v06 = ror(v06, 24);
        v03 += m10; v03 += v07; v15 ^= v03; v15 = ror(v15, 32); v11 += v15; v07 ^= v11; v07 = ror(v07, 24);
        v02 += m04; v02 += v06; v14 ^= v02; v14 = ror(v14, 16); v10 += v14; v06 ^= v10; v06 = ror(v06, 63);
        v03 += m15; v03 += v07; v15 ^= v03; v15 = ror(v15, 16); v11 += v15; v07 ^= v11; v07 = ror(v07, 63);
        v00 += m00; v00 += v04; v12 ^= v00; v12 = ror(v12, 16); v08 += v12; v04 ^= v08; v04 = ror(v04, 63);
        v01 += m07; v01 += v05; v13 ^= v01; v13 = ror(v13, 16); v09 += v13; v05 ^= v09; v05 = ror(v05, 63);
        v00 += m14; v00 += v05; v15 ^= v00; v15 = ror(v15, 32); v10 += v15; v05 ^= v10; v05 = ror(v05, 24);
        v01 += m11; v01 += v06; v12 ^= v01; v12 = ror(v12, 32); v11 += v12; v06 ^= v11; v06 = ror(v06, 24);
        v02 += m06; v02 += v07; v13 ^= v02; v13 = ror(v13, 32); v08 += v13; v07 ^= v08; v07 = ror(v07, 24);
        v03 += m03; v03 += v04; v14 ^= v03; v14 = ror(v14, 32); v09 += v14; v04 ^= v09; v04 = ror(v04, 24);
        v02 += m08; v02 += v07; v13 ^= v02; v13 = ror(v13, 16); v08 += v13; v07 ^= v08; v07 = ror(v07, 63);
        v03 += m13; v03 += v04; v14 ^= v03; v14 = ror(v14, 16); v09 += v14; v04 ^= v09; v04 = ror(v04, 63);
        v00 += m01; v00 += v05; v15 ^= v00; v15 = ror(v15, 16); v10 += v15; v05 ^= v10; v05 = ror(v05, 63);
        v01 += m12; v01 += v06; v12 ^= v01; v12 = ror(v12, 16); v11 += v12; v06 ^= v11; v06 = ror(v06, 63);

        //ROUND 6
        v00 += m02; v00 += v04; v12 ^= v00; v12 = ror(v12, 32); v08 += v12; v04 ^= v08; v04 = ror(v04, 24);
        v01 += m06; v01 += v05; v13 ^= v01; v13 = ror(v13, 32); v09 += v13; v05 ^= v09; v05 = ror(v05, 24);
        v02 += m00; v02 += v06; v14 ^= v02; v14 = ror(v14, 32); v10 += v14; v06 ^= v10; v06 = ror(v06, 24);
        v03 += m08; v03 += v07; v15 ^= v03; v15 = ror(v15, 32); v11 += v15; v07 ^= v11; v07 = ror(v07, 24);
        v02 += m11; v02 += v06; v14 ^= v02; v14 = ror(v14, 16); v10 += v14; v06 ^= v10; v06 = ror(v06, 63);
        v03 += m03; v03 += v07; v15 ^= v03; v15 = ror(v15, 16); v11 += v15; v07 ^= v11; v07 = ror(v07, 63);
        v00 += m12; v00 += v04; v12 ^= v00; v12 = ror(v12, 16); v08 += v12; v04 ^= v08; v04 = ror(v04, 63);
        v01 += m10; v01 += v05; v13 ^= v01; v13 = ror(v13, 16); v09 += v13; v05 ^= v09; v05 = ror(v05, 63);
        v00 += m04; v00 += v05; v15 ^= v00; v15 = ror(v15, 32); v10 += v15; v05 ^= v10; v05 = ror(v05, 24);
        v01 += m07; v01 += v06; v12 ^= v01; v12 = ror(v12, 32); v11 += v12; v06 ^= v11; v06 = ror(v06, 24);
        v02 += m15; v02 += v07; v13 ^= v02; v13 = ror(v13, 32); v08 += v13; v07 ^= v08; v07 = ror(v07, 24);
        v03 += m01; v03 += v04; v14 ^= v03; v14 = ror(v14, 32); v09 += v14; v04 ^= v09; v04 = ror(v04, 24);
        v02 += m14; v02 += v07; v13 ^= v02; v13 = ror(v13, 16); v08 += v13; v07 ^= v08; v07 = ror(v07, 63);
        v03 += m09; v03 += v04; v14 ^= v03; v14 = ror(v14, 16); v09 += v14; v04 ^= v09; v04 = ror(v04, 63);
        v00 += m13; v00 += v05; v15 ^= v00; v15 = ror(v15, 16); v10 += v15; v05 ^= v10; v05 = ror(v05, 63);
        v01 += m05; v01 += v06; v12 ^= v01; v12 = ror(v12, 16); v11 += v12; v06 ^= v11; v06 = ror(v06, 63);

        //ROUND 7
        v00 += m12; v00 += v04; v12 ^= v00; v12 = ror(v12, 32); v08 += v12; v04 ^= v08; v04 = ror(v04, 24);
        v01 += m01; v01 += v05; v13 ^= v01; v13 = ror(v13, 32); v09 += v13; v05 ^= v09; v05 = ror(v05, 24);
        v02 += m14; v02 += v06; v14 ^= v02; v14 = ror(v14, 32); v10 += v14; v06 ^= v10; v06 = ror(v06, 24);
        v03 += m04; v03 += v07; v15 ^= v03; v15 = ror(v15, 32); v11 += v15; v07 ^= v11; v07 = ror(v07, 24);
        v02 += m13; v02 += v06; v14 ^= v02; v14 = ror(v14, 16); v10 += v14; v06 ^= v10; v06 = ror(v06, 63);
        v03 += m10; v03 += v07; v15 ^= v03; v15 = ror(v15, 16); v11 += v15; v07 ^= v11; v07 = ror(v07, 63);
        v00 += m05; v00 += v04; v12 ^= v00; v12 = ror(v12, 16); v08 += v12; v04 ^= v08; v04 = ror(v04, 63);
        v01 += m15; v01 += v05; v13 ^= v01; v13 = ror(v13, 16); v09 += v13; v05 ^= v09; v05 = ror(v05, 63);
        v00 += m00; v00 += v05; v15 ^= v00; v15 = ror(v15, 32); v10 += v15; v05 ^= v10; v05 = ror(v05, 24);
        v01 += m06; v01 += v06; v12 ^= v01; v12 = ror(v12, 32); v11 += v12; v06 ^= v11; v06 = ror(v06, 24);
        v02 += m09; v02 += v07; v13 ^= v02; v13 = ror(v13, 32); v08 += v13; v07 ^= v08; v07 = ror(v07, 24);
        v03 += m08; v03 += v04; v14 ^= v03; v14 = ror(v14, 32); v09 += v14; v04 ^= v09; v04 = ror(v04, 24);
        v02 += m02; v02 += v07; v13 ^= v02; v13 = ror(v13, 16); v08 += v13; v07 ^= v08; v07 = ror(v07, 63);
        v03 += m11; v03 += v04; v14 ^= v03; v14 = ror(v14, 16); v09 += v14; v04 ^= v09; v04 = ror(v04, 63);
        v00 += m07; v00 += v05; v15 ^= v00; v15 = ror(v15, 16); v10 += v15; v05 ^= v10; v05 = ror(v05, 63);
        v01 += m03; v01 += v06; v12 ^= v01; v12 = ror(v12, 16); v11 += v12; v06 ^= v11; v06 = ror(v06, 63);

        //ROUND 8
        v00 += m13; v00 += v04; v12 ^= v00; v12 = ror(v12, 32); v08 += v12; v04 ^= v08; v04 = ror(v04, 24);
        v01 += m07; v01 += v05; v13 ^= v01; v13 = ror(v13, 32); v09 += v13; v05 ^= v09; v05 = ror(v05, 24);
        v02 += m12; v02 += v06; v14 ^= v02; v14 = ror(v14, 32); v10 += v14; v06 ^= v10; v06 = ror(v06, 24);
        v03 += m03; v03 += v07; v15 ^= v03; v15 = ror(v15, 32); v11 += v15; v07 ^= v11; v07 = ror(v07, 24);
        v02 += m01; v02 += v06; v14 ^= v02; v14 = ror(v14, 16); v10 += v14; v06 ^= v10; v06 = ror(v06, 63);
        v03 += m09; v03 += v07; v15 ^= v03; v15 = ror(v15, 16); v11 += v15; v07 ^= v11; v07 = ror(v07, 63);
        v00 += m11; v00 += v04; v12 ^= v00; v12 = ror(v12, 16); v08 += v12; v04 ^= v08; v04 = ror(v04, 63);
        v01 += m14; v01 += v05; v13 ^= v01; v13 = ror(v13, 16); v09 += v13; v05 ^= v09; v05 = ror(v05, 63);
        v00 += m05; v00 += v05; v15 ^= v00; v15 = ror(v15, 32); v10 += v15; v05 ^= v10; v05 = ror(v05, 24);
        v01 += m15; v01 += v06; v12 ^= v01; v12 = ror(v12, 32); v11 += v12; v06 ^= v11; v06 = ror(v06, 24);
        v02 += m08; v02 += v07; v13 ^= v02; v13 = ror(v13, 32); v08 += v13; v07 ^= v08; v07 = ror(v07, 24);
        v03 += m02; v03 += v04; v14 ^= v03; v14 = ror(v14, 32); v09 += v14; v04 ^= v09; v04 = ror(v04, 24);
        v02 += m06; v02 += v07; v13 ^= v02; v13 = ror(v13, 16); v08 += v13; v07 ^= v08; v07 = ror(v07, 63);
        v03 += m10; v03 += v04; v14 ^= v03; v14 = ror(v14, 16); v09 += v14; v04 ^= v09; v04 = ror(v04, 63);
        v00 += m00; v00 += v05; v15 ^= v00; v15 = ror(v15, 16); v10 += v15; v05 ^= v10; v05 = ror(v05, 63);
        v01 += m04; v01 += v06; v12 ^= v01; v12 = ror(v12, 16); v11 += v12; v06 ^= v11; v06 = ror(v06, 63);

        //ROUND 9
        v00 += m06; v00 += v04; v12 ^= v00; v12 = ror(v12, 32); v08 += v12; v04 ^= v08; v04 = ror(v04, 24);
        v01 += m14; v01 += v05; v13 ^= v01; v13 = ror(v13, 32); v09 += v13; v05 ^= v09; v05 = ror(v05, 24);
        v02 += m11; v02 += v06; v14 ^= v02; v14 = ror(v14, 32); v10 += v14; v06 ^= v10; v06 = ror(v06, 24);
        v03 += m00; v03 += v07; v15 ^= v03; v15 = ror(v15, 32); v11 += v15; v07 ^= v11; v07 = ror(v07, 24);
        v02 += m03; v02 += v06; v14 ^= v02; v14 = ror(v14, 16); v10 += v14; v06 ^= v10; v06 = ror(v06, 63);
        v03 += m08; v03 += v07; v15 ^= v03; v15 = ror(v15, 16); v11 += v15; v07 ^= v11; v07 = ror(v07, 63);
        v00 += m15; v00 += v04; v12 ^= v00; v12 = ror(v12, 16); v08 += v12; v04 ^= v08; v04 = ror(v04, 63);
        v01 += m09; v01 += v05; v13 ^= v01; v13 = ror(v13, 16); v09 += v13; v05 ^= v09; v05 = ror(v05, 63);
        v00 += m12; v00 += v05; v15 ^= v00; v15 = ror(v15, 32); v10 += v15; v05 ^= v10; v05 = ror(v05, 24);
        v01 += m13; v01 += v06; v12 ^= v01; v12 = ror(v12, 32); v11 += v12; v06 ^= v11; v06 = ror(v06, 24);
        v02 += m01; v02 += v07; v13 ^= v02; v13 = ror(v13, 32); v08 += v13; v07 ^= v08; v07 = ror(v07, 24);
        v03 += m10; v03 += v04; v14 ^= v03; v14 = ror(v14, 32); v09 += v14; v04 ^= v09; v04 = ror(v04, 24);
        v02 += m04; v02 += v07; v13 ^= v02; v13 = ror(v13, 16); v08 += v13; v07 ^= v08; v07 = ror(v07, 63);
        v03 += m05; v03 += v04; v14 ^= v03; v14 = ror(v14, 16); v09 += v14; v04 ^= v09; v04 = ror(v04, 63);
        v00 += m02; v00 += v05; v15 ^= v00; v15 = ror(v15, 16); v10 += v15; v05 ^= v10; v05 = ror(v05, 63);
        v01 += m07; v01 += v06; v12 ^= v01; v12 = ror(v12, 16); v11 += v12; v06 ^= v11; v06 = ror(v06, 63);

        //ROUND 10
        v00 += m10; v00 += v04; v12 ^= v00; v12 = ror(v12, 32); v08 += v12; v04 ^= v08; v04 = ror(v04, 24);
        v01 += m08; v01 += v05; v13 ^= v01; v13 = ror(v13, 32); v09 += v13; v05 ^= v09; v05 = ror(v05, 24);
        v02 += m07; v02 += v06; v14 ^= v02; v14 = ror(v14, 32); v10 += v14; v06 ^= v10; v06 = ror(v06, 24);
        v03 += m01; v03 += v07; v15 ^= v03; v15 = ror(v15, 32); v11 += v15; v07 ^= v11; v07 = ror(v07, 24);
        v02 += m06; v02 += v06; v14 ^= v02; v14 = ror(v14, 16); v10 += v14; v06 ^= v10; v06 = ror(v06, 63);
        v03 += m05; v03 += v07; v15 ^= v03; v15 = ror(v15, 16); v11 += v15; v07 ^= v11; v07 = ror(v07, 63);
        v00 += m02; v00 += v04; v12 ^= v00; v12 = ror(v12, 16); v08 += v12; v04 ^= v08; v04 = ror(v04, 63);
        v01 += m04; v01 += v05; v13 ^= v01; v13 = ror(v13, 16); v09 += v13; v05 ^= v09; v05 = ror(v05, 63);
        v00 += m15; v00 += v05; v15 ^= v00; v15 = ror(v15, 32); v10 += v15; v05 ^= v10; v05 = ror(v05, 24);
        v01 += m09; v01 += v06; v12 ^= v01; v12 = ror(v12, 32); v11 += v12; v06 ^= v11; v06 = ror(v06, 24);
        v02 += m03; v02 += v07; v13 ^= v02; v13 = ror(v13, 32); v08 += v13; v07 ^= v08; v07 = ror(v07, 24);
        v03 += m13; v03 += v04; v14 ^= v03; v14 = ror(v14, 32); v09 += v14; v04 ^= v09; v04 = ror(v04, 24);
        v02 += m12; v02 += v07; v13 ^= v02; v13 = ror(v13, 16); v08 += v13; v07 ^= v08; v07 = ror(v07, 63);
        v03 += m00; v03 += v04; v14 ^= v03; v14 = ror(v14, 16); v09 += v14; v04 ^= v09; v04 = ror(v04, 63);
        v00 += m11; v00 += v05; v15 ^= v00; v15 = ror(v15, 16); v10 += v15; v05 ^= v10; v05 = ror(v05, 63);
        v01 += m14; v01 += v06; v12 ^= v01; v12 = ror(v12, 16); v11 += v12; v06 ^= v11; v06 = ror(v06, 63);

        //ROUND 11 (message schedule repeats round 1)
        v00 += m00; v00 += v04; v12 ^= v00; v12 = ror(v12, 32); v08 += v12; v04 ^= v08; v04 = ror(v04, 24);
        v01 += m02; v01 += v05; v13 ^= v01; v13 = ror(v13, 32); v09 += v13; v05 ^= v09; v05 = ror(v05, 24);
        v02 += m04; v02 += v06; v14 ^= v02; v14 = ror(v14, 32); v10 += v14; v06 ^= v10; v06 = ror(v06, 24);
        v03 += m06; v03 += v07; v15 ^= v03; v15 = ror(v15, 32); v11 += v15; v07 ^= v11; v07 = ror(v07, 24);
        v02 += m05; v02 += v06; v14 ^= v02; v14 = ror(v14, 16); v10 += v14; v06 ^= v10; v06 = ror(v06, 63);
        v03 += m07; v03 += v07; v15 ^= v03; v15 = ror(v15, 16); v11 += v15; v07 ^= v11; v07 = ror(v07, 63);
        v00 += m01; v00 += v04; v12 ^= v00; v12 = ror(v12, 16); v08 += v12; v04 ^= v08; v04 = ror(v04, 63);
        v01 += m03; v01 += v05; v13 ^= v01; v13 = ror(v13, 16); v09 += v13; v05 ^= v09; v05 = ror(v05, 63);
        v00 += m08; v00 += v05; v15 ^= v00; v15 = ror(v15, 32); v10 += v15; v05 ^= v10; v05 = ror(v05, 24);
        v01 += m10; v01 += v06; v12 ^= v01; v12 = ror(v12, 32); v11 += v12; v06 ^= v11; v06 = ror(v06, 24);
        v02 += m12; v02 += v07; v13 ^= v02; v13 = ror(v13, 32); v08 += v13; v07 ^= v08; v07 = ror(v07, 24);
        v03 += m14; v03 += v04; v14 ^= v03; v14 = ror(v14, 32); v09 += v14; v04 ^= v09; v04 = ror(v04, 24);
        v02 += m13; v02 += v07; v13 ^= v02; v13 = ror(v13, 16); v08 += v13; v07 ^= v08; v07 = ror(v07, 63);
        v03 += m15; v03 += v04; v14 ^= v03; v14 = ror(v14, 16); v09 += v14; v04 ^= v09; v04 = ror(v04, 63);
        v00 += m09; v00 += v05; v15 ^= v00; v15 = ror(v15, 16); v10 += v15; v05 ^= v10; v05 = ror(v05, 63);
        v01 += m11; v01 += v06; v12 ^= v01; v12 = ror(v12, 16); v11 += v12; v06 ^= v11; v06 = ror(v06, 63);

        //ROUND 12 (message schedule repeats round 2)
        v00 += m14; v00 += v04; v12 ^= v00; v12 = ror(v12, 32); v08 += v12; v04 ^= v08; v04 = ror(v04, 24);
        v01 += m04; v01 += v05; v13 ^= v01; v13 = ror(v13, 32); v09 += v13; v05 ^= v09; v05 = ror(v05, 24);
        v02 += m09; v02 += v06; v14 ^= v02; v14 = ror(v14, 32); v10 += v14; v06 ^= v10; v06 = ror(v06, 24);
        v03 += m13; v03 += v07; v15 ^= v03; v15 = ror(v15, 32); v11 += v15; v07 ^= v11; v07 = ror(v07, 24);
        v02 += m15; v02 += v06; v14 ^= v02; v14 = ror(v14, 16); v10 += v14; v06 ^= v10; v06 = ror(v06, 63);
        v03 += m06; v03 += v07; v15 ^= v03; v15 = ror(v15, 16); v11 += v15; v07 ^= v11; v07 = ror(v07, 63);
        v00 += m10; v00 += v04; v12 ^= v00; v12 = ror(v12, 16); v08 += v12; v04 ^= v08; v04 = ror(v04, 63);
        v01 += m08; v01 += v05; v13 ^= v01; v13 = ror(v13, 16); v09 += v13; v05 ^= v09; v05 = ror(v05, 63);
        v00 += m01; v00 += v05; v15 ^= v00; v15 = ror(v15, 32); v10 += v15; v05 ^= v10; v05 = ror(v05, 24);
        v01 += m00; v01 += v06; v12 ^= v01; v12 = ror(v12, 32); v11 += v12; v06 ^= v11; v06 = ror(v06, 24);
        v02 += m11; v02 += v07; v13 ^= v02; v13 = ror(v13, 32); v08 += v13; v07 ^= v08; v07 = ror(v07, 24);
        v03 += m05; v03 += v04; v14 ^= v03; v14 = ror(v14, 32); v09 += v14; v04 ^= v09; v04 = ror(v04, 24);
        v02 += m07; v02 += v07; v13 ^= v02; v13 = ror(v13, 16); v08 += v13; v07 ^= v08; v07 = ror(v07, 63);
        v03 += m03; v03 += v04; v14 ^= v03; v14 = ror(v14, 16); v09 += v14; v04 ^= v09; v04 = ror(v04, 63);
        v00 += m12; v00 += v05; v15 ^= v00; v15 = ror(v15, 16); v10 += v15; v05 ^= v10; v05 = ror(v05, 63);
        v01 += m02; v01 += v06; v12 ^= v01; v12 = ror(v12, 16); v11 += v12; v06 ^= v11; v06 = ror(v06, 63);

        // Feed-forward: xor both halves of the working vector into the state.
        s->htf.h[0] ^= v00 ^ v08; s->htf.h[1] ^= v01 ^ v09; s->htf.h[2] ^= v02 ^ v10; s->htf.h[3] ^= v03 ^ v11;
        s->htf.h[4] ^= v04 ^ v12; s->htf.h[5] ^= v05 ^ v13; s->htf.h[6] ^= v06 ^ v14; s->htf.h[7] ^= v07 ^ v15;
    }
}
unsafe private static void mixScalar(Blake2bContext *s, ulong *m) { ulong m00 = m[00]; ulong m01 = m[01]; ulong m02 = m[02]; ulong m03 = m[03]; ulong m04 = m[04]; ulong m05 = m[05]; ulong m06 = m[06]; ulong m07 = m[07]; ulong m08 = m[08]; ulong m09 = m[09]; ulong m10 = m[10]; ulong m11 = m[11]; ulong m12 = m[12]; ulong m13 = m[13]; ulong m14 = m[14]; ulong m15 = m[15]; ulong v00 = s->h[0]; ulong v01 = s->h[1]; ulong v02 = s->h[2]; ulong v03 = s->h[3]; ulong v04 = s->h[4]; ulong v05 = s->h[5]; ulong v06 = s->h[6]; ulong v07 = s->h[7]; ulong v08 = 0x6A09E667F3BCC908ul; ulong v09 = 0xBB67AE8584CAA73Bul; ulong v10 = 0x3C6EF372FE94F82Bul; ulong v11 = 0xA54FF53A5F1D36F1ul; ulong v12 = 0x510E527FADE682D1ul; ulong v13 = 0x9B05688C2B3E6C1Ful; ulong v14 = 0x1F83D9ABFB41BD6Bul; ulong v15 = 0x5BE0CD19137E2179ul; v12 ^= s->t[0]; v13 ^= s->t[1]; v14 ^= s->f[0]; //ROUND 1 v00 += m00; v00 += v04; v12 ^= v00; v12 = (v12 >> 32) ^ (v12 << 32); v08 += v12; v04 ^= v08; v04 = (v04 >> 24) ^ (v04 << 40); v01 += m02; v01 += v05; v13 ^= v01; v13 = (v13 >> 32) ^ (v13 << 32); v09 += v13; v05 ^= v09; v05 = (v05 >> 24) ^ (v05 << 40); v02 += m04; v02 += v06; v14 ^= v02; v14 = (v14 >> 32) ^ (v14 << 32); v10 += v14; v06 ^= v10; v06 = (v06 >> 24) ^ (v06 << 40); v03 += m06; v03 += v07; v15 ^= v03; v15 = (v15 >> 32) ^ (v15 << 32); v11 += v15; v07 ^= v11; v07 = (v07 >> 24) ^ (v07 << 40); v02 += m05; v02 += v06; v14 ^= v02; v14 = (v14 >> 16) ^ (v14 << 48); v10 += v14; v06 ^= v10; v06 = (v06 >> 63) ^ (v06 << 1); v03 += m07; v03 += v07; v15 ^= v03; v15 = (v15 >> 16) ^ (v15 << 48); v11 += v15; v07 ^= v11; v07 = (v07 >> 63) ^ (v07 << 1); v00 += m01; v00 += v04; v12 ^= v00; v12 = (v12 >> 16) ^ (v12 << 48); v08 += v12; v04 ^= v08; v04 = (v04 >> 63) ^ (v04 << 1); v01 += m03; v01 += v05; v13 ^= v01; v13 = (v13 >> 16) ^ (v13 << 48); v09 += v13; v05 ^= v09; v05 = (v05 >> 63) ^ (v05 << 1); v00 += m08; v00 += v05; v15 ^= v00; v15 = (v15 >> 32) ^ (v15 << 32); v10 += v15; v05 ^= v10; v05 = (v05 >> 24) ^ (v05 << 40); 
v01 += m10; v01 += v06; v12 ^= v01; v12 = (v12 >> 32) ^ (v12 << 32); v11 += v12; v06 ^= v11; v06 = (v06 >> 24) ^ (v06 << 40); v02 += m12; v02 += v07; v13 ^= v02; v13 = (v13 >> 32) ^ (v13 << 32); v08 += v13; v07 ^= v08; v07 = (v07 >> 24) ^ (v07 << 40); v03 += m14; v03 += v04; v14 ^= v03; v14 = (v14 >> 32) ^ (v14 << 32); v09 += v14; v04 ^= v09; v04 = (v04 >> 24) ^ (v04 << 40); v02 += m13; v02 += v07; v13 ^= v02; v13 = (v13 >> 16) ^ (v13 << 48); v08 += v13; v07 ^= v08; v07 = (v07 >> 63) ^ (v07 << 1); v03 += m15; v03 += v04; v14 ^= v03; v14 = (v14 >> 16) ^ (v14 << 48); v09 += v14; v04 ^= v09; v04 = (v04 >> 63) ^ (v04 << 1); v00 += m09; v00 += v05; v15 ^= v00; v15 = (v15 >> 16) ^ (v15 << 48); v10 += v15; v05 ^= v10; v05 = (v05 >> 63) ^ (v05 << 1); v01 += m11; v01 += v06; v12 ^= v01; v12 = (v12 >> 16) ^ (v12 << 48); v11 += v12; v06 ^= v11; v06 = (v06 >> 63) ^ (v06 << 1); //ROUND 2 v00 += m14; v00 += v04; v12 ^= v00; v12 = (v12 >> 32) ^ (v12 << 32); v08 += v12; v04 ^= v08; v04 = (v04 >> 24) ^ (v04 << 40); v01 += m04; v01 += v05; v13 ^= v01; v13 = (v13 >> 32) ^ (v13 << 32); v09 += v13; v05 ^= v09; v05 = (v05 >> 24) ^ (v05 << 40); v02 += m09; v02 += v06; v14 ^= v02; v14 = (v14 >> 32) ^ (v14 << 32); v10 += v14; v06 ^= v10; v06 = (v06 >> 24) ^ (v06 << 40); v03 += m13; v03 += v07; v15 ^= v03; v15 = (v15 >> 32) ^ (v15 << 32); v11 += v15; v07 ^= v11; v07 = (v07 >> 24) ^ (v07 << 40); v02 += m15; v02 += v06; v14 ^= v02; v14 = (v14 >> 16) ^ (v14 << 48); v10 += v14; v06 ^= v10; v06 = (v06 >> 63) ^ (v06 << 1); v03 += m06; v03 += v07; v15 ^= v03; v15 = (v15 >> 16) ^ (v15 << 48); v11 += v15; v07 ^= v11; v07 = (v07 >> 63) ^ (v07 << 1); v00 += m10; v00 += v04; v12 ^= v00; v12 = (v12 >> 16) ^ (v12 << 48); v08 += v12; v04 ^= v08; v04 = (v04 >> 63) ^ (v04 << 1); v01 += m08; v01 += v05; v13 ^= v01; v13 = (v13 >> 16) ^ (v13 << 48); v09 += v13; v05 ^= v09; v05 = (v05 >> 63) ^ (v05 << 1); v00 += m01; v00 += v05; v15 ^= v00; v15 = (v15 >> 32) ^ (v15 << 32); v10 += v15; v05 ^= v10; v05 = (v05 >> 
24) ^ (v05 << 40); v01 += m00; v01 += v06; v12 ^= v01; v12 = (v12 >> 32) ^ (v12 << 32); v11 += v12; v06 ^= v11; v06 = (v06 >> 24) ^ (v06 << 40); v02 += m11; v02 += v07; v13 ^= v02; v13 = (v13 >> 32) ^ (v13 << 32); v08 += v13; v07 ^= v08; v07 = (v07 >> 24) ^ (v07 << 40); v03 += m05; v03 += v04; v14 ^= v03; v14 = (v14 >> 32) ^ (v14 << 32); v09 += v14; v04 ^= v09; v04 = (v04 >> 24) ^ (v04 << 40); v02 += m07; v02 += v07; v13 ^= v02; v13 = (v13 >> 16) ^ (v13 << 48); v08 += v13; v07 ^= v08; v07 = (v07 >> 63) ^ (v07 << 1); v03 += m03; v03 += v04; v14 ^= v03; v14 = (v14 >> 16) ^ (v14 << 48); v09 += v14; v04 ^= v09; v04 = (v04 >> 63) ^ (v04 << 1); v00 += m12; v00 += v05; v15 ^= v00; v15 = (v15 >> 16) ^ (v15 << 48); v10 += v15; v05 ^= v10; v05 = (v05 >> 63) ^ (v05 << 1); v01 += m02; v01 += v06; v12 ^= v01; v12 = (v12 >> 16) ^ (v12 << 48); v11 += v12; v06 ^= v11; v06 = (v06 >> 63) ^ (v06 << 1); //ROUND 3 v00 += m11; v00 += v04; v12 ^= v00; v12 = (v12 >> 32) ^ (v12 << 32); v08 += v12; v04 ^= v08; v04 = (v04 >> 24) ^ (v04 << 40); v01 += m12; v01 += v05; v13 ^= v01; v13 = (v13 >> 32) ^ (v13 << 32); v09 += v13; v05 ^= v09; v05 = (v05 >> 24) ^ (v05 << 40); v02 += m05; v02 += v06; v14 ^= v02; v14 = (v14 >> 32) ^ (v14 << 32); v10 += v14; v06 ^= v10; v06 = (v06 >> 24) ^ (v06 << 40); v03 += m15; v03 += v07; v15 ^= v03; v15 = (v15 >> 32) ^ (v15 << 32); v11 += v15; v07 ^= v11; v07 = (v07 >> 24) ^ (v07 << 40); v02 += m02; v02 += v06; v14 ^= v02; v14 = (v14 >> 16) ^ (v14 << 48); v10 += v14; v06 ^= v10; v06 = (v06 >> 63) ^ (v06 << 1); v03 += m13; v03 += v07; v15 ^= v03; v15 = (v15 >> 16) ^ (v15 << 48); v11 += v15; v07 ^= v11; v07 = (v07 >> 63) ^ (v07 << 1); v00 += m08; v00 += v04; v12 ^= v00; v12 = (v12 >> 16) ^ (v12 << 48); v08 += v12; v04 ^= v08; v04 = (v04 >> 63) ^ (v04 << 1); v01 += m00; v01 += v05; v13 ^= v01; v13 = (v13 >> 16) ^ (v13 << 48); v09 += v13; v05 ^= v09; v05 = (v05 >> 63) ^ (v05 << 1); v00 += m10; v00 += v05; v15 ^= v00; v15 = (v15 >> 32) ^ (v15 << 32); v10 += v15; v05 ^= 
v10; v05 = (v05 >> 24) ^ (v05 << 40); v01 += m03; v01 += v06; v12 ^= v01; v12 = (v12 >> 32) ^ (v12 << 32); v11 += v12; v06 ^= v11; v06 = (v06 >> 24) ^ (v06 << 40); v02 += m07; v02 += v07; v13 ^= v02; v13 = (v13 >> 32) ^ (v13 << 32); v08 += v13; v07 ^= v08; v07 = (v07 >> 24) ^ (v07 << 40); v03 += m09; v03 += v04; v14 ^= v03; v14 = (v14 >> 32) ^ (v14 << 32); v09 += v14; v04 ^= v09; v04 = (v04 >> 24) ^ (v04 << 40); v02 += m01; v02 += v07; v13 ^= v02; v13 = (v13 >> 16) ^ (v13 << 48); v08 += v13; v07 ^= v08; v07 = (v07 >> 63) ^ (v07 << 1); v03 += m04; v03 += v04; v14 ^= v03; v14 = (v14 >> 16) ^ (v14 << 48); v09 += v14; v04 ^= v09; v04 = (v04 >> 63) ^ (v04 << 1); v00 += m14; v00 += v05; v15 ^= v00; v15 = (v15 >> 16) ^ (v15 << 48); v10 += v15; v05 ^= v10; v05 = (v05 >> 63) ^ (v05 << 1); v01 += m06; v01 += v06; v12 ^= v01; v12 = (v12 >> 16) ^ (v12 << 48); v11 += v12; v06 ^= v11; v06 = (v06 >> 63) ^ (v06 << 1); //ROUND 4 v00 += m07; v00 += v04; v12 ^= v00; v12 = (v12 >> 32) ^ (v12 << 32); v08 += v12; v04 ^= v08; v04 = (v04 >> 24) ^ (v04 << 40); v01 += m03; v01 += v05; v13 ^= v01; v13 = (v13 >> 32) ^ (v13 << 32); v09 += v13; v05 ^= v09; v05 = (v05 >> 24) ^ (v05 << 40); v02 += m13; v02 += v06; v14 ^= v02; v14 = (v14 >> 32) ^ (v14 << 32); v10 += v14; v06 ^= v10; v06 = (v06 >> 24) ^ (v06 << 40); v03 += m11; v03 += v07; v15 ^= v03; v15 = (v15 >> 32) ^ (v15 << 32); v11 += v15; v07 ^= v11; v07 = (v07 >> 24) ^ (v07 << 40); v02 += m12; v02 += v06; v14 ^= v02; v14 = (v14 >> 16) ^ (v14 << 48); v10 += v14; v06 ^= v10; v06 = (v06 >> 63) ^ (v06 << 1); v03 += m14; v03 += v07; v15 ^= v03; v15 = (v15 >> 16) ^ (v15 << 48); v11 += v15; v07 ^= v11; v07 = (v07 >> 63) ^ (v07 << 1); v00 += m09; v00 += v04; v12 ^= v00; v12 = (v12 >> 16) ^ (v12 << 48); v08 += v12; v04 ^= v08; v04 = (v04 >> 63) ^ (v04 << 1); v01 += m01; v01 += v05; v13 ^= v01; v13 = (v13 >> 16) ^ (v13 << 48); v09 += v13; v05 ^= v09; v05 = (v05 >> 63) ^ (v05 << 1); v00 += m02; v00 += v05; v15 ^= v00; v15 = (v15 >> 32) ^ (v15 << 32); 
v10 += v15; v05 ^= v10; v05 = (v05 >> 24) ^ (v05 << 40); v01 += m05; v01 += v06; v12 ^= v01; v12 = (v12 >> 32) ^ (v12 << 32); v11 += v12; v06 ^= v11; v06 = (v06 >> 24) ^ (v06 << 40); v02 += m04; v02 += v07; v13 ^= v02; v13 = (v13 >> 32) ^ (v13 << 32); v08 += v13; v07 ^= v08; v07 = (v07 >> 24) ^ (v07 << 40); v03 += m15; v03 += v04; v14 ^= v03; v14 = (v14 >> 32) ^ (v14 << 32); v09 += v14; v04 ^= v09; v04 = (v04 >> 24) ^ (v04 << 40); v02 += m00; v02 += v07; v13 ^= v02; v13 = (v13 >> 16) ^ (v13 << 48); v08 += v13; v07 ^= v08; v07 = (v07 >> 63) ^ (v07 << 1); v03 += m08; v03 += v04; v14 ^= v03; v14 = (v14 >> 16) ^ (v14 << 48); v09 += v14; v04 ^= v09; v04 = (v04 >> 63) ^ (v04 << 1); v00 += m06; v00 += v05; v15 ^= v00; v15 = (v15 >> 16) ^ (v15 << 48); v10 += v15; v05 ^= v10; v05 = (v05 >> 63) ^ (v05 << 1); v01 += m10; v01 += v06; v12 ^= v01; v12 = (v12 >> 16) ^ (v12 << 48); v11 += v12; v06 ^= v11; v06 = (v06 >> 63) ^ (v06 << 1); //ROUND 5 v00 += m09; v00 += v04; v12 ^= v00; v12 = (v12 >> 32) ^ (v12 << 32); v08 += v12; v04 ^= v08; v04 = (v04 >> 24) ^ (v04 << 40); v01 += m05; v01 += v05; v13 ^= v01; v13 = (v13 >> 32) ^ (v13 << 32); v09 += v13; v05 ^= v09; v05 = (v05 >> 24) ^ (v05 << 40); v02 += m02; v02 += v06; v14 ^= v02; v14 = (v14 >> 32) ^ (v14 << 32); v10 += v14; v06 ^= v10; v06 = (v06 >> 24) ^ (v06 << 40); v03 += m10; v03 += v07; v15 ^= v03; v15 = (v15 >> 32) ^ (v15 << 32); v11 += v15; v07 ^= v11; v07 = (v07 >> 24) ^ (v07 << 40); v02 += m04; v02 += v06; v14 ^= v02; v14 = (v14 >> 16) ^ (v14 << 48); v10 += v14; v06 ^= v10; v06 = (v06 >> 63) ^ (v06 << 1); v03 += m15; v03 += v07; v15 ^= v03; v15 = (v15 >> 16) ^ (v15 << 48); v11 += v15; v07 ^= v11; v07 = (v07 >> 63) ^ (v07 << 1); v00 += m00; v00 += v04; v12 ^= v00; v12 = (v12 >> 16) ^ (v12 << 48); v08 += v12; v04 ^= v08; v04 = (v04 >> 63) ^ (v04 << 1); v01 += m07; v01 += v05; v13 ^= v01; v13 = (v13 >> 16) ^ (v13 << 48); v09 += v13; v05 ^= v09; v05 = (v05 >> 63) ^ (v05 << 1); v00 += m14; v00 += v05; v15 ^= v00; v15 = (v15 >> 
32) ^ (v15 << 32); v10 += v15; v05 ^= v10; v05 = (v05 >> 24) ^ (v05 << 40); v01 += m11; v01 += v06; v12 ^= v01; v12 = (v12 >> 32) ^ (v12 << 32); v11 += v12; v06 ^= v11; v06 = (v06 >> 24) ^ (v06 << 40); v02 += m06; v02 += v07; v13 ^= v02; v13 = (v13 >> 32) ^ (v13 << 32); v08 += v13; v07 ^= v08; v07 = (v07 >> 24) ^ (v07 << 40); v03 += m03; v03 += v04; v14 ^= v03; v14 = (v14 >> 32) ^ (v14 << 32); v09 += v14; v04 ^= v09; v04 = (v04 >> 24) ^ (v04 << 40); v02 += m08; v02 += v07; v13 ^= v02; v13 = (v13 >> 16) ^ (v13 << 48); v08 += v13; v07 ^= v08; v07 = (v07 >> 63) ^ (v07 << 1); v03 += m13; v03 += v04; v14 ^= v03; v14 = (v14 >> 16) ^ (v14 << 48); v09 += v14; v04 ^= v09; v04 = (v04 >> 63) ^ (v04 << 1); v00 += m01; v00 += v05; v15 ^= v00; v15 = (v15 >> 16) ^ (v15 << 48); v10 += v15; v05 ^= v10; v05 = (v05 >> 63) ^ (v05 << 1); v01 += m12; v01 += v06; v12 ^= v01; v12 = (v12 >> 16) ^ (v12 << 48); v11 += v12; v06 ^= v11; v06 = (v06 >> 63) ^ (v06 << 1); //ROUND 6 v00 += m02; v00 += v04; v12 ^= v00; v12 = (v12 >> 32) ^ (v12 << 32); v08 += v12; v04 ^= v08; v04 = (v04 >> 24) ^ (v04 << 40); v01 += m06; v01 += v05; v13 ^= v01; v13 = (v13 >> 32) ^ (v13 << 32); v09 += v13; v05 ^= v09; v05 = (v05 >> 24) ^ (v05 << 40); v02 += m00; v02 += v06; v14 ^= v02; v14 = (v14 >> 32) ^ (v14 << 32); v10 += v14; v06 ^= v10; v06 = (v06 >> 24) ^ (v06 << 40); v03 += m08; v03 += v07; v15 ^= v03; v15 = (v15 >> 32) ^ (v15 << 32); v11 += v15; v07 ^= v11; v07 = (v07 >> 24) ^ (v07 << 40); v02 += m11; v02 += v06; v14 ^= v02; v14 = (v14 >> 16) ^ (v14 << 48); v10 += v14; v06 ^= v10; v06 = (v06 >> 63) ^ (v06 << 1); v03 += m03; v03 += v07; v15 ^= v03; v15 = (v15 >> 16) ^ (v15 << 48); v11 += v15; v07 ^= v11; v07 = (v07 >> 63) ^ (v07 << 1); v00 += m12; v00 += v04; v12 ^= v00; v12 = (v12 >> 16) ^ (v12 << 48); v08 += v12; v04 ^= v08; v04 = (v04 >> 63) ^ (v04 << 1); v01 += m10; v01 += v05; v13 ^= v01; v13 = (v13 >> 16) ^ (v13 << 48); v09 += v13; v05 ^= v09; v05 = (v05 >> 63) ^ (v05 << 1); v00 += m04; v00 += v05; v15 ^= 
v00; v15 = (v15 >> 32) ^ (v15 << 32); v10 += v15; v05 ^= v10; v05 = (v05 >> 24) ^ (v05 << 40); v01 += m07; v01 += v06; v12 ^= v01; v12 = (v12 >> 32) ^ (v12 << 32); v11 += v12; v06 ^= v11; v06 = (v06 >> 24) ^ (v06 << 40); v02 += m15; v02 += v07; v13 ^= v02; v13 = (v13 >> 32) ^ (v13 << 32); v08 += v13; v07 ^= v08; v07 = (v07 >> 24) ^ (v07 << 40); v03 += m01; v03 += v04; v14 ^= v03; v14 = (v14 >> 32) ^ (v14 << 32); v09 += v14; v04 ^= v09; v04 = (v04 >> 24) ^ (v04 << 40); v02 += m14; v02 += v07; v13 ^= v02; v13 = (v13 >> 16) ^ (v13 << 48); v08 += v13; v07 ^= v08; v07 = (v07 >> 63) ^ (v07 << 1); v03 += m09; v03 += v04; v14 ^= v03; v14 = (v14 >> 16) ^ (v14 << 48); v09 += v14; v04 ^= v09; v04 = (v04 >> 63) ^ (v04 << 1); v00 += m13; v00 += v05; v15 ^= v00; v15 = (v15 >> 16) ^ (v15 << 48); v10 += v15; v05 ^= v10; v05 = (v05 >> 63) ^ (v05 << 1); v01 += m05; v01 += v06; v12 ^= v01; v12 = (v12 >> 16) ^ (v12 << 48); v11 += v12; v06 ^= v11; v06 = (v06 >> 63) ^ (v06 << 1); //ROUND 7 v00 += m12; v00 += v04; v12 ^= v00; v12 = (v12 >> 32) ^ (v12 << 32); v08 += v12; v04 ^= v08; v04 = (v04 >> 24) ^ (v04 << 40); v01 += m01; v01 += v05; v13 ^= v01; v13 = (v13 >> 32) ^ (v13 << 32); v09 += v13; v05 ^= v09; v05 = (v05 >> 24) ^ (v05 << 40); v02 += m14; v02 += v06; v14 ^= v02; v14 = (v14 >> 32) ^ (v14 << 32); v10 += v14; v06 ^= v10; v06 = (v06 >> 24) ^ (v06 << 40); v03 += m04; v03 += v07; v15 ^= v03; v15 = (v15 >> 32) ^ (v15 << 32); v11 += v15; v07 ^= v11; v07 = (v07 >> 24) ^ (v07 << 40); v02 += m13; v02 += v06; v14 ^= v02; v14 = (v14 >> 16) ^ (v14 << 48); v10 += v14; v06 ^= v10; v06 = (v06 >> 63) ^ (v06 << 1); v03 += m10; v03 += v07; v15 ^= v03; v15 = (v15 >> 16) ^ (v15 << 48); v11 += v15; v07 ^= v11; v07 = (v07 >> 63) ^ (v07 << 1); v00 += m05; v00 += v04; v12 ^= v00; v12 = (v12 >> 16) ^ (v12 << 48); v08 += v12; v04 ^= v08; v04 = (v04 >> 63) ^ (v04 << 1); v01 += m15; v01 += v05; v13 ^= v01; v13 = (v13 >> 16) ^ (v13 << 48); v09 += v13; v05 ^= v09; v05 = (v05 >> 63) ^ (v05 << 1); v00 += m00; 
v00 += v05; v15 ^= v00; v15 = (v15 >> 32) ^ (v15 << 32); v10 += v15; v05 ^= v10; v05 = (v05 >> 24) ^ (v05 << 40); v01 += m06; v01 += v06; v12 ^= v01; v12 = (v12 >> 32) ^ (v12 << 32); v11 += v12; v06 ^= v11; v06 = (v06 >> 24) ^ (v06 << 40); v02 += m09; v02 += v07; v13 ^= v02; v13 = (v13 >> 32) ^ (v13 << 32); v08 += v13; v07 ^= v08; v07 = (v07 >> 24) ^ (v07 << 40); v03 += m08; v03 += v04; v14 ^= v03; v14 = (v14 >> 32) ^ (v14 << 32); v09 += v14; v04 ^= v09; v04 = (v04 >> 24) ^ (v04 << 40); v02 += m02; v02 += v07; v13 ^= v02; v13 = (v13 >> 16) ^ (v13 << 48); v08 += v13; v07 ^= v08; v07 = (v07 >> 63) ^ (v07 << 1); v03 += m11; v03 += v04; v14 ^= v03; v14 = (v14 >> 16) ^ (v14 << 48); v09 += v14; v04 ^= v09; v04 = (v04 >> 63) ^ (v04 << 1); v00 += m07; v00 += v05; v15 ^= v00; v15 = (v15 >> 16) ^ (v15 << 48); v10 += v15; v05 ^= v10; v05 = (v05 >> 63) ^ (v05 << 1); v01 += m03; v01 += v06; v12 ^= v01; v12 = (v12 >> 16) ^ (v12 << 48); v11 += v12; v06 ^= v11; v06 = (v06 >> 63) ^ (v06 << 1); //ROUND 8 v00 += m13; v00 += v04; v12 ^= v00; v12 = (v12 >> 32) ^ (v12 << 32); v08 += v12; v04 ^= v08; v04 = (v04 >> 24) ^ (v04 << 40); v01 += m07; v01 += v05; v13 ^= v01; v13 = (v13 >> 32) ^ (v13 << 32); v09 += v13; v05 ^= v09; v05 = (v05 >> 24) ^ (v05 << 40); v02 += m12; v02 += v06; v14 ^= v02; v14 = (v14 >> 32) ^ (v14 << 32); v10 += v14; v06 ^= v10; v06 = (v06 >> 24) ^ (v06 << 40); v03 += m03; v03 += v07; v15 ^= v03; v15 = (v15 >> 32) ^ (v15 << 32); v11 += v15; v07 ^= v11; v07 = (v07 >> 24) ^ (v07 << 40); v02 += m01; v02 += v06; v14 ^= v02; v14 = (v14 >> 16) ^ (v14 << 48); v10 += v14; v06 ^= v10; v06 = (v06 >> 63) ^ (v06 << 1); v03 += m09; v03 += v07; v15 ^= v03; v15 = (v15 >> 16) ^ (v15 << 48); v11 += v15; v07 ^= v11; v07 = (v07 >> 63) ^ (v07 << 1); v00 += m11; v00 += v04; v12 ^= v00; v12 = (v12 >> 16) ^ (v12 << 48); v08 += v12; v04 ^= v08; v04 = (v04 >> 63) ^ (v04 << 1); v01 += m14; v01 += v05; v13 ^= v01; v13 = (v13 >> 16) ^ (v13 << 48); v09 += v13; v05 ^= v09; v05 = (v05 >> 63) ^ (v05 
<< 1); v00 += m05; v00 += v05; v15 ^= v00; v15 = (v15 >> 32) ^ (v15 << 32); v10 += v15; v05 ^= v10; v05 = (v05 >> 24) ^ (v05 << 40); v01 += m15; v01 += v06; v12 ^= v01; v12 = (v12 >> 32) ^ (v12 << 32); v11 += v12; v06 ^= v11; v06 = (v06 >> 24) ^ (v06 << 40); v02 += m08; v02 += v07; v13 ^= v02; v13 = (v13 >> 32) ^ (v13 << 32); v08 += v13; v07 ^= v08; v07 = (v07 >> 24) ^ (v07 << 40); v03 += m02; v03 += v04; v14 ^= v03; v14 = (v14 >> 32) ^ (v14 << 32); v09 += v14; v04 ^= v09; v04 = (v04 >> 24) ^ (v04 << 40); v02 += m06; v02 += v07; v13 ^= v02; v13 = (v13 >> 16) ^ (v13 << 48); v08 += v13; v07 ^= v08; v07 = (v07 >> 63) ^ (v07 << 1); v03 += m10; v03 += v04; v14 ^= v03; v14 = (v14 >> 16) ^ (v14 << 48); v09 += v14; v04 ^= v09; v04 = (v04 >> 63) ^ (v04 << 1); v00 += m00; v00 += v05; v15 ^= v00; v15 = (v15 >> 16) ^ (v15 << 48); v10 += v15; v05 ^= v10; v05 = (v05 >> 63) ^ (v05 << 1); v01 += m04; v01 += v06; v12 ^= v01; v12 = (v12 >> 16) ^ (v12 << 48); v11 += v12; v06 ^= v11; v06 = (v06 >> 63) ^ (v06 << 1); //ROUND 9 v00 += m06; v00 += v04; v12 ^= v00; v12 = (v12 >> 32) ^ (v12 << 32); v08 += v12; v04 ^= v08; v04 = (v04 >> 24) ^ (v04 << 40); v01 += m14; v01 += v05; v13 ^= v01; v13 = (v13 >> 32) ^ (v13 << 32); v09 += v13; v05 ^= v09; v05 = (v05 >> 24) ^ (v05 << 40); v02 += m11; v02 += v06; v14 ^= v02; v14 = (v14 >> 32) ^ (v14 << 32); v10 += v14; v06 ^= v10; v06 = (v06 >> 24) ^ (v06 << 40); v03 += m00; v03 += v07; v15 ^= v03; v15 = (v15 >> 32) ^ (v15 << 32); v11 += v15; v07 ^= v11; v07 = (v07 >> 24) ^ (v07 << 40); v02 += m03; v02 += v06; v14 ^= v02; v14 = (v14 >> 16) ^ (v14 << 48); v10 += v14; v06 ^= v10; v06 = (v06 >> 63) ^ (v06 << 1); v03 += m08; v03 += v07; v15 ^= v03; v15 = (v15 >> 16) ^ (v15 << 48); v11 += v15; v07 ^= v11; v07 = (v07 >> 63) ^ (v07 << 1); v00 += m15; v00 += v04; v12 ^= v00; v12 = (v12 >> 16) ^ (v12 << 48); v08 += v12; v04 ^= v08; v04 = (v04 >> 63) ^ (v04 << 1); v01 += m09; v01 += v05; v13 ^= v01; v13 = (v13 >> 16) ^ (v13 << 48); v09 += v13; v05 ^= v09; v05 = 
(v05 >> 63) ^ (v05 << 1); v00 += m12; v00 += v05; v15 ^= v00; v15 = (v15 >> 32) ^ (v15 << 32); v10 += v15; v05 ^= v10; v05 = (v05 >> 24) ^ (v05 << 40); v01 += m13; v01 += v06; v12 ^= v01; v12 = (v12 >> 32) ^ (v12 << 32); v11 += v12; v06 ^= v11; v06 = (v06 >> 24) ^ (v06 << 40); v02 += m01; v02 += v07; v13 ^= v02; v13 = (v13 >> 32) ^ (v13 << 32); v08 += v13; v07 ^= v08; v07 = (v07 >> 24) ^ (v07 << 40); v03 += m10; v03 += v04; v14 ^= v03; v14 = (v14 >> 32) ^ (v14 << 32); v09 += v14; v04 ^= v09; v04 = (v04 >> 24) ^ (v04 << 40); v02 += m04; v02 += v07; v13 ^= v02; v13 = (v13 >> 16) ^ (v13 << 48); v08 += v13; v07 ^= v08; v07 = (v07 >> 63) ^ (v07 << 1); v03 += m05; v03 += v04; v14 ^= v03; v14 = (v14 >> 16) ^ (v14 << 48); v09 += v14; v04 ^= v09; v04 = (v04 >> 63) ^ (v04 << 1); v00 += m02; v00 += v05; v15 ^= v00; v15 = (v15 >> 16) ^ (v15 << 48); v10 += v15; v05 ^= v10; v05 = (v05 >> 63) ^ (v05 << 1); v01 += m07; v01 += v06; v12 ^= v01; v12 = (v12 >> 16) ^ (v12 << 48); v11 += v12; v06 ^= v11; v06 = (v06 >> 63) ^ (v06 << 1); //ROUND 10 v00 += m10; v00 += v04; v12 ^= v00; v12 = (v12 >> 32) ^ (v12 << 32); v08 += v12; v04 ^= v08; v04 = (v04 >> 24) ^ (v04 << 40); v01 += m08; v01 += v05; v13 ^= v01; v13 = (v13 >> 32) ^ (v13 << 32); v09 += v13; v05 ^= v09; v05 = (v05 >> 24) ^ (v05 << 40); v02 += m07; v02 += v06; v14 ^= v02; v14 = (v14 >> 32) ^ (v14 << 32); v10 += v14; v06 ^= v10; v06 = (v06 >> 24) ^ (v06 << 40); v03 += m01; v03 += v07; v15 ^= v03; v15 = (v15 >> 32) ^ (v15 << 32); v11 += v15; v07 ^= v11; v07 = (v07 >> 24) ^ (v07 << 40); v02 += m06; v02 += v06; v14 ^= v02; v14 = (v14 >> 16) ^ (v14 << 48); v10 += v14; v06 ^= v10; v06 = (v06 >> 63) ^ (v06 << 1); v03 += m05; v03 += v07; v15 ^= v03; v15 = (v15 >> 16) ^ (v15 << 48); v11 += v15; v07 ^= v11; v07 = (v07 >> 63) ^ (v07 << 1); v00 += m02; v00 += v04; v12 ^= v00; v12 = (v12 >> 16) ^ (v12 << 48); v08 += v12; v04 ^= v08; v04 = (v04 >> 63) ^ (v04 << 1); v01 += m04; v01 += v05; v13 ^= v01; v13 = (v13 >> 16) ^ (v13 << 48); v09 += 
v13; v05 ^= v09; v05 = (v05 >> 63) ^ (v05 << 1); v00 += m15; v00 += v05; v15 ^= v00; v15 = (v15 >> 32) ^ (v15 << 32); v10 += v15; v05 ^= v10; v05 = (v05 >> 24) ^ (v05 << 40); v01 += m09; v01 += v06; v12 ^= v01; v12 = (v12 >> 32) ^ (v12 << 32); v11 += v12; v06 ^= v11; v06 = (v06 >> 24) ^ (v06 << 40); v02 += m03; v02 += v07; v13 ^= v02; v13 = (v13 >> 32) ^ (v13 << 32); v08 += v13; v07 ^= v08; v07 = (v07 >> 24) ^ (v07 << 40); v03 += m13; v03 += v04; v14 ^= v03; v14 = (v14 >> 32) ^ (v14 << 32); v09 += v14; v04 ^= v09; v04 = (v04 >> 24) ^ (v04 << 40); v02 += m12; v02 += v07; v13 ^= v02; v13 = (v13 >> 16) ^ (v13 << 48); v08 += v13; v07 ^= v08; v07 = (v07 >> 63) ^ (v07 << 1); v03 += m00; v03 += v04; v14 ^= v03; v14 = (v14 >> 16) ^ (v14 << 48); v09 += v14; v04 ^= v09; v04 = (v04 >> 63) ^ (v04 << 1); v00 += m11; v00 += v05; v15 ^= v00; v15 = (v15 >> 16) ^ (v15 << 48); v10 += v15; v05 ^= v10; v05 = (v05 >> 63) ^ (v05 << 1); v01 += m14; v01 += v06; v12 ^= v01; v12 = (v12 >> 16) ^ (v12 << 48); v11 += v12; v06 ^= v11; v06 = (v06 >> 63) ^ (v06 << 1); //ROUND 11 v00 += m00; v00 += v04; v12 ^= v00; v12 = (v12 >> 32) ^ (v12 << 32); v08 += v12; v04 ^= v08; v04 = (v04 >> 24) ^ (v04 << 40); v01 += m02; v01 += v05; v13 ^= v01; v13 = (v13 >> 32) ^ (v13 << 32); v09 += v13; v05 ^= v09; v05 = (v05 >> 24) ^ (v05 << 40); v02 += m04; v02 += v06; v14 ^= v02; v14 = (v14 >> 32) ^ (v14 << 32); v10 += v14; v06 ^= v10; v06 = (v06 >> 24) ^ (v06 << 40); v03 += m06; v03 += v07; v15 ^= v03; v15 = (v15 >> 32) ^ (v15 << 32); v11 += v15; v07 ^= v11; v07 = (v07 >> 24) ^ (v07 << 40); v02 += m05; v02 += v06; v14 ^= v02; v14 = (v14 >> 16) ^ (v14 << 48); v10 += v14; v06 ^= v10; v06 = (v06 >> 63) ^ (v06 << 1); v03 += m07; v03 += v07; v15 ^= v03; v15 = (v15 >> 16) ^ (v15 << 48); v11 += v15; v07 ^= v11; v07 = (v07 >> 63) ^ (v07 << 1); v00 += m01; v00 += v04; v12 ^= v00; v12 = (v12 >> 16) ^ (v12 << 48); v08 += v12; v04 ^= v08; v04 = (v04 >> 63) ^ (v04 << 1); v01 += m03; v01 += v05; v13 ^= v01; v13 = (v13 >> 16) ^ 
(v13 << 48); v09 += v13; v05 ^= v09; v05 = (v05 >> 63) ^ (v05 << 1); v00 += m08; v00 += v05; v15 ^= v00; v15 = (v15 >> 32) ^ (v15 << 32); v10 += v15; v05 ^= v10; v05 = (v05 >> 24) ^ (v05 << 40); v01 += m10; v01 += v06; v12 ^= v01; v12 = (v12 >> 32) ^ (v12 << 32); v11 += v12; v06 ^= v11; v06 = (v06 >> 24) ^ (v06 << 40); v02 += m12; v02 += v07; v13 ^= v02; v13 = (v13 >> 32) ^ (v13 << 32); v08 += v13; v07 ^= v08; v07 = (v07 >> 24) ^ (v07 << 40); v03 += m14; v03 += v04; v14 ^= v03; v14 = (v14 >> 32) ^ (v14 << 32); v09 += v14; v04 ^= v09; v04 = (v04 >> 24) ^ (v04 << 40); v02 += m13; v02 += v07; v13 ^= v02; v13 = (v13 >> 16) ^ (v13 << 48); v08 += v13; v07 ^= v08; v07 = (v07 >> 63) ^ (v07 << 1); v03 += m15; v03 += v04; v14 ^= v03; v14 = (v14 >> 16) ^ (v14 << 48); v09 += v14; v04 ^= v09; v04 = (v04 >> 63) ^ (v04 << 1); v00 += m09; v00 += v05; v15 ^= v00; v15 = (v15 >> 16) ^ (v15 << 48); v10 += v15; v05 ^= v10; v05 = (v05 >> 63) ^ (v05 << 1); v01 += m11; v01 += v06; v12 ^= v01; v12 = (v12 >> 16) ^ (v12 << 48); v11 += v12; v06 ^= v11; v06 = (v06 >> 63) ^ (v06 << 1); //ROUND 12 v00 += m14; v00 += v04; v12 ^= v00; v12 = (v12 >> 32) ^ (v12 << 32); v08 += v12; v04 ^= v08; v04 = (v04 >> 24) ^ (v04 << 40); v01 += m04; v01 += v05; v13 ^= v01; v13 = (v13 >> 32) ^ (v13 << 32); v09 += v13; v05 ^= v09; v05 = (v05 >> 24) ^ (v05 << 40); v02 += m09; v02 += v06; v14 ^= v02; v14 = (v14 >> 32) ^ (v14 << 32); v10 += v14; v06 ^= v10; v06 = (v06 >> 24) ^ (v06 << 40); v03 += m13; v03 += v07; v15 ^= v03; v15 = (v15 >> 32) ^ (v15 << 32); v11 += v15; v07 ^= v11; v07 = (v07 >> 24) ^ (v07 << 40); v02 += m15; v02 += v06; v14 ^= v02; v14 = (v14 >> 16) ^ (v14 << 48); v10 += v14; v06 ^= v10; v06 = (v06 >> 63) ^ (v06 << 1); v03 += m06; v03 += v07; v15 ^= v03; v15 = (v15 >> 16) ^ (v15 << 48); v11 += v15; v07 ^= v11; v07 = (v07 >> 63) ^ (v07 << 1); v00 += m10; v00 += v04; v12 ^= v00; v12 = (v12 >> 16) ^ (v12 << 48); v08 += v12; v04 ^= v08; v04 = (v04 >> 63) ^ (v04 << 1); v01 += m08; v01 += v05; v13 ^= v01; 
v13 = (v13 >> 16) ^ (v13 << 48); v09 += v13; v05 ^= v09; v05 = (v05 >> 63) ^ (v05 << 1); v00 += m01; v00 += v05; v15 ^= v00; v15 = (v15 >> 32) ^ (v15 << 32); v10 += v15; v05 ^= v10; v05 = (v05 >> 24) ^ (v05 << 40); v01 += m00; v01 += v06; v12 ^= v01; v12 = (v12 >> 32) ^ (v12 << 32); v11 += v12; v06 ^= v11; v06 = (v06 >> 24) ^ (v06 << 40); v02 += m11; v02 += v07; v13 ^= v02; v13 = (v13 >> 32) ^ (v13 << 32); v08 += v13; v07 ^= v08; v07 = (v07 >> 24) ^ (v07 << 40); v03 += m05; v03 += v04; v14 ^= v03; v14 = (v14 >> 32) ^ (v14 << 32); v09 += v14; v04 ^= v09; v04 = (v04 >> 24) ^ (v04 << 40); v02 += m07; v02 += v07; v13 ^= v02; v13 = (v13 >> 16) ^ (v13 << 48); v08 += v13; v07 ^= v08; v07 = (v07 >> 63) ^ (v07 << 1); v03 += m03; v03 += v04; v14 ^= v03; v14 = (v14 >> 16) ^ (v14 << 48); v09 += v14; v04 ^= v09; v04 = (v04 >> 63) ^ (v04 << 1); v00 += m12; v00 += v05; v15 ^= v00; v15 = (v15 >> 16) ^ (v15 << 48); v10 += v15; v05 ^= v10; v05 = (v05 >> 63) ^ (v05 << 1); v01 += m02; v01 += v06; v12 ^= v01; v12 = (v12 >> 16) ^ (v12 << 48); v11 += v12; v06 ^= v11; v06 = (v06 >> 63) ^ (v06 << 1); s->h[0] ^= v00 ^ v08; s->h[1] ^= v01 ^ v09; s->h[2] ^= v02 ^ v10; s->h[3] ^= v03 ^ v11; s->h[4] ^= v04 ^ v12; s->h[5] ^= v05 ^ v13; s->h[6] ^= v06 ^ v14; s->h[7] ^= v07 ^ v15; }