Beispiel #1
0
        /// <summary>
        /// Processes input in 512-byte chunks using 256-bit vectors, for as long as
        /// <paramref name="length"/> is at least 512. Each chunk uses eight consecutive
        /// values of the 64-bit counter stored in state words 8 (low half) and 9 (high half).
        /// </summary>
        /// <param name="rounds">Round count; every loop iteration issues eight QuarterRound calls and accounts for two rounds.</param>
        /// <param name="state">Pointer to the 16-word state. Words 8 and 9 are re-read for every chunk and written back incremented by 8.</param>
        /// <param name="source">Input pointer; advanced by 512 bytes per processed chunk.</param>
        /// <param name="destination">Output pointer; advanced by 512 bytes per processed chunk.</param>
        /// <param name="length">Remaining byte count; reduced by 512 per processed chunk. Anything below 512 is left for the caller.</param>
        public static unsafe void SalsaCore512(byte rounds, uint *state, ref byte *source, ref byte *destination, ref int length)
        {
            // Broadcast every state word except the counter words (8 and 9) to all eight lanes.
            var init0  = Vector256.Create(state[0]);
            var init1  = Vector256.Create(state[1]);
            var init2  = Vector256.Create(state[2]);
            var init3  = Vector256.Create(state[3]);
            var init4  = Vector256.Create(state[4]);
            var init5  = Vector256.Create(state[5]);
            var init6  = Vector256.Create(state[6]);
            var init7  = Vector256.Create(state[7]);
            var init10 = Vector256.Create(state[10]);
            var init11 = Vector256.Create(state[11]);
            var init12 = Vector256.Create(state[12]);
            var init13 = Vector256.Create(state[13]);
            var init14 = Vector256.Create(state[14]);
            var init15 = Vector256.Create(state[15]);

            while (length >= 512)
            {
                // Assemble the 64-bit counter from words 8 (low) and 9 (high) and broadcast it.
                ulong counter    = ((ulong)state[9] << 32) | state[8];
                var   counterVec = Vector256.Create(counter);

                // Per-lane counter values; presumably the constants hold the offsets 0..3 and 4..7
                // (they are defined in ChaCha20Utils, outside this view).
                var sumA = Avx2.Add(ChaCha20Utils.IncCounter0123, counterVec).AsUInt32();
                var sumB = Avx2.Add(ChaCha20Utils.IncCounter4567, counterVec).AsUInt32();

                // Two interleave passes split the 64-bit sums into their low and high 32-bit halves.
                var mixedLow  = Avx2.UnpackLow(sumA, sumB);
                var mixedHigh = Avx2.UnpackHigh(sumA, sumB);
                var lowHalves  = Avx2.UnpackLow(mixedLow, mixedHigh);
                var highHalves = Avx2.UnpackHigh(mixedLow, mixedHigh);

                // The unpacks work per 128-bit lane, so the elements come out lane-interleaved;
                // Permute3 presumably restores ascending order (defined elsewhere, TODO confirm).
                var v8 = Avx2.PermuteVar8x32(lowHalves, ChaCha20Utils.Permute3);
                var v9 = Avx2.PermuteVar8x32(highHalves, ChaCha20Utils.Permute3);

                // Keep the pre-round counter words for the final AddTransposeXor step.
                var init8 = v8;
                var init9 = v9;

                // Persist the advanced counter before running the rounds.
                counter += 8;

                state[8] = (uint)(counter & uint.MaxValue);
                state[9] = (uint)(counter >> 32);

                // Working copies of the remaining words.
                var v0  = init0;
                var v1  = init1;
                var v2  = init2;
                var v3  = init3;
                var v4  = init4;
                var v5  = init5;
                var v6  = init6;
                var v7  = init7;
                var v10 = init10;
                var v11 = init11;
                var v12 = init12;
                var v13 = init13;
                var v14 = init14;
                var v15 = init15;

                for (int remaining = rounds; remaining > 0; remaining -= 2)
                {
                    // First group of four quarter rounds.
                    QuarterRound(ref v4, ref v0, ref v12, ref v8);
                    QuarterRound(ref v9, ref v5, ref v1, ref v13);
                    QuarterRound(ref v14, ref v10, ref v6, ref v2);
                    QuarterRound(ref v3, ref v15, ref v11, ref v7);

                    // Second group of four quarter rounds.
                    QuarterRound(ref v1, ref v0, ref v3, ref v2);
                    QuarterRound(ref v6, ref v5, ref v4, ref v7);
                    QuarterRound(ref v11, ref v10, ref v9, ref v8);
                    QuarterRound(ref v12, ref v15, ref v14, ref v13);
                }

                // Words 0..7 are handed over at offset 0 of the chunk.
                ChaCha20Utils.AddTransposeXor(
                    ref v0, ref v1, ref v2, ref v3,
                    ref v4, ref v5, ref v6, ref v7,
                    ref init0, ref init1, ref init2, ref init3,
                    ref init4, ref init5, ref init6, ref init7,
                    source, destination);

                // Words 8..15 are handed over at offset 32 of the chunk.
                ChaCha20Utils.AddTransposeXor(
                    ref v8, ref v9, ref v10, ref v11,
                    ref v12, ref v13, ref v14, ref v15,
                    ref init8, ref init9, ref init10, ref init11,
                    ref init12, ref init13, ref init14, ref init15,
                    source + 32, destination + 32);

                source      += 512;
                destination += 512;
                length      -= 512;
            }
        }
Beispiel #2
0
        /// <summary>
        /// Processes input in 256-byte chunks using 128-bit vectors, for as long as
        /// <paramref name="length"/> is at least 256. Each chunk uses four consecutive
        /// values of the 64-bit counter stored in state words 8 (low half) and 9 (high half).
        /// </summary>
        /// <param name="rounds">Round count; every loop iteration issues eight QuarterRound calls and accounts for two rounds.</param>
        /// <param name="state">Pointer to the 16-word state. Words 8 and 9 are re-read for every chunk and written back incremented by 4.</param>
        /// <param name="source">Input pointer; advanced by 256 bytes per processed chunk.</param>
        /// <param name="destination">Output pointer; advanced by 256 bytes per processed chunk.</param>
        /// <param name="length">Remaining byte count; reduced by 256 per processed chunk. Anything below 256 is left for the caller.</param>
        public static unsafe void SalsaCore256(byte rounds, uint *state, ref byte *source, ref byte *destination, ref int length)
        {
            // Broadcast every state word except the counter words (8 and 9) to all four lanes.
            var init0  = Vector128.Create(state[0]);
            var init1  = Vector128.Create(state[1]);
            var init2  = Vector128.Create(state[2]);
            var init3  = Vector128.Create(state[3]);
            var init4  = Vector128.Create(state[4]);
            var init5  = Vector128.Create(state[5]);
            var init6  = Vector128.Create(state[6]);
            var init7  = Vector128.Create(state[7]);
            var init10 = Vector128.Create(state[10]);
            var init11 = Vector128.Create(state[11]);
            var init12 = Vector128.Create(state[12]);
            var init13 = Vector128.Create(state[13]);
            var init14 = Vector128.Create(state[14]);
            var init15 = Vector128.Create(state[15]);

            while (length >= 256)
            {
                // Words 8 and 9 form a 64-bit counter; each of the four lanes gets its own offset
                // (presumably 0, 1, 2 and 3, judging by the constant names in ChaCha20Utils).
                ulong counter    = ((ulong)state[9] << 32) | state[8];
                var   counterVec = Vector128.Create(counter);

                var sumA = Sse2.Add(ChaCha20Utils.IncCounter01, counterVec).AsUInt32();
                var sumB = Sse2.Add(ChaCha20Utils.IncCounter23, counterVec).AsUInt32();

                // Two interleave passes gather the low 32-bit halves into v8 and the high halves into v9.
                var mixedLow  = Sse2.UnpackLow(sumA, sumB);
                var mixedHigh = Sse2.UnpackHigh(sumA, sumB);

                var v8 = Sse2.UnpackLow(mixedLow, mixedHigh);
                var v9 = Sse2.UnpackHigh(mixedLow, mixedHigh);

                // Keep the pre-round counter words for the final AddTransposeXor step.
                var init8 = v8;
                var init9 = v9;

                // Persist the advanced counter before running the rounds.
                counter += 4;
                state[8] = (uint)(counter & uint.MaxValue);
                state[9] = (uint)(counter >> 32);

                // Working copies of the remaining words.
                var v0  = init0;
                var v1  = init1;
                var v2  = init2;
                var v3  = init3;
                var v4  = init4;
                var v5  = init5;
                var v6  = init6;
                var v7  = init7;
                var v10 = init10;
                var v11 = init11;
                var v12 = init12;
                var v13 = init13;
                var v14 = init14;
                var v15 = init15;

                for (int remaining = rounds; remaining > 0; remaining -= 2)
                {
                    // First group of four quarter rounds.
                    QuarterRound(ref v4, ref v0, ref v12, ref v8);
                    QuarterRound(ref v9, ref v5, ref v1, ref v13);
                    QuarterRound(ref v14, ref v10, ref v6, ref v2);
                    QuarterRound(ref v3, ref v15, ref v11, ref v7);

                    // Second group of four quarter rounds.
                    QuarterRound(ref v1, ref v0, ref v3, ref v2);
                    QuarterRound(ref v6, ref v5, ref v4, ref v7);
                    QuarterRound(ref v11, ref v10, ref v9, ref v8);
                    QuarterRound(ref v12, ref v15, ref v14, ref v13);
                }

                // Each group of four words is handed over at its own 16-byte offset within the chunk.
                ChaCha20Utils.AddTransposeXor(
                    ref v0, ref v1, ref v2, ref v3,
                    ref init0, ref init1, ref init2, ref init3,
                    source, destination);
                ChaCha20Utils.AddTransposeXor(
                    ref v4, ref v5, ref v6, ref v7,
                    ref init4, ref init5, ref init6, ref init7,
                    source + 16, destination + 16);
                ChaCha20Utils.AddTransposeXor(
                    ref v8, ref v9, ref v10, ref v11,
                    ref init8, ref init9, ref init10, ref init11,
                    source + 32, destination + 32);
                ChaCha20Utils.AddTransposeXor(
                    ref v12, ref v13, ref v14, ref v15,
                    ref init12, ref init13, ref init14, ref init15,
                    source + 48, destination + 48);

                length      -= 256;
                source      += 256;
                destination += 256;
            }
        }