Exemplo n.º 1
0
        /// <summary>
        /// Checks that <c>Vector.Negate4D</c> applied to a vector of positive zeros produces a
        /// vector whose 64-bit lanes all have the bit pattern of <c>long.MinValue</c>
        /// (sign bit set, everything else clear).
        /// </summary>
        public void Negate_NegateZero_Passes()
        {
            // The comparison is done on raw bits, so the expectation is expressed as integers.
            var expected = Vector256.Create(long.MinValue);

            Vector256<double> result = Vector.Negate4D(Vector256.Create(0d));

            bool bitsMatch = result.AsInt64().Equals(expected);
            Assert.True(bitsMatch, $"Expected {expected}, got {result}");
        }
Exemplo n.º 2
0
    /// <summary>
    /// Subtracts <c>VMinusUpper128Le</c> from the 64-bit lanes of <paramref name="nonce"/> and
    /// additionally subtracts a carry mask from the high 64-bit element of each 128-bit half.
    /// </summary>
    /// <remarks>
    /// NOTE(review): <c>VMinusUpper128Le</c> is declared outside this method; judging by its name
    /// it presumably holds -1 in the low 64 bits of the upper 128-bit half so that the
    /// subtraction acts as an increment - confirm against its definition.
    /// </remarks>
    /// <typeparam name="T">Element type of the vector; only reinterpreted, never inspected.</typeparam>
    /// <param name="nonce">The vector to update, treated as four 64-bit lanes.</param>
    /// <returns>The updated lanes reinterpreted back as <c>Vector256&lt;T&gt;</c>.</returns>
    public static Vector256<T> IncUpper128Le<T>(this Vector256<T> nonce) where T : struct
    {
        Vector256<long> lanes = nonce.AsInt64();

        // The mask must be taken from the lanes *before* the subtraction below:
        // a lane equal to the constant becomes all-ones (-1), everything else 0.
        Vector256<long> equalMask = Avx2.CompareEqual(lanes, VMinusUpper128Le);

        // Shifting each 128-bit half left by 8 bytes moves the low element's mask into the
        // high element and clears the low element.
        Vector256<long> carry = Avx2.ShiftLeftLogical128BitLane(equalMask, 8);

        Vector256<long> stepped = Avx2.Subtract(lanes, VMinusUpper128Le);

        // Subtracting an all-ones lane adds one to that lane.
        return Avx2.Subtract(stepped, carry).As<long, T>();
    }
Exemplo n.º 3
0
        /// <summary>
        /// Loads 32 consecutive UTF-16 code units starting at <paramref name="src"/> and narrows
        /// them to 32 bytes, returned in source order.
        /// </summary>
        /// <remarks>
        /// The narrowing uses <c>Avx2.PackUnsignedSaturate</c>, which treats each 16-bit input as
        /// signed and saturates to the range 0..255. No bounds checking is performed: the load
        /// covers 64 bytes beginning at <paramref name="src"/>.
        /// </remarks>
        /// <param name="src">Reference to the first of the 32 chars to read.</param>
        /// <returns>The 32 narrowed values reinterpreted as signed bytes.</returns>
        public static Vector256<sbyte> ReadVector256(this ref char src)
        {
            // Two unaligned 256-bit loads: chars [0, 16) and chars [16, 32).
            Vector256<short> firstHalf = Unsafe.As<char, Vector256<short>>(ref src);
            Vector256<short> secondHalf = Unsafe.As<char, Vector256<short>>(
                ref Unsafe.Add(ref src, Vector256<short>.Count));

            // The pack works per 128-bit half, so the 64-bit quarters come out as
            // [first 0-7, second 0-7, first 8-15, second 8-15].
            Vector256<byte> interleaved = Avx2.PackUnsignedSaturate(firstHalf, secondHalf);

            // Swap the two middle quarters to restore sequential order.
            return Avx2.Permute4x64(interleaved.AsInt64(), 0b_11_01_10_00).AsSByte();
        }
Exemplo n.º 4
0
        /// <summary>
        /// Compares two double vectors lane by lane on their raw 64-bit patterns.
        /// </summary>
        /// <remarks>
        /// Because the comparison is bitwise, <c>+0.0</c> and <c>-0.0</c> are reported as
        /// different, and two NaNs with the same bit pattern are reported as equal.
        /// </remarks>
        /// <param name="left">First vector.</param>
        /// <param name="right">Second vector.</param>
        /// <returns><c>true</c> when every lane has identical bits; otherwise <c>false</c>.</returns>
        public static bool AreEqual(Vector256<double> left, Vector256<double> right)
        {
            // Reinterpret once up front instead of on every iteration.
            Vector256<long> leftBits = left.AsInt64();
            Vector256<long> rightBits = right.AsInt64();

            for (int lane = 0; lane < Vector256<double>.Count; lane++)
            {
                if (leftBits.GetElement(lane) == rightBits.GetElement(lane))
                {
                    continue;
                }

                return false;
            }

            return true;
        }
Exemplo n.º 5
0
 /// <summary>
 /// Adds two 256-bit vectors lane by lane, dispatching on the element type <typeparamref name="T"/>.
 /// </summary>
 /// <remarks>
 /// Integer element types go through <c>Avx2.Add</c>; <c>float</c> and <c>double</c> go through
 /// <c>Avx.Add</c>.
 /// </remarks>
 /// <typeparam name="T">Element type of the vectors.</typeparam>
 /// <param name="left">First operand.</param>
 /// <param name="right">Second operand.</param>
 /// <returns>The lane-wise sum.</returns>
 /// <exception cref="NotSupportedException">
 /// <typeparamref name="T"/> is not one of the ten handled primitive types.
 /// </exception>
 public static Vector256<T> Vector256Add<T>(Vector256<T> left, Vector256<T> right) where T : struct
 {
     // 8-bit lanes
     if (typeof(T) == typeof(byte))
     {
         Vector256<byte> sum = Avx2.Add(left.AsByte(), right.AsByte());
         return sum.As<byte, T>();
     }

     if (typeof(T) == typeof(sbyte))
     {
         Vector256<sbyte> sum = Avx2.Add(left.AsSByte(), right.AsSByte());
         return sum.As<sbyte, T>();
     }

     // 16-bit lanes
     if (typeof(T) == typeof(short))
     {
         Vector256<short> sum = Avx2.Add(left.AsInt16(), right.AsInt16());
         return sum.As<short, T>();
     }

     if (typeof(T) == typeof(ushort))
     {
         Vector256<ushort> sum = Avx2.Add(left.AsUInt16(), right.AsUInt16());
         return sum.As<ushort, T>();
     }

     // 32-bit integer lanes
     if (typeof(T) == typeof(int))
     {
         Vector256<int> sum = Avx2.Add(left.AsInt32(), right.AsInt32());
         return sum.As<int, T>();
     }

     if (typeof(T) == typeof(uint))
     {
         Vector256<uint> sum = Avx2.Add(left.AsUInt32(), right.AsUInt32());
         return sum.As<uint, T>();
     }

     // 64-bit integer lanes
     if (typeof(T) == typeof(long))
     {
         Vector256<long> sum = Avx2.Add(left.AsInt64(), right.AsInt64());
         return sum.As<long, T>();
     }

     if (typeof(T) == typeof(ulong))
     {
         Vector256<ulong> sum = Avx2.Add(left.AsUInt64(), right.AsUInt64());
         return sum.As<ulong, T>();
     }

     // Floating-point lanes
     if (typeof(T) == typeof(float))
     {
         Vector256<float> sum = Avx.Add(left.AsSingle(), right.AsSingle());
         return sum.As<float, T>();
     }

     if (typeof(T) == typeof(double))
     {
         Vector256<double> sum = Avx.Add(left.AsDouble(), right.AsDouble());
         return sum.As<double, T>();
     }

     throw new NotSupportedException();
 }
            /// <summary>
            /// AVX2 kernel that, for every output vector, accumulates the sum over <c>k</c> of one
            /// broadcast 32-bit element of <paramref name="s"/> multiplied by one vector of
            /// <paramref name="t"/>, then passes the accumulated 64-bit sums through
            /// <c>VectorizedStaticModInt&lt;T&gt;.Reduce</c> and stores the result in <paramref name="u"/>.
            /// </summary>
            /// <remarks>
            /// Indexing used by the code: <c>s[row * B8 + k / 8]</c> (lane <c>l</c>),
            /// <c>t[j * B + k + l]</c>, <c>u[row * B8 + j]</c>.
            /// The first loop writes back into <paramref name="s"/> and <paramref name="t"/>, so both
            /// inputs are modified in place.
            /// NOTE(review): <c>B</c>, <c>B8</c>, <c>M1</c>, <c>M2</c> and <c>Reduce</c> are declared outside
            /// this method. The indexing suggests <c>B8 == B / 8</c>, and the names suggest M1/M2 are
            /// the modulus and twice the modulus in every lane - confirm against their definitions.
            /// </remarks>
            /// <param name="s">Left operand; read as rows of <c>B8</c> vectors. Modified in place by the pre-pass.</param>
            /// <param name="t">Right operand; read as <c>B8</c> groups of <c>B</c> vectors. Modified in place by the pre-pass.</param>
            /// <param name="u">Destination; one vector is written per (row, j) pair.</param>
            private static void MulSimd(Span <VectorizedStaticModInt <T> > s, Span <VectorizedStaticModInt <T> > t, Span <VectorizedStaticModInt <T> > u)
            {
                // Pre-pass over all B * B8 vectors of s and t: build a mask of the 32-bit lanes that
                // compare greater than M1 (signed compare) and subtract M1 from exactly those lanes.
                for (int i = 0; i < B * B8; i++)
                {
                    var cmpS = Avx2.CompareGreaterThan(s[i].Value.AsInt32(), VectorizedStaticModInt <T> .M1.AsInt32()).AsUInt32();
                    var cmpT = Avx2.CompareGreaterThan(t[i].Value.AsInt32(), VectorizedStaticModInt <T> .M1.AsInt32()).AsUInt32();
                    var difS = Avx2.And(cmpS, VectorizedStaticModInt <T> .M1);
                    var difT = Avx2.And(cmpT, VectorizedStaticModInt <T> .M1);
                    s[i] = Avx2.Subtract(s[i].Value, difS);
                    t[i] = Avx2.Subtract(t[i].Value, difT);
                }

                // Scalar copies of lane 0 of M1 and M2, used to build the 64-bit thresholds below.
                var m1v = VectorizedStaticModInt <T> .M1.GetElement(0);

                var m2v = VectorizedStaticModInt <T> .M2.GetElement(0);

                var zero = new VectorizedStaticModInt <T>().Value;
                // Thresholds built from alternating (0, m1v) / (0, m2v) 32-bit values and then viewed
                // as four 64-bit lanes.
                // NOTE(review): assuming the constructor arguments map to lanes in order, each 64-bit
                // lane holds m1v << 32 (resp. m2v << 32) - confirm against the constructor.
                var th1  = new VectorizedStaticModInt <T>(0, m1v, 0, m1v, 0, m1v, 0, m1v).Value.AsInt64();
                var th2  = new VectorizedStaticModInt <T>(0, m2v, 0, m2v, 0, m2v, 0, m2v).Value.AsInt64();


                // Each outer iteration handles 8 rows of s (i .. i + 7); the unrolled code below
                // repeats the same steps once per row.
                for (int i = 0; i < B; i += 8)
                {
                    for (int j = 0; j < B8; j += 1)
                    {
                        // Two 64-bit accumulators per row (the digit before the trailing 0 is the row
                        // offset 0..7): prod02x0 sums products taken from the even 32-bit lanes of
                        // t's vectors, prod13x0 sums products taken from the odd lanes.
                        Vector256 <ulong> prod0200 = default; Vector256 <ulong> prod1300 = default;
                        Vector256 <ulong> prod0210 = default; Vector256 <ulong> prod1310 = default;
                        Vector256 <ulong> prod0220 = default; Vector256 <ulong> prod1320 = default;
                        Vector256 <ulong> prod0230 = default; Vector256 <ulong> prod1330 = default;
                        Vector256 <ulong> prod0240 = default; Vector256 <ulong> prod1340 = default;
                        Vector256 <ulong> prod0250 = default; Vector256 <ulong> prod1350 = default;
                        Vector256 <ulong> prod0260 = default; Vector256 <ulong> prod1360 = default;
                        Vector256 <ulong> prod0270 = default; Vector256 <ulong> prod1370 = default;
                        for (int k = 0; k < B; k += 8)
                        {
                            for (int l = 0; l < 8; l++)
                            {
                                // T0 is one vector of t. Avx2.Multiply on uint vectors multiplies only the
                                // even 32-bit lanes (giving 64-bit products), so the 0xF5 shuffle copies
                                // lanes (1,1,3,3) of each 128-bit half into place to make the odd lanes
                                // reachable as T130.
                                Vector256 <uint> T0 = t[j * B + k + l].Value; var T130 = Avx2.Shuffle(T0, 0xF5);
                                // Per row: broadcast lane l of the row's s vector, multiply it with the
                                // even and the odd lanes of T0, and add both products to the accumulators.
                                var S00    = Vector256.Create(s[(i + 0) * B8 + k / 8].Value.GetElement(l));
                                var ST0200 = Avx2.Multiply(S00, T0);
                                var ST1300 = Avx2.Multiply(S00, T130);
                                prod0200 = Avx2.Add(prod0200, ST0200);
                                prod1300 = Avx2.Add(prod1300, ST1300);
                                var S10    = Vector256.Create(s[(i + 1) * B8 + k / 8].Value.GetElement(l));
                                var ST0210 = Avx2.Multiply(S10, T0);
                                var ST1310 = Avx2.Multiply(S10, T130);
                                prod0210 = Avx2.Add(prod0210, ST0210);
                                prod1310 = Avx2.Add(prod1310, ST1310);
                                var S20    = Vector256.Create(s[(i + 2) * B8 + k / 8].Value.GetElement(l));
                                var ST0220 = Avx2.Multiply(S20, T0);
                                var ST1320 = Avx2.Multiply(S20, T130);
                                prod0220 = Avx2.Add(prod0220, ST0220);
                                prod1320 = Avx2.Add(prod1320, ST1320);
                                var S30    = Vector256.Create(s[(i + 3) * B8 + k / 8].Value.GetElement(l));
                                var ST0230 = Avx2.Multiply(S30, T0);
                                var ST1330 = Avx2.Multiply(S30, T130);
                                prod0230 = Avx2.Add(prod0230, ST0230);
                                prod1330 = Avx2.Add(prod1330, ST1330);
                                var S40    = Vector256.Create(s[(i + 4) * B8 + k / 8].Value.GetElement(l));
                                var ST0240 = Avx2.Multiply(S40, T0);
                                var ST1340 = Avx2.Multiply(S40, T130);
                                prod0240 = Avx2.Add(prod0240, ST0240);
                                prod1340 = Avx2.Add(prod1340, ST1340);
                                var S50    = Vector256.Create(s[(i + 5) * B8 + k / 8].Value.GetElement(l));
                                var ST0250 = Avx2.Multiply(S50, T0);
                                var ST1350 = Avx2.Multiply(S50, T130);
                                prod0250 = Avx2.Add(prod0250, ST0250);
                                prod1350 = Avx2.Add(prod1350, ST1350);
                                var S60    = Vector256.Create(s[(i + 6) * B8 + k / 8].Value.GetElement(l));
                                var ST0260 = Avx2.Multiply(S60, T0);
                                var ST1360 = Avx2.Multiply(S60, T130);
                                prod0260 = Avx2.Add(prod0260, ST0260);
                                prod1360 = Avx2.Add(prod1360, ST1360);
                                var S70    = Vector256.Create(s[(i + 7) * B8 + k / 8].Value.GetElement(l));
                                var ST0270 = Avx2.Multiply(S70, T0);
                                var ST1370 = Avx2.Multiply(S70, T130);
                                prod0270 = Avx2.Add(prod0270, ST0270);
                                prod1370 = Avx2.Add(prod1370, ST1370);
                            }
                            // After every batch of 8 additions: for each accumulator, the lanes that are
                            // negative when viewed as signed 64-bit (0 > prod) have th2 subtracted.
                            // Presumably this keeps the running sums from overflowing 64 bits - confirm.
                            var cmp0200 = Avx2.CompareGreaterThan(zero.AsInt64(), prod0200.AsInt64());
                            var cmp1300 = Avx2.CompareGreaterThan(zero.AsInt64(), prod1300.AsInt64());
                            var dif0200 = Avx2.And(cmp0200, th2);
                            var dif1300 = Avx2.And(cmp1300, th2);
                            prod0200 = Avx2.Subtract(prod0200, dif0200.AsUInt64());
                            prod1300 = Avx2.Subtract(prod1300, dif1300.AsUInt64());
                            var cmp0210 = Avx2.CompareGreaterThan(zero.AsInt64(), prod0210.AsInt64());
                            var cmp1310 = Avx2.CompareGreaterThan(zero.AsInt64(), prod1310.AsInt64());
                            var dif0210 = Avx2.And(cmp0210, th2);
                            var dif1310 = Avx2.And(cmp1310, th2);
                            prod0210 = Avx2.Subtract(prod0210, dif0210.AsUInt64());
                            prod1310 = Avx2.Subtract(prod1310, dif1310.AsUInt64());
                            var cmp0220 = Avx2.CompareGreaterThan(zero.AsInt64(), prod0220.AsInt64());
                            var cmp1320 = Avx2.CompareGreaterThan(zero.AsInt64(), prod1320.AsInt64());
                            var dif0220 = Avx2.And(cmp0220, th2);
                            var dif1320 = Avx2.And(cmp1320, th2);
                            prod0220 = Avx2.Subtract(prod0220, dif0220.AsUInt64());
                            prod1320 = Avx2.Subtract(prod1320, dif1320.AsUInt64());
                            var cmp0230 = Avx2.CompareGreaterThan(zero.AsInt64(), prod0230.AsInt64());
                            var cmp1330 = Avx2.CompareGreaterThan(zero.AsInt64(), prod1330.AsInt64());
                            var dif0230 = Avx2.And(cmp0230, th2);
                            var dif1330 = Avx2.And(cmp1330, th2);
                            prod0230 = Avx2.Subtract(prod0230, dif0230.AsUInt64());
                            prod1330 = Avx2.Subtract(prod1330, dif1330.AsUInt64());
                            var cmp0240 = Avx2.CompareGreaterThan(zero.AsInt64(), prod0240.AsInt64());
                            var cmp1340 = Avx2.CompareGreaterThan(zero.AsInt64(), prod1340.AsInt64());
                            var dif0240 = Avx2.And(cmp0240, th2);
                            var dif1340 = Avx2.And(cmp1340, th2);
                            prod0240 = Avx2.Subtract(prod0240, dif0240.AsUInt64());
                            prod1340 = Avx2.Subtract(prod1340, dif1340.AsUInt64());
                            var cmp0250 = Avx2.CompareGreaterThan(zero.AsInt64(), prod0250.AsInt64());
                            var cmp1350 = Avx2.CompareGreaterThan(zero.AsInt64(), prod1350.AsInt64());
                            var dif0250 = Avx2.And(cmp0250, th2);
                            var dif1350 = Avx2.And(cmp1350, th2);
                            prod0250 = Avx2.Subtract(prod0250, dif0250.AsUInt64());
                            prod1350 = Avx2.Subtract(prod1350, dif1350.AsUInt64());
                            var cmp0260 = Avx2.CompareGreaterThan(zero.AsInt64(), prod0260.AsInt64());
                            var cmp1360 = Avx2.CompareGreaterThan(zero.AsInt64(), prod1360.AsInt64());
                            var dif0260 = Avx2.And(cmp0260, th2);
                            var dif1360 = Avx2.And(cmp1360, th2);
                            prod0260 = Avx2.Subtract(prod0260, dif0260.AsUInt64());
                            prod1360 = Avx2.Subtract(prod1360, dif1360.AsUInt64());
                            var cmp0270 = Avx2.CompareGreaterThan(zero.AsInt64(), prod0270.AsInt64());
                            var cmp1370 = Avx2.CompareGreaterThan(zero.AsInt64(), prod1370.AsInt64());
                            var dif0270 = Avx2.And(cmp0270, th2);
                            var dif1370 = Avx2.And(cmp1370, th2);
                            prod0270 = Avx2.Subtract(prod0270, dif0270.AsUInt64());
                            prod1370 = Avx2.Subtract(prod1370, dif1370.AsUInt64());
                        }

                        // Finalisation, row i + 0: two rounds of "where the accumulator is greater than
                        // th1 (signed 64-bit compare), subtract th1", then hand the even-lane and
                        // odd-lane sums to Reduce and store the result at u[row * B8 + j].
                        // The seven blocks that follow repeat this for rows i + 1 .. i + 7.
                        for (int _ = 0; _ < 2; _++)
                        {
                            var cmp02 = Avx2.CompareGreaterThan(prod0200.AsInt64(), th1);
                            var cmp13 = Avx2.CompareGreaterThan(prod1300.AsInt64(), th1);
                            var dif02 = Avx2.And(cmp02, th1);
                            var dif13 = Avx2.And(cmp13, th1);
                            prod0200 = Avx2.Subtract(prod0200, dif02.AsUInt64());
                            prod1300 = Avx2.Subtract(prod1300, dif13.AsUInt64());
                        }
                        u[(i + 0) * B8 + j + 0] = VectorizedStaticModInt <T> .Reduce(prod0200.AsUInt32(), prod1300.AsUInt32());

                        // Row i + 1.
                        for (int _ = 0; _ < 2; _++)
                        {
                            var cmp02 = Avx2.CompareGreaterThan(prod0210.AsInt64(), th1);
                            var cmp13 = Avx2.CompareGreaterThan(prod1310.AsInt64(), th1);
                            var dif02 = Avx2.And(cmp02, th1);
                            var dif13 = Avx2.And(cmp13, th1);
                            prod0210 = Avx2.Subtract(prod0210, dif02.AsUInt64());
                            prod1310 = Avx2.Subtract(prod1310, dif13.AsUInt64());
                        }
                        u[(i + 1) * B8 + j + 0] = VectorizedStaticModInt <T> .Reduce(prod0210.AsUInt32(), prod1310.AsUInt32());

                        // Row i + 2.
                        for (int _ = 0; _ < 2; _++)
                        {
                            var cmp02 = Avx2.CompareGreaterThan(prod0220.AsInt64(), th1);
                            var cmp13 = Avx2.CompareGreaterThan(prod1320.AsInt64(), th1);
                            var dif02 = Avx2.And(cmp02, th1);
                            var dif13 = Avx2.And(cmp13, th1);
                            prod0220 = Avx2.Subtract(prod0220, dif02.AsUInt64());
                            prod1320 = Avx2.Subtract(prod1320, dif13.AsUInt64());
                        }
                        u[(i + 2) * B8 + j + 0] = VectorizedStaticModInt <T> .Reduce(prod0220.AsUInt32(), prod1320.AsUInt32());

                        // Row i + 3.
                        for (int _ = 0; _ < 2; _++)
                        {
                            var cmp02 = Avx2.CompareGreaterThan(prod0230.AsInt64(), th1);
                            var cmp13 = Avx2.CompareGreaterThan(prod1330.AsInt64(), th1);
                            var dif02 = Avx2.And(cmp02, th1);
                            var dif13 = Avx2.And(cmp13, th1);
                            prod0230 = Avx2.Subtract(prod0230, dif02.AsUInt64());
                            prod1330 = Avx2.Subtract(prod1330, dif13.AsUInt64());
                        }
                        u[(i + 3) * B8 + j + 0] = VectorizedStaticModInt <T> .Reduce(prod0230.AsUInt32(), prod1330.AsUInt32());

                        // Row i + 4.
                        for (int _ = 0; _ < 2; _++)
                        {
                            var cmp02 = Avx2.CompareGreaterThan(prod0240.AsInt64(), th1);
                            var cmp13 = Avx2.CompareGreaterThan(prod1340.AsInt64(), th1);
                            var dif02 = Avx2.And(cmp02, th1);
                            var dif13 = Avx2.And(cmp13, th1);
                            prod0240 = Avx2.Subtract(prod0240, dif02.AsUInt64());
                            prod1340 = Avx2.Subtract(prod1340, dif13.AsUInt64());
                        }
                        u[(i + 4) * B8 + j + 0] = VectorizedStaticModInt <T> .Reduce(prod0240.AsUInt32(), prod1340.AsUInt32());

                        // Row i + 5.
                        for (int _ = 0; _ < 2; _++)
                        {
                            var cmp02 = Avx2.CompareGreaterThan(prod0250.AsInt64(), th1);
                            var cmp13 = Avx2.CompareGreaterThan(prod1350.AsInt64(), th1);
                            var dif02 = Avx2.And(cmp02, th1);
                            var dif13 = Avx2.And(cmp13, th1);
                            prod0250 = Avx2.Subtract(prod0250, dif02.AsUInt64());
                            prod1350 = Avx2.Subtract(prod1350, dif13.AsUInt64());
                        }
                        u[(i + 5) * B8 + j + 0] = VectorizedStaticModInt <T> .Reduce(prod0250.AsUInt32(), prod1350.AsUInt32());

                        // Row i + 6.
                        for (int _ = 0; _ < 2; _++)
                        {
                            var cmp02 = Avx2.CompareGreaterThan(prod0260.AsInt64(), th1);
                            var cmp13 = Avx2.CompareGreaterThan(prod1360.AsInt64(), th1);
                            var dif02 = Avx2.And(cmp02, th1);
                            var dif13 = Avx2.And(cmp13, th1);
                            prod0260 = Avx2.Subtract(prod0260, dif02.AsUInt64());
                            prod1360 = Avx2.Subtract(prod1360, dif13.AsUInt64());
                        }
                        u[(i + 6) * B8 + j + 0] = VectorizedStaticModInt <T> .Reduce(prod0260.AsUInt32(), prod1360.AsUInt32());

                        // Row i + 7.
                        for (int _ = 0; _ < 2; _++)
                        {
                            var cmp02 = Avx2.CompareGreaterThan(prod0270.AsInt64(), th1);
                            var cmp13 = Avx2.CompareGreaterThan(prod1370.AsInt64(), th1);
                            var dif02 = Avx2.And(cmp02, th1);
                            var dif13 = Avx2.And(cmp13, th1);
                            prod0270 = Avx2.Subtract(prod0270, dif02.AsUInt64());
                            prod1370 = Avx2.Subtract(prod1370, dif13.AsUInt64());
                        }
                        u[(i + 7) * B8 + j + 0] = VectorizedStaticModInt <T> .Reduce(prod0270.AsUInt32(), prod1370.AsUInt32());
                    }
                }
            }