/// <summary>
/// Checks that <c>Vector.Negate4D</c> applied to four lanes of +0.0 produces lanes whose
/// raw 64-bit pattern equals <see cref="long.MinValue"/> (only the sign bit set, i.e. -0.0).
/// </summary>
public void Negate_NegateZero_Passes()
{
    // Arrange: the expected lanes are compared as raw bits, not as floating-point values.
    Vector256<long> expected = Vector256.Create(long.MinValue);
    Vector256<double> positiveZero = Vector256.Create(0d);

    // Act
    Vector256<double> result = Vector.Negate4D(positiveZero);

    // Assert
    bool sameBits = result.AsInt64().Equals(expected);
    Assert.True(sameBits, $"Expected {expected}, got {result}");
}
/// <summary>
/// Reinterprets <paramref name="nonce"/> as four 64-bit lanes, subtracts the
/// <c>VMinusUpper128Le</c> constant from them and propagates a carry into the next-higher
/// 64-bit lane of the same 128-bit half.
/// </summary>
/// <typeparam name="T">Element type of the vector; only its raw bits are used.</typeparam>
/// <param name="nonce">The vector to update.</param>
/// <returns>The updated lanes reinterpreted back as <c>Vector256&lt;T&gt;</c>.</returns>
/// <remarks>
/// NOTE(review): the name suggests a little-endian increment of the upper 128 bits; the exact
/// effect depends on the value of <c>VMinusUpper128Le</c>, which is defined outside this method.
/// </remarks>
public static Vector256<T> IncUpper128Le<T>(this Vector256<T> nonce) where T : struct
{
    Vector256<long> lanes = nonce.AsInt64();

    // Lanes that equal the constant *before* the subtraction yield an all-ones mask (-1).
    Vector256<long> equalMask = Avx2.CompareEqual(lanes, VMinusUpper128Le);

    // A byte shift by 8 moves each mask up one 64-bit slot inside its 128-bit half,
    // so it lines up with the lane that has to receive the carry.
    Vector256<long> carry = Avx2.ShiftLeftLogical128BitLane(equalMask, 8);

    Vector256<long> stepped = Avx2.Subtract(lanes, VMinusUpper128Le);

    // Subtracting an all-ones lane (-1) adds one to that lane.
    Vector256<long> carried = Avx2.Subtract(stepped, carry);
    return carried.As<long, T>();
}
/// <summary>
/// Loads 32 consecutive UTF-16 code units starting at <paramref name="src"/> and narrows them
/// to 32 bytes, returned as a <c>Vector256&lt;sbyte&gt;</c> in the original order.
/// </summary>
/// <param name="src">Reference to the first of 32 readable chars; no bounds check is performed.</param>
/// <returns>
/// One byte per input char. Narrowing uses unsigned saturation of the signed 16-bit lanes,
/// so code units above 0xFF do not wrap around.
/// </returns>
public static Vector256<sbyte> ReadVector256(this ref char src)
{
    // Control byte for Permute4x64: output qwords are taken from input qwords 0, 2, 1, 3.
    const byte InterleaveFix = 0b_11_01_10_00;

    // Two 256-bit loads: chars [0, 16) and chars [16, 32).
    ref char upperHalf = ref Unsafe.Add(ref src, 16);
    Vector256<short> first16 = Unsafe.As<char, Vector256<short>>(ref src);
    Vector256<short> second16 = Unsafe.As<char, Vector256<short>>(ref upperHalf);

    // The pack works per 128-bit lane, which leaves the 64-bit chunks interleaved
    // between the two inputs; the permute puts them back in source order.
    Vector256<long> packed = Avx2.PackUnsignedSaturate(first16, second16).AsInt64();
    Vector256<long> ordered = Avx2.Permute4x64(packed, InterleaveFix);

    return ordered.AsSByte();
}
/// <summary>
/// Compares two vectors of doubles lane by lane using their raw 64-bit patterns.
/// </summary>
/// <param name="left">First vector.</param>
/// <param name="right">Second vector.</param>
/// <returns>
/// <c>true</c> when every lane has identical bits. Because the comparison is bitwise,
/// identical NaN payloads compare equal while +0.0 and -0.0 compare different.
/// </returns>
public static bool AreEqual(Vector256<double> left, Vector256<double> right)
{
    Vector256<long> leftBits = left.AsInt64();
    Vector256<long> rightBits = right.AsInt64();

    int lane = 0;
    while (lane < Vector256<double>.Count)
    {
        bool sameBits = leftBits.GetElement(lane) == rightBits.GetElement(lane);
        if (!sameBits)
        {
            return false;
        }

        lane++;
    }

    return true;
}
/// <summary>
/// Adds two 256-bit vectors element-wise, selecting the AVX2 (integer) or AVX (floating-point)
/// add that matches <typeparamref name="T"/>.
/// </summary>
/// <typeparam name="T">
/// One of byte, sbyte, short, ushort, int, uint, long, ulong, float or double.
/// </typeparam>
/// <param name="left">Left operand.</param>
/// <param name="right">Right operand.</param>
/// <returns>The lane-wise sum, reinterpreted as <c>Vector256&lt;T&gt;</c>.</returns>
/// <exception cref="NotSupportedException"><typeparamref name="T"/> is not one of the listed types.</exception>
public static Vector256<T> Vector256Add<T>(Vector256<T> left, Vector256<T> right) where T : struct
{
    // 8-bit lanes
    if (typeof(T) == typeof(byte))
    {
        Vector256<byte> sum = Avx2.Add(left.AsByte(), right.AsByte());
        return sum.As<byte, T>();
    }

    if (typeof(T) == typeof(sbyte))
    {
        Vector256<sbyte> sum = Avx2.Add(left.AsSByte(), right.AsSByte());
        return sum.As<sbyte, T>();
    }

    // 16-bit lanes
    if (typeof(T) == typeof(short))
    {
        Vector256<short> sum = Avx2.Add(left.AsInt16(), right.AsInt16());
        return sum.As<short, T>();
    }

    if (typeof(T) == typeof(ushort))
    {
        Vector256<ushort> sum = Avx2.Add(left.AsUInt16(), right.AsUInt16());
        return sum.As<ushort, T>();
    }

    // 32-bit integer lanes
    if (typeof(T) == typeof(int))
    {
        Vector256<int> sum = Avx2.Add(left.AsInt32(), right.AsInt32());
        return sum.As<int, T>();
    }

    if (typeof(T) == typeof(uint))
    {
        Vector256<uint> sum = Avx2.Add(left.AsUInt32(), right.AsUInt32());
        return sum.As<uint, T>();
    }

    // 64-bit integer lanes
    if (typeof(T) == typeof(long))
    {
        Vector256<long> sum = Avx2.Add(left.AsInt64(), right.AsInt64());
        return sum.As<long, T>();
    }

    if (typeof(T) == typeof(ulong))
    {
        Vector256<ulong> sum = Avx2.Add(left.AsUInt64(), right.AsUInt64());
        return sum.As<ulong, T>();
    }

    // Floating-point lanes go through the AVX (not AVX2) overloads.
    if (typeof(T) == typeof(float))
    {
        Vector256<float> sum = Avx.Add(left.AsSingle(), right.AsSingle());
        return sum.As<float, T>();
    }

    if (typeof(T) == typeof(double))
    {
        Vector256<double> sum = Avx.Add(left.AsDouble(), right.AsDouble());
        return sum.As<double, T>();
    }

    throw new NotSupportedException();
}
/// <summary>
/// Blocked SIMD multiply-accumulate kernel: combines the vectors in <paramref name="s"/> and
/// <paramref name="t"/> and writes the reduced results to <paramref name="u"/>.
/// </summary>
/// <param name="s">
/// Left operand, read as <c>s[row * B8 + k / 8]</c> with a single 32-bit lane picked per step.
/// Modified in place by the pre-pass below.
/// </param>
/// <param name="t">
/// Right operand, read as <c>t[j * B + row]</c>. Modified in place by the pre-pass below.
/// </param>
/// <param name="u">Destination, written as <c>u[row * B8 + j]</c>.</param>
/// <remarks>
/// NOTE(review): this looks like a modular matrix product in Montgomery form, with M1 the modulus
/// and M2 twice the modulus, and B8 equal to B / 8 - none of that is visible here; confirm against
/// VectorizedStaticModInt{T}. The loops step by 8 rows with no tail handling, so B is presumably
/// a multiple of 8.
/// </remarks>
private static void MulSimd(Span <VectorizedStaticModInt <T> > s, Span <VectorizedStaticModInt <T> > t, Span <VectorizedStaticModInt <T> > u) {
    // Pre-pass over the first B * B8 vectors of both inputs: every 32-bit lane that compares
    // greater than M1 (signed 32-bit compare) has M1 subtracted once. The result is stored back
    // into s and t, so the caller's buffers are changed.
    for (int i = 0; i < B * B8; i++) {
        var cmpS = Avx2.CompareGreaterThan(s[i].Value.AsInt32(), VectorizedStaticModInt <T> .M1.AsInt32()).AsUInt32();
        var cmpT = Avx2.CompareGreaterThan(t[i].Value.AsInt32(), VectorizedStaticModInt <T> .M1.AsInt32()).AsUInt32();
        // The compare mask is all-ones or zero per lane, so And() selects either M1 or 0.
        var difS = Avx2.And(cmpS, VectorizedStaticModInt <T> .M1);
        var difT = Avx2.And(cmpT, VectorizedStaticModInt <T> .M1);
        s[i] = Avx2.Subtract(s[i].Value, difS);
        t[i] = Avx2.Subtract(t[i].Value, difT);
    }
    // Thresholds for the 64-bit accumulators, built from lane 0 of M1 / M2.
    // Assuming the eight-argument constructor fills lanes in argument order, each 64-bit
    // element of th1 / th2 holds m1v / m2v in its upper 32 bits and zero in its lower 32 bits.
    var m1v = VectorizedStaticModInt <T> .M1.GetElement(0);
    var m2v = VectorizedStaticModInt <T> .M2.GetElement(0);
    var zero = new VectorizedStaticModInt <T>().Value;
    var th1 = new VectorizedStaticModInt <T>(0, m1v, 0, m1v, 0, m1v, 0, m1v).Value.AsInt64();
    var th2 = new VectorizedStaticModInt <T>(0, m2v, 0, m2v, 0, m2v, 0, m2v).Value.AsInt64();
    // Output tile: 8 rows (i .. i + 7) by one vector column j per iteration.
    for (int i = 0; i < B; i += 8) {
        for (int j = 0; j < B8; j += 1) {
            // Two 64-bit accumulators per output row: prod02r0 collects the products for the
            // even 32-bit lanes of t, prod13r0 those for the odd lanes (r = row offset 0..7).
            Vector256 <ulong> prod0200 = default;
            Vector256 <ulong> prod1300 = default;
            Vector256 <ulong> prod0210 = default;
            Vector256 <ulong> prod1310 = default;
            Vector256 <ulong> prod0220 = default;
            Vector256 <ulong> prod1320 = default;
            Vector256 <ulong> prod0230 = default;
            Vector256 <ulong> prod1330 = default;
            Vector256 <ulong> prod0240 = default;
            Vector256 <ulong> prod1340 = default;
            Vector256 <ulong> prod0250 = default;
            Vector256 <ulong> prod1350 = default;
            Vector256 <ulong> prod0260 = default;
            Vector256 <ulong> prod1360 = default;
            Vector256 <ulong> prod0270 = default;
            Vector256 <ulong> prod1370 = default;
            // Inner dimension, processed 8 steps at a time.
            for (int k = 0; k < B; k += 8) {
                for (int l = 0; l < 8; l++) {
                    Vector256 <uint> T0 = t[j * B + k + l].Value;
                    // 0xF5 copies lanes 1 and 3 of each 128-bit half into the even positions.
                    // The 32x32->64 multiply reads only the even 32-bit lanes, so T0 yields the
                    // even-lane products and T130 the odd-lane products.
                    var T130 = Avx2.Shuffle(T0, 0xF5);
                    // Row i + 0: broadcast lane l of the s vector, multiply, accumulate.
                    var S00 = Vector256.Create(s[(i + 0) * B8 + k / 8].Value.GetElement(l));
                    var ST0200 = Avx2.Multiply(S00, T0);
                    var ST1300 = Avx2.Multiply(S00, T130);
                    prod0200 = Avx2.Add(prod0200, ST0200);
                    prod1300 = Avx2.Add(prod1300, ST1300);
                    // Row i + 1
                    var S10 = Vector256.Create(s[(i + 1) * B8 + k / 8].Value.GetElement(l));
                    var ST0210 = Avx2.Multiply(S10, T0);
                    var ST1310 = Avx2.Multiply(S10, T130);
                    prod0210 = Avx2.Add(prod0210, ST0210);
                    prod1310 = Avx2.Add(prod1310, ST1310);
                    // Row i + 2
                    var S20 = Vector256.Create(s[(i + 2) * B8 + k / 8].Value.GetElement(l));
                    var ST0220 = Avx2.Multiply(S20, T0);
                    var ST1320 = Avx2.Multiply(S20, T130);
                    prod0220 = Avx2.Add(prod0220, ST0220);
                    prod1320 = Avx2.Add(prod1320, ST1320);
                    // Row i + 3
                    var S30 = Vector256.Create(s[(i + 3) * B8 + k / 8].Value.GetElement(l));
                    var ST0230 = Avx2.Multiply(S30, T0);
                    var ST1330 = Avx2.Multiply(S30, T130);
                    prod0230 = Avx2.Add(prod0230, ST0230);
                    prod1330 = Avx2.Add(prod1330, ST1330);
                    // Row i + 4
                    var S40 = Vector256.Create(s[(i + 4) * B8 + k / 8].Value.GetElement(l));
                    var ST0240 = Avx2.Multiply(S40, T0);
                    var ST1340 = Avx2.Multiply(S40, T130);
                    prod0240 = Avx2.Add(prod0240, ST0240);
                    prod1340 = Avx2.Add(prod1340, ST1340);
                    // Row i + 5
                    var S50 = Vector256.Create(s[(i + 5) * B8 + k / 8].Value.GetElement(l));
                    var ST0250 = Avx2.Multiply(S50, T0);
                    var ST1350 = Avx2.Multiply(S50, T130);
                    prod0250 = Avx2.Add(prod0250, ST0250);
                    prod1350 = Avx2.Add(prod1350, ST1350);
                    // Row i + 6
                    var S60 = Vector256.Create(s[(i + 6) * B8 + k / 8].Value.GetElement(l));
                    var ST0260 = Avx2.Multiply(S60, T0);
                    var ST1360 = Avx2.Multiply(S60, T130);
                    prod0260 = Avx2.Add(prod0260, ST0260);
                    prod1360 = Avx2.Add(prod1360, ST1360);
                    // Row i + 7
                    var S70 = Vector256.Create(s[(i + 7) * B8 + k / 8].Value.GetElement(l));
                    var ST0270 = Avx2.Multiply(S70, T0);
                    var ST1370 = Avx2.Multiply(S70, T130);
                    prod0270 = Avx2.Add(prod0270, ST0270);
                    prod1370 = Avx2.Add(prod1370, ST1370);
                }
                // After each batch of 8 accumulations: any 64-bit accumulator whose top bit is
                // set (negative when viewed as a signed value) has th2 subtracted once.
                // NOTE(review): presumably this keeps the running sums from overflowing 64 bits.
                // Row i + 0
                var cmp0200 = Avx2.CompareGreaterThan(zero.AsInt64(), prod0200.AsInt64());
                var cmp1300 = Avx2.CompareGreaterThan(zero.AsInt64(), prod1300.AsInt64());
                var dif0200 = Avx2.And(cmp0200, th2);
                var dif1300 = Avx2.And(cmp1300, th2);
                prod0200 = Avx2.Subtract(prod0200, dif0200.AsUInt64());
                prod1300 = Avx2.Subtract(prod1300, dif1300.AsUInt64());
                // Row i + 1
                var cmp0210 = Avx2.CompareGreaterThan(zero.AsInt64(), prod0210.AsInt64());
                var cmp1310 = Avx2.CompareGreaterThan(zero.AsInt64(), prod1310.AsInt64());
                var dif0210 = Avx2.And(cmp0210, th2);
                var dif1310 = Avx2.And(cmp1310, th2);
                prod0210 = Avx2.Subtract(prod0210, dif0210.AsUInt64());
                prod1310 = Avx2.Subtract(prod1310, dif1310.AsUInt64());
                // Row i + 2
                var cmp0220 = Avx2.CompareGreaterThan(zero.AsInt64(), prod0220.AsInt64());
                var cmp1320 = Avx2.CompareGreaterThan(zero.AsInt64(), prod1320.AsInt64());
                var dif0220 = Avx2.And(cmp0220, th2);
                var dif1320 = Avx2.And(cmp1320, th2);
                prod0220 = Avx2.Subtract(prod0220, dif0220.AsUInt64());
                prod1320 = Avx2.Subtract(prod1320, dif1320.AsUInt64());
                // Row i + 3
                var cmp0230 = Avx2.CompareGreaterThan(zero.AsInt64(), prod0230.AsInt64());
                var cmp1330 = Avx2.CompareGreaterThan(zero.AsInt64(), prod1330.AsInt64());
                var dif0230 = Avx2.And(cmp0230, th2);
                var dif1330 = Avx2.And(cmp1330, th2);
                prod0230 = Avx2.Subtract(prod0230, dif0230.AsUInt64());
                prod1330 = Avx2.Subtract(prod1330, dif1330.AsUInt64());
                // Row i + 4
                var cmp0240 = Avx2.CompareGreaterThan(zero.AsInt64(), prod0240.AsInt64());
                var cmp1340 = Avx2.CompareGreaterThan(zero.AsInt64(), prod1340.AsInt64());
                var dif0240 = Avx2.And(cmp0240, th2);
                var dif1340 = Avx2.And(cmp1340, th2);
                prod0240 = Avx2.Subtract(prod0240, dif0240.AsUInt64());
                prod1340 = Avx2.Subtract(prod1340, dif1340.AsUInt64());
                // Row i + 5
                var cmp0250 = Avx2.CompareGreaterThan(zero.AsInt64(), prod0250.AsInt64());
                var cmp1350 = Avx2.CompareGreaterThan(zero.AsInt64(), prod1350.AsInt64());
                var dif0250 = Avx2.And(cmp0250, th2);
                var dif1350 = Avx2.And(cmp1350, th2);
                prod0250 = Avx2.Subtract(prod0250, dif0250.AsUInt64());
                prod1350 = Avx2.Subtract(prod1350, dif1350.AsUInt64());
                // Row i + 6
                var cmp0260 = Avx2.CompareGreaterThan(zero.AsInt64(), prod0260.AsInt64());
                var cmp1360 = Avx2.CompareGreaterThan(zero.AsInt64(), prod1360.AsInt64());
                var dif0260 = Avx2.And(cmp0260, th2);
                var dif1360 = Avx2.And(cmp1360, th2);
                prod0260 = Avx2.Subtract(prod0260, dif0260.AsUInt64());
                prod1360 = Avx2.Subtract(prod1360, dif1360.AsUInt64());
                // Row i + 7
                var cmp0270 = Avx2.CompareGreaterThan(zero.AsInt64(), prod0270.AsInt64());
                var cmp1370 = Avx2.CompareGreaterThan(zero.AsInt64(), prod1370.AsInt64());
                var dif0270 = Avx2.And(cmp0270, th2);
                var dif1370 = Avx2.And(cmp1370, th2);
                prod0270 = Avx2.Subtract(prod0270, dif0270.AsUInt64());
                prod1370 = Avx2.Subtract(prod1370, dif1370.AsUInt64());
            }
            // Final step per row: two rounds of "subtract th1 where the accumulator compares
            // greater than th1 (signed 64-bit)", then the even/odd accumulators are handed to
            // Reduce and the result is stored in u.
            // NOTE(review): Reduce is defined elsewhere; presumably it performs the modular
            // reduction and re-interleaves the even and odd lanes - confirm.
            // Row i + 0
            for (int _ = 0; _ < 2; _++) {
                var cmp02 = Avx2.CompareGreaterThan(prod0200.AsInt64(), th1);
                var cmp13 = Avx2.CompareGreaterThan(prod1300.AsInt64(), th1);
                var dif02 = Avx2.And(cmp02, th1);
                var dif13 = Avx2.And(cmp13, th1);
                prod0200 = Avx2.Subtract(prod0200, dif02.AsUInt64());
                prod1300 = Avx2.Subtract(prod1300, dif13.AsUInt64());
            }
            u[(i + 0) * B8 + j + 0] = VectorizedStaticModInt <T> .Reduce(prod0200.AsUInt32(), prod1300.AsUInt32());
            // Row i + 1
            for (int _ = 0; _ < 2; _++) {
                var cmp02 = Avx2.CompareGreaterThan(prod0210.AsInt64(), th1);
                var cmp13 = Avx2.CompareGreaterThan(prod1310.AsInt64(), th1);
                var dif02 = Avx2.And(cmp02, th1);
                var dif13 = Avx2.And(cmp13, th1);
                prod0210 = Avx2.Subtract(prod0210, dif02.AsUInt64());
                prod1310 = Avx2.Subtract(prod1310, dif13.AsUInt64());
            }
            u[(i + 1) * B8 + j + 0] = VectorizedStaticModInt <T> .Reduce(prod0210.AsUInt32(), prod1310.AsUInt32());
            // Row i + 2
            for (int _ = 0; _ < 2; _++) {
                var cmp02 = Avx2.CompareGreaterThan(prod0220.AsInt64(), th1);
                var cmp13 = Avx2.CompareGreaterThan(prod1320.AsInt64(), th1);
                var dif02 = Avx2.And(cmp02, th1);
                var dif13 = Avx2.And(cmp13, th1);
                prod0220 = Avx2.Subtract(prod0220, dif02.AsUInt64());
                prod1320 = Avx2.Subtract(prod1320, dif13.AsUInt64());
            }
            u[(i + 2) * B8 + j + 0] = VectorizedStaticModInt <T> .Reduce(prod0220.AsUInt32(), prod1320.AsUInt32());
            // Row i + 3
            for (int _ = 0; _ < 2; _++) {
                var cmp02 = Avx2.CompareGreaterThan(prod0230.AsInt64(), th1);
                var cmp13 = Avx2.CompareGreaterThan(prod1330.AsInt64(), th1);
                var dif02 = Avx2.And(cmp02, th1);
                var dif13 = Avx2.And(cmp13, th1);
                prod0230 = Avx2.Subtract(prod0230, dif02.AsUInt64());
                prod1330 = Avx2.Subtract(prod1330, dif13.AsUInt64());
            }
            u[(i + 3) * B8 + j + 0] = VectorizedStaticModInt <T> .Reduce(prod0230.AsUInt32(), prod1330.AsUInt32());
            // Row i + 4
            for (int _ = 0; _ < 2; _++) {
                var cmp02 = Avx2.CompareGreaterThan(prod0240.AsInt64(), th1);
                var cmp13 = Avx2.CompareGreaterThan(prod1340.AsInt64(), th1);
                var dif02 = Avx2.And(cmp02, th1);
                var dif13 = Avx2.And(cmp13, th1);
                prod0240 = Avx2.Subtract(prod0240, dif02.AsUInt64());
                prod1340 = Avx2.Subtract(prod1340, dif13.AsUInt64());
            }
            u[(i + 4) * B8 + j + 0] = VectorizedStaticModInt <T> .Reduce(prod0240.AsUInt32(), prod1340.AsUInt32());
            // Row i + 5
            for (int _ = 0; _ < 2; _++) {
                var cmp02 = Avx2.CompareGreaterThan(prod0250.AsInt64(), th1);
                var cmp13 = Avx2.CompareGreaterThan(prod1350.AsInt64(), th1);
                var dif02 = Avx2.And(cmp02, th1);
                var dif13 = Avx2.And(cmp13, th1);
                prod0250 = Avx2.Subtract(prod0250, dif02.AsUInt64());
                prod1350 = Avx2.Subtract(prod1350, dif13.AsUInt64());
            }
            u[(i + 5) * B8 + j + 0] = VectorizedStaticModInt <T> .Reduce(prod0250.AsUInt32(), prod1350.AsUInt32());
            // Row i + 6
            for (int _ = 0; _ < 2; _++) {
                var cmp02 = Avx2.CompareGreaterThan(prod0260.AsInt64(), th1);
                var cmp13 = Avx2.CompareGreaterThan(prod1360.AsInt64(), th1);
                var dif02 = Avx2.And(cmp02, th1);
                var dif13 = Avx2.And(cmp13, th1);
                prod0260 = Avx2.Subtract(prod0260, dif02.AsUInt64());
                prod1360 = Avx2.Subtract(prod1360, dif13.AsUInt64());
            }
            u[(i + 6) * B8 + j + 0] = VectorizedStaticModInt <T> .Reduce(prod0260.AsUInt32(), prod1360.AsUInt32());
            // Row i + 7
            for (int _ = 0; _ < 2; _++) {
                var cmp02 = Avx2.CompareGreaterThan(prod0270.AsInt64(), th1);
                var cmp13 = Avx2.CompareGreaterThan(prod1370.AsInt64(), th1);
                var dif02 = Avx2.And(cmp02, th1);
                var dif13 = Avx2.And(cmp13, th1);
                prod0270 = Avx2.Subtract(prod0270, dif02.AsUInt64());
                prod1370 = Avx2.Subtract(prod1370, dif13.AsUInt64());
            }
            u[(i + 7) * B8 + j + 0] = VectorizedStaticModInt <T> .Reduce(prod0270.AsUInt32(), prod1370.AsUInt32());
        }
    }
}