/// <summary>
/// Computes the bitwise AND of two 128-bit vectors, using the widest x86 instruction
/// set the current machine reports and a software routine when none is available.
/// </summary>
/// <typeparam name="T">Element type of the vectors.</typeparam>
/// <param name="left">First operand.</param>
/// <param name="right">Second operand.</param>
/// <returns>A vector containing <paramref name="left"/> AND <paramref name="right"/>.</returns>
public static Vector128<T> And<T>(Vector128<T> left, Vector128<T> right) where T : struct
{
    if (Sse2.IsSupported)
    {
        // Single-precision data stays on the SSE float instruction when it is available.
        if (typeof(T) == typeof(float) && Sse.IsSupported)
        {
            Vector128<float> singles = Sse.And(left.AsSingle(), right.AsSingle());
            return singles.As<float, T>();
        }

        // Double-precision data uses the SSE2 double instruction.
        if (typeof(T) == typeof(double))
        {
            Vector128<double> doubles = Sse2.And(left.AsDouble(), right.AsDouble());
            return doubles.As<double, T>();
        }

        // Every other element type is handled as raw bytes.
        Vector128<byte> bytes = Sse2.And(left.AsByte(), right.AsByte());
        return bytes.As<byte, T>();
    }

    if (Sse.IsSupported)
    {
        // SSE only: AND is bitwise, so the float form is used for every element type.
        Vector128<float> bits = Sse.And(left.AsSingle(), right.AsSingle());
        return bits.As<float, T>();
    }

    // No usable hardware path.
    return SoftwareFallbacks.And_Software(left, right);
}
/// <summary>
/// Multiplies each of four 16-bit source vectors element-wise by <paramref name="vfilter"/>,
/// reduces every product vector with horizontal adds, and packs the four reduced values
/// into a single vector in source order (lane 0 = first source).
/// </summary>
/// <param name="vsrc0">First source vector.</param>
/// <param name="vsrc1">Second source vector.</param>
/// <param name="vsrc2">Third source vector.</param>
/// <param name="vsrc3">Fourth source vector.</param>
/// <param name="vfilter">Coefficients multiplied with every source vector.</param>
/// <param name="zero">
/// Second operand of the horizontal adds. Callers are expected to pass an all-zero
/// vector (NOTE(review): this is not checked here).
/// </param>
/// <returns>The four reduced values, one per 32-bit lane.</returns>
private static Vector128<int> MultiplyAddAdjacent(
    Vector128<short> vsrc0,
    Vector128<short> vsrc1,
    Vector128<short> vsrc2,
    Vector128<short> vsrc3,
    Vector128<short> vfilter,
    Vector128<int> zero)
{
    // Multiply 16-bit elements and add adjacent pairs: four 32-bit partial sums per source.
    Vector128<int> partial0 = Sse2.MultiplyAddAdjacent(vsrc0, vfilter);
    Vector128<int> partial1 = Sse2.MultiplyAddAdjacent(vsrc1, vfilter);
    Vector128<int> partial2 = Sse2.MultiplyAddAdjacent(vsrc2, vfilter);
    Vector128<int> partial3 = Sse2.MultiplyAddAdjacent(vsrc3, vfilter);

    // Two horizontal adds against `zero` fold the four partial sums of each source into
    // lane 0; with an all-zero `zero` the remaining lanes end up zero.
    Vector128<int> total0 = Ssse3.HorizontalAdd(Ssse3.HorizontalAdd(partial0, zero), zero);
    Vector128<int> total1 = Ssse3.HorizontalAdd(Ssse3.HorizontalAdd(partial1, zero), zero);
    Vector128<int> total2 = Ssse3.HorizontalAdd(Ssse3.HorizontalAdd(partial2, zero), zero);
    Vector128<int> total3 = Ssse3.HorizontalAdd(Ssse3.HorizontalAdd(partial3, zero), zero);

    // Interleave the low lanes: the low 64 bits now hold (total0, total1) and (total2, total3).
    Vector128<float> lowerPair = Sse2.UnpackLow(total0, total1).AsSingle();
    Vector128<float> upperPair = Sse2.UnpackLow(total2, total3).AsSingle();

    // Put the second pair into the upper 64 bits: lanes become total0, total1, total2, total3.
    return Sse.MoveLowToHigh(lowerPair, upperPair).AsInt32();
}
/// <summary>
/// Adds two 128-bit vectors element-wise, dispatching on <typeparamref name="T"/> to the
/// matching SSE/SSE2 add instruction. No <c>IsSupported</c> check is made here.
/// </summary>
/// <typeparam name="T">
/// Element type: one of the 8/16/32/64-bit signed or unsigned integers, <c>float</c> or <c>double</c>.
/// </typeparam>
/// <param name="left">First addend.</param>
/// <param name="right">Second addend.</param>
/// <returns>The element-wise sum.</returns>
/// <exception cref="NotSupportedException"><typeparamref name="T"/> is any other type.</exception>
public static Vector128<T> Vector128Add<T>(Vector128<T> left, Vector128<T> right) where T : struct
{
    // 8-bit integers
    if (typeof(T) == typeof(byte))
    {
        return Sse2.Add(left.AsByte(), right.AsByte()).As<byte, T>();
    }

    if (typeof(T) == typeof(sbyte))
    {
        return Sse2.Add(left.AsSByte(), right.AsSByte()).As<sbyte, T>();
    }

    // 16-bit integers
    if (typeof(T) == typeof(short))
    {
        return Sse2.Add(left.AsInt16(), right.AsInt16()).As<short, T>();
    }

    if (typeof(T) == typeof(ushort))
    {
        return Sse2.Add(left.AsUInt16(), right.AsUInt16()).As<ushort, T>();
    }

    // 32-bit integers
    if (typeof(T) == typeof(int))
    {
        return Sse2.Add(left.AsInt32(), right.AsInt32()).As<int, T>();
    }

    if (typeof(T) == typeof(uint))
    {
        return Sse2.Add(left.AsUInt32(), right.AsUInt32()).As<uint, T>();
    }

    // 64-bit integers
    if (typeof(T) == typeof(long))
    {
        return Sse2.Add(left.AsInt64(), right.AsInt64()).As<long, T>();
    }

    if (typeof(T) == typeof(ulong))
    {
        return Sse2.Add(left.AsUInt64(), right.AsUInt64()).As<ulong, T>();
    }

    // Floating point
    if (typeof(T) == typeof(float))
    {
        return Sse.Add(left.AsSingle(), right.AsSingle()).As<float, T>();
    }

    if (typeof(T) == typeof(double))
    {
        return Sse2.Add(left.AsDouble(), right.AsDouble()).As<double, T>();
    }

    throw new NotSupportedException();
}
/// <summary>
/// Chooses between <paramref name="left"/> and <paramref name="right"/> under control of
/// <paramref name="selector"/>: where the selector is set the result comes from
/// <paramref name="right"/>, otherwise from <paramref name="left"/>.
/// </summary>
/// <remarks>
/// With SSE4.1 this is a variable blend (per 32-bit lane for <c>float</c>, per 64-bit lane
/// for <c>double</c>, per byte otherwise). Without it the result is built from the bitwise
/// expression <c>(selector AND right) OR (NOT selector AND left)</c>.
/// NOTE(review): the two paths agree when every selector element is all-ones or
/// all-zeros; that is presumably what callers pass, but it is not checked here.
/// </remarks>
/// <typeparam name="T">Element type of the data vectors.</typeparam>
/// <typeparam name="U">Element type of the selector vector.</typeparam>
/// <param name="left">Value used where the selector is clear.</param>
/// <param name="right">Value used where the selector is set.</param>
/// <param name="selector">Selection mask, reinterpreted to match the data vectors.</param>
/// <returns>The blended vector.</returns>
public static Vector128<T> Select<T, U>(Vector128<T> left, Vector128<T> right, Vector128<U> selector)
    where T : struct
    where U : struct
{
    if (!Sse41.IsSupported)
    {
        // Bitwise select composed from the AND / AND-NOT / OR helpers.
        Vector128<T> mask = selector.As<U, T>();
        Vector128<T> fromRight = And(mask, right);
        Vector128<T> fromLeft = AndNot(mask, left);
        return Or(fromRight, fromLeft);
    }

    if (typeof(T) == typeof(float))
    {
        Vector128<float> blended = Sse41.BlendVariable(left.AsSingle(), right.AsSingle(), selector.AsSingle());
        return blended.As<float, T>();
    }

    if (typeof(T) == typeof(double))
    {
        Vector128<double> blended = Sse41.BlendVariable(left.AsDouble(), right.AsDouble(), selector.AsDouble());
        return blended.As<double, T>();
    }

    // Every other element type is blended byte by byte.
    Vector128<byte> blendedBytes = Sse41.BlendVariable(left.AsByte(), right.AsByte(), selector.AsByte());
    return blendedBytes.As<byte, T>();
}
/// <summary>
/// Folds each of the four accumulators in place: the accumulator is carry-less multiplied
/// by the two 64-bit halves of a fixed constant and the two products are XORed together.
/// </summary>
/// <remarks>
/// For every accumulator <c>x</c> the new value is
/// <c>clmul(x.high64, k.low64) XOR clmul(x.low64, k.high64)</c>, where <c>k</c> is the
/// constant built below. Judging by the names, this is one folding step of a
/// PCLMULQDQ-based CRC (NOTE(review): confirm against the caller).
/// </remarks>
/// <param name="xmmCRC0">First accumulator; replaced with its folded value.</param>
/// <param name="xmmCRC1">Second accumulator; replaced with its folded value.</param>
/// <param name="xmmCRC2">Third accumulator; replaced with its folded value.</param>
/// <param name="xmmCRC3">Fourth accumulator; replaced with its folded value.</param>
static void Fold4(ref Vector128<uint> xmmCRC0, ref Vector128<uint> xmmCRC1, ref Vector128<uint> xmmCRC2, ref Vector128<uint> xmmCRC3)
{
    // The folding constant, viewed as the two 64-bit multiplicands.
    Vector128<ulong> foldKeys = Vector128.Create(0xc6e41596, 0x00000001, 0x54442bd4, 0x00000001).AsUInt64();

    // Snapshot the incoming accumulators before anything is overwritten.
    Vector128<ulong> initial0 = xmmCRC0.AsUInt64();
    Vector128<ulong> initial1 = xmmCRC1.AsUInt64();
    Vector128<ulong> initial2 = xmmCRC2.AsUInt64();
    Vector128<ulong> initial3 = xmmCRC3.AsUInt64();

    // Control 0x01: high half of the accumulator times low half of the constant.
    // Control 0x10: low half of the accumulator times high half of the constant.
    xmmCRC0 = Pclmulqdq.CarrylessMultiply(xmmCRC0.AsUInt64(), foldKeys, 0x01).AsUInt32();
    Vector128<float> folded0 = Sse.Xor(
        xmmCRC0.AsSingle(),
        Pclmulqdq.CarrylessMultiply(initial0, foldKeys, 0x10).AsSingle());

    xmmCRC1 = Pclmulqdq.CarrylessMultiply(xmmCRC1.AsUInt64(), foldKeys, 0x01).AsUInt32();
    Vector128<float> folded1 = Sse.Xor(
        xmmCRC1.AsSingle(),
        Pclmulqdq.CarrylessMultiply(initial1, foldKeys, 0x10).AsSingle());

    xmmCRC2 = Pclmulqdq.CarrylessMultiply(xmmCRC2.AsUInt64(), foldKeys, 0x01).AsUInt32();
    Vector128<float> folded2 = Sse.Xor(
        xmmCRC2.AsSingle(),
        Pclmulqdq.CarrylessMultiply(initial2, foldKeys, 0x10).AsSingle());

    xmmCRC3 = Pclmulqdq.CarrylessMultiply(xmmCRC3.AsUInt64(), foldKeys, 0x01).AsUInt32();
    Vector128<float> folded3 = Sse.Xor(
        xmmCRC3.AsSingle(),
        Pclmulqdq.CarrylessMultiply(initial3, foldKeys, 0x10).AsSingle());

    // Publish the folded values.
    xmmCRC0 = folded0.AsUInt32();
    xmmCRC1 = folded1.AsUInt32();
    xmmCRC2 = folded2.AsUInt32();
    xmmCRC3 = folded3.AsUInt32();
}
/// <summary>
/// Copies the upper 64 bits of <paramref name="value"/> into its lower 64 bits,
/// leaving the upper 64 bits as they were.
/// </summary>
/// <param name="value">Source vector.</param>
/// <returns>A vector whose two 64-bit halves both equal the upper half of <paramref name="value"/>.</returns>
private static Vector128<byte> HighToLow(Vector128<byte> value)
{
    // The move is only exposed for floats, so reinterpret, move, and reinterpret back.
    Vector128<float> bits = value.AsSingle();
    Vector128<float> moved = Sse.MoveHighToLow(bits, bits);
    return moved.AsByte();
}
/// <summary>
/// Debug-only check that the sign-bit mask of <paramref name="condition"/> equals
/// <c>Constant.Simd128x4.MaskAllTrue</c>.
/// </summary>
/// <remarks>
/// <c>Debug.Assert</c> calls are compiled only when DEBUG is defined.
/// NOTE(review): <c>MaskAllTrue</c> is presumably the value with all four lane bits
/// set (0b1111), making this an "all lanes true" check; confirm against its definition.
/// </remarks>
/// <param name="condition">Comparison result with one 32-bit lane per element.</param>
public static void Assert(Vector128<int> condition)
{
    // MoveMask on a 128-bit float vector is declared on Sse. Avx has only 256-bit
    // overloads, so the earlier Avx.MoveMask call bound to this same inherited method
    // while suggesting an AVX requirement that does not exist.
    int laneMask = Sse.MoveMask(condition.AsSingle());
    Debug.Assert(laneMask == Constant.Simd128x4.MaskAllTrue);
}