/// <summary>
/// Computes the bitwise AND of two 128-bit vectors of any element type.
/// </summary>
/// <param name="left">First operand.</param>
/// <param name="right">Second operand.</param>
/// <returns>
/// The AND of <paramref name="left"/> and <paramref name="right"/>, produced by an SSE/SSE2
/// instruction when one is supported and by <c>SoftwareFallbacks.And_Software</c> otherwise.
/// </returns>
public static Vector128<T> And<T>(Vector128<T> left, Vector128<T> right) where T : struct
{
    // float: use the single-precision AND directly when SSE is present.
    if (typeof(T) == typeof(float) && Sse.IsSupported)
    {
        return Sse.And(left.AsSingle(), right.AsSingle()).As<float, T>();
    }

    // double: the double-precision AND needs SSE2.
    if (typeof(T) == typeof(double) && Sse2.IsSupported)
    {
        return Sse2.And(left.AsDouble(), right.AsDouble()).As<double, T>();
    }

    // Every other element type (and double without SSE2, float without SSE) is handled
    // by reinterpreting the bits: AND is element-type agnostic.
    if (Sse2.IsSupported)
    {
        return Sse2.And(left.AsByte(), right.AsByte()).As<byte, T>();
    }

    if (Sse.IsSupported)
    {
        return Sse.And(left.AsSingle(), right.AsSingle()).As<float, T>();
    }

    return SoftwareFallbacks.And_Software(left, right);
}
// Test entry point: when SSE is supported, ANDs two fixed float vectors with Sse.And and
// checks every lane bit-for-bit against the integer AND of the input bit patterns.
// Returns Pass when SSE is unsupported or the check succeeds, Fail otherwise.
static unsafe int Main(string[] args)
{
    if (!Sse.IsSupported)
    {
        return Pass;
    }

    int outcome = Pass;

    using (TestTable<float> table = new TestTable<float>(
               new float[4] { 1, -5, 100, 0 },
               new float[4] { 22, -1, -50, 0 },
               new float[4]))
    {
        var lhs = Unsafe.Read<Vector128<float>>(table.inArray1Ptr);
        var rhs = Unsafe.Read<Vector128<float>>(table.inArray2Ptr);
        var anded = Sse.And(lhs, rhs);
        Unsafe.Write(table.outArrayPtr, anded);

        // Compare raw bit patterns; comparing as floats would mis-handle NaN payloads.
        bool lanesMatch = table.CheckResult(
            (x, y, z) => (BitConverter.SingleToInt32Bits(x) & BitConverter.SingleToInt32Bits(y))
                         == BitConverter.SingleToInt32Bits(z));

        if (!lanesMatch)
        {
            Console.WriteLine("SSE And failed on float:");
            foreach (var lane in table.outArray)
            {
                Console.Write(lane + ", ");
            }
            Console.WriteLine();
            outcome = Fail;
        }
    }

    return outcome;
}
/// <summary>
/// Writes the absolute value of every element of <paramref name="a"/> into the
/// corresponding slot of <paramref name="s"/>, four floats at a time.
/// </summary>
/// <param name="a">Source values.</param>
/// <param name="s">Destination; must be at least as long as <paramref name="a"/>.</param>
/// <exception cref="ArgumentException">
/// <paramref name="s"/> is shorter than <paramref name="a"/>.
/// </exception>
// NOTE(review): uses Sse2.ShiftRightLogical without an IsSupported check - presumably the
// caller has already verified SSE2 support; confirm.
private static void AbsSse(ReadOnlySpan<float> a, Span<float> s)
{
    // The vector loop stores through a raw pointer, so a short destination would be
    // overrun silently rather than raising an index error. Reject it up front.
    if (s.Length < a.Length)
    {
        throw new ArgumentException("Destination span is shorter than the source span.", nameof(s));
    }

    var remainder = a.Length & 3;
    var length = a.Length - remainder;

    // All-ones shifted right by one => 0x7FFFFFFF per lane: clears only the sign bit.
    var mask = Sse2.ShiftRightLogical(Vector128.Create(-1), 1).AsSingle();

    fixed (float* ptr = a)
    {
        fixed (float* ptrS = s)
        {
            for (var i = 0; i < length; i += 4)
            {
                var j = Sse.LoadVector128(ptr + i);
                Sse.Store(ptrS + i, Sse.And(mask, j));
            }
        }
    }

    // Up to three trailing elements do not fill a vector; handle them with the scalar helper.
    if (remainder != 0)
    {
        AbsNaive(a, s, length, a.Length);
    }
}
// Field scenario: ANDs this instance's _fld1 and _fld2 directly (no intermediate locals),
// stores the result in the supplied test class's output buffer and has that class validate it.
public void RunStructFldScenario(SimpleBinaryOpTest__AndSingle testClass)
{
    var anded = Sse.And(_fld1, _fld2);

    Unsafe.Write(testClass._dataTable.outArrayPtr, anded);
    testClass.ValidateResult(_fld1, _fld2, testClass._dataTable.outArrayPtr);
}
// Field scenario: ANDs the instance fields _fld1 and _fld2 directly, writes the result to
// the data table's output buffer and validates it against the same fields.
public void RunClassFldScenario()
{
    var anded = Sse.And(_fld1, _fld2);

    Unsafe.Write(_dataTable.outArrayPtr, anded);
    ValidateResult(_fld1, _fld2, _dataTable.outArrayPtr);
}
// Local-class-field scenario: creates a fresh test object, ANDs its _fld1 and _fld2,
// writes the result to this instance's output buffer and validates it.
public void RunClassLclFldScenario()
{
    var fresh = new SimpleBinaryOpTest__AndSingle();
    var anded = Sse.And(fresh._fld1, fresh._fld2);

    Unsafe.Write(_dataTable.outArrayPtr, anded);
    ValidateResult(fresh._fld1, fresh._fld2, _dataTable.outArrayPtr);
}
// Local-struct-field scenario: obtains a TestStruct via TestStruct.Create(), ANDs its
// _fld1 and _fld2, writes the result to the output buffer and validates it.
public void RunStructLclFldScenario()
{
    var local = TestStruct.Create();
    var anded = Sse.And(local._fld1, local._fld2);

    Unsafe.Write(_dataTable.outArrayPtr, anded);
    ValidateResult(local._fld1, local._fld2, _dataTable.outArrayPtr);
}
// Field scenario: announces itself to the test framework, ANDs the instance fields
// _fld1 and _fld2 directly, writes the result to the output buffer and validates it.
public void RunClassFldScenario()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario));

    var anded = Sse.And(_fld1, _fld2);

    Unsafe.Write(_dataTable.outArrayPtr, anded);
    ValidateResult(_fld1, _fld2, _dataTable.outArrayPtr);
}
// Local-variable scenario: reads both operands from the input buffers with Unsafe.Read
// into locals, ANDs the locals, writes the result out and validates it.
public void RunLclVarScenario_UnsafeRead()
{
    Vector128<Single> lhs = Unsafe.Read<Vector128<Single>>(_dataTable.inArray1Ptr);
    Vector128<Single> rhs = Unsafe.Read<Vector128<Single>>(_dataTable.inArray2Ptr);
    Vector128<Single> anded = Sse.And(lhs, rhs);

    Unsafe.Write(_dataTable.outArrayPtr, anded);
    ValidateResult(lhs, rhs, _dataTable.outArrayPtr);
}
// Local-variable scenario: loads both operands with Sse.LoadAlignedVector128 into locals,
// ANDs the locals, writes the result out and validates it.
public void RunLclVarScenario_LoadAligned()
{
    var lhs = Sse.LoadAlignedVector128((Single*)(_dataTable.inArray1Ptr));
    var rhs = Sse.LoadAlignedVector128((Single*)(_dataTable.inArray2Ptr));
    var anded = Sse.And(lhs, rhs);

    Unsafe.Write(_dataTable.outArrayPtr, anded);
    ValidateResult(lhs, rhs, _dataTable.outArrayPtr);
}
/// <summary>
/// Bitwise AND of two vectors: the SSE instruction when available, otherwise
/// <c>SoftwareFallbacks.And_Software</c>.
/// </summary>
public static Vector4Single And(Vector4SingleParam1_3 left, Vector4SingleParam1_3 right)
{
    if (!Sse.IsSupported)
    {
        return SoftwareFallbacks.And_Software(left, right);
    }

    return Sse.And(left, right);
}
/// <summary>
/// ANDs <paramref name="vector"/> with <c>Zero32128Mask</c> (defined elsewhere).
/// </summary>
/// <exception cref="PlatformNotSupportedException">SSE is not supported.</exception>
public static Vector128<float> VectorZero32_128(Vector128<float> vector)
{
    if (!Sse.IsSupported)
    {
        throw new PlatformNotSupportedException();
    }

    return Sse.And(vector, Zero32128Mask);
}
/// <summary>
/// Bitwise AND of two vectors: the SSE instruction when available, otherwise the
/// <c>And_Software</c> fallback.
/// </summary>
public static VectorF And(VectorFParam1_3 left, VectorFParam1_3 right)
{
    if (!Sse.IsSupported)
    {
        return And_Software(left, right);
    }

    return Sse.And(left, right);
}
/// <summary>
/// 3D cross product of the X/Y/Z lanes of <paramref name="left"/> and <paramref name="right"/>.
/// Uses SSE when available and <c>CrossProduct3D_Software</c> otherwise.
/// </summary>
public static VectorF CrossProduct3D(VectorFParam1_3 left, VectorFParam1_3 right)
{
    if (!Sse.IsSupported)
    {
        return CrossProduct3D_Software(left, right);
    }

    /*
     * For A(x, y, z, _) and B(x, y, z, _):
     *
     *   X = (Ay * Bz) - (Az * By)
     *   Y = (Az * Bx) - (Ax * Bz)
     *   Z = (Ax * By) - (Ay * Bx)
     *
     * The minuends are (Ay, Az, Ax, _) * (Bz, Bx, By, _) and the subtrahends are
     * (Az, Ax, Ay, _) * (By, Bz, Bx, _); the fourth lane is irrelevant and is masked
     * at the end.
     *
     * Shuffle(...) takes its arguments in reverse lane order (as the C++ _MM_SHUFFLE
     * macro does), so Shuffle(3, 0, 2, 1) selects y, z, x, w and Shuffle(3, 1, 0, 2)
     * selects z, x, y, w.
     */

    // (Ay, Az, Ax, _) * (Bz, Bx, By, _)
    VectorF minuend = Sse.Multiply(
        Sse.Shuffle(left, left, Shuffle(3, 0, 2, 1)),
        Sse.Shuffle(right, right, Shuffle(3, 1, 0, 2)));

    // (Az, Ax, Ay, _) * (By, Bz, Bx, _)
    VectorF subtrahend = Sse.Multiply(
        Sse.Shuffle(left, left, Shuffle(3, 1, 0, 2)),
        Sse.Shuffle(right, right, Shuffle(3, 0, 2, 1)));

    VectorF unmasked = Sse.Subtract(minuend, subtrahend);

    // AND with MaskWToZero (defined elsewhere) before returning.
    // TODO reuse vectors (minimal register usage) - potentially prevent any stack spilling
    return Sse.And(unmasked, MaskWToZero);
}
// Local-class-field scenario: announces itself, creates a fresh test object, ANDs its
// _fld1 and _fld2, writes the result to this instance's output buffer and validates it.
public void RunClassLclFldScenario()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario));

    var fresh = new SimpleBinaryOpTest__AndSingle();
    var anded = Sse.And(fresh._fld1, fresh._fld2);

    Unsafe.Write(_dataTable.outArrayPtr, anded);
    ValidateResult(fresh._fld1, fresh._fld2, _dataTable.outArrayPtr);
}
// Local-struct-field scenario: announces itself, obtains a TestStruct via
// TestStruct.Create(), ANDs its _fld1 and _fld2, writes the result out and validates it.
public void RunStructLclFldScenario()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario));

    var local = TestStruct.Create();
    var anded = Sse.And(local._fld1, local._fld2);

    Unsafe.Write(_dataTable.outArrayPtr, anded);
    ValidateResult(local._fld1, local._fld2, _dataTable.outArrayPtr);
}
// Class-variable scenario: ANDs _clsVar1 and _clsVar2 directly, writes the result to the
// output buffer and validates it against the same two variables.
public void RunClsVarScenario()
{
    var anded = Sse.And(_clsVar1, _clsVar2);

    Unsafe.Write(_dataTable.outArrayPtr, anded);
    ValidateResult(_clsVar1, _clsVar2, _dataTable.outArrayPtr);
}
// Basic scenario: feeds two Sse.LoadAlignedVector128 loads straight into Sse.And (no
// intermediate locals), writes the result out and validates it against the input buffers.
public void RunBasicScenario_LoadAligned()
{
    var anded = Sse.And(
        Sse.LoadAlignedVector128((Single*)(_dataTable.inArray1Ptr)),
        Sse.LoadAlignedVector128((Single*)(_dataTable.inArray2Ptr)));

    Unsafe.Write(_dataTable.outArrayPtr, anded);
    ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
}
// Basic scenario: feeds two Unsafe.Read results straight into Sse.And (no intermediate
// locals), writes the result out and validates it against the input buffers.
public void RunBasicScenario_UnsafeRead()
{
    Vector128<Single> anded = Sse.And(
        Unsafe.Read<Vector128<Single>>(_dataTable.inArray1Ptr),
        Unsafe.Read<Vector128<Single>>(_dataTable.inArray2Ptr));

    Unsafe.Write(_dataTable.outArrayPtr, anded);
    ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
}
// Local-variable scenario: announces itself, reads both operands with Unsafe.Read into
// locals, ANDs the locals, writes the result out and validates it.
public void RunLclVarScenario_UnsafeRead()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead));

    Vector128<Single> lhs = Unsafe.Read<Vector128<Single>>(_dataTable.inArray1Ptr);
    Vector128<Single> rhs = Unsafe.Read<Vector128<Single>>(_dataTable.inArray2Ptr);
    Vector128<Single> anded = Sse.And(lhs, rhs);

    Unsafe.Write(_dataTable.outArrayPtr, anded);
    ValidateResult(lhs, rhs, _dataTable.outArrayPtr);
}
// Local-variable scenario: announces itself, loads both operands with
// Sse.LoadAlignedVector128 into locals, ANDs the locals, writes the result out and validates it.
public void RunLclVarScenario_LoadAligned()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_LoadAligned));

    var lhs = Sse.LoadAlignedVector128((Single*)(_dataTable.inArray1Ptr));
    var rhs = Sse.LoadAlignedVector128((Single*)(_dataTable.inArray2Ptr));
    var anded = Sse.And(lhs, rhs);

    Unsafe.Write(_dataTable.outArrayPtr, anded);
    ValidateResult(lhs, rhs, _dataTable.outArrayPtr);
}
// Local-variable scenario: announces itself, loads both operands with Sse.LoadVector128
// into locals, ANDs the locals, writes the result out and validates it.
public void RunLclVarScenario_Load()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_Load));

    var first = Sse.LoadVector128((Single*)(_dataTable.inArray1Ptr));
    var second = Sse.LoadVector128((Single*)(_dataTable.inArray2Ptr));
    var anded = Sse.And(first, second);

    Unsafe.Write(_dataTable.outArrayPtr, anded);
    ValidateResult(first, second, _dataTable.outArrayPtr);
}
// Basic scenario: announces itself, feeds two Unsafe.Read results straight into Sse.And,
// writes the result out and validates it against the input buffers.
public void RunBasicScenario_UnsafeRead()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead));

    Vector128<Single> anded = Sse.And(
        Unsafe.Read<Vector128<Single>>(_dataTable.inArray1Ptr),
        Unsafe.Read<Vector128<Single>>(_dataTable.inArray2Ptr));

    Unsafe.Write(_dataTable.outArrayPtr, anded);
    ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
}
/// <summary>
/// Rounds every lane of <paramref name="a"/> up to the nearest integer (ceiling).
/// </summary>
/// <param name="a">Four packed single-precision values.</param>
/// <returns>
/// The per-lane ceiling. Uses the SSE4.1 rounding instruction when available; otherwise an
/// SSE/SSE2 emulation. Lanes whose magnitude is at least 2^23 (which includes infinities),
/// and NaN lanes, are returned unchanged.
/// </returns>
public static f32 Ceil_f32(f32 a)
{
    if (Sse41.IsSupported)
    {
        return Sse41.RoundToPositiveInfinity(a);
    }
    else
    {
        // Truncate toward zero via an int32 round-trip, then add 1 where truncation
        // moved the value downward (i.e. for positive non-integers).
        f32 truncated = Sse2.ConvertToVector128Single(Sse2.ConvertToVector128Int32WithTruncation(a));
        f32 wentDown = Sse.CompareLessThan(truncated, a);
        f32 rounded = Sse.Add(truncated, Sse.And(wentDown, Vector128.Create(1f)));

        // The int32 conversion yields 0x80000000 for out-of-range, infinite and NaN inputs,
        // which would turn those lanes into -2147483648. Any float with |a| >= 2^23 is
        // already integral, so keep the emulated result only for smaller magnitudes and
        // pass every other lane through. The compare is false for NaN, so NaN passes through.
        f32 magnitude = Sse.And(a, Vector128.Create(int.MaxValue).AsSingle());
        f32 useRounded = Sse.CompareLessThan(magnitude, Vector128.Create(8388608f));

        // Bitwise select: (useRounded & rounded) | (~useRounded & a).
        return Sse.Or(Sse.And(useRounded, rounded), Sse.AndNot(useRounded, a));
    }
}
/// <summary>
/// Rounds every lane of <paramref name="a"/> down to the nearest integer (floor).
/// </summary>
/// <param name="a">Four packed single-precision values.</param>
/// <returns>
/// The per-lane floor. Uses the SSE4.1 rounding instruction when available; otherwise an
/// SSE/SSE2 emulation. Lanes whose magnitude is at least 2^23 (which includes infinities),
/// and NaN lanes, are returned unchanged.
/// </returns>
public static f32 Floor_f32(f32 a)
{
    if (Sse41.IsSupported)
    {
        return Sse41.RoundToNegativeInfinity(a);
    }
    else
    {
        // Truncate toward zero via an int32 round-trip, then subtract 1 where truncation
        // moved the value upward (i.e. for negative non-integers).
        f32 truncated = Sse2.ConvertToVector128Single(Sse2.ConvertToVector128Int32WithTruncation(a));
        f32 wentUp = Sse.CompareLessThan(a, truncated);
        f32 rounded = Sse.Subtract(truncated, Sse.And(wentUp, Vector128.Create(1f)));

        // The int32 conversion yields 0x80000000 for out-of-range, infinite and NaN inputs,
        // which would turn those lanes into roughly -2147483648. Any float with |a| >= 2^23
        // is already integral, so keep the emulated result only for smaller magnitudes and
        // pass every other lane through. The compare is false for NaN, so NaN passes through.
        f32 magnitude = Sse.And(a, Vector128.Create(int.MaxValue).AsSingle());
        f32 useRounded = Sse.CompareLessThan(magnitude, Vector128.Create(8388608f));

        // Bitwise select: (useRounded & rounded) | (~useRounded & a).
        return Sse.Or(Sse.And(useRounded, rounded), Sse.AndNot(useRounded, a));
    }
}
// Local-struct-field scenario (load form): announces itself, obtains a TestStruct via
// TestStruct.Create(), loads its _fld1 and _fld2 through their addresses with
// Sse.LoadVector128, ANDs the loads, writes the result out and validates it.
public void RunStructLclFldScenario_Load()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario_Load));

    var local = TestStruct.Create();
    var anded = Sse.And(
        Sse.LoadVector128((Single*)(&local._fld1)),
        Sse.LoadVector128((Single*)(&local._fld2)));

    Unsafe.Write(_dataTable.outArrayPtr, anded);
    ValidateResult(local._fld1, local._fld2, _dataTable.outArrayPtr);
}
// Computes the peak absolute sample value of every channel in an interleaved buffer.
//   buffer      - samples; element (sample, channel) is read at buffer[channels * sample + channel]
//   channels    - number of interleaved channels (also the length of the returned array)
//   sampleCount - number of samples read per channel
// Channels are processed four at a time with SSE while at least four remain; any
// leftover channels are processed one by one with scalar code.
static float[] SSE2(float* buffer, int channels, int sampleCount)
{
    float[] peaks = new float[channels];
    int width = Vector128<float>.Count;

    fixed (float* pPeaks = peaks)
    {
        int channel = 0;

        // 0x7FFFFFFF per lane: AND-ing with it clears the sign bit, i.e. takes |x|.
        Vector128<float> signCleared = Vector128.Create(int.MaxValue).AsSingle();

        for (; channels - channel >= width; channel += width)
        {
            Vector128<float> peak = Vector128<float>.Zero;
            for (int frame = 0; frame < sampleCount; ++frame)
            {
                float* src = buffer + (channels * frame + channel);
                Vector128<float> magnitudes = Sse.And(signCleared, Sse.LoadVector128(src));
                peak = Sse.Max(peak, magnitudes);
            }
            Sse.Store(pPeaks + channel, peak);
        }

        for (; channel < channels; channel += 1)
        {
            float peak = 0f;
            for (int frame = 0; frame < sampleCount; ++frame)
            {
                float magnitude = Math.Abs(buffer[channels * frame + channel]);
                if (magnitude > peak)
                {
                    peak = magnitude;
                }
            }
            pPeaks[channel] = peak;
        }
    }

    return peaks;
}
// Field scenario (load form): pins this instance's _fld1 and _fld2, loads them through the
// pinned pointers with Sse.LoadVector128, ANDs the loads, stores the result in the supplied
// test class's output buffer and has that class validate it.
public void RunStructFldScenario_Load(SimpleBinaryOpTest__AndSingle testClass)
{
    fixed (Vector128<Single>* pLeft = &_fld1)
    fixed (Vector128<Single>* pRight = &_fld2)
    {
        Vector128<Single> anded = Sse.And(
            Sse.LoadVector128((Single*)(pLeft)),
            Sse.LoadVector128((Single*)(pRight)));

        Unsafe.Write(testClass._dataTable.outArrayPtr, anded);
        testClass.ValidateResult(_fld1, _fld2, testClass._dataTable.outArrayPtr);
    }
}
/// <summary>
/// 3D cross product of the X/Y/Z lanes of <paramref name="left"/> and <paramref name="right"/>.
/// Uses SSE when available and <c>CrossProduct3D_Software</c> otherwise.
/// </summary>
public static Vector128<float> CrossProduct3D(Vector128<float> left, Vector128<float> right)
{
    if (!Sse.IsSupported)
    {
        return CrossProduct3D_Software(left, right);
    }

    /*
     * For A(x, y, z, _) and B(x, y, z, _):
     *
     *   X = (Ay * Bz) - (Az * By)
     *   Y = (Az * Bx) - (Ax * Bz)
     *   Z = (Ax * By) - (Ay * Bx)
     *
     * The minuends are (Ay, Az, Ax, _) * (Bz, Bx, By, _) and the subtrahends are
     * (Az, Ax, Ay, _) * (By, Bz, Bx, _); the fourth lane is irrelevant and is masked
     * at the end.
     */

    // (Ay, Az, Ax, _) * (Bz, Bx, By, _)
    Vector128<float> minuend = Sse.Multiply(
        Sse.Shuffle(left, left, ShuffleValues.YZXW),
        Sse.Shuffle(right, right, ShuffleValues.ZXYW));

    // (Az, Ax, Ay, _) * (By, Bz, Bx, _)
    Vector128<float> subtrahend = Sse.Multiply(
        Sse.Shuffle(left, left, ShuffleValues.ZXYW),
        Sse.Shuffle(right, right, ShuffleValues.YZXW));

    Vector128<float> unmasked = Sse.Subtract(minuend, subtrahend);

    // AND with SingleConstants.MaskW (defined elsewhere) before returning.
    // TODO reuse vectors (minimal register usage) - potentially prevent any stack spilling
    return Sse.And(unmasked, SingleConstants.MaskW);
}
// Field scenario (load form): announces itself, pins the instance fields _fld1 and _fld2,
// loads them through the pinned pointers with Sse.LoadVector128, ANDs the loads, writes the
// result to the output buffer and validates it.
public void RunClassFldScenario_Load()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario_Load));

    fixed (Vector128<Single>* pLeft = &_fld1)
    fixed (Vector128<Single>* pRight = &_fld2)
    {
        Vector128<Single> anded = Sse.And(
            Sse.LoadVector128((Single*)(pLeft)),
            Sse.LoadVector128((Single*)(pRight)));

        Unsafe.Write(_dataTable.outArrayPtr, anded);
        ValidateResult(_fld1, _fld2, _dataTable.outArrayPtr);
    }
}