/// <summary>
/// Checks that each of the first four flags in <paramref name="bools"/> disagrees with the
/// truthiness of the matching lane of <paramref name="boolVecZeroIsFalseNotZeroIsTrue"/>.
/// </summary>
/// <param name="bools">Flags to compare; only indices 0 through 3 are read.</param>
/// <param name="boolVecZeroIsFalseNotZeroIsTrue">Lanes read as booleans: zero means false, any other value means true.</param>
/// <returns><c>true</c> when no lane agrees with its flag; <c>false</c> as soon as one lane agrees.</returns>
public static bool AreAllNotEqual(bool[] bools, Vector128<int> boolVecZeroIsFalseNotZeroIsTrue)
{
    for (var lane = 0; lane < 4; lane++)
    {
        bool flag = bools[lane];
        bool laneIsTrue = boolVecZeroIsFalseNotZeroIsTrue.GetElement(lane) != 0;

        // A flag that matches the lane's truthiness breaks the "all not equal" condition.
        if (flag == laneIsTrue)
        {
            return false;
        }
    }

    return true;
}
// Exercises P/Invoke marshalling of Vector128<char> through GenericsNative.
// - Returning the vector by value (GetVector128C), passing it as an `out` parameter (GetVector128COut),
//   calling GetVector128CRef, and calling AddVector128C are all expected to throw MarshalDirectiveException.
// - Passing a raw pointer (&value2) to GetVector128COut and receiving a pointer from GetVector128CPtr are
//   expected to succeed; the results are reinterpreted as Vector128<short> and each of the eight lanes is
//   compared against the corresponding character '0'..'7'.
// NOTE(review): the Assert.Equal calls pass (actual, expected); if this is xUnit the conventional order is
// (expected, actual) - confirm before relying on failure messages.
// NOTE(review): the method body continues past the portion visible here.
private static void TestVector128C() { Assert.Throws <MarshalDirectiveException>(() => GenericsNative.GetVector128C('0', '1', '2', '3', '4', '5', '6', '7')); Vector128 <char> value2; GenericsNative.GetVector128COut('0', '1', '2', '3', '4', '5', '6', '7', &value2); Vector128 <short> tValue2 = *(Vector128 <short> *) & value2; Assert.Equal(tValue2.GetElement(0), (short)'0'); Assert.Equal(tValue2.GetElement(1), (short)'1'); Assert.Equal(tValue2.GetElement(2), (short)'2'); Assert.Equal(tValue2.GetElement(3), (short)'3'); Assert.Equal(tValue2.GetElement(4), (short)'4'); Assert.Equal(tValue2.GetElement(5), (short)'5'); Assert.Equal(tValue2.GetElement(6), (short)'6'); Assert.Equal(tValue2.GetElement(7), (short)'7'); Assert.Throws <MarshalDirectiveException>(() => GenericsNative.GetVector128COut('0', '1', '2', '3', '4', '5', '6', '7', out Vector128 <char> value3)); Vector128 <char> * value4 = GenericsNative.GetVector128CPtr('0', '1', '2', '3', '4', '5', '6', '7'); Vector128 <short> *tValue4 = (Vector128 <short> *)value4; Assert.Equal(tValue4->GetElement(0), (short)'0'); Assert.Equal(tValue4->GetElement(1), (short)'1'); Assert.Equal(tValue4->GetElement(2), (short)'2'); Assert.Equal(tValue4->GetElement(3), (short)'3'); Assert.Equal(tValue4->GetElement(4), (short)'4'); Assert.Equal(tValue4->GetElement(5), (short)'5'); Assert.Equal(tValue4->GetElement(6), (short)'6'); Assert.Equal(tValue4->GetElement(7), (short)'7'); Assert.Throws <MarshalDirectiveException>(() => GenericsNative.GetVector128CRef('0', '1', '2', '3', '4', '5', '6', '7')); Assert.Throws <MarshalDirectiveException>(() => GenericsNative.AddVector128C(default, default));
/// <summary>
/// Returns the largest absolute value found in <paramref name="array"/>, computed with SSE instructions.
/// </summary>
/// <param name="array">
/// Values to scan. Must hold at least four elements; this is enforced by a debug assertion only.
/// </param>
/// <returns>The maximum of the absolute values of all elements.</returns>
internal static unsafe float AbsMaxSse(this ReadOnlySpan<float> array)
{
    const int StepSize = 4; // Vector128<float>.Count
    Debug.Assert(array.Length >= StepSize, "Input can't be smaller than the vector size.");

    // Only the sign bit is set in every lane; AndNot with this mask clears the sign, i.e. yields |x|.
    Vector128<float> signMask = Vector128.Create(-0.0f);

    int length = array.Length;
    int tail = length % StepSize;
    int vectorizedEnd = length - tail;

    fixed (float* data = array)
    {
        Vector128<float> best = Sse.AndNot(signMask, Sse.LoadVector128(data));

        int offset = StepSize;
        while (offset < vectorizedEnd)
        {
            Vector128<float> magnitudes = Sse.AndNot(signMask, Sse.LoadVector128(data + offset));
            best = Sse.Max(best, magnitudes);
            offset += StepSize;
        }

        if (tail != 0)
        {
            // Cover the leftover elements by re-reading the final four values; the overlap with
            // already-processed elements is harmless for a maximum.
            Vector128<float> lastMagnitudes = Sse.AndNot(signMask, Sse.LoadVector128(data + length - StepSize));
            best = Sse.Max(best, lastMagnitudes);
        }

        // Horizontal reduction: fold lanes 2,3 onto lanes 0,1, then lane 1 onto lane 0.
        best = Sse.Max(best, Sse.Shuffle(best, best, 0b00001110));
        best = Sse.Max(best, Sse.Shuffle(best, best, 0b00000001));

        return best.GetElement(0);
    }
}
// Multithreaded variant of the test above.
/// <summary>
/// Computes the sum of squares of all bytes in <paramref name="vs"/>, splitting the array into ranges
/// that are processed in parallel. Within each range, groups of four bytes are squared and summed with
/// the SSE4.1 dot-product instruction; leftover bytes are handled with scalar arithmetic.
/// </summary>
/// <param name="vs">Input bytes.</param>
/// <returns>The sum of <c>vs[i] * vs[i]</c> over the whole array; 0 for an empty array.</returns>
private unsafe long Test17_Intrinsics_SSE41_DotProduct_float_MT(byte[] vs)
{
    // Partitioner.Create rejects an empty range, and an empty input has nothing to sum.
    if (vs.Length == 0)
    {
        return 0;
    }

    long total = 0;
    int simdLength = Vector128<int>.Count;

    // The integer division is 0 when there are fewer elements than processors, which
    // Partitioner.Create rejects. A range shorter than one vector could not use SIMD anyway.
    int rangeSize = Math.Max(simdLength, vs.Length / Environment.ProcessorCount);

    Parallel.ForEach(Partitioner.Create(0, vs.Length, rangeSize), (range) =>
    {
        long subtotal = 0;

        // End of the part of this range that is a whole number of vectors; without SSE4.1 the
        // entire range falls through to the scalar loop below.
        int lastIndex = Sse41.IsSupported
            ? range.Item2 - (range.Item2 - range.Item1) % simdLength
            : range.Item1;

        fixed (byte* p = vs)
        {
            for (int i = range.Item1; i < lastIndex; i += simdLength)
            {
                Vector128<int> v = Sse41.ConvertToVector128Int32(p + i);
                Vector128<float> vv = Sse2.ConvertToVector128Single(v);

                // Multiply all four lanes (high four control bits set) and write the sum of the
                // products into lane 0 (lowest control bit set).
                Vector128<float> dp = Sse41.DotProduct(vv, vv, 0b11110001);
                subtotal += (long)dp.GetElement(0);
            }
        }

        for (int i = lastIndex; i < range.Item2; i++)
        {
            subtotal += vs[i] * vs[i];
        }

        // Ranges run concurrently, so the shared total is updated atomically.
        System.Threading.Interlocked.Add(ref total, subtotal);
    });

    return total;
}
/// <summary>
/// Converts <c>long.MaxValue</c> into the lower lane of a zero vector with
/// <c>Sse2.X64.ConvertScalarToVector128Double</c> and compares the raw bits of that lane
/// with the expected pattern.
/// </summary>
/// <returns>Whatever <c>AreEqual</c> reports for the expected and actual bit patterns.</returns>
private static bool TestSse2X64ConvertScalarToVector128Double()
{
    Vector128<double> converted = Sse2.X64.ConvertScalarToVector128Double(Vector128<double>.Zero, long.MaxValue);
    long actualBits = BitConverter.DoubleToInt64Bits(converted.GetElement(0));

    // 0x43E0000000000000 is the double encoding of 2^63 (biased exponent 0x43E, zero mantissa).
    return AreEqual(0x43E0000000000000, actualBits);
}
/// <summary>
/// Converts <c>long.MaxValue</c> into the lower lane of a zero vector with
/// <c>Sse.X64.ConvertScalarToVector128Single</c> and compares the raw bits of that lane
/// with the expected pattern.
/// </summary>
/// <returns>Whatever <c>AreEqual</c> reports for the expected and actual bit patterns.</returns>
private static bool TestSseX64ConvertScalarToVector128Single()
{
    Vector128<float> converted = Sse.X64.ConvertScalarToVector128Single(Vector128<float>.Zero, long.MaxValue);
    int actualBits = BitConverter.SingleToInt32Bits(converted.GetElement(0));

    // 0x5F000000 is the single-precision encoding of 2^63 (biased exponent 0xBE, zero mantissa).
    return AreEqual(0x5F000000, actualBits);
}
/// <summary>
/// Inserts <c>ulong.MaxValue</c> into lane 0 of a zero vector with <c>Sse41.X64.Insert</c>
/// and checks that lane 0 reads back as the inserted value.
/// </summary>
/// <returns>Whatever <c>AreEqual</c> reports for the expected and actual lane values.</returns>
private static bool TestSse41X64Insert_UInt64()
{
    Vector128<ulong> inserted = Sse41.X64.Insert(Vector128<ulong>.Zero, ulong.MaxValue, 0);
    ulong lane0 = inserted.GetElement(0);

    return AreEqual(ulong.MaxValue, lane0);
}
/// <summary>
/// Moves <c>ulong.MaxValue</c> into a vector with <c>Sse2.X64.ConvertScalarToVector128UInt64</c>
/// and checks that lane 0 reads back as the same value.
/// </summary>
/// <returns>Whatever <c>AreEqual</c> reports for the expected and actual lane values.</returns>
private static bool TestSse2X64ConvertScalarToVector128UInt64()
{
    Vector128<ulong> converted = Sse2.X64.ConvertScalarToVector128UInt64(ulong.MaxValue);
    ulong lane0 = converted.GetElement(0);

    return AreEqual(ulong.MaxValue, lane0);
}
/// <summary>
/// Sums the absolute per-byte differences between two equally sized images and scales the
/// total by 1/512.
/// </summary>
/// <param name="img1">First image buffer.</param>
/// <param name="img2">Second image buffer; expected to have the same length as <paramref name="img1"/> (debug assertion only).</param>
/// <returns>The sum of <c>|img1[i] - img2[i]|</c> over all bytes, divided by 512.</returns>
/// <remarks>
/// NOTE(review): the divisor 512 is taken from the existing code; its meaning as a "percentage"
/// is not evident from this method - confirm against callers.
/// </remarks>
public static unsafe float PercentageDifference(byte[] img1, byte[] img2)
{
    Debug.Assert(img1.Length == img2.Length, "Images must be of the same size");

    long diff = 0;
    int processed = 0;

    if (Sse2.IsSupported)
    {
        Span<Vector128<byte>> vImg1 = MemoryMarshal.Cast<byte, Vector128<byte>>(img1);
        Span<Vector128<byte>> vImg2 = MemoryMarshal.Cast<byte, Vector128<byte>>(img2);

        // SumAbsoluteDifferences leaves one partial sum (at most 8 * 255) in the low 16 bits of each
        // 64-bit half and zeroes the rest. Accumulating in 64-bit lanes avoids the wrap-around that
        // 16-bit lanes hit after a few dozen vectors.
        Vector128<ulong> acc = Vector128<ulong>.Zero;

        for (int i = 0; i < vImg1.Length; i++)
        {
            acc = Sse2.Add(acc, Sse2.SumAbsoluteDifferences(vImg2[i], vImg1[i]).AsUInt64());
        }

        diff = (long)acc.GetElement(0) + (long)acc.GetElement(1);
        processed = vImg1.Length * Vector128<byte>.Count;
    }

    // Scalar path: the whole buffer when SSE2 is unavailable, otherwise only the trailing bytes
    // that do not fill a complete 16-byte vector.
    for (int i = processed; i < img1.Length; i++)
    {
        diff += Math.Abs(img1[i] - img2[i]);
    }

    return (float)diff / 512;
}
/// <summary>
/// Converts the characters of <paramref name="valueText"/> into <c>BitState</c> values, writing them
/// in reverse order (the first character ends up in the last slot), and reports whether bit 1
/// (mask 0b10) was clear in every produced state.
/// </summary>
/// <param name="valueText">Source characters, one byte each.</param>
/// <param name="bitAlloc">Allocator that supplies the output buffer, sized to <c>valueText.Length</c>.</param>
/// <returns>The allocated buffer and <c>true</c> when no produced state had bit 1 set.</returns>
/// <remarks>
/// NOTE(review): the vector path addresses the output through a <c>byte*</c>, so it presumably
/// relies on <c>BitState</c> being one byte wide - confirm against the enum declaration.
/// The static members <c>Avx.X</c> used previously resolve to the <c>Sse2</c>/<c>Ssse3</c> methods
/// named here, which also match the <c>Ssse3.IsSupported</c> guard.
/// </remarks>
internal static unsafe (UnsafeMemory<BitState> bits, bool isValidBinary) ToBitStates(ReadOnlySpan<byte> valueText, BitAllocator bitAlloc)
{
    UnsafeMemory<BitState> bitsMem = bitAlloc.GetBits(valueText.Length);
    Span<BitState> bits = bitsMem.Span;

    // Non-zero as soon as any produced state has its second bit set.
    ulong secondBitSeen = 0;
    int index = 0;

    if (Ssse3.IsSupported && bits.Length >= Vector128<byte>.Count)
    {
        int blockCount = bits.Length / Vector128<byte>.Count;

        fixed (BitState* bitsPtr = bits)
        {
            fixed (byte* textPtr = valueText)
            {
                Vector128<ulong> secondBitAcc = Vector128<ulong>.Zero;

                while (index < blockCount)
                {
                    Vector128<byte> charText = Sse2.LoadVector128(textPtr + index * Vector128<byte>.Count);
                    Vector128<byte> byteText = Ssse3.Shuffle(charText, shuffleIdxs);

                    Vector128<byte> shiftedByOne = Sse2.ShiftRightLogical(byteText.AsInt32(), 1).AsByte();
                    Vector128<byte> firstBit = Sse2.And(onlyFirstBit, Sse2.Or(byteText, shiftedByOne));

                    Vector128<byte> shiftedByFive = Sse2.ShiftRightLogical(byteText.AsInt32(), 5).AsByte();
                    Vector128<byte> secondBit = Sse2.And(onlySecondBit, shiftedByFive);

                    Vector128<byte> bytesAsBitStates = Sse2.Or(firstBit, secondBit);

                    // Blocks are written from the end of the output towards the start.
                    byte* destination = (byte*)bitsPtr + bits.Length - (index + 1) * Vector128<byte>.Count;
                    Sse2.Store(destination, bytesAsBitStates);

                    secondBitAcc = Sse2.Or(secondBitAcc, secondBit.AsUInt64());
                    index++;
                }

                secondBitSeen = secondBitAcc.GetElement(0) | secondBitAcc.GetElement(1);
            }
        }

        // Convert the block counter into an element index for the scalar tail.
        index *= Vector128<byte>.Count;
    }

    while (index < bits.Length)
    {
        BitState bit = ToBitState(valueText[index]);
        bits[bits.Length - index - 1] = bit;
        secondBitSeen |= (uint)bit & 0b10;
        index++;
    }

    return (bitsMem, secondBitSeen == 0);
}
/// <summary>
/// Returns the smallest of the first <paramref name="size"/> elements of <paramref name="min"/>.
/// </summary>
/// <typeparam name="T">Element type of the vector.</typeparam>
/// <param name="min">Vector to scan.</param>
/// <param name="size">Number of leading elements to consider; values below 1 return element 0.</param>
/// <returns>The minimum element according to the default comparer for <typeparamref name="T"/>.</returns>
private static T MinVector128<T>(this Vector128<T> min, int size) where T : unmanaged
{
    // The generic comparer avoids boxing both operands on every comparison; it is fetched once
    // rather than per iteration.
    var comparer = System.Collections.Generic.Comparer<T>.Default;

    T smallest = min.GetElement(0);

    // Element 0 is the initial candidate, so scanning starts at 1.
    for (var j = 1; j < size; j++)
    {
        T candidate = min.GetElement(j);

        if (comparer.Compare(smallest, candidate) > 0)
        {
            smallest = candidate;
        }
    }

    return smallest;
}
/// <summary>
/// Software shuffle of two float vectors driven by an 8-bit control value.
/// </summary>
/// <param name="left">Source of result lanes 0 and 1.</param>
/// <param name="right">Source of result lanes 2 and 3.</param>
/// <param name="control">
/// Four 2-bit lane selectors, lowest bits first: bits 0-1 and 2-3 index into
/// <paramref name="left"/>, bits 4-5 and 6-7 index into <paramref name="right"/>.
/// </param>
/// <returns>The vector assembled from the four selected lanes.</returns>
public static Vector128<float> Shuffle_Software(Vector128<float> left, Vector128<float> right, byte control)
{
    // Each pair of control bits is a lane index in the range 0..3.
    int select0 = control & 0b11;
    int select1 = (control >> 2) & 0b11;
    int select2 = (control >> 4) & 0b11;
    int select3 = (control >> 6) & 0b11;

    return Vector128.Create(
        left.GetElement(select0),
        left.GetElement(select1),
        right.GetElement(select2),
        right.GetElement(select3));
}
/// <summary>
/// Adds up every element of <paramref name="a"/> using <c>MathUnsafe&lt;T&gt;.Add</c>,
/// starting from <c>default(T)</c>.
/// </summary>
/// <param name="a">Vector whose elements are summed.</param>
/// <returns>The accumulated sum of all elements.</returns>
internal static T SumVector(Vector128<T> a)
{
    T total = default(T);

    int lane = 0;
    while (lane < Vector128<T>.Count)
    {
        total = MathUnsafe<T>.Add(total, a.GetElement(lane));
        lane++;
    }

    return total;
}
// Builds a Vector128<byte> whose first fifteen lanes are the constants 1..15 and whose last lane is
// Inline(a), inside a loop that executes exactly once, then returns lane 15 of that vector.
// NOTE(review): the single-iteration loop and the Inline helper presumably exist to exercise a specific
// JIT code-generation path; keep the statement shape as is unless that is confirmed not to matter.
public static byte Vector128_Create_byte(byte a) { Vector128 <byte> x = default; for (int i = 0; i < 1; i++) { x = Vector128.Create(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, Inline(a)); } return(x.GetElement(15)); }
// Builds a Vector128<float> whose first three lanes are the constants 1, 2, 3 and whose last lane is
// Inline(a), inside a loop that executes exactly once, then returns lane 3 of that vector.
// NOTE(review): the single-iteration loop and the Inline helper presumably exist to exercise a specific
// JIT code-generation path; keep the statement shape as is unless that is confirmed not to matter.
public static float Vector128_Create_float(float a) { Vector128 <float> x = default; for (int i = 0; i < 1; i++) { x = Vector128.Create(1, 2, 3, Inline(a)); } return(x.GetElement(3)); }
/// <summary>
/// Runs <c>GetElement</c> and <c>WithElement</c> on a randomly filled <c>Vector128&lt;UInt64&gt;</c>
/// at index <paramref name="imm"/> and records a failure when the observed
/// <see cref="ArgumentOutOfRangeException"/> behaviour differs from what is expected.
/// </summary>
/// <param name="imm">Element index passed to <c>GetElement</c> and <c>WithElement</c>.</param>
/// <param name="expectedOutOfRangeException">
/// <c>true</c> when both calls are expected to throw <see cref="ArgumentOutOfRangeException"/>.
/// </param>
public void RunBasicScenario(int imm = 0, bool expectedOutOfRangeException = false)
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario));

    UInt64[] values = new UInt64[ElementCount];

    for (int i = 0; i < ElementCount; i++)
    {
        values[i] = TestLibrary.Generator.GetUInt64();
    }

    Vector128<UInt64> value = Vector128.Create(values[0], values[1]);

    // A mismatch is either a missing expected exception or an exception nobody asked for;
    // report whichever of the two actually happened.
    string failureReason = expectedOutOfRangeException
        ? "failed to throw ArgumentOutOfRangeException."
        : "threw an unexpected ArgumentOutOfRangeException.";

    bool succeeded = !expectedOutOfRangeException;

    try
    {
        UInt64 result = value.GetElement(imm);
        ValidateGetResult(result, values);
    }
    catch (ArgumentOutOfRangeException)
    {
        succeeded = expectedOutOfRangeException;
    }

    if (!succeeded)
    {
        TestLibrary.TestFramework.LogInformation($"Vector128<UInt64>.GetElement({imm}): {nameof(RunBasicScenario)} {failureReason}");
        TestLibrary.TestFramework.LogInformation(string.Empty);

        Succeeded = false;
    }

    succeeded = !expectedOutOfRangeException;

    UInt64 insertedValue = TestLibrary.Generator.GetUInt64();

    try
    {
        Vector128<UInt64> result2 = value.WithElement(imm, insertedValue);
        ValidateWithResult(result2, values, insertedValue);
    }
    catch (ArgumentOutOfRangeException)
    {
        succeeded = expectedOutOfRangeException;
    }

    if (!succeeded)
    {
        TestLibrary.TestFramework.LogInformation($"Vector128<UInt64>.WithElement({imm}): {nameof(RunBasicScenario)} {failureReason}");
        TestLibrary.TestFramework.LogInformation(string.Empty);

        Succeeded = false;
    }
}
/// <summary>
/// Runs <c>GetElement</c> and <c>WithElement</c> on a randomly filled <c>Vector128&lt;SByte&gt;</c>
/// at index <paramref name="imm"/> and records a failure when the observed
/// <see cref="ArgumentOutOfRangeException"/> behaviour differs from what is expected.
/// </summary>
/// <param name="imm">Element index passed to <c>GetElement</c> and <c>WithElement</c>.</param>
/// <param name="expectedOutOfRangeException">
/// <c>true</c> when both calls are expected to throw <see cref="ArgumentOutOfRangeException"/>.
/// </param>
public void RunBasicScenario(int imm = 15, bool expectedOutOfRangeException = false)
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario));

    SByte[] values = new SByte[ElementCount];

    for (int i = 0; i < ElementCount; i++)
    {
        values[i] = TestLibrary.Generator.GetSByte();
    }

    Vector128<SByte> value = Vector128.Create(
        values[0], values[1], values[2], values[3],
        values[4], values[5], values[6], values[7],
        values[8], values[9], values[10], values[11],
        values[12], values[13], values[14], values[15]);

    // A mismatch is either a missing expected exception or an exception nobody asked for;
    // report whichever of the two actually happened.
    string failureReason = expectedOutOfRangeException
        ? "failed to throw ArgumentOutOfRangeException."
        : "threw an unexpected ArgumentOutOfRangeException.";

    bool succeeded = !expectedOutOfRangeException;

    try
    {
        SByte result = value.GetElement(imm);
        ValidateGetResult(result, values);
    }
    catch (ArgumentOutOfRangeException)
    {
        succeeded = expectedOutOfRangeException;
    }

    if (!succeeded)
    {
        TestLibrary.TestFramework.LogInformation($"Vector128<SByte>.GetElement({imm}): {nameof(RunBasicScenario)} {failureReason}");
        TestLibrary.TestFramework.LogInformation(string.Empty);

        Succeeded = false;
    }

    succeeded = !expectedOutOfRangeException;

    SByte insertedValue = TestLibrary.Generator.GetSByte();

    try
    {
        Vector128<SByte> result2 = value.WithElement(imm, insertedValue);
        ValidateWithResult(result2, values, insertedValue);
    }
    catch (ArgumentOutOfRangeException)
    {
        succeeded = expectedOutOfRangeException;
    }

    if (!succeeded)
    {
        TestLibrary.TestFramework.LogInformation($"Vector128<SByte>.WithElement({imm}): {nameof(RunBasicScenario)} {failureReason}");
        TestLibrary.TestFramework.LogInformation(string.Empty);

        Succeeded = false;
    }
}
/// <summary>
/// Calculate "distance" of cloud at determined pose.
/// TODO - It's actually slower than SISD. Need more parallelism.
/// </summary>
/// <param name="cloud">Cloud of points</param>
/// <param name="pose">Pose of cloud (X, Y position and Z used as the rotation angle)</param>
/// <returns>
/// The sum of the map pixels hit by the transformed points, multiplied by 1024 and divided by the
/// total number of points in the cloud; <c>int.MaxValue</c> when no point lands inside the map.
/// </returns>
private int CalculateDistanceSSE41(ScanCloud cloud, Vector3 pose)
{
    int pointsInMap = 0;
    long pixelSum = 0;

    float px = pose.X * HoleMap.Scale;
    float py = pose.Y * HoleMap.Scale;
    float c = MathF.Cos(pose.Z) * HoleMap.Scale;
    float s = MathF.Sin(pose.Z) * HoleMap.Scale;

    // Lane layout lets one multiply plus one horizontal add produce the rotated (x, y) pair.
    Vector128<float> rotation = Vector128.Create(c, -s, s, c);
    Vector128<float> translation = Vector128.Create(px, py, px, py);

    // Translate and rotate scan to robot position and compute the "distance"
    for (int i = 0; i < cloud.Points.Count; i++)
    {
        var point = cloud.Points[i];

        Vector128<float> xy = Vector128.Create(point.X, point.Y, point.X, point.Y);
        xy = Sse.Multiply(rotation, xy);       // (c*X, -s*Y, s*X, c*Y)
        xy = Sse3.HorizontalAdd(xy, xy);       // (c*X - s*Y, s*X + c*Y, ...)
        xy = Sse.Add(xy, translation);
        xy = Sse41.RoundToNearestInteger(xy);

        int x = (int)xy.GetElement(0);
        int y = (int)xy.GetElement(1);

        // Check boundaries
        bool insideMap = (x >= 0) && (x < HoleMap.Size) && (y >= 0) && (y < HoleMap.Size);
        if (!insideMap)
        {
            continue;
        }

        pixelSum += HoleMap.Pixels[y * HoleMap.Size + x];
        pointsInMap++;
    }

    if (pointsInMap <= 0)
    {
        return int.MaxValue;
    }

    // NOTE(review): the divisor is the total point count, not the number of points that fell inside
    // the map - confirm this is intended.
    return (int)((pixelSum * 1024) / cloud.Points.Count);
}
/// <summary>
/// Creates a vector from a random <c>ushort</c> via <c>CreateScalar</c> and verifies that lane 0 holds
/// the value while every other lane is zero.
/// </summary>
/// <returns>100 when all lanes match the expectation; 0 on the first mismatch.</returns>
static int Main()
{
    ushort value = TestLibrary.Generator.GetUInt16();
    Vector128<ushort> result = CreateScalar(value);

    bool lowerLaneMatches = result.GetElement(0) == value;
    if (!lowerLaneMatches)
    {
        return 0;
    }

    // All remaining lanes must have been zeroed.
    int lane = 1;
    while (lane < Vector128<ushort>.Count)
    {
        if (result.GetElement(lane) != 0)
        {
            return 0;
        }

        lane++;
    }

    return 100;
}
// Compares every element of v with the entries of the values array, beginning at values[index]
// and advancing one entry per element. Any mismatch clears the isPassing flag; the scan always
// covers all elements.
private void checkValues(Vector128<T> v, int index)
{
    for (int lane = 0; lane < Vector128<T>.Count; lane++, index++)
    {
        if (CheckValue<T>(v.GetElement(lane), values[index]))
        {
            continue;
        }

        isPassing = false;
    }
}
/// <summary>
/// Element-wise equality comparison of two signed-byte vectors without hardware intrinsics.
/// </summary>
/// <param name="left">First operand.</param>
/// <param name="right">Second operand.</param>
/// <returns>A vector holding -1 (all bits set) in lanes where the operands are equal and 0 elsewhere.</returns>
static Vector128<sbyte> SoftwareFallback(Vector128<sbyte> left, Vector128<sbyte> right)
{
    Vector128<sbyte> mask = default;

    int lane = 0;
    while (lane < Vector128<sbyte>.Count)
    {
        sbyte laneMask = 0;
        if (left.GetElement(lane) == right.GetElement(lane))
        {
            laneMask = -1;
        }

        mask = mask.WithElement(lane, laneMask);
        lane++;
    }

    return mask;
}
/// <summary>
/// Applies <paramref name="transform"/> to each element of <paramref name="vector"/>, in lane order,
/// and returns a vector of the results.
/// </summary>
/// <typeparam name="T">Element type of the vector.</typeparam>
/// <param name="vector">Input vector.</param>
/// <param name="transform">Function invoked once per element.</param>
/// <returns>A vector whose lane <c>i</c> is <c>transform(vector[i])</c>.</returns>
public static unsafe Vector128<T> ForEach<T>(Vector128<T> vector, Func<T, T> transform) where T : unmanaged
{
    Vector128<T> mapped = default;

    int lane = 0;
    while (lane < Vector128<T>.Count)
    {
        T transformed = transform(vector.GetElement(lane));
        mapped = mapped.WithElement(lane, transformed);
        lane++;
    }

    return mapped;
}
/// <summary>
/// Evaluates <paramref name="check"/> on corresponding lanes of three float vectors.
/// </summary>
/// <param name="a">Supplies the first argument of each check.</param>
/// <param name="b">Supplies the second argument of each check.</param>
/// <param name="c">Supplies the third argument of each check.</param>
/// <param name="check">Predicate applied to the three values of each lane.</param>
/// <returns><c>true</c> when the predicate holds for every lane; <c>false</c> at the first lane where it fails.</returns>
public static bool PerElemCheck(Vector128<float> a, Vector128<float> b, Vector128<float> c, Func<float, float, float, bool> check)
{
    int lane = 0;
    while (lane < Vector128<float>.Count)
    {
        bool laneOk = check(a.GetElement(lane), b.GetElement(lane), c.GetElement(lane));
        if (!laneOk)
        {
            return false;
        }

        lane++;
    }

    return true;
}
/// <summary>
/// Adds up every element of <paramref name="vector"/> using <c>MathUnsafe&lt;T&gt;.Add</c>,
/// starting from <c>default(T)</c>.
/// </summary>
/// <typeparam name="T">Element type of the vector.</typeparam>
/// <param name="vector">Vector whose elements are summed.</param>
/// <returns>The accumulated sum of all elements.</returns>
public static T Sum<T>(this Vector128<T> vector) where T : unmanaged
{
    T total = default(T);

    int lane = 0;
    while (lane < Vector128<T>.Count)
    {
        total = MathUnsafe<T>.Add(total, vector.GetElement(lane));
        lane++;
    }

    return total;
}
/// <summary>
/// Compares two vectors element by element using <c>Equals</c>.
/// </summary>
/// <typeparam name="T">Element type of the vectors.</typeparam>
/// <param name="result">Vector produced by the code under test.</param>
/// <param name="expectedElementValue">Vector holding the expected value for each lane.</param>
/// <returns><c>true</c> when every compared lane is equal; otherwise <c>false</c>.</returns>
unsafe static bool ValidateResult_Vector128<T>(Vector128<T> result, Vector128<T> expectedElementValue) where T : unmanaged
{
    // Number of elements of T that fit into 16 bytes (one 128-bit vector).
    int laneCount = 16 / sizeof(T);
    bool allMatch = true;

    for (int lane = 0; lane < laneCount; lane++)
    {
        T actual = result.GetElement(lane);
        T expected = expectedElementValue.GetElement(lane);

        if (actual.Equals(expected))
        {
            continue;
        }

        // Keep scanning; the outcome is only reported after all lanes were visited.
        allMatch = false;
    }

    return allMatch;
}
/// <summary>
/// Checks the scalar and the 128-bit vector overloads of <c>MathV.Exp</c>, <c>MathV.Exp10</c> and
/// <c>MathV.Exp2</c> against <c>Math.Exp</c> / <c>Math.Pow</c> over a fixed table of inputs, using a
/// relative-error tolerance that grows with the magnitude of the input.
/// </summary>
public void Exp()
{
    float accuracy = 4E-6F; // error of up to 4E-6 expected for 4th order polynomial
    float precision = 5E-7F; // numerical error

    // 20 inputs (indices 0..19); the count is a multiple of four so the vector pass covers all of them.
    float[] values = new float[] { -10.0F, -5.0F, -4.0F, -3.0F, -2.0F, -1.5F, -1.33F, -1.0F, -0.5F, 0.0F, 0.5F, 1.0F, 1.33F, 1.5F, 2.0F, 2.5F, 3.0F, 4.0F, 5.0F, 10.0F };

    // Shared check: relative error of each result versus the double-precision reference.
    void AssertCloseToReference(float input, float actualExp, float actualExp10, float actualExp2)
    {
        double expError = 1.0 - actualExp / Math.Exp(input);
        double exp10Error = 1.0 - actualExp10 / Math.Pow(10.0, input);
        double exp2Error = 1.0 - actualExp2 / Math.Pow(2.0, input);
        double tolerance = accuracy * Math.Abs(input) + precision;

        Assert.IsTrue(Math.Abs(expError) < tolerance);
        Assert.IsTrue(Math.Abs(exp10Error) < tolerance);
        Assert.IsTrue(Math.Abs(exp2Error) < tolerance);
    }

    // Scalar overloads.
    foreach (float value in values)
    {
        AssertCloseToReference(value, MathV.Exp(value), MathV.Exp10(value), MathV.Exp2(value));
    }

    // Vector overloads, four inputs at a time.
    for (int start = 0; start < values.Length; start += 4)
    {
        Vector128<float> quad = Vector128.Create(values[start], values[start + 1], values[start + 2], values[start + 3]);
        Vector128<float> expQuad = MathV.Exp(quad);
        Vector128<float> exp10Quad = MathV.Exp10(quad);
        Vector128<float> exp2Quad = MathV.Exp2(quad);

        for (int lane = 0; lane < 4; ++lane)
        {
            AssertCloseToReference(
                quad.GetElement(lane),
                expQuad.GetElement(lane),
                exp10Quad.GetElement(lane),
                exp2Quad.GetElement(lane));
        }
    }
}
/// <summary>
/// Determines whether two float vectors are element-wise equal within a tolerance.
/// </summary>
/// <param name="left">First vector.</param>
/// <param name="right">Second vector.</param>
/// <param name="tolerance">Exclusive upper bound on the absolute difference allowed per lane.</param>
/// <returns>
/// <c>true</c> when, for every lane, the values are equal according to <c>float.Equals</c> or differ
/// by less than <paramref name="tolerance"/>; otherwise <c>false</c>.
/// </returns>
public static bool AreApproxEqual(Vector128<float> left, Vector128<float> right, float tolerance)
{
    for (int i = 0; i < Vector128<float>.Count; i++)
    {
        float l = left.GetElement(i);
        float r = right.GetElement(i);

        // Equals covers pairs whose difference is NaN (equal infinities, NaN versus NaN).
        if (l.Equals(r))
        {
            continue;
        }

        if (MathF.Abs(l - r) < tolerance)
        {
            continue;
        }

        // This lane is neither equal nor within tolerance.
        return false;
    }

    return true;
}
/// <summary>
/// Adds two float vectors lane by lane without using hardware intrinsics.
/// </summary>
/// <param name="left">First addend.</param>
/// <param name="right">Second addend.</param>
/// <returns>A vector whose lane <c>i</c> is <c>left[i] + right[i]</c>.</returns>
public static Vector128<float> Add_Software(Vector128<float> left, Vector128<float> right)
{
    // A Vector128<float> always has four lanes, so the sums are spelled out directly.
    float sum0 = left.GetElement(0) + right.GetElement(0);
    float sum1 = left.GetElement(1) + right.GetElement(1);
    float sum2 = left.GetElement(2) + right.GetElement(2);
    float sum3 = left.GetElement(3) + right.GetElement(3);

    return Vector128.Create(sum0, sum1, sum2, sum3);
}
// Checks that the elements of v correspond to those in the values array starting with
// values[index]. Every mismatch clears the global isPassing flag; at most one failure message
// is printed per call, tagged with msg.
private void checkValues(string msg, Vector128<T> v, int index)
{
    bool printedMsg = false; // Print at most one message

    for (int i = 0; i < Vector128<T>.Count; i++)
    {
        if (!CheckValue<T>(v.GetElement(i), values[index]))
        {
            if (!printedMsg)
            {
                // The message names Vector128<T>, the type this overload actually checks.
                Console.WriteLine("{0}: FAILED - Vector128<T> checkValues(index = {1}, i = {2}) {3}", msg, index, i, isReflection ? "(via reflection)" : "");
                printedMsg = true;
            }

            // Record failure status in global isPassing
            isPassing = false;
        }

        index++;
    }
}
/// <summary>
/// Decodes the immediate operand of the instruction word and stores it in <c>Immediate</c>,
/// together with the <c>IsRotated</c> flag.
/// </summary>
/// <param name="inst">Instruction descriptor forwarded to the base constructor.</param>
/// <param name="address">Instruction address forwarded to the base constructor.</param>
/// <param name="opCode">Raw instruction word; the immediate is assembled from bits 0-7, 12-14 and 26.</param>
public OpCodeT32AluImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
{
    int low8 = opCode & 0xff;
    int mid3 = (opCode >> 12) & 7;
    int top1 = (opCode >> 26) & 1;

    // 12-bit field laid out as top1 : mid3 : low8.
    int imm12 = low8 | (mid3 << 8) | (top1 << 11);

    bool usesFactorTable = (imm12 >> 10) == 0;
    if (usesFactorTable)
    {
        // Bits 8-9 pick the multiplier from the _factor table that is applied to the low byte.
        int factorIndex = (imm12 >> 8) & 3;
        Immediate = low8 * _factor.GetElement(factorIndex);
        IsRotated = false;
        return;
    }

    // Otherwise the low seven bits with bit 7 forced to 1 are rotated right by the top five bits.
    int shift = imm12 >> 7;
    Immediate = BitUtils.RotateRight(0x80 | (imm12 & 0x7f), shift, 32);
    IsRotated = shift != 0;
}