/// <summary>
/// Smoke test for <c>Sse41.Multiply</c> on <c>Vector128&lt;int&gt;</c> operands.
/// The instruction multiplies the even-indexed 32-bit elements (0 and 2) of the
/// two inputs and yields two widened 64-bit products, which are compared against
/// a scalar reference computed in 64-bit arithmetic.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
/// <returns>
/// <c>Pass</c> when SSE4.1 is not supported or every product matches;
/// <c>Fail</c> on the first mismatch (after printing the produced values).
/// </returns>
static unsafe int Main(string[] args)
{
    // Nothing to verify on hardware without SSE4.1; report success.
    if (!Sse41.IsSupported)
    {
        return Pass;
    }

    using (TestTable<int, int, long> intTable = new TestTable<int, int, long>(
        new int[4] { 1, -5, 100, 0 },
        new int[4] { 22, -1, -50, 0 },
        new long[2]))
    {
        var vi1 = Unsafe.Read<Vector128<int>>(intTable.inArray1Ptr);
        var vi2 = Unsafe.Read<Vector128<int>>(intTable.inArray2Ptr);
        var vi3 = Sse41.Multiply(vi1, vi2);
        Unsafe.Write(intTable.outArrayPtr, vi3);

        for (int i = 0; i < intTable.outArray.Length; i++)
        {
            // Output element i is the product of input elements at index i * 2.
            // Widen before multiplying so the reference value cannot wrap in
            // 32-bit arithmetic the way a plain int * int would.
            long expected = (long)intTable.inArray1[i * 2] * intTable.inArray2[i * 2];
            if (expected != intTable.outArray[i])
            {
                Console.WriteLine("SSE4.1 Multiply failed on int:");
                foreach (var item in intTable.outArray)
                {
                    Console.Write(item + ", ");
                }
                Console.WriteLine();
                return Fail;
            }
        }
    }

    return Pass;
}
/// <summary>
/// Scores how well a scan cloud placed at the given pose matches the hole map,
/// using SSE intrinsics to rotate and translate each point.
/// TODO - It's actually slower than SISD. Need more parallelism.
/// </summary>
/// <param name="cloud">Cloud of points</param>
/// <param name="pose">Pose of cloud: X and Y are the position, Z is the angle passed to sin/cos</param>
/// <returns>
/// Sum of the map pixel values under the transformed points that land inside the map,
/// multiplied by 1024 and divided by the total number of points in the cloud;
/// <see cref="int.MaxValue"/> when no point lands inside the map.
/// </returns>
private int CalculateDistanceSSE41(ScanCloud cloud, Vector3 pose)
{
    int hits = 0;
    long total = 0;

    // Pose translation and rotation terms, pre-multiplied by the map scale.
    float offsetX = pose.X * HoleMap.Scale;
    float offsetY = pose.Y * HoleMap.Scale;
    float cosScaled = MathF.Cos(pose.Z) * HoleMap.Scale;
    float sinScaled = MathF.Sin(pose.Z) * HoleMap.Scale;

    // Lane layout (c, -s, s, c): after the multiply, one horizontal add produces
    // (c*X - s*Y, s*X + c*Y), i.e. the rotated point, in lanes 0 and 1.
    Vector128<float> rotation = Vector128.Create(cosScaled, -sinScaled, sinScaled, cosScaled);
    Vector128<float> translation = Vector128.Create(offsetX, offsetY, offsetX, offsetY);

    // Translate and rotate scan to robot position and compute the "distance"
    for (int index = 0; index < cloud.Points.Count; index++)
    {
        var point = cloud.Points[index];

        Vector128<float> lanes = Vector128.Create(point.X, point.Y, point.X, point.Y);
        Vector128<float> products = Sse41.Multiply(rotation, lanes);
        Vector128<float> rotated = Sse41.HorizontalAdd(products, products);
        Vector128<float> moved = Sse41.Add(rotated, translation);
        Vector128<float> rounded = Sse41.RoundToNearestInteger(moved);

        int mapX = (int)rounded.GetElement(0);
        int mapY = (int)rounded.GetElement(1);

        // Points that fall outside the map contribute nothing.
        if (mapX < 0 || mapX >= HoleMap.Size || mapY < 0 || mapY >= HoleMap.Size)
        {
            continue;
        }

        total += HoleMap.Pixels[mapY * HoleMap.Size + mapX];
        hits++;
    }

    // NOTE(review): the divisor is the total point count, not the number of
    // in-bounds points - confirm this is intended.
    return hits > 0
        ? (int)((total * 1024) / cloud.Points.Count)
        : int.MaxValue;
}
/// <summary>
/// Alias named after the C intrinsic <c>_mm_mul_epi32</c>; forwards directly to
/// <c>Sse41.Multiply</c> for signed 32-bit operands.
/// </summary>
/// <param name="left">First vector of 32-bit signed integers.</param>
/// <param name="right">Second vector of 32-bit signed integers.</param>
/// <returns>The <c>Vector128&lt;long&gt;</c> result of <c>Sse41.Multiply(left, right)</c>.</returns>
public static Vector128<long> _mm_mul_epi32(Vector128<int> left, Vector128<int> right)
    => Sse41.Multiply(left, right);