/// <summary>
/// Checks that <c>Sse3.LoadAndDuplicateToVector128</c> places the first input double in
/// both of the first two output slots, comparing raw 64-bit patterns rather than values.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
/// <returns>
/// <c>Pass</c> when SSE3 is not supported or the check succeeds; <c>Fail</c> otherwise.
/// </returns>
static unsafe int Main(string[] args)
        {
            // Without SSE3 there is nothing to exercise, so the run counts as a pass.
            if (!Sse3.IsSupported)
            {
                return(Pass);
            }

            int outcome = Pass;

            using (TestTable <double> table = new TestTable <double>(new double[2] { 1, -5 }, new double[4]))
            {
                // Load the first input element, duplicated across the vector, and
                // write the whole vector to the start of the output buffer.
                var duplicated = Sse3.LoadAndDuplicateToVector128((double *)(table.inArrayPtr));
                Unsafe.Write(table.outArrayPtr, duplicated);

                // Compare bit patterns so the check is exact.
                long expectedBits = BitConverter.DoubleToInt64Bits(table.inArray[0]);
                bool bothSlotsMatch =
                    expectedBits == BitConverter.DoubleToInt64Bits(table.outArray[0]) &&
                    expectedBits == BitConverter.DoubleToInt64Bits(table.outArray[1]);

                if (!bothSlotsMatch)
                {
                    Console.WriteLine("Sse3 LoadAndDuplicateToVector128 failed on double:");
                    // Dump the full output buffer to help diagnose the mismatch.
                    foreach (var value in table.outArray)
                    {
                        Console.Write(value + ", ");
                    }
                    Console.WriteLine();
                    outcome = Fail;
                }
            }

            return(outcome);
        }
Example #2
0
 /// <summary>
 /// Wrapper named after the C intrinsic <c>_mm_loaddup_pd</c>; forwards to
 /// <c>Sse3.LoadAndDuplicateToVector128</c>.
 /// </summary>
 /// <param name="address">Pointer to the double to load.</param>
 /// <returns>The vector returned by <c>Sse3.LoadAndDuplicateToVector128</c>.</returns>
 public static unsafe Vector128 <double> _mm_loaddup_pd(double *address)
     => Sse3.LoadAndDuplicateToVector128(address);
Example #3
0
        /// <summary>
        /// Performs one in-place update of the velocity and position component arrays
        /// reachable through <paramref name="nBodyPtr"/>, using SSE2/SSE3 intrinsics.
        /// </summary>
        /// <param name="nBodyPtr">
        /// Pointer to the structure exposing the X/Y/Z, Vx/Vy/Vz and M component pointers.
        /// The inner loop uses aligned 128-bit loads/stores at even element offsets, so
        /// each component array is assumed to be 16-byte aligned — TODO confirm at the
        /// allocation site.
        /// </param>
        /// <param name="distance">Step size multiplied into the magnitude and the position update.</param>
        public static void AdvanceStatic(NBody *nBodyPtr, double distance = 0.01d)
        {
            Vector128 <double> disV = Sse2.SetAllVector128(distance);

            // Cursor pointers: advanced by one element on every outer-loop pass.
            double *
                xPtr  = nBodyPtr->XPtr,
                yPtr  = nBodyPtr->YPtr,
                zPtr  = nBodyPtr->ZPtr,
                vxPtr = nBodyPtr->VxPtr,
                vyPtr = nBodyPtr->VyPtr,
                vzPtr = nBodyPtr->VzPtr,
                mPtr  = nBodyPtr->MPtr;

            // Base pointers: never advanced; the inner loop indexes them with j.
            double *
                gxPtr  = nBodyPtr->XPtr,
                gyPtr  = nBodyPtr->YPtr,
                gzPtr  = nBodyPtr->ZPtr,
                gvxPtr = nBodyPtr->VxPtr,
                gvyPtr = nBodyPtr->VyPtr,
                gvzPtr = nBodyPtr->VzPtr,
                gmPtr  = nBodyPtr->MPtr;

            // Because of the extra i++ at the bottom of the first pass, this loop runs
            // five times (i = 0, 2, 3, 4, 5) while the cursor pointers visit elements 0..4
            // and k takes the values 2..6.
            for (int i = 0, k = 2; i < 6; i++, k++, mPtr++, vxPtr++, vyPtr++, vzPtr++, xPtr++, yPtr++, zPtr++)
            {
                // Broadcast the current element of each component into both vector lanes.
                var mV  = Sse3.LoadAndDuplicateToVector128(mPtr);
                var xV  = Sse3.LoadAndDuplicateToVector128(xPtr);
                var yV  = Sse3.LoadAndDuplicateToVector128(yPtr);
                var zV  = Sse3.LoadAndDuplicateToVector128(zPtr);
                var vxV = Sse3.LoadAndDuplicateToVector128(vxPtr);
                var vyV = Sse3.LoadAndDuplicateToVector128(vyPtr);
                var vzV = Sse3.LoadAndDuplicateToVector128(vzPtr);

                // Start at k rounded down to an even index so every access is a whole
                // two-element pair; each step handles elements j and j + 1.
                for (int j = (k - k % 2); j < 6; j += 2)
                {
                    var xxV = Sse2.LoadAlignedVector128(gxPtr + j);
                    var yyV = Sse2.LoadAlignedVector128(gyPtr + j);
                    var zzV = Sse2.LoadAlignedVector128(gzPtr + j);

                    // NOTE(review): the per-axis term is the product of the two
                    // coordinates, not their difference — confirm this is intended.
                    xxV = Sse2.Multiply(xxV, xV);
                    yyV = Sse2.Multiply(yyV, yV);
                    zzV = Sse2.Multiply(zzV, zV);

                    // Get2D, Reciprocal and ReciprocalSqrt are defined outside this block;
                    // presumably mag ends up as distance / |d|^3 — verify against the helpers.
                    var mag = Get2D(xxV, yyV, zzV);
                    mag = Sse2.Multiply(Reciprocal(mag), ReciprocalSqrt(mag));
                    mag = Sse2.Multiply(mag, disV);

                    // Subtract the current element's mass-weighted term from the velocities of the pair at j.
                    var imag = Sse2.Multiply(mV, mag);
                    Sse2.StoreAligned(gvxPtr + j, Sse2.Subtract(Sse2.LoadAlignedVector128(gvxPtr + j), Sse2.Multiply(imag, xxV)));
                    Sse2.StoreAligned(gvyPtr + j, Sse2.Subtract(Sse2.LoadAlignedVector128(gvyPtr + j), Sse2.Multiply(imag, yyV)));
                    Sse2.StoreAligned(gvzPtr + j, Sse2.Subtract(Sse2.LoadAlignedVector128(gvzPtr + j), Sse2.Multiply(imag, zzV)));

                    // Accumulate the pair's mass-weighted term into the current element's velocity lanes.
                    var jmag = Sse2.Multiply(Sse2.LoadAlignedVector128(gmPtr + j), mag);
                    vxV = Sse2.Add(vxV, Sse2.Multiply(jmag, xxV));
                    vyV = Sse2.Add(vyV, Sse2.Multiply(jmag, yyV));
                    vzV = Sse2.Add(vzV, Sse2.Multiply(jmag, zzV));
                } // inner loop

                // Write each velocity axis back to its own slot. The original stored the
                // x value to vxPtr three times, so the y and z accumulations were never
                // written to memory.
                // NOTE(review): each vector started with the element duplicated in both
                // lanes, so the horizontal sum includes that starting value twice —
                // confirm this is intended.
                Sse2.StoreScalar(vxPtr, Sse3.HorizontalAdd(vxV, vxV));
                Sse2.StoreScalar(vyPtr, Sse3.HorizontalAdd(vyV, vyV));
                Sse2.StoreScalar(vzPtr, Sse3.HorizontalAdd(vzV, vzV));

                // Position update: position + velocity * distance, per lane.
                xV = Sse2.Add(xV, Sse2.Multiply(vxV, disV));
                yV = Sse2.Add(yV, Sse2.Multiply(vyV, disV));
                zV = Sse2.Add(zV, Sse2.Multiply(vzV, disV));

                Sse2.StoreScalar(xPtr, Sse3.HorizontalAdd(xV, xV));
                Sse2.StoreScalar(yPtr, Sse3.HorizontalAdd(yV, yV));
                Sse2.StoreScalar(zPtr, Sse3.HorizontalAdd(zV, zV));

                // Extra bump on the first pass only: i skips 1 while k and the cursor
                // pointers still advance by one. NOTE(review): purpose unclear — confirm.
                if (i == 0)
                {
                    i++;
                }
            } // outer loop

            // Disabled code kept from the original:
            //*(xPtr - 1) += (*(vxPtr - 1) * distance);
            //*(yPtr - 1) += (*(vyPtr - 1) * distance);
            //*(zPtr - 1) += (*(vzPtr - 1) * distance);
        }