Exemplo n.º 1
0
        // Multithreaded variant of the FMA (double) sum-of-squares test.
        // Intrinsics FMA MultiplyAdd double
        /// <summary>
        /// Returns the sum of the squares of every byte in <paramref name="vs"/>.
        /// The index space is split into ranges that are processed in parallel; each range
        /// accumulates four elements at a time in double lanes with a fused multiply-add and
        /// finishes its leftover elements with scalar code.
        /// </summary>
        /// <param name="vs">Input bytes. An empty array yields 0.</param>
        /// <returns>Sum of <c>vs[i] * vs[i]</c> over the whole array.</returns>
        private unsafe long Test14_Intrinsics_FMA_MultiplyAdd_double_MT(byte[] vs)
        {
            // Partitioner.Create rejects an empty range, so answer the trivial case directly.
            if (vs.Length == 0)
            {
                return 0;
            }

            long total      = 0;
            int  simdLength = Vector128<int>.Count;

            // The integer division is 0 when the array is shorter than the core count,
            // and Partitioner.Create requires a strictly positive range size.
            int rangeSize = Math.Max(1, vs.Length / Environment.ProcessorCount);

            Parallel.ForEach(Partitioner.Create(0, vs.Length, rangeSize),
                             (range) =>
            {
                long subtotal = 0;

                // End of the part of this range that can be consumed in whole vectors.
                int lastIndex = range.Item2 - (range.Item2 - range.Item1) % simdLength;
                Vector256<double> vTotal = Vector256.Create(0d);

                fixed (byte* p = vs)
                {
                    for (int i = range.Item1; i < lastIndex; i += simdLength)
                    {
                        // Widen 4 bytes -> 4 ints -> 4 doubles, then vTotal += f * f.
                        Vector128<int>    v = Avx2.ConvertToVector128Int32(p + i);
                        Vector256<double> f = Avx.ConvertToVector256Double(v);
                        vTotal = Fma.MultiplyAdd(f, f, vTotal);    // double lanes
                    }
                }

                // Horizontal sum of the four lanes.
                double* pp = stackalloc double[Vector256<double>.Count];
                Avx.Store(pp, vTotal);
                for (int i = 0; i < Vector256<double>.Count; i++)
                {
                    subtotal += (long)pp[i];
                }

                // Scalar tail of this range.
                for (int i = lastIndex; i < range.Item2; i++)
                {
                    subtotal += vs[i] * vs[i];
                }

                // Ranges run concurrently, so merge atomically.
                System.Threading.Interlocked.Add(ref total, subtotal);
            });

            return total;
        }
Exemplo n.º 2
0
        // Scenario: the operand of ConvertToVector256Double is read directly from the _fld member.
        public void RunFldScenario()
        {
            var widened = Avx.ConvertToVector256Double(_fld);

            // Store the converted vector in the output buffer, then validate it against the same operand.
            Unsafe.Write(_dataTable.outArrayPtr, widened);
            ValidateResult(_fld, _dataTable.outArrayPtr);
        }
Exemplo n.º 3
0
        // Intrinsics FMA MultiplyAdd double
        /// <summary>
        /// Single-threaded sum of squares of every byte in <paramref name="vs"/>.
        /// Four bytes at a time are widened to doubles and accumulated with a fused
        /// multiply-add; elements that do not fill a whole vector are added with scalar code.
        /// </summary>
        /// <param name="vs">Input bytes.</param>
        /// <returns>Sum of <c>vs[i] * vs[i]</c> over the whole array.</returns>
        private unsafe long Test4_Intrinsics_FMA_MultiplyAdd_double(byte[] vs)
        {
            int lanes     = Vector128<int>.Count;
            int vectorEnd = vs.Length - (vs.Length % lanes);   // first index left to the scalar tail
            Vector256<double> accumulator = Vector256.Create(0d);

            fixed (byte* source = vs)
            {
                int offset = 0;
                while (offset < vectorEnd)
                {
                    // bytes -> ints -> doubles, then accumulator += widened * widened
                    Vector128<int>    asInts  = Sse41.ConvertToVector128Int32(source + offset);
                    Vector256<double> widened = Avx.ConvertToVector256Double(asInts);
                    accumulator = Fma.MultiplyAdd(widened, widened, accumulator);
                    offset     += lanes;
                }
            }

            // Spill the accumulator to the stack and add its lanes together.
            double* laneSums = stackalloc double[Vector256<double>.Count];
            Avx.Store(laneSums, accumulator);

            long total = 0;
            for (int lane = 0; lane < Vector256<double>.Count; lane++)
            {
                total += (long)laneSums[lane];
            }

            // Scalar tail.
            for (int rest = vectorEnd; rest < vs.Length; rest++)
            {
                total += vs[rest] * vs[rest];
            }

            return total;
        }
Exemplo n.º 4
0
        /// <summary>
        /// For every index i, writes the Euclidean distance between the points
        /// (x[i], y[i], z[i]) and (xx[i], yy[i], zz[i]) into <paramref name="result"/>[i].
        /// The index space of <paramref name="x"/> is partitioned and processed in parallel,
        /// four elements per vector step, with a scalar loop for the leftover elements of each range.
        /// </summary>
        /// <remarks>
        /// The vector loop uses raw pointers, so all seven arrays are expected to hold at least
        /// <c>x.Length</c> elements; that is not validated here.
        /// </remarks>
        private unsafe void Test2_Vector256Double(byte[] x, byte[] y, byte[] z, byte[] xx, byte[] yy, byte[] zz, double[] result)
        {
            // Partitioner.Create rejects an empty range; there is nothing to compute anyway.
            if (x.Length == 0)
            {
                return;
            }

            Parallel.ForEach(Partitioner.Create(0, x.Length), range =>
            {
                int simdLength = Vector256<double>.Count;

                // Only whole vectors may go through the pointer loop: running it up to
                // range.Item2 would read and write past the end of the arrays on the last step.
                int lastIndex = range.Item2 - (range.Item2 - range.Item1) % simdLength;
                Vector256<double> vx, vy, vz, vm;

                fixed (byte* px = x, py = y, pz = z, pxx = xx, pyy = yy, pzz = zz)
                {
                    fixed (double* dp = result)
                    {
                        for (int i = range.Item1; i < lastIndex; i += simdLength)
                        {
                            // Per-axis differences.
                            vx = Avx.Subtract(
                                Avx.ConvertToVector256Double(Sse41.ConvertToVector128Int32(px + i)),
                                Avx.ConvertToVector256Double(Sse41.ConvertToVector128Int32(pxx + i)));
                            vy = Avx.Subtract(
                                Avx.ConvertToVector256Double(Sse41.ConvertToVector128Int32(py + i)),
                                Avx.ConvertToVector256Double(Sse41.ConvertToVector128Int32(pyy + i)));
                            vz = Avx.Subtract(
                                Avx.ConvertToVector256Double(Sse41.ConvertToVector128Int32(pz + i)),
                                Avx.ConvertToVector256Double(Sse41.ConvertToVector128Int32(pzz + i)));

                            // Square root of the sum of squares.
                            vm = Avx.Add(Avx.Multiply(vx, vx), Avx.Multiply(vy, vy));
                            vm = Avx.Sqrt(Avx.Add(vm, Avx.Multiply(vz, vz)));

                            // Write the four results to the output array.
                            Avx.Store(dp + i, vm);
                        }
                    }
                }

                // Scalar tail, same operation order as the vector path.
                for (int i = lastIndex; i < range.Item2; i++)
                {
                    double dx = x[i] - xx[i];
                    double dy = y[i] - yy[i];
                    double dz = z[i] - zz[i];
                    result[i] = System.Math.Sqrt((dx * dx + dy * dy) + dz * dz);
                }
            });
        }
Exemplo n.º 5
0
        // Scenario: the operand is the _fld member of a freshly constructed local test object.
        public void RunLclFldScenario()
        {
            var instance = new SimpleUnaryOpTest__ConvertToVector256DoubleSingle();
            var widened  = Avx.ConvertToVector256Double(instance._fld);

            // The output buffer belongs to this object, not to the local instance.
            Unsafe.Write(_dataTable.outArrayPtr, widened);
            ValidateResult(instance._fld, _dataTable.outArrayPtr);
        }
Exemplo n.º 6
0
        // Scenario: the operand is first loaded into a local with an aligned load, then converted.
        public void RunLclVarScenario_LoadAligned()
        {
            var source  = Sse.LoadAlignedVector128((float*)(_dataTable.inArrayPtr));
            var widened = Avx.ConvertToVector256Double(source);

            // Store the converted vector in the output buffer, then validate it against the loaded operand.
            Unsafe.Write(_dataTable.outArrayPtr, widened);
            ValidateResult(source, _dataTable.outArrayPtr);
        }
Exemplo n.º 7
0
        // Scenario: the operand is first read into a local through Unsafe.Read, then converted.
        public void RunLclVarScenario_UnsafeRead()
        {
            var source  = Unsafe.Read<Vector128<float>>(_dataTable.inArrayPtr);
            var widened = Avx.ConvertToVector256Double(source);

            // Store the converted vector in the output buffer, then validate it against the operand read above.
            Unsafe.Write(_dataTable.outArrayPtr, widened);
            ValidateResult(source, _dataTable.outArrayPtr);
        }
Exemplo n.º 8
0
        // Scenario: the aligned load is passed straight into the conversion, with no intermediate local.
        public void RunBasicScenario_LoadAligned()
        {
            var widened = Avx.ConvertToVector256Double(Sse.LoadAlignedVector128((float*)(_dataTable.inArrayPtr)));

            // Store the converted vector, then validate the output buffer against the input buffer.
            Unsafe.Write(_dataTable.outArrayPtr, widened);
            ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr);
        }
Exemplo n.º 9
0
        // Scenario: the Unsafe.Read result is passed straight into the conversion, with no intermediate local.
        public void RunBasicScenario_UnsafeRead()
        {
            var widened = Avx.ConvertToVector256Double(Unsafe.Read<Vector128<float>>(_dataTable.inArrayPtr));

            // Store the converted vector, then validate the output buffer against the input buffer.
            Unsafe.Write(_dataTable.outArrayPtr, widened);
            ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr);
        }
Exemplo n.º 10
0
        // Original author's note: 4x faster; the vector is built with the conversion intrinsics.
        /// <summary>
        /// Benchmark body: computes the square root of every element of <paramref name="vs"/>,
        /// four at a time (bytes widened to ints, then to doubles). The results are discarded.
        /// Elements that do not fill a whole vector at the end of the array are not processed.
        /// </summary>
        /// <param name="vs">Input bytes.</param>
        private unsafe void Test6(byte[] vs)
        {
            int simdLength = Vector256<double>.Count;
            int lastIndex  = vs.Length - (vs.Length % simdLength);

            fixed (byte* p = vs)
            {
                for (int i = 0; i < lastIndex; i += simdLength)
                {
                    // Load from the current offset; loading from p alone would
                    // process the first four bytes over and over.
                    _ = Avx.Sqrt(Avx.ConvertToVector256Double(Sse41.ConvertToVector128Int32(p + i)));
                }
            }
        }
Exemplo n.º 11
0
        /// <summary>
        /// Scratch method that exercises several conversion and multiply-add intrinsics on the
        /// first bytes of <paramref name="vs"/>. Every result is kept in a local and then
        /// discarded; nothing is returned or stored.
        /// </summary>
        /// <param name="vs">Input bytes; at least 64 are required because two 32-byte vectors are loaded.</param>
        /// <exception cref="System.ArgumentException">
        /// <paramref name="vs"/> is null or shorter than 64 bytes.
        /// </exception>
        private unsafe void TestAddSum(byte[] vs)
        {
            // The widest access is LoadVector256(p + 32), which touches bytes 32..63.
            const int requiredBytes = 64;
            if (vs == null || vs.Length < requiredBytes)
            {
                throw new System.ArgumentException("At least 64 bytes are required.", nameof(vs));
            }

            fixed (byte* p = vs)
            {
                var v  = Avx.LoadVector256(p);
                var v2 = Avx.LoadVector256(p + 32);
                //Avx.MultipleSumAbsoluteDifferences;

                // 8 bytes -> 8 ints -> 8 floats, squared.
                Vector256<int>   i1 = Avx2.ConvertToVector256Int32(p);
                Vector256<float> f1 = Avx.ConvertToVector256Single(i1);
                Vector256<float> m1 = Avx.Multiply(f1, f1);

                // 4 bytes -> 4 ints -> 4 doubles, squared through FMA with a zero addend.
                Vector128<int>    i128 = Sse41.ConvertToVector128Int32(p);
                Vector256<double> d256 = Avx.ConvertToVector256Double(i128);
                var dZero = Vector256<double>.Zero;
                Vector256<double> ma1 = Fma.MultiplyAdd(d256, d256, dZero);

                // Same idea in single precision, eight lanes.
                var i256  = Avx2.ConvertToVector256Int32(p);
                var f256  = Avx.ConvertToVector256Single(i256);
                var fZero = Vector256<float>.Zero;
                var ma2   = Fma.MultiplyAdd(f256, f256, fZero);

                // Scalar multiply: only the lowest lane is multiplied.
                Vector128<float> s128 = Sse2.ConvertToVector128Single(i128);
                Vector128<float> ms   = Sse.MultiplyScalar(s128, s128);

                // x86 / x64 SIMD instruction reference table (SSE - AVX2)
                // https://www.officedaytime.com/tips/simd.html
                // pmaddwd
                // https://www.officedaytime.com/tips/simdimg/si.php?f=pmaddwd

                // 8 bytes -> 8 shorts, then multiply and add adjacent pairs into 4 ints.
                Vector128<short> sh128 = Sse41.ConvertToVector128Int16(p);
                Vector128<int>   vv3   = Avx.MultiplyAddAdjacent(sh128, sh128);

                // NOTE(review): unused local, presumably a breakpoint anchor - confirm before removing.
                var neko = 0;
                //Avx.MultiplyAddAdjacent;
                //Avx.MultiplyHigh;
                //Avx.MultiplyHighRoundScale;
                //Avx.MultiplyLow;
                //Avx.MultiplyScalar;
                //Fma.MultiplyAdd;
                //Fma.MultiplyAddNegated;
                //Fma.MultiplyAddNegatedScalar;
                //Fma.MultiplyAddScalar;
                //Fma.MultiplyAddSubtract;
                //Fma.MultiplySubtract;
                //Fma.MultiplySubtractAdd;
                //Fma.MultiplySubtractNegated;
                //Fma.MultiplySubtractNegatedScalar;
                //Fma.MultiplySubtractScalar;
            }
        }
Exemplo n.º 12
0
 // Original author's note: 12x faster; the vector Sqrt really is fast.
 /// <summary>
 /// Parallel benchmark body: partitions the index space of <paramref name="vs"/> and, in each
 /// range, computes the square root of four elements at a time (bytes widened to ints, then
 /// to doubles). The results are discarded. Elements that do not fill a whole vector at the
 /// end of a range are not processed.
 /// </summary>
 /// <param name="vs">Input bytes. An empty array is a no-op.</param>
 private unsafe void Test6_MT(byte[] vs)
 {
     // Partitioner.Create rejects an empty range.
     if (vs.Length == 0)
     {
         return;
     }

     // Partition the array that is actually read, so the pointer loop can never
     // run past its end regardless of any externally defined element count.
     Parallel.ForEach(Partitioner.Create(0, vs.Length), range =>
     {
         int simdLength = Vector256<double>.Count;
         int lastIndex  = range.Item2 - (range.Item2 - range.Item1) % simdLength;

         fixed (byte* p = vs)
         {
             // Stop at lastIndex: a vector load starting in the last partial group
             // would read bytes beyond this range.
             for (int i = range.Item1; i < lastIndex; i += simdLength)
             {
                 // Load from the current offset rather than from the start of the array.
                 _ = Avx.Sqrt(Avx.ConvertToVector256Double(Sse41.ConvertToVector128Int32(p + i)));
             }
         }
     });
 }
Exemplo n.º 13
0
    /// <summary>
    /// Evaluates two uniformly sampled lookup tables at every value of <paramref name="x"/> by
    /// linear interpolation and blends the two results:
    /// <c>z = (1 - weightB) * interp(A, x) + weightB * interp(B, x)</c>.
    /// Four x values are processed per loop iteration with AVX/AVX2 intrinsics.
    /// </summary>
    /// <param name="x">Query points.</param>
    /// <param name="A">Samples of the first table, spread evenly over [minXA, maxXA].</param>
    /// <param name="minXA">x position of <c>A[0]</c>.</param>
    /// <param name="maxXA">x position of the last element of A.</param>
    /// <param name="B">Samples of the second table, spread evenly over [minXB, maxXB].</param>
    /// <param name="minXB">x position of <c>B[0]</c>.</param>
    /// <param name="maxXB">x position of the last element of B.</param>
    /// <param name="weightB">Blend weight of table B; table A gets <c>1 - weightB</c>.</param>
    /// <returns>A new array of length <c>outputVectorSize</c> (defined outside this method).</returns>
    /// <remarks>
    /// NOTE(review): the loop advances by 4 with no tail handling, so the last iteration loads
    /// and stores 4 doubles even when fewer remain in x. This looks safe only if x.Length is a
    /// multiple of 4 and outputVectorSize covers it - confirm against the callers.
    /// NOTE(review): taking <c>&amp;x[0]</c>, <c>&amp;A[0]</c>, <c>&amp;B[0]</c> indexes element 0, so
    /// an empty array fails before the loop; a table of length 1 makes the sample spacing a
    /// division by zero.
    /// </remarks>
    private static unsafe double[] BilinearInterpol_AVX(
        double[] x,
        double[] A,
        double minXA,
        double maxXA,
        double[] B,
        double minXB,
        double maxXB,
        double weightB)
    {
        double[] z = new double[outputVectorSize];

        fixed(double *pX = &x[0], pA = &A[0], pB = &B[0], pZ = &z[0])
        {
            // Broadcast every scalar the loop needs into all four lanes once, up front.
            Vector256 <double> vWeightB = Vector256.Create(weightB);
            Vector256 <double> vWeightA = Vector256.Create(1 - weightB);

            Vector256 <double> vMinXA = Vector256.Create(minXA);
            Vector256 <double> vMaxXA = Vector256.Create(maxXA);
            Vector256 <double> vMinXB = Vector256.Create(minXB);
            Vector256 <double> vMaxXB = Vector256.Create(maxXB);

            // Spacing between neighbouring samples of each table.
            double             deltaA  = (maxXA - minXA) / (double)(A.Length - 1);
            double             deltaB  = (maxXB - minXB) / (double)(B.Length - 1);
            Vector256 <double> vDeltaA = Vector256.Create(deltaA);
            Vector256 <double> vDeltaB = Vector256.Create(deltaB);

            // Reciprocal spacing, so the loop multiplies instead of dividing.
            double             invDeltaA  = 1.0 / deltaA;
            double             invDeltaB  = 1.0 / deltaB;
            Vector256 <double> vInvDeltaA = Vector256.Create(invDeltaA);
            Vector256 <double> vInvDeltaB = Vector256.Create(invDeltaB);

            // Integer constants used to clamp the table indices.
            Vector128 <int> ALengthMinusOne = Vector128.Create(A.Length - 1);
            Vector128 <int> BLengthMinusOne = Vector128.Create(B.Length - 1);
            Vector128 <int> One             = Vector128.Create(1);

            for (var i = 0; i < x.Length; i += Vector256 <double> .Count)
            {
                Vector256 <double> currentX = Avx.LoadVector256(pX + i);

                // Index of the sample of A at or below x: truncate (x - minXA) / deltaA to an int,
                // then clamp it to [0, A.Length - 1]. The conversion is done on the whole vector.
                Vector256 <double> aDouble = Avx.Multiply(Avx.Subtract(currentX, vMinXA), vInvDeltaA);
                Vector128 <int>    a       = Avx.ConvertToVector128Int32WithTruncation(aDouble);
                a = Sse41.Min(Sse41.Max(a, Vector128 <int> .Zero), ALengthMinusOne);
                // Upper neighbour, clamped so it never indexes past the last sample.
                Vector128 <int> aPlusOne = Sse41.Min(Sse2.Add(a, One), ALengthMinusOne);

                // x position of sample a: xA = a * deltaA + minXA (int -> double conversion).
                Vector256 <double> xA = Avx.Add(Avx.Multiply(Avx.ConvertToVector256Double(a), vDeltaA), vMinXA);

                // Interpolation fraction for A: x is clamped to [minXA, maxXA] first,
                // then lambdaA = (x - xA) / deltaA.
                Vector256 <double> currentXNormA = Avx.Max(vMinXA, Avx.Min(currentX, vMaxXA));
                Vector256 <double> lambdaA       = Avx.Multiply(Avx.Subtract(currentXNormA, xA), vInvDeltaA);

                // Gather A[a] and A[aPlusOne]; the scale of 8 turns each index into a byte offset of a double.
                Vector256 <double> AVector        = Avx2.GatherVector256(pA, a, 8);
                Vector256 <double> AVectorPlusOne = Avx2.GatherVector256(pA, aPlusOne, 8);

                // Same steps for table B.
                Vector256 <double> bDouble = Avx.Multiply(Avx.Subtract(currentX, vMinXB), vInvDeltaB);
                Vector128 <int>    b       = Avx.ConvertToVector128Int32WithTruncation(bDouble);
                b = Sse41.Min(Sse41.Max(b, Vector128 <int> .Zero), BLengthMinusOne);
                Vector128 <int> bPlusOne = Sse41.Min(Sse2.Add(b, One), BLengthMinusOne);

                Vector256 <double> xB            = Avx.Add(Avx.Multiply(Avx.ConvertToVector256Double(b), vDeltaB), vMinXB);
                Vector256 <double> currentXNormB = Avx.Max(vMinXB, Avx.Min(currentX, vMaxXB));
                Vector256 <double> lambdaB       = Avx.Multiply(Avx.Subtract(currentXNormB, xB), vInvDeltaB);

                Vector256 <double> BVector        = Avx2.GatherVector256(pB, b, 8);
                Vector256 <double> BVectorPlusOne = Avx2.GatherVector256(pB, bPlusOne, 8);

                // z = weightA * (A[a] + lambdaA * (A[a+1] - A[a])) + weightB * (B[b] + lambdaB * (B[b+1] - B[b]))
                Vector256 <double> newZ = Avx.Add(Avx.Multiply(vWeightA, Avx.Add(AVector, Avx.Multiply(lambdaA, Avx.Subtract(AVectorPlusOne, AVector)))),
                                                  Avx.Multiply(vWeightB, Avx.Add(BVector, Avx.Multiply(lambdaB, Avx.Subtract(BVectorPlusOne, BVector)))));
                Avx.Store(pZ + i, newZ);
            }
        }

        return(z);
    }
Exemplo n.º 14
0
        /// <summary>
        /// Extracts one channel of <paramref name="bitmap"/> into a dense vector with one entry
        /// per pixel, stored row by row. Raw samples are handed to <c>PointwiseDivideInPlace</c>
        /// with a divisor of 256 (65536 for 16 bpp data).
        /// </summary>
        /// <param name="bitmap">
        /// Source image. 24/32 bpp RGB formats are read as they are unless the gray channel is
        /// requested; any other format, and any gray request, is first passed through
        /// <c>MakeGrayscale</c> / <c>MakeColor</c>. The caller's bitmap is never disposed here.
        /// </param>
        /// <param name="channel">Channel to extract; defaults to gray.</param>
        /// <returns>
        /// A vector of <c>Width * Height</c> values. It stays all zero when the alpha channel is
        /// requested from data that has no alpha (8, 16 or 24 bpp).
        /// </returns>
        public unsafe static Vector <double> BitmapToVector(Bitmap bitmap, BitmapChannel channel = BitmapChannel.Gray)
        {
            int width      = bitmap.Width;
            int height     = bitmap.Height;
            int pixelCount = width * height;
            var rect       = new Rectangle(0, 0, width, height);

            bool needDispose = false;   // true once 'bitmap' refers to a temporary created here
            bool isGray      = false;   // true when the temporary came from the gray conversion below

            switch (bitmap.PixelFormat)
            {
            case PixelFormat.Format24bppRgb:
            case PixelFormat.Format32bppArgb:
            case PixelFormat.Format32bppPArgb:
            case PixelFormat.Format32bppRgb:
                break;

            default:
                // Unsupported layout: convert to a temporary bitmap first.
                bitmap      = channel == BitmapChannel.Gray ? MakeGrayscale(bitmap) : MakeColor(bitmap);
                needDispose = true;
                break;
            }

            if (!needDispose && channel == BitmapChannel.Gray)
            {
                bitmap      = MakeGrayscale(bitmap);
                needDispose = true;
                isGray      = true;
            }

            var result = new double[pixelCount];

            try
            {
                // Measure the depth of the bitmap that is actually locked. Taking it before the
                // conversions above would describe the caller's original format instead.
                int depth = Bitmap.GetPixelFormatSize(bitmap.PixelFormat);

                // Locking inside the outer try lets the temporary bitmap be disposed even if LockBits throws.
                var bitmapData = bitmap.LockBits(rect, ImageLockMode.ReadOnly, bitmap.PixelFormat);

                try
                {
                    byte *scan0    = (byte *)bitmapData.Scan0.ToPointer();
                    int   stride   = bitmapData.Stride;     // bytes per row; negative for bottom-up images
                    int   rowWidth = bitmapData.Width;
                    int   rowCount = bitmapData.Height;
                    bool  bottomUp = stride < 0;

                    switch (depth)
                    {
                    case 8:     // For 8 bpp get color value (Red, Green and Blue values are the same)
                    {
                        if (channel == BitmapChannel.Alpha)
                        {
                            break;
                        }

                        for (int y = 0; y < rowCount; y++)
                        {
                            byte *row      = scan0 + (y * stride);
                            int   rowStart = bottomUp ? (pixelCount - rowWidth) - (y * rowWidth) : y * rowWidth;

                            for (int x = 0; x < rowWidth; x++)
                            {
                                result[rowStart + x] = row[x];
                            }
                        }

                        PointwiseDivideInPlace(result, 256.0);
                        break;
                    }

                    case 16:     // For 16 bpp - gray with 65536 shades
                    {
                        if (channel == BitmapChannel.Alpha)
                        {
                            break;
                        }

                        for (int y = 0; y < rowCount; y++)
                        {
                            // Stride is in bytes: offset the byte pointer first, then reinterpret.
                            // Samples are read unsigned so the upper half of the range stays positive.
                            ushort *row      = (ushort *)(scan0 + (y * stride));
                            int     rowStart = bottomUp ? (pixelCount - rowWidth) - (y * rowWidth) : y * rowWidth;

                            for (int x = 0; x < rowWidth; x++)
                            {
                                result[rowStart + x] = row[x];
                            }
                        }

                        PointwiseDivideInPlace(result, 65536.0);
                        break;
                    }

                    case 24:     // For 24 bpp get Red, Green and Blue
                    case 32:     // For 32 bpp get Red, Green, Blue and Alpha
                    {
                        if (channel == BitmapChannel.Alpha && depth == 24)
                        {
                            break;      // 24 bit images have no alpha channel
                        }

                        int step = depth / 8;   // bytes per pixel; bytes are read as B, G, R[, A]

                        if (channel == BitmapChannel.Gray)
                        {
                            if (isGray && UseGrayConverter)
                            {
                                // In a gray image (made with MakeGrayscale) R = G = B, so one byte is enough.
                                for (int y = 0; y < rowCount; y++)
                                {
                                    byte *row      = scan0 + (y * stride);
                                    int   rowStart = bottomUp ? (pixelCount - rowWidth) - (y * rowWidth) : y * rowWidth;

                                    for (int i = 0, x = 0; i < rowWidth; i++, x += step)
                                    {
                                        result[rowStart + i] = row[x];
                                    }
                                }
                            }
                            else if (UseAvx)
                            {
                                var avxWeights = Vector256.Create(0.11d, 0.59d, 0.3d, 0d);

                                for (int y = 0; y < rowCount; y++)
                                {
                                    byte *row      = scan0 + (y * stride);
                                    int   rowStart = bottomUp ? (pixelCount - rowWidth) - (y * rowWidth) : y * rowWidth;

                                    for (int i = 0, x = 0; i < rowWidth; i++, x += step)
                                    {
                                        var bgr      = Vector128.Create((int)row[x], (int)row[x + 1], (int)row[x + 2], (int)0);
                                        var weighted = Avx.Multiply(Avx.ConvertToVector256Double(bgr), avxWeights);
                                        result[rowStart + i] = weighted.GetElement(0) + weighted.GetElement(1) + weighted.GetElement(2);
                                    }
                                }
                            }
                            else if (UseSIMD)
                            {
                                var simdWeights = new Numerics.Vector4(0.11f, 0.59f, 0.3f, 0f);

                                for (int y = 0; y < rowCount; y++)
                                {
                                    byte *row      = scan0 + (y * stride);
                                    int   rowStart = bottomUp ? (pixelCount - rowWidth) - (y * rowWidth) : y * rowWidth;

                                    for (int i = 0, x = 0; i < rowWidth; i++, x += step)
                                    {
                                        var bgr = new Numerics.Vector4(row[x], row[x + 1], row[x + 2], 0);
                                        result[rowStart + i] = Numerics.Vector4.Dot(bgr, simdWeights);
                                    }
                                }
                            }
                            else
                            {
                                for (int y = 0; y < rowCount; y++)
                                {
                                    byte *row      = scan0 + (y * stride);
                                    int   rowStart = bottomUp ? (pixelCount - rowWidth) - (y * rowWidth) : y * rowWidth;

                                    for (int i = 0, x = 0; i < rowWidth; i++, x += step)
                                    {
                                        // Same weights as the vectorised paths: 0.11 B + 0.59 G + 0.3 R.
                                        result[rowStart + i] = 0.11d * row[x] + 0.59d * row[x + 1] + 0.3d * row[x + 2];
                                    }
                                }
                            }

                            PointwiseDivideInPlace(result, 256.0);
                        }
                        else
                        {
                            // Byte offset of the requested channel inside a pixel; -1 means nothing to copy.
                            int channelOffset;
                            switch (channel)
                            {
                            case BitmapChannel.Red:
                                channelOffset = 2;
                                break;

                            case BitmapChannel.Green:
                                channelOffset = 1;
                                break;

                            case BitmapChannel.Blue:
                                channelOffset = 0;
                                break;

                            case BitmapChannel.Alpha:
                                // Only reachable for 32 bpp data; the 24 bpp case returned above.
                                channelOffset = depth == 32 ? 3 : -1;
                                break;

                            default:
                                channelOffset = -1;
                                break;
                            }

                            if (channelOffset >= 0)
                            {
                                for (int y = 0; y < rowCount; y++)
                                {
                                    byte *row      = scan0 + (y * stride);
                                    int   rowStart = bottomUp ? (pixelCount - rowWidth) - (y * rowWidth) : y * rowWidth;

                                    for (int i = 0, x = 0; i < rowWidth; i++, x += step)
                                    {
                                        result[rowStart + i] = row[x + channelOffset];
                                    }
                                }
                            }

                            PointwiseDivideInPlace(result, 256.0);
                        }
                        break;
                    }
                    }
                }
                finally
                {
                    bitmap.UnlockBits(bitmapData);
                }
            }
            finally
            {
                // Only the temporary created by MakeGrayscale / MakeColor is disposed.
                if (needDispose)
                {
                    bitmap.Dispose();
                }
            }

            return Vector <double> .Build.Dense(result);
        }