public void RunBasicScenario()
        {
            // Exercises GetLower/GetUpper and WithLower/WithUpper on a Vector256<UInt32>
            // built from freshly generated elements.
            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario));

            // Source elements; the same array is handed to both validators below.
            var values = new UInt32[ElementCount];
            int index = 0;

            while (index < ElementCount)
            {
                values[index] = TestLibrary.Generator.GetUInt32();
                index++;
            }

            Vector256<UInt32> value = Vector256.Create(
                values[0], values[1], values[2], values[3],
                values[4], values[5], values[6], values[7]);

            // Split the vector into its two 128-bit halves and validate them.
            Vector128<UInt32> lowerHalf = value.GetLower();
            Vector128<UInt32> upperHalf = value.GetUpper();
            ValidateGetResult(lowerHalf, upperHalf, values);

            // Write the halves back in swapped positions (upper into lower, lower into upper)
            // and validate the rebuilt vector.
            Vector256<UInt32> swapped = value.WithLower(upperHalf).WithUpper(lowerHalf);
            ValidateWithResult(swapped, values);
        }
        /// <summary>
        /// Exercises GetLower/GetUpper and WithLower/WithUpper on a Vector256&lt;SByte&gt;
        /// built from freshly generated elements.
        /// </summary>
        public void RunBasicScenario()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario));

            // Source elements; the same array is handed to both validators below.
            var values = new SByte[ElementCount];
            int index = 0;

            while (index < ElementCount)
            {
                values[index] = TestLibrary.Generator.GetSByte();
                index++;
            }

            Vector256<SByte> value = Vector256.Create(
                values[0], values[1], values[2], values[3], values[4], values[5], values[6], values[7],
                values[8], values[9], values[10], values[11], values[12], values[13], values[14], values[15],
                values[16], values[17], values[18], values[19], values[20], values[21], values[22], values[23],
                values[24], values[25], values[26], values[27], values[28], values[29], values[30], values[31]);

            // Split the vector into its two 128-bit halves and validate them.
            Vector128<SByte> lowerHalf = value.GetLower();
            Vector128<SByte> upperHalf = value.GetUpper();
            ValidateGetResult(lowerHalf, upperHalf, values);

            // Write the halves back in swapped positions (upper into lower, lower into upper)
            // and validate the rebuilt vector.
            Vector256<SByte> swapped = value.WithLower(upperHalf).WithUpper(lowerHalf);
            ValidateWithResult(swapped, values);
        }
示例#3
0
        /// <summary>
        /// Divides each element of <paramref name="dividend"/> by the corresponding element of
        /// <paramref name="divisor"/>.
        /// </summary>
        /// <param name="dividend">The vector of numerators.</param>
        /// <param name="divisor">The vector of denominators.</param>
        /// <returns>The element-wise quotient.</returns>
        public static Vector256 <float> Divide(Vector256 <float> dividend, Vector256 <float> divisor)
        {
            if (Avx.IsSupported)
            {
                return Avx.Divide(dividend, divisor);
            }

            // Fallback without AVX: divide the two 128-bit halves independently and recombine.
            // Each half of the dividend must be paired with the SAME half of the divisor;
            // the upper half previously used divisor.GetLower(), producing wrong upper elements.
            Vector128<float> low  = Divide(dividend.GetLower(), divisor.GetLower());
            Vector128<float> high = Divide(dividend.GetUpper(), divisor.GetUpper());

            return FromLowHigh(low, high);
        }
示例#4
0
 /// <summary>
 /// Splits a 256-bit vector of doubles into its lower and upper 128-bit halves.
 /// </summary>
 /// <param name="vector">The vector to split.</param>
 /// <param name="low">Receives the lower half of <paramref name="vector"/>.</param>
 /// <param name="high">Receives the upper half of <paramref name="vector"/>.</param>
 public static void GetLowHigh(Vector256 <double> vector, out Vector128 <double> low, out Vector128 <double> high)
 {
     (low, high) = (vector.GetLower(), vector.GetUpper());
 }
 /// <summary>
 /// Adds up all eight elements of <paramref name="v"/> using AVX horizontal adds.
 /// </summary>
 /// <param name="v">The vector whose elements are summed.</param>
 /// <returns>The sum of the eight elements.</returns>
 private static float SumVector256(Vector256 <float> v)
 {
     // First pass: adjacent pairs within each 128-bit lane (0+1, 2+3 | 4+5, 6+7).
     Vector256<float> pairSums = Avx.HorizontalAdd(v, v);

     // Second pass: element 0 of each lane now holds that lane's total (0+1+2+3 | 4+5+6+7).
     Vector256<float> laneSums = Avx.HorizontalAdd(pairSums, pairSums);

     float upperTotal = laneSums.GetUpper().ToScalar();
     float lowerTotal = laneSums.GetLower().ToScalar();

     return upperTotal + lowerTotal;
 }
示例#6
0
        /// <summary>
        /// Convolves one line of float input samples with per-output kernel weights and writes
        /// three float results (tp[0..2]) for every output position.
        /// </summary>
        /// <param name="istart">Start of the input samples; read as <c>float</c>.</param>
        /// <param name="tstart">Start of the output buffer; written as <c>float</c>.</param>
        /// <param name="cb">Byte length of the output span; the loop stops once the write pointer reaches <c>tstart + cb</c>.</param>
        /// <param name="mapxstart">Start of the map: for each output, one <c>int</c> input index followed by <c>smapx * channels</c> float weights.</param>
        /// <param name="smapx">Kernel width used to size the weight run; only <c>smapx / 4</c> whole groups of four are processed here.</param>
        /// <param name="smapy">Used only to derive the output stride (<c>smapy * 4</c> floats between consecutive outputs).</param>
        unsafe void IConvolver.ConvolveSourceLine(byte *istart, byte *tstart, int cb, byte *mapxstart, int smapx, int smapy)
        {
            // tp walks the output; tpe is the end sentinel cb bytes past tstart.
            float *tp = (float *)tstart, tpe = (float *)(tstart + cb);
            float *pmapx   = (float *)mapxstart;
            // Number of weight floats stored per output in the map.
            int    kstride = smapx * channels;
            // Distance, in floats, between consecutive output positions.
            int    tstride = smapy * 4;
            // Number of 4-sample groups per kernel; presumably smapx is a multiple of 4 — TODO confirm,
            // since any remainder is not processed by the loops below.
            int    vcnt    = smapx / Vector128 <float> .Count;

            while (tp < tpe)
            {
                // Each map entry begins with the input sample index, stored as an int in the float stream.
                int ix   = *(int *)pmapx++;
                int lcnt = vcnt;

                // ip: first input float for this output; mp: this output's weights.
                float *ip = (float *)istart + ix * channels;
                float *mp = pmapx;
                // Advance the map pointer to the next output's entry.
                pmapx += kstride;

                // Three 128-bit accumulators covering 12 consecutive interleaved floats.
                Vector128 <float> av0, av1, av2;

                if (Avx.IsSupported && lcnt >= 2)
                {
                    // 256-bit path: handles two 4-sample groups (three 8-float vectors) per iteration.
                    Vector256 <float> ax0 = Vector256 <float> .Zero, ax1 = ax0, ax2 = ax0;

                    for (; lcnt >= 2; lcnt -= 2)
                    {
                        var iv0 = Avx.LoadVector256(ip);
                        var iv1 = Avx.LoadVector256(ip + Vector256 <float> .Count);
                        var iv2 = Avx.LoadVector256(ip + Vector256 <float> .Count * 2);
                        // NOTE(review): Vector256<int>.Count equals Vector256<float>.Count (8), so this matches
                        // the mp advance below; the <int> spelling is only an inconsistency.
                        ip += Vector256 <int> .Count * channels;

                        if (Fma.IsSupported)
                        {
                            // Fused multiply-add: weights * input + accumulator.
                            ax0 = Fma.MultiplyAdd(Avx.LoadVector256(mp), iv0, ax0);
                            ax1 = Fma.MultiplyAdd(Avx.LoadVector256(mp + Vector256 <float> .Count), iv1, ax1);
                            ax2 = Fma.MultiplyAdd(Avx.LoadVector256(mp + Vector256 <float> .Count * 2), iv2, ax2);
                        }
                        else
                        {
                            ax0 = Avx.Add(ax0, Avx.Multiply(iv0, Avx.LoadVector256(mp)));
                            ax1 = Avx.Add(ax1, Avx.Multiply(iv1, Avx.LoadVector256(mp + Vector256 <float> .Count)));
                            ax2 = Avx.Add(ax2, Avx.Multiply(iv2, Avx.LoadVector256(mp + Vector256 <float> .Count * 2)));
                        }
                        mp += Vector256 <float> .Count * channels;
                    }

                    // Fold the six 128-bit halves (float offsets 0-3, 4-7, ..., 20-23) into the three
                    // 128-bit accumulators so that halves 12 floats apart are added together; this keeps
                    // the lane layout the 128-bit loop below expects (assumes channels == 3 — TODO confirm).
                    av0 = Sse.Add(ax0.GetLower(), ax1.GetUpper());
                    av1 = Sse.Add(ax0.GetUpper(), ax2.GetLower());
                    av2 = Sse.Add(ax1.GetLower(), ax2.GetUpper());
                }
                else
                {
                    av0 = av1 = av2 = Vector128 <float> .Zero;
                }

                // 128-bit path: processes the remaining groups (all of them when the AVX path was skipped).
                for (; lcnt != 0; lcnt--)
                {
                    var iv0 = Sse.LoadVector128(ip);
                    var iv1 = Sse.LoadVector128(ip + Vector128 <float> .Count);
                    var iv2 = Sse.LoadVector128(ip + Vector128 <float> .Count * 2);
                    ip += Vector128 <float> .Count * channels;

                    if (Fma.IsSupported)
                    {
                        av0 = Fma.MultiplyAdd(Sse.LoadVector128(mp), iv0, av0);
                        av1 = Fma.MultiplyAdd(Sse.LoadVector128(mp + Vector128 <float> .Count), iv1, av1);
                        av2 = Fma.MultiplyAdd(Sse.LoadVector128(mp + Vector128 <float> .Count * 2), iv2, av2);
                    }
                    else
                    {
                        av0 = Sse.Add(av0, Sse.Multiply(iv0, Sse.LoadVector128(mp)));
                        av1 = Sse.Add(av1, Sse.Multiply(iv1, Sse.LoadVector128(mp + Vector128 <float> .Count)));
                        av2 = Sse.Add(av2, Sse.Multiply(iv2, Sse.LoadVector128(mp + Vector128 <float> .Count * 2)));
                    }
                    mp += Vector128 <float> .Count * channels;
                }

                // Horizontal reduction. The shuffles rotate lanes so that avs0 holds:
                //   [0] = av0[3] + av1[2] + av2[1]
                //   [1] = av0[1] + av1[3] + av2[2]
                //   [2] = av0[2] + av1[1] + av2[3]
                // i.e. every lane belonging to the same position-modulo-3 except the one sitting in
                // element 0 of av0/av1/av2, which AddScalar contributes below.
                var avs0 = Sse.Add(Sse.Add(
                                       Sse.Shuffle(av0, av0, 0b_00_10_01_11),
                                       Sse.Shuffle(av1, av1, 0b_00_01_11_10)),
                                   Sse.Shuffle(av2, av2, 0b_00_11_10_01)
                                   );
                // Bring avs0[1] down to element 0 (both branches yield the same lane arrangement).
                var avs1 = Sse3.IsSupported ?
                           Sse3.MoveHighAndDuplicate(avs0) :
                           Sse.Shuffle(avs0, avs0, 0b_11_11_01_01);
                // Bring avs0[2] down to element 0.
                var avs2 = Sse.UnpackHigh(avs0, avs0);

                // Element 0 of each sum is the final value for one of the three outputs.
                tp[0] = Sse.AddScalar(av0, avs0).ToScalar();
                tp[1] = Sse.AddScalar(av1, avs1).ToScalar();
                tp[2] = Sse.AddScalar(av2, avs2).ToScalar();
                tp   += tstride;
            }
        }
 /// <summary>
 /// Splits a 256-bit vector into its lower and upper 128-bit halves.
 /// </summary>
 /// <typeparam name="T">The element type of the vector.</typeparam>
 /// <param name="vector">The vector to split.</param>
 /// <param name="low">Receives the lower half of <paramref name="vector"/>.</param>
 /// <param name="high">Receives the upper half of <paramref name="vector"/>.</param>
 public static void GetLowHigh <T>(Vector256 <T> vector, out Vector128 <T> low, out Vector128 <T> high) where T : struct
 {
     (low, high) = (vector.GetLower(), vector.GetUpper());
 }