示例#1
0
        /// <summary>
        /// Computes <c>a * b + c</c> element-wise on 256-bit vectors.
        /// </summary>
        /// <remarks>
        /// For <c>int</c> and <c>uint</c> the product is formed with <c>Avx2.MultiplyLow</c>, which keeps
        /// the low 32 bits of every lane's product, and the sum with <c>Avx2.Add</c>.
        /// For <c>float</c> and <c>double</c> the work is delegated to <c>Fma.MultiplyAdd</c>.
        /// </remarks>
        /// <param name="a">Left multiplicand.</param>
        /// <param name="b">Right multiplicand.</param>
        /// <param name="c">Addend.</param>
        /// <returns>The combined vector, reinterpreted back to <typeparamref name="T"/>.</returns>
        /// <exception cref="NotSupportedException">
        /// <typeparamref name="T"/> is not <c>int</c>, <c>uint</c>, <c>float</c> or <c>double</c>.
        /// </exception>
        internal static Vector256<T> MultiplyAddVector256(Vector256<T> a, Vector256<T> b, Vector256<T> c)
        {
            if (typeof(T) == typeof(int))
            {
                // The 256-bit MultiplyLow already covers all eight lanes, so no separate
                // 128-bit multiply of the upper half is needed.
                Vector256<int> product = Avx2.MultiplyLow(a.As<T, int>(), b.As<T, int>());
                return Avx2.Add(product, c.As<T, int>()).As<int, T>();
            }

            if (typeof(T) == typeof(uint))
            {
                Vector256<uint> product = Avx2.MultiplyLow(a.As<T, uint>(), b.As<T, uint>());
                return Avx2.Add(product, c.As<T, uint>()).As<uint, T>();
            }

            if (typeof(T) == typeof(float))
            {
                return Fma.MultiplyAdd(a.As<T, float>(), b.As<T, float>(), c.As<T, float>()).As<float, T>();
            }

            if (typeof(T) == typeof(double))
            {
                return Fma.MultiplyAdd(a.As<T, double>(), b.As<T, double>(), c.As<T, double>()).As<double, T>();
            }

            throw new NotSupportedException();
        }
示例#2
0
        /// <summary>
        /// Two-source shuffle for floating-point vectors; forwards to <c>Avx.Shuffle</c>.
        /// </summary>
        /// <param name="va">First source vector.</param>
        /// <param name="vb">Second source vector.</param>
        /// <param name="control">Control byte handed unchanged to <c>Avx.Shuffle</c>.</param>
        /// <returns>The shuffled vector, reinterpreted back to <typeparamref name="T"/>.</returns>
        /// <exception cref="NotSupportedException">
        /// <typeparamref name="T"/> is neither <c>float</c> nor <c>double</c>.
        /// </exception>
        internal static Vector256<T> ShuffleVector256(Vector256<T> va, Vector256<T> vb, byte control)
        {
            if (typeof(T) == typeof(float))
            {
                Vector256<float> first = va.As<T, float>();
                Vector256<float> second = vb.As<T, float>();
                Vector256<float> shuffled = Avx.Shuffle(first, second, control);
                return shuffled.As<float, T>();
            }
            else if (typeof(T) == typeof(double))
            {
                Vector256<double> first = va.As<T, double>();
                Vector256<double> second = vb.As<T, double>();
                Vector256<double> shuffled = Avx.Shuffle(first, second, control);
                return shuffled.As<double, T>();
            }
            else
            {
                throw new NotSupportedException();
            }
        }
示例#3
0
        /// <summary>
        /// Single-source shuffle for 32-bit integer vectors; forwards to <c>Avx2.Shuffle</c>.
        /// </summary>
        /// <param name="va">Source vector.</param>
        /// <param name="control">Control byte handed unchanged to <c>Avx2.Shuffle</c>.</param>
        /// <returns>The shuffled vector, reinterpreted back to <typeparamref name="T"/>.</returns>
        /// <exception cref="NotSupportedException">
        /// <typeparamref name="T"/> is neither <c>int</c> nor <c>uint</c>.
        /// </exception>
        internal static Vector256<T> ShuffleVector256(Vector256<T> va, byte control)
        {
            if (typeof(T) == typeof(int))
            {
                Vector256<int> source = va.As<T, int>();
                return Avx2.Shuffle(source, control).As<int, T>();
            }
            else if (typeof(T) == typeof(uint))
            {
                Vector256<uint> source = va.As<T, uint>();
                return Avx2.Shuffle(source, control).As<uint, T>();
            }
            else
            {
                throw new NotSupportedException();
            }
        }
示例#4
0
        /// <summary>
        /// Byte shuffle; forwards to the two-vector <c>Avx2.Shuffle</c> overload with
        /// <paramref name="vb"/> supplied as its second (mask) argument.
        /// </summary>
        /// <param name="va">Vector whose bytes are shuffled.</param>
        /// <param name="vb">Vector passed as the shuffle mask.</param>
        /// <returns>The shuffled vector, reinterpreted back to <typeparamref name="T"/>.</returns>
        /// <exception cref="NotSupportedException">
        /// <typeparamref name="T"/> is neither <c>sbyte</c> nor <c>byte</c>.
        /// </exception>
        internal static Vector256<T> ShuffleVector256(Vector256<T> va, Vector256<T> vb)
        {
            if (typeof(T) == typeof(sbyte))
            {
                Vector256<sbyte> source = va.As<T, sbyte>();
                Vector256<sbyte> mask = vb.As<T, sbyte>();
                return Avx2.Shuffle(source, mask).As<sbyte, T>();
            }
            else if (typeof(T) == typeof(byte))
            {
                Vector256<byte> source = va.As<T, byte>();
                Vector256<byte> mask = vb.As<T, byte>();
                return Avx2.Shuffle(source, mask).As<byte, T>();
            }
            else
            {
                throw new NotSupportedException();
            }
        }
示例#5
0
        /// <summary>
        /// Vectorized logarithm approximation over eight single-precision lanes.
        /// Absolute error bounded by 1e-4.
        /// </summary>
        /// <remarks>
        /// The input bits are split into an exponent part (bits shifted right by 23) and a
        /// mantissa part forced into the range of 1.0f's exponent. A degree-5 polynomial in the
        /// mantissa is added to a scaled exponent plus a constant offset.
        /// Lanes that compare strictly less than zero receive NaN as the offset, so their result is NaN.
        /// </remarks>
        /// <param name="x">Input values.</param>
        /// <returns>The approximated logarithm of every lane.</returns>
        public static Vector256<float> Log(Vector256<float> x)
        {
            // The constants are isolated up-front on purpose: the original author measured
            // a speed-up of nearly 10% with BenchmarkDotNet when doing so.
            const float offsetForNonNegative = -89.970756366f;
            const float offsetForNegative = float.NaN; // behavior of MathF.Log() on negative numbers
            const float coeff1 = 3.529304993f;
            const float coeff2 = -2.461222105f;
            const float coeff3 = 1.130626167f;
            const float coeff4 = -0.288739945f;
            const float coeff5 = 3.110401639e-2f;
            const float exponentScale = 0.6931471805f;

            const int mantissaBits = 0x7FFFFF;   // low 23 bits of a float
            const int oneAsBits = 0x3F800000;    // bit pattern of 1.0f

            Vector256<int> rawBits = x.As<float, int>();

            // Sign and exponent field, converted to float.
            Vector256<float> exponent = Avx.ConvertToVector256Single(Avx2.ShiftRightArithmetic(rawBits, 23));

            // offset = x < 0 ? NaN : -89.970756366f
            Vector256<float> isNegative = Avx.Compare(x, Vector256<float>.Zero, FloatComparisonMode.OrderedLessThanNonSignaling);
            Vector256<float> offset = Avx.BlendVariable(
                Vector256.Create(offsetForNonNegative),
                Vector256.Create(offsetForNegative),
                isNegative);

            // Keep the mantissa bits and give them the exponent of 1.0f.
            Vector256<int> keptMantissa = Avx2.And(rawBits, Vector256.Create(mantissaBits));
            Vector256<float> m = Avx2.Or(keptMantissa, Vector256.Create(oneAsBits)).As<int, float>();

            // Horner evaluation of
            //   m * (coeff1 + m * (coeff2 + m * (coeff3 + m * (coeff4 + m * coeff5))))
            Vector256<float> poly = Avx.Multiply(m, Vector256.Create(coeff5));
            poly = Avx.Add(Vector256.Create(coeff4), poly);
            poly = Avx.Multiply(m, poly);
            poly = Avx.Add(Vector256.Create(coeff3), poly);
            poly = Avx.Multiply(m, poly);
            poly = Avx.Add(Vector256.Create(coeff2), poly);
            poly = Avx.Multiply(m, poly);
            poly = Avx.Add(Vector256.Create(coeff1), poly);
            poly = Avx.Multiply(m, poly);

            // offset + exponentScale * exponent
            Vector256<float> exponentTerm = Avx.Add(offset, Avx.Multiply(Vector256.Create(exponentScale), exponent));

            return Avx.Add(poly, exponentTerm);
        }
示例#6
0
    /// <summary>
    /// Vectorized exponential approximation over eight single-precision lanes.
    /// </summary>
    /// <remarks>
    /// Steps, as written in the code: clamp the input between <c>MinValue</c> and <c>MaxValue</c>;
    /// compute <c>fx = Floor(value * Log2 + Point5)</c>; subtract <c>fx * C1</c> and <c>fx * C2</c>
    /// from the input; evaluate a polynomial with coefficients <c>P0</c>..<c>P4</c> and <c>Point5</c>;
    /// finally multiply by a float whose bits are <c>(int(fx) + Ox7) &lt;&lt; 23</c>.
    /// NOTE(review): presumably <c>Log2</c> holds log2(e), <c>C1</c>/<c>C2</c> a split ln(2), and
    /// <c>Ox7</c> the exponent bias 0x7F, so the last factor is 2^fx; confirm against the field definitions.
    /// </remarks>
    /// <param name="value">Input values.</param>
    /// <returns>The approximated exponential of every lane.</returns>
    public static Vector256<float> Exp(Vector256<float> value)
    {
        // Clamp the argument: upper bound first, then lower bound.
        value = Min(value, MaxValue);
        value = Max(value, MinValue);

        Vector256<float> fx = Floor(Add(Multiply(value, Log2), Point5));

        // Remove fx * C1 and fx * C2 from the argument in two separate subtractions.
        Vector256<float> scaledByC1 = Multiply(fx, C1);
        Vector256<float> scaledByC2 = Multiply(fx, C2);
        Vector256<float> r = Subtract(Subtract(value, scaledByC1), scaledByC2);
        Vector256<float> rSquared = Multiply(r, r);

        // Horner evaluation of the polynomial in r.
        Vector256<float> poly = P0;
        poly = Add(Multiply(poly, r), P1);
        poly = Add(Multiply(poly, r), P2);
        poly = Add(Multiply(poly, r), P3);
        poly = Add(Multiply(poly, r), P4);
        poly = Add(Multiply(poly, r), Point5);
        poly = Add(Add(Multiply(poly, rSquared), r), One);

        // Build the scale factor directly in the float exponent field.
        Vector256<int> exponentBits = ConvertToVector256Int32(fx);
        exponentBits = Avx2.Add(exponentBits, Ox7);
        exponentBits = Avx2.ShiftLeftLogical(exponentBits, 23);

        return Multiply(poly, exponentBits.As<float>());
    }
示例#7
0
    /// <summary>
    /// Vectorized logarithm approximation over eight single-precision lanes.
    /// </summary>
    /// <remarks>
    /// The input is clamped from below with <c>MinNormPos</c>, split into an integer exponent
    /// (bits shifted right by 23, minus <c>Ox7</c>, plus one) and a mantissa term, and a polynomial
    /// with coefficients <c>LogP0</c>..<c>LogP8</c> is evaluated on the adjusted mantissa.
    /// The mask of lanes that compared less than or equal to zero is OR-ed into the result.
    /// NOTE(review): presumably <c>Compare</c> is <c>Avx.Compare</c>, so those lanes become all-ones
    /// bit patterns (NaN); confirm.
    /// </remarks>
    /// <param name="value">Input values.</param>
    /// <returns>The approximated logarithm of every lane.</returns>
    public static Vector256<float> Log(Vector256<float> value)
    {
        Vector256<float> invalidMask = Compare(value, Vector256<float>.Zero, FloatComparisonMode.LessThanOrEqualOrderedNonSignaling);

        // Clamp from below before taking the bits apart.
        Vector256<float> clamped = Max(value, MinNormPos.As<float>());
        Vector256<int> exponentBits = Avx2.ShiftRightLogical(clamped.As<int>(), 23);

        // Keep the bits selected by MantMask and OR in the bits of Point5.
        Vector256<float> m = Or(And(clamped, MantMask.As<float>()), Point5);

        exponentBits = Avx2.Subtract(exponentBits, Ox7);
        Vector256<float> exponent = Add(ConvertToVector256Single(exponentBits), One);

        // Lanes whose mantissa term is below Sqrthf get m added back in and one removed from the exponent.
        Vector256<float> belowThreshold = Compare(m, Sqrthf, FloatComparisonMode.LessThanOrderedNonSignaling);
        Vector256<float> addBack = And(m, belowThreshold);

        m = Subtract(m, One);
        exponent = Subtract(exponent, And(One, belowThreshold));
        m = Add(m, addBack);

        Vector256<float> mSquared = Multiply(m, m);

        // Horner evaluation of the polynomial in m.
        Vector256<float> poly = LogP0;
        poly = Add(Multiply(poly, m), LogP1);
        poly = Add(Multiply(poly, m), LogP2);
        poly = Add(Multiply(poly, m), LogP3);
        poly = Add(Multiply(poly, m), LogP4);
        poly = Add(Multiply(poly, m), LogP5);
        poly = Add(Multiply(poly, m), LogP6);
        poly = Add(Multiply(poly, m), LogP7);
        poly = Add(Multiply(poly, m), LogP8);
        poly = Multiply(Multiply(poly, m), mSquared);

        // Exponent contributions (LogQ1, LogQ2) and the -m^2 * Point5 correction.
        poly = Add(poly, Multiply(exponent, LogQ1));
        poly = Subtract(poly, Multiply(mSquared, Point5));
        m = Add(Add(m, poly), Multiply(exponent, LogQ2));

        return Or(m, invalidMask);
    }
示例#8
0
 /// <summary>
 /// Writes <paramref name="vector256"/> to <paramref name="address"/> through the
 /// <c>Avx.Store</c> overload that matches <typeparamref name="T"/>.
 /// </summary>
 /// <param name="address">Destination pointer.</param>
 /// <param name="vector256">Vector to store.</param>
 /// <exception cref="NotSupportedException">
 /// <typeparamref name="T"/> is not one of the ten primitive element types handled below.
 /// </exception>
 public static void Store(T* address, Vector256<T> vector256)
 {
     if (typeof(T) == typeof(sbyte))
     {
         Avx.Store((sbyte*)address, vector256.As<T, sbyte>());
         return;
     }

     if (typeof(T) == typeof(byte))
     {
         Avx.Store((byte*)address, vector256.As<T, byte>());
         return;
     }

     if (typeof(T) == typeof(short))
     {
         Avx.Store((short*)address, vector256.As<T, short>());
         return;
     }

     if (typeof(T) == typeof(ushort))
     {
         Avx.Store((ushort*)address, vector256.As<T, ushort>());
         return;
     }

     if (typeof(T) == typeof(int))
     {
         Avx.Store((int*)address, vector256.As<T, int>());
         return;
     }

     if (typeof(T) == typeof(uint))
     {
         Avx.Store((uint*)address, vector256.As<T, uint>());
         return;
     }

     if (typeof(T) == typeof(long))
     {
         Avx.Store((long*)address, vector256.As<T, long>());
         return;
     }

     if (typeof(T) == typeof(ulong))
     {
         Avx.Store((ulong*)address, vector256.As<T, ulong>());
         return;
     }

     if (typeof(T) == typeof(float))
     {
         Avx.Store((float*)address, vector256.As<T, float>());
         return;
     }

     if (typeof(T) == typeof(double))
     {
         Avx.Store((double*)address, vector256.As<T, double>());
         return;
     }

     throw new NotSupportedException();
 }
示例#9
0
        /// <summary>
        /// Permutes the four 64-bit elements of a vector; forwards to <c>Avx2.Permute4x64</c>.
        /// </summary>
        /// <param name="va">Source vector.</param>
        /// <param name="control">Control byte handed unchanged to <c>Avx2.Permute4x64</c>.</param>
        /// <returns>The permuted vector, reinterpreted back to <typeparamref name="T"/>.</returns>
        /// <exception cref="NotSupportedException">
        /// <typeparamref name="T"/> is not <c>long</c>, <c>ulong</c> or <c>double</c>.
        /// </exception>
        internal static Vector256<T> Permute4X64Vector256(Vector256<T> va, byte control)
        {
            if (typeof(T) == typeof(long))
            {
                Vector256<long> source = va.As<T, long>();
                return Avx2.Permute4x64(source, control).As<long, T>();
            }
            else if (typeof(T) == typeof(ulong))
            {
                Vector256<ulong> source = va.As<T, ulong>();
                return Avx2.Permute4x64(source, control).As<ulong, T>();
            }
            else if (typeof(T) == typeof(double))
            {
                Vector256<double> source = va.As<T, double>();
                return Avx2.Permute4x64(source, control).As<double, T>();
            }
            else
            {
                throw new NotSupportedException();
            }
        }
示例#10
0
        /// <summary>
        /// Bitwise AND of two 256-bit vectors.
        /// </summary>
        /// <remarks>
        /// Integer element types go through <c>Avx2.And</c>; <c>float</c> and <c>double</c> go through <c>Avx.And</c>.
        /// </remarks>
        /// <param name="va">Left operand.</param>
        /// <param name="vb">Right operand.</param>
        /// <returns>The AND of both operands, reinterpreted back to <typeparamref name="T"/>.</returns>
        /// <exception cref="NotSupportedException">
        /// <typeparamref name="T"/> is not one of the ten primitive element types handled below.
        /// </exception>
        public static Vector256<T> And(Vector256<T> va, Vector256<T> vb)
        {
            // 8-bit elements
            if (typeof(T) == typeof(sbyte))
            {
                return Avx2.And(va.As<T, sbyte>(), vb.As<T, sbyte>()).As<sbyte, T>();
            }
            else if (typeof(T) == typeof(byte))
            {
                return Avx2.And(va.As<T, byte>(), vb.As<T, byte>()).As<byte, T>();
            }
            // 16-bit elements
            else if (typeof(T) == typeof(short))
            {
                return Avx2.And(va.As<T, short>(), vb.As<T, short>()).As<short, T>();
            }
            else if (typeof(T) == typeof(ushort))
            {
                return Avx2.And(va.As<T, ushort>(), vb.As<T, ushort>()).As<ushort, T>();
            }
            // 32-bit integer elements
            else if (typeof(T) == typeof(int))
            {
                return Avx2.And(va.As<T, int>(), vb.As<T, int>()).As<int, T>();
            }
            else if (typeof(T) == typeof(uint))
            {
                return Avx2.And(va.As<T, uint>(), vb.As<T, uint>()).As<uint, T>();
            }
            // 64-bit integer elements
            else if (typeof(T) == typeof(long))
            {
                return Avx2.And(va.As<T, long>(), vb.As<T, long>()).As<long, T>();
            }
            else if (typeof(T) == typeof(ulong))
            {
                return Avx2.And(va.As<T, ulong>(), vb.As<T, ulong>()).As<ulong, T>();
            }
            // Floating-point elements use the AVX overloads.
            else if (typeof(T) == typeof(float))
            {
                return Avx.And(va.As<T, float>(), vb.As<T, float>()).As<float, T>();
            }
            else if (typeof(T) == typeof(double))
            {
                return Avx.And(va.As<T, double>(), vb.As<T, double>()).As<double, T>();
            }
            else
            {
                throw new NotSupportedException();
            }
        }
示例#11
0
        /// <summary>
        /// Selects 128-bit halves from two source vectors; forwards to <c>Permute2x128</c>.
        /// </summary>
        /// <remarks>
        /// Integer element types go through <c>Avx2.Permute2x128</c>; <c>float</c> and <c>double</c>
        /// go through <c>Avx.Permute2x128</c>.
        /// </remarks>
        /// <param name="va">First source vector.</param>
        /// <param name="vb">Second source vector.</param>
        /// <param name="control">Control byte handed unchanged to <c>Permute2x128</c>.</param>
        /// <returns>The permuted vector, reinterpreted back to <typeparamref name="T"/>.</returns>
        /// <exception cref="NotSupportedException">
        /// <typeparamref name="T"/> is not one of the ten primitive element types handled below.
        /// </exception>
        internal static Vector256<T> Permute2X128Vector256(Vector256<T> va, Vector256<T> vb, byte control)
        {
            // 8-bit elements
            if (typeof(T) == typeof(sbyte))
            {
                return Avx2.Permute2x128(va.As<T, sbyte>(), vb.As<T, sbyte>(), control).As<sbyte, T>();
            }
            else if (typeof(T) == typeof(byte))
            {
                return Avx2.Permute2x128(va.As<T, byte>(), vb.As<T, byte>(), control).As<byte, T>();
            }
            // 16-bit elements
            else if (typeof(T) == typeof(short))
            {
                return Avx2.Permute2x128(va.As<T, short>(), vb.As<T, short>(), control).As<short, T>();
            }
            else if (typeof(T) == typeof(ushort))
            {
                return Avx2.Permute2x128(va.As<T, ushort>(), vb.As<T, ushort>(), control).As<ushort, T>();
            }
            // 32-bit integer elements
            else if (typeof(T) == typeof(int))
            {
                return Avx2.Permute2x128(va.As<T, int>(), vb.As<T, int>(), control).As<int, T>();
            }
            else if (typeof(T) == typeof(uint))
            {
                return Avx2.Permute2x128(va.As<T, uint>(), vb.As<T, uint>(), control).As<uint, T>();
            }
            // 64-bit integer elements
            else if (typeof(T) == typeof(long))
            {
                return Avx2.Permute2x128(va.As<T, long>(), vb.As<T, long>(), control).As<long, T>();
            }
            else if (typeof(T) == typeof(ulong))
            {
                return Avx2.Permute2x128(va.As<T, ulong>(), vb.As<T, ulong>(), control).As<ulong, T>();
            }
            // Floating-point elements use the AVX overloads.
            else if (typeof(T) == typeof(float))
            {
                return Avx.Permute2x128(va.As<T, float>(), vb.As<T, float>(), control).As<float, T>();
            }
            else if (typeof(T) == typeof(double))
            {
                return Avx.Permute2x128(va.As<T, double>(), vb.As<T, double>(), control).As<double, T>();
            }
            else
            {
                throw new NotSupportedException();
            }
        }
示例#12
0
        /// <summary>
        /// Interleaves elements of two vectors by forwarding to <c>UnpackHigh</c>.
        /// </summary>
        /// <remarks>
        /// Integer element types go through <c>Avx2.UnpackHigh</c>; <c>float</c> and <c>double</c>
        /// go through <c>Avx.UnpackHigh</c>.
        /// </remarks>
        /// <param name="value">First operand passed to <c>UnpackHigh</c>.</param>
        /// <param name="data">Second operand passed to <c>UnpackHigh</c>.</param>
        /// <returns>The unpacked vector, reinterpreted back to <typeparamref name="T"/>.</returns>
        /// <exception cref="NotSupportedException">
        /// <typeparamref name="T"/> is not one of the ten primitive element types handled below.
        /// </exception>
        internal static Vector256<T> UnpackHighVector256(Vector256<T> value, Vector256<T> data)
        {
            // 8-bit elements
            if (typeof(T) == typeof(sbyte))
            {
                return Avx2.UnpackHigh(value.As<T, sbyte>(), data.As<T, sbyte>()).As<sbyte, T>();
            }
            else if (typeof(T) == typeof(byte))
            {
                return Avx2.UnpackHigh(value.As<T, byte>(), data.As<T, byte>()).As<byte, T>();
            }
            // 16-bit elements
            else if (typeof(T) == typeof(short))
            {
                return Avx2.UnpackHigh(value.As<T, short>(), data.As<T, short>()).As<short, T>();
            }
            else if (typeof(T) == typeof(ushort))
            {
                return Avx2.UnpackHigh(value.As<T, ushort>(), data.As<T, ushort>()).As<ushort, T>();
            }
            // 32-bit integer elements
            else if (typeof(T) == typeof(int))
            {
                return Avx2.UnpackHigh(value.As<T, int>(), data.As<T, int>()).As<int, T>();
            }
            else if (typeof(T) == typeof(uint))
            {
                return Avx2.UnpackHigh(value.As<T, uint>(), data.As<T, uint>()).As<uint, T>();
            }
            // 64-bit integer elements
            else if (typeof(T) == typeof(long))
            {
                return Avx2.UnpackHigh(value.As<T, long>(), data.As<T, long>()).As<long, T>();
            }
            else if (typeof(T) == typeof(ulong))
            {
                return Avx2.UnpackHigh(value.As<T, ulong>(), data.As<T, ulong>()).As<ulong, T>();
            }
            // Floating-point elements use the AVX overloads.
            else if (typeof(T) == typeof(float))
            {
                return Avx.UnpackHigh(value.As<T, float>(), data.As<T, float>()).As<float, T>();
            }
            else if (typeof(T) == typeof(double))
            {
                return Avx.UnpackHigh(value.As<T, double>(), data.As<T, double>()).As<double, T>();
            }
            else
            {
                throw new NotSupportedException();
            }
        }
示例#13
0
        /// <summary>
        /// Rewrites the first 32 bytes of <paramref name="bytes"/> in place: the bytes are shuffled
        /// with <c>ReverseMaskVec</c> and then the two 128-bit halves are swapped.
        /// </summary>
        /// <remarks>
        /// Only one 256-bit vector is processed; bytes beyond the first 32 are left untouched.
        /// NOTE(review): presumably <c>ReverseMaskVec</c> reverses the bytes inside each 128-bit lane,
        /// which together with the half swap yields a full 32-byte reversal; confirm against its definition.
        /// </remarks>
        /// <param name="bytes">Buffer holding at least 32 bytes.</param>
        /// <exception cref="ArgumentException">
        /// <paramref name="bytes"/> is shorter than 32 bytes.
        /// </exception>
        public static void Avx2Reverse256InPlace(Span<byte> bytes)
        {
            // The load and store below touch a full 256-bit vector through a raw pointer, so a
            // shorter span would be read and written out of bounds (and an empty span pins to null).
            if (bytes.Length < Vector256<byte>.Count)
            {
                throw new ArgumentException("The span must contain at least 32 bytes.", nameof(bytes));
            }

            fixed (byte* inputPointer = bytes)
            {
                Vector256<byte> inputVector = Avx2.LoadVector256(inputPointer);
                Vector256<byte> resultVector = Avx2.Shuffle(inputVector, ReverseMaskVec);

                // Control 0b01_00_11_10 picks 64-bit elements 2, 3, 0, 1: the 128-bit halves trade places.
                resultVector = Avx2.Permute4x64(resultVector.As<byte, ulong>(), 0b01001110).As<ulong, byte>();

                Avx2.Store(inputPointer, resultVector);
            }
        }
        /// <summary>
        /// Loads the first 32 bytes of <c>_a</c>, shuffles them with <c>_shuffleMask</c>, swaps the
        /// two 128-bit halves and stores the result back into <c>_a</c>.
        /// </summary>
        public void Avx2Version()
        {
            byte[] buffer = _a;

            unsafe
            {
                fixed (byte* pinned = buffer)
                {
                    Vector256<byte> loaded = Avx2.LoadVector256(pinned);
                    Vector256<byte> shuffled = Avx2.Shuffle(loaded, _shuffleMask);

                    // Control 0b01_00_11_10 picks 64-bit elements 2, 3, 0, 1.
                    Vector256<ulong> halvesSwapped = Avx2.Permute4x64(shuffled.As<byte, ulong>(), 0b01001110);

                    Avx2.Store(pinned, halvesSwapped.As<ulong, byte>());
                }
            }
        }
示例#15
0
 /// <summary>
 /// Reinterprets <paramref name="selector"/> as a vector of <typeparamref name="T"/> and returns
 /// <c>AndNot(selector, vector)</c>.
 /// </summary>
 /// <remarks>
 /// NOTE(review): presumably <c>AndNot</c> computes <c>~first &amp; second</c> like the AVX intrinsic,
 /// so bits of <paramref name="vector"/> survive only where the selector bit is clear; confirm.
 /// </remarks>
 /// <param name="vector">Values to filter.</param>
 /// <param name="selector">Bit mask, reinterpreted to the element type of <paramref name="vector"/>.</param>
 /// <returns>The result of the <c>AndNot</c> call.</returns>
 public static Vector256<T> SelectWhereFalse<T, U>(Vector256<T> vector, Vector256<U> selector)
     where T : struct
     where U : struct
 {
     Vector256<T> mask = selector.As<U, T>();
     return AndNot(mask, vector);
 }
示例#16
0
 /// <summary>
 /// Bitwise blend of two vectors: returns
 /// <c>Or(And(selector, right), AndNot(selector, left))</c> with the selector reinterpreted
 /// as a vector of <typeparamref name="T"/>.
 /// </summary>
 /// <remarks>
 /// NOTE(review): presumably <c>AndNot</c> computes <c>~first &amp; second</c> like the AVX intrinsic,
 /// so set selector bits take <paramref name="right"/> and clear bits take <paramref name="left"/>; confirm.
 /// </remarks>
 /// <param name="left">First candidate vector.</param>
 /// <param name="right">Second candidate vector.</param>
 /// <param name="selector">Bit mask choosing between the two candidates.</param>
 /// <returns>The blended vector.</returns>
 public static Vector256<T> Select<T, U>(Vector256<T> left, Vector256<T> right, Vector256<U> selector)
     where T : struct
     where U : struct
 {
     Vector256<T> mask = selector.As<U, T>();

     var takenFromRight = And(mask, right);
     var takenFromLeft = AndNot(mask, left);

     return Or(takenFromRight, takenFromLeft);
 }
示例#17
0
        /// <summary>
        /// Scans <paramref name="data"/> in blocks of eight ints, viewing every int as two 16-bit
        /// halves, and looks for a block whose comparison against the pattern
        /// (<paramref name="searchValue"/> at even 16-bit positions, <c>short.MinValue</c> at odd
        /// positions) produces a non-zero mask.
        /// </summary>
        /// <param name="data">
        /// Values to scan. Must be non-empty (element 0 is pinned) and its length is asserted to be a
        /// multiple of 8.
        /// </param>
        /// <param name="searchValue">16-bit value to look for at even 16-bit positions.</param>
        /// <param name="startIndex">
        /// First acceptable int index. The scan begins at the start of the block that contains it.
        /// </param>
        /// <param name="maxIndex">Last acceptable int index (inclusive).</param>
        /// <returns>
        /// The int index of the match when it lies within [<paramref name="startIndex"/>,
        /// <paramref name="maxIndex"/>]; <c>-1</c> when the matched index lies outside that range;
        /// <c>-2</c> when the mask of the stopping block is not exactly one of <c>ThreePow0</c>..<c>ThreePow7</c>
        /// (described by the original author as a false accidental match at an odd index).
        /// NOTE(review): when no block matches at all, the mask stays 0 and the method also appears
        /// to return <c>-2</c>, assuming none of the <c>ThreePow*</c> constants is 0; confirm.
        /// </returns>
        static unsafe int FindIndexOfShortAtEvenIndexHavingValue(Span<int> data, short searchValue, int startIndex, int maxIndex)
        {
            const int IntsPerBlock = 8;
            const int ShortsPerBlock = IntsPerBlock * 2;

            // For convenience/efficiency we require arrays to be divisible by 8
            Debug.Assert(data.Length % 8 == 0);

            // Comparison pattern: searchValue in even 16-bit lanes, short.MinValue in odd lanes.
            Span<short> dTargetValue = stackalloc short[ShortsPerBlock];
            for (int lane = 0; lane < ShortsPerBlock; lane += 2)
            {
                dTargetValue[lane] = searchValue;
                dTargetValue[lane + 1] = short.MinValue;
            }

            int numBlocksProcessed = startIndex / IntsPerBlock;
            int maskMove = 0;

            fixed (short* pTargetValue = &dTargetValue[0])
            fixed (int* pStartData = &data[0])
            {
                short* pStartDataShort = (short*)pStartData;
                Vector256<short> targetData = Avx.LoadVector256(pTargetValue);

                for (int i = numBlocksProcessed * ShortsPerBlock; i < data.Length * 2; i += ShortsPerBlock)
                {
                    // Load this set of values to examine
                    Vector256<short> vValues = Avx.LoadVector256(&pStartDataShort[i]);

                    // Compare for equality
                    Vector256<short> vEQ = Avx2.CompareEqual(vValues, targetData);

                    // Get resulting equality mask so we can tell which index within block had target value
                    maskMove = Avx2.MoveMask(vEQ.As<short, byte>());
                    if (maskMove != 0)
                    {
                        break;
                    }

                    numBlocksProcessed++;
                }

                // Translate mask into which index
                int indexInBlock;

                if (maskMove <= ThreePow3)
                {
                    indexInBlock = maskMove switch
                    {
                        ThreePow0 => 0,
                        ThreePow1 => 1,
                        ThreePow2 => 2,
                        ThreePow3 => 3,
                        _ => -2 // false accidental match at odd index
                    };
                }
                else
                {
                    indexInBlock = maskMove switch
                    {
                        ThreePow4 => 4,
                        ThreePow5 => 5,
                        ThreePow6 => 6,
                        ThreePow7 => 7,
                        _ => -2 // false accidental match at odd index
                    };
                }

                if (indexInBlock == -2)
                {
                    return -2;
                }

                int index = numBlocksProcessed * IntsPerBlock + indexInBlock;

                // A match before startIndex (same block) or past maxIndex is reported as -1.
                return (index < startIndex || index > maxIndex) ? -1 : index;
            }
        }
示例#18
0
 /// <summary>
 /// Rotates every 64-bit lane of <paramref name="x"/> by 32 bits, i.e. swaps the two 32-bit
 /// halves of each lane.
 /// </summary>
 /// <remarks>
 /// The vector is viewed as eight 32-bit elements and shuffled with control <c>0b10_11_00_01</c>,
 /// which exchanges elements 0/1 and 2/3 inside each 128-bit lane. <c>AsUInt32</c>/<c>AsUInt64</c>
 /// are used for the reinterpretation because the single-type-argument <c>As&lt;U&gt;()</c> form is not
 /// part of the released <c>Vector256</c> API.
 /// </remarks>
 /// <param name="x">Vector to rotate; it is only read.</param>
 /// <returns>The rotated vector.</returns>
 private static Vector256<ulong> ror64_32_avx(ref Vector256<ulong> x)
     => Avx2.Shuffle(x.AsUInt32(), 0b_10_11_00_01).AsUInt64();
示例#19
0
文件: YMEM.cs 项目: 0xCM/arrows
 /// <summary>
 /// Returns the <c>vymm</c> field reinterpreted as a 256-bit vector of <typeparamref name="T"/>.
 /// </summary>
 /// <typeparam name="T">Element type of the returned view.</typeparam>
 /// <returns>The same 256 bits, typed as <c>Vector256&lt;T&gt;</c>.</returns>
 public Vector256<T> Vec<T>()
     where T : unmanaged
 {
     return Vector256.As<byte, T>(vymm);
 }