示例#1
0
    /// <summary>
    /// Checks the <c>Vector256</c> factory helpers (<c>Create</c>, <c>CreateScalar</c>,
    /// <c>CreateScalarUnsafe</c>) for float, double, ushort and long elements by comparing
    /// their formatted output with fixed strings through <c>AssertEqual</c>.
    /// </summary>
    /// <returns>The current value of <c>retCode</c>, which is declared outside this method.</returns>
    public static int Main()
    {
        string actual;

        // float: eight elements per vector
        actual = Vector256.Create(1f).ToString();
        AssertEqual(actual, "<1, 1, 1, 1, 1, 1, 1, 1>");

        actual = Vector256.CreateScalar(1f).ToString();
        AssertEqual(actual, "<1, 0, 0, 0, 0, 0, 0, 0>");

        // Only the first element is defined after CreateScalarUnsafe, so just that one is read back.
        actual = Vector256.CreateScalarUnsafe(1f).ToScalar().ToString();
        AssertEqual(actual, "1");

        actual = Vector256.Create(0.0f, 1, 2, 3, 4, 5, 6, 7).ToString();
        AssertEqual(actual, "<0, 1, 2, 3, 4, 5, 6, 7>");

        // double: four elements per vector
        actual = Vector256.Create(1.0).ToString();
        AssertEqual(actual, "<1, 1, 1, 1>");

        actual = Vector256.CreateScalar(1.0).ToString();
        AssertEqual(actual, "<1, 0, 0, 0>");

        actual = Vector256.CreateScalarUnsafe(1.0).ToScalar().ToString();
        AssertEqual(actual, "1");

        actual = Vector256.Create(0.0, 1, 2, 3).ToString();
        AssertEqual(actual, "<0, 1, 2, 3>");

        // ushort: sixteen elements per vector
        actual = Vector256.Create((ushort)1).ToString();
        AssertEqual(actual, "<1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1>");

        actual = Vector256.CreateScalar((ushort)1).ToString();
        AssertEqual(actual, "<1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0>");

        actual = Vector256.CreateScalarUnsafe((ushort)1).ToScalar().ToString();
        AssertEqual(actual, "1");

        actual = Vector256.Create((ushort)0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15).ToString();
        AssertEqual(actual, "<0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15>");

        // long: four elements per vector
        actual = Vector256.Create(1L).ToString();
        AssertEqual(actual, "<1, 1, 1, 1>");

        actual = Vector256.CreateScalar(1L).ToString();
        AssertEqual(actual, "<1, 0, 0, 0>");

        actual = Vector256.CreateScalarUnsafe(1L).ToScalar().ToString();
        AssertEqual(actual, "1");

        actual = Vector256.Create(0L, 1, 2, 3).ToString();
        AssertEqual(actual, "<0, 1, 2, 3>");

        return retCode;
    }
        /// <summary>
        /// Basic scenario: wraps a generated single-precision value with
        /// <c>Vector256.CreateScalar</c> and passes the vector and the value to <c>ValidateResult</c>.
        /// </summary>
        public void RunBasicScenario()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario));

            float input = TestLibrary.Generator.GetSingle();

            ValidateResult(Vector256.CreateScalar(input), input);
        }
示例#3
0
 /// <summary>
 /// Builds a vector that keeps only the first element of <paramref name="vector"/>;
 /// the result is produced by <c>Vector256.CreateScalar</c>, which zeroes the other elements.
 /// </summary>
 /// <param name="vector">Vector whose first element is extracted.</param>
 /// <returns>A vector holding that first element followed by zeros.</returns>
 public static Vector256 <double> ToScalarVector256(Vector256 <double> vector)
 {
     double firstElement = vector.ToScalar();

     return Vector256.CreateScalar(firstElement);
 }
示例#4
0
 /// <summary>
 /// Extension helper that passes <paramref name="scalar"/> to <c>Vector256.CreateScalar</c>
 /// and returns the result as a <c>Vector4D</c>.
 /// </summary>
 /// <param name="scalar">Value to place in the vector.</param>
 public static Vector4D LoadScalar(this double scalar)
 {
     return Vector256.CreateScalar(scalar);
 }
示例#5
0
 /// <summary>Creates a <c>Vec</c> from <paramref name="value"/> through <c>Vector256.CreateScalar</c>.</summary>
 static Vec VecInit(double value)
 {
     return Vector256.CreateScalar(value);
 }
        /// <summary>
        /// Builds a running-sum table of <c>_data</c> using pointer access and AVX/AVX2 intrinsics.
        /// Each row is processed eight columns at a time while a full vector fits; the remaining
        /// columns of the row are handled one by one with
        /// <c>value + left + above - aboveLeft</c>.
        /// </summary>
        /// <remarks>
        /// The vector path depends on <c>Aggregate</c>, which is defined elsewhere.
        /// NOTE(review): it presumably returns, per lane, the sum of the source values that precede
        /// that lane (spanning the previous and the current vector) — confirm against its definition.
        /// </remarks>
        /// <returns>
        /// A new <c>int[height, width]</c> array; it is returned unfilled when either dimension is zero.
        /// </returns>
        public unsafe int[,] IntegrateUnsafeVectorBranched()
        {
            int w = _data.Width();
            int h = _data.Height();

            int[,] res = new int[h, w];

            // Taking the address of element [0, 0] throws on an array without elements,
            // so an empty image short-circuits to the (empty) result.
            if (h == 0 || w == 0)
            {
                return res;
            }

            fixed(byte *pSource = &_data[0, 0])
            fixed(int *pTarget = &res[0, 0])
            {
                var pSrc = pSource;
                var pTrg = pTarget;

                for (var i = 0; i < h; i++)
                {
                    var j = 0;

                    // Both carry vectors restart at zero for every row:
                    // p  = raw source values of the previous vector step,
                    // pr = results stored by the previous vector step.
                    var p  = Vector256.CreateScalar(0);
                    var pr = Vector256.CreateScalar(0);

                    // Vector part: eight columns per step.
                    for (; j + Vector256 <int> .Count <= w; j += Vector256 <int> .Count)
                    {
                        // Widen eight source bytes to eight 32-bit integers.
                        var t = Avx2.ConvertToVector256Int32(pSrc);

                        // Fold in the contribution of preceding source values (see remarks).
                        var s = Aggregate(p, t);
                        p = t;
                        t = Avx2.Add(t, s);

                        if (j > 0)
                        {
                            // Add the results of the previous vector step in this row.
                            t = Avx2.Add(t, pr);
                        }
                        if (i > 0)
                        {
                            // Add the results directly above (previous row, same columns).
                            t = Avx2.Add(t, Avx.LoadVector256(pTrg - w));
                            if (j > 0)
                            {
                                // Subtract the previous row's results one vector to the left.
                                t = Avx2.Subtract(t, Avx.LoadVector256(pTrg - w - Vector256 <int> .Count));
                            }
                        }

                        Avx.Store(pTrg, t);
                        pr    = t;
                        pSrc += Vector256 <int> .Count;
                        pTrg += Vector256 <int> .Count;
                    }

                    // Tail: columns that do not fill a whole vector.
                    // pr2 is the result immediately to the left; it is seeded from the last lane
                    // of the final vector step, or zero when the row had no vector step.
                    var pr2 = (j == 0 ? 0 : pr.GetElement(Vector256 <int> .Count - 1));
                    for (; j < w; j++)
                    {
                        var t = (int)*(pSrc);
                        if (j > 0)
                        {
                            t += pr2;                   // result to the left
                        }
                        if (i > 0)
                        {
                            t += *(pTrg - w);           // result above
                            if (j > 0)
                            {
                                t -= *(pTrg - w - 1);   // result above-left, counted twice so far
                            }
                        }

                        *pTrg = t;
                        pr2 = t;
                        pSrc++;
                        pTrg++;
                    }
                }
            }
            return res;
        }
示例#7
0
        /// <summary>
        /// Absorbs one chunk of <paramref name="m"/> into the accumulator fields
        /// <c>_h0</c>..<c>_h4</c> using AVX2: five vectors of input limbs are built, multiplied
        /// against the precomputed <c>_ruwy*</c>/<c>_svxz*</c> vectors, summed, and the five
        /// results are carried back into 26-bit limbs.
        /// </summary>
        /// <remarks>
        /// The highest byte read is index 63 (word 12 of the slice starting at byte 12), so
        /// <paramref name="m"/> must hold at least 64 bytes or the span indexer throws.
        /// NOTE(review): the 26-bit limbs and the "* 5" fold look like Poly1305 over four
        /// 16-byte blocks — confirm against the enclosing type.
        /// </remarks>
        /// <param name="m">Input bytes; words at byte strides of 16 are gathered per limb.</param>
        private void Block4(ReadOnlySpan <byte> m)
        {
            // Limb 0: 32-bit words at byte offsets 0, 16, 32 and 48.
            var n0  = MemoryMarshal.Cast <byte, uint>(m);
            var hc0 = IntrinsicsUtils.Create4UInt(n0[0], n0[4], n0[8], n0[12]);

            // And256 is defined elsewhere; presumably the 26-bit limb mask — TODO confirm.
            hc0 = Avx2.And(hc0, And256);
            // CreateScalar puts the accumulator limb in the first element only (others are zero).
            hc0 = Avx2.Add(hc0, Vector256.CreateScalar(_h0));

            // Limb 1: words starting 3 bytes in, shifted right by 2 => bit offset 26 of each stride.
            var n1  = MemoryMarshal.Cast <byte, uint>(m.Slice(3));
            var hc1 = IntrinsicsUtils.Create4UInt(n1[0], n1[4], n1[8], n1[12]);

            hc1 = Avx2.ShiftRightLogical(hc1, 2);
            hc1 = Avx2.And(hc1, And256);
            hc1 = Avx2.Add(hc1, Vector256.CreateScalar(_h1));

            // Limb 2: words starting 6 bytes in, shifted right by 4 => bit offset 52.
            var n2  = MemoryMarshal.Cast <byte, uint>(m.Slice(6));
            var hc2 = IntrinsicsUtils.Create4UInt(n2[0], n2[4], n2[8], n2[12]);

            hc2 = Avx2.ShiftRightLogical(hc2, 4);
            hc2 = Avx2.And(hc2, And256);
            hc2 = Avx2.Add(hc2, Vector256.CreateScalar(_h2));

            // Limb 3: words starting 9 bytes in, shifted right by 6 => bit offset 78.
            var n3  = MemoryMarshal.Cast <byte, uint>(m.Slice(9));
            var hc3 = IntrinsicsUtils.Create4UInt(n3[0], n3[4], n3[8], n3[12]);

            hc3 = Avx2.ShiftRightLogical(hc3, 6);
            hc3 = Avx2.And(hc3, And256);
            hc3 = Avx2.Add(hc3, Vector256.CreateScalar(_h3));

            // Limb 4: words starting 12 bytes in, shifted right by 8 => bit offset 104.
            // This limb is OR-ed with Or256 instead of masked; Or256 is defined elsewhere
            // (presumably the padding bit above each block — TODO confirm).
            var n4  = MemoryMarshal.Cast <byte, uint>(m.Slice(12));
            var hc4 = IntrinsicsUtils.Create4UInt(n4[0], n4[4], n4[8], n4[12]);

            hc4 = Avx2.ShiftRightLogical(hc4, 8);
            hc4 = Avx2.Or(hc4, Or256);
            hc4 = Avx2.Add(hc4, Vector256.CreateScalar(_h4));

            // Result limb 0: limb products whose indices wrap around use the _svxz* vectors.
            // Add4UInt64 is defined elsewhere; presumably the horizontal sum of the four
            // 64-bit lanes — TODO confirm.
            var t1 = Avx2.Multiply(_ruwy0, hc0);

            t1 = Avx2.Add(t1, Avx2.Multiply(_svxz4, hc1));
            t1 = Avx2.Add(t1, Avx2.Multiply(_svxz3, hc2));
            t1 = Avx2.Add(t1, Avx2.Multiply(_svxz2, hc3));
            t1 = Avx2.Add(t1, Avx2.Multiply(_svxz1, hc4));
            var d0 = t1.Add4UInt64();

            // Result limb 1.
            t1 = Avx2.Multiply(_ruwy1, hc0);
            t1 = Avx2.Add(t1, Avx2.Multiply(_ruwy0, hc1));
            t1 = Avx2.Add(t1, Avx2.Multiply(_svxz4, hc2));
            t1 = Avx2.Add(t1, Avx2.Multiply(_svxz3, hc3));
            t1 = Avx2.Add(t1, Avx2.Multiply(_svxz2, hc4));
            var d1 = t1.Add4UInt64();

            // Result limb 2.
            t1 = Avx2.Multiply(_ruwy2, hc0);
            t1 = Avx2.Add(t1, Avx2.Multiply(_ruwy1, hc1));
            t1 = Avx2.Add(t1, Avx2.Multiply(_ruwy0, hc2));
            t1 = Avx2.Add(t1, Avx2.Multiply(_svxz4, hc3));
            t1 = Avx2.Add(t1, Avx2.Multiply(_svxz3, hc4));
            var d2 = t1.Add4UInt64();

            // Result limb 3.
            t1 = Avx2.Multiply(_ruwy3, hc0);
            t1 = Avx2.Add(t1, Avx2.Multiply(_ruwy2, hc1));
            t1 = Avx2.Add(t1, Avx2.Multiply(_ruwy1, hc2));
            t1 = Avx2.Add(t1, Avx2.Multiply(_ruwy0, hc3));
            t1 = Avx2.Add(t1, Avx2.Multiply(_svxz4, hc4));
            var d3 = t1.Add4UInt64();

            // Result limb 4: no wrapped terms.
            t1 = Avx2.Multiply(_ruwy4, hc0);
            t1 = Avx2.Add(t1, Avx2.Multiply(_ruwy3, hc1));
            t1 = Avx2.Add(t1, Avx2.Multiply(_ruwy2, hc2));
            t1 = Avx2.Add(t1, Avx2.Multiply(_ruwy1, hc3));
            t1 = Avx2.Add(t1, Avx2.Multiply(_ruwy0, hc4));
            var d4 = t1.Add4UInt64();

            // Carry chain: keep the low 26 bits of each sum and push the rest into the next one.
            _h0  = (uint)d0 & 0x3ffffff;
            d1  += d0 >> 26;
            _h1  = (uint)d1 & 0x3ffffff;
            d2  += d1 >> 26;
            _h2  = (uint)d2 & 0x3ffffff;
            d3  += d2 >> 26;
            _h3  = (uint)d3 & 0x3ffffff;
            d4  += d3 >> 26;
            _h4  = (uint)d4 & 0x3ffffff;
            // The carry out of the top limb is folded back into limb 0 multiplied by 5,
            // followed by one more carry from limb 0 into limb 1.
            _h0 += (uint)((d4 >> 26) * 5);
            _h1 += _h0 >> 26;
            _h0 &= 0x3ffffff;
        }
示例#8
0
        /// <summary>
        /// Continues a two-part running checksum over <paramref name="buffer"/> using AVX2.
        /// The low 16 bits of <paramref name="adler"/> seed the first sum and the high 16 bits
        /// seed the second; input is consumed 32 bytes per step and whatever remains after the
        /// last full step is forwarded to <c>HandleLeftOver</c>.
        /// </summary>
        /// <param name="adler">Checksum so far (first sum in bits 0-15, second sum in bits 16-31).</param>
        /// <param name="buffer">Bytes to add to the checksum.</param>
        /// <returns>The updated checksum, packed the same way as <paramref name="adler"/>.</returns>
        public static unsafe uint CalculateAvx2(uint adler, ReadOnlySpan <byte> buffer)
        {
            uint sumLow    = adler & 0xFFFF;
            uint sumHigh   = (adler >> 16) & 0xFFFF;
            uint remaining = (uint)buffer.Length;

            fixed (byte* origin = &MemoryMarshal.GetReference(buffer))
            {
                byte* cursor = origin;

                Vector256<byte> allZero = Vector256<byte>.Zero;
                var unitWeights         = Vector256.Create((short)1);
                var positionWeights     = Vector256.Create(32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);

                // At most NMAX bytes may be accumulated before the second sum has to be
                // reduced modulo BASE, so the input is consumed in runs of whole 32-byte blocks.
                while (remaining >= 32)
                {
                    int run = remaining < NMAX ? (int)remaining : (int)NMAX;
                    run       -= run % 32;
                    remaining -= (uint)run;

                    // Each run starts from the reduced scalar sums in the first element.
                    Vector256<uint> lowAcc   = Vector256.CreateScalar(sumLow);
                    Vector256<uint> highAcc  = Vector256.CreateScalar(sumHigh);
                    Vector256<uint> deferred = Vector256<uint>.Zero;

                    for (; run >= 32; run -= 32)
                    {
                        // Load 32 input bytes.
                        Vector256<byte> block = Avx.LoadVector256(cursor);
                        cursor += BlockSize;

                        // Accumulate the first sum as it stood before this block;
                        // the factor 32 is applied once after the loop.
                        deferred = Avx2.Add(deferred, lowAcc);

                        // Sum of absolute differences against zero adds up each group of eight bytes.
                        Vector256<ushort> byteSums = Avx2.SumAbsoluteDifferences(block, allZero);
                        lowAcc = Avx2.Add(lowAcc, byteSums.AsUInt32());

                        // Weight the 32 bytes by 32..1 and add adjacent pairs into 16 shorts,
                        // then add adjacent shorts into 8 32-bit values.
                        Vector256<short> pairSums = Avx2.MultiplyAddAdjacent(block, positionWeights);
                        Vector256<int>   quadSums = Avx2.MultiplyAddAdjacent(pairSums, unitWeights);

                        highAcc = Avx2.Add(highAcc, quadSums.AsUInt32());
                    }

                    // The deferred multiplication by 32 happens here, outside the block loop.
                    deferred = Avx2.ShiftLeftLogical(deferred, 5);
                    highAcc  = Avx2.Add(highAcc, deferred);

                    sumLow  = (uint)Numerics.EvenReduceSum(lowAcc.AsInt32());
                    sumHigh = (uint)Numerics.ReduceSum(highAcc.AsInt32());

                    sumLow  %= BASE;
                    sumHigh %= BASE;
                }

                if (remaining > 0)
                {
                    HandleLeftOver(cursor, remaining, ref sumLow, ref sumHigh);
                }

                return sumLow | (sumHigh << 16);
            }
        }
示例#9
0
        /// <summary>
        /// Computes a distance between a slice of <paramref name="sourceString"/> and a slice of
        /// <paramref name="targetString"/> while keeping a single row of costs in a pooled
        /// <c>int</c> buffer. Rows are processed sixteen at a time with AVX2 while enough source
        /// rows remain, then one at a time through <c>CalculateRow</c>.
        /// </summary>
        /// <remarks>
        /// The per-cell rule visible here (0 or 1 added to the diagonal depending on a character
        /// match, versus min(left, up) + 1) looks like the Levenshtein recurrence.
        /// NOTE(review): both strings are sliced starting at the same <paramref name="startIndex"/>.
        /// NOTE(review): the pooled array is only returned on the success path; an exception
        /// thrown before the end of the method skips <c>arrayPool.Return</c>.
        /// NOTE(review): the vector path indexes <c>targetPtr</c>/<c>previousRowPtr</c> without
        /// checking <paramref name="targetLength"/> against VECTOR_LENGTH — presumably callers
        /// guarantee a long enough target; confirm.
        /// </remarks>
        /// <param name="sourceString">String providing the rows.</param>
        /// <param name="sourceLength">Number of source characters to use, counted from <paramref name="startIndex"/>.</param>
        /// <param name="targetString">String providing the columns.</param>
        /// <param name="targetLength">Number of target characters to use, counted from <paramref name="startIndex"/>.</param>
        /// <param name="startIndex">Offset applied to both strings before slicing.</param>
        /// <returns>The last entry of the cost row after all source rows were processed.</returns>
        private static unsafe int CalculateDistance(string sourceString, int sourceLength, string targetString, int targetLength, int startIndex)
        {
            var                 arrayPool   = ArrayPool <int> .Shared;
            var                 pooledArray = arrayPool.Rent(targetLength);
            Span <int>          previousRow = pooledArray;
            ReadOnlySpan <char> source      = sourceString.AsSpan().Slice(startIndex, sourceLength);
            ReadOnlySpan <char> target      = targetString.AsSpan().Slice(startIndex, targetLength);

            // Rented arrays can be larger than requested; trim the span to exactly what is used.
            previousRow = previousRow.Slice(0, targetLength);

            fixed(char *targetPtr = target)
            fixed(char *srcPtr        = source)
            fixed(int *previousRowPtr = previousRow)
            {
                // FillRow is defined elsewhere; presumably it writes the initial cost row — TODO confirm.
                FillRow(previousRowPtr, targetLength);

                var rowIndex = 0;

                //var sourceV = Vector128<short>.Zero;
                // Number of 16-bit lanes in a 256-bit vector, i.e. source rows handled per vector step.
                const int VECTOR_LENGTH = 16;

                // Vector steps run only while more than VECTOR_LENGTH + 1 source rows remain.
                for (; rowIndex < sourceLength - VECTOR_LENGTH - 1; rowIndex += VECTOR_LENGTH)
                {
                    // todo max
                    // NOTE(review): ToVector256 zero-fills the upper 128 bits, so only the lower
                    // eight lanes of diag carry rowIndex — confirm whether that is intended.
                    var temp = Vector128.Create(rowIndex);
                    var diag = Sse42.PackUnsignedSaturate(temp, temp).ToVector256();
                    var one  = Vector256.Create((ushort)1);
                    var left = Avx2.AddSaturate(diag, one);

                    // Sixteen source characters starting at the current row.
                    var sourceV = Avx2.LoadVector256((ushort *)(srcPtr + rowIndex));
                    var targetV = Vector256 <ushort> .Zero;

                    // Mask with all bits set in the first lane; it is moved by ShiftLeft every
                    // iteration to select where the next left-edge value is inserted.
                    var shift = Vector256.CreateScalar(ushort.MaxValue);
                    // Warm-up: the first VECTOR_LENGTH - 1 columns fill the vector; nothing is stored yet.
                    for (int columnIndex = 0; columnIndex < VECTOR_LENGTH - 1; columnIndex++)
                    {
                        // Shift in the next target character (ShiftLeft is defined elsewhere).
                        targetV = ShiftLeft(targetV);

                        //targetV = Avx2.Insert(targetV, (ushort)targetPtr[columnIndex], 0);
                        targetV = Avx2.Or(targetV, Vector256.CreateScalar((ushort)targetPtr[columnIndex]));

                        // Insert "(rowIndex + columnIndex + 1)" from the left, in the lane selected by shift.
                        var leftValue = Vector256.Create(rowIndex + columnIndex + 1);
                        left  = Avx2.Or(Avx2.And(shift, Avx2.PackUnsignedSaturate(leftValue, leftValue)), left);
                        shift = ShiftLeft(shift);

                        // Compare source to target: lanes that match add 0, others add 1.
                        // (Alternative: compare equal and OR with one.)
                        var match = Avx2.CompareEqual(sourceV, targetV);
                        var add   = Avx2.AndNot(match, one);
                        var next  = Avx2.AddSaturate(diag, add);

                        // Create the next diag, which is the current up.
                        var up = ShiftLeft(left);
                        //up = Sse42.Insert(up, (ushort)previousRowPtr[columnIndex], 0);
                        up = Avx2.Or(up, Vector256.CreateScalar((ushort)previousRowPtr[columnIndex]));

                        var tmp = Avx2.AddSaturate(Avx2.Min(left, up), one);
                        next = Avx2.Min(next, tmp);

                        left = next;
                        diag = up;
                    }

                    // From here on the last lane holds a finished value each step; results
                    // overwrite the cost row from its start.
                    var writePtr = previousRowPtr;
                    *   writePtr = left.GetElement(VECTOR_LENGTH - 1);
                    writePtr++;
                    // NOTE(review): the warm-up loop consumed columns 0..VECTOR_LENGTH - 2 and this
                    // loop starts at VECTOR_LENGTH, so column VECTOR_LENGTH - 1 is read by neither —
                    // confirm whether that is intended.
                    for (int columnIndex = VECTOR_LENGTH; columnIndex < targetLength; columnIndex++)
                    {
                        // Shift in the next target character.
                        targetV = ShiftLeft(targetV);
                        //targetV = Avx2.Insert(targetV, (ushort)targetPtr[columnIndex], 0);
                        targetV = Avx2.Or(targetV, Vector256.CreateScalar((ushort)targetPtr[columnIndex]));

                        // Compare source to target: lanes that match add 0, others add 1.
                        // (Alternative: compare equal and OR with one.)
                        var match = Avx2.CompareEqual(sourceV, targetV);
                        var add   = Avx2.AndNot(match, one);
                        var next  = Avx2.AddSaturate(diag, add);

                        // Create the next diag, which is the current up.
                        var up = ShiftLeft(left);
                        //up = Sse42.Insert(up, (ushort)previousRowPtr[columnIndex], 0);
                        up = Avx2.Or(up, Vector256.CreateScalar((ushort)previousRowPtr[columnIndex]));

                        var tmp = Avx2.AddSaturate(Avx2.Min(left, up), one);
                        next = Avx2.Min(next, tmp);

                        left = next;
                        diag = up;

                        // Store one finished value.
                        *writePtr = next.GetElement(VECTOR_LENGTH - 1);
                        writePtr++;
                    }

                    // Drain: no more characters are read; the remaining VECTOR_LENGTH - 1 values
                    // are shifted out and stored at the end of the cost row.
                    // NOTE(review): the start index is negative when targetLength < VECTOR_LENGTH - 1.
                    for (int i = targetLength - (VECTOR_LENGTH - 1); i < previousRow.Length; i++)
                    {
                        // Shift the target window without inserting a new character.
                        targetV = ShiftLeft(targetV);

                        // Compare source to target: lanes that match add 0, others add 1.
                        // (Alternative: compare equal and OR with one.)
                        var match = Avx2.CompareEqual(sourceV, targetV);
                        var add   = Avx2.AndNot(match, one);
                        var next  = Avx2.AddSaturate(diag, add);

                        // Create the next diag, which is the current up.
                        var up = ShiftLeft(left);

                        var tmp = Avx2.AddSaturate(Avx2.Min(left, up), one);
                        next = Avx2.Min(next, tmp);

                        left = next;
                        diag = up;
                        // Store one finished value.
                        previousRowPtr[i] = left.GetElement(VECTOR_LENGTH - 1);
                        //		writePtr++;
                    }

#if DEBUG
                    // Debug builds dump the cost row after every vector step.
                    if (true)
                    {
                        Console.Write("prev values for row {0}:", rowIndex);
                        for (int i = 0; i < targetLength; ++i)
                        {
                            Console.Write("{0} ", previousRow[i]);
                        }
                        Console.WriteLine();
                    }
#endif
                }

                // Remaining source rows are processed one at a time.
                for (; rowIndex < sourceLength; rowIndex++)
                {
                    var lastSubstitutionCost = rowIndex;
                    var lastInsertionCost    = rowIndex + 1;
                    var sourcePrevChar       = source[rowIndex];
#if DEBUG
                    // Debug builds dump the cost row before every scalar row.
                    Console.Write("prev values for row {0}:", rowIndex);
                    for (int i = 0; i < targetLength; ++i)
                    {
                        Console.Write("{0} ", previousRow[i]);
                    }
                    Console.WriteLine();
#endif

                    // CalculateRow is defined elsewhere; it receives the cost row, the target
                    // characters and the two starting costs for this row.
                    CalculateRow(previousRowPtr, targetPtr, targetLength, sourcePrevChar, lastInsertionCost, lastSubstitutionCost);
                }
            }

            // Read the result before the buffer goes back to the pool.
            // NOTE(review): indexing with targetLength - 1 throws when targetLength is 0.
            var result = previousRow[targetLength - 1];
            arrayPool.Return(pooledArray);
            return(result);
        }