コード例 #1
0
        // Local-variable scenario: both operands are read with aligned 256-bit loads into
        // locals, added with Avx2.Add, and the sum is written to the output buffer and validated.
        public void RunLclVarScenario_LoadAligned()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_LoadAligned));

            var firstOperand  = Avx.LoadAlignedVector256((Byte*)_dataTable.inArray1Ptr);
            var secondOperand = Avx.LoadAlignedVector256((Byte*)_dataTable.inArray2Ptr);
            var sum           = Avx2.Add(firstOperand, secondOperand);

            Unsafe.Write(_dataTable.outArrayPtr, sum);

            // Validation receives the very same locals that were fed to the intrinsic.
            ValidateResult(firstOperand, secondOperand, _dataTable.outArrayPtr);
        }
        /// <summary>
        /// Computes the Levenshtein (edit) distance between <paramref name="source"/> and
        /// <paramref name="target"/> with a full cost matrix.
        /// </summary>
        /// <remarks>
        /// Before row <c>i</c> is filled, every cell of row <c>i - 1</c> is incremented by one in
        /// bulk (AVX2 when available, a scalar loop otherwise). After that the deletion and
        /// substitution candidates are read straight from the previous row; a matching character
        /// takes the pre-added one back off the substitution candidate.
        /// </remarks>
        /// <param name="source">First sequence of characters; may be empty.</param>
        /// <param name="target">Second sequence of characters; may be empty.</param>
        /// <returns>The minimum number of single-character insertions, deletions and substitutions.</returns>
        public unsafe int CalculateDistance(ReadOnlySpan <char> source, ReadOnlySpan <char> target)
        {
            var lanes = Vector256<int>.Count;

            // Row width padded up to the next multiple of the vector width. The result is always
            // strictly greater than target.Length + 1, so whole-vector loads/stores that start at
            // a used column never leave the row.
            var columns = target.Length + 1;
            columns += lanes - (columns & (lanes - 1));

            var costMatrix = new int[source.Length + 1][];
            for (var row = 0; row < costMatrix.Length; ++row)
            {
                costMatrix[row] = new int[columns];
            }

            // Distance from the empty prefix: i deletions / j insertions.
            for (var i = 1; i <= source.Length; ++i)
            {
                costMatrix[i][0] = i;
            }

            for (var j = 1; j <= target.Length; ++j)
            {
                costMatrix[0][j] = j;
            }

            var ones = Vector256.Create(1);

            for (var i = 1; i <= source.Length; ++i)
            {
                var previousRow = costMatrix[i - 1];
                var currentRow  = costMatrix[i];

                // Add 1 to every used cell of the previous row (columns 0..target.Length).
                if (Avx2.IsSupported)
                {
                    fixed (int* prevRowPtr = previousRow)
                    {
                        for (var columnIndex = 0; columnIndex <= target.Length; columnIndex += lanes)
                        {
                            var incremented = Avx2.Add(Avx.LoadVector256(prevRowPtr + columnIndex), ones);
                            Avx.Store(prevRowPtr + columnIndex, incremented);
                        }
                    }
                }
                else
                {
                    // Scalar fallback so the method still works on hardware without AVX2.
                    for (var columnIndex = 0; columnIndex <= target.Length; ++columnIndex)
                    {
                        previousRow[columnIndex] += 1;
                    }
                }

                for (var j = 1; j <= target.Length; ++j)
                {
                    var insert = currentRow[j - 1] + 1;
                    var delete = previousRow[j];      // already includes the +1
                    var edit   = previousRow[j - 1];  // already includes the +1

                    if (source[i - 1] == target[j - 1])
                    {
                        // Matching characters cost nothing: undo the pre-added one.
                        edit -= 1;
                    }

                    currentRow[j] = Math.Min(Math.Min(insert, delete), edit);
                }
            }

            return costMatrix[source.Length][target.Length];
        }
コード例 #3
0
        // Local-variable scenario: both operands are read with unaligned 256-bit loads into
        // locals, added with Avx2.Add, and the sum is written to the output buffer and validated.
        public void RunLclVarScenario_Load()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_Load));

            var firstOperand  = Avx.LoadVector256((UInt32*)_dataTable.inArray1Ptr);
            var secondOperand = Avx.LoadVector256((UInt32*)_dataTable.inArray2Ptr);
            var sum           = Avx2.Add(firstOperand, secondOperand);

            Unsafe.Write(_dataTable.outArrayPtr, sum);

            // Validation receives the very same locals that were fed to the intrinsic.
            ValidateResult(firstOperand, secondOperand, _dataTable.outArrayPtr);
        }
コード例 #4
0
        // Local-variable scenario: both operands are read through Unsafe.Read into locals,
        // added with Avx2.Add, and the sum is written to the output buffer and validated.
        public void RunLclVarScenario_UnsafeRead()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead));

            var firstOperand  = Unsafe.Read<Vector256<Byte>>(_dataTable.inArray1Ptr);
            var secondOperand = Unsafe.Read<Vector256<Byte>>(_dataTable.inArray2Ptr);
            var sum           = Avx2.Add(firstOperand, secondOperand);

            Unsafe.Write(_dataTable.outArrayPtr, sum);

            // Validation receives the very same locals that were fed to the intrinsic.
            ValidateResult(firstOperand, secondOperand, _dataTable.outArrayPtr);
        }
コード例 #5
0
ファイル: NN_Mokka.cs プロジェクト: marchete/NetMokka
        /// <summary>
        /// For every vector slot of <paramref name="output"/>, stores <c>e / (e + 1)</c> where
        /// <c>e</c> is <c>NN_utils.exp256_ps</c> applied to the matching slot of
        /// <paramref name="input"/>.
        /// </summary>
        /// <remarks>
        /// NOTE(review): <c>exp256_ps</c> is presumably a vectorized e^x, which would make this the
        /// logistic sigmoid — confirm against NN_utils. The slot count is taken from
        /// <paramref name="output"/>; <paramref name="input"/> is assumed to have at least as many.
        /// </remarks>
        public static void Activation_Sigmoid(Tensor input, Tensor output)
        {
            __m256 ones = Vector256.Create(1.0f);

            for (int slot = 0; slot < output.xmm.Length; ++slot)
            {
                __m256 expValue = NN_utils.exp256_ps(input.xmm[slot]);
                __m256 expPlusOne = Avx2.Add(expValue, ones);

                output.xmm[slot] = Avx2.Divide(expValue, expPlusOne);
            }
        }
コード例 #6
0
ファイル: MainWindow.xaml.cs プロジェクト: gogowaten/2020WPF
        /// <summary>
        /// Sums all elements of <paramref name="a"/> into a 64-bit total. Whole groups of four
        /// ints are widened to 64-bit lanes and accumulated with AVX2; leftover elements are
        /// added one by one.
        /// </summary>
        /// <param name="a">Values to add up; may be empty.</param>
        /// <returns>The sum of every element of <paramref name="a"/> as a <see cref="long"/>.</returns>
        private unsafe long MyTestAddIntLong(int[] a)
        {
            var vz         = Vector256<long>.Zero; // running per-lane sums, starts at zero
            int simdLength = Vector256<long>.Count;

            // Only whole groups may go through the vector path: each widening load reads
            // simdLength ints, so starting one too close to the end would read past the array.
            int vectorizedEnd = a.Length - (a.Length % simdLength);
            int i             = 0;

            fixed (int* ptrA = a)
            {
                for (; i < vectorizedEnd; i += simdLength)
                {
                    // Load ints and widen them to long lanes so the per-lane sum cannot
                    // overflow 32 bits (a plain int load would do if the total fit in an int).
                    var v2 = Avx2.ConvertToVector256Int64(ptrA + i);
                    vz = Avx2.Add(vz, v2);
                }
            }

            // Spill the lanes to the stack and add them up; there are only a few lanes, so a
            // simple loop is enough.
            long* lanes = stackalloc long[simdLength];
            Avx.Store(lanes, vz);

            long total = 0;
            for (int lane = 0; lane < simdLength; lane++)
            {
                total += lanes[lane];
            }

            // Add the elements that did not fill a whole vector.
            for (; i < a.Length; i++)
            {
                total += a[i];
            }

            return total;
        }
コード例 #7
0
            /// <summary>
            /// Decodes 32 encoded bytes into 24 output bytes. Each source byte has 32 subtracted
            /// and its low 6 bits kept; every group of four 6-bit values is packed into three
            /// bytes, most significant value first.
            /// </summary>
            /// <param name="source">Pointer to at least 32 readable bytes.</param>
            /// <param name="dest">
            /// Pointer to at least 24 writable bytes. Exactly 24 bytes are written; nothing beyond
            /// <c>dest[23]</c> is touched.
            /// </param>
            public static unsafe void Decode32Bytes(byte *source, byte *dest)
            {
                // One mask per source byte of every 32-bit group (little-endian: A is byte 0).
                Vector256<byte> maskA   = Vector256.Create((uint)0x0000_003f).AsByte();
                Vector256<byte> maskB   = Vector256.Create((uint)0x0000_3f00).AsByte();
                Vector256<byte> maskC   = Vector256.Create((uint)0x003f_0000).AsByte();
                Vector256<byte> maskD   = Vector256.Create((uint)0x3f00_0000).AsByte();
                Vector256<byte> offsets = Vector256.Create((sbyte)-32).AsByte();

                Vector256<byte> vecSource  = Unsafe.As<byte, Vector256<byte>>(ref source[0]);
                Vector256<byte> subtracted = Avx2.Add(vecSource, offsets); // byte-wise "- 32"

                Vector256<byte> a = Avx2.And(subtracted, maskA);
                Vector256<byte> b = Avx2.And(subtracted, maskB);
                Vector256<byte> c = Avx2.And(subtracted, maskC);
                Vector256<byte> d = Avx2.And(subtracted, maskD);

                // Move each 6-bit field to its place inside a 24-bit value (bits shown MSB first):
                // 00000000 00000000 00000000 00aaaaaa -> 00000000 aaaaaa00 00000000 00000000
                a = Avx2.ShiftLeftLogical(a.AsUInt32(), 18).AsByte();
                // 00000000 00000000 00bbbbbb 00000000 -> 00000000 000000bb bbbb0000 00000000
                b = Avx2.ShiftLeftLogical(b.AsUInt32(), 4).AsByte();
                // 00000000 00cccccc 00000000 00000000 -> 00000000 00000000 0000cccc cc000000
                c = Avx2.ShiftRightLogical(c.AsUInt32(), 10).AsByte();
                // 00dddddd 00000000 00000000 00000000 -> 00000000 00000000 00000000 00dddddd
                d = Avx2.ShiftRightLogical(d.AsUInt32(), 24).AsByte();

                // After OR-ing:                          00000000 aaaaaabb bbbbcccc ccdddddd
                Vector256<byte> packed = Avx2.Or(Avx2.Or(a, b), Avx2.Or(c, d));

                // In memory every 32-bit group is now [low, mid, high, 0]. Reorder the bytes
                // inside each 128-bit lane to [high, mid, low] x 4 followed by four zero bytes
                // (index 0x80 yields zero). The byte shuffle works per lane and only looks at the
                // low four index bits, so the 0x1x indices address the upper lane's own bytes.
                var vecShuffle = Vector256.Create(
                    0x02, 0x01, 0x00, 0x06, 0x05, 0x04, 0x0a, 0x09, 0x08, 0x0e, 0x0d, 0x0c,
                    0x80, 0x80, 0x80, 0x80,
                    0x12, 0x11, 0x10, 0x16, 0x15, 0x14, 0x1a, 0x19, 0x18, 0x1e, 0x1d, 0x1c,
                    0x80, 0x80, 0x80, 0x80);

                Vector256<byte> decoded = Avx2.Shuffle(packed, vecShuffle);
                Vector128<byte> upper   = decoded.GetUpper();

                // Lower lane: 12 payload bytes + 4 zero bytes land in dest[0..16); the padding is
                // overwritten by the upper lane's payload right below.
                Sse2.Store(dest, decoded.GetLower());

                // Upper lane: write only its 12 payload bytes (8 + 4) so the output ends exactly
                // at dest[23]. A full 16-byte store here would spill 4 bytes past the 24 decoded.
                Unsafe.WriteUnaligned(dest + 12, upper.AsUInt64().ToScalar());
                Unsafe.WriteUnaligned(dest + 20, upper.AsUInt32().GetElement(2));
            }
コード例 #8
0
    // Folds 128 bytes of input (two consecutive 64-byte halves) into the two 256-bit
    // accumulator vectors. For every 32-byte vector: the input is XOR-ed with the secret, the
    // XOR result is multiplied with a shuffled copy of itself, and the accumulator receives
    // that product plus a shuffled copy of the raw input. The second half reads the secret
    // 8 bytes further along than the first.
    private static void Accumulate1024Avx2(ref Accumulator accumulator, byte *data, byte *secret)
    {
        PrefetchNonTemporalNext(data);
        PrefetchNonTemporalNext(data + 0x40);

        var acc0 = accumulator.Data256.Data0;
        var acc1 = accumulator.Data256.Data1;

        // ---- first half: data[0x00..0x40), secret[0x00..0x40) ----
        var inputA0 = Avx2.LoadVector256(data + 0x00u).AsUInt64();
        var inputA1 = Avx2.LoadVector256(data + 0x20u).AsUInt64();
        var secretA0 = Avx2.LoadVector256(secret + 0x00u).AsUInt64();
        var secretA1 = Avx2.LoadVector256(secret + 0x20u).AsUInt64();

        var mixedA0 = Avx2.Xor(inputA0, secretA0);
        var mixedA1 = Avx2.Xor(inputA1, secretA1);
        var productA0 = Avx2.Multiply(mixedA0.AsUInt32(), Avx2.Shuffle(mixedA0.AsUInt32(), ShuffleDataKey));
        var productA1 = Avx2.Multiply(mixedA1.AsUInt32(), Avx2.Shuffle(mixedA1.AsUInt32(), ShuffleDataKey));

        var swappedA0 = Avx2.Shuffle(inputA0.AsUInt32(), ShuffleDataSwap).AsUInt64();
        var swappedA1 = Avx2.Shuffle(inputA1.AsUInt32(), ShuffleDataSwap).AsUInt64();

        acc0 = Avx2.Add(productA0, Avx2.Add(acc0, swappedA0));
        acc1 = Avx2.Add(productA1, Avx2.Add(acc1, swappedA1));

        // ---- second half: data[0x40..0x80), secret[0x08..0x48) ----
        var inputB0 = Avx2.LoadVector256(data + 0x40u).AsUInt64();
        var inputB1 = Avx2.LoadVector256(data + 0x60u).AsUInt64();
        var secretB0 = Avx2.LoadVector256(secret + 0x08u).AsUInt64();
        var secretB1 = Avx2.LoadVector256(secret + 0x28u).AsUInt64();

        var mixedB0 = Avx2.Xor(inputB0, secretB0);
        var mixedB1 = Avx2.Xor(inputB1, secretB1);
        var productB0 = Avx2.Multiply(mixedB0.AsUInt32(), Avx2.Shuffle(mixedB0.AsUInt32(), ShuffleDataKey));
        var productB1 = Avx2.Multiply(mixedB1.AsUInt32(), Avx2.Shuffle(mixedB1.AsUInt32(), ShuffleDataKey));

        var swappedB0 = Avx2.Shuffle(inputB0.AsUInt32(), ShuffleDataSwap).AsUInt64();
        var swappedB1 = Avx2.Shuffle(inputB1.AsUInt32(), ShuffleDataSwap).AsUInt64();

        acc0 = Avx2.Add(productB0, Avx2.Add(acc0, swappedB0));
        acc1 = Avx2.Add(productB1, Avx2.Add(acc1, swappedB1));

        // Single write-back after both halves have been folded in.
        accumulator.Data256.Data0 = acc0;
        accumulator.Data256.Data1 = acc1;
    }
コード例 #9
0
ファイル: TestAvx2.cs プロジェクト: KazuhiroNomura/SIMD2
 // Checks Avx2.Add on byte vectors whose lower and upper 128-bit halves are each filled with
 // one repeated value. The expected vector is built by repeating the scalar sum of each half
 // in every byte. All four loops currently run a single iteration (value 0).
 public void Add2_Byte()
 {
     // ORs the value into each of the eight byte positions of a 64-bit word.
     UInt64 RepeatInEveryByte(UInt64 value)
     {
         UInt64 repeated = 0;
         for (var shift = 0; shift < 64; shift += 8)
         {
             repeated |= value << shift;
         }

         return repeated;
     }

     for (var leftLow = 0; leftLow < 1; leftLow++)
     {
         var leftLowHalf = Vector128.Create((Byte)leftLow);
         for (var leftHigh = 0; leftHigh < 1; leftHigh++)
         {
             var leftOperand = Vector256.Create(leftLowHalf, Vector128.Create((Byte)leftHigh));
             for (var rightLow = 0; rightLow < 1; rightLow++)
             {
                 var rightLowHalf = Vector128.Create((Byte)rightLow);
                 for (var rightHigh = 0; rightHigh < 1; rightHigh++)
                 {
                     var rightOperand = Vector256.Create(rightLowHalf, Vector128.Create((Byte)rightHigh));
                     var actual       = Avx2.Add(leftOperand, rightOperand);

                     var expectedHighHalf = Vector128.Create(RepeatInEveryByte((UInt64)(leftHigh + rightHigh)));
                     var expectedLowHalf  = Vector128.Create(RepeatInEveryByte((UInt64)(leftLow + rightLow)));
                     var expected         = Vector256.Create(expectedLowHalf, expectedHighHalf).AsByte();

                     for (var lane = 0; lane < 32; lane++)
                     {
                         Assert.AreEqual(expected.GetElement(lane), actual.GetElement(lane));
                     }
                 }
             }
         }
     }
 }
コード例 #10
0
ファイル: Add.Byte.cs プロジェクト: zwei222/coreclr
        // Basic scenario: both operands come from unaligned 256-bit loads, are added with
        // Avx2.Add, and the sum is written to the output buffer and validated against the
        // input buffers.
        public void RunBasicScenario_Load()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load));

            var firstOperand  = Avx.LoadVector256((Byte*)_dataTable.inArray1Ptr);
            var secondOperand = Avx.LoadVector256((Byte*)_dataTable.inArray2Ptr);
            var sum           = Avx2.Add(firstOperand, secondOperand);

            Unsafe.Write(_dataTable.outArrayPtr, sum);

            // Validation re-reads the inputs from the raw buffers rather than from the locals.
            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
        }
コード例 #11
0
ファイル: TestAvx2.cs プロジェクト: KazuhiroNomura/SIMD2
 // Lane-wise ("vertical") addition of two 16-element vectors, one counting up from 0 and one
 // counting down from 15. The sum is computed and discarded; both loops run one iteration.
 public void 垂直加算Int16()
 {
     for (var outer = 0; outer < 1; outer++)
     {
         var ascending  = Vector256.Create(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
         var descending = Vector256.Create(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);

         for (var inner = 0; inner < 1; inner++)
         {
             var sum = Avx2.Add(ascending, descending);
         }
     }
 }
コード例 #12
0
ファイル: VectorRet.cs プロジェクト: vikramadhav/coreclr
    // Computes four integer weights from t — (1-t)^3, 3(1-t)^2*t, 3(1-t)*t^2 and t^3 —
    // truncates each to a byte, broadcasts it to a 256-bit vector, subtracts the matching
    // static vector s_v256i_0..3 from it, and returns the byte-wise sum of the four differences.
    public static Vector256 <byte> F1_v256i(int t)
    {
        int oneMinusT = 1 - t;

        int weight0 = oneMinusT * oneMinusT * oneMinusT;
        int weight1 = 3 * oneMinusT * oneMinusT * t;
        int weight2 = 3 * oneMinusT * t * t;
        int weight3 = t * t * t;

        Vector256<byte> diff0 = Avx2.Subtract(Avx.SetAllVector256((byte)weight0), s_v256i_0);
        Vector256<byte> diff1 = Avx2.Subtract(Avx.SetAllVector256((byte)weight1), s_v256i_1);
        Vector256<byte> diff2 = Avx2.Subtract(Avx.SetAllVector256((byte)weight2), s_v256i_2);
        Vector256<byte> diff3 = Avx2.Subtract(Avx.SetAllVector256((byte)weight3), s_v256i_3);

        Vector256<byte> firstPair  = Avx2.Add(diff0, diff1);
        Vector256<byte> secondPair = Avx2.Add(diff2, diff3);

        return Avx2.Add(firstPair, secondPair);
    }
コード例 #13
0
ファイル: Add.Byte.cs プロジェクト: zwei222/coreclr
        // Class-variable scenario: the operands are the fields _clsVar1 and _clsVar2, passed
        // to Avx2.Add directly; the sum is written to the output buffer and validated.
        public void RunClsVarScenario()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario));

            var sum = Avx2.Add(_clsVar1, _clsVar2);

            Unsafe.Write(_dataTable.outArrayPtr, sum);
            ValidateResult(_clsVar1, _clsVar2, _dataTable.outArrayPtr);
        }
コード例 #14
0
    // Folds 64 bytes of input into the two 256-bit accumulator vectors. For every 32-byte
    // vector: the input is XOR-ed with the secret, the XOR result is multiplied with a
    // shuffled copy of itself, and the accumulator receives that product plus a shuffled copy
    // of the raw input. UnrollCount selects between a loop over the stripe and a hand-unrolled
    // version of the same two steps.
    private static void Accumulate512Avx2(ref Accumulator accumulator, byte *data, byte *secret)
    {
        PrefetchNonTemporalNext(data);
        PrefetchNonTemporalNext(secret);

        if (UnrollCount < 2u)
        {
            // Rolled form: one 32-byte vector per iteration.
            for (uint offset = 0u; offset < StripeLength; offset += 0x20u)
            {
                var input     = Avx2.LoadVector256(data + offset).AsUInt64();
                var secretVec = Avx2.LoadVector256(secret + offset).AsUInt64();

                var mixed   = Avx2.Xor(input, secretVec);
                var product = Avx2.Multiply(mixed.AsUInt32(), Avx2.Shuffle(mixed.AsUInt32(), ShuffleDataKey));

                var swapped  = Avx2.Shuffle(input.AsUInt32(), ShuffleDataSwap).AsUInt64();
                var previous = accumulator.Data256.AtOffset(offset);
                var updated  = Avx2.Add(product, Avx2.Add(previous, swapped));

                accumulator.Data256.AtOffset(offset) = updated;
            }

            return;
        }

        // Unrolled form: both 32-byte vectors handled side by side.
        var input0  = Avx2.LoadVector256(data + 0x00u).AsUInt64();
        var input1  = Avx2.LoadVector256(data + 0x20u).AsUInt64();
        var secret0 = Avx2.LoadVector256(secret + 0x00u).AsUInt64();
        var secret1 = Avx2.LoadVector256(secret + 0x20u).AsUInt64();

        var mixed0   = Avx2.Xor(input0, secret0);
        var mixed1   = Avx2.Xor(input1, secret1);
        var product0 = Avx2.Multiply(mixed0.AsUInt32(), Avx2.Shuffle(mixed0.AsUInt32(), ShuffleDataKey));
        var product1 = Avx2.Multiply(mixed1.AsUInt32(), Avx2.Shuffle(mixed1.AsUInt32(), ShuffleDataKey));

        var swapped0 = Avx2.Shuffle(input0.AsUInt32(), ShuffleDataSwap).AsUInt64();
        var swapped1 = Avx2.Shuffle(input1.AsUInt32(), ShuffleDataSwap).AsUInt64();

        var previous0 = accumulator.Data256.Data0;
        var previous1 = accumulator.Data256.Data1;

        accumulator.Data256.Data0 = Avx2.Add(product0, Avx2.Add(previous0, swapped0));
        accumulator.Data256.Data1 = Avx2.Add(product1, Avx2.Add(previous1, swapped1));
    }
コード例 #15
0
        // Vector addition, overflow edition: adds 250 to each of the first 32 bytes of the
        // array using byte lanes. The result is computed and discarded.
        // Arrays shorter than one vector are zero-padded through a stack buffer so the 32-byte
        // load never reads outside the array (and never dereferences the null pointer that
        // `fixed` yields for an empty array).
        private unsafe void Test3AddOverflow(byte[] aa)
        {
            int width = Vector256<byte>.Count;
            Vector256<byte> v = Vector256.Create((byte)250);
            Vector256<byte> tempV;

            if (aa.Length >= width)
            {
                fixed (byte* ptrA = aa)
                {
                    tempV = Avx.LoadVector256(ptrA);
                }
            }
            else
            {
                byte* padded = stackalloc byte[width];
                for (int k = 0; k < width; k++)
                {
                    padded[k] = k < aa.Length ? aa[k] : (byte)0;
                }

                tempV = Avx.LoadVector256(padded);
            }

            Vector256<byte> total = Avx2.Add(v, tempV);
        }
コード例 #16
0
        // Overflow-free variant: widens the first 8 bytes of the array to int lanes and adds
        // 250 to each of them. The result is computed and discarded.
        // Arrays shorter than 8 bytes are zero-padded through a stack buffer so the widening
        // load never reads outside the array (and never dereferences the null pointer that
        // `fixed` yields for an empty array).
        private unsafe void Test5Add(byte[] aa)
        {
            int lanes = Vector256<int>.Count;
            Vector256<int> v = Vector256.Create((int)250);
            Vector256<int> tempV;

            if (aa.Length >= lanes)
            {
                fixed (byte* ptrA = aa)
                {
                    tempV = Avx2.ConvertToVector256Int32(ptrA);
                }
            }
            else
            {
                byte* padded = stackalloc byte[lanes];
                for (int k = 0; k < lanes; k++)
                {
                    padded[k] = k < aa.Length ? aa[k] : (byte)0;
                }

                tempV = Avx2.ConvertToVector256Int32(padded);
            }

            Vector256<int> total = Avx2.Add(v, tempV);
        }
コード例 #17
0
ファイル: Add.Byte.cs プロジェクト: zwei222/coreclr
        // Basic scenario: both operands are read through Unsafe.Read, added with Avx2.Add, and
        // the sum is written to the output buffer and validated against the input buffers.
        public void RunBasicScenario_UnsafeRead()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead));

            var firstOperand  = Unsafe.Read<Vector256<Byte>>(_dataTable.inArray1Ptr);
            var secondOperand = Unsafe.Read<Vector256<Byte>>(_dataTable.inArray2Ptr);
            var sum           = Avx2.Add(firstOperand, secondOperand);

            Unsafe.Write(_dataTable.outArrayPtr, sum);

            // Validation re-reads the inputs from the raw buffers rather than from the locals.
            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
        }
コード例 #18
0
        // Two carry-propagation passes over the byte lanes. In each pass a helper combines
        // previous_carries with the current vector, a constant (1, then 2) is subtracted with
        // unsigned saturation, and the result is added onto the running sum.
        // NOTE(review): push_last_byte_of_a_to_b / push_last_2bytes_of_a_to_b are defined
        // elsewhere; judging by their names they shift the current vector by 1 / 2 bytes and
        // fill the gap from the end of the first argument — confirm.
        static Vector256 <byte> avxcarryContinuations(Vector256 <byte> initial_lengths,
                                                      Vector256 <byte> previous_carries)
        {
            var shiftedByOne = push_last_byte_of_a_to_b(previous_carries, initial_lengths);
            var firstCarry   = Avx2.SubtractSaturate(shiftedByOne, Vector256.Create((byte)1));
            var partialSum   = Avx2.Add(initial_lengths, firstCarry);

            var shiftedByTwo = push_last_2bytes_of_a_to_b(previous_carries, partialSum);
            var secondCarry  = Avx2.SubtractSaturate(shiftedByTwo, Vector256.Create((byte)2));

            return Avx2.Add(partialSum, secondCarry);
        }
コード例 #19
0
        // Array addition: widens the bytes of the array to int lanes, eight at a time, and
        // accumulates them into one vector of per-lane sums. The result is computed and
        // discarded.
        // Only whole groups of eight are loaded straight from the array; a trailing partial
        // group is zero-padded through a stack buffer so no load reads past the array's end.
        private unsafe void Test6Add(byte[] aa)
        {
            int lanes = Vector256<int>.Count;
            int wholeGroupsEnd = aa.Length - (aa.Length % lanes);
            Vector256<int> total = Vector256<int>.Zero;

            fixed (byte* ptrA = aa)
            {
                for (int i = 0; i < wholeGroupsEnd; i += lanes)
                {
                    Vector256<int> tempV = Avx2.ConvertToVector256Int32(ptrA + i);
                    total = Avx2.Add(total, tempV);
                }
            }

            if (wholeGroupsEnd < aa.Length)
            {
                byte* tail = stackalloc byte[lanes];
                for (int k = 0; k < lanes; k++)
                {
                    int index = wholeGroupsEnd + k;
                    tail[k] = index < aa.Length ? aa[index] : (byte)0;
                }

                total = Avx2.Add(total, Avx2.ConvertToVector256Int32(tail));
            }
        }
コード例 #20
0
ファイル: Add.Byte.cs プロジェクト: zwei222/coreclr
        // Struct-local-field scenario: the operands are loaded from the addresses of the two
        // fields of a locally created TestStruct, added with Avx2.Add, and the sum is written
        // to the output buffer and validated against those fields.
        public void RunStructLclFldScenario_Load()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario_Load));

            var test = TestStruct.Create();

            var firstOperand  = Avx.LoadVector256((Byte*)(&test._fld1));
            var secondOperand = Avx.LoadVector256((Byte*)(&test._fld2));
            var sum           = Avx2.Add(firstOperand, secondOperand);

            Unsafe.Write(_dataTable.outArrayPtr, sum);
            ValidateResult(test._fld1, test._fld2, _dataTable.outArrayPtr);
        }
コード例 #21
0
        // Advances the field by one generation, 32 cells per iteration. For each cell the eight
        // surrounding bytes are summed, the cell's own value (shifted left by 3) is OR-ed in,
        // and the combined value is used as a byte-shuffle index into v_lookup to obtain the
        // new state. Afterwards the two buffers are swapped and the first and last column of
        // the rows touched by the final loop are cleared.
        // NOTE(review): assumes cells hold 0 or 1 and v_lookup encodes the survival rule —
        // both are defined outside this method; confirm.
        public override void Step()
        {
            fixed (byte* current = currentField, next = nextField)
            {
                int end = currentField.Length - 2 * WIDTH;

                for (int offset = 2 * WIDTH; offset < end; offset += 32)
                {
                    byte* cell     = current + offset;
                    byte* rowAbove = cell - WIDTH;
                    byte* rowBelow = cell + WIDTH;

                    // Byte-wise sums; lane addition wraps, so the grouping does not matter.
                    Vector256<byte> above = Avx2.Add(
                        Avx2.Add(Avx.LoadVector256(rowAbove - 1), Avx.LoadVector256(rowAbove)),
                        Avx.LoadVector256(rowAbove + 1));
                    Vector256<byte> sides = Avx2.Add(
                        Avx.LoadVector256(cell - 1),
                        Avx.LoadVector256(cell + 1));
                    Vector256<byte> below = Avx2.Add(
                        Avx2.Add(Avx.LoadVector256(rowBelow - 1), Avx.LoadVector256(rowBelow)),
                        Avx.LoadVector256(rowBelow + 1));

                    Vector256<byte> neighbourCount = Avx2.Add(Avx2.Add(above, sides), below);

                    // Own state moved to bit 3 so it can share one index with the count.
                    Vector256<byte> self = Avx.LoadVector256(cell);
                    self = Avx2.ShiftLeftLogical(self.AsUInt64(), (byte)3).AsByte();

                    Vector256<byte> lookupIndex = Avx2.Or(neighbourCount, self);
                    Vector256<byte> nextState   = Avx2.Shuffle(v_lookup, lookupIndex);

                    Avx2.Store(next + offset, nextState);
                }
            }

            // Swap buffers: the freshly written field becomes the current one.
            byte[] previousField = currentField;
            currentField = nextField;
            nextField    = previousField;

            for (int y = 1; y < HEIGHT - 1; y++)
            {
                int rowStart = WIDTH + y * WIDTH;

                currentField[rowStart]             = 0;
                currentField[rowStart + WIDTH - 1] = 0;
            }
        }
コード例 #22
0
ファイル: ChaCha20Utils.cs プロジェクト: HMBSbige/CryptoBase
    /// <summary>
    /// Applies one add / xor / rotate quarter-round to four vectors of 32-bit words. All four
    /// arguments are updated in place.
    /// </summary>
    /// <remarks>
    /// The steps are order-dependent: each one consumes the value written by the step before it.
    /// The rotate helpers are extension methods defined elsewhere; judging by their names and
    /// arguments they rotate every 32-bit lane left by 16, 12, 8 and 7 bits — TODO confirm.
    /// </remarks>
    private static void QuarterRound(ref Vector256 <uint> a, ref Vector256 <uint> b, ref Vector256 <uint> c, ref Vector256 <uint> d)
    {
        // a += b; d = rotate(d ^ a, 16)
        a = Avx2.Add(a, b);
        d = Avx2.Xor(a, d).RotateLeftUInt32_16();

        // c += d; b = rotate(b ^ c, 12)
        c = Avx2.Add(c, d);
        b = Avx2.Xor(b, c).RotateLeftUInt32(12);

        // a += b; d = rotate(d ^ a, 8)
        a = Avx2.Add(a, b);
        d = Avx2.Xor(a, d).RotateLeftUInt32_8();

        // c += d; b = rotate(b ^ c, 7)
        c = Avx2.Add(c, d);
        b = Avx2.Xor(b, c).RotateLeftUInt32(7);
    }
コード例 #23
0
ファイル: Add.Byte.cs プロジェクト: zwei222/coreclr
            // Struct-field scenario: pins this struct's two vector fields, loads both through
            // the pinned pointers, adds them with Avx2.Add, writes the sum to the test class's
            // output buffer and lets the test class validate it against the fields.
            public void RunStructFldScenario_Load(SimpleBinaryOpTest__AddByte testClass)
            {
                fixed (Vector256<Byte>* pFld1 = &_fld1)
                fixed (Vector256<Byte>* pFld2 = &_fld2)
                {
                    var firstOperand  = Avx.LoadVector256((Byte*)pFld1);
                    var secondOperand = Avx.LoadVector256((Byte*)pFld2);
                    var sum           = Avx2.Add(firstOperand, secondOperand);

                    Unsafe.Write(testClass._dataTable.outArrayPtr, sum);
                    testClass.ValidateResult(_fld1, _fld2, testClass._dataTable.outArrayPtr);
                }
            }
コード例 #24
0
        /// <summary>
        /// Computes one vector-wide section of a cost diagonal and stores it into the first
        /// diagonal buffer. Source characters are compared with target characters in reversed
        /// order; lanes that match get their substitution candidate reduced by one (the compare
        /// yields all-ones, i.e. -1). The minimum of the insertion, deletion and substitution
        /// candidates plus one is written back.
        /// </summary>
        /// <remarks>
        /// Only <c>int</c> and <c>ushort</c> are handled for <typeparamref name="T"/>; any other
        /// type falls through without doing anything. <paramref name="rowIndex"/> is only read.
        /// </remarks>
        public static unsafe void CalculateDiagonalSection_Avx2 <T>(void *refDiag1Ptr, void *refDiag2Ptr, char *sourcePtr, char *targetPtr, ref int rowIndex, int columnIndex) where T : struct
        {
            if (typeof(T) == typeof(int))
            {
                var firstDiag  = (int*)refDiag1Ptr;
                var secondDiag = (int*)refDiag2Ptr;
                var sectionStart = rowIndex - Vector256<T>.Count;

                // Widen 8 UTF-16 code units of each string to 32-bit lanes.
                var sourceLanes = Avx2.ConvertToVector256Int32((ushort*)sourcePtr + sectionStart);
                var targetLanes = Avx2.ConvertToVector256Int32((ushort*)targetPtr + columnIndex - 1);

                // Reverse the target lanes: reverse inside each 128-bit half, then swap halves.
                targetLanes = Avx2.Shuffle(targetLanes, 0x1b);
                targetLanes = Avx2.Permute2x128(targetLanes, targetLanes, 1);

                var matchAdjustment = Avx2.CompareEqual(sourceLanes, targetLanes);
                var substitution    = Avx2.Add(Avx.LoadDquVector256(firstDiag + sectionStart), matchAdjustment);
                var deletion        = Avx.LoadDquVector256(secondDiag + sectionStart + 1);
                var insertion       = Avx.LoadDquVector256(secondDiag + sectionStart);

                var best = Avx2.Min(Avx2.Min(insertion, deletion), substitution);
                best = Avx2.Add(best, Vector256.Create(1));

                Avx.Store(firstDiag + sectionStart + 1, best);
                return;
            }

            if (typeof(T) == typeof(ushort))
            {
                var firstDiag  = (ushort*)refDiag1Ptr;
                var secondDiag = (ushort*)refDiag2Ptr;
                var sectionStart = rowIndex - Vector256<T>.Count;

                // 16 UTF-16 code units of each string, used as 16-bit lanes directly.
                var sourceLanes = Avx.LoadDquVector256((ushort*)sourcePtr + sectionStart);
                var targetLanes = Avx.LoadDquVector256((ushort*)targetPtr + columnIndex - 1);

                // Reverse the target lanes: byte shuffle inside each 128-bit half, then swap halves.
                targetLanes = Avx2.Shuffle(targetLanes.AsByte(), REVERSE_USHORT_AS_BYTE_256).AsUInt16();
                targetLanes = Avx2.Permute2x128(targetLanes, targetLanes, 1);

                var matchAdjustment = Avx2.CompareEqual(sourceLanes, targetLanes);
                var substitution    = Avx2.Add(Avx.LoadDquVector256(firstDiag + sectionStart), matchAdjustment);
                var deletion        = Avx.LoadDquVector256(secondDiag + sectionStart + 1);
                var insertion       = Avx.LoadDquVector256(secondDiag + sectionStart);

                var best = Avx2.Min(Avx2.Min(insertion, deletion), substitution);
                best = Avx2.Add(best, Vector256.Create((ushort)1));

                Avx.Store(firstDiag + sectionStart + 1, best);
            }
        }
コード例 #25
0
ファイル: VectorRet.cs プロジェクト: vikramadhav/coreclr
    // Returns v1 plus the vector produced by Vector256One<T>(), dispatching on T to the
    // matching Avx (float/double) or Avx2 (integer) Add overload. The vectors are
    // reinterpreted to the concrete element type for the addition and back to T for the
    // result. Throws NotSupportedException for any other T.
    public static Vector256 <T> Vector256PlusOne <T>(Vector256 <T> v1) where T : struct
    {
        Vector256<T> one = Vector256One<T>();

        if (typeof(T) == typeof(float))
        {
            var sum = Avx.Add(Avx.StaticCast<T, float>(v1), Avx.StaticCast<T, float>(one));
            return Avx.StaticCast<float, T>(sum);
        }

        if (typeof(T) == typeof(double))
        {
            var sum = Avx.Add(Avx.StaticCast<T, double>(v1), Avx.StaticCast<T, double>(one));
            return Avx.StaticCast<double, T>(sum);
        }

        if (typeof(T) == typeof(byte))
        {
            var sum = Avx2.Add(Avx.StaticCast<T, byte>(v1), Avx.StaticCast<T, byte>(one));
            return Avx.StaticCast<byte, T>(sum);
        }

        if (typeof(T) == typeof(sbyte))
        {
            var sum = Avx2.Add(Avx.StaticCast<T, sbyte>(v1), Avx.StaticCast<T, sbyte>(one));
            return Avx.StaticCast<sbyte, T>(sum);
        }

        if (typeof(T) == typeof(short))
        {
            var sum = Avx2.Add(Avx.StaticCast<T, short>(v1), Avx.StaticCast<T, short>(one));
            return Avx.StaticCast<short, T>(sum);
        }

        if (typeof(T) == typeof(ushort))
        {
            var sum = Avx2.Add(Avx.StaticCast<T, ushort>(v1), Avx.StaticCast<T, ushort>(one));
            return Avx.StaticCast<ushort, T>(sum);
        }

        if (typeof(T) == typeof(int))
        {
            var sum = Avx2.Add(Avx.StaticCast<T, int>(v1), Avx.StaticCast<T, int>(one));
            return Avx.StaticCast<int, T>(sum);
        }

        if (typeof(T) == typeof(uint))
        {
            var sum = Avx2.Add(Avx.StaticCast<T, uint>(v1), Avx.StaticCast<T, uint>(one));
            return Avx.StaticCast<uint, T>(sum);
        }

        if (typeof(T) == typeof(long))
        {
            var sum = Avx2.Add(Avx.StaticCast<T, long>(v1), Avx.StaticCast<T, long>(one));
            return Avx.StaticCast<long, T>(sum);
        }

        if (typeof(T) == typeof(ulong))
        {
            var sum = Avx2.Add(Avx.StaticCast<T, ulong>(v1), Avx.StaticCast<T, ulong>(one));
            return Avx.StaticCast<ulong, T>(sum);
        }

        throw new NotSupportedException();
    }
コード例 #26
0
 /// <summary>
 /// Adds the four <c>orig_*</c> vectors onto the four <c>x_*</c> vectors, then regroups the
 /// sums with two rounds of unpack operations (first on 32-bit lanes, then on 64-bit lanes).
 /// </summary>
 /// <remarks>
 /// <c>x_A..x_D</c> are updated in place and hold the regrouped result on return.
 /// <c>t_A..t_D</c> are overwritten with the intermediate 32-bit unpack results.
 /// <c>orig_A..orig_D</c> are only read. The statements are order-dependent.
 /// </remarks>
 private static void OneQuadUnpack(ref Vector256 <uint> x_A, ref Vector256 <uint> x_B, ref Vector256 <uint> x_C, ref Vector256 <uint> x_D, ref Vector256 <uint> t_A, ref Vector256 <uint> t_B, ref Vector256 <uint> t_C, ref Vector256 <uint> t_D, ref Vector256 <uint> orig_A, ref Vector256 <uint> orig_B, ref Vector256 <uint> orig_C, ref Vector256 <uint> orig_D)
 {
     // Step 1: lane-wise add of the original values.
     x_A = Avx2.Add(x_A, orig_A);
     x_B = Avx2.Add(x_B, orig_B);
     x_C = Avx2.Add(x_C, orig_C);
     x_D = Avx2.Add(x_D, orig_D);
     // Step 2: interleave 32-bit lanes of the pairs (A,B) and (C,D), low and high halves.
     t_A = Avx2.UnpackLow(x_A, x_B);
     t_B = Avx2.UnpackLow(x_C, x_D);
     t_C = Avx2.UnpackHigh(x_A, x_B);
     t_D = Avx2.UnpackHigh(x_C, x_D);
     // Step 3: interleave 64-bit lanes of the intermediates to form the final grouping.
     x_A = Avx2.UnpackLow(t_A.AsUInt64(), t_B.AsUInt64()).AsUInt32();
     x_B = Avx2.UnpackHigh(t_A.AsUInt64(), t_B.AsUInt64()).AsUInt32();
     x_C = Avx2.UnpackLow(t_C.AsUInt64(), t_D.AsUInt64()).AsUInt32();
     x_D = Avx2.UnpackHigh(t_C.AsUInt64(), t_D.AsUInt64()).AsUInt32();
 }
コード例 #27
0
        // Accumulates eight successively permuted copies of the input onto itself. The first
        // copy gets `carry` placed in element 0, the second gets 0 placed in element 0, and
        // the remaining six are permuted again without touching element 0.
        // NOTE(review): RotateRight is a permutation control defined elsewhere; if it moves
        // every element one lane up, this yields a running (prefix) sum seeded with `carry` —
        // confirm against its definition.
        private static Vector256 <int> Aggregate(Vector256 <int> t, int carry)
        {
            var control = RotateRight;

            // First shifted copy: element 0 is seeded with the incoming carry.
            var shifted = Avx2.PermuteVar8x32(t, control).WithElement(0, carry);
            t = Avx2.Add(t, shifted);

            // Second shifted copy: element 0 is cleared.
            shifted = Avx2.PermuteVar8x32(shifted, control).WithElement(0, 0);
            t = Avx2.Add(t, shifted);

            // Remaining six copies: permute and accumulate.
            for (var step = 0; step < 6; ++step)
            {
                shifted = Avx2.PermuteVar8x32(shifted, control);
                t = Avx2.Add(t, shifted);
            }

            return t;
        }
コード例 #28
0
        /// <summary>
        /// Fast polynomial approximation of the natural logarithm, applied to each of
        /// the eight lanes of <paramref name="x"/>.
        /// Absolute error bounded by 1e-4.
        /// </summary>
        /// <param name="x">Eight packed single-precision inputs.</param>
        /// <returns>
        /// The per-lane approximation. Lanes whose input compares strictly less than
        /// zero yield NaN.
        /// </returns>
        /// <remarks>
        /// Only strictly negative lanes are special-cased; a zero lane goes through the
        /// regular polynomial path like any positive input.
        /// </remarks>
        public static Vector256<float> Log(Vector256<float> x)
        {
            // According to BenchmarkDotNet, declaring all constants up-front
            // yields nearly 10% speed-up.
            const float offsetNonNegative = -89.970756366f;
            const float offsetNegative = float.NaN; // behavior of MathF.Log() on negative numbers
            const float c1 = 3.529304993f;
            const float c2 = -2.461222105f;
            const float c3 = 1.130626167f;
            const float c4 = -0.288739945f;
            const float c5 = 3.110401639e-2f;
            const float exponentScale = 0.6931471805f;

            const int mantissaMask = 0x7FFFFF;
            const int exponentOfOne = 0x3F800000;

            Vector256<int> bits = x.As<float, int>();

            // Bits 23 and above of each lane, converted to float.
            Vector256<float> exp = Avx.ConvertToVector256Single(Avx2.ShiftRightArithmetic(bits, 23));

            // offset = x < 0 ? NaN : -89.970756366f
            Vector256<float> isNegative =
                Avx.Compare(x, Vector256<float>.Zero, FloatComparisonMode.OrderedLessThanNonSignaling);
            Vector256<float> offset = Avx.BlendVariable(
                Vector256.Create(offsetNonNegative),
                Vector256.Create(offsetNegative),
                isNegative);

            // Keep the 23 mantissa bits and overwrite the rest with the bit pattern of 1.0f.
            Vector256<int> mantissaBits =
                Avx2.Or(Avx2.And(bits, Vector256.Create(mantissaMask)), Vector256.Create(exponentOfOne));
            Vector256<float> m = mantissaBits.As<int, float>();

            // Horner evaluation of
            //   m * (c1 + m * (c2 + m * (c3 + m * (c4 + m * c5))))
            Vector256<float> poly = Avx.Multiply(m, Vector256.Create(c5));
            poly = Avx.Add(Vector256.Create(c4), poly);
            poly = Avx.Multiply(m, poly);
            poly = Avx.Add(Vector256.Create(c3), poly);
            poly = Avx.Multiply(m, poly);
            poly = Avx.Add(Vector256.Create(c2), poly);
            poly = Avx.Multiply(m, poly);
            poly = Avx.Add(Vector256.Create(c1), poly);
            poly = Avx.Multiply(m, poly);

            // ... + (offset + 0.6931471805f * exp)
            Vector256<float> exponentTerm = Avx.Add(offset, Avx.Multiply(Vector256.Create(exponentScale), exp));

            return Avx.Add(poly, exponentTerm);
        }
コード例 #29
0
ファイル: Add.Byte.cs プロジェクト: zwei222/coreclr
        /// <summary>
        /// Test scenario: pins <c>_fld1</c> and <c>_fld2</c>, loads both through
        /// <c>Avx.LoadVector256</c>, adds them with <c>Avx2.Add</c>, writes the sum to
        /// the data table's output buffer and validates it against the two fields.
        /// </summary>
        public void RunClassFldScenario_Load()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario_Load));

            // Both fields stay pinned for the loads and for the validation that follows.
            fixed (Vector256<Byte>* pFld1 = &_fld1, pFld2 = &_fld2)
            {
                var left = Avx.LoadVector256((Byte*)pFld1);
                var right = Avx.LoadVector256((Byte*)pFld2);
                var result = Avx2.Add(left, right);

                Unsafe.Write(_dataTable.outArrayPtr, result);
                ValidateResult(_fld1, _fld2, _dataTable.outArrayPtr);
            }
        }
コード例 #30
0
        /// <summary>
        /// Test scenario: pins <c>_clsVar1</c> and <c>_clsVar2</c>, loads both through
        /// <c>Avx.LoadVector256</c>, adds them with <c>Avx2.Add</c>, writes the sum to
        /// the data table's output buffer and validates it against the two variables.
        /// </summary>
        public void RunClsVarScenario_Load()
        {
            TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario_Load));

            // Both variables stay pinned for the loads and for the validation that follows.
            fixed (Vector256<UInt16>* pClsVar1 = &_clsVar1, pClsVar2 = &_clsVar2)
            {
                var left = Avx.LoadVector256((UInt16*)pClsVar1);
                var right = Avx.LoadVector256((UInt16*)pClsVar2);
                var result = Avx2.Add(left, right);

                Unsafe.Write(_dataTable.outArrayPtr, result);
                ValidateResult(_clsVar1, _clsVar2, _dataTable.outArrayPtr);
            }
        }