/// <summary>
/// Field scenario: converts the instance field <c>_fld</c> with
/// <c>Avx.ConvertToVector256Int32</c>, writes the converted vector to the
/// data table's output pointer and validates it against the same field.
/// </summary>
public void RunFldScenario()
        {
            // The operand is read straight from the instance field.
            var converted = Avx.ConvertToVector256Int32(_fld);

            // Publish the vector first; validation reads it back through the output pointer.
            Unsafe.Write(_dataTable.outArrayPtr, converted);

            ValidateResult(_fld, _dataTable.outArrayPtr);
        }
Exemplo n.º 2
0
 /// <summary>
 /// Loads eight consecutive floats starting at <paramref name="origin"/>[<paramref name="index"/>]
 /// and converts them to a vector of eight 32-bit integers.
 /// </summary>
 /// <param name="origin">Base pointer of the float data; eight floats must be readable from the indexed position.</param>
 /// <param name="index">Element offset (in floats) from <paramref name="origin"/>.</param>
 /// <returns>
 /// The converted vector when AVX is supported; otherwise <c>default</c> (an all-zero vector),
 /// in which case no memory is read.
 /// </returns>
 public static Vector256<int> ConvertToVector256Int32(float* origin, uint index)
 {
     // Guard clause: without AVX there is nothing to execute, so fall back to the zero vector.
     if (!Avx.IsSupported)
     {
         return default;
     }

     // Unaligned 256-bit load followed by the packed float -> int32 conversion.
     return Avx.ConvertToVector256Int32(Avx.LoadVector256(origin + index));
 }
        /// <summary>
        /// Local-field scenario: creates a fresh test object, converts its <c>_fld</c> field with
        /// <c>Avx.ConvertToVector256Int32</c>, writes the converted vector to the data table's
        /// output pointer and validates it against that object's field.
        /// </summary>
        public void RunLclFldScenario()
        {
            // The operand comes from a field of a locally constructed instance, not from "this".
            var instance  = new SimpleUnaryOpTest__ConvertToVector256Int32Single();
            var converted = Avx.ConvertToVector256Int32(instance._fld);

            Unsafe.Write(_dataTable.outArrayPtr, converted);

            ValidateResult(instance._fld, _dataTable.outArrayPtr);
        }
        /// <summary>
        /// Local-variable scenario: loads the operand from the input pointer with
        /// <c>Avx.LoadAlignedVector256</c> into a local, converts it with
        /// <c>Avx.ConvertToVector256Int32</c>, writes the converted vector to the output pointer
        /// and validates it against the loaded operand.
        /// </summary>
        public void RunLclVarScenario_LoadAligned()
        {
            // Aligned load of the input vector into a local.
            var operand   = Avx.LoadAlignedVector256((Single *)(_dataTable.inArrayPtr));
            var converted = Avx.ConvertToVector256Int32(operand);

            Unsafe.Write(_dataTable.outArrayPtr, converted);

            ValidateResult(operand, _dataTable.outArrayPtr);
        }
        /// <summary>
        /// Local-variable scenario: reads the operand from the input pointer with
        /// <c>Unsafe.Read</c> into a local, converts it with
        /// <c>Avx.ConvertToVector256Int32</c>, writes the converted vector to the output pointer
        /// and validates it against the operand that was read.
        /// </summary>
        public void RunLclVarScenario_UnsafeRead()
        {
            // Read the input vector through Unsafe.Read rather than an AVX load intrinsic.
            var operand   = Unsafe.Read <Vector256 <Single> >(_dataTable.inArrayPtr);
            var converted = Avx.ConvertToVector256Int32(operand);

            Unsafe.Write(_dataTable.outArrayPtr, converted);

            ValidateResult(operand, _dataTable.outArrayPtr);
        }
        /// <summary>
        /// Basic scenario: feeds the result of <c>Avx.LoadAlignedVector256</c> on the input
        /// pointer directly into <c>Avx.ConvertToVector256Int32</c>, writes the converted vector
        /// to the output pointer and validates the output buffer against the input buffer.
        /// </summary>
        public void RunBasicScenario_LoadAligned()
        {
            // The load stays nested inside the conversion call; no operand local is introduced.
            var converted = Avx.ConvertToVector256Int32(Avx.LoadAlignedVector256((Single *)(_dataTable.inArrayPtr)));

            Unsafe.Write(_dataTable.outArrayPtr, converted);

            // Validation works on the raw buffers here, not on a vector value.
            ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr);
        }
        /// <summary>
        /// Basic scenario: feeds the result of <c>Unsafe.Read</c> on the input pointer directly
        /// into <c>Avx.ConvertToVector256Int32</c>, writes the converted vector to the output
        /// pointer and validates the output buffer against the input buffer.
        /// </summary>
        public void RunBasicScenario_UnsafeRead()
        {
            // The read stays nested inside the conversion call; no operand local is introduced.
            var converted = Avx.ConvertToVector256Int32(Unsafe.Read <Vector256 <Single> >(_dataTable.inArrayPtr));

            Unsafe.Write(_dataTable.outArrayPtr, converted);

            // Validation works on the raw buffers here, not on a vector value.
            ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr);
        }
Exemplo n.º 8
0
        /// <summary>
        /// Computes the variance of <paramref name="vs"/> as mean(x^2) - mean(x)^2.
        /// The squares are computed in float lanes and accumulated in 32-bit int lanes.
        /// </summary>
        /// <remarks>
        /// Known limitation kept from the original design: the per-lane <c>int</c> accumulators
        /// are never widened, so the sum of squares overflows for large inputs (the original
        /// author noted this happening at around one million elements).
        /// An empty array yields NaN because of the division by <c>vs.Length</c>.
        /// </remarks>
        /// <param name="vs">Samples to analyse.</param>
        /// <returns>The variance of the samples.</returns>
        private unsafe double Test7Variance(byte[] vs)
        {
            int simdLength = Vector256<int>.Count;
            // Highest start index from which a full 8-byte block can still be loaded
            // without reading past the end of the array.
            int lastBlockStart = vs.Length - simdLength;
            int i;
            var vTotal = Vector256<int>.Zero;

            fixed (byte* p = vs)
            {
                for (i = 0; i <= lastBlockStart; i += simdLength)
                {
                    // Zero-extend 8 bytes into 8 int lanes, then square them as floats.
                    Vector256<int>   v   = Avx2.ConvertToVector256Int32(p + i);
                    Vector256<float> inu = Avx.ConvertToVector256Single(v);
                    Vector256<float> vv  = Avx.Multiply(inu, inu);
                    v      = Avx.ConvertToVector256Int32(vv);
                    vTotal = Avx2.Add(vTotal, v);
                }
            }

            long total = 0;

            // Horizontal sum of the lane accumulators.
            int* temp = stackalloc int[simdLength];
            Avx.Store(temp, vTotal);
            for (int j = 0; j < simdLength; j++)
            {
                total += temp[j];
            }

            // Scalar tail: the remaining (Length % 8) elements, squared like the vector lanes.
            for (; i < vs.Length; i++)
            {
                total += vs[i] * vs[i];
            }

            double average = (double)Test2(vs) / vs.Length;

            return ((double)total / vs.Length) - (average * average);
        }
Exemplo n.º 9
0
 /// <summary>
 /// Thin wrapper that forwards <paramref name="a"/> to <c>Avx.ConvertToVector256Int32</c>
 /// and returns its result.
 /// </summary>
 public static i32 Convertf32_i32(f32 a) => Avx.ConvertToVector256Int32(a);
Exemplo n.º 10
0
            /// <summary>
            /// Converts one line of float samples to bytes, consuming 4 floats per pixel and
            /// emitting 3 bytes per pixel (the 4th input value of each pixel is not written).
            /// </summary>
            /// <param name="ipstart">Start of the input line; reinterpreted as <c>float*</c>.</param>
            /// <param name="opstart">Start of the output line.</param>
            /// <param name="cb">Length of the input line in bytes.</param>
            unsafe void IConversionProcessor.ConvertLine(byte *ipstart, byte *opstart, int cb)
            {
                float *ip = (float *)ipstart, ipe = (float *)(ipstart + cb);
                byte * op = opstart;

#if HWINTRINSICS
                // AVX2 path: taken only when at least one full block of 32 floats (8 pixels) is available.
                if (Avx2.IsSupported && ipe >= ip + Vector256 <byte> .Count)
                {
                    var vscale = Vector256.Create((float)byte.MaxValue);
                    // NOTE(review): the mask tables live in HWIntrinsics, outside this view. Judging by
                    // their names they undo the per-lane interleave of the pack instructions and compact
                    // 4-byte pixels into 3-byte pixels -- confirm against their definitions.
                    var vmaskp = Avx.LoadVector256((int *)Unsafe.AsPointer(ref MemoryMarshal.GetReference(HWIntrinsics.PermuteMaskDeinterleave8x32)));
                    var vmaskq = Avx.LoadVector256((int *)Unsafe.AsPointer(ref MemoryMarshal.GetReference(HWIntrinsics.PermuteMask3xTo3Chan)));
                    var vmasks = Avx2.BroadcastVector128ToVector256((byte *)Unsafe.AsPointer(ref MemoryMarshal.GetReference(HWIntrinsics.ShuffleMask3xTo3Chan)));

                    // Bias the end pointer by one block so the "ip >= ipe" test below detects the last block.
                    ipe -= Vector256 <byte> .Count;
                    do
                    {
                        // Load 32 floats and scale them by 255.
                        var vf0 = Avx.Multiply(Avx.LoadVector256(ip), vscale);
                        var vf1 = Avx.Multiply(Avx.LoadVector256(ip + Vector256 <float> .Count), vscale);
                        var vf2 = Avx.Multiply(Avx.LoadVector256(ip + Vector256 <float> .Count * 2), vscale);
                        var vf3 = Avx.Multiply(Avx.LoadVector256(ip + Vector256 <float> .Count * 3), vscale);
                        ip += Vector256 <byte> .Count;

                        var vi0 = Avx.ConvertToVector256Int32(vf0);
                        var vi1 = Avx.ConvertToVector256Int32(vf1);
                        var vi2 = Avx.ConvertToVector256Int32(vf2);
                        var vi3 = Avx.ConvertToVector256Int32(vf3);

                        // Narrow int32 -> int16 -> byte with saturation.
                        var vs0 = Avx2.PackSignedSaturate(vi0, vi1);
                        var vs1 = Avx2.PackSignedSaturate(vi2, vi3);

                        var vb0 = Avx2.PackUnsignedSaturate(vs0, vs1);
                        vb0 = Avx2.PermuteVar8x32(vb0.AsInt32(), vmaskp).AsByte();

                        vb0 = Avx2.Shuffle(vb0, vmasks);
                        vb0 = Avx2.PermuteVar8x32(vb0.AsInt32(), vmaskq).AsByte();

                        // Fewer than one full block beyond this one remains: take the narrow-store exit.
                        if (ip >= ipe)
                        {
                            goto LastBlock;
                        }

                        // Full 32-byte store, but op only advances by 24 bytes; the next store
                        // overwrites the 8 extra bytes.
                        Avx.Store(op, vb0);
                        op += Vector256 <byte> .Count * 3 / 4;
                        continue;

LastBlock:
                        // Final block of this path: write exactly 24 bytes (16 + 8) instead of 32.
                        Sse2.Store(op, vb0.GetLower());
                        Sse2.StoreScalar((long *)(op + Vector128 <byte> .Count), vb0.GetUpper().AsInt64());
                        op += Vector256 <byte> .Count * 3 / 4;
                        break;
                    } while (true);
                    // Undo the end-pointer bias so the scalar tail loop sees the real end.
                    ipe += Vector256 <byte> .Count;
                }
                // SSSE3 path: same scheme with blocks of 16 floats (4 pixels) producing 12 bytes.
                else if (Ssse3.IsSupported && ipe >= ip + Vector128 <byte> .Count)
                {
                    var vscale = Vector128.Create((float)byte.MaxValue);
                    var vmasks = Sse2.LoadVector128((byte *)Unsafe.AsPointer(ref MemoryMarshal.GetReference(HWIntrinsics.ShuffleMask3xTo3Chan)));

                    ipe -= Vector128 <byte> .Count;
                    do
                    {
                        var vf0 = Sse.Multiply(Sse.LoadVector128(ip), vscale);
                        var vf1 = Sse.Multiply(Sse.LoadVector128(ip + Vector128 <float> .Count), vscale);
                        var vf2 = Sse.Multiply(Sse.LoadVector128(ip + Vector128 <float> .Count * 2), vscale);
                        var vf3 = Sse.Multiply(Sse.LoadVector128(ip + Vector128 <float> .Count * 3), vscale);
                        ip += Vector128 <byte> .Count;

                        var vi0 = Sse2.ConvertToVector128Int32(vf0);
                        var vi1 = Sse2.ConvertToVector128Int32(vf1);
                        var vi2 = Sse2.ConvertToVector128Int32(vf2);
                        var vi3 = Sse2.ConvertToVector128Int32(vf3);

                        var vs0 = Sse2.PackSignedSaturate(vi0, vi1);
                        var vs1 = Sse2.PackSignedSaturate(vi2, vi3);

                        var vb0 = Sse2.PackUnsignedSaturate(vs0, vs1);

                        vb0 = Ssse3.Shuffle(vb0, vmasks);

                        if (ip >= ipe)
                        {
                            goto LastBlock;
                        }

                        // Full 16-byte store while op advances by 12; the next store overwrites the excess.
                        Sse2.Store(op, vb0);
                        op += Vector128 <byte> .Count * 3 / 4;
                        continue;

LastBlock:
                        // Final block of this path: write exactly 12 bytes (8 + 4).
                        var vl0 = vb0.AsInt64();
                        Sse2.StoreScalar((long *)op, vl0);
                        Sse.StoreScalar((float *)(op + sizeof(long)), Sse2.UnpackHigh(vl0, vl0).AsSingle());                        // https://github.com/dotnet/corefx/issues/41816
                        op += Vector128 <byte> .Count * 3 / 4;
                        break;
                    } while (true);
                    ipe += Vector128 <byte> .Count;
                }
                else
#endif
                {
                    // Fallback path using VectorF (declared elsewhere; presumably Vector<float> -- confirm).
                    var vmin   = new VectorF(byte.MinValue);
                    var vmax   = new VectorF(byte.MaxValue);
                    var vround = new VectorF(0.5f);

                    ipe -= VectorF.Count;
                    while (ip <= ipe)
                    {
                        // Scale by 255, add 0.5, then clamp to [0, 255].
                        var v = Unsafe.ReadUnaligned <VectorF>(ip) * vmax + vround;
                        v   = v.Clamp(vmin, vmax);
                        ip += VectorF.Count;

#if VECTOR_CONVERT
                        var vi = Vector.ConvertToInt32(v);
#else
                        var vi = v;
#endif

                        // First pixel: elements 0..2; element 3 is skipped.
                        op[0] = (byte)vi[0];
                        op[1] = (byte)vi[1];
                        op[2] = (byte)vi[2];

                        // When the vector holds 8 floats there is a second pixel in elements 4..6.
                        if (VectorF.Count == 8)
                        {
                            op[3] = (byte)vi[4];
                            op[4] = (byte)vi[5];
                            op[5] = (byte)vi[6];
                        }
                        // Advance by 3 bytes for every 4 floats consumed.
                        op += VectorF.Count - VectorF.Count / 4;
                    }
                    ipe += VectorF.Count;
                }

                // Scalar tail: one pixel (4 floats in, 3 bytes out) per iteration.
                // FixToByte is defined elsewhere; its exact rounding/clamping is not visible here.
                while (ip < ipe)
                {
                    op[0] = FixToByte(ip[0]);
                    op[1] = FixToByte(ip[1]);
                    op[2] = FixToByte(ip[2]);

                    ip += 4;
                    op += 3;
                }
            }
Exemplo n.º 11
0
            /// <summary>
            /// Converts one line of 4-float pixels to 4-byte pixels. The first three values of each
            /// pixel are divided by the 4th before scaling to the byte range, and pixels whose 4th
            /// value is below 0.5/255 are written as all zero (see the scalar tail loop).
            /// </summary>
            /// <remarks>
            /// NOTE(review): judging by the mask names the 4th value is alpha and this undoes
            /// premultiplication -- confirm against the caller.
            /// </remarks>
            /// <param name="ipstart">Start of the input line; reinterpreted as <c>float*</c>.</param>
            /// <param name="opstart">Start of the output line.</param>
            /// <param name="cb">Length of the input line in bytes.</param>
            unsafe void IConversionProcessor.ConvertLine(byte *ipstart, byte *opstart, int cb)
            {
                float *ip = (float *)ipstart, ipe = (float *)(ipstart + cb);
                byte * op = opstart;

#if HWINTRINSICS
                // AVX2 path: blocks of 32 floats (8 pixels) in, 32 bytes out.
                if (Avx2.IsSupported)
                {
                    var vzero  = Vector256 <float> .Zero;
                    var vmin   = Vector256.Create(0.5f / byte.MaxValue);
                    var vscale = Vector256.Create((float)byte.MaxValue);

                    var vmaskp = Avx.LoadVector256((int *)Unsafe.AsPointer(ref MemoryMarshal.GetReference(HWIntrinsics.PermuteMaskDeinterleave8x32)));

                    // Bias the end pointer so "ip <= ipe" means a full block is still available.
                    ipe -= Vector256 <byte> .Count;
                    while (ip <= ipe)
                    {
                        var vf0 = Avx.LoadVector256(ip);
                        var vf1 = Avx.LoadVector256(ip + Vector256 <float> .Count);
                        var vf2 = Avx.LoadVector256(ip + Vector256 <float> .Count * 2);
                        var vf3 = Avx.LoadVector256(ip + Vector256 <float> .Count * 3);
                        ip += Vector256 <byte> .Count;

                        // NOTE(review): ShuffleMaskAlpha is defined elsewhere; presumably it replicates
                        // each pixel's 4th value across that pixel's four slots -- confirm.
                        var vfa0 = Avx.Shuffle(vf0, vf0, HWIntrinsics.ShuffleMaskAlpha);
                        var vfa1 = Avx.Shuffle(vf1, vf1, HWIntrinsics.ShuffleMaskAlpha);
                        var vfa2 = Avx.Shuffle(vf2, vf2, HWIntrinsics.ShuffleMaskAlpha);
                        var vfa3 = Avx.Shuffle(vf3, vf3, HWIntrinsics.ShuffleMaskAlpha);

                        // Floor the divisor at vmin so the reciprocal below never sees a smaller value.
                        vfa0 = Avx.Max(vfa0, vmin);
                        vfa1 = Avx.Max(vfa1, vmin);
                        vfa2 = Avx.Max(vfa2, vmin);
                        vfa3 = Avx.Max(vfa3, vmin);

                        // Divide by multiplying with the reciprocal.
                        vf0 = Avx.Multiply(vf0, Avx.Reciprocal(vfa0));
                        vf1 = Avx.Multiply(vf1, Avx.Reciprocal(vfa1));
                        vf2 = Avx.Multiply(vf2, Avx.Reciprocal(vfa2));
                        vf3 = Avx.Multiply(vf3, Avx.Reciprocal(vfa3));

                        // Take the slots selected by BlendMaskAlpha from the floored divisor vector
                        // (presumably restoring the 4th channel itself -- confirm the mask value).
                        vf0 = Avx.Blend(vf0, vfa0, HWIntrinsics.BlendMaskAlpha);
                        vf1 = Avx.Blend(vf1, vfa1, HWIntrinsics.BlendMaskAlpha);
                        vf2 = Avx.Blend(vf2, vfa2, HWIntrinsics.BlendMaskAlpha);
                        vf3 = Avx.Blend(vf3, vfa3, HWIntrinsics.BlendMaskAlpha);

                        // Replace with zero every slot whose divisor equals vmin, i.e. was at or below the floor.
                        vf0 = Avx.BlendVariable(vf0, vzero, HWIntrinsics.AvxCompareEqual(vfa0, vmin));
                        vf1 = Avx.BlendVariable(vf1, vzero, HWIntrinsics.AvxCompareEqual(vfa1, vmin));
                        vf2 = Avx.BlendVariable(vf2, vzero, HWIntrinsics.AvxCompareEqual(vfa2, vmin));
                        vf3 = Avx.BlendVariable(vf3, vzero, HWIntrinsics.AvxCompareEqual(vfa3, vmin));

                        // Scale to the byte range.
                        vf0 = Avx.Multiply(vf0, vscale);
                        vf1 = Avx.Multiply(vf1, vscale);
                        vf2 = Avx.Multiply(vf2, vscale);
                        vf3 = Avx.Multiply(vf3, vscale);

                        var vi0 = Avx.ConvertToVector256Int32(vf0);
                        var vi1 = Avx.ConvertToVector256Int32(vf1);
                        var vi2 = Avx.ConvertToVector256Int32(vf2);
                        var vi3 = Avx.ConvertToVector256Int32(vf3);

                        // Narrow int32 -> int16 -> byte with saturation, then reorder the 32-bit groups
                        // with the deinterleave mask.
                        var vs0 = Avx2.PackSignedSaturate(vi0, vi1);
                        var vs1 = Avx2.PackSignedSaturate(vi2, vi3);

                        var vb0 = Avx2.PackUnsignedSaturate(vs0, vs1);
                        vb0 = Avx2.PermuteVar8x32(vb0.AsInt32(), vmaskp).AsByte();

                        Avx.Store(op, vb0);
                        op += Vector256 <byte> .Count;
                    }
                    // Undo the bias so the scalar tail loop sees the real end.
                    ipe += Vector256 <byte> .Count;
                }
                // SSE4.1 path: same steps with blocks of 16 floats (4 pixels) in, 16 bytes out.
                else if (Sse41.IsSupported)
                {
                    var vzero  = Vector128 <float> .Zero;
                    var vmin   = Vector128.Create(0.5f / byte.MaxValue);
                    var vscale = Vector128.Create((float)byte.MaxValue);

                    ipe -= Vector128 <byte> .Count;
                    while (ip <= ipe)
                    {
                        var vf0 = Sse.LoadVector128(ip);
                        var vf1 = Sse.LoadVector128(ip + Vector128 <float> .Count);
                        var vf2 = Sse.LoadVector128(ip + Vector128 <float> .Count * 2);
                        var vf3 = Sse.LoadVector128(ip + Vector128 <float> .Count * 3);
                        ip += Vector128 <byte> .Count;

                        var vfa0 = Sse.Shuffle(vf0, vf0, HWIntrinsics.ShuffleMaskAlpha);
                        var vfa1 = Sse.Shuffle(vf1, vf1, HWIntrinsics.ShuffleMaskAlpha);
                        var vfa2 = Sse.Shuffle(vf2, vf2, HWIntrinsics.ShuffleMaskAlpha);
                        var vfa3 = Sse.Shuffle(vf3, vf3, HWIntrinsics.ShuffleMaskAlpha);

                        vfa0 = Sse.Max(vfa0, vmin);
                        vfa1 = Sse.Max(vfa1, vmin);
                        vfa2 = Sse.Max(vfa2, vmin);
                        vfa3 = Sse.Max(vfa3, vmin);

                        vf0 = Sse.Multiply(vf0, Sse.Reciprocal(vfa0));
                        vf1 = Sse.Multiply(vf1, Sse.Reciprocal(vfa1));
                        vf2 = Sse.Multiply(vf2, Sse.Reciprocal(vfa2));
                        vf3 = Sse.Multiply(vf3, Sse.Reciprocal(vfa3));

                        vf0 = Sse41.Blend(vf0, vfa0, HWIntrinsics.BlendMaskAlpha);
                        vf1 = Sse41.Blend(vf1, vfa1, HWIntrinsics.BlendMaskAlpha);
                        vf2 = Sse41.Blend(vf2, vfa2, HWIntrinsics.BlendMaskAlpha);
                        vf3 = Sse41.Blend(vf3, vfa3, HWIntrinsics.BlendMaskAlpha);

                        vf0 = Sse41.BlendVariable(vf0, vzero, Sse.CompareEqual(vfa0, vmin));
                        vf1 = Sse41.BlendVariable(vf1, vzero, Sse.CompareEqual(vfa1, vmin));
                        vf2 = Sse41.BlendVariable(vf2, vzero, Sse.CompareEqual(vfa2, vmin));
                        vf3 = Sse41.BlendVariable(vf3, vzero, Sse.CompareEqual(vfa3, vmin));

                        vf0 = Sse.Multiply(vf0, vscale);
                        vf1 = Sse.Multiply(vf1, vscale);
                        vf2 = Sse.Multiply(vf2, vscale);
                        vf3 = Sse.Multiply(vf3, vscale);

                        var vi0 = Sse2.ConvertToVector128Int32(vf0);
                        var vi1 = Sse2.ConvertToVector128Int32(vf1);
                        var vi2 = Sse2.ConvertToVector128Int32(vf2);
                        var vi3 = Sse2.ConvertToVector128Int32(vf3);

                        var vs0 = Sse2.PackSignedSaturate(vi0, vi1);
                        var vs1 = Sse2.PackSignedSaturate(vi2, vi3);

                        var vb0 = Sse2.PackUnsignedSaturate(vs0, vs1);

                        Sse2.Store(op, vb0);
                        op += Vector128 <byte> .Count;
                    }
                    ipe += Vector128 <byte> .Count;
                }
#endif

                // Scalar constants: fmax = 255, fround = 0.5, fmin = 0.5 / 255.
                // NOTE(review): they are read out of Vector4 instances; the reason is not visible here.
                float fmax = new Vector4(byte.MaxValue).X, fround = new Vector4(0.5f).X, fmin = fround / fmax;

                // Scalar tail (and the only path when no vector path ran): one pixel per iteration.
                while (ip < ipe)
                {
                    float f3 = ip[3];
                    if (f3 < fmin)
                    {
                        // 4th value too small to divide by: write the whole 4-byte pixel as zero.
                        *(uint *)op = 0;
                    }
                    else
                    {
                        // 255 / f3 folds the division and the byte-range scale into one factor.
                        float f3i = fmax / f3;
                        byte  o0  = ClampToByte((int)(ip[0] * f3i + fround));
                        byte  o1  = ClampToByte((int)(ip[1] * f3i + fround));
                        byte  o2  = ClampToByte((int)(ip[2] * f3i + fround));
                        // The 4th value is only scaled, not divided.
                        byte  o3  = ClampToByte((int)(f3 * fmax + fround));
                        op[0] = o0;
                        op[1] = o1;
                        op[2] = o2;
                        op[3] = o3;
                    }

                    ip += 4;
                    op += 4;
                }
            }
Exemplo n.º 12
0
            /// <summary>
            /// Converts one line of float samples to bytes, producing one output byte per input float.
            /// </summary>
            /// <param name="ipstart">Start of the input line; reinterpreted as <c>float*</c>.</param>
            /// <param name="opstart">Start of the output line.</param>
            /// <param name="cb">Length of the input line in bytes.</param>
            unsafe void IConversionProcessor.ConvertLine(byte *ipstart, byte *opstart, int cb)
            {
                float *ip = (float *)ipstart, ipe = (float *)(ipstart + cb);
                byte * op = opstart;

#if HWINTRINSICS
                // AVX2 path: blocks of 32 floats in, 32 bytes out.
                if (Avx2.IsSupported)
                {
                    var vscale = Vector256.Create((float)byte.MaxValue);
                    var vmaskp = Avx.LoadVector256((int *)Unsafe.AsPointer(ref MemoryMarshal.GetReference(HWIntrinsics.PermuteMaskDeinterleave8x32)));

                    // Bias the end pointer so "ip <= ipe" means a full block is still available.
                    ipe -= Vector256 <byte> .Count;
                    while (ip <= ipe)
                    {
                        // Load 32 floats and scale them by 255.
                        var vf0 = Avx.Multiply(vscale, Avx.LoadVector256(ip));
                        var vf1 = Avx.Multiply(vscale, Avx.LoadVector256(ip + Vector256 <float> .Count));
                        var vf2 = Avx.Multiply(vscale, Avx.LoadVector256(ip + Vector256 <float> .Count * 2));
                        var vf3 = Avx.Multiply(vscale, Avx.LoadVector256(ip + Vector256 <float> .Count * 3));
                        ip += Vector256 <byte> .Count;

                        var vi0 = Avx.ConvertToVector256Int32(vf0);
                        var vi1 = Avx.ConvertToVector256Int32(vf1);
                        var vi2 = Avx.ConvertToVector256Int32(vf2);
                        var vi3 = Avx.ConvertToVector256Int32(vf3);

                        // Narrow int32 -> int16 -> byte with saturation.
                        var vs0 = Avx2.PackSignedSaturate(vi0, vi1);
                        var vs1 = Avx2.PackSignedSaturate(vi2, vi3);

                        var vb0 = Avx2.PackUnsignedSaturate(vs0, vs1);
                        // NOTE(review): the mask is defined elsewhere; by its name it restores element
                        // order after the per-128-bit-lane packs -- confirm.
                        vb0 = Avx2.PermuteVar8x32(vb0.AsInt32(), vmaskp).AsByte();

                        Avx.Store(op, vb0);
                        op += Vector256 <byte> .Count;
                    }
                    // Undo the bias so the scalar tail loop sees the real end.
                    ipe += Vector256 <byte> .Count;
                }
                // SSE2 path: blocks of 16 floats in, 16 bytes out.
                else if (Sse2.IsSupported)
                {
                    var vscale = Vector128.Create((float)byte.MaxValue);

                    ipe -= Vector128 <byte> .Count;
                    while (ip <= ipe)
                    {
                        var vf0 = Sse.Multiply(vscale, Sse.LoadVector128(ip));
                        var vf1 = Sse.Multiply(vscale, Sse.LoadVector128(ip + Vector128 <float> .Count));
                        var vf2 = Sse.Multiply(vscale, Sse.LoadVector128(ip + Vector128 <float> .Count * 2));
                        var vf3 = Sse.Multiply(vscale, Sse.LoadVector128(ip + Vector128 <float> .Count * 3));
                        ip += Vector128 <byte> .Count;

                        var vi0 = Sse2.ConvertToVector128Int32(vf0);
                        var vi1 = Sse2.ConvertToVector128Int32(vf1);
                        var vi2 = Sse2.ConvertToVector128Int32(vf2);
                        var vi3 = Sse2.ConvertToVector128Int32(vf3);

                        var vs0 = Sse2.PackSignedSaturate(vi0, vi1);
                        var vs1 = Sse2.PackSignedSaturate(vi2, vi3);

                        var vb0 = Sse2.PackUnsignedSaturate(vs0, vs1);

                        Sse2.Store(op, vb0);
                        op += Vector128 <byte> .Count;
                    }
                    ipe += Vector128 <byte> .Count;
                }
                else
#endif
                {
                    // Fallback path using VectorF (declared elsewhere; presumably Vector<float> -- confirm).
#if VECTOR_CONVERT
                    // With vector conversion available, each iteration handles Vector<byte>.Count floats.
                    int unrollCount = Vector <byte> .Count;
                    var vmin        = new Vector <short>(byte.MinValue);
                    var vmax        = new Vector <short>(byte.MaxValue);
                    var vscale      = new VectorF(byte.MaxValue);
#else
                    // Without it, each iteration handles a single VectorF.
                    int unrollCount = VectorF.Count;
                    var vmin        = new VectorF(byte.MinValue);
                    var vmax        = new VectorF(byte.MaxValue);
#endif
                    var vround = new VectorF(0.5f);

                    ipe -= unrollCount;
                    while (ip <= ipe)
                    {
#if VECTOR_CONVERT
                        var vf0 = Unsafe.ReadUnaligned <VectorF>(ip);
                        var vf1 = Unsafe.ReadUnaligned <VectorF>(ip + VectorF.Count);
                        var vf2 = Unsafe.ReadUnaligned <VectorF>(ip + VectorF.Count * 2);
                        var vf3 = Unsafe.ReadUnaligned <VectorF>(ip + VectorF.Count * 3);

                        // Scale by 255 and add 0.5 before converting to int.
                        vf0 = vf0 * vscale + vround;
                        vf1 = vf1 * vscale + vround;
                        vf2 = vf2 * vscale + vround;
                        vf3 = vf3 * vscale + vround;

                        var vi0 = Vector.ConvertToInt32(vf0);
                        var vi1 = Vector.ConvertToInt32(vf1);
                        var vi2 = Vector.ConvertToInt32(vf2);
                        var vi3 = Vector.ConvertToInt32(vf3);

                        // Narrow to 16-bit, clamp to [0, 255], then narrow to bytes.
                        var vs0 = Vector.Narrow(vi0, vi1);
                        var vs1 = Vector.Narrow(vi2, vi3);

                        vs0 = vs0.Clamp(vmin, vmax);
                        vs1 = vs1.Clamp(vmin, vmax);

                        var vb = Vector.Narrow(Vector.AsVectorUInt16(vs0), Vector.AsVectorUInt16(vs1));
                        Unsafe.WriteUnaligned(op, vb);
#else
                        // Here vmax (255) doubles as the scale factor; add 0.5, then clamp to [0, 255].
                        var v = Unsafe.ReadUnaligned <VectorF>(ip) * vmax + vround;
                        v = v.Clamp(vmin, vmax);

                        // Element-wise cast from float to byte.
                        op[0] = (byte)v[0];
                        op[1] = (byte)v[1];
                        op[2] = (byte)v[2];
                        op[3] = (byte)v[3];

                        // Elements 4..7 exist only when the vector holds 8 floats.
                        if (VectorF.Count == 8)
                        {
                            op[4] = (byte)v[4];
                            op[5] = (byte)v[5];
                            op[6] = (byte)v[6];
                            op[7] = (byte)v[7];
                        }
#endif

                        ip += unrollCount;
                        op += unrollCount;
                    }
                    ipe += unrollCount;
                }

                // Scalar tail: one float per iteration.
                // FixToByte is defined elsewhere; its exact rounding/clamping is not visible here.
                while (ip < ipe)
                {
                    op[0] = FixToByte(ip[0]);
                    ip++;
                    op++;
                }
            }