Beispiel #1
0
        /// <summary>
        /// Transposes a matrix given as three 4-component <c>sbyte</c> columns into
        /// four 3-component columns.
        /// </summary>
        /// <param name="v">Source matrix; its columns <c>c0</c>, <c>c1</c> and <c>c2</c> are read.</param>
        /// <returns>
        /// On the SIMD paths, result column i begins with byte i of <c>c0</c>, <c>c1</c> and <c>c2</c>.
        /// The scalar path forwards the twelve components, <c>c0</c> first, to the scalar constructor
        /// (presumably row-major, which would give the same matrix - confirm against sbyte3x4).
        /// </returns>
        public static sbyte3x4 transpose(sbyte4x3 v)
        {
            if (Ssse3.IsSsse3Supported)
            {
                // Bytes used below, after both interleaves (cN[k] = byte k of column N):
                //   0..3   = c0[0] c1[0] c2[0] c2[1]
                //   4..7   = c0[1] c1[1] c2[2] c2[3]
                //   8..9   = c0[2] c1[2]
                //   12..13 = c0[3] c1[3]
                v128 c0c1Pairs   = Sse2.unpacklo_epi8(v.c0, v.c1);
                v128 interleaved = Sse2.unpacklo_epi16(c0c1Pairs, v.c2);

                // Only the first three selector bytes matter for a 3-component column.
                v128 pickColumn1 = new v128(4, 5, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
                v128 pickColumn2 = new v128(8, 9, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
                v128 pickColumn3 = new v128(12, 13, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);

                return new sbyte3x4(interleaved,
                                    Ssse3.shuffle_epi8(interleaved, pickColumn1),
                                    Ssse3.shuffle_epi8(interleaved, pickColumn2),
                                    Ssse3.shuffle_epi8(interleaved, pickColumn3));
            }

            if (Sse2.IsSse2Supported)
            {
                // Without a byte shuffle, each column is built by shifting the c0/c1 pairs
                // and c2 down so that the wanted bytes land in the first 16-bit slots.
                v128 c0c1Pairs = Sse2.unpacklo_epi8(v.c0, v.c1);

                v128 column0 = Sse2.unpacklo_epi16(c0c1Pairs, v.c2);
                v128 column1 = Sse2.unpacklo_epi16(Sse2.bsrli_si128(c0c1Pairs, 2 * sizeof(sbyte)),
                                                   Sse2.bsrli_si128(v.c2, 1 * sizeof(sbyte)));
                v128 column2 = Sse2.unpacklo_epi16(Sse2.bsrli_si128(c0c1Pairs, 4 * sizeof(sbyte)),
                                                   Sse2.bsrli_si128(v.c2, 2 * sizeof(sbyte)));
                v128 column3 = Sse2.unpacklo_epi16(Sse2.bsrli_si128(c0c1Pairs, 6 * sizeof(sbyte)),
                                                   Sse2.bsrli_si128(v.c2, 3 * sizeof(sbyte)));

                return new sbyte3x4(column0, column1, column2, column3);
            }

            return new sbyte3x4(v.c0.x, v.c0.y, v.c0.z, v.c0.w,
                                v.c1.x, v.c1.y, v.c1.z, v.c1.w,
                                v.c2.x, v.c2.y, v.c2.z, v.c2.w);
        }
Beispiel #2
0
        /// <summary>
        /// Transposes a matrix given as three 2-component <c>sbyte</c> columns into
        /// two 3-component columns.
        /// </summary>
        /// <param name="v">Source matrix; its columns <c>c0</c>, <c>c1</c> and <c>c2</c> are read.</param>
        /// <returns>
        /// On the SIMD paths, result column i begins with byte i of <c>c0</c>, <c>c1</c> and <c>c2</c>.
        /// The scalar path forwards the six components, <c>c0</c> first, to the scalar constructor
        /// (presumably row-major - confirm against sbyte3x2).
        /// </returns>
        public static sbyte3x2 transpose(sbyte2x3 v)
        {
            if (Sse2.IsSse2Supported)
            {
                // c0[0] c1[0] c0[1] c1[1] ... (cN[k] = byte k of column N)
                v128 c0c1Pairs = Sse2.unpacklo_epi8(v.c0, v.c1);

                if (Ssse3.IsSsse3Supported)
                {
                    // Bytes 0..5 become: c0[0] c1[0] c2[0] c2[1] c0[1] c1[1]
                    v128 interleaved = Sse2.unpacklo_epi16(c0c1Pairs, v.c2);
                    v128 pickColumn1 = new v128(4, 5, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);

                    return new sbyte3x2(interleaved,
                                        Ssse3.shuffle_epi8(interleaved, pickColumn1));
                }

                // SSE2 only: shift the second pair and the second c2 byte to the front instead.
                v128 column0 = Sse2.unpacklo_epi16(c0c1Pairs, v.c2);
                v128 column1 = Sse2.unpacklo_epi16(Sse2.bsrli_si128(c0c1Pairs, 2 * sizeof(sbyte)),
                                                   Sse2.bsrli_si128(v.c2, sizeof(sbyte)));

                return new sbyte3x2(column0, column1);
            }

            return new sbyte3x2(v.c0.x, v.c0.y,
                                v.c1.x, v.c1.y,
                                v.c2.x, v.c2.y);
        }
Beispiel #3
0
        /// <summary>
        /// Counts the trailing zero bits of every byte component of <paramref name="x"/>.
        /// </summary>
        /// <param name="x">Vector of bytes to inspect.</param>
        /// <returns>
        /// Per component, the number of trailing zero bits. On the SSSE3 path a zero byte yields 8
        /// (entry 0 of both lookup tables).
        /// </returns>
        public static byte4 tzcnt(byte4 x)
        {
            if (Ssse3.IsSsse3Supported)
            {
                v128 nibbleMask = new v128(0x0F0F_0F0F);

                // Table index = nibble value. The low table gives the nibble's own trailing-zero
                // count, the high table the same count plus 4; an empty nibble maps to 8.
                v128 lowNibbleCounts  = new v128(8, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0);
                v128 highNibbleCounts = new v128(8, 4, 5, 4, 6, 4, 5, 4, 7, 4, 5, 4, 6, 4, 5, 4);

                v128 lowNibbles  = Sse2.and_si128(nibbleMask, x);
                v128 highNibbles = Sse2.and_si128(nibbleMask, Sse2.srli_epi16(x, 4));

                v128 fromLow  = Ssse3.shuffle_epi8(lowNibbleCounts, lowNibbles);
                v128 fromHigh = Ssse3.shuffle_epi8(highNibbleCounts, highNibbles);

                // The smaller candidate wins: a non-empty low nibble always gives a value below 4.
                return Sse2.min_epu8(fromLow, fromHigh);
            }

            if (Sse2.IsSse2Supported)
            {
                v128 zero = default(v128);

                // x AND (-x) keeps only the lowest set bit of each byte.
                v128 lowestBit = x & (byte4)(-(sbyte4)x);

                // Each term contributes its weight when the isolated bit misses the tested mask
                // (presumably Mask.BlendV picks its second argument where the mask is set - confirm).
                byte4 whenZero   = Mask.BlendV(zero, new byte4(1), Sse2.cmpeq_epi8(lowestBit, zero));
                byte4 weightFour = Mask.BlendV(zero, new byte4(4), Sse2.cmpeq_epi8(lowestBit & (byte4)0x0F, zero));
                byte4 weightTwo  = Mask.BlendV(zero, new byte4(2), Sse2.cmpeq_epi8(lowestBit & (byte4)0x33, zero));
                byte4 weightOne  = Mask.BlendV(zero, new byte4(1), Sse2.cmpeq_epi8(lowestBit & (byte4)0x55, zero));

                return (whenZero + weightFour) + (weightTwo + weightOne);
            }

            return new byte4(tzcnt(x.x), tzcnt(x.y), tzcnt(x.z), tzcnt(x.w));
        }
Beispiel #4
0
        /// <summary>
        /// Counts the trailing zero bits of every 16-bit component of <paramref name="x"/>.
        /// </summary>
        /// <param name="x">Vector of unsigned 16-bit values to inspect.</param>
        /// <returns>
        /// Per component, the number of trailing zero bits. On the SSSE3 path a zero component yields 16.
        /// </returns>
        public static ushort8 tzcnt(ushort8 x)
        {
            if (Ssse3.IsSsse3Supported)
            {
                v128 nibbleMask = new v128(0x0F0F_0F0F);

                // Table index = nibble value; an empty nibble maps to 16 so that an all-zero
                // byte ends up reporting 16.
                v128 lowNibbleCounts  = new v128(16, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0);
                v128 highNibbleCounts = new v128(16, 4, 5, 4, 6, 4, 5, 4, 7, 4, 5, 4, 6, 4, 5, 4);

                v128 lowNibbles  = Sse2.and_si128(nibbleMask, x);
                v128 highNibbles = Sse2.and_si128(nibbleMask, Sse2.srli_epi16(x, 4));

                // Trailing-zero count of each individual byte (16 for a zero byte).
                v128 perByte = Sse2.min_epu8(Ssse3.shuffle_epi8(lowNibbleCounts, lowNibbles),
                                             Ssse3.shuffle_epi8(highNibbleCounts, highNibbles));

                // Move (high-byte count + 8) into the low byte of each lane. The shifted operand
                // has a zero high byte, so the unsigned minimum also clears the lane's high byte.
                v128 highBytePlusEight = Sse2.srli_epi16(Sse2.add_epi8(perByte, Sse2.set1_epi8(8)), 8);

                return Sse2.min_epu8(perByte, highBytePlusEight);
            }

            if (Sse2.IsSse2Supported)
            {
                v128 zero = default(v128);

                // x AND (-x) keeps only the lowest set bit of each 16-bit lane.
                v128 lowestBit = x & (ushort8)(-((short8)x));

                // Each term contributes its weight when the isolated bit misses the tested mask
                // (presumably Mask.BlendV picks its second argument where the mask is set - confirm).
                ushort8 whenZero    = Mask.BlendV(zero, new ushort8(1), Sse2.cmpeq_epi16(lowestBit, zero));
                ushort8 weightEight = Mask.BlendV(zero, new ushort8(8), Sse2.cmpeq_epi16(lowestBit & (ushort8)0x00FF, zero));
                ushort8 weightFour  = Mask.BlendV(zero, new ushort8(4), Sse2.cmpeq_epi16(lowestBit & (ushort8)0x0F0F, zero));
                ushort8 weightTwo   = Mask.BlendV(zero, new ushort8(2), Sse2.cmpeq_epi16(lowestBit & (ushort8)0x3333, zero));
                ushort8 weightOne   = Mask.BlendV(zero, new ushort8(1), Sse2.cmpeq_epi16(lowestBit & (ushort8)0x5555, zero));

                return (whenZero + weightEight) + ((weightFour + weightTwo) + weightOne);
            }

            return new ushort8(tzcnt(x.x0),
                               tzcnt(x.x1),
                               tzcnt(x.x2),
                               tzcnt(x.x3),
                               tzcnt(x.x4),
                               tzcnt(x.x5),
                               tzcnt(x.x6),
                               tzcnt(x.x7));
        }
Beispiel #5
0
        /// <summary>
        /// Counts the trailing zero bits of every byte component of <paramref name="x"/>.
        /// </summary>
        /// <param name="x">Vector of sixteen bytes to inspect.</param>
        /// <returns>
        /// Per component, the number of trailing zero bits. On the SSSE3 path a zero byte yields 8
        /// (entry 0 of both lookup tables).
        /// </returns>
        public static byte16 tzcnt(byte16 x)
        {
            if (Ssse3.IsSsse3Supported)
            {
                v128 nibbleMask = new v128(0x0F0F_0F0F);

                // Table index = nibble value. The low table gives the nibble's own trailing-zero
                // count, the high table the same count plus 4; an empty nibble maps to 8.
                v128 lowNibbleCounts  = new v128(8, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0);
                v128 highNibbleCounts = new v128(8, 4, 5, 4, 6, 4, 5, 4, 7, 4, 5, 4, 6, 4, 5, 4);

                v128 lowNibbles  = Sse2.and_si128(nibbleMask, x);
                v128 highNibbles = Sse2.and_si128(nibbleMask, Sse2.srli_epi16(x, 4));

                v128 fromLow  = Ssse3.shuffle_epi8(lowNibbleCounts, lowNibbles);
                v128 fromHigh = Ssse3.shuffle_epi8(highNibbleCounts, highNibbles);

                return Sse2.min_epu8(fromLow, fromHigh);
            }

            if (Sse2.IsSse2Supported)
            {
                v128 zero = default(v128);

                // x AND (-x) keeps only the lowest set bit of each byte.
                v128 lowestBit = x & (byte16)(-(sbyte16)x);

                // Each term contributes its weight when the isolated bit misses the tested mask
                // (presumably Mask.BlendV picks its second argument where the mask is set - confirm).
                byte16 whenZero   = Mask.BlendV(zero, new byte16(1), Sse2.cmpeq_epi8(lowestBit, zero));
                byte16 weightFour = Mask.BlendV(zero, new byte16(4), Sse2.cmpeq_epi8(lowestBit & (byte16)0x0F, zero));
                byte16 weightTwo  = Mask.BlendV(zero, new byte16(2), Sse2.cmpeq_epi8(lowestBit & (byte16)0x33, zero));
                byte16 weightOne  = Mask.BlendV(zero, new byte16(1), Sse2.cmpeq_epi8(lowestBit & (byte16)0x55, zero));

                return (whenZero + weightFour) + (weightTwo + weightOne);
            }

            return new byte16(tzcnt(x.x0),
                              tzcnt(x.x1),
                              tzcnt(x.x2),
                              tzcnt(x.x3),
                              tzcnt(x.x4),
                              tzcnt(x.x5),
                              tzcnt(x.x6),
                              tzcnt(x.x7),
                              tzcnt(x.x8),
                              tzcnt(x.x9),
                              tzcnt(x.x10),
                              tzcnt(x.x11),
                              tzcnt(x.x12),
                              tzcnt(x.x13),
                              tzcnt(x.x14),
                              tzcnt(x.x15));
        }
Beispiel #6
0
 /// <summary>
 /// Tests whether all sixteen components of <paramref name="c"/> hold the same value.
 /// </summary>
 /// <param name="c">Vector to test.</param>
 /// <returns><c>true</c> when every component equals the first one.</returns>
 public static bool all_eq(byte16 c)
 {
     if (Ssse3.IsSsse3Supported)
     {
         // An all-zero selector copies byte 0 into every lane.
         byte16 firstEverywhere = (byte16)Ssse3.shuffle_epi8(c, default(v128));

         return firstEverywhere.Equals(c);
     }

     // Non-short-circuit '&' throughout; every component is compared against x0.
     bool group0 = (c.x0 == c.x1 & c.x0 == c.x2) & (c.x0 == c.x3 & c.x0 == c.x4);
     bool group1 = (c.x0 == c.x5 & c.x0 == c.x6) & (c.x0 == c.x7 & c.x0 == c.x8);
     bool group2 = (c.x0 == c.x9 & c.x0 == c.x10) & (c.x0 == c.x11 & c.x0 == c.x12);
     bool group3 = (c.x0 == c.x13 & c.x0 == c.x14) & c.x0 == c.x15;

     return (group0 & group1) & (group2 & group3);
 }
Beispiel #7
0
 /// <summary>
 /// Tests whether all eight components of <paramref name="c"/> hold the same value.
 /// </summary>
 /// <param name="c">Vector to test.</param>
 /// <returns><c>true</c> when every component equals the first one.</returns>
 public static bool all_eq(short8 c)
 {
     if (Ssse3.IsSsse3Supported)
     {
         // Repeating selector bytes (0, 1) copies the first 16-bit lane into every lane.
         v128 repeatFirstLane = new v128(0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1);
         short8 firstEverywhere = (short8)Ssse3.shuffle_epi8(c, repeatFirstLane);

         return firstEverywhere.Equals(c);
     }

     // Non-short-circuit '&' throughout; every component is compared against x0.
     bool group0 = (c.x0 == c.x1 & c.x0 == c.x2) & (c.x0 == c.x3 & c.x0 == c.x4);
     bool group1 = (c.x0 == c.x5 & c.x0 == c.x6) & c.x0 == c.x7;

     return group0 & group1;
 }
Beispiel #8
0
 /// <summary>
 /// Packs the low two bytes of each 32-bit lane of <paramref name="x"/> into the
 /// first eight bytes of the result.
 /// </summary>
 /// <param name="x">Register read as four 32-bit lanes.</param>
 /// <returns>
 /// Bytes 0, 1, 4, 5, 8, 9, 12, 13 of <paramref name="x"/>, in that order, in bytes 0..7.
 /// Bytes 8..15 depend on how the byte8 selector widens to v128 (presumably zero-filled,
 /// which repeats byte 0 - confirm).
 /// </returns>
 /// <exception cref="CPUFeatureCheckException">Thrown when SSSE3 is not supported.</exception>
 internal static v128 Int4ToShort4(v128 x)
 {
     if (Ssse3.IsSsse3Supported)
     {
         byte8 lowHalfOfEachInt = new byte8(0, 1, 4, 5, 8, 9, 12, 13);

         return Ssse3.shuffle_epi8(x, lowHalfOfEachInt);
     }

     throw new CPUFeatureCheckException();
 }
Beispiel #9
0
 /// <summary>
 /// Packs the low two bytes of each 64-bit lane of <paramref name="x"/> into the
 /// first four bytes of the result.
 /// </summary>
 /// <param name="x">Register read as two 64-bit lanes.</param>
 /// <returns>
 /// Bytes 0, 1, 8, 9 of <paramref name="x"/>, in that order, in bytes 0..3.
 /// The remaining bytes depend on how the byte4 selector widens to v128
 /// (presumably zero-filled, which repeats byte 0 - confirm).
 /// </returns>
 /// <exception cref="CPUFeatureCheckException">Thrown when SSSE3 is not supported.</exception>
 internal static v128 Long2ToShort2(v128 x)
 {
     if (Ssse3.IsSsse3Supported)
     {
         byte4 lowShortOfEachLong = new byte4(0, 1, 8, 9);

         return Ssse3.shuffle_epi8(x, lowShortOfEachLong);
     }

     throw new CPUFeatureCheckException();
 }
Beispiel #10
0
 /// <summary>
 /// Packs the lowest byte of each 32-bit lane of <paramref name="x"/> into the
 /// first four bytes of the result.
 /// </summary>
 /// <param name="x">Register read as four 32-bit lanes.</param>
 /// <returns>
 /// Bytes 0, 4, 8, 12 of <paramref name="x"/>, in that order, in bytes 0..3.
 /// The remaining bytes depend on how the byte4 selector widens to v128
 /// (presumably zero-filled, which repeats byte 0 - confirm).
 /// </returns>
 /// <exception cref="CPUFeatureCheckException">Thrown when SSSE3 is not supported.</exception>
 internal static v128 Int4ToByte4(v128 x)
 {
     if (Ssse3.IsSsse3Supported)
     {
         byte4 lowByteOfEachInt = new byte4(0, 4, 8, 12);

         return Ssse3.shuffle_epi8(x, lowByteOfEachInt);
     }

     throw new CPUFeatureCheckException();
 }
Beispiel #11
0
 /// <summary>
 /// Packs the lowest byte of each 16-bit component of <paramref name="x"/> into the
 /// first eight bytes of the result.
 /// </summary>
 /// <param name="x">Vector of eight 16-bit values.</param>
 /// <returns>
 /// Bytes 0, 2, 4, 6, 8, 10, 12, 14 of <paramref name="x"/>, in that order, in bytes 0..7.
 /// Bytes 8..15 depend on how the byte8 selector widens to v128 (presumably zero-filled,
 /// which repeats byte 0 - confirm).
 /// </returns>
 /// <exception cref="CPUFeatureCheckException">Thrown when SSSE3 is not supported.</exception>
 internal static v128 ShortToByte(short8 x)
 {
     if (Ssse3.IsSsse3Supported)
     {
         byte8 lowByteOfEachShort = new byte8(0, 2, 4, 6, 8, 10, 12, 14);

         return Ssse3.shuffle_epi8(x, lowByteOfEachShort);
     }

     throw new CPUFeatureCheckException();
 }
Beispiel #12
0
        /// <summary>
        /// Produces eight bytes, each the high byte of the 16-bit product of the matching
        /// component of <paramref name="max"/> and a fresh <c>NextState()</c> value.
        /// </summary>
        /// <param name="max">Per-component multiplier.</param>
        /// <returns>
        /// The high byte of each product. NOTE(review): presumably <c>NextState()</c> yields a value
        /// in [0, 255], which would keep every result below <paramref name="max"/> - confirm.
        /// </returns>
        public byte8 NextByte8(byte8 max)
        {
            // Both code paths start from the same product; NextState() runs eight times, in order.
            short8 widenedMax = (short8)max;
            short8 states     = new short8(NextState(), NextState(), NextState(), NextState(),
                                           NextState(), NextState(), NextState(), NextState());
            short8 products   = widenedMax * states;

            if (Ssse3.IsSsse3Supported)
            {
                // Odd byte indices are the high bytes of the 16-bit products.
                return Ssse3.shuffle_epi8(products, new byte8(1, 3, 5, 7, 9, 11, 13, 15));
            }

            return (byte8)(products >> 8);
        }
Beispiel #13
0
        /// <summary>
        /// Produces four bytes, each the high byte of the 16-bit product of the matching
        /// component of <paramref name="max"/> and a fresh <c>NextState()</c> value.
        /// </summary>
        /// <param name="max">Per-component multiplier.</param>
        /// <returns>
        /// The high byte of each product. NOTE(review): presumably <c>NextState()</c> yields a value
        /// in [0, 255], which would keep every result below <paramref name="max"/> - confirm.
        /// </returns>
        public byte4 NextByte4(byte4 max)
        {
            // Both code paths start from the same product; NextState() runs four times, in order.
            short4 widenedMax = (short4)max;
            short4 states     = new short4(NextState(), NextState(), NextState(), NextState());
            short4 products   = widenedMax * states;

            if (Ssse3.IsSsse3Supported)
            {
                // Odd byte indices are the high bytes of the 16-bit products.
                return Ssse3.shuffle_epi8(products, new byte4(1, 3, 5, 7));
            }

            return (byte4)(products >> 8);
        }
Beispiel #14
0
        /// <summary>
        /// Produces three bytes, each the high byte of the 16-bit product of the matching
        /// component of <paramref name="max"/> and a fresh <c>NextState()</c> value.
        /// </summary>
        /// <param name="max">Per-component multiplier.</param>
        /// <returns>
        /// The high byte of each product. NOTE(review): presumably <c>NextState()</c> yields a value
        /// in [0, 255], which would keep every result below <paramref name="max"/> - confirm.
        /// </returns>
        public byte3 NextByte3(byte3 max)
        {
            // Both code paths start from the same product; NextState() runs three times, in order.
            short3 widenedMax = (short3)max;
            short3 states     = new short3(NextState(), NextState(), NextState());
            short3 products   = widenedMax * states;

            if (Ssse3.IsSsse3Supported)
            {
                // Bytes 1, 3 and 5 are the high bytes of the three products; the fourth
                // selector entry only fills the byte4 constructor.
                return Ssse3.shuffle_epi8(products, new byte4(1, 3, 5, 0));
            }

            return (byte3)(products >> 8);
        }
Beispiel #15
0
        /// <summary>
        /// Produces two bytes, each the high byte of the 16-bit product of the matching
        /// component of <paramref name="max"/> and a fresh <c>NextState()</c> value.
        /// </summary>
        /// <remarks>
        /// NOTE(review): this two-component overload is named <c>NextByte</c>, unlike the
        /// <c>NextByte3</c>/<c>NextByte4</c>/<c>NextByte8</c> pattern - possibly meant to be
        /// <c>NextByte2</c>. The name is kept because callers may depend on it.
        /// </remarks>
        /// <param name="max">Per-component multiplier.</param>
        /// <returns>
        /// The high byte of each product. NOTE(review): presumably <c>NextState()</c> yields a value
        /// in [0, 255], which would keep every result below <paramref name="max"/> - confirm.
        /// </returns>
        public byte2 NextByte(byte2 max)
        {
            // Both code paths start from the same product; NextState() runs twice, in order.
            short2 widenedMax = (short2)max;
            short2 states     = new short2(NextState(), NextState());
            short2 products   = widenedMax * states;

            if (Ssse3.IsSsse3Supported)
            {
                // Bytes 1 and 3 are the high bytes of the two products; the trailing
                // selector entries only fill the byte4 constructor.
                return Ssse3.shuffle_epi8(products, new byte4(1, 3, 0, 0));
            }

            return (byte2)(products >> 8);
        }
Beispiel #16
0
        /// <summary>
        /// Produces two bytes of the form <c>min + highByte((max - min) * NextState())</c>,
        /// computed per component with a fresh <c>NextState()</c> value each.
        /// </summary>
        /// <param name="min">Per-component offset added to the scaled value.</param>
        /// <param name="max">Per-component bound; <c>max - min</c> is the multiplier.</param>
        /// <returns>
        /// The offset values. NOTE(review): presumably <c>NextState()</c> yields a value in [0, 255],
        /// which would place every result in [min, max) - confirm.
        /// </returns>
        public byte2 NextByte2(byte2 min, byte2 max)
        {
            // Each component pair is passed to Assert.IsNotSmaller (presumably max >= min).
            Assert.IsNotSmaller(max.x, min.x);
            Assert.IsNotSmaller(max.y, min.y);

            // Both code paths start from the same product; NextState() runs twice, in order.
            short2 range    = (short2)(max - min);
            short2 states   = new short2(NextState(), NextState());
            short2 products = range * states;

            if (Ssse3.IsSsse3Supported)
            {
                // Bytes 1 and 3 are the high bytes of the two products.
                v128 highBytes = Ssse3.shuffle_epi8(products, new byte4(1, 3, 0, 0));

                return min + highBytes;
            }

            return min + (byte2)(products >> 8);
        }
Beispiel #17
0
        /// <summary>
        /// Counts the set bits of every byte component of <paramref name="x"/>.
        /// </summary>
        /// <param name="x">Vector of sixteen bytes to inspect.</param>
        /// <returns>Per component, the number of set bits.</returns>
        public static byte16 countbits(byte16 x)
        {
            if (Ssse3.IsSsse3Supported)
            {
                // Entry i holds the number of set bits in the 4-bit value i.
                byte16 bitsPerNibble = new byte16(0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4);
                byte16 nibbleMask    = new byte16(0x0F);

                // Look up the low and the high nibble of each byte separately, then add.
                byte16 lowNibbleBits  = Ssse3.shuffle_epi8(bitsPerNibble, x & nibbleMask);
                byte16 highNibbleBits = Ssse3.shuffle_epi8(bitsPerNibble, Sse2.srli_epi16(x, 4) & nibbleMask);

                return lowNibbleBits + highNibbleBits;
            }

            return new byte16((byte)math.countbits((uint)x.x0),
                              (byte)math.countbits((uint)x.x1),
                              (byte)math.countbits((uint)x.x2),
                              (byte)math.countbits((uint)x.x3),
                              (byte)math.countbits((uint)x.x4),
                              (byte)math.countbits((uint)x.x5),
                              (byte)math.countbits((uint)x.x6),
                              (byte)math.countbits((uint)x.x7),
                              (byte)math.countbits((uint)x.x8),
                              (byte)math.countbits((uint)x.x9),
                              (byte)math.countbits((uint)x.x10),
                              (byte)math.countbits((uint)x.x11),
                              (byte)math.countbits((uint)x.x12),
                              (byte)math.countbits((uint)x.x13),
                              (byte)math.countbits((uint)x.x14),
                              (byte)math.countbits((uint)x.x15));
        }
Beispiel #18
0
        /// <summary>
        /// Produces three signed bytes of the form <c>min + highByte((max - min) * NextState())</c>,
        /// computed per component with a fresh <c>NextState()</c> value each.
        /// </summary>
        /// <param name="min">Per-component offset added to the scaled value.</param>
        /// <param name="max">Per-component bound; <c>max - min</c> is the multiplier.</param>
        /// <returns>
        /// The offset values. NOTE(review): presumably <c>NextState()</c> yields a value in [0, 255],
        /// which would place every result in [min, max) - confirm.
        /// </returns>
        public sbyte3 NextSByte3(sbyte3 min, sbyte3 max)
        {
            // Each component pair is passed to Assert.IsNotSmaller (presumably max >= min).
            Assert.IsNotSmaller(max.x, min.x);
            Assert.IsNotSmaller(max.y, min.y);
            Assert.IsNotSmaller(max.z, min.z);

            // NOTE(review): max - min is computed in sbyte3 before widening. If the
            // sbyte3 -> ushort3 conversion sign-extends, a range above 127 would widen to a
            // wrong multiplier - confirm how that conversion is defined.
            ushort3 range    = (ushort3)(max - min);
            ushort3 states   = new ushort3(NextState(), NextState(), NextState());
            ushort3 products = range * states;

            if (Ssse3.IsSsse3Supported)
            {
                // Bytes 1, 3 and 5 are the high bytes of the three products.
                v128 highBytes = Ssse3.shuffle_epi8(products, new byte4(1, 3, 5, 0));

                return min + highBytes;
            }

            return min + (sbyte3)(products >> 8);
        }
Beispiel #19
0
        /// <summary>
        /// Counts the leading zero bits of every 16-bit component of <paramref name="x"/>.
        /// </summary>
        /// <param name="x">Vector of unsigned 16-bit values to inspect.</param>
        /// <returns>
        /// Per component, the number of leading zero bits. On the SSSE3 path a zero component yields 16.
        /// </returns>
        public static ushort8 lzcnt(ushort8 x)
        {
            if (Ssse3.IsSsse3Supported)
            {
                v128 nibbleMask = new v128(0x0F0F_0F0F);

                // Table index = nibble value. The low table assumes an empty high nibble
                // (count = 4 + the nibble's own leading zeros); an empty nibble maps to 16.
                v128 lowNibbleCounts  = new v128(16, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4);
                v128 highNibbleCounts = new v128(16, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0);

                v128 lowNibbles  = Sse2.and_si128(nibbleMask, x);
                v128 highNibbles = Sse2.and_si128(nibbleMask, Sse2.srli_epi16(x, 4));

                // Leading-zero count of each individual byte (16 for a zero byte).
                v128 perByte = Sse2.min_epu8(Ssse3.shuffle_epi8(lowNibbleCounts, lowNibbles),
                                             Ssse3.shuffle_epi8(highNibbleCounts, highNibbles));

                // set1_epi16(8) adds 8 to the low-byte count only. The shifted operand carries the
                // high-byte count in the low byte and zero in the high byte, so the unsigned
                // minimum also clears the lane's high byte.
                v128 lowBytePlusEight = Sse2.add_epi8(perByte, Sse2.set1_epi16(8));
                v128 highByteCount    = Sse2.srli_epi16(perByte, 8);

                return Sse2.min_epu8(lowBytePlusEight, highByteCount);
            }

            if (Sse2.IsSse2Supported)
            {
                // Branch-free binary search: whenever the upper part is non-zero, continue with
                // it and lower the count by the shift width (presumably Mask.BlendV picks its
                // second argument where the mask is set - confirm).
                v128 zero = default(v128);
                ushort8 remaining = x;
                ushort8 count = 16;

                ushort8 shifted = remaining >> 8;
                ushort8 shiftedIsZero = Sse2.cmpeq_epi16(shifted, zero);
                count     = Mask.BlendV(count - 8, count, shiftedIsZero);
                remaining = Mask.BlendV(shifted, remaining, shiftedIsZero);

                shifted       = remaining >> 4;
                shiftedIsZero = Sse2.cmpeq_epi16(shifted, zero);
                count         = Mask.BlendV(count - 4, count, shiftedIsZero);
                remaining     = Mask.BlendV(shifted, remaining, shiftedIsZero);

                shifted       = remaining >> 2;
                shiftedIsZero = Sse2.cmpeq_epi16(shifted, zero);
                count         = Mask.BlendV(count - 2, count, shiftedIsZero);
                remaining     = Mask.BlendV(shifted, remaining, shiftedIsZero);

                shifted       = remaining >> 1;
                shiftedIsZero = Sse2.cmpeq_epi16(shifted, zero);

                return Mask.BlendV(count - 2, count - remaining, shiftedIsZero);
            }

            return new ushort8(lzcnt(x.x0),
                               lzcnt(x.x1),
                               lzcnt(x.x2),
                               lzcnt(x.x3),
                               lzcnt(x.x4),
                               lzcnt(x.x5),
                               lzcnt(x.x6),
                               lzcnt(x.x7));
        }
Beispiel #20
0
        /// <summary>
        /// Produces four bytes of the form <c>min + highByte((max - min) * NextState())</c>,
        /// computed per component with a fresh <c>NextState()</c> value each.
        /// </summary>
        /// <param name="min">Per-component offset added to the scaled value.</param>
        /// <param name="max">Per-component bound; <c>max - min</c> is the multiplier.</param>
        /// <returns>
        /// The offset values. NOTE(review): presumably <c>NextState()</c> yields a value in [0, 255],
        /// which would place every result in [min, max) - confirm.
        /// </returns>
        public byte4 NextByte4(byte4 min, byte4 max)
        {
            // Each component pair is passed to Assert.IsNotSmaller (presumably max >= min).
            Assert.IsNotSmaller(max.x, min.x);
            Assert.IsNotSmaller(max.y, min.y);
            Assert.IsNotSmaller(max.z, min.z);
            Assert.IsNotSmaller(max.w, min.w);

            // Both code paths start from the same product; NextState() runs four times, in order.
            short4 range    = (short4)(max - min);
            short4 states   = new short4(NextState(), NextState(), NextState(), NextState());
            short4 products = range * states;

            if (Ssse3.IsSsse3Supported)
            {
                // Odd byte indices are the high bytes of the 16-bit products.
                v128 highBytes = Ssse3.shuffle_epi8(products, new byte4(1, 3, 5, 7));

                return min + highBytes;
            }

            return min + (byte4)(products >> 8);
        }
Beispiel #21
0
        /// <summary>
        /// Produces eight bytes of the form <c>min + highByte((max - min) * NextState())</c>,
        /// computed per component with a fresh <c>NextState()</c> value each.
        /// </summary>
        /// <param name="min">Per-component offset added to the scaled value.</param>
        /// <param name="max">Per-component bound; <c>max - min</c> is the multiplier.</param>
        /// <returns>
        /// The offset values. NOTE(review): presumably <c>NextState()</c> yields a value in [0, 255],
        /// which would place every result in [min, max) - confirm.
        /// </returns>
        public byte8 NextByte8(byte8 min, byte8 max)
        {
            // Each component pair is passed to Assert.IsNotSmaller (presumably max >= min).
            Assert.IsNotSmaller(max.x0, min.x0);
            Assert.IsNotSmaller(max.x1, min.x1);
            Assert.IsNotSmaller(max.x2, min.x2);
            Assert.IsNotSmaller(max.x3, min.x3);
            Assert.IsNotSmaller(max.x4, min.x4);
            Assert.IsNotSmaller(max.x5, min.x5);
            Assert.IsNotSmaller(max.x6, min.x6);
            Assert.IsNotSmaller(max.x7, min.x7);

            // Both code paths start from the same product; NextState() runs eight times, in order.
            short8 range    = (short8)(max - min);
            short8 states   = new short8(NextState(), NextState(), NextState(), NextState(),
                                         NextState(), NextState(), NextState(), NextState());
            short8 products = range * states;

            if (Ssse3.IsSsse3Supported)
            {
                // Odd byte indices are the high bytes of the 16-bit products.
                v128 highBytes = Ssse3.shuffle_epi8(products, new byte8(1, 3, 5, 7, 9, 11, 13, 15));

                return min + highBytes;
            }

            return min + (byte8)(products >> 8);
        }
Beispiel #22
0
        /// <summary>
        /// Counts the leading zero bits of every byte component of <paramref name="x"/>.
        /// </summary>
        /// <param name="x">Vector of three bytes to inspect.</param>
        /// <returns>
        /// Per component, the number of leading zero bits. On the SSSE3 path a zero byte yields 8
        /// (entry 0 of both lookup tables).
        /// </returns>
        public static byte3 lzcnt(byte3 x)
        {
            if (Ssse3.IsSsse3Supported)
            {
                v128 nibbleMask = new v128(0x0F0F_0F0F);

                // Table index = nibble value. The low table assumes an empty high nibble
                // (count = 4 + the nibble's own leading zeros); an empty nibble maps to 8.
                v128 lowNibbleCounts  = new v128(8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4);
                v128 highNibbleCounts = new v128(8, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0);

                v128 lowNibbles  = Sse2.and_si128(nibbleMask, x);
                v128 highNibbles = Sse2.and_si128(nibbleMask, Sse2.srli_epi16(x, 4));

                v128 fromLow  = Ssse3.shuffle_epi8(lowNibbleCounts, lowNibbles);
                v128 fromHigh = Ssse3.shuffle_epi8(highNibbleCounts, highNibbles);

                // A non-empty high nibble always gives the smaller value (below 4).
                return Sse2.min_epu8(fromLow, fromHigh);
            }

            if (Sse2.IsSse2Supported)
            {
                // Branch-free binary search: whenever the upper part is non-zero, continue with
                // it and lower the count by the shift width (presumably Mask.BlendV picks its
                // second argument where the mask is set - confirm).
                v128 zero = default(v128);
                byte3 remaining = x;
                byte3 count = 8;

                byte3 shifted = remaining >> 4;
                byte3 shiftedIsZero = Sse2.cmpeq_epi8(shifted, zero);
                count     = Mask.BlendV(count - 4, count, shiftedIsZero);
                remaining = Mask.BlendV(shifted, remaining, shiftedIsZero);

                shifted       = remaining >> 2;
                shiftedIsZero = Sse2.cmpeq_epi8(shifted, zero);
                count         = Mask.BlendV(count - 2, count, shiftedIsZero);
                remaining     = Mask.BlendV(shifted, remaining, shiftedIsZero);

                shifted       = remaining >> 1;
                shiftedIsZero = Sse2.cmpeq_epi8(shifted, zero);

                return Mask.BlendV(count - 2, count - remaining, shiftedIsZero);
            }

            return new byte3(lzcnt(x.x), lzcnt(x.y), lzcnt(x.z));
        }