// Accumulates into has_error every byte position where the signed comparison
// (carries > initial_lengths) yields the same answer as (initial_lengths > 0).
//   overlap : carry >  length  &&  length >  0
//   underlap: carry <= length  &&  length <= 0
// Both cases collapse to "(carries > length) == (length > 0)".
static void avxcheckContinuations(Vector256<byte> initial_lengths, Vector256<byte> carries, ref Vector256<byte> has_error)
{
    Vector256<sbyte> signedLengths = initial_lengths.AsSByte();

    // 0xFF where the carry exceeds the declared length, 0x00 elsewhere.
    Vector256<byte> carryExceedsLength =
        Avx2.CompareGreaterThan(carries.AsSByte(), signedLengths).AsByte();

    // 0xFF where the declared length is strictly positive, 0x00 elsewhere.
    Vector256<byte> lengthIsPositive =
        Avx2.CompareGreaterThan(signedLengths, Vector256<sbyte>.Zero).AsByte();

    // The two masks agreeing is exactly the overlap/underlap condition.
    Vector256<byte> overlapOrUnderlap = Avx2.CompareEqual(carryExceedsLength, lengthIsPositive);

    has_error = Avx2.Or(has_error, overlapOrUnderlap);
}
// A byte that follows 0xED must be no larger than 0x9F, and a byte that follows
// 0xF4 must be no larger than 0x8F. The following byte is expected to be a
// continuation byte (sign bit set), so a signed greater-than comparison is
// sufficient. Violations are OR-ed into has_error.
static void avxcheckFirstContinuationMax(Vector256<byte> current_bytes, Vector256<byte> off1_current_bytes, ref Vector256<byte> has_error)
{
    Vector256<sbyte> signedCurrent = current_bytes.AsSByte();

    // Positions whose preceding byte is one of the two restricted lead bytes.
    Vector256<byte> followsED = Avx2.CompareEqual(off1_current_bytes, Vector256.Create((byte)0xED));
    Vector256<byte> followsF4 = Avx2.CompareEqual(off1_current_bytes, Vector256.Create((byte)0xF4));

    // Positions whose byte exceeds the respective upper bound (signed compare).
    Vector256<byte> above9F =
        Avx2.CompareGreaterThan(signedCurrent, Vector256.Create((byte)0x9F).AsSByte()).AsByte();
    Vector256<byte> above8F =
        Avx2.CompareGreaterThan(signedCurrent, Vector256.Create((byte)0x8F).AsSByte()).AsByte();

    Vector256<byte> violations = Avx2.Or(
        Avx2.And(above9F, followsED),
        Avx2.And(above8F, followsF4));

    has_error = Avx2.Or(has_error, violations);
}
// Reads 32 consecutive UTF-16 code units starting at src and narrows them to
// 32 bytes, returned as a Vector256<sbyte> in the original element order.
// Narrowing uses unsigned saturation of the 16-bit lanes interpreted as signed
// shorts (Avx2.PackUnsignedSaturate). The caller must guarantee that at least
// 32 chars are readable from src.
public static Vector256<sbyte> ReadVector256(this ref char src)
{
    // vpackuswb packs within each 128-bit lane, producing the qword order
    // [first.lo, second.lo, first.hi, second.hi]; selecting qwords 0,2,1,3
    // puts the bytes back into source order.
    const byte RestoreOrder = 0b_11_01_10_00;
    const int CharsPerVector = 16;

    Vector256<short> first = Unsafe.As<char, Vector256<short>>(ref src);
    Vector256<short> second = Unsafe.As<char, Vector256<short>>(ref Unsafe.Add(ref src, CharsPerVector));

    Vector256<byte> packed = Avx2.PackUnsignedSaturate(first, second);
    Vector256<long> ordered = Avx2.Permute4x64(packed.AsInt64(), RestoreOrder);

    return ordered.AsSByte();
}
// Spreads the bits of selected input bytes so that a zero bit is inserted
// above every original bit: each selected byte b becomes a 16-bit word whose
// low byte is the spread low nibble of b and whose high byte is the spread
// high nibble of b.
//
// Within each 128-bit lane the bytes at lane offsets 0..3 and 8..11 are used
// (in that order); the remaining bytes of the lane are ignored.
//
// Reference C implementation:
//   x  = _mm256_shuffle_epi8(x, <0,-1,1,-1,2,-1,3,-1,8,-1,9,-1,10,-1,11,-1 per lane>);
//   lo = _mm256_shuffle_epi8(lut, _mm256_and_si256(x, _mm256_set1_epi8(0x0f)));
//   hi = _mm256_shuffle_epi8(lut, _mm256_srli_epi64(_mm256_and_si256(x, _mm256_set1_epi8(0xf0)), 4));
//   x  = _mm256_or_si256(lo, _mm256_slli_epi64(hi, 8));
private static Vector256<ulong> SpreadSIMD(Vector256<byte> x)
{
    // Place each source byte in an even byte slot and zero the odd slots
    // (index -1 has the high bit set, which makes vpshufb emit zero).
    // Note: Vector256.Create lists elements low-to-high, the reverse of _mm256_set_epi8.
    x = Avx2.Shuffle(x.AsSByte(), Vector256.Create(
        0, -1, 1, -1, 2, -1, 3, -1,
        8, -1, 9, -1, 10, -1, 11, -1,
        0, -1, 1, -1, 2, -1, 3, -1,
        8, -1, 9, -1, 10, -1, 11, -1)).AsByte();

    // lut[n] is the 4-bit value n with a zero bit interleaved above each bit.
    Vector256<byte> lut = Vector256.Create(
        (byte)0b00000000, 0b00000001, 0b00000100, 0b00000101,
        0b00010000, 0b00010001, 0b00010100, 0b00010101,
        0b01000000, 0b01000001, 0b01000100, 0b01000101,
        0b01010000, 0b01010001, 0b01010100, 0b01010101,
        0b00000000, 0b00000001, 0b00000100, 0b00000101,
        0b00010000, 0b00010001, 0b00010100, 0b00010101,
        0b01000000, 0b01000001, 0b01000100, 0b01000101,
        0b01010000, 0b01010001, 0b01010100, 0b01010101);

    // Spread the low nibble of every byte.
    Vector256<byte> lo = Avx2.And(x, Vector256.Create((byte)0x0f));
    lo = Avx2.Shuffle(lut, lo);

    // Spread the high nibble of every byte. The 64-bit shift cannot leak bits
    // between bytes because the low nibbles were masked off first.
    Vector256<byte> hi = Avx2.And(x, Vector256.Create((byte)0xf0));
    hi = Avx2.Shuffle(lut, Avx2.ShiftRightLogical(hi.AsUInt64(), 4).AsByte());

    // Move the spread high nibble UP into the (zeroed) odd byte directly above
    // its low nibble. This must be a left shift: a right shift would discard
    // the high nibble of byte 0 and attach every other one to the wrong word.
    x = Avx2.Or(lo, Avx2.ShiftLeftLogical(hi.AsUInt64(), 8).AsByte());

    return x.AsUInt64();
}
// Element-wise addition of two 256-bit vectors for any supported element type.
// Integer element types are dispatched to Avx2.Add, float and double to
// Avx.Add. Throws NotSupportedException for every other T.
public static Vector256<T> Vector256Add<T>(Vector256<T> left, Vector256<T> right) where T : struct
{
    // 8-bit integers
    if (typeof(T) == typeof(byte))
    {
        return Avx2.Add(left.AsByte(), right.AsByte()).As<byte, T>();
    }
    if (typeof(T) == typeof(sbyte))
    {
        return Avx2.Add(left.AsSByte(), right.AsSByte()).As<sbyte, T>();
    }

    // 16-bit integers
    if (typeof(T) == typeof(short))
    {
        return Avx2.Add(left.AsInt16(), right.AsInt16()).As<short, T>();
    }
    if (typeof(T) == typeof(ushort))
    {
        return Avx2.Add(left.AsUInt16(), right.AsUInt16()).As<ushort, T>();
    }

    // 32-bit integers
    if (typeof(T) == typeof(int))
    {
        return Avx2.Add(left.AsInt32(), right.AsInt32()).As<int, T>();
    }
    if (typeof(T) == typeof(uint))
    {
        return Avx2.Add(left.AsUInt32(), right.AsUInt32()).As<uint, T>();
    }

    // 64-bit integers
    if (typeof(T) == typeof(long))
    {
        return Avx2.Add(left.AsInt64(), right.AsInt64()).As<long, T>();
    }
    if (typeof(T) == typeof(ulong))
    {
        return Avx2.Add(left.AsUInt64(), right.AsUInt64()).As<ulong, T>();
    }

    // Floating point
    if (typeof(T) == typeof(float))
    {
        return Avx.Add(left.AsSingle(), right.AsSingle()).As<float, T>();
    }
    if (typeof(T) == typeof(double))
    {
        return Avx.Add(left.AsDouble(), right.AsDouble()).As<double, T>();
    }

    throw new NotSupportedException();
}
// Maps off1_hibits to a pair of lower bounds and flags an error where BOTH the
// previous byte and the current byte fall below their bound (signed compare):
//
//   off1_hibits   previous byte   current byte
//   C             < 0xC2          always
//   E             < 0xE1          < 0xA0
//   F             < 0xF1          < 0x90
//   otherwise     never           never
//
// off1_hibits is obtained from push_last_byte_of_a_to_b(previous_hibits, hibits)
// and is used as the per-byte index into two 16-entry tables (replicated in
// both 128-bit lanes). A bound of 0x80 (-128) can never be exceeded, i.e.
// "false"; a bound of 0x7F (127) is exceeded by any byte with the sign bit
// set, i.e. "true" for continuation bytes.
static void avxcheckOverlong(Vector256<byte> current_bytes, Vector256<byte> off1_current_bytes, Vector256<byte> hibits, Vector256<byte> previous_hibits, ref Vector256<byte> has_error)
{
    // Table entries 0..7 for both tables: all 0x80 => never an error.
    const ulong NeverBelow = 0x8080808080808080UL;
    // Table entries 8..15 (low to high): 80 80 80 80 C2 80 E1 F1
    const ulong PreviousByteMinsUpper = 0xF1E180C280808080UL;
    // Table entries 8..15 (low to high): 80 80 80 80 7F 7F A0 90
    const ulong CurrentByteMinsUpper = 0x90A07F7F80808080UL;

    Vector256<byte> off1_hibits = push_last_byte_of_a_to_b(previous_hibits, hibits);

    Vector256<byte> previousMinTable =
        Vector256.Create(NeverBelow, PreviousByteMinsUpper, NeverBelow, PreviousByteMinsUpper).AsByte();
    Vector256<byte> currentMinTable =
        Vector256.Create(NeverBelow, CurrentByteMinsUpper, NeverBelow, CurrentByteMinsUpper).AsByte();

    // Is the previous byte below the minimum for its class?
    Vector256<sbyte> previousMins = Avx2.Shuffle(previousMinTable, off1_hibits).AsSByte();
    Vector256<byte> previousTooSmall =
        Avx2.CompareGreaterThan(previousMins, off1_current_bytes.AsSByte()).AsByte();

    // Is the current byte below the minimum implied by the previous byte?
    Vector256<sbyte> currentMins = Avx2.Shuffle(currentMinTable, off1_hibits).AsSByte();
    Vector256<byte> currentTooSmall =
        Avx2.CompareGreaterThan(currentMins, current_bytes.AsSByte()).AsByte();

    Vector256<byte> overlong = Avx2.And(previousTooSmall, currentTooSmall);
    has_error = Avx2.Or(has_error, overlong);
}