/// <summary>
/// Builds a one-block mask in which, within each 128-bit half, even positions hold their
/// half-relative index and odd positions hold the maximum value of the component type
/// </summary>
static Vec256<T> ClearAlternating()
{
    var mask = Span256.Alloc<T>(1);
    var chop = PrimalInfo.Get<T>().MaxVal;
    var laneLen = mask.Length / 2;

    // Both halves receive the same pattern, so each entry is computed once and written twice
    for (byte pos = 0; pos < laneLen; pos++)
    {
        T entry;
        if (pos % 2 == 0)
        {
            entry = convert<byte, T>(pos);
        }
        else
        {
            entry = chop;
        }

        mask[pos] = entry;
        mask[pos + laneLen] = entry;
    }

    return Vec256.Load(mask);
}
/// <summary>
/// Times two multiplication entry points over freshly drawn operands and collects one
/// measurement per entry point, labeled "direct" and "dsl"
/// </summary>
/// <param name="opcount">The number of multiplications timed per entry point</param>
void RunMultiply(int opcount = Pow2.T16)
{
    var leftStream = Random.Stream<ulong>();
    var rightStream = Random.Stream<ulong>();
    var width = Vec256<ulong>.Length;
    var timer = stopwatch(false);

    // Pass 1: only the multiply call sits between Start/Stop; operand loading is untimed
    for (var op = 0; op < opcount; op++)
    {
        var a = Vec256.Load(leftStream.TakeSpan(width));
        var b = Vec256.Load(rightStream.TakeSpan(width));
        timer.Start();
        dinx.mul(a, b);
        timer.Stop();
    }
    Collect((opcount, snapshot(timer), "direct"));

    // Pass 2: same protocol against the intrinsic-named entry point
    timer.Reset();
    for (var op = 0; op < opcount; op++)
    {
        var a = Vec256.Load(leftStream.TakeSpan(width));
        var b = Vec256.Load(rightStream.TakeSpan(width));
        timer.Start();
        _mm256_mul_epi32(a, b);
        timer.Stop();
    }
    Collect((opcount, snapshot(timer), "dsl"));
}
/// <summary>
/// Initializes the four 256-bit segments from the supplied vectors, in argument order
/// </summary>
/// <param name="v0">Assigned to v00</param>
/// <param name="v1">Assigned to v01</param>
/// <param name="v2">Assigned to v10</param>
/// <param name="v3">Assigned to v11</param>
public Vec1024(Vec256<T> v0, Vec256<T> v1, Vec256<T> v2, Vec256<T> v3)
{
    (v00, v01, v10, v11) = (v0, v1, v2, v3);
}
/// <summary>
/// Creates a PcgAvx32Rng from the four Seed64 seed constants and four fixed increments
/// derived from 0xFFFF (offsets 0, 128, 256 and 512)
/// </summary>
static PcgAvx32Rng CreatPcgAvx()
{
    const ulong baseInc = 0xFFFFul;

    var seeds = Vec256.FromParts(
        Seed64.Seed00,
        Seed64.Seed01,
        Seed64.Seed02,
        Seed64.Seed03);

    var increments = Vec256.FromParts(
        baseInc,
        baseInc + 128,
        baseInc + 256,
        baseInc + 512);

    return PcgAvx32Rng.Create(seeds, increments);
}
/// <summary>
/// Assembles a 512-bit vector from four 128-bit vectors: the first pair is combined into
/// the first 256-bit argument and the second pair into the second
/// </summary>
/// <param name="v00">First member of the first pair</param>
/// <param name="v01">Second member of the first pair</param>
/// <param name="v10">First member of the second pair</param>
/// <param name="v11">Second member of the second pair</param>
/// <typeparam name="T">The primal component type</typeparam>
public static Vec512<T> FromParts<T>(Vec128<T> v00, Vec128<T> v01, Vec128<T> v10, Vec128<T> v11)
    where T : struct
{
    Vec256<T> lower = ginx.set(v00, v01);
    Vec256<T> upper = ginx.set(v10, v11);
    return new Vec512<T>(in lower, in upper);
}
/// <summary>
/// Multiplies the operands with mul, adds the two 128-bit halves of the product and
/// returns the sum of the first two components of that result
/// </summary>
/// <param name="lhs">The left operand</param>
/// <param name="rhs">The right operand</param>
public static ulong dot(Vec256<uint> lhs, Vec256<uint> rhs)
{
    var products = mul(lhs, rhs);
    var lower = extract128(products, 0);
    var upper = extract128(products, 1);
    var folded = add(lower, upper).ToSpan128();
    return folded[0] + folded[1];
}
/// <summary>
/// Stores the vector's components into a newly allocated array of Vec256&lt;T&gt;.Length elements
/// </summary>
/// <param name="src">The source vector</param>
/// <typeparam name="T">The primal component type</typeparam>
public static T[] ToArray<T>(this Vec256<T> src)
    where T : struct
{
    var buffer = new T[Vec256<T>.Length];
    vstore(src, ref head(buffer));
    return buffer;
}
/// <summary>
/// Stores the vector's components into a newly allocated single-block span
/// </summary>
/// <param name="src">The source vector</param>
/// <typeparam name="T">The primal component type</typeparam>
public static Span256<T> ToSpan256<T>(this Vec256<T> src)
    where T : struct
{
    var block = Span256.AllocBlocks<T>(1);
    vstore(src, ref block[0]);
    return block;
}
/// <summary>
/// Applies dfp.sqrt to every 256-bit block of the source and stores each result in the
/// corresponding block of the target
/// </summary>
/// <param name="src">The source blocks</param>
/// <param name="dst">The target blocks; expected to hold at least as many blocks as the source</param>
/// <returns>The target span</returns>
public static Span256<float> sqrt(Span256<float> src, Span256<float> dst)
{
    for (var block = 0; block < src.BlockCount; block++)
    {
        var x = Vec256.Load(ref src.Block(block));

        // The store must target the block's first cell. Indexing dst[block] addresses the
        // element at position 'block', which misplaces every block after the first.
        vstore(dfp.sqrt(x), ref dst.Block(block));
    }

    return dst;
}
/// <summary>
/// Creates the vector whose first eight components are the even values 0..14 and whose
/// last eight components are the odd values 1..15
/// </summary>
static Vec256<ushort> MergeLanesU16()
{
    return Vec256.FromParts(
        // even values
        (ushort)0, (ushort)2, (ushort)4, (ushort)6,
        (ushort)8, (ushort)10, (ushort)12, (ushort)14,
        // odd values
        (ushort)1, (ushort)3, (ushort)5, (ushort)7,
        (ushort)9, (ushort)11, (ushort)13, (ushort)15);
}
/// <summary>
/// Rotates the source left by the given offsets, rotates that result right by the same
/// offsets, and traces the inputs and both intermediate results in hex
/// </summary>
/// <param name="src">The vector to rotate</param>
/// <param name="offsets">The per-component rotation amounts</param>
void TraceRot(Vec256<uint> src, Vec256<uint> offsets)
{
    var rotatedLeft = Bits.rotl(src, offsets);
    var roundTrip = Bits.rotr(rotatedLeft, offsets);

    Trace("src", src.FormatHex(), 20);
    Trace("offsets", offsets.FormatHex(), 20);
    Trace("rotl(src)", rotatedLeft.FormatHex(), 20);
    Trace("rotr(rotl(src))", roundTrip.FormatHex(), 20);
}
/// <summary>
/// Creates the vector whose first sixteen components are the even values 0..30 and whose
/// last sixteen components are the odd values 1..31
/// </summary>
static Vec256<byte> MergeLanesU8()
{
    // Arguments are evaluated left to right, so each 'x += 2' yields the next value in sequence
    byte evens = 0;
    byte odds = 1;
    return Vec256.FromBytes(
        evens, evens += 2, evens += 2, evens += 2,
        evens += 2, evens += 2, evens += 2, evens += 2,
        evens += 2, evens += 2, evens += 2, evens += 2,
        evens += 2, evens += 2, evens += 2, evens += 2,
        odds, odds += 2, odds += 2, odds += 2,
        odds += 2, odds += 2, odds += 2, odds += 2,
        odds += 2, odds += 2, odds += 2, odds += 2,
        odds += 2, odds += 2, odds += 2, odds += 2);
}
/// <summary>
/// Creates a vector in which each of the first Length cells of the backing span is set to
/// gmath.one&lt;T&gt;()
/// </summary>
static Vec256<T> CalcUnits()
{
    var count = Length;
    var cells = Span256.Alloc<T>(count);
    var unit = gmath.one<T>();

    var pos = 0;
    while (pos < count)
    {
        cells[pos] = unit;
        pos++;
    }

    return Vec256.Load(cells);
}
/// <summary>
/// Creates a vector whose components alternate between the two operands: positions for
/// which even(i) holds receive the first operand, all others receive the second
/// </summary>
/// <param name="a">The value placed at even positions</param>
/// <param name="b">The value placed at the remaining positions</param>
/// <typeparam name="T">The primal component type</typeparam>
public static Vec256<T> Alternate<T>(T a, T b)
    where T : unmanaged
{
    var block = Span256.AllocBlock<T>();
    var count = Vec256<T>.Length;

    for (var pos = 0; pos < count; pos++)
    {
        if (even(pos))
        {
            block[pos] = a;
        }
        else
        {
            block[pos] = b;
        }
    }

    return Vec256.Load(ref head(block));
}
/// <summary>
/// Creates a vector with incrementing components:
/// v[0] = first and v[i] = v[i-1] + 1 for i = 1...N-1, where each step is taken with gmath.inc
/// </summary>
/// <param name="first">The value of the first component</param>
/// <param name="swaps">Swap specifications applied to the populated span before it is loaded</param>
/// <typeparam name="T">The primal component type</typeparam>
public static Vec256<T> Increments(T first = default, params Swap[] swaps)
{
    var count = Length;
    var cells = Span256.Alloc<T>(count);
    var current = first;

    var pos = 0;
    while (pos < count)
    {
        cells[pos] = current;
        gmath.inc(ref current);
        pos++;
    }

    return Vec256.Load(cells.Swap(swaps));
}
/// <summary>
/// Stores a generic vector to a reference location by dispatching on the component type to
/// the matching type-specific vstore overload
/// </summary>
/// <param name="src">The source vector</param>
/// <param name="dst">A reference to the first target cell</param>
/// <typeparam name="T">The primal component type; one of the ten primitive numeric types tested below</typeparam>
/// <remarks>Throws the exception produced by unsupported&lt;T&gt;() for any other component type</remarks>
public static unsafe void store<T>(Vec256<T> src, ref T dst)
    where T : struct
{
    // Each guard compares typeof(T) against one primitive; the first match stores and returns
    if (typeof(T) == typeof(sbyte))
    {
        vstore(int8(src), ref int8(ref dst));
        return;
    }

    if (typeof(T) == typeof(byte))
    {
        vstore(uint8(src), ref uint8(ref dst));
        return;
    }

    if (typeof(T) == typeof(short))
    {
        vstore(int16(src), ref int16(ref dst));
        return;
    }

    if (typeof(T) == typeof(ushort))
    {
        vstore(uint16(src), ref uint16(ref dst));
        return;
    }

    if (typeof(T) == typeof(int))
    {
        vstore(int32(src), ref int32(ref dst));
        return;
    }

    if (typeof(T) == typeof(uint))
    {
        vstore(uint32(src), ref uint32(ref dst));
        return;
    }

    if (typeof(T) == typeof(long))
    {
        vstore(int64(src), ref int64(ref dst));
        return;
    }

    if (typeof(T) == typeof(ulong))
    {
        vstore(uint64(src), ref uint64(ref dst));
        return;
    }

    if (typeof(T) == typeof(float))
    {
        vstore(float32(src), ref float32(ref dst));
        return;
    }

    if (typeof(T) == typeof(double))
    {
        vstore(float64(src), ref float64(ref dst));
        return;
    }

    throw unsupported<T>();
}
/// <summary>
/// Checks dfp.fmul on 4-component double vectors against mathspan.mul, comparing both the
/// vectors themselves and their components converted to long
/// </summary>
/// <param name="cycles">The number of randomized comparisons to run</param>
void mul256f64_check(int cycles = DefaltCycleCount)
{
    for (var cycle = 0; cycle < cycles; cycle++)
    {
        // Operands are drawn from the closed Int32 range and widened to double
        var bounds = closed((long)int.MinValue, (long)int.MaxValue);
        var samples = Random.Stream(bounds).Select(sample => (double)sample);

        var left = Vec256.Load(samples.TakeSpan(4));
        var right = Vec256.Load(samples.TakeSpan(4));

        var viaVector = dfp.fmul(left, right);
        var scalarProduct = mathspan.mul(left.ToSpan(), right.ToSpan(), right.ToSpan().Replicate(true));
        var viaScalar = Vec256.Load(scalarProduct);
        Claim.eq(viaVector, viaScalar);

        var vectorAsLong = viaVector.ToSpan().Convert<long>();
        var scalarAsLong = viaScalar.ToSpan().Convert<long>();
        Claim.eq(vectorAsLong, scalarAsLong);
    }
}
/// <summary>
/// Verifies a vectorized binary operator against a scalar reference operator over random
/// blocked data, checking each block as a vector and then the accumulated spans as a whole
/// </summary>
/// <param name="random">The source of random operand data</param>
/// <param name="blocks">The number of 256-bit blocks to test</param>
/// <param name="inXOp">The vectorized operator under test</param>
/// <param name="primalOp">The scalar reference operator</param>
/// <typeparam name="T">The primal component type</typeparam>
public static void VerifyBinOp<T>(IPolyrand random, int blocks, Vector256BinOp<T> inXOp, Func<T, T, T> primalOp)
    where T : unmanaged
{
    var cells = Span256<T>.BlockLength;

    var left = random.ReadOnlySpan256<T>(blocks);
    Claim.eq(blocks * cells, left.Length);

    var right = random.ReadOnlySpan256<T>(blocks);
    Claim.eq(blocks * cells, right.Length);

    var expected = Span256.AllocBlocks<T>(blocks);
    Claim.eq(blocks, expected.BlockCount);

    var observed = Span256.AllocBlocks<T>(blocks);
    Claim.eq(blocks, observed.BlockCount);

    // Scratch buffer for one block of scalar reference results
    var scratch = new T[cells];

    for (var block = 0; block < blocks; block++)
    {
        var start = block * cells;
        for (int i = 0, at = start; i < cells; i++, at++)
        {
            scratch[i] = primalOp(left[at], right[at]);
        }

        var vReference = Vec256.LoadVector<T>(ref scratch[0]);

        var vLeft = left.LoadVec256(block);
        var vRight = right.LoadVec256(block);
        var vResult = inXOp(vLeft, vRight);

        Claim.eq(vReference, vResult);

        ginx.store(vReference, ref expected.Block(block));
        ginx.store(vResult, ref observed.Block(block));
    }

    Claim.eq(expected, observed);
}
/// <summary>
/// Exchanges components i and j of the source by building a control vector that is the
/// identity everywhere except at positions i and j, and applying it with perm8x32
/// </summary>
/// <param name="src">The source vector</param>
/// <param name="i">The index of the first component</param>
/// <param name="j">The index of the second component</param>
public static Vec256<int> swap(Vec256<int> src, byte i, byte j)
{
    Span<int> spec = stackalloc int[Vec256<int>.Length];

    for (byte slot = 0; slot < spec.Length; slot++)
    {
        // Position i selects j, position j selects i, every other position selects itself
        spec[slot] = slot == i ? j : (slot == j ? i : slot);
    }

    return perm8x32(src, Vec256.Load(spec));
}
/// <summary>
/// Multiplies two fixed 8-component int vectors with dinx.mul and traces the operands and
/// the product
/// </summary>
public void mulnew()
{
    // Two random 128-bit vectors are still drawn, although nothing below consumes them;
    // the draws are kept so the generator advances exactly as before.
    _ = Random.CpuVec128<int>();
    _ = Random.CpuVec128<int>();

    // Fixed operands with zeros in every odd position
    var v3 = Vec256.FromParts(1, 0, 2, 0, 3, 0, 4, 0);
    var v4 = Vec256.FromParts(5, 0, 6, 0, 7, 0, 8, 0);
    var v5 = dinx.mul(v3, v4);

    Trace(() => v3);
    Trace(() => v4);
    Trace(() => v5);

    // NOTE(review): a scalar cross-check (widening each component pair to long, multiplying,
    // and comparing with Claim.eq) was sketched here previously but is not enabled.
}
/// <summary>
/// Delegates to AlignRight for unsigned 32-bit components, forwarding the operands unchanged
/// </summary>
/// <param name="left">The left operand</param>
/// <param name="right">The right operand</param>
/// <param name="offset">The offset forwarded as the third argument</param>
public static Vec256<uint> alignr(Vec256<uint> left, Vec256<uint> right, byte offset)
{
    return AlignRight(left, right, offset);
}
/// <summary>
/// Delegates to AlignRight for signed 64-bit components, forwarding the operands unchanged
/// </summary>
/// <param name="left">The left operand</param>
/// <param name="right">The right operand</param>
/// <param name="offset">The offset forwarded as the third argument</param>
public static Vec256<long> alignr(Vec256<long> left, Vec256<long> right, byte offset)
{
    return AlignRight(left, right, offset);
}
/// <summary>
/// Conditionally stores 64-bit unsigned components of the source to memory under control of a mask
/// </summary>
/// <param name="src">The vector supplying the values to store</param>
/// <param name="mask">The vector selecting which components are written</param>
/// <param name="dst">A reference to the first target cell</param>
/// <remarks>
/// NOTE(review): assumes MaskStore is the statically imported AVX2 intrinsic, whose
/// parameter order is (address, mask, source). The arguments are forwarded in that order;
/// passing the source ahead of the mask would use the data as the mask.
/// </remarks>
public static unsafe void maskstore(Vec256<ulong> src, Vec256<ulong> mask, ref ulong dst)
{
    MaskStore(refptr(ref dst), mask, src);
}
/// <summary>
/// Conditionally stores 32-bit unsigned components of the source to memory under control of a mask
/// </summary>
/// <param name="src">The vector supplying the values to store</param>
/// <param name="mask">The vector selecting which components are written</param>
/// <param name="dst">A reference to the first target cell</param>
/// <remarks>
/// NOTE(review): assumes MaskStore is the statically imported AVX2 intrinsic, whose
/// parameter order is (address, mask, source). The arguments are forwarded in that order;
/// passing the source ahead of the mask would use the data as the mask.
/// </remarks>
public static unsafe void maskstore(Vec256<uint> src, Vec256<uint> mask, ref uint dst)
{
    MaskStore(refptr(ref dst), mask, src);
}
/// <summary>
/// Computes the sum of the bit-flipped source and Vec256.Ones&lt;ushort&gt;()
/// </summary>
/// <param name="src">The source vector</param>
/// <remarks>
/// NOTE(review): this is a two's-complement negation only if Ones yields the value 1 in
/// every component (rather than all bits set) - confirm against its definition.
/// </remarks>
public static Vector256<ushort> negate(Vector256<ushort> src)
{
    var flipped = BitUtil.flip(src);
    return add(flipped, Vec256.Ones<ushort>());
}
/// <summary>
/// Computes the sum of the bit-flipped source and Vec256.Ones&lt;byte&gt;()
/// </summary>
/// <param name="src">The source vector</param>
/// <remarks>
/// NOTE(review): this is a two's-complement negation only if Ones yields the value 1 in
/// every component (rather than all bits set) - confirm against its definition.
/// </remarks>
public static Vector256<byte> negate(Vector256<byte> src)
{
    var flipped = BitUtil.flip(src);
    return add(flipped, Vec256.Ones<byte>());
}
/// <summary>
/// Delegates to MaskLoad for 64-bit unsigned components, passing a pointer obtained from
/// the source reference together with the mask
/// </summary>
/// <param name="src">A reference to the first source cell</param>
/// <param name="mask">The mask forwarded to MaskLoad</param>
public static unsafe Vec256<ulong> maskload(ref ulong src, Vec256<ulong> mask)
{
    return MaskLoad(refptr(ref src), mask);
}
/// <summary>
/// Loads a vector from the specified block of a read-only blocked span via Vec256.Load
/// </summary>
/// <param name="src">The source span</param>
/// <param name="block">The block index forwarded to Vec256.Load; defaults to 0</param>
/// <typeparam name="T">The primal component type</typeparam>
public static Vec256<T> LoadVec256<T>(this ReadOnlySpan256<T> src, int block = 0)
    where T : unmanaged
{
    return Vec256.Load(src, block);
}
/// <summary>
/// Delegates to Reciprocal for single-precision components
/// </summary>
/// <param name="src">The source vector</param>
public static Vec256<float> rcp(Vec256<float> src)
{
    return Reciprocal(src);
}
/// <summary>
/// Delegates to MaskLoad for 32-bit unsigned components, passing a pointer obtained from
/// the source reference together with the mask
/// </summary>
/// <param name="src">A reference to the first source cell</param>
/// <param name="mask">The mask forwarded to MaskLoad</param>
public static unsafe Vec256<uint> maskload(ref uint src, Vec256<uint> mask)
{
    return MaskLoad(refptr(ref src), mask);
}