/// <summary>
/// Builds a Vector256 of UInt32 from generated values, extracts its lower and
/// upper halves, then writes those halves back in swapped positions. Each stage
/// is handed to the corresponding validation helper together with the source values.
/// </summary>
public void RunBasicScenario()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario));

    // Populate the backing array one element at a time, in index order.
    var values = new UInt32[ElementCount];
    for (int index = 0; index < ElementCount; index++)
    {
        values[index] = TestLibrary.Generator.GetUInt32();
    }

    var source = Vector256.Create(
        values[0], values[1], values[2], values[3],
        values[4], values[5], values[6], values[7]);

    // Split into 128-bit halves and validate the extraction.
    var lowerHalf = source.GetLower();
    var upperHalf = source.GetUpper();
    ValidateGetResult(lowerHalf, upperHalf, values);

    // Re-insert the halves crosswise: the upper half becomes the lower half and vice versa.
    var swapped = source.WithLower(upperHalf).WithUpper(lowerHalf);
    ValidateWithResult(swapped, values);
}
/// <summary>
/// Builds a Vector256 of SByte from generated values, extracts its lower and
/// upper halves, then writes those halves back in swapped positions. Each stage
/// is handed to the corresponding validation helper together with the source values.
/// </summary>
public void RunBasicScenario()
{
    TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario));

    // Populate the backing array one element at a time, in index order.
    var values = new SByte[ElementCount];
    for (int index = 0; index < ElementCount; index++)
    {
        values[index] = TestLibrary.Generator.GetSByte();
    }

    var source = Vector256.Create(
        values[0], values[1], values[2], values[3],
        values[4], values[5], values[6], values[7],
        values[8], values[9], values[10], values[11],
        values[12], values[13], values[14], values[15],
        values[16], values[17], values[18], values[19],
        values[20], values[21], values[22], values[23],
        values[24], values[25], values[26], values[27],
        values[28], values[29], values[30], values[31]);

    // Split into 128-bit halves and validate the extraction.
    var lowerHalf = source.GetLower();
    var upperHalf = source.GetUpper();
    ValidateGetResult(lowerHalf, upperHalf, values);

    // Re-insert the halves crosswise: the upper half becomes the lower half and vice versa.
    var swapped = source.WithLower(upperHalf).WithUpper(lowerHalf);
    ValidateWithResult(swapped, values);
}
/// <summary>
/// Divides each element of <paramref name="dividend"/> by the corresponding
/// element of <paramref name="divisor"/>.
/// </summary>
/// <param name="dividend">The vector of numerators.</param>
/// <param name="divisor">The vector of denominators.</param>
/// <returns>The element-wise quotient.</returns>
public static Vector256<float> Divide(Vector256<float> dividend, Vector256<float> divisor)
{
    if (Avx.IsSupported)
    {
        return Avx.Divide(dividend, divisor);
    }

    // Fallback without AVX: divide the two 128-bit halves independently and
    // recombine. Each half of the dividend must be paired with the SAME half
    // of the divisor (lower with lower, upper with upper).
    return FromLowHigh(
        Divide(dividend.GetLower(), divisor.GetLower()),
        Divide(dividend.GetUpper(), divisor.GetUpper()));
}
/// <summary>
/// Splits a 256-bit vector of doubles into its two 128-bit halves.
/// </summary>
/// <param name="vector">The vector to split.</param>
/// <param name="low">Receives the lower 128 bits of <paramref name="vector"/>.</param>
/// <param name="high">Receives the upper 128 bits of <paramref name="vector"/>.</param>
public static void GetLowHigh(Vector256<double> vector, out Vector128<double> low, out Vector128<double> high)
    => (low, high) = (vector.GetLower(), vector.GetUpper());
/// <summary>
/// Adds up all eight float elements of <paramref name="v"/> using AVX horizontal adds.
/// </summary>
/// <param name="v">The vector whose elements are summed.</param>
/// <returns>The sum of the eight elements.</returns>
private static float SumVector256(Vector256<float> v)
{
    // First pass, per 128-bit lane: 0+1, 2+3, .., .. | 4+5, 6+7, .., ..
    Vector256<float> pairSums = Avx.HorizontalAdd(v, v);

    // Second pass, per 128-bit lane: 0+1+2+3, .. | 4+5+6+7, ..
    Vector256<float> laneSums = Avx.HorizontalAdd(pairSums, pairSums);

    // Element 0 of each lane now holds that lane's total; combine the two lanes.
    float upperTotal = laneSums.GetUpper().ToScalar();
    float lowerTotal = laneSums.GetLower().ToScalar();
    return upperTotal + lowerTotal;
}
/// <summary>
/// Convolves one line of float input against a per-output-sample weight map and
/// writes three float sums per output sample.
/// </summary>
/// <remarks>
/// The body keeps exactly three accumulators and always loads three consecutive
/// vectors per step, so it is written for three interleaved values per sample.
/// NOTE(review): this presumes <c>channels</c> is 3 - confirm against the declaring type.
/// </remarks>
/// <param name="istart">Start of the input line; read as <c>float</c>.</param>
/// <param name="tstart">Start of the output area; written as <c>float</c>.</param>
/// <param name="cb">Size in bytes of the output range; output stops once the write pointer reaches <c>tstart + cb</c>.</param>
/// <param name="mapxstart">
/// Weight map. For each output sample it holds one <c>int</c> input index followed by
/// <c>smapx * channels</c> <c>float</c> weights.
/// </param>
/// <param name="smapx">Number of weights per channel in each map entry; processed in groups of four.</param>
/// <param name="smapy">Used to derive the output stride of <c>smapy * 4</c> floats between consecutive output samples.</param>
unsafe void IConvolver.ConvolveSourceLine(byte* istart, byte* tstart, int cb, byte* mapxstart, int smapx, int smapy)
{
    float* tp = (float*)tstart, tpe = (float*)(tstart + cb);
    float* pmapx = (float*)mapxstart;
    int kstride = smapx * channels;
    int tstride = smapy * 4;
    int vcnt = smapx / Vector128<float>.Count;

    while (tp < tpe)
    {
        // Each map entry starts with the input index, stored as an int in the float stream.
        int ix = *(int*)pmapx++;
        int lcnt = vcnt;

        float* ip = (float*)istart + ix * channels;
        float* mp = pmapx;
        pmapx += kstride;

        Vector128<float> av0, av1, av2;

        if (Avx.IsSupported && lcnt >= 2)
        {
            Vector256<float> ax0 = Vector256<float>.Zero, ax1 = ax0, ax2 = ax0;

            // Each pass consumes two 4-weight groups: three 256-bit loads from input and weights.
            for (; lcnt >= 2; lcnt -= 2)
            {
                var iv0 = Avx.LoadVector256(ip);
                var iv1 = Avx.LoadVector256(ip + Vector256<float>.Count);
                var iv2 = Avx.LoadVector256(ip + Vector256<float>.Count * 2);
                // Stride is counted in float elements, matching the float* being advanced.
                ip += Vector256<float>.Count * channels;

                if (Fma.IsSupported)
                {
                    ax0 = Fma.MultiplyAdd(Avx.LoadVector256(mp), iv0, ax0);
                    ax1 = Fma.MultiplyAdd(Avx.LoadVector256(mp + Vector256<float>.Count), iv1, ax1);
                    ax2 = Fma.MultiplyAdd(Avx.LoadVector256(mp + Vector256<float>.Count * 2), iv2, ax2);
                }
                else
                {
                    ax0 = Avx.Add(ax0, Avx.Multiply(iv0, Avx.LoadVector256(mp)));
                    ax1 = Avx.Add(ax1, Avx.Multiply(iv1, Avx.LoadVector256(mp + Vector256<float>.Count)));
                    ax2 = Avx.Add(ax2, Avx.Multiply(iv2, Avx.LoadVector256(mp + Vector256<float>.Count * 2)));
                }

                mp += Vector256<float>.Count * channels;
            }

            // Viewing the three 256-bit accumulators as six consecutive 128-bit blocks
            // b0..b5, fold them down to three: b0+b3, b1+b4, b2+b5. This keeps the same
            // element layout that the 128-bit loop below accumulates into.
            av0 = Sse.Add(ax0.GetLower(), ax1.GetUpper());
            av1 = Sse.Add(ax0.GetUpper(), ax2.GetLower());
            av2 = Sse.Add(ax1.GetLower(), ax2.GetUpper());
        }
        else
        {
            av0 = av1 = av2 = Vector128<float>.Zero;
        }

        // Remaining 4-weight groups (or all of them when AVX is not used).
        for (; lcnt != 0; lcnt--)
        {
            var iv0 = Sse.LoadVector128(ip);
            var iv1 = Sse.LoadVector128(ip + Vector128<float>.Count);
            var iv2 = Sse.LoadVector128(ip + Vector128<float>.Count * 2);
            ip += Vector128<float>.Count * channels;

            if (Fma.IsSupported)
            {
                av0 = Fma.MultiplyAdd(Sse.LoadVector128(mp), iv0, av0);
                av1 = Fma.MultiplyAdd(Sse.LoadVector128(mp + Vector128<float>.Count), iv1, av1);
                av2 = Fma.MultiplyAdd(Sse.LoadVector128(mp + Vector128<float>.Count * 2), iv2, av2);
            }
            else
            {
                av0 = Sse.Add(av0, Sse.Multiply(iv0, Sse.LoadVector128(mp)));
                av1 = Sse.Add(av1, Sse.Multiply(iv1, Sse.LoadVector128(mp + Vector128<float>.Count)));
                av2 = Sse.Add(av2, Sse.Multiply(iv2, Sse.LoadVector128(mp + Vector128<float>.Count * 2)));
            }

            mp += Vector128<float>.Count * channels;
        }

        // Horizontal reduction. The 12 accumulated floats are treated as interleaved
        // triples (element i belongs to output i % 3). The shuffles line up, in
        // elements 0..2 of avs0, every partial sum for outputs 0..2 EXCEPT the one
        // already sitting in element 0 of av0 / av1 / av2 respectively.
        var avs0 = Sse.Add(Sse.Add(
            Sse.Shuffle(av0, av0, 0b_00_10_01_11),
            Sse.Shuffle(av1, av1, 0b_00_01_11_10)),
            Sse.Shuffle(av2, av2, 0b_00_11_10_01)
        );
        // Bring element 1 (avs1) and element 2 (avs2) of avs0 down to element 0.
        var avs1 = Sse3.IsSupported ?
            Sse3.MoveHighAndDuplicate(avs0) :
            Sse.Shuffle(avs0, avs0, 0b_11_11_01_01);
        var avs2 = Sse.UnpackHigh(avs0, avs0);

        // Add the remaining element-0 partials and store the three sums.
        tp[0] = Sse.AddScalar(av0, avs0).ToScalar();
        tp[1] = Sse.AddScalar(av1, avs1).ToScalar();
        tp[2] = Sse.AddScalar(av2, avs2).ToScalar();
        tp += tstride;
    }
}
/// <summary>
/// Splits a 256-bit vector into its two 128-bit halves.
/// </summary>
/// <typeparam name="T">The element type of the vector.</typeparam>
/// <param name="vector">The vector to split.</param>
/// <param name="low">Receives the lower 128 bits of <paramref name="vector"/>.</param>
/// <param name="high">Receives the upper 128 bits of <paramref name="vector"/>.</param>
public static void GetLowHigh<T>(Vector256<T> vector, out Vector128<T> low, out Vector128<T> high)
    where T : struct
{
    // Lower half is evaluated first, then upper, and both are assigned together.
    (low, high) = (vector.GetLower(), vector.GetUpper());
}