/// <summary>
/// Computes one score per lane as the sum of an exploration term U and a value term Q.
/// </summary>
/// <param name="vW">Per-lane accumulated W values; they enter Q with a negative sign.</param>
/// <param name="vN">Per-lane visit counts.</param>
/// <param name="vP">Per-lane prior values used in the numerator of U.</param>
/// <param name="vVirtualLossMultiplier">Per-lane factor applied to <paramref name="vNInFlight"/>.</param>
/// <param name="cpuctSqrtParentN">Scalar factor multiplied into every prior.</param>
/// <param name="uctDenominatorPower">Power applied to (N + NInFlight) in the denominator of U.</param>
/// <param name="vQWhenNoChildren">Q used for lanes where N + NInFlight is not greater than zero.</param>
/// <param name="vNInFlight">Per-lane in-flight visit counts.</param>
/// <returns>U + Q for each lane.</returns>
private static Vector256<float> ComputeScores(Vector256<float> vW, Vector256<float> vN, Vector256<float> vP,
                                              Vector256<float> vVirtualLossMultiplier, float cpuctSqrtParentN,
                                              float uctDenominatorPower, Vector256<float> vQWhenNoChildren,
                                              Vector256<float> vNInFlight)
{
    // Total visits per lane: completed plus in flight.
    Vector256<float> vVisits = Avx.Add(vN, vNInFlight);

    // Contribution of the in-flight visits to Q.
    Vector256<float> vInFlightTerm = Avx.Multiply(vNInFlight, vVirtualLossMultiplier);

    // visits^power, with the two common powers handled without calling ToPower.
    Vector256<float> vPoweredVisits;
    if (uctDenominatorPower == 1.0f)
    {
        vPoweredVisits = vVisits;
    }
    else if (uctDenominatorPower == 0.5f)
    {
        vPoweredVisits = Avx.Sqrt(vVisits);
    }
    else
    {
        vPoweredVisits = ToPower(vVisits, uctDenominatorPower);
    }

    // U = (p * cpuct * sqrt(parentN)) / (1 + visits^power)
    Vector256<float> vExploration = Avx.Divide(
        Avx.Multiply(vP, Vector256.Create(cpuctSqrtParentN)),
        Avx.Add(vOnes, vPoweredVisits));

    // Q for lanes that have visits; lanes with zero visits divide by zero here
    // but are replaced by the blend below.
    Vector256<float> vQVisited = Avx.Divide(Avx.Subtract(vInFlightTerm, vW), vVisits);
    Vector256<float> vQUnvisited = Avx.Add(vQWhenNoChildren, vInFlightTerm);

    // Mask is set where visits > 0, i.e. where the visited form of Q applies.
    Vector256<float> vHasVisits = Avx.Compare(vVisits, vZeros, FloatComparisonMode.OrderedGreaterThanSignaling);
    Vector256<float> vQ = Avx.BlendVariable(vQUnvisited, vQVisited, vHasVisits);

    return Avx.Add(vExploration, vQ);
}
private static Vector3b EqualsImpl(Vector3d a, Vector3d b) { if (Avx.IsSupported) { return((Vector3b)Avx.Compare((Vector256 <double>)a, (Vector256 <double>)b, FloatComparisonMode.OrderedEqualSignaling)); } else {
/// <summary>
/// Lane-wise <c>left &gt; right</c> comparison of four doubles.
/// </summary>
/// <param name="left">Left-hand operands.</param>
/// <param name="right">Right-hand operands.</param>
/// <returns>
/// A vector whose lanes are all-ones where <paramref name="left"/> is greater than
/// <paramref name="right"/> and all-zeros otherwise (including lanes where either operand is NaN).
/// </returns>
public static Vector256<double> CompareGreaterThan(Vector256<double> left, Vector256<double> right)
{
    if (Avx.IsSupported)
    {
        // Ordered predicate: a NaN in either operand yields false, matching the scalar '>' operator.
        // The previous "not less-than-or-equal (unordered)" predicate reported true for NaN lanes.
        return Avx.Compare(left, right, FloatComparisonMode.OrderedGreaterThanNonSignaling);
    }

    return CompareGreaterThan_Software(left, right);
}
/// <summary>
/// Lane-wise <c>left &lt;= right</c> comparison of four doubles.
/// Uses the AVX ordered, signaling less-than-or-equal predicate when AVX is available
/// and the software implementation otherwise.
/// </summary>
/// <param name="left">Left-hand operands.</param>
/// <param name="right">Right-hand operands.</param>
/// <returns>The per-lane comparison mask.</returns>
public static Vector256<double> CompareLessThanOrEqual(Vector256<double> left, Vector256<double> right)
    => Avx.IsSupported
        ? Avx.Compare(left, right, FloatComparisonMode.OrderedLessThanOrEqualSignaling)
        : CompareLessThanOrEqual_Software(left, right);
/// <summary>
/// Lane-wise equality comparison of eight floats.
/// </summary>
/// <param name="left">Left-hand operands.</param>
/// <param name="right">Right-hand operands.</param>
/// <returns>
/// A vector whose lanes are all-ones where the operands are equal and all-zeros otherwise.
/// On the AVX path a lane containing NaN in either operand compares as not equal.
/// </returns>
public static Vector256<float> CompareEqual(Vector256<float> left, Vector256<float> right)
{
    if (Avx.IsSupported)
    {
        // Ordered equality: NaN never compares equal, matching the scalar '==' operator.
        // The previous unordered predicate reported "equal" whenever either lane was NaN.
        return Avx.Compare(left, right, FloatComparisonMode.OrderedEqualNonSignaling);
    }

    // No AVX: compare the two 128-bit halves separately and stitch the masks back together.
    return FromLowHigh(
        CompareEqual(left.GetLower(), right.GetLower()),
        CompareEqual(left.GetUpper(), right.GetUpper()));
}
/// <summary>
/// Fast approximate natural logarithm of eight floats.
/// Absolute error bounded by 1e-4.
/// </summary>
/// <param name="x">Input lanes.</param>
/// <returns>
/// The approximation per lane. Lanes that compare less than zero produce NaN.
/// Zero is not treated specially: it takes the same path as positive inputs.
/// </returns>
public static Vector256<float> Log(Vector256<float> x)
{
    // According to BenchmarkDotNet, isolating all the constants up-front
    // yields nearly 10% speed-up.
    const float OffsetForNonNegative = -89.970756366f;
    const float ResultForNegative = float.NaN; // behavior of MathF.Log() on negative numbers
    const float Coeff1 = 3.529304993f;
    const float Coeff2 = -2.461222105f;
    const float Coeff3 = 1.130626167f;
    const float Coeff4 = -0.288739945f;
    const float Coeff5 = 3.110401639e-2f;
    const float Ln2 = 0.6931471805f;
    const int MantissaMask = 0x7FFFFF;
    const int OneAsBits = 0x3F800000;

    Vector256<int> bits = x.As<float, int>();

    // Upper bits of the float (sign + exponent field) converted to a float value.
    Vector256<float> exponent = Avx.ConvertToVector256Single(Avx2.ShiftRightArithmetic(bits, 23));

    // offset = x < 0 ? NaN : -89.970756366f
    Vector256<float> isNegative = Avx.Compare(x, Vector256<float>.Zero, FloatComparisonMode.OrderedLessThanNonSignaling);
    Vector256<float> offset = Avx.BlendVariable(
        Vector256.Create(OffsetForNonNegative),
        Vector256.Create(ResultForNegative),
        isNegative);

    // Keep the mantissa bits and force the exponent bits of 1.0f.
    Vector256<float> mantissa = Avx2.Or(
        Avx2.And(bits, Vector256.Create(MantissaMask)),
        Vector256.Create(OneAsBits)).As<int, float>();

    // Horner evaluation of
    //   m * (c1 + m * (c2 + m * (c3 + m * (c4 + m * c5))))
    Vector256<float> poly = Avx.Multiply(mantissa, Vector256.Create(Coeff5));
    poly = Avx.Add(Vector256.Create(Coeff4), poly);
    poly = Avx.Multiply(mantissa, poly);
    poly = Avx.Add(Vector256.Create(Coeff3), poly);
    poly = Avx.Multiply(mantissa, poly);
    poly = Avx.Add(Vector256.Create(Coeff2), poly);
    poly = Avx.Multiply(mantissa, poly);
    poly = Avx.Add(Vector256.Create(Coeff1), poly);
    poly = Avx.Multiply(mantissa, poly);

    // ... + (offset + ln(2) * exponent)
    Vector256<float> exponentTerm = Avx.Add(offset, Avx.Multiply(Vector256.Create(Ln2), exponent));

    return Avx.Add(poly, exponentTerm);
}
/// <summary>
/// Slab-based ray/box intersection against <c>Minimum</c>/<c>Maximum</c> using AVX.
/// </summary>
/// <param name="ray">Ray whose <c>Origin</c> and <c>Direction</c> are tested.</param>
/// <returns>
/// The entry and exit distances along the ray, or (NaN, NaN) when the entry distance
/// exceeds the exit distance or the exit distance is negative.
/// </returns>
public (double near, double far) IntersectAVX(Ray ray)
{
    Vector256<double> rayOrigin = (Vector256<double>)ray.Origin;
    Vector256<double> rayDirection = (Vector256<double>)ray.Direction;
    Vector256<double> zero = new Vector256<double>();
    Vector256<double> lower = (Vector256<double>)Minimum;
    Vector256<double> upper = (Vector256<double>)Maximum;

    // Axes where the direction is zero and the origin already lies within the slab
    // get infinite bounds, so the division below cannot produce NaN for them.
    Vector256<double> directionIsZero = Avx.Compare(rayDirection, zero, FloatComparisonMode.OrderedEqualNonSignaling);
    Vector256<double> originAboveLower = Avx.Compare(rayOrigin, lower, FloatComparisonMode.OrderedGreaterThanOrEqualNonSignaling);
    Vector256<double> originBelowUpper = Avx.Compare(rayOrigin, upper, FloatComparisonMode.OrderedLessThanOrEqualNonSignaling);
    Vector256<double> skipSlab = Avx.And(directionIsZero, Avx.And(originAboveLower, originBelowUpper));

    lower = Avx.BlendVariable(lower, SIMDHelpers.BroadcastScalar4(double.NegativeInfinity), skipSlab);
    upper = Avx.BlendVariable(upper, SIMDHelpers.BroadcastScalar4(double.PositiveInfinity), skipSlab);

    // The blend selects on each lane's most significant bit, so passing the direction
    // itself as the mask swaps the bounds on axes with a negative direction
    // (a direction of -0 has the sign bit set as well).
    Vector256<double> entryBound = Avx.BlendVariable(lower, upper, rayDirection);
    Vector256<double> exitBound = Avx.BlendVariable(upper, lower, rayDirection);

    Vector256<double> inverseDirection = Avx.Divide(Vector256.Create(1D), rayDirection);
    Vector256<double> entry4 = Avx.Multiply(Avx.Subtract(entryBound, rayOrigin), inverseDirection);
    Vector256<double> exit4 = Avx.Multiply(Avx.Subtract(exitBound, rayOrigin), inverseDirection);

    // Horizontal max of the entry distances and min of the exit distances,
    // leaving each result in element 0.
    Vector128<double> entry2 = Sse2.Max(entry4.GetLower(), entry4.GetUpper());
    entry2 = Sse2.MaxScalar(entry2, SIMDHelpers.Swap(entry2));

    Vector128<double> exit2 = Sse2.Min(exit4.GetLower(), exit4.GetUpper());
    exit2 = Sse2.MinScalar(exit2, SIMDHelpers.Swap(exit2));

    bool entryAfterExit = Sse2.CompareScalarOrderedGreaterThan(entry2, exit2);
    bool exitIsNegative = Sse2.CompareScalarOrderedLessThan(exit2, new Vector128<double>());
    if (entryAfterExit || exitIsNegative)
    {
        return (double.NaN, double.NaN);
    }

    return (entry2.ToScalar(), exit2.ToScalar());
}
/// <summary>
/// Compares two float arrays element by element, eight elements at a time with AVX,
/// then hands the remaining elements to <c>SequenceEqual_Soft</c>.
/// </summary>
/// <param name="array1">First array.</param>
/// <param name="array2">Second array.</param>
/// <returns>
/// <c>false</c> if the lengths differ or any vector-compared pair differs; a pair where
/// either element is NaN counts as different, as with the scalar <c>!=</c> operator.
/// Otherwise the result of <c>SequenceEqual_Soft</c> for the tail.
/// </returns>
public static bool SequenceEqual_Avx(float[] array1, float[] array2)
{
    if (array1.Length != array2.Length)
    {
        return false;
    }

    int length = array1.Length;
    if (length == 0)
    {
        return true;
    }

    fixed (float* ptr1 = &array1[0])
    fixed (float* ptr2 = &array2[0])
    {
        int i = 0;

        // For arrays shorter than one vector this loop does not run and the whole
        // comparison is delegated to SequenceEqual_Soft starting at index 0.
        for (; i <= length - Vector256<float>.Count; i += Vector256<float>.Count) //16 for AVX512
        {
            var vec1 = Avx.LoadVector256(ptr1 + i);
            var vec2 = Avx.LoadVector256(ptr2 + i);

            // Unordered not-equal: a lane is flagged when the values differ OR either is NaN.
            // An ordered not-equal predicate never flags NaN lanes, so arrays that differ
            // only where one side is NaN would be reported as equal by the vector loop.
            var differs = Avx.Compare(vec1, vec2, FloatComparisonMode.UnorderedNotEqualNonSignaling);
            if (!Avx.TestZ(differs, differs))
            {
                return false;
            }
        }

        // TODO: shift the last comparison vector to ignore garbage values
        // so we won't have to call SequenceEqual_Soft (in case array1.Length % 8 != 0)
        return SequenceEqual_Soft(ptr1, ptr2, i, length);
    }
}
/// <summary>
/// Compares two float arrays element by element, eight elements at a time with AVX,
/// then hands the remaining elements to <c>SequenceEqual_Soft</c>.
/// </summary>
/// <param name="array1">First array.</param>
/// <param name="array2">Second array.</param>
/// <returns>
/// <c>false</c> if the lengths differ or any vector-compared pair differs; a pair where
/// either element is NaN counts as different, as with the scalar <c>!=</c> operator.
/// Otherwise the result of <c>SequenceEqual_Soft</c> for the tail.
/// </returns>
public static bool SequenceEqual_Avx(float[] array1, float[] array2)
{
    if (array1.Length != array2.Length)
    {
        return false;
    }

    int length = array1.Length;
    if (length == 0)
    {
        return true;
    }

    fixed (float* ptr1 = &array1[0])
    fixed (float* ptr2 = &array2[0])
    {
        int i = 0;

        // For arrays shorter than one vector this loop does not run and the whole
        // comparison is delegated to SequenceEqual_Soft starting at index 0.
        for (; i <= length - Vector256<float>.Count; i += Vector256<float>.Count) //16 for AVX512
        {
            var vec1 = Avx.LoadVector256(ptr1 + i);
            var vec2 = Avx.LoadVector256(ptr2 + i);

            // Unordered not-equal: a lane is flagged when the values differ OR either is NaN.
            // An ordered not-equal predicate never flags NaN lanes, so arrays that differ
            // only where one side is NaN would be reported as equal by the vector loop.
            var differs = Avx.Compare(vec1, vec2, FloatComparisonMode.UnorderedNotEqualNonSignaling);
            if (!Avx.TestZ(differs, differs))
            {
                return false;
            }
        }

        // Fewer than eight elements remain; compare them one by one.
        return SequenceEqual_Soft(ptr1, ptr2, i, length);
    }
}
/// <summary>
/// Lane-wise <c>left != right</c> comparison of four doubles.
/// </summary>
/// <param name="left">Left-hand operands.</param>
/// <param name="right">Right-hand operands.</param>
/// <returns>
/// A vector whose lanes are all-ones where the operands differ or either operand is NaN,
/// and all-zeros where they are equal. This matches the scalar <c>!=</c> operator, for which
/// <c>double.NaN != x</c> is always true; an ordered predicate would report false for NaN lanes.
/// </returns>
public static Vector256<double> op_Inequality(Vector256<double> left, Vector256<double> right)
    => Avx.Compare(left, right, FloatComparisonMode.UnorderedNotEqualNonSignaling);
/// <summary>
/// Reports whether two vectors of four doubles differ in at least one lane.
/// </summary>
/// <param name="vector1">First vector.</param>
/// <param name="vector2">Second vector.</param>
/// <returns>
/// <c>true</c> if any lane differs or contains NaN in either operand; <c>false</c> when all
/// four lanes compare equal.
/// </returns>
private static bool NotEqual(Vector256<double> vector1, Vector256<double> vector2)
{
    // Unordered not-equal flags NaN lanes as different, like the scalar '!=' operator.
    // The ordered variant silently skips any lane where either side is NaN.
    Vector256<double> differs = Avx.Compare(vector1, vector2, FloatComparisonMode.UnorderedNotEqualNonSignaling);
    return Avx.MoveMask(differs) != 0;
}
/// <summary>
/// Searches the cost matrix, column by column, for the first entry that is less than or
/// equal to zero and lies in an uncovered row and an uncovered column.
/// </summary>
/// <param name="costs">Cost matrix; its backing store is indexed as <c>column * RowCount + row</c>.</param>
/// <param name="rowsCovered">Per-row flags; rows whose flag is <c>true</c> are skipped.</param>
/// <param name="colsCovered">Per-column flags; columns whose flag is <c>true</c> are skipped.</param>
/// <param name="zeroLocation">
/// The location found, or <c>new Location(-1, -1)</c> when the method returns <c>false</c>.
/// </param>
/// <returns><c>true</c> if a matching entry was found; otherwise <c>false</c>.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="rowsCovered"/> or <paramref name="colsCovered"/> is <c>null</c>.
/// </exception>
private static unsafe bool TryFindZero(Storage<float> costs, [NotNull] bool[] rowsCovered, [NotNull] bool[] colsCovered, out Location zeroLocation)
{
    if (rowsCovered == null)
    {
        throw new ArgumentNullException(nameof(rowsCovered));
    }

    if (colsCovered == null)
    {
        throw new ArgumentNullException(nameof(colsCovered));
    }

    var rowCount = costs.RowCount;
    var columnCount = costs.ColumnCount;
    var storage = costs.ColumnMajorBackingStore;

    // Only AVX instructions are used below, so AVX (not AVX2) is the required feature.
    // The lowest set bit is located with BitOperations, which works on every CPU, instead of
    // the BMI1 intrinsic, which throws on hardware without BMI1 and was never checked for.
    if (Avx.IsSupported && rowCount >= Vector256<float>.Count)
    {
        var lanes = Vector256<float>.Count;
        var maxVectorOffset = rowCount - rowCount % lanes;
        var zeroVector = Vector256<float>.Zero;

        // One bit per row within each batch of eight rows; a set bit means "row is NOT covered".
        var uncoveredMasks = new uint[maxVectorOffset / lanes];
        for (var row = 0; row < maxVectorOffset; row++)
        {
            if (!rowsCovered[row])
            {
                uncoveredMasks[row / lanes] |= 1u << (row % lanes);
            }
        }

        fixed (float* storagePtr = storage)
        {
            for (var column = 0; column < columnCount; column++)
            {
                if (colsCovered[column])
                {
                    continue;
                }

                var basePtr = storagePtr + rowCount * column;

                // Vectorized part: eight rows of this column per step.
                for (int row = 0, batch = 0; row < maxVectorOffset; row += lanes, batch++)
                {
                    var rowVector = Avx.LoadVector256(basePtr + row);
                    var nonPositive = Avx.Compare(rowVector, zeroVector, FloatComparisonMode.OrderedLessThanOrEqualNonSignaling);

                    // Keep only lanes that are <= 0 and belong to uncovered rows.
                    var candidates = (uint)Avx.MoveMask(nonPositive) & uncoveredMasks[batch];
                    if (candidates == 0)
                    {
                        continue;
                    }

                    var zeroRow = row + System.Numerics.BitOperations.TrailingZeroCount(candidates);
                    zeroLocation = new Location(zeroRow, column);
                    return true;
                }

                // Scalar tail for the rows that do not fill a whole vector.
                for (var row = maxVectorOffset; row < rowCount; row++)
                {
                    if (!rowsCovered[row] && storage[column * rowCount + row] <= 0)
                    {
                        zeroLocation = new Location(row, column);
                        return true;
                    }
                }
            }
        }
    }
    else
    {
        for (var column = 0; column < columnCount; column++)
        {
            if (colsCovered[column])
            {
                continue;
            }

            for (var row = 0; row < rowCount; row++)
            {
                if (!rowsCovered[row] && storage[column * rowCount + row] <= 0)
                {
                    zeroLocation = new Location(row, column);
                    return true;
                }
            }
        }
    }

    zeroLocation = new Location(-1, -1);
    return false;
}
/// <summary>
/// Lane-wise <c>left &lt;= right</c> comparison of four doubles using the AVX ordered,
/// non-signaling less-than-or-equal predicate.
/// </summary>
/// <param name="left">Left-hand operands.</param>
/// <param name="right">Right-hand operands.</param>
/// <returns>The per-lane comparison mask.</returns>
public static Vector256<double> op_LessThanOrEqual(Vector256<double> left, Vector256<double> right)
{
    const FloatComparisonMode Predicate = FloatComparisonMode.OrderedLessThanOrEqualNonSignaling;
    return Avx.Compare(left, right, Predicate);
}
/// <summary>
/// Sizes the point grid from <c>TOTALBYTES</c>, fills the x/y coordinate arrays and runs the
/// escape-time iteration eight points at a time with AVX. The per-point iteration counts are
/// written to <c>results2</c> and the final squared magnitudes to <c>testValue2</c>.
/// </summary>
public unsafe void Vector256Mandel()
{
    // ---- grid setup: both resolutions are rounded down to a multiple of 8 ----
    int floatL3Size = TOTALBYTES / sizeof(float);

    resolutionX = (int)MathF.Floor(MathF.Sqrt(floatL3Size * ratioy_x));
    resolutionX -= resolutionX % 8;

    resolutionY = (int)MathF.Floor(resolutionX * ratioy_x);
    resolutionY -= resolutionY % 8;

    STEP_X = (RIGHT_X - LEFT_X) / resolutionX;
    STEP_Y = STEP_X; // ratioy_x * STEP_X; Bug from reddit comment
    numberOfPoints = resolutionX * resolutionY;

    results2 = new float[numberOfPoints];
    xPoints = new float[resolutionX];
    yPoints = new float[resolutionY];

    for (int column = 0; column < resolutionX; column++)
    {
        xPoints.Span[column] = LEFT_X + column * STEP_X;
    }

    for (int row = 0; row < resolutionY; row++)
    {
        yPoints.Span[row] = TOP_Y - row * STEP_Y;
    }

    // ---- iteration ----
    const int maxInter = 256;

    ReadOnlySpan<float> ySpan = yPoints.Span;
    ReadOnlySpan<Vector256<float>> xSpan = MemoryMarshal.Cast<float, Vector256<float>>(xPoints.Span);
    Span<Vector256<float>> res = MemoryMarshal.Cast<float, Vector256<float>>(results2.Span);
    Span<Vector256<float>> testSpan = MemoryMarshal.Cast<float, Vector256<float>>(testValue2.Span);

    var oneVec = Vector256.Create(1.0f);
    var fourVec = Vector256.Create(4.0f);
    int resVectorNumber = 0;

    for (int countY = 0; countY < ySpan.Length; countY++)
    {
        var currYVec = Vector256.Create(ySpan[countY]);

        for (int countX = 0; countX < xSpan.Length; countX++)
        {
            Vector256<float> currXVec = xSpan[countX];

            var xSquVec = Vector256.Create(0.0f);
            var ySquVec = Vector256.Create(0.0f);
            var zSquVec = Vector256.Create(0.0f);
            var interVec = Vector256.Create(0.0f);

            // 1 for lanes that have never left the radius, 0 forever once a lane has.
            Vector256<float> aliveIncrement = oneVec;
            Vector256<float> magnitude;
            int inter = 0;
            bool goOn;

            do
            {
                // x' = x^2 - y^2 + cx ; y' = (x + y)^2 - y^2 - x^2 + cy  (= 2xy + cy)
                Vector256<float> xVec = Avx.Add(Avx.Subtract(xSquVec, ySquVec), currXVec);
                Vector256<float> yVec = Avx.Add(Avx.Subtract(Avx.Subtract(zSquVec, ySquVec), xSquVec), currYVec);

                xSquVec = Avx.Multiply(xVec, xVec);
                ySquVec = Avx.Multiply(yVec, yVec);

                Vector256<float> xPlusY = Avx.Add(xVec, yVec);
                zSquVec = Avx.Multiply(xPlusY, xPlusY);

                magnitude = Avx.Add(xSquVec, ySquVec);
                Vector256<float> withinRadius = Avx.Compare(magnitude, fourVec, FloatComparisonMode.OrderedLessThanOrEqualNonSignaling); // <= 4.0?

                // Takes the previous increment where the test is true, zero otherwise.
                aliveIncrement = Avx.BlendVariable(Vector256<float>.Zero, aliveIncrement, withinRadius);

                // Any of the values still alive, and inter still below the cutoff value?
                goOn = Avx.MoveMask(withinRadius) > 0 && inter < maxInter;
                if (goOn)
                {
                    interVec = Avx.Add(interVec, aliveIncrement);
                    inter++;
                }
            }
            while (goOn);

            testSpan[resVectorNumber] = magnitude;
            res[resVectorNumber] = interVec;
            resVectorNumber++;
        }
    }
}
/// <summary>
/// Finds the index of the first element of <c>values</c> that is greater than or equal to
/// <c>limitToFind</c>, using aligned 256-bit AVX loads when AVX is available.
/// </summary>
/// <returns>The zero-based index of the first matching element, or -1 if there is none.</returns>
public int IndexOfFirstElementGreaterOrEqualToLimit_Avx()
{
    var values = this.values;
    float limit = this.limitToFind;
    int length = values.Length;

    if (!Avx.IsSupported)
    {
        for (int i = 0; i < length; i++)
        {
            if (values[i] >= limit)
            {
                return i;
            }
        }

        return -1;
    }

    unsafe
    {
        fixed (float* valuesPtr = values)
        {
            const int VectorBytes = 32;
            int lanes = Vector256<float>.Count;

            // Number of leading elements before the first 32-byte aligned address.
            long misalignedBytes = (long)valuesPtr & (VectorBytes - 1);
            int alignmentOffset = (int)(((VectorBytes - misalignedBytes) & (VectorBytes - 1)) / sizeof(float));

            // Never walk past the end of the array: for arrays shorter than the alignment
            // offset the unclamped prologue would read (and possibly match) memory that
            // does not belong to the array.
            int head = alignmentOffset < length ? alignmentOffset : length;

            // handle first values sequentially until we hit the 256bit alignment boundary
            for (int i = 0; i < head; i++)
            {
                if (valuesPtr[i] >= limit)
                {
                    return i;
                }
            }

            // handle vectorizable items: whole vectors that fit between head and the end
            int vectorizableLength = head + ((length - head) / lanes) * lanes;
            var limitVector = Vector256.Create(limit);

            for (int i = head; i < vectorizableLength; i += lanes)
            {
                var valuesVector = Avx.LoadAlignedVector256(valuesPtr + i);
                var comparisonResultVector = Avx.Compare(valuesVector, limitVector, FloatComparisonMode.OrderedGreaterThanOrEqualNonSignaling);

                // One bit per lane; the lowest set bit is the first match in this vector.
                var comparisonResult = (uint)Avx.MoveMask(comparisonResultVector);
                if (comparisonResult != 0)
                {
                    // BitOperations works on every CPU; the BMI1 intrinsic throws when BMI1
                    // is absent, and only AVX support has been verified here.
                    return i + System.Numerics.BitOperations.TrailingZeroCount(comparisonResult);
                }
            }

            // handle remaining items
            for (int i = vectorizableLength; i < length; i++)
            {
                if (values[i] >= limit)
                {
                    return i;
                }
            }

            return -1;
        }
    }
}
// Test entry point for Avx.Compare with FloatComparisonMode.OrderedEqualNonSignaling.
// Returns Pass when every check succeeds (or when AVX is not supported, in which case
// nothing is checked) and Fail at the first mismatch.
// NOTE(review): TestTable<T>, Pass and Fail are declared outside this block; TestTable
// presumably exposes the three arrays plus pointers to them - confirm against its definition.
static unsafe int Main(string[] args) {
    int testResult = Pass;
    if (Avx.IsSupported) {
        using (TestTable <float> floatTable = new TestTable <float>(new float[8] { 1, -5, 100, 0, 1, -5, 100, 0 }, new float[8] { 22, -5, -50, 0, 22, -1, -50, 0 }, new float[8]))
        using (TestTable <double> doubleTable = new TestTable <double>(new double[4] { 1, -5, 100, 0 }, new double[4] { 1, 1, 50, 0 }, new double[4])) {
            // 256-bit float comparison, result written to the float out array.
            var vf1 = Unsafe.Read <Vector256 <float> >(floatTable.inArray1Ptr);
            var vf2 = Unsafe.Read <Vector256 <float> >(floatTable.inArray2Ptr);
            var vf3 = Avx.Compare(vf1, vf2, FloatComparisonMode.OrderedEqualNonSignaling);
            Unsafe.Write(floatTable.outArrayPtr, vf3);
            // 256-bit double comparison, result written to the double out array.
            var vd1 = Unsafe.Read <Vector256 <double> >(doubleTable.inArray1Ptr);
            var vd2 = Unsafe.Read <Vector256 <double> >(doubleTable.inArray2Ptr);
            var vd3 = Avx.Compare(vd1, vd2, FloatComparisonMode.OrderedEqualNonSignaling);
            Unsafe.Write(doubleTable.outArrayPtr, vd3);
            // Each output lane must be all-ones (-1 as an integer) where the inputs are equal
            // and zero otherwise.
            for (int i = 0; i < floatTable.outArray.Length; i++) {
                if (BitConverter.SingleToInt32Bits(floatTable.outArray[i]) != (floatTable.inArray1[i] == floatTable.inArray2[i] ? -1 : 0)) {
                    Console.WriteLine("Avx Compare failed on float:");
                    foreach (var item in floatTable.outArray) {
                        Console.Write(item + ", ");
                    }
                    Console.WriteLine();
                    return(Fail);
                }
            }
            for (int i = 0; i < doubleTable.outArray.Length; i++) {
                if (BitConverter.DoubleToInt64Bits(doubleTable.outArray[i]) != (doubleTable.inArray1[i] == doubleTable.inArray2[i] ? -1 : 0)) {
                    Console.WriteLine("Avx Compare failed on double:");
                    foreach (var item in doubleTable.outArray) {
                        Console.Write(item + ", ");
                    }
                    Console.WriteLine();
                    return(Fail);
                }
            }
            // Same checks with the 128-bit overloads; only the first half of each out array
            // is overwritten and verified.
            var svf1 = Unsafe.Read <Vector128 <float> >(floatTable.inArray1Ptr);
            var svf2 = Unsafe.Read <Vector128 <float> >(floatTable.inArray2Ptr);
            var svf3 = Avx.Compare(svf1, svf2, FloatComparisonMode.OrderedEqualNonSignaling);
            Unsafe.Write(floatTable.outArrayPtr, svf3);
            var svd1 = Unsafe.Read <Vector128 <double> >(doubleTable.inArray1Ptr);
            var svd2 = Unsafe.Read <Vector128 <double> >(doubleTable.inArray2Ptr);
            var svd3 = Avx.Compare(svd1, svd2, FloatComparisonMode.OrderedEqualNonSignaling);
            Unsafe.Write(doubleTable.outArrayPtr, svd3);
            for (int i = 0; i < floatTable.outArray.Length / 2; i++) {
                if (BitConverter.SingleToInt32Bits(floatTable.outArray[i]) != (floatTable.inArray1[i] == floatTable.inArray2[i] ? -1 : 0)) {
                    Console.WriteLine("Avx Compare Vector128 failed on float:");
                    foreach (var item in floatTable.outArray) {
                        Console.Write(item + ", ");
                    }
                    Console.WriteLine();
                    return(Fail);
                }
            }
            for (int i = 0; i < doubleTable.outArray.Length / 2; i++) {
                if (BitConverter.DoubleToInt64Bits(doubleTable.outArray[i]) != (doubleTable.inArray1[i] == doubleTable.inArray2[i] ? -1 : 0)) {
                    Console.WriteLine("Avx Compare Vector128 failed on double:");
                    foreach (var item in doubleTable.outArray) {
                        Console.Write(item + ", ");
                    }
                    Console.WriteLine();
                    return(Fail);
                }
            }
            // A comparison mode of 32 is expected to throw ArgumentOutOfRangeException;
            // reaching the statements after the call means the test failed.
            try {
                var ve = Avx.Compare(vf1, vf2, (FloatComparisonMode)32);
                Unsafe.Write(floatTable.outArrayPtr, ve);
                Console.WriteLine("Avx Compare failed on float with out-of-range argument:");
                return(Fail);
            }
            catch (ArgumentOutOfRangeException e) {
                testResult = Pass;
            }
            // NOTE(review): this double case writes to floatTable.outArrayPtr, not doubleTable;
            // the write only runs if the expected exception was NOT thrown.
            try {
                var ve = Avx.Compare(vd1, vd2, (FloatComparisonMode)32);
                Unsafe.Write(floatTable.outArrayPtr, ve);
                Console.WriteLine("Avx Compare failed on double with out-of-range argument:");
                return(Fail);
            }
            catch (ArgumentOutOfRangeException e) {
                testResult = Pass;
            }
            // Same out-of-range checks through reflection: the exception is expected to arrive
            // wrapped in a TargetInvocationException.
            try {
                var ve = typeof(Avx).GetMethod(nameof(Avx.Compare), new Type[] { typeof(Vector256 <Single>), typeof(Vector256 <Single>), typeof(FloatComparisonMode) })
                         .Invoke(null, new object[] { vf1, vf2, (FloatComparisonMode)32 });
                Console.WriteLine("Indirect-calling Avx Compare failed on float with out-of-range argument:");
                return(Fail);
            }
            catch (System.Reflection.TargetInvocationException e) {
                if (e.InnerException is ArgumentOutOfRangeException) {
                    testResult = Pass;
                }
                else {
                    Console.WriteLine("Indirect-calling Avx Compare failed on float with out-of-range argument:");
                    return(Fail);
                }
            }
            try {
                var ve = typeof(Avx).GetMethod(nameof(Avx.Compare), new Type[] { typeof(Vector256 <Double>), typeof(Vector256 <Double>), typeof(FloatComparisonMode) })
                         .Invoke(null, new object[] { vd1, vd2, (FloatComparisonMode)32 });
                Console.WriteLine("Indirect-calling Avx Compare failed on double with out-of-range argument:");
                return(Fail);
            }
            catch (System.Reflection.TargetInvocationException e) {
                if (e.InnerException is ArgumentOutOfRangeException) {
                    testResult = Pass;
                }
                else {
                    Console.WriteLine("Indirect-calling Avx Compare failed on double with out-of-range argument:");
                    return(Fail);
                }
            }
        }
    }
    return(testResult);
}
/// <summary>
/// Lane-wise <c>left &gt;= right</c> comparison of eight floats using the AVX ordered,
/// non-signaling greater-than-or-equal predicate.
/// </summary>
/// <param name="left">Left-hand operands.</param>
/// <param name="right">Right-hand operands.</param>
/// <returns>The per-lane comparison mask.</returns>
public static Vector256<float> op_GreaterThanOrEqual(Vector256<float> left, Vector256<float> right)
{
    const FloatComparisonMode Predicate = FloatComparisonMode.OrderedGreaterThanOrEqualNonSignaling;
    return Avx.Compare(left, right, Predicate);
}
/// <summary>
/// Lane-wise <c>left &lt; right</c> comparison of eight floats using the AVX ordered,
/// non-signaling less-than predicate.
/// </summary>
/// <param name="left">Left-hand operands.</param>
/// <param name="right">Right-hand operands.</param>
/// <returns>The per-lane comparison mask.</returns>
public static Vector256<float> op_LessThan(Vector256<float> left, Vector256<float> right)
{
    const FloatComparisonMode Predicate = FloatComparisonMode.OrderedLessThanNonSignaling;
    return Avx.Compare(left, right, Predicate);
}
/// <summary>
/// Lane-wise <c>left == right</c> comparison of eight floats using the AVX ordered,
/// non-signaling equality predicate.
/// </summary>
/// <param name="left">Left-hand operands.</param>
/// <param name="right">Right-hand operands.</param>
/// <returns>The per-lane comparison mask.</returns>
public static Vector256<float> op_Equality(Vector256<float> left, Vector256<float> right)
{
    const FloatComparisonMode Predicate = FloatComparisonMode.OrderedEqualNonSignaling;
    return Avx.Compare(left, right, Predicate);
}
/// <summary>
/// Runs the escape-time iteration over the grid described by <c>xPoints</c>/<c>yPoints</c>,
/// eight points at a time with AVX, and writes the per-point iteration counts to <c>results</c>.
/// Vectors whose eight points all lie inside the main cardioid, or all inside the period-2
/// bulb, skip the iteration and are assigned the value 255 directly.
/// </summary>
public unsafe void Vector256Mandel()
{
    const int maxInter = 256;

    ReadOnlySpan<float> ySpan = yPoints.Span;
    ReadOnlySpan<Vector256<float>> xSpan = MemoryMarshal.Cast<float, Vector256<float>>(xPoints.Span);
    Span<Vector256<float>> res = MemoryMarshal.Cast<float, Vector256<float>>(results.Span);
    int resVectorNumber = 0;

    Vector256<float> zeroVec = Vector256<float>.Zero;
    var oneVec = Vector256.Create(1.0f);
    var fourVec = Vector256.Create(4.0f);
    var one4Vec = Vector256.Create(0.25f);
    var one16Vec = Vector256.Create(1.0f / 16.0f);
    var shortcutValueVec = Vector256.Create(255.0f);

    for (int countY = 0; countY < ySpan.Length; countY++)
    {
        var currYVec = Vector256.Create(ySpan[countY]);

        // y^2 is the same for every vector of this row.
        Vector256<float> ySquaredRow = Avx.Multiply(currYVec, currYVec);

        for (int countX = 0; countX < xSpan.Length; countX++)
        {
            Vector256<float> currXVec = xSpan[countX];
            Vector256<float> interVec = zeroVec;

            // Main cardioid: with q = (x - 1/4)^2 + y^2, a point is outside when
            // q * (q + (x - 1/4)) > y^2 / 4.
            Vector256<float> xMinusQuarter = Avx.Subtract(currXVec, one4Vec);
            Vector256<float> qVec = Avx.Add(Avx.Multiply(xMinusQuarter, xMinusQuarter), ySquaredRow);
            Vector256<float> cardioidLhs = Avx.Multiply(qVec, Avx.Add(qVec, xMinusQuarter));
            Vector256<float> outsideCardioid = Avx.Compare(cardioidLhs, Avx.Multiply(one4Vec, ySquaredRow), FloatComparisonMode.OrderedGreaterThanNonSignaling);
            bool goOn = Avx.MoveMask(outsideCardioid) > 0;

            if (goOn)
            {
                // Period-2 bulb: a point is outside when (x + 1)^2 + y^2 > 1/16.
                Vector256<float> xPlusOne = Avx.Add(currXVec, oneVec);
                Vector256<float> bulbDistance = Avx.Add(Avx.Multiply(xPlusOne, xPlusOne), ySquaredRow);
                Vector256<float> outsideBulb = Avx.Compare(bulbDistance, one16Vec, FloatComparisonMode.OrderedGreaterThanNonSignaling);
                goOn = Avx.MoveMask(outsideBulb) > 0;
            }

            if (!goOn)
            {
                // Every lane is inside the cardioid or every lane is inside the bulb:
                // make all points the shortcut maximum. Previously only the bulb case did
                // this, so vectors entirely inside the cardioid were left at zero.
                interVec = shortcutValueVec;
            }
            else
            {
                Vector256<float> xSquVec = zeroVec;
                Vector256<float> ySquVec = zeroVec;
                Vector256<float> zSquVec = zeroVec;
                int inter = 0;

                while (goOn)
                {
                    // x' = x^2 - y^2 + cx ; y' = (x + y)^2 - y^2 - x^2 + cy  (= 2xy + cy)
                    Vector256<float> xVec = Avx.Add(Avx.Subtract(xSquVec, ySquVec), currXVec);
                    Vector256<float> yVec = Avx.Add(Avx.Subtract(Avx.Subtract(zSquVec, ySquVec), xSquVec), currYVec);

                    xSquVec = Avx.Multiply(xVec, xVec);
                    ySquVec = Avx.Multiply(yVec, yVec);

                    Vector256<float> xPlusY = Avx.Add(xVec, yVec);
                    zSquVec = Avx.Multiply(xPlusY, xPlusY);

                    Vector256<float> test = Avx.Compare(Avx.Add(xSquVec, ySquVec), fourVec, FloatComparisonMode.OrderedLessThanOrEqualNonSignaling); // <= 4.0?
                    Vector256<float> sumVector = Avx.BlendVariable(zeroVec, oneVec, test);

                    // Any of the values still alive, and inter still below the cutoff value?
                    goOn = (Avx.MoveMask(test) > 0) & (inter < maxInter);
                    if (goOn)
                    {
                        interVec = Avx.Add(interVec, sumVector);
                        inter++;
                    }
                }
            }

            res[resVectorNumber] = interVec;
            resVectorNumber++;
        }
    }
}
/// <summary>
/// Lane-wise <c>left &gt; right</c> comparison of four doubles using the AVX ordered,
/// non-signaling greater-than predicate.
/// </summary>
/// <param name="left">Left-hand operands.</param>
/// <param name="right">Right-hand operands.</param>
/// <returns>The per-lane comparison mask.</returns>
public static Vector256<double> op_GreaterThan(Vector256<double> left, Vector256<double> right)
{
    const FloatComparisonMode Predicate = FloatComparisonMode.OrderedGreaterThanNonSignaling;
    return Avx.Compare(left, right, Predicate);
}
/// <summary>
/// Computes escape-time iteration counts in single precision, eight horizontally adjacent
/// pixels at a time, for every <paramref name="increment"/>-th scanline starting at
/// <paramref name="startScanline"/>.
/// </summary>
/// <param name="iterations">Output grid indexed as [row, column]; its dimensions define height and width.</param>
/// <param name="startScanline">First row to process.</param>
/// <param name="increment">Row step between processed scanlines.</param>
/// <param name="offsetX">Passed through to <c>Impl.GetPointCoordinate</c>.</param>
/// <param name="offsetY">Passed through to <c>Impl.GetPointCoordinate</c>.</param>
/// <param name="zoom">Passed through to <c>Impl.GetPointCoordinate</c>.</param>
/// <param name="maxIterations">Iteration cap; stored counts are reduced modulo this value.</param>
/// <param name="cancel">Checked before each row and each group of eight pixels; when set, processing stops.</param>
public static unsafe void ComputeSingle(
    uint[,] iterations,
    int startScanline,
    int increment,
    double offsetX,
    double offsetY,
    double zoom,
    uint maxIterations,
    ref bool cancel)
{
    const int stride = 8;

    int height = iterations.GetLength(0);
    int width = iterations.GetLength(1);

    Vector256<float> iterationCap = Vector256.Create((float)maxIterations);
    Vector256<float> escapeLimit = Vector256.Create(4.0f);
    Vector256<float> one = Vector256.Create(1.0f);
    Vector256<float> two = Vector256.Create(2.0f);

    // Scratch buffers: real/imaginary parts of the eight points and their final counts.
    float* realParts = stackalloc float[stride];
    float* imaginaryParts = stackalloc float[stride];
    float* counts = stackalloc float[stride];

    for (int i = startScanline; i < height && !cancel; i += increment)
    {
        for (int j = 0; j < width && !cancel; j += stride)
        {
            // Coordinates are computed for all eight lanes, even those beyond the last column;
            // only the stores below are restricted to columns inside the grid.
            for (int lane = 0; lane < stride; lane++)
            {
                var point = Impl.GetPointCoordinate(j + lane, i, width, height, offsetX, offsetY, zoom);
                realParts[lane] = (float)point.X;
                imaginaryParts[lane] = (float)point.Y;
            }

            Vector256<float> cr = Avx.LoadVector256(realParts);
            Vector256<float> ci = Avx.LoadVector256(imaginaryParts);
            Vector256<float> zr = cr;
            Vector256<float> zi = ci;
            Vector256<float> it = Vector256.Create(0f);

            while (true)
            {
                Vector256<float> zr2 = Avx.Multiply(zr, zr);
                Vector256<float> zi2 = Avx.Multiply(zi, zi);
                Vector256<float> squaredMagnitude = Avx.Add(zr2, zi2);

                // A lane keeps iterating while |z|^2 <= 4 and its count is <= the cap.
                Vector256<float> withinLimit = Avx.Compare(squaredMagnitude, escapeLimit, FloatComparisonMode.OrderedLessThanOrEqualNonSignaling);
                Vector256<float> belowCap = Avx.Compare(it, iterationCap, FloatComparisonMode.OrderedLessThanOrEqualNonSignaling);
                Vector256<float> cond = Avx.And(withinLimit, belowCap);

                if (Avx.MoveMask(cond) == 0)
                {
                    // All lanes are finished: store the counts for the columns inside the grid.
                    Avx.Store(counts, it);
                    for (int lane = 0; lane < stride; lane++)
                    {
                        int column = j + lane;
                        if (column < width)
                        {
                            iterations[i, column] = (uint)counts[lane] % maxIterations;
                        }
                    }

                    break;
                }

                // z = z^2 + c
                zi = Fma.MultiplyAdd(two, Avx.Multiply(zr, zi), ci);
                zr = Avx.Add(Avx.Subtract(zr2, zi2), cr);

                // Masking 1.0f with the condition adds 1 to active lanes and 0 to finished ones.
                it = Avx.Add(it, Avx.And(one, cond));
            }
        }
    }
}
// Walk-through of assorted Vector64/128/256, AVX, SSE and FMA operations.
// None of the computed values are used; the trailing comments record the expected content
// of each result given the operands in effect at that statement. Comparison results are
// masks: a "true" lane has all bits set, which reads as NaN when viewed as a float.
public Intro() {
    var middleVector = Vector128.Create(1.0f); // middleVector = <1,1,1,1>
    middleVector = Vector128.CreateScalar(-1.0f); // middleVector = <-1,0,0,0>
    var floatBytes = Vector64.AsByte(Vector64.Create(1.0f, -1.0f)); // floatBytes = <0, 0, 128, 63, 0, 0, 128, 191>
    if (Avx.IsSupported) {
        var left = Vector256.Create(-2.5f); // <-2.5, -2.5, -2.5, -2.5, -2.5, -2.5, -2.5, -2.5>
        var right = Vector256.Create(5.0f); // <5, 5, 5, 5, 5, 5, 5, 5>
        // Even lanes subtract (left - right), odd lanes add (left + right).
        Vector256 <float> result = Avx.AddSubtract(left, right); // result = <-7.5, 2.5, -7.5, 2.5, -7.5, 2.5, -7.5, 2.5>
        left = Vector256.Create(-1.0f, -2.0f, -3.0f, -4.0f, -50.0f, -60.0f, -70.0f, -80.0f);
        right = Vector256.Create(0.0f, 2.0f, 3.0f, 4.0f, 50.0f, 60.0f, 70.0f, 80.0f);
        // Unpack interleaves within each 128-bit half separately.
        result = Avx.UnpackHigh(left, right); // result = <-3, 3, -4, 4, -70, 70, -80, 80>
        result = Avx.UnpackLow(left, right); // result = <-1, 0, -2, 2, -50, 50, -60, 60>
        // High nibble of the control selects which products are summed, low nibble where the sum is stored.
        result = Avx.DotProduct(left, right, 0b1111_0001); // result = <-29, 0, 0, 0, -17400, 0, 0, 0>
        // For float vectors TestC/TestZ look only at the sign bit of each lane.
        bool testResult = Avx.TestC(left, right); // testResult = true
        testResult = Avx.TestC(right, left); // testResult = false
        Vector256 <float> result1 = Avx.Divide(left, right); // result1 = <-Infinity, -1, -1, -1, -1, -1, -1, -1>
        var plusOne = Vector256.Create(1.0f);
        result = Avx.Compare(right, result1, FloatComparisonMode.OrderedGreaterThanNonSignaling); // all eight lanes true
        result = Avx.Compare(right, result1, FloatComparisonMode.UnorderedNotLessThanNonSignaling); // all eight lanes true
        left = Vector256.Create(0.0f, 3.0f, -3.0f, 4.0f, -50.0f, 60.0f, -70.0f, 80.0f);
        right = Vector256.Create(0.0f, 2.0f, 3.0f, 2.0f, 50.0f, -60.0f, 70.0f, -80.0f);
        Vector256 <float> nanInFirstPosition = Avx.Divide(left, right); // 0 / 0 in lane 0: <NaN, 1.5, -1, 2, -1, -1, -1, -1>
        left = Vector256.Create(1.1f, 3.3333333f, -3.0f, 4.22f, -50.0f, 60.0f, -70.0f, 80.0f);
        Vector256 <float> InfInFirstPosition = Avx.Divide(left, right); // 1.1 / 0 in lane 0: <Infinity, ~1.667, -1, 2.11, -1, -1, -1, -1>
        left = Vector256.Create(-1.1f, 3.0f, 1.0f / 3.0f, MathF.PI, -50.0f, 60.0f, -70.0f, 80.0f);
        right = Vector256.Create(0.0f, 2.0f, 3.1f, 2.0f, 50.0f, -60.0f, 70.0f, -80.0f);
        Vector256 <float> compareResult = Avx.Compare(left, right, FloatComparisonMode.OrderedGreaterThanNonSignaling); // compareResult = <0, NaN, 0, NaN, 0, NaN, 0, NaN>
        // Takes the lane from right where the mask's sign bit is set, from left otherwise.
        Vector256 <float> mixed = Avx.BlendVariable(left, right, compareResult); // mixed = <-1.1, 2, ~0.333, 2, -50, -60, -70, -80>
        //left = Vector256.Create(-1.0f, 1.0f, -1.0f, 1.0f, -1.0f, 1.0f, -1.0f, 1.0f);
        //right = Vector256.Create(1.0f, 1.0f, -1.0f, 1.0f, 1.0f, 1.0f, -1.0f, 1.0f);
        Vector256 <float> other = right = Vector256.Create(0.0f, 2.0f, 3.0f, 2.0f, 50.0f, -60.0f, 70.0f, -80.0f);
        bool bRes = Avx.TestZ(plusOne, compareResult); // bRes = true (plusOne has no sign bits set)
        bool bRes2 = Avx.TestC(plusOne, compareResult); // bRes2 = false
        // Despite its name this is true when ANY lane of the mask is set, not only when all are.
        bool allTrue = !Avx.TestZ(compareResult, compareResult); // allTrue = true
        compareResult = Avx.Compare(nanInFirstPosition, right, FloatComparisonMode.OrderedEqualNonSignaling); // compareResult = <0, 0, 0, NaN, 0, 0, 0, 0>
        // The unordered variant also reports true for the NaN operand in lane 0.
        compareResult = Avx.Compare(nanInFirstPosition, right, FloatComparisonMode.UnorderedEqualNonSignaling); // compareResult = <NaN, 0, 0, NaN, 0, 0, 0, 0>
        compareResult = Avx.Compare(InfInFirstPosition, right, FloatComparisonMode.UnorderedNotLessThanOrEqualNonSignaling); // compareResult = <NaN, 0, 0, NaN, 0, NaN, 0, NaN>
        compareResult = Avx.Compare(InfInFirstPosition, right, FloatComparisonMode.OrderedGreaterThanNonSignaling); // compareResult = <NaN, 0, 0, NaN, 0, NaN, 0, NaN>
        var left128 = Vector128.Create(1.0f, 2.0f, 3.0f, 4.0f);
        var right128 = Vector128.Create(2.0f, 3.0f, 4.0f, 5.0f);
        Vector128 <float> compResult128 = Sse.CompareGreaterThan(left128, right128); // compResult128 = <0, 0, 0, 0>
        // One bit per lane, lane 0 in the lowest bit.
        int res = Avx.MoveMask(compareResult); // res = 0b1010_1001 (169)
        if (Fma.IsSupported) {
            Vector256 <float> resultFma = Fma.MultiplyAdd(left, right, other); // = left * right + other for each element
            resultFma = Fma.MultiplyAddNegated(left, right, other); // = -(left * right) + other for each element
            resultFma = Fma.MultiplySubtract(left, right, other); // = left * right - other for each element
            Fma.MultiplyAddSubtract(left, right, other); // even elements (0, 2, ...) like MultiplySubtract, odd elements like MultiplyAdd; the result is discarded here
        }
        // From here on: left = <-1.1, 3, ~0.333, PI, -50, 60, -70, 80>, right = <0, 2, 3, 2, 50, -60, 70, -80>.
        result = Avx.DotProduct(left, right, 0b1010_0001); // products of elements 1 and 3 only: result = <~12.28, 0, 0, 0, -10000, 0, 0, 0>
        result = Avx.Floor(left); // result = <-2, 3, 0, 3, -50, 60, -70, 80>
        result = Avx.Add(left, right); // result = <-1.1, 5, ~3.333, ~5.142, 0, 0, 0, 0>
        result = Avx.Ceiling(left); // result = <-1, 3, 1, 4, -50, 60, -70, 80>
        result = Avx.Multiply(left, right); // result = <-0, 6, ~1, ~6.283, -2500, -3600, -4900, -6400>
        // Horizontal operations pair adjacent elements: two pairs from left, then two from right, per 128-bit half.
        result = Avx.HorizontalAdd(left, right); // result = <1.9, ~3.475, 2, 5, 10, 10, -10, -10>
        result = Avx.HorizontalSubtract(left, right); // result = <-4.1, ~-2.808, -2, 1, -110, -150, 110, 150>
        double[] someDoubles = new double[] { 1.0, 3.0, -2.5, 7.5, 10.8, 0.33333 };
        double[] someOtherDoubles = new double[] { 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 };
        double[] someResult = new double[someDoubles.Length];
        float[] someFloats = new float[] { 1, 2, 3, 4, 10, 20, 30, 40, 0 };
        float[] someOtherFloats = new float[] { 1, 1, 1, 1, 1, 1, 1, 1 };
        unsafe {
            // Loads four doubles starting at index 1 and stores them at the start of someResult.
            fixed(double *ptr = &someDoubles[1]) {
                fixed(double *ptr2 = &someResult[0]) {
                    Vector256 <double> res2 = Avx.LoadVector256(ptr); // res2 = <3, -2.5, 7.5, 10.8>
                    Avx.Store(ptr2, res2);
                }
            }
            fixed(float *ptr = &someFloats[0]) {
                fixed(float *ptr2 = &someOtherFloats[0]) {
                    // Only the product of element 0 of each 128-bit half is kept.
                    Vector256 <float> res2 = Avx.DotProduct(Avx.LoadVector256(ptr), Avx.LoadVector256(ptr2), 0b0001_0001); // res2 = <1, 0, 0, 0, 10, 0, 0, 0>
                    //Avx.Store(ptr2, res2);
                }
            }
        }
    }
}