/// <summary>
/// Bitwise and-not of two vectors. Uses <c>Sse.AndNot</c> when SSE is available and
/// <c>SoftwareFallbacks.AndNot_Software</c> otherwise.
/// </summary>
/// <param name="left">First operand, forwarded unchanged to the selected implementation.</param>
/// <param name="right">Second operand, forwarded unchanged to the selected implementation.</param>
/// <returns>The result produced by the selected implementation.</returns>
public static Vector4F AndNot(Vector4FParam1_3 left, Vector4FParam1_3 right)
{
    // Without SSE there is nothing to accelerate: take the portable path.
    if (!Sse.IsSupported)
    {
        return SoftwareFallbacks.AndNot_Software(left, right);
    }

    return Sse.AndNot(left, right);
}
/// <summary>
/// Portable lane-wise division: each lane of <paramref name="dividend"/> is divided by the
/// matching lane of <paramref name="divisor"/>.
/// </summary>
/// <param name="dividend">Vector supplying the numerators.</param>
/// <param name="divisor">Vector supplying the denominators.</param>
/// <returns>A vector holding the four quotients in X, Y, Z, W order.</returns>
public static Vector128<float> Divide_Software(Vector4FParam1_3 dividend, Vector4FParam1_3 divisor)
{
    float quotientX = X(dividend) / X(divisor);
    float quotientY = Y(dividend) / Y(divisor);
    float quotientZ = Z(dividend) / Z(divisor);
    float quotientW = W(dividend) / W(divisor);

    return Vector128.Create(quotientX, quotientY, quotientZ, quotientW);
}
/// <summary>
/// Portable lane-wise square root, computed with <c>MathF.Sqrt</c> on each lane.
/// </summary>
/// <param name="vector">Vector whose four lanes are square-rooted.</param>
/// <returns>A vector holding the four roots in X, Y, Z, W order.</returns>
public static Vector128<float> Sqrt_Software(Vector4FParam1_3 vector)
{
    float rootX = MathF.Sqrt(X(vector));
    float rootY = MathF.Sqrt(Y(vector));
    float rootZ = MathF.Sqrt(Z(vector));
    float rootW = MathF.Sqrt(W(vector));

    return Vector128.Create(rootX, rootY, rootZ, rootW);
}
/// <summary>
/// Portable lane-wise subtraction: <paramref name="left"/> minus <paramref name="right"/>.
/// </summary>
/// <param name="left">Vector supplying the minuends.</param>
/// <param name="right">Vector supplying the subtrahends.</param>
/// <returns>A vector holding the four differences in X, Y, Z, W order.</returns>
public static Vector128<float> Subtract_Software(Vector4FParam1_3 left, Vector4FParam1_3 right)
{
    float differenceX = X(left) - X(right);
    float differenceY = Y(left) - Y(right);
    float differenceZ = Z(left) - Z(right);
    float differenceW = W(left) - W(right);

    return Vector128.Create(differenceX, differenceY, differenceZ, differenceW);
}
/// <summary>
/// Portable lane-wise multiplication of <paramref name="left"/> and <paramref name="right"/>.
/// </summary>
/// <param name="left">First factor vector.</param>
/// <param name="right">Second factor vector.</param>
/// <returns>A vector holding the four products in X, Y, Z, W order.</returns>
public static Vector128<float> Multiply_Software(Vector4FParam1_3 left, Vector4FParam1_3 right)
{
    float productX = X(left) * X(right);
    float productY = Y(left) * Y(right);
    float productZ = Z(left) * Z(right);
    float productW = W(left) * W(right);

    return Vector128.Create(productX, productY, productZ, productW);
}
/// <summary>
/// Portable lane-wise addition of <paramref name="left"/> and <paramref name="right"/>.
/// </summary>
/// <param name="left">First addend vector.</param>
/// <param name="right">Second addend vector.</param>
/// <returns>A vector holding the four sums in X, Y, Z, W order.</returns>
public static Vector128<float> Add_Software(Vector4FParam1_3 left, Vector4FParam1_3 right)
{
    float sumX = X(left) + X(right);
    float sumY = Y(left) + Y(right);
    float sumZ = Z(left) + Z(right);
    float sumW = W(left) + W(right);

    return Vector128.Create(sumX, sumY, sumZ, sumW);
}
/// <summary>
/// Portable four-lane dot product. The scalar result is broadcast to every lane of the
/// returned vector via the single-argument <c>Vector128.Create</c>.
/// </summary>
/// <param name="left">First operand.</param>
/// <param name="right">Second operand.</param>
/// <returns>A vector with X*X + Y*Y + Z*Z + W*W (pairwise across operands) in all lanes.</returns>
public static Vector128<float> DotProduct4D_Software(Vector4FParam1_3 left, Vector4FParam1_3 right)
{
    // Accumulate in X, Y, Z, W order so the additions associate left to right.
    float dot = X(left) * X(right);
    dot += Y(left) * Y(right);
    dot += Z(left) * Z(right);
    dot += W(left) * W(right);

    return Vector128.Create(dot);
}
/// <summary>
/// Portable horizontal add: sums adjacent lane pairs of each operand.
/// </summary>
/// <param name="left">Supplies the two low lanes of the result (X+Y, Z+W).</param>
/// <param name="right">Supplies the two high lanes of the result (X+Y, Z+W).</param>
/// <returns>(left.X+left.Y, left.Z+left.W, right.X+right.Y, right.Z+right.W).</returns>
public static Vector4F HorizontalAdd_Software(Vector4FParam1_3 left, Vector4FParam1_3 right)
{
    float leftLowPair = X(left) + Y(left);
    float leftHighPair = Z(left) + W(left);
    float rightLowPair = X(right) + Y(right);
    float rightHighPair = Z(right) + W(right);

    return Vector128.Create(leftLowPair, leftHighPair, rightLowPair, rightHighPair);
}
/// <summary>
/// Bitwise exclusive-or of two vectors. Uses <c>Sse.Xor</c> when SSE is available and
/// <c>SoftwareFallbacks.Xor_Software</c> otherwise.
/// </summary>
/// <param name="left">First operand.</param>
/// <param name="right">Second operand.</param>
/// <returns>The result produced by the selected implementation.</returns>
public static Vector4F Xor(Vector4FParam1_3 left, Vector4FParam1_3 right)
{
    // Without SSE, fall straight through to the portable implementation.
    if (!Sse.IsSupported)
    {
        return SoftwareFallbacks.Xor_Software(left, right);
    }

    return Sse.Xor(left, right);
}
/// <summary>
/// Cross product of the X, Y, Z lanes of two vectors. The SSE path is built from shuffles
/// and multiplies; without SSE it defers to <c>CrossProduct3D_Software</c>.
/// </summary>
/// <param name="left">Left-hand vector A (W lane is not part of the product).</param>
/// <param name="right">Right-hand vector B (W lane is not part of the product).</param>
/// <returns>The cross product; on the SSE path the result is and-ed with <c>MaskW</c> before returning.</returns>
public static Vector4F CrossProduct3D(Vector4FParam1_3 left, Vector4FParam1_3 right)
{
    if (!Sse.IsSupported)
    {
        return CrossProduct3D_Software(left, right);
    }

    /*
     * With A = (Ax, Ay, Az, _) and B = (Bx, By, Bz, _):
     *
     *   X = (Ay * Bz) - (Az * By)
     *   Y = (Az * Bx) - (Ax * Bz)
     *   Z = (Ax * By) - (Ay * Bx)
     *
     * The three minuends are (Ay, Az, Ax, _) * (Bz, Bx, By, _) and the three
     * subtrahends are (Az, Ax, Ay, _) * (By, Bz, Bx, _), so two shuffled multiplies
     * followed by one subtract produce all three components at once.
     *
     * Helpers.Shuffle follows the argument convention of the C++ _MM_SHUFFLE macro:
     * indices are given last-lane-first. (3, 0, 2, 1) therefore selects source lanes
     * 1, 2, 0, 3 for destination lanes 0..3.
     */

    // A -> (y, z, x, _) and B -> (z, x, y, _): operands of the minuends.
    Vector4F leftYzx = Sse.Shuffle(left, left, Helpers.Shuffle(3, 0, 2, 1));
    Vector4F rightZxy = Sse.Shuffle(right, right, Helpers.Shuffle(3, 1, 0, 2));

    // A -> (z, x, y, _) and B -> (y, z, x, _): operands of the subtrahends.
    Vector4F leftZxy = Sse.Shuffle(left, left, Helpers.Shuffle(3, 1, 0, 2));
    Vector4F rightYzx = Sse.Shuffle(right, right, Helpers.Shuffle(3, 0, 2, 1));

    Vector4F minuends = Sse.Multiply(leftYzx, rightZxy);
    Vector4F subtrahends = Sse.Multiply(leftZxy, rightYzx);

    // TODO reuse vectors (minimal register usage) - potentially prevent any stack spilling
    // The W lane is cleared with MaskW, as the Vector3 representation requires W == 0.
    return Sse.And(Sse.Subtract(minuends, subtrahends), MaskW);
}
/// <summary>
/// Subtracts <paramref name="scalar"/> from every lane of <paramref name="vector"/>.
/// Uses <c>Sse.Subtract</c> when SSE is available and
/// <c>SoftwareFallbacks.SoftwareFallbacksVector4F.Subtract_Software</c> otherwise.
/// </summary>
/// <param name="vector">Vector supplying the minuends.</param>
/// <param name="scalar">Value subtracted from each lane.</param>
/// <returns>The lane-wise difference <c>vector - scalar</c>.</returns>
public static Vector4F Subtract(Vector4FParam1_3 vector, float scalar)
{
    if (Sse.IsSupported)
    {
        // Broadcast the scalar so it can be applied to all four lanes at once.
        Vector4F expand = Vector128.Create(scalar);

        // Fixed: this previously called Sse.Add, so the hardware path added the scalar
        // while the software fallback subtracted it.
        return Sse.Subtract(vector, expand);
    }

    return SoftwareFallbacks.SoftwareFallbacksVector4F.Subtract_Software(vector, scalar);
}
// Portable rounding: applies MathF.Round to each lane and repacks the results
// in X, Y, Z, W order.
static Vector128<float> SoftwareFallback(Vector4FParam1_3 vector)
{
    // TODO is this semantically equivalent to 'roundps'?
    float roundedX = MathF.Round(X(vector));
    float roundedY = MathF.Round(Y(vector));
    float roundedZ = MathF.Round(Z(vector));
    float roundedW = MathF.Round(W(vector));

    return Vector128.Create(roundedX, roundedY, roundedZ, roundedW);
}
/// <summary>
/// Divides every lane of <paramref name="dividend"/> by <paramref name="scalarDivisor"/>.
/// Uses <c>Sse.Divide</c> when SSE is available and
/// <c>SoftwareFallbacks.SoftwareFallbacksVector4F.Divide_Software</c> otherwise.
/// </summary>
/// <param name="dividend">Vector supplying the numerators.</param>
/// <param name="scalarDivisor">Value each lane is divided by.</param>
/// <returns>The lane-wise quotient.</returns>
public static Vector4F Divide(Vector4FParam1_3 dividend, float scalarDivisor)
{
    if (!Sse.IsSupported)
    {
        return SoftwareFallbacks.SoftwareFallbacksVector4F.Divide_Software(dividend, scalarDivisor);
    }

    // Broadcast the divisor to all four lanes, then divide lane-wise.
    return Sse.Divide(dividend, Vector128.Create(scalarDivisor));
}
/// <summary>
/// Clamps each lane of <paramref name="vector"/> to the range given by
/// <paramref name="low"/> and <paramref name="high"/>. The SSE path computes
/// <c>Max(Min(vector, high), low)</c>; without SSE it defers to
/// <c>SoftwareFallbacks.SoftwareFallbacksVector4F.Clamp_Software</c>.
/// </summary>
/// <param name="vector">Values to clamp.</param>
/// <param name="low">Per-lane lower bounds.</param>
/// <param name="high">Per-lane upper bounds.</param>
/// <returns>The clamped vector.</returns>
public static Vector4F Clamp(Vector4FParam1_3 vector, Vector4FParam1_3 low, Vector4FParam1_3 high)
{
    if (!Sse.IsSupported)
    {
        return SoftwareFallbacks.SoftwareFallbacksVector4F.Clamp_Software(vector, low, high);
    }

    // Cap at the upper bound first, then raise to the lower bound.
    return Sse.Max(Sse.Min(vector, high), low);
}
/// <summary>
/// Bitwise complement of <paramref name="vector"/>. The SSE path and-nots the vector with
/// an all-ones mask; without SSE it defers to
/// <c>SoftwareFallbacks.SoftwareFallbacksVector4F.Not_Software</c>.
/// </summary>
/// <param name="vector">Vector whose bits are inverted.</param>
/// <returns>The complemented vector.</returns>
public static Vector4F Not(Vector4FParam1_3 vector)
{
    if (!Sse.IsSupported)
    {
        return SoftwareFallbacks.SoftwareFallbacksVector4F.Not_Software(vector);
    }

    // Sse.AndNot computes (~first) & second, so and-notting with all bits set yields ~vector.
    Vector4F allBitsSet = Vector128.Create(-1, -1, -1, -1).AsSingle();
    return Sse.AndNot(vector, allBitsSet);
}
/// <summary>
/// Absolute value of each lane of <paramref name="vector"/>. The SSE path clears the sign
/// bit of every lane; without SSE it defers to
/// <c>SoftwareFallbacks.SoftwareFallbacksVector4F.Abs_Software</c>.
/// </summary>
/// <param name="vector">Vector whose lanes are made non-negative.</param>
/// <returns>A vector with the sign bit of every lane cleared (SSE path).</returns>
public static Vector4F Abs(Vector4FParam1_3 vector)
{
    if (Sse.IsSupported)
    {
        // 0x7FFFFFFF keeps exponent and mantissa and drops the sign bit of each lane.
        // The previous max(0 - v, v) formulation returned -0.0f for a -0.0f lane, because
        // maxps yields its second operand when both inputs are zero.
        Vector4F signCleared = Vector128.Create(int.MaxValue).AsSingle();
        return Sse.And(vector, signCleared);
    }

    return SoftwareFallbacks.SoftwareFallbacksVector4F.Abs_Software(vector);
}
/// <summary>
/// Linear interpolation between two vectors using a single scalar weight:
/// <c>from + (to - from) * weight</c>.
/// </summary>
/// <param name="from">Value returned for a weight of 0.</param>
/// <param name="to">Value returned for a weight of 1.</param>
/// <param name="weight">Interpolation factor; asserted (debug builds) to lie in [0, 1].</param>
/// <returns>The interpolated vector.</returns>
public static Vector4F Lerp(Vector4FParam1_3 from, Vector4FParam1_3 to, float weight)
{
    Debug.Assert(weight >= 0 && weight <= 1);

    // Scale the span between the endpoints, then offset from the start point.
    Vector4F span = Subtract(to, from);
    Vector4F scaledSpan = Multiply(span, weight.LoadScalarBroadcast());

    return Add(from, scaledSpan);
}
/// <summary>
/// Horizontal add of two vectors. Uses <c>Sse3.HorizontalAdd</c> when SSE3 is available and
/// <c>SoftwareFallbacks.SoftwareFallbacksVector4F.HorizontalAdd_Software</c> otherwise.
/// </summary>
/// <param name="left">First operand.</param>
/// <param name="right">Second operand.</param>
/// <returns>The result produced by the selected implementation.</returns>
public static Vector4F HorizontalAdd(Vector4FParam1_3 left, Vector4FParam1_3 right)
{
    if (!Sse3.IsSupported)
    {
        // TODO can Sse be used over the software fallback?
        return SoftwareFallbacks.SoftwareFallbacksVector4F.HorizontalAdd_Software(left, right);
    }

    return Sse3.HorizontalAdd(left, right);
}
/// <summary>
/// Linear interpolation between two vectors using a per-lane weight:
/// <c>from + (to - from) * weight</c>.
/// </summary>
/// <param name="from">Value returned where the weight is 0.</param>
/// <param name="to">Value returned where the weight is 1.</param>
/// <param name="weight">Per-lane interpolation factors; every lane is asserted (debug builds) to lie in [0, 1].</param>
/// <returns>The interpolated vector.</returns>
public static Vector128<float> Lerp(Vector4FParam1_3 from, Vector4FParam1_3 to, Vector4FParam1_3 weight)
{
    Debug.Assert(
        CompareLessThanOrEqual(weight, Vector128.Create(1f)).AllTrue()
        && CompareGreaterThanOrEqual(weight, Vector4F.Zero).AllTrue());

    // Scale the span between the endpoints lane by lane, then offset from the start point.
    Vector4F span = Subtract(to, from);
    Vector4F scaledSpan = Multiply(span, weight);

    return Add(from, scaledSpan);
}
/// <summary>
/// Portable cross product of the X, Y, Z lanes of two vectors.
/// </summary>
/// <param name="left">Left-hand vector A; its W lane is not read.</param>
/// <param name="right">Right-hand vector B; its W lane is not read.</param>
/// <returns>(Ay*Bz - Az*By, Az*Bx - Ax*Bz, Ax*By - Ay*Bx, 0).</returns>
public static Vector128<float> CrossProduct3D_Software(Vector4FParam1_3 left, Vector4FParam1_3 right)
{
    float crossX = Y(left) * Z(right) - Z(left) * Y(right);
    float crossY = Z(left) * X(right) - X(left) * Z(right);
    float crossZ = X(left) * Y(right) - Y(left) * X(right);

    // W is always written as zero.
    return Vector128.Create(crossX, crossY, crossZ, 0f);
}
/// <summary>
/// Portable lane-wise <c>&lt;=</c> comparison. Each boolean result is converted to a lane
/// value by <c>BoolToSimdBoolSingle</c>.
/// </summary>
/// <param name="left">Left-hand operands.</param>
/// <param name="right">Right-hand operands.</param>
/// <returns>A vector of per-lane comparison results in X, Y, Z, W order.</returns>
public static Vector128<float> CompareLessThanOrEqual_Software(Vector4FParam1_3 left, Vector4FParam1_3 right)
{
    float lX = X(left), rX = X(right);
    float lY = Y(left), rY = Y(right);
    float lZ = Z(left), rZ = Z(right);
    float lW = W(left), rW = W(right);

    // A lane with a NaN operand compares false: `<=` is false for NaN and there is
    // deliberately no extra NaN term here.
    bool xHolds = lX <= rX;
    bool yHolds = lY <= rY;
    bool zHolds = lZ <= rZ;
    bool wHolds = lW <= rW;

    return Vector128.Create(
        BoolToSimdBoolSingle(xHolds),
        BoolToSimdBoolSingle(yHolds),
        BoolToSimdBoolSingle(zHolds),
        BoolToSimdBoolSingle(wHolds));
}
/// <summary>
/// Portable lane-wise <c>&gt;=</c> comparison. A lane is also reported as true when
/// <c>IsNan</c> returns true for its operand pair. Each boolean result is converted to a
/// lane value by <c>BoolToSimdBoolSingle</c>.
/// </summary>
/// <param name="left">Left-hand operands.</param>
/// <param name="right">Right-hand operands.</param>
/// <returns>A vector of per-lane comparison results in X, Y, Z, W order.</returns>
public static Vector128<float> CompareGreaterThanOrEqual_Software(Vector4FParam1_3 left, Vector4FParam1_3 right)
{
    float lX = X(left), rX = X(right);
    float lY = Y(left), rY = Y(right);
    float lZ = Z(left), rZ = Z(right);
    float lW = W(left), rW = W(right);

    // NOTE(review): the extra IsNan term presumably mirrors the hardware compare used on the
    // accelerated path ("not less than" is true for unordered operands) - confirm.
    bool xHolds = lX >= rX || IsNan(lX, rX);
    bool yHolds = lY >= rY || IsNan(lY, rY);
    bool zHolds = lZ >= rZ || IsNan(lZ, rZ);
    bool wHolds = lW >= rW || IsNan(lW, rW);

    return Vector128.Create(
        BoolToSimdBoolSingle(xHolds),
        BoolToSimdBoolSingle(yHolds),
        BoolToSimdBoolSingle(zHolds),
        BoolToSimdBoolSingle(wHolds));
}
/// <summary>
/// Portable lane-wise equality comparison. Each boolean result is converted to a lane value
/// by <c>BoolToSimdBoolSingle</c>.
/// </summary>
/// <param name="left">Left-hand operands.</param>
/// <param name="right">Right-hand operands.</param>
/// <returns>A vector of per-lane comparison results in X, Y, Z, W order.</returns>
public static Vector128<float> CompareEqual_Software(Vector4FParam1_3 left, Vector4FParam1_3 right)
{
    float lX = X(left), rX = X(right);
    float lY = Y(left), rY = Y(right);
    float lZ = Z(left), rZ = Z(right);
    float lW = W(left), rW = W(right);

    // Exact floating-point equality per lane (so a NaN lane never compares equal).
    bool xSame = lX == rX;
    bool ySame = lY == rY;
    bool zSame = lZ == rZ;
    bool wSame = lW == rW;

    return Vector128.Create(
        BoolToSimdBoolSingle(xSame),
        BoolToSimdBoolSingle(ySame),
        BoolToSimdBoolSingle(zSame),
        BoolToSimdBoolSingle(wSame));
}
/// <summary>
/// Portable lane-wise maximum. In each lane a NaN operand is first replaced by the other
/// operand, so a single NaN does not propagate; the lane result is NaN only when both
/// operands are NaN.
/// </summary>
/// <param name="left">First operand.</param>
/// <param name="right">Second operand.</param>
/// <returns>A vector holding the per-lane maxima in X, Y, Z, W order.</returns>
public static Vector128<float> Max_Software(Vector4FParam1_3 left, Vector4FParam1_3 right)
{
    // Maximum of one lane pair with the NaN substitution described above.
    float MaxOfLane(float l, float r)
    {
        if (float.IsNaN(l))
        {
            l = r;
        }

        if (float.IsNaN(r))
        {
            r = l;
        }

        return MathF.Max(l, r);
    }

    float lX = X(left), rX = X(right);
    float lY = Y(left), rY = Y(right);
    float lZ = Z(left), rZ = Z(right);
    float lW = W(left), rW = W(right);

    return Vector128.Create(
        MaxOfLane(lX, rX),
        MaxOfLane(lY, rY),
        MaxOfLane(lZ, rZ),
        MaxOfLane(lW, rW));
}
/// <summary>
/// Cross product of the X, Y, Z lanes of two vectors. The SSE path is built from shuffles
/// and multiplies; without SSE it defers to <c>CrossProduct3D_Software</c>.
/// </summary>
/// <param name="left">Left-hand vector A (W lane is not part of the product).</param>
/// <param name="right">Right-hand vector B (W lane is not part of the product).</param>
/// <returns>The cross product; on the SSE path the result is and-ed with <c>SingleConstants.MaskW</c> before returning.</returns>
public static Vector128<float> CrossProduct3D(Vector4FParam1_3 left, Vector4FParam1_3 right)
{
    if (!Sse.IsSupported)
    {
        return CrossProduct3D_Software(left, right);
    }

    /*
     * With A = (Ax, Ay, Az, _) and B = (Bx, By, Bz, _):
     *
     *   X = (Ay * Bz) - (Az * By)
     *   Y = (Az * Bx) - (Ax * Bz)
     *   Z = (Ax * By) - (Ay * Bx)
     *
     * The three minuends are (Ay, Az, Ax, _) * (Bz, Bx, By, _) and the three
     * subtrahends are (Az, Ax, Ay, _) * (By, Bz, Bx, _), so two shuffled multiplies
     * followed by one subtract produce all three components at once.
     */

    // A -> (y, z, x, _) and B -> (z, x, y, _): operands of the minuends.
    Vector4F leftYzx = Sse.Shuffle(left, left, ShuffleValues._1_2_0_3);
    Vector4F rightZxy = Sse.Shuffle(right, right, ShuffleValues._2_0_1_3);

    // A -> (z, x, y, _) and B -> (y, z, x, _): operands of the subtrahends.
    Vector4F leftZxy = Sse.Shuffle(left, left, ShuffleValues._2_0_1_3);
    Vector4F rightYzx = Sse.Shuffle(right, right, ShuffleValues._1_2_0_3);

    Vector4F minuends = Sse.Multiply(leftYzx, rightZxy);
    Vector4F subtrahends = Sse.Multiply(leftZxy, rightYzx);

    // TODO reuse vectors (minimal register usage) - potentially prevent any stack spilling
    // The W lane is cleared with MaskW, as the Vector3 representation requires W == 0.
    return Sse.And(Sse.Subtract(minuends, subtrahends), SingleConstants.MaskW);
}
/// <summary>
/// Squared distance between the X, Y lanes of two vectors, i.e. the 2D dot product of
/// <c>left - right</c> with itself. The hardware paths broadcast the sum to every lane.
/// </summary>
/// <param name="left">First point.</param>
/// <param name="right">Second point.</param>
/// <returns>
/// The squared distance. Tiers are tried in the order SSE4.1, SSE3, SSE, then
/// <c>DistanceSquared2D_Software</c>.
/// </returns>
public static Vector4F DistanceSquared2D(Vector4FParam1_3 left, Vector4FParam1_3 right)
{
    // SSE4.1: dpps computes the dot product in one instruction.
    if (Sse41.IsSupported)
    {
        // Upper nibble 0011: multiply lanes 0 and 1 only. Lower nibble 1111: write the sum to all lanes.
        const byte xyToAllLanes = 0b_0011_1111;

        Vector4F delta = Sse.Subtract(left, right);
        return Sse41.DotProduct(delta, delta, xyToAllLanes);
    }

    // SSE3: vectorized multiply, then a horizontal add to sum X and Y.
    if (Sse3.IsSupported)
    {
        Vector4F delta = Sse.Subtract(left, right);
        Vector4F squares = Sse.Multiply(delta, delta);

        // Zero Z and W so only the X and Y squares take part in the sum.
        Vector4F xyOnly = Sse.And(squares, MaskWAndZToZero);

        // haddps of a vector with itself puts X+Y into lanes 0 and 2.
        Vector4F pairSums = Sse3.HorizontalAdd(xyOnly, xyOnly);

        // movsldup copies lane 0 into lane 1 and lane 2 into lane 3, filling the vector with X+Y.
        return Sse3.MoveLowAndDuplicate(pairSums);
    }

    // SSE: sum with a shuffle and a scalar add, then broadcast lane 0.
    if (Sse.IsSupported)
    {
        Vector4F delta = Sse.Subtract(left, right);
        Vector4F squares = Sse.Multiply(delta, delta);

        // Broadcast the Y square so AddScalar can add it to the X square in lane 0.
        Vector4F ySquare = Sse.Shuffle(squares, squares, Helpers.Shuffle(1, 1, 1, 1));
        Vector4F sumInLane0 = Sse.AddScalar(squares, ySquare);

        return Sse.Shuffle(sumInLane0, sumInLane0, Helpers.Shuffle(0, 0, 0, 0));
    }

    return DistanceSquared2D_Software(left, right);
}
/// <summary>
/// Divides <paramref name="vector"/> by the length of its X, Y lanes. The hardware paths
/// compute the 2D dot product of the vector with itself, broadcast it, take the square
/// root and divide all four lanes by it.
/// </summary>
/// <param name="vector">Vector to normalize.</param>
/// <returns>
/// The scaled vector. Tiers are tried in the order SSE4.1, SSE3, SSE, then
/// <c>Normalize2D_Software</c>.
/// </returns>
public static Vector4F Normalize2D(Vector4FParam1_3 vector)
{
    #region Manual Inline

    // SSE4.1: dpps computes the dot product in one instruction.
    if (Sse41.IsSupported)
    {
        // Upper nibble 0011: multiply lanes 0 and 1 only. Lower nibble 1111: write the sum to all lanes.
        const byte xyToAllLanes = 0b_0011_1111;

        Vector4F lengthSquared = Sse41.DotProduct(vector, vector, xyToAllLanes);
        Vector4F length = Sse.Sqrt(lengthSquared);
        return Sse.Divide(vector, length);
    }

    // SSE3: vectorized multiply, then a horizontal add to sum X and Y.
    if (Sse3.IsSupported)
    {
        Vector4F squares = Sse.Multiply(vector, vector);

        // Zero Z and W so only the X and Y squares take part in the sum.
        Vector4F xyOnly = Sse.And(squares, MaskWAndZToZero);

        // haddps of a vector with itself puts X+Y into lanes 0 and 2.
        Vector4F pairSums = Sse3.HorizontalAdd(xyOnly, xyOnly);

        // movsldup copies lane 0 into lane 1 and lane 2 into lane 3, filling the vector with X+Y.
        Vector4F lengthSquared = Sse3.MoveLowAndDuplicate(pairSums);
        Vector4F length = Sse.Sqrt(lengthSquared);
        return Sse.Divide(vector, length);
    }

    // SSE: sum with a shuffle and a scalar add, then broadcast lane 0.
    if (Sse.IsSupported)
    {
        Vector4F squares = Sse.Multiply(vector, vector);

        // Broadcast the Y square so AddScalar can add it to the X square in lane 0.
        Vector4F ySquare = Sse.Shuffle(squares, squares, Helpers.Shuffle(1, 1, 1, 1));
        Vector4F sumInLane0 = Sse.AddScalar(squares, ySquare);
        Vector4F lengthSquared = Sse.Shuffle(sumInLane0, sumInLane0, Helpers.Shuffle(0, 0, 0, 0));

        return Sse.Divide(vector, Sse.Sqrt(lengthSquared));
    }

    #endregion

    return Normalize2D_Software(vector);
}
/// <summary>
/// Replaces the X, Y, Z lanes of the matrix's <c>_v3</c> row with those of
/// <paramref name="translation"/>, keeping the row's existing W lane.
/// </summary>
/// <param name="matrix">Matrix whose <c>_v3</c> field is rewritten.</param>
/// <param name="translation">Source of the new X, Y, Z values; its W lane is discarded.</param>
/// <returns><paramref name="matrix"/> after <c>_v3</c> has been updated.</returns>
public static MatrixSingle SetTranslation(MatrixSingle matrix, Vector4FParam1_3 translation)
{
    // Keep only W of the current row (MaskXYZ clears X, Y and Z).
    Vector4F preservedW = And(matrix._v3, SingleConstants.MaskXYZ);

    // Keep only X, Y, Z of the translation (MaskW clears W).
    Vector4F translationXyz = And(translation, SingleConstants.MaskW);

    // The two halves occupy disjoint lanes, so or-ing them merges X, Y, Z with the old W.
    matrix._v3 = Or(translationXyz, preservedW);

    return matrix;
}
/// <summary>
/// Divides <paramref name="vector"/> by the length of its X, Y, Z lanes. The hardware paths
/// compute the 3D dot product of the vector with itself, broadcast it, take the square
/// root and divide all four lanes by it.
/// </summary>
/// <param name="vector">Vector to normalize.</param>
/// <returns>
/// The scaled vector. Tiers are tried in the order SSE4.1, SSE3, SSE, then
/// <c>Normalize3D_Software</c>.
/// </returns>
public static Vector4F Normalize3D(Vector4FParam1_3 vector)
{
    // SSE4.1: dpps computes the dot product in one instruction.
    if (Sse41.IsSupported)
    {
        // Upper nibble 0111: multiply lanes 0, 1 and 2. Lower nibble 1111: write the sum to all lanes.
        const byte xyzToAllLanes = 0b_0111_1111;

        Vector4F lengthSquared = Sse41.DotProduct(vector, vector, xyzToAllLanes);
        Vector4F length = Sse.Sqrt(lengthSquared);
        return Sse.Divide(vector, length);
    }

    // SSE3: vectorized multiply, then two horizontal adds to sum X, Y and Z.
    if (Sse3.IsSupported)
    {
        Vector4F squares = Sse.Multiply(vector, vector);

        // Zero W so it does not contribute to the sum.
        Vector4F xyzOnly = Sse.And(squares, MaskW);

        // Two rounds of horizontal adds leave the full sum in every lane.
        Vector4F pairSums = VectorF.HorizontalAdd(xyzOnly, xyzOnly);
        Vector4F lengthSquared = VectorF.HorizontalAdd(pairSums, pairSums);

        return Sse.Divide(vector, Sse.Sqrt(lengthSquared));
    }

    // SSE: sum with shuffles and scalar adds, then broadcast lane 0.
    if (Sse.IsSupported)
    {
        Vector4F squares = Sse.Multiply(vector, vector);

        // (y, z, y, z): lane 0 holds the Y square for the first scalar add.
        Vector4F rotated = Sse.Shuffle(squares, squares, Helpers.Shuffle(2, 1, 2, 1));
        Vector4F running = Sse.AddScalar(squares, rotated);

        // Broadcast lane 1 of the rotated vector (the Z square) and add it as well.
        rotated = Sse.Shuffle(rotated, rotated, Helpers.Shuffle(1, 1, 1, 1));
        running = Sse.AddScalar(running, rotated);

        Vector4F lengthSquared = Sse.Shuffle(running, running, Helpers.Shuffle(0, 0, 0, 0));
        return Sse.Divide(vector, Sse.Sqrt(lengthSquared));
    }

    return Normalize3D_Software(vector);
}
/// <summary>
/// Dot product of the X, Y, Z lanes of two vectors. The hardware paths broadcast the sum
/// to every lane of the result.
/// </summary>
/// <param name="left">First operand.</param>
/// <param name="right">Second operand.</param>
/// <returns>
/// The dot product. Tiers are tried in the order SSE4.1, SSE3, SSE, then
/// <c>DotProduct3D_Software</c>.
/// </returns>
public static Vector4F DotProduct3D(Vector4FParam1_3 left, Vector4FParam1_3 right)
{
    // SSE4.1: dpps computes the dot product in one instruction.
    if (Sse41.IsSupported)
    {
        // Upper nibble 0111: multiply lanes 0, 1 and 2. Lower nibble 1111: write the sum to all lanes.
        const byte xyzToAllLanes = 0b_0111_1111;
        return Sse41.DotProduct(left, right, xyzToAllLanes);
    }

    // SSE3: vectorized multiply, then two horizontal adds to sum X, Y and Z.
    if (Sse3.IsSupported)
    {
        Vector4F products = Sse.Multiply(left, right);

        // Zero W so it does not contribute to the sum.
        Vector4F xyzOnly = Sse.And(products, MaskWSingle);

        // Two rounds of haddps leave the full sum in every lane.
        Vector4F pairSums = Sse3.HorizontalAdd(xyzOnly, xyzOnly);
        return Sse3.HorizontalAdd(pairSums, pairSums);
    }

    // SSE: sum with shuffles and scalar adds, then broadcast lane 0.
    if (Sse.IsSupported)
    {
        Vector4F products = Sse.Multiply(left, right);

        // (y, z, y, z): lane 0 holds the Y product for the first scalar add.
        Vector4F rotated = Sse.Shuffle(products, products, ShuffleValues._2_1_2_1);
        Vector4F running = Sse.AddScalar(products, rotated);

        // Broadcast lane 1 of the rotated vector (the Z product) and add it as well.
        rotated = Sse.Shuffle(rotated, rotated, ShuffleValues._1_1_1_1);
        running = Sse.AddScalar(running, rotated);

        return Sse.Shuffle(running, running, ShuffleValues._0_0_0_0);
    }

    return DotProduct3D_Software(left, right);
}