Example #1
0
        /// <summary>
        /// Bitwise AND-NOT of two vectors: forwards to <c>Sse.AndNot</c> when SSE is available,
        /// otherwise to the software fallback.
        /// </summary>
        /// <param name="left">First operand, passed through unchanged to the underlying implementation.</param>
        /// <param name="right">Second operand, passed through unchanged to the underlying implementation.</param>
        /// <returns>The result produced by whichever implementation was selected.</returns>
        public static Vector4F AndNot(Vector4FParam1_3 left, Vector4FParam1_3 right)
        {
            // Guard clause: take the software route first so the hardware path is the fall-through.
            if (!Sse.IsSupported)
            {
                return SoftwareFallbacks.AndNot_Software(left, right);
            }

            return Sse.AndNot(left, right);
        }
 /// <summary>
 /// Software fallback for component-wise division: each lane of <paramref name="dividend"/>
 /// is divided by the matching lane of <paramref name="divisor"/>.
 /// </summary>
 /// <returns>A vector holding the four quotients in X, Y, Z, W order.</returns>
 public static Vector128 <float> Divide_Software(Vector4FParam1_3 dividend, Vector4FParam1_3 divisor)
 {
     float quotientX = X(dividend) / X(divisor);
     float quotientY = Y(dividend) / Y(divisor);
     float quotientZ = Z(dividend) / Z(divisor);
     float quotientW = W(dividend) / W(divisor);

     return Vector128.Create(quotientX, quotientY, quotientZ, quotientW);
 }
 /// <summary>
 /// Software fallback for the component-wise square root: applies <c>MathF.Sqrt</c> to each lane.
 /// </summary>
 /// <returns>A vector holding the four square roots in X, Y, Z, W order.</returns>
 public static Vector128 <float> Sqrt_Software(Vector4FParam1_3 vector)
 {
     float rootX = MathF.Sqrt(X(vector));
     float rootY = MathF.Sqrt(Y(vector));
     float rootZ = MathF.Sqrt(Z(vector));
     float rootW = MathF.Sqrt(W(vector));

     return Vector128.Create(rootX, rootY, rootZ, rootW);
 }
 /// <summary>
 /// Software fallback for component-wise subtraction: each lane of <paramref name="right"/>
 /// is subtracted from the matching lane of <paramref name="left"/>.
 /// </summary>
 /// <returns>A vector holding the four differences in X, Y, Z, W order.</returns>
 public static Vector128 <float> Subtract_Software(Vector4FParam1_3 left, Vector4FParam1_3 right)
 {
     float differenceX = X(left) - X(right);
     float differenceY = Y(left) - Y(right);
     float differenceZ = Z(left) - Z(right);
     float differenceW = W(left) - W(right);

     return Vector128.Create(differenceX, differenceY, differenceZ, differenceW);
 }
 /// <summary>
 /// Software fallback for component-wise multiplication of <paramref name="left"/> and <paramref name="right"/>.
 /// </summary>
 /// <returns>A vector holding the four products in X, Y, Z, W order.</returns>
 public static Vector128 <float> Multiply_Software(Vector4FParam1_3 left, Vector4FParam1_3 right)
 {
     float productX = X(left) * X(right);
     float productY = Y(left) * Y(right);
     float productZ = Z(left) * Z(right);
     float productW = W(left) * W(right);

     return Vector128.Create(productX, productY, productZ, productW);
 }
 /// <summary>
 /// Software fallback for component-wise addition of <paramref name="left"/> and <paramref name="right"/>.
 /// </summary>
 /// <returns>A vector holding the four sums in X, Y, Z, W order.</returns>
 public static Vector128 <float> Add_Software(Vector4FParam1_3 left, Vector4FParam1_3 right)
 {
     float sumX = X(left) + X(right);
     float sumY = Y(left) + Y(right);
     float sumZ = Z(left) + Z(right);
     float sumW = W(left) + W(right);

     return Vector128.Create(sumX, sumY, sumZ, sumW);
 }
Example #7
0
 /// <summary>
 /// Software fallback for the 4D dot product: sums the products of all four lane pairs
 /// and broadcasts that single value into every lane of the result.
 /// </summary>
 /// <returns>A vector whose four lanes all hold the dot product.</returns>
 public static Vector128 <float> DotProduct4D_Software(Vector4FParam1_3 left, Vector4FParam1_3 right)
 {
     // Accumulate in X, Y, Z, W order so the floating-point additions associate left to right.
     float dot = X(left) * X(right);
     dot += Y(left) * Y(right);
     dot += Z(left) * Z(right);
     dot += W(left) * W(right);

     // The single-argument Create overload fills every lane with the same value.
     return Vector128.Create(dot);
 }
 /// <summary>
 /// Software fallback for horizontal addition: adjacent lane pairs of each input are summed.
 /// </summary>
 /// <returns>
 /// <c>(left.X + left.Y, left.Z + left.W, right.X + right.Y, right.Z + right.W)</c>.
 /// </returns>
 public static Vector4F HorizontalAdd_Software(Vector4FParam1_3 left, Vector4FParam1_3 right)
 {
     // The low half of the result comes from 'left', the high half from 'right'.
     float leftLowPair   = X(left) + Y(left);
     float leftHighPair  = Z(left) + W(left);
     float rightLowPair  = X(right) + Y(right);
     float rightHighPair = Z(right) + W(right);

     return Vector128.Create(leftLowPair, leftHighPair, rightLowPair, rightHighPair);
 }
Example #9
0
        /// <summary>
        /// Bitwise XOR of two vectors: forwards to <c>Sse.Xor</c> when SSE is available,
        /// otherwise to the software fallback.
        /// </summary>
        /// <returns>The result produced by whichever implementation was selected.</returns>
        public static Vector4F Xor(Vector4FParam1_3 left, Vector4FParam1_3 right)
        {
            // Guard clause: take the software route first so the hardware path is the fall-through.
            if (!Sse.IsSupported)
            {
                return SoftwareFallbacks.Xor_Software(left, right);
            }

            return Sse.Xor(left, right);
        }
        /// <summary>
        /// Computes the 3D cross product of <paramref name="left"/> and <paramref name="right"/>.
        /// The SSE path ANDs the result with <c>MaskW</c> before returning; without SSE the
        /// software fallback is used.
        /// </summary>
        /// <returns>The cross product in the X, Y, Z lanes.</returns>
        public static Vector4F CrossProduct3D(Vector4FParam1_3 left, Vector4FParam1_3 right)
        {
            if (!Sse.IsSupported)
            {
                return CrossProduct3D_Software(left, right);
            }

            /*
             * For A(x, y, z, _) and B(x, y, z, _) the cross product is
             *
             *   X = (Ay * Bz) - (Az * By)
             *   Y = (Az * Bx) - (Ax * Bz)
             *   Z = (Ax * By) - (Ay * Bx)
             *
             * The left-hand terms of the three subtractions are (Ay, Az, Ax, _) * (Bz, Bx, By, _)
             * and the right-hand terms are (Az, Ax, Ay, _) * (By, Bz, Bx, _), so each side is one
             * pair of shuffles followed by one packed multiply. The fourth lane is irrelevant to
             * the 3D result and is masked afterwards.
             *
             * Helpers.Shuffle takes its arguments like the C++ _MM_SHUFFLE macro, i.e. in reversed
             * lane order: (3, 0, 2, 1) puts element 1 in the first lane, element 2 in the second,
             * element 0 in the third and element 3 in the last.
             */

            // Left-hand terms: (y, z, x, _) of 'left' times (z, x, y, _) of 'right'.
            Vector4F leftYzx  = Sse.Shuffle(left, left, Helpers.Shuffle(3, 0, 2, 1));
            Vector4F rightZxy = Sse.Shuffle(right, right, Helpers.Shuffle(3, 1, 0, 2));
            Vector4F minuend  = Sse.Multiply(leftYzx, rightZxy);

            // Right-hand terms: (z, x, y, _) of 'left' times (y, z, x, _) of 'right'.
            Vector4F leftZxy    = Sse.Shuffle(left, left, Helpers.Shuffle(3, 1, 0, 2));
            Vector4F rightYzx   = Sse.Shuffle(right, right, Helpers.Shuffle(3, 0, 2, 1));
            Vector4F subtrahend = Sse.Multiply(leftZxy, rightYzx);

            // TODO reuse vectors (minimal register usage) - potentially prevent any stack spilling

            // Mask W to zero, which the Vector3 representation requires.
            return Sse.And(Sse.Subtract(minuend, subtrahend), MaskW);
        }
Example #11
0
        /// <summary>
        /// Subtracts <paramref name="scalar"/> from every component of <paramref name="vector"/>.
        /// </summary>
        /// <param name="vector">The vector to subtract from.</param>
        /// <param name="scalar">The value subtracted from each component.</param>
        /// <returns>The component-wise difference of <paramref name="vector"/> and the broadcast scalar.</returns>
        public static Vector4F Subtract(Vector4FParam1_3 vector, float scalar)
        {
            if (Sse.IsSupported)
            {
                // Broadcast the scalar into all four lanes so one packed instruction covers every component.
                Vector4F expand = Vector128.Create(scalar);

                // This must be a packed subtract; a packed add here would make the hardware path
                // disagree with both the method name and the software fallback.
                return(Sse.Subtract(vector, expand));
            }

            return(SoftwareFallbacks.SoftwareFallbacksVector4F.Subtract_Software(vector, scalar));
        }
Example #12
0
 // Software fallback for rounding: applies MathF.Round to each lane independently.
 static Vector128 <float> SoftwareFallback(Vector4FParam1_3 vector)
 {
     // TODO is this semantically equivalent to 'roundps'?
     float roundedX = MathF.Round(X(vector));
     float roundedY = MathF.Round(Y(vector));
     float roundedZ = MathF.Round(Z(vector));
     float roundedW = MathF.Round(W(vector));

     return Vector128.Create(roundedX, roundedY, roundedZ, roundedW);
 }
Example #13
0
        /// <summary>
        /// Divides every component of <paramref name="dividend"/> by <paramref name="scalarDivisor"/>.
        /// </summary>
        /// <param name="dividend">The vector to divide.</param>
        /// <param name="scalarDivisor">The value each component is divided by.</param>
        /// <returns>The result of the SSE packed divide, or of the software fallback when SSE is unavailable.</returns>
        public static Vector4F Divide(Vector4FParam1_3 dividend, float scalarDivisor)
        {
            if (!Sse.IsSupported)
            {
                return SoftwareFallbacks.SoftwareFallbacksVector4F.Divide_Software(dividend, scalarDivisor);
            }

            // Broadcast the divisor to all four lanes, then divide lane by lane.
            return Sse.Divide(dividend, Vector128.Create(scalarDivisor));
        }
Example #14
0
        /// <summary>
        /// Clamps <paramref name="vector"/> between <paramref name="low"/> and <paramref name="high"/>.
        /// </summary>
        /// <param name="vector">The vector to clamp.</param>
        /// <param name="low">Lower bound, applied second via <c>Sse.Max</c>.</param>
        /// <param name="high">Upper bound, applied first via <c>Sse.Min</c>.</param>
        /// <returns>The clamped vector, or the software fallback's result when SSE is unavailable.</returns>
        public static Vector4F Clamp(Vector4FParam1_3 vector, Vector4FParam1_3 low, Vector4FParam1_3 high)
        {
            if (!Sse.IsSupported)
            {
                return SoftwareFallbacks.SoftwareFallbacksVector4F.Clamp_Software(vector, low, high);
            }

            // Cap at the upper bound first, then raise anything below the lower bound.
            Vector4F capped = Sse.Min(vector, high);
            return Sse.Max(capped, low);
        }
Example #15
0
        /// <summary>
        /// Bitwise NOT of <paramref name="vector"/>, implemented on SSE as an AND-NOT against
        /// an all-bits-set mask; falls back to software when SSE is unavailable.
        /// </summary>
        /// <returns>The result produced by whichever implementation was selected.</returns>
        public static Vector4F Not(Vector4FParam1_3 vector)
        {
            if (!Sse.IsSupported)
            {
                return SoftwareFallbacks.SoftwareFallbacksVector4F.Not_Software(vector);
            }

            // -1 in every 32-bit lane is all ones; reinterpret those bits as floats to build the mask.
            Vector4F allBitsSet = Vector128.Create(-1, -1, -1, -1).AsSingle();
            return Sse.AndNot(vector, allBitsSet);
        }
Example #16
0
        /// <summary>
        /// Component-wise absolute value of <paramref name="vector"/>, computed on SSE as
        /// <c>max(0 - v, v)</c>; falls back to software when SSE is unavailable.
        /// </summary>
        /// <returns>The result produced by whichever implementation was selected.</returns>
        public static Vector4F Abs(Vector4FParam1_3 vector)
        {
            if (!Sse.IsSupported)
            {
                return SoftwareFallbacks.SoftwareFallbacksVector4F.Abs_Software(vector);
            }

            // Subtracting from zero flips the sign of every element...
            Vector4F negated = Sse.Subtract(Vector4F.Zero, vector);

            // ...so the larger of each (negated, original) pair is the positive one.
            return Sse.Max(negated, vector);
        }
Example #17
0
        /// <summary>
        /// Linearly interpolates between <paramref name="from"/> and <paramref name="to"/>
        /// using <c>from + (to - from) * weight</c>.
        /// </summary>
        /// <param name="from">Start value.</param>
        /// <param name="to">End value.</param>
        /// <param name="weight">Interpolation factor; asserted (debug builds only) to lie in [0, 1].</param>
        /// <returns>The interpolated vector.</returns>
        public static Vector4F Lerp(Vector4FParam1_3 from, Vector4FParam1_3 to, float weight)
        {
            Debug.Assert(weight >= 0 && weight <= 1);

            // Scale the span between the endpoints by the weight broadcast to every lane,
            // then offset the start point by it.
            Vector4F scaledSpan = Multiply(Subtract(to, from), weight.LoadScalarBroadcast());
            return Add(from, scaledSpan);
        }
Example #18
0
        /// <summary>
        /// Horizontal addition of two vectors: forwards to <c>Sse3.HorizontalAdd</c> when SSE3
        /// is available, otherwise to the software fallback.
        /// </summary>
        /// <returns>The result produced by whichever implementation was selected.</returns>
        public static Vector4F HorizontalAdd(Vector4FParam1_3 left, Vector4FParam1_3 right)
        {
            if (!Sse3.IsSupported)
            {
                // TODO can Sse be used over the software fallback?
                return SoftwareFallbacks.SoftwareFallbacksVector4F.HorizontalAdd_Software(left, right);
            }

            return Sse3.HorizontalAdd(left, right);
        }
Example #19
0
        /// <summary>
        /// Linearly interpolates between <paramref name="from"/> and <paramref name="to"/> with a
        /// per-component weight, using <c>from + (to - from) * weight</c>.
        /// </summary>
        /// <param name="from">Start value.</param>
        /// <param name="to">End value.</param>
        /// <param name="weight">Per-component interpolation factors; asserted (debug builds only) to lie in [0, 1].</param>
        /// <returns>The interpolated vector.</returns>
        public static Vector128 <float> Lerp(Vector4FParam1_3 from, Vector4FParam1_3 to, Vector4FParam1_3 weight)
        {
            // Kept as a single expression inside the assert so release builds skip the comparisons entirely.
            Debug.Assert(
                CompareLessThanOrEqual(weight, Vector128.Create(1f)).AllTrue()
                && CompareGreaterThanOrEqual(weight, Vector4F.Zero).AllTrue());

            // Scale the span between the endpoints lane by lane, then offset the start point by it.
            Vector4F scaledSpan = Multiply(Subtract(to, from), weight);
            return Add(from, scaledSpan);
        }
Example #20
0
        /// <summary>
        /// Software fallback for the 3D cross product of <paramref name="left"/> and <paramref name="right"/>.
        /// </summary>
        /// <returns>The cross product in X, Y, Z with W set to zero.</returns>
        public static Vector128 <float> CrossProduct3D_Software(Vector4FParam1_3 left, Vector4FParam1_3 right)
        {
            float aX = X(left), aY = Y(left), aZ = Z(left);
            float bX = X(right), bY = Y(right), bZ = Z(right);

            // X = (Ay * Bz) - (Az * By)
            float crossX = aY * bZ - aZ * bY;

            // Y = (Az * Bx) - (Ax * Bz)
            float crossY = aZ * bX - aX * bZ;

            // Z = (Ax * By) - (Ay * Bx)
            float crossZ = aX * bY - aY * bX;

            return Vector128.Create(crossX, crossY, crossZ, 0f);
        }
        /// <summary>
        /// Software fallback for the component-wise "less than or equal" comparison. Each lane's
        /// boolean outcome is converted with <c>BoolToSimdBoolSingle</c>.
        /// </summary>
        /// <returns>A vector of per-lane comparison results.</returns>
        public static Vector128 <float> CompareLessThanOrEqual_Software(Vector4FParam1_3 left, Vector4FParam1_3 right)
        {
            // A lane with a NaN operand compares false here; the alternative of treating
            // NaN lanes as true (an '|| IsNan(l, r)' term) is intentionally left disabled.
            bool xHolds = X(left) <= X(right);
            bool yHolds = Y(left) <= Y(right);
            bool zHolds = Z(left) <= Z(right);
            bool wHolds = W(left) <= W(right);

            return Vector128.Create(
                BoolToSimdBoolSingle(xHolds),
                BoolToSimdBoolSingle(yHolds),
                BoolToSimdBoolSingle(zHolds),
                BoolToSimdBoolSingle(wHolds));
        }
        /// <summary>
        /// Software fallback for the component-wise "greater than or equal" comparison. A lane
        /// also counts as true when <c>IsNan</c> reports true for its operand pair. Each lane's
        /// boolean outcome is converted with <c>BoolToSimdBoolSingle</c>.
        /// </summary>
        /// <returns>A vector of per-lane comparison results.</returns>
        public static Vector128 <float> CompareGreaterThanOrEqual_Software(Vector4FParam1_3 left, Vector4FParam1_3 right)
        {
            float lX = X(left), lY = Y(left), lZ = Z(left), lW = W(left);
            float rX = X(right), rY = Y(right), rZ = Z(right), rW = W(right);

            // NOTE(review): the "less than or equal" fallback has the equivalent IsNan term
            // commented out, so the two differ on NaN input - confirm which matches the hardware path.
            bool xHolds = lX >= rX || IsNan(lX, rX);
            bool yHolds = lY >= rY || IsNan(lY, rY);
            bool zHolds = lZ >= rZ || IsNan(lZ, rZ);
            bool wHolds = lW >= rW || IsNan(lW, rW);

            return Vector128.Create(
                BoolToSimdBoolSingle(xHolds),
                BoolToSimdBoolSingle(yHolds),
                BoolToSimdBoolSingle(zHolds),
                BoolToSimdBoolSingle(wHolds));
        }
        /// <summary>
        /// Software fallback for the component-wise equality comparison. Each lane's boolean
        /// outcome is converted with <c>BoolToSimdBoolSingle</c>.
        /// </summary>
        /// <returns>A vector of per-lane comparison results.</returns>
        public static Vector128 <float> CompareEqual_Software(Vector4FParam1_3 left, Vector4FParam1_3 right)
        {
            // Exact floating-point equality per lane; a NaN operand therefore compares false.
            bool xEqual = X(left) == X(right);
            bool yEqual = Y(left) == Y(right);
            bool zEqual = Z(left) == Z(right);
            bool wEqual = W(left) == W(right);

            return Vector128.Create(
                BoolToSimdBoolSingle(xEqual),
                BoolToSimdBoolSingle(yEqual),
                BoolToSimdBoolSingle(zEqual),
                BoolToSimdBoolSingle(wEqual));
        }
        /// <summary>
        /// Software fallback for the component-wise maximum. Within each lane a NaN operand is
        /// replaced by the other operand before <c>MathF.Max</c> is applied, so the result is
        /// NaN only when both operands of that lane are NaN.
        /// </summary>
        /// <returns>A vector holding the per-lane maxima in X, Y, Z, W order.</returns>
        public static Vector128 <float> Max_Software(Vector4FParam1_3 left, Vector4FParam1_3 right)
        {
            // Per-lane maximum with NaN substitution: the left operand is patched first,
            // then the right one is patched against the (possibly updated) left value.
            float LaneMax(float l, float r)
            {
                if (float.IsNaN(l))
                {
                    l = r;
                }

                if (float.IsNaN(r))
                {
                    r = l;
                }

                return MathF.Max(l, r);
            }

            float maxX = LaneMax(X(left), X(right));
            float maxY = LaneMax(Y(left), Y(right));
            float maxZ = LaneMax(Z(left), Z(right));
            float maxW = LaneMax(W(left), W(right));

            return Vector128.Create(maxX, maxY, maxZ, maxW);
        }
Example #25
0
        /// <summary>
        /// Computes the 3D cross product of <paramref name="left"/> and <paramref name="right"/>.
        /// The SSE path ANDs the result with <c>SingleConstants.MaskW</c> before returning;
        /// without SSE the software fallback is used.
        /// </summary>
        /// <returns>The cross product in the X, Y, Z lanes.</returns>
        public static Vector128 <float> CrossProduct3D(Vector4FParam1_3 left, Vector4FParam1_3 right)
        {
            if (!Sse.IsSupported)
            {
                return CrossProduct3D_Software(left, right);
            }

            /*
             * For A(x, y, z, _) and B(x, y, z, _) the cross product is
             *
             *   X = (Ay * Bz) - (Az * By)
             *   Y = (Az * Bx) - (Ax * Bz)
             *   Z = (Ax * By) - (Ay * Bx)
             *
             * The left-hand terms of the three subtractions are (Ay, Az, Ax, _) * (Bz, Bx, By, _)
             * and the right-hand terms are (Az, Ax, Ay, _) * (By, Bz, Bx, _), so each side is one
             * pair of shuffles followed by one packed multiply. The fourth lane is irrelevant to
             * the 3D result and is masked afterwards.
             */

            // Left-hand terms: (y, z, x, _) of 'left' times (z, x, y, _) of 'right'.
            Vector4F leftYzx  = Sse.Shuffle(left, left, ShuffleValues._1_2_0_3);
            Vector4F rightZxy = Sse.Shuffle(right, right, ShuffleValues._2_0_1_3);
            Vector4F minuend  = Sse.Multiply(leftYzx, rightZxy);

            // Right-hand terms: (z, x, y, _) of 'left' times (y, z, x, _) of 'right'.
            Vector4F leftZxy    = Sse.Shuffle(left, left, ShuffleValues._2_0_1_3);
            Vector4F rightYzx   = Sse.Shuffle(right, right, ShuffleValues._1_2_0_3);
            Vector4F subtrahend = Sse.Multiply(leftZxy, rightYzx);

            // TODO reuse vectors (minimal register usage) - potentially prevent any stack spilling

            // Mask W to zero, which the Vector3 representation requires.
            return Sse.And(Sse.Subtract(minuend, subtrahend), SingleConstants.MaskW);
        }
        /// <summary>
        /// Squared 2D distance between <paramref name="left"/> and <paramref name="right"/>:
        /// the dot product of their difference with itself over the X and Y lanes.
        /// Picks the best available path out of SSE4.1, SSE3, SSE and the software fallback.
        /// </summary>
        /// <returns>A vector carrying the squared distance (broadcast across lanes on the hardware paths).</returns>
        public static Vector4F DistanceSquared2D(Vector4FParam1_3 left, Vector4FParam1_3 right)
        {
            // SSE4.1 offers a native dot product instruction (dpps).
            if (Sse41.IsSupported)
            {
                Vector4F delta = Sse.Subtract(left, right);

                // Multiply only the first two elements and broadcast the sum to every lane of the result.
                const byte control = 0b_0011_1111;
                return Sse41.DotProduct(delta, delta, control);
            }

            // Without dpps the multiply is still vectorized; only the way the products are
            // summed differs between SSE3 and plain SSE.
            if (Sse3.IsSupported)
            {
                Vector4F delta = Sse.Subtract(left, right);

                // Square the differences and clear the Z and W lanes so they cannot leak into the sum.
                Vector4F squares = Sse.And(Sse.Multiply(delta, delta), MaskWAndZToZero);

                // Horizontal add places X+Y in lanes 0 and 2 (lanes 1 and 3 hold the zeroed Z+W).
                Vector4F pairSums = Sse3.HorizontalAdd(squares, squares);

                // MoveLowAndDuplicate turns (X, Y, Z, W) into (X, X, Z, Z), filling every lane with the sum.
                return Sse3.MoveLowAndDuplicate(pairSums);
            }

            if (Sse.IsSupported)
            {
                Vector4F delta = Sse.Subtract(left, right);
                Vector4F squares = Sse.Multiply(delta, delta);

                // Broadcast the squared Y term and add it onto the squared X term in lane 0.
                Vector4F ySquared = Sse.Shuffle(squares, squares, Helpers.Shuffle(1, 1, 1, 1));
                Vector4F sumInLane0 = Sse.AddScalar(squares, ySquared);

                // Spread lane 0 across the whole vector.
                return Sse.Shuffle(sumInLane0, sumInLane0, Helpers.Shuffle(0, 0, 0, 0));
            }

            return DistanceSquared2D_Software(left, right);
        }
        /// <summary>
        /// Normalizes <paramref name="vector"/> by its 2D length: every lane is divided by the
        /// square root of the X/Y dot product of the vector with itself.
        /// Picks the best available path out of SSE4.1, SSE3, SSE and the software fallback.
        /// </summary>
        /// <returns>The input divided by its 2D length.</returns>
        public static Vector4F Normalize2D(Vector4FParam1_3 vector)
        {
            // The 2D dot product is inlined by hand in each hardware path below.

            // SSE4.1 offers a native dot product instruction (dpps).
            if (Sse41.IsSupported)
            {
                // Multiply only the first two elements and broadcast the sum to every lane of the result.
                const byte control = 0b_0011_1111;
                Vector4F lengthSquared = Sse41.DotProduct(vector, vector, control);

                return Sse.Divide(vector, Sse.Sqrt(lengthSquared));
            }

            // Without dpps the multiply is still vectorized; only the way the products are
            // summed differs between SSE3 and plain SSE.
            if (Sse3.IsSupported)
            {
                // Square the components and clear the Z and W lanes so they cannot leak into the sum.
                Vector4F squares = Sse.And(Sse.Multiply(vector, vector), MaskWAndZToZero);

                // Horizontal add places X+Y in lanes 0 and 2 (lanes 1 and 3 hold the zeroed Z+W).
                Vector4F pairSums = Sse3.HorizontalAdd(squares, squares);

                // MoveLowAndDuplicate turns (X, Y, Z, W) into (X, X, Z, Z), filling every lane with the sum.
                Vector4F lengthSquared = Sse3.MoveLowAndDuplicate(pairSums);

                return Sse.Divide(vector, Sse.Sqrt(lengthSquared));
            }

            if (Sse.IsSupported)
            {
                Vector4F squares = Sse.Multiply(vector, vector);

                // Broadcast the squared Y term and add it onto the squared X term in lane 0.
                Vector4F ySquared = Sse.Shuffle(squares, squares, Helpers.Shuffle(1, 1, 1, 1));
                Vector4F sumInLane0 = Sse.AddScalar(squares, ySquared);

                // Spread lane 0 across the whole vector before taking the root.
                Vector4F lengthSquared = Sse.Shuffle(sumInLane0, sumInLane0, Helpers.Shuffle(0, 0, 0, 0));

                return Sse.Divide(vector, Sse.Sqrt(lengthSquared));
            }

            return Normalize2D_Software(vector);
        }
Example #28
0
        /// <summary>
        /// Returns <paramref name="matrix"/> with its <c>_v3</c> row rebuilt from
        /// <paramref name="translation"/> ANDed with <c>SingleConstants.MaskW</c>, ORed with the
        /// previous <c>_v3</c> ANDed with <c>SingleConstants.MaskXYZ</c>.
        /// </summary>
        /// <param name="matrix">The matrix to update.</param>
        /// <param name="translation">Source of the new X, Y, Z values; its W is discarded.</param>
        /// <returns>The updated matrix.</returns>
        public static MatrixSingle SetTranslation(MatrixSingle matrix, Vector4FParam1_3 translation)
        {
            // Zero W of the incoming translation so only its X, Y, Z survive
            // (presumably MaskW clears the W lane, as its use here implies).
            Vector4F translationXyz = And(translation, SingleConstants.MaskW);

            // Keep nothing but W from the existing row - it must be preserved.
            Vector4F preservedW = And(matrix._v3, SingleConstants.MaskXYZ);

            // Combine: X, Y, Z from the translation and W from the old row.
            matrix._v3 = Or(translationXyz, preservedW);

            return matrix;
        }
        /// <summary>
        /// Normalizes <paramref name="vector"/> by its 3D length: every lane is divided by the
        /// square root of the X/Y/Z dot product of the vector with itself.
        /// Picks the best available path out of SSE4.1, SSE3, SSE and the software fallback.
        /// </summary>
        /// <returns>The input divided by its 3D length.</returns>
        public static Vector4F Normalize3D(Vector4FParam1_3 vector)
        {
            // SSE4.1 offers a native dot product instruction (dpps).
            if (Sse41.IsSupported)
            {
                // Multiply only the first three elements and broadcast the sum to every lane of the result.
                const byte control = 0b_0111_1111;
                Vector4F lengthSquared = Sse41.DotProduct(vector, vector, control);

                return Sse.Divide(vector, Sse.Sqrt(lengthSquared));
            }

            // Without dpps the multiply is still vectorized; only the way the products are
            // summed differs between SSE3 and plain SSE.
            if (Sse3.IsSupported)
            {
                // Square the components and clear W so it cannot leak into the sum.
                Vector4F squares = Sse.And(Sse.Multiply(vector, vector), MaskW);

                // Two rounds of horizontal adds leave the full sum in every lane.
                Vector4F pairSums = VectorF.HorizontalAdd(squares, squares);
                Vector4F lengthSquared = VectorF.HorizontalAdd(pairSums, pairSums);

                return Sse.Divide(vector, Sse.Sqrt(lengthSquared));
            }

            if (Sse.IsSupported)
            {
                // Lane 0 of 'sum' starts as the squared X term.
                Vector4F sum = Sse.Multiply(vector, vector);

                // Bring the other two squared terms into lane 0 one at a time and add each with AddScalar.
                Vector4F pending = Sse.Shuffle(sum, sum, Helpers.Shuffle(2, 1, 2, 1));
                sum = Sse.AddScalar(sum, pending);

                pending = Sse.Shuffle(pending, pending, Helpers.Shuffle(1, 1, 1, 1));
                sum = Sse.AddScalar(sum, pending);

                // Spread lane 0 across the whole vector before taking the root.
                Vector4F lengthSquared = Sse.Shuffle(sum, sum, Helpers.Shuffle(0, 0, 0, 0));

                return Sse.Divide(vector, Sse.Sqrt(lengthSquared));
            }

            return Normalize3D_Software(vector);
        }
Example #30
0
        /// <summary>
        /// 3D dot product of <paramref name="left"/> and <paramref name="right"/> over the X, Y, Z lanes.
        /// Picks the best available path out of SSE4.1, SSE3, SSE and the software fallback.
        /// </summary>
        /// <returns>A vector carrying the dot product (broadcast across lanes on the hardware paths).</returns>
        public static Vector4F DotProduct3D(Vector4FParam1_3 left, Vector4FParam1_3 right)
        {
            // SSE4.1 offers a native dot product instruction (dpps).
            if (Sse41.IsSupported)
            {
                // Multiply only the first three elements and broadcast the sum to every lane of the result.
                const byte control = 0b_0111_1111;
                return Sse41.DotProduct(left, right, control);
            }

            // Without dpps the multiply is still vectorized; only the way the products are
            // summed differs between SSE3 and plain SSE.
            if (Sse3.IsSupported)
            {
                // Multiply lane by lane and clear W so it cannot leak into the sum.
                Vector4F products = Sse.And(Sse.Multiply(left, right), MaskWSingle);

                // Two rounds of horizontal adds leave the full sum in every lane.
                Vector4F pairSums = Sse3.HorizontalAdd(products, products);
                return Sse3.HorizontalAdd(pairSums, pairSums);
            }

            if (Sse.IsSupported)
            {
                // Lane 0 of 'sum' starts as the X product.
                Vector4F sum = Sse.Multiply(left, right);

                // Bring the other two products into lane 0 one at a time and add each with AddScalar.
                Vector4F pending = Sse.Shuffle(sum, sum, ShuffleValues._2_1_2_1);
                sum = Sse.AddScalar(sum, pending);

                pending = Sse.Shuffle(pending, pending, ShuffleValues._1_1_1_1);
                sum = Sse.AddScalar(sum, pending);

                // Spread lane 0 across the whole vector.
                return Sse.Shuffle(sum, sum, ShuffleValues._0_0_0_0);
            }

            return DotProduct3D_Software(left, right);
        }