public static void ApplyImpulse(ref Vector3Wide angularJacobianA, ref BodyInertias inertiaA, ref Vector <float> correctiveImpulse, ref BodyVelocities wsvA) { Vector3Wide.Scale(angularJacobianA, correctiveImpulse, out var worldCorrectiveImpulseA); Symmetric3x3Wide.TransformWithoutOverlap(worldCorrectiveImpulseA, inertiaA.InverseInertiaTensor, out var worldCorrectiveVelocityA); Vector3Wide.Add(wsvA.Angular, worldCorrectiveVelocityA, out wsvA.Angular); }
public static void ApplyImpulse(ref Jacobians jacobians, ref BodyInertias inertiaA, ref Vector2Wide correctiveImpulse, ref BodyVelocities wsvA) { Matrix2x3Wide.Transform(correctiveImpulse, jacobians.LinearA, out var linearImpulseA); Matrix2x3Wide.Transform(correctiveImpulse, jacobians.AngularA, out var angularImpulseA); BodyVelocities correctiveVelocityA; Vector3Wide.Scale(linearImpulseA, inertiaA.InverseMass, out correctiveVelocityA.Linear); Symmetric3x3Wide.TransformWithoutOverlap(angularImpulseA, inertiaA.InverseInertiaTensor, out correctiveVelocityA.Angular); Vector3Wide.Add(wsvA.Linear, correctiveVelocityA.Linear, out wsvA.Linear); Vector3Wide.Add(wsvA.Angular, correctiveVelocityA.Angular, out wsvA.Angular); }
public static void ApplyImpulse(ref PenetrationLimitOneBodyProjection projection, ref BodyInertias inertiaA, ref Vector3Wide normal, ref Vector <float> correctiveImpulse, ref BodyVelocities wsvA) { var linearVelocityChangeA = correctiveImpulse * inertiaA.InverseMass; Vector3Wide.Scale(normal, linearVelocityChangeA, out var correctiveVelocityALinearVelocity); Vector3Wide.Scale(projection.AngularA, correctiveImpulse, out var correctiveAngularImpulseA); Symmetric3x3Wide.TransformWithoutOverlap(correctiveAngularImpulseA, inertiaA.InverseInertiaTensor, out var correctiveVelocityAAngularVelocity); Vector3Wide.Add(wsvA.Linear, correctiveVelocityALinearVelocity, out wsvA.Linear); Vector3Wide.Add(wsvA.Angular, correctiveVelocityAAngularVelocity, out wsvA.Angular); }
public static void ComputeCorrectiveImpulse(ref BodyVelocities velocityA, ref BodyVelocities velocityB, ref Vector3Wide offsetA, ref Vector3Wide offsetB, ref Vector3Wide biasVelocity, ref Symmetric3x3Wide effectiveMass, ref Vector <float> softnessImpulseScale, ref Vector3Wide accumulatedImpulse, out Vector3Wide correctiveImpulse) { //csi = projection.BiasImpulse - accumulatedImpulse * projection.SoftnessImpulseScale - (csiaLinear + csiaAngular + csibLinear + csibAngular); //Note subtraction; jLinearB = -I. Vector3Wide.Subtract(velocityA.Linear, velocityB.Linear, out var csv); Vector3Wide.CrossWithoutOverlap(velocityA.Angular, offsetA, out var angularCSV); Vector3Wide.Add(csv, angularCSV, out csv); //Note reversed cross order; matches the jacobian -CrossMatrix(offsetB). Vector3Wide.CrossWithoutOverlap(offsetB, velocityB.Angular, out angularCSV); Vector3Wide.Add(csv, angularCSV, out csv); Vector3Wide.Subtract(biasVelocity, csv, out csv); Symmetric3x3Wide.TransformWithoutOverlap(csv, effectiveMass, out correctiveImpulse); Vector3Wide.Scale(accumulatedImpulse, softnessImpulseScale, out var softness); Vector3Wide.Subtract(correctiveImpulse, softness, out correctiveImpulse); }
public static void ApplyImpulse(ref BodyVelocities velocityA, ref BodyVelocities velocityB, ref Vector3Wide offsetA, ref Vector3Wide offsetB, ref BodyInertias inertiaA, ref BodyInertias inertiaB, ref Vector3Wide constraintSpaceImpulse) { Vector3Wide.CrossWithoutOverlap(offsetA, constraintSpaceImpulse, out var wsi); Symmetric3x3Wide.TransformWithoutOverlap(wsi, inertiaA.InverseInertiaTensor, out var change); Vector3Wide.Add(velocityA.Angular, change, out velocityA.Angular); Vector3Wide.Scale(constraintSpaceImpulse, inertiaA.InverseMass, out change); Vector3Wide.Add(velocityA.Linear, change, out velocityA.Linear); Vector3Wide.CrossWithoutOverlap(constraintSpaceImpulse, offsetB, out wsi); //note flip-negation Symmetric3x3Wide.TransformWithoutOverlap(wsi, inertiaB.InverseInertiaTensor, out change); Vector3Wide.Add(velocityB.Angular, change, out velocityB.Angular); Vector3Wide.Scale(constraintSpaceImpulse, inertiaB.InverseMass, out change); Vector3Wide.Subtract(velocityB.Linear, change, out velocityB.Linear); //note subtraction; the jacobian is -I }
public static void ApplyImpulse(ref Jacobians jacobians, ref BodyInertias inertiaA, ref BodyInertias inertiaB, ref Vector2Wide correctiveImpulse, ref BodyVelocities wsvA, ref BodyVelocities wsvB) { Matrix2x3Wide.Transform(correctiveImpulse, jacobians.LinearA, out var linearImpulseA); Matrix2x3Wide.Transform(correctiveImpulse, jacobians.AngularA, out var angularImpulseA); Matrix2x3Wide.Transform(correctiveImpulse, jacobians.AngularB, out var angularImpulseB); BodyVelocities correctiveVelocityA, correctiveVelocityB; Vector3Wide.Scale(linearImpulseA, inertiaA.InverseMass, out correctiveVelocityA.Linear); Symmetric3x3Wide.TransformWithoutOverlap(angularImpulseA, inertiaA.InverseInertiaTensor, out correctiveVelocityA.Angular); Vector3Wide.Scale(linearImpulseA, inertiaB.InverseMass, out correctiveVelocityB.Linear); Symmetric3x3Wide.TransformWithoutOverlap(angularImpulseB, inertiaB.InverseInertiaTensor, out correctiveVelocityB.Angular); Vector3Wide.Add(wsvA.Linear, correctiveVelocityA.Linear, out wsvA.Linear); Vector3Wide.Add(wsvA.Angular, correctiveVelocityA.Angular, out wsvA.Angular); Vector3Wide.Subtract(wsvB.Linear, correctiveVelocityB.Linear, out wsvB.Linear); //note subtract- we based it on the LinearA jacobian. Vector3Wide.Add(wsvB.Angular, correctiveVelocityB.Angular, out wsvB.Angular); }
public static void ApplyImpulse(ref PenetrationLimitProjection projection, ref BodyInertias inertiaA, ref BodyInertias inertiaB, ref Vector3Wide normal, ref Vector <float> correctiveImpulse, ref BodyVelocities wsvA, ref BodyVelocities wsvB) { var linearVelocityChangeA = correctiveImpulse * inertiaA.InverseMass; Vector3Wide.Scale(normal, linearVelocityChangeA, out var correctiveVelocityALinearVelocity); Vector3Wide.Scale(projection.AngularA, correctiveImpulse, out var correctiveAngularImpulseA); Symmetric3x3Wide.TransformWithoutOverlap(correctiveAngularImpulseA, inertiaA.InverseInertiaTensor, out var correctiveVelocityAAngularVelocity); var linearVelocityChangeB = correctiveImpulse * inertiaB.InverseMass; Vector3Wide.Scale(normal, linearVelocityChangeB, out var correctiveVelocityBLinearVelocity); Vector3Wide.Scale(projection.AngularB, correctiveImpulse, out var correctiveAngularImpulseB); Symmetric3x3Wide.TransformWithoutOverlap(correctiveAngularImpulseB, inertiaB.InverseInertiaTensor, out var correctiveVelocityBAngularVelocity); Vector3Wide.Add(wsvA.Linear, correctiveVelocityALinearVelocity, out wsvA.Linear); Vector3Wide.Add(wsvA.Angular, correctiveVelocityAAngularVelocity, out wsvA.Angular); Vector3Wide.Subtract(wsvB.Linear, correctiveVelocityBLinearVelocity, out wsvB.Linear); //Note subtract; normal = -jacobianLinearB Vector3Wide.Add(wsvB.Angular, correctiveVelocityBAngularVelocity, out wsvB.Angular); }
public static void Prestep(ref BodyInertias inertiaA, ref BodyInertias inertiaB, ref TwoBody1DOFJacobians jacobians, ref SpringSettingsWide springSettings, ref Vector <float> maximumRecoveryVelocity, ref Vector <float> positionError, float dt, float inverseDt, out Projection2Body1DOF projection) { //unsoftened effective mass = (J * M^-1 * JT)^-1 //where J is a constraintDOF x bodyCount*6 sized matrix, JT is its transpose, and for two bodies3D M^-1 is: //[inverseMassA, 0, 0, 0] //[0, inverseInertiaA, 0, 0] //[0, 0, inverseMassB, 0] //[0, 0, 0, inverseInertiaB] //The entries of J match up to this convention, containing the linear and angular components of each body in sequence, so for a 2 body 1DOF constraint J would look like: //[linearA 1x3, angularA 1x3, linearB 1x3, angularB 1x3] //Note that it is a row vector by convention. When transforming velocities from world space into constraint space, it is assumed that the velocity vector is organized as a //row vector matching up to the jacobian (that is, [linearA 1x3, angularA 1x3, linearB 1x3, angularB 1x3]), so for a 2 body 2 DOF constraint, //worldVelocity * JT would be a [worldVelocity: 1x12] * [JT: 12x2], resulting in a 1x2 constraint space velocity row vector. //Similarly, when going from constraint space impulse to world space impulse in the above example, we would do [csi: 1x2] * [J: 2x12] to get a 1x12 world impulse row vector. //Note that the engine uses row vectors for all velocities and positions and so on. Rotation and inertia tensors are constructed for premultiplication. //In other words, unlike many of the presentations in the space, we use v * JT and csi * J instead of J * v and JT * csi. //There is no meaningful difference- the two conventions are just transpositions of each other. //(If you want to know how this stuff works, go read the constraint related presentations: http://box2d.org/downloads/ //Be mindful of the difference in conventions. You'll see J * v instead of v * JT, for example. Everything is still fundamentally the same, though.) //Due to the block structure of the mass matrix, we can handle each component separately and then sum the results. //For this 1DOF constraint, the result is a simple scalar. //Note that we store the intermediate results of J * M^-1 for use when projecting from constraint space impulses to world velocity changes. //If we didn't store those intermediate values, we could just scale the dot product of jacobians.LinearA with itself to save 4 multiplies. Vector3Wide.Scale(jacobians.LinearA, inertiaA.InverseMass, out projection.CSIToWSVLinearA); Vector3Wide.Scale(jacobians.LinearB, inertiaB.InverseMass, out projection.CSIToWSVLinearB); Vector3Wide.Dot(projection.CSIToWSVLinearA, jacobians.LinearA, out var linearA); Vector3Wide.Dot(projection.CSIToWSVLinearB, jacobians.LinearB, out var linearB); //The angular components are a little more involved; (J * I^-1) * JT is explicitly computed. Symmetric3x3Wide.TransformWithoutOverlap(jacobians.AngularA, inertiaA.InverseInertiaTensor, out projection.CSIToWSVAngularA); Symmetric3x3Wide.TransformWithoutOverlap(jacobians.AngularB, inertiaB.InverseInertiaTensor, out projection.CSIToWSVAngularB); Vector3Wide.Dot(projection.CSIToWSVAngularA, jacobians.AngularA, out var angularA); Vector3Wide.Dot(projection.CSIToWSVAngularB, jacobians.AngularB, out var angularB); //Now for a digression! //Softness is applied along the diagonal (which, for a 1DOF constraint, is just the only element). //Check the the ODE reference for a bit more information: http://ode.org/ode-latest-userguide.html#sec_3_8_0 //And also see Erin Catto's Soft Constraints presentation for more details: http://box2d.org/files/GDC2011/GDC2011_Catto_Erin_Soft_Constraints.pdf) //There are some very interesting tricks you can use here, though. //Our core tuning variables are the damping ratio and natural frequency. //Our runtime used variables are softness and an error reduction feedback scale.. //(For the following, I'll use the ODE terms CFM and ERP, constraint force mixing and error reduction parameter.) //So first, we need to get from damping ratio and natural frequency to stiffness and damping spring constants. //From there, we'll go to CFM/ERP. //Then, we'll create an expression for a softened effective mass matrix (i.e. one that takes into account the CFM term), //and an expression for the contraint force mixing term in the solve iteration. //Finally, compute ERP. //(And then some tricks.) //1) Convert from damping ratio and natural frequency to stiffness and damping constants. //The raw expressions are: //stiffness = effectiveMass * naturalFrequency^2 //damping = effectiveMass * 2 * dampingRatio * naturalFrequency //Rather than using any single object as the reference for the 'mass' term involved in this conversion, use the effective mass of the constraint. //In other words, we're dynamically picking the spring constants necessary to achieve the desired behavior for the current constraint configuration. //(See Erin Catto's presentation above for more details on this.) //(Note that this is different from BEPUphysics v1. There, users configured stiffness and damping constants. That worked okay, but people often got confused about //why constraints didn't behave the same when they changed masses. Usually it manifested as someone creating an incredibly high mass object relative to the default //stiffness/damping, and they'd post on the forum wondering why constraints were so soft. Basically, the defaults were another sneaky tuning factor to get wrong. //Since damping ratio and natural frequency define the behavior independent of the mass, this problem goes away- and it makes some other interesting things happen...) //2) Convert from stiffness and damping constants to CFM and ERP. //CFM = (stiffness * dt + damping)^-1 //ERP = (stiffness * dt) * (stiffness * dt + damping)^-1 //Or, to rephrase: //ERP = (stiffness * dt) * CFM //3) Use CFM and ERP to create a softened effective mass matrix and a force mixing term for the solve iterations. //Start with a base definition which we won't be deriving, the velocity constraint itself (stated as an equality constraint here): //This means 'world space velocity projected into constraint space should equal the velocity bias term combined with the constraint force mixing term'. //(The velocity bias term will be computed later- it's the position error scaled by the error reduction parameter, ERP. Position error is used to create a velocity motor goal.) //We're pulling back from the implementation of sequential impulses here, so rather than using the term 'accumulated impulse', we'll use 'lambda' //(which happens to be consistent with the ODE documentation covering the same topic). Lambda is impulse that satisfies the constraint. //wsv * JT = bias - lambda * CFM/dt //This can be phrased as: //currentVelocity = targetVelocity //Or: //goalVelocityChange = targetVelocity - currentVelocity //lambda = goalVelocityChange * effectiveMass //lambda = (targetVelocity - currentVelocity) * effectiveMass //lambda = (bias - lambda * CFM/dt - currentVelocity) * effectiveMass //Solving for lambda: //lambda = (bias - currentVelocity) * effectiveMass - lambda * CFM/dt * effectiveMass //lambda + lambda * CFM/dt * effectiveMass = (bias - currentVelocity) * effectiveMass //(lambda + lambda * CFM/dt * effectiveMass) * effectiveMass^-1 = bias - currentVelocity //lambda * effectiveMass^-1 + lambda * CFM/dt = bias - currentVelocity //lambda * (effectiveMass^-1 + CFM/dt) = bias - currentVelocity //lambda = (bias - currentVelocity) * (effectiveMass^-1 + CFM/dt)^-1 //lambda = (bias - wsv * JT) * (effectiveMass^-1 + CFM/dt)^-1 //In other words, we transform the velocity change (bias - wsv * JT) into the constraint-satisfying impulse, lambda, using a matrix (effectiveMass^-1 + CFM/dt)^-1. //That matrix is the softened effective mass: //softenedEffectiveMass = (effectiveMass^-1 + CFM/dt)^-1 //Here's where some trickiness occurs. (Be mindful of the distinction between the softened and unsoftened effective mass). //Start by substituting CFM into the softened effective mass definition: //CFM/dt = (stiffness * dt + damping)^-1 / dt = (dt * (stiffness * dt + damping))^-1 = (stiffness * dt^2 + damping*dt)^-1 //softenedEffectiveMass = (effectiveMass^-1 + (stiffness * dt^2 + damping * dt)^-1)^-1 //Now substitute the definitions of stiffness and damping, treating the scalar components as uniform scaling matrices of dimension equal to effectiveMass: //softenedEffectiveMass = (effectiveMass^-1 + ((effectiveMass * naturalFrequency^2) * dt^2 + (effectiveMass * 2 * dampingRatio * naturalFrequency) * dt)^-1)^-1 //Combine the inner effectiveMass coefficients, given matrix multiplication distributes over addition: //softenedEffectiveMass = (effectiveMass^-1 + (effectiveMass * (naturalFrequency^2 * dt^2) + effectiveMass * (2 * dampingRatio * naturalFrequency * dt))^-1)^-1 //softenedEffectiveMass = (effectiveMass^-1 + (effectiveMass * (naturalFrequency^2 * dt^2 + 2 * dampingRatio * naturalFrequency * dt))^-1)^-1 //Apply the inner matrix inverse: //softenedEffectiveMass = (effectiveMass^-1 + (naturalFrequency^2 * dt^2 + 2 * dampingRatio * naturalFrequency * dt)^-1 * effectiveMass^-1)^-1 //Once again, combine coefficients of the inner effectiveMass^-1 terms: //softenedEffectiveMass = ((1 + (naturalFrequency^2 * dt^2 + 2 * dampingRatio * naturalFrequency * dt)^-1) * effectiveMass^-1)^-1 //Apply the inverse again: //softenedEffectiveMass = effectiveMass * (1 + (naturalFrequency^2 * dt^2 + 2 * dampingRatio * naturalFrequency * dt)^-1)^-1 //So, to put it another way- because CFM is based on the effective mass, applying it to the effective mass results in a simple downscale. //What has been gained? Consider what happens in the solve iteration. //We take the velocity error: //velocityError = bias - accumulatedImpulse * CFM/dt - wsv * JT //and convert it to a corrective impulse with the effective mass: //impulse = (bias - accumulatedImpulse * CFM/dt - wsv * JT) * softenedEffectiveMass //The effective mass distributes over the set: //impulse = bias * softenedEffectiveMass - accumulatedImpulse * CFM/dt * softenedEffectiveMass - wsv * JT * softenedEffectiveMass //Focus on the CFM term: //-accumulatedImpulse * CFM/dt * softenedEffectiveMass //What is CFM/dt * softenedEffectiveMass? Substitute. //(stiffness * dt^2 + damping * dt)^-1 * softenedEffectiveMass //((effectiveMass * naturalFrequency^2) * dt^2 + (effectiveMass * 2 * dampingRatio * naturalFrequency * dt))^-1 * softenedEffectiveMass //Combine terms: //(effectiveMass * (naturalFrequency^2 * dt^2 + 2 * dampingRatio * naturalFrequency * dt))^-1 * softenedEffectiveMass //Apply inverse: //(naturalFrequency^2 * dt^2 + 2 * dampingRatio * naturalFrequency * dt)^-1 * effectiveMass^-1 * softenedEffectiveMass //Expand softened effective mass from earlier: //(naturalFrequency^2 * dt^2 + 2 * dampingRatio * naturalFrequency * dt)^-1 * effectiveMass^-1 * effectiveMass * (1 + (naturalFrequency^2 * dt^2 + 2 * dampingRatio * naturalFrequency * dt)^-1)^-1 //Cancel effective masses: (!) //(naturalFrequency^2 * dt^2 + 2 * dampingRatio * naturalFrequency * dt)^-1 * (1 + (naturalFrequency^2 * dt^2 + 2 * dampingRatio * naturalFrequency * dt)^-1)^-1 //Because CFM was created from effectiveMass, the CFM/dt * effectiveMass term is actually independent of the effectiveMass! //The remaining expression is still a matrix, but fortunately it is a simple uniform scaling matrix that we can store and apply as a single scalar. //4) How do you compute ERP? //ERP = (stiffness * dt) * CFM //ERP = (stiffness * dt) * (stiffness * dt + damping)^-1 //ERP = ((effectiveMass * naturalFrequency^2) * dt) * ((effectiveMass * naturalFrequency^2) * dt + (effectiveMass * 2 * dampingRatio * naturalFrequency))^-1 //Combine denominator terms: //ERP = ((effectiveMass * naturalFrequency^2) * dt) * ((effectiveMass * (naturalFrequency^2 * dt + 2 * dampingRatio * naturalFrequency))^-1 //Apply denominator inverse: //ERP = ((effectiveMass * naturalFrequency^2) * dt) * (naturalFrequency^2 * dt + 2 * dampingRatio * naturalFrequency)^-1 * effectiveMass^-1 //Uniform scaling matrices commute: //ERP = (naturalFrequency^2 * dt) * effectiveMass * effectiveMass^-1 * (naturalFrequency^2 * dt + 2 * dampingRatio * naturalFrequency)^-1 //Cancellation! //ERP = (naturalFrequency^2 * dt) * (naturalFrequency^2 * dt + 2 * dampingRatio * naturalFrequency)^-1 //ERP = (naturalFrequency * dt) * (naturalFrequency * dt + 2 * dampingRatio)^-1 //ERP is a simple scalar, independent of mass. //5) So we can compute CFM, ERP, the softened effective mass matrix, and we have an interesting shortcut on the constraint force mixing term of the solve iterations. //Is there anything more that can be done? You bet! //Let's look at the post-distribution impulse computation again: //impulse = bias * effectiveMass - accumulatedImpulse * CFM/dt * effectiveMass - wsv * JT * effectiveMass //During the solve iterations, the only quantities that vary are the accumulated impulse and world space velocities. So the rest can be precomputed. //bias * effectiveMass, //CFM/dt * effectiveMass, //JT * effectiveMass //In other words, we bypass the intermediate velocity state and go directly from source velocities to an impulse. //Note the sizes of the precomputed types above: //bias * effective mass is the same size as bias (vector with dimension equal to constrained DOFs) //CFM/dt * effectiveMass is a single scalar regardless of constrained DOFs, //JT * effectiveMass is the same size as JT //But note that we no longer need to load the effective mass! It is implicit. //The resulting computation is: //impulse = a - accumulatedImpulse * b - wsv * c //two DOF-width adds (add/subtract), one DOF-width multiply, and a 1xDOF * DOFx12 jacobian-sized transform. //Compare to; //(bias - accumulatedImpulse * CFM/dt - wsv * JT) * effectiveMass //two DOF-width adds (add/subtract), one DOF width multiply, a 1xDOF * DOFx12 jacobian-sized transform, and a 1xDOF * DOFxDOF transform. //In other words, we shave off a whole 1xDOF * DOFxDOF transform per iteration. //So, taken in isolation, this is a strict win both in terms of memory and the amount of computation. //Unfortunately, it's not quite so simple- jacobians are ALSO used to transform the impulse into world space so that it can be used to change the body velocities. //We still need to have those around. So while we no longer store the effective mass, our jacobian has sort of been duplicated. //But wait, there's more! //That process looks like: //wsv += impulse * J * M^-1 //So while we need to store something here, we can take advantage of the fact that we aren't using the jacobian anywhere else (it's replaced by the JT * effectiveMass term above). //Precompute J*M^-1, too. //So you're still loading a jacobian-sized matrix, but you don't need to load M^-1! That saves you 14 scalars. (symmetric 3x3 + 1 + symmetric 3x3 + 1) //That saves you the multiplication of (impulse * J) * M^-1, which is 6 multiplies and 6 dot products. //Note that this optimization's value depends on the number of constrained DOFs. //Net memory change, opt vs no opt, in scalars: //1DOF: costs 1x12, saves 1x1 effective mass and the 14 scalar M^-1: -3 //2DOF: costs 2x12, saves 2x2 symmetric effective mass and the 14 scalar M^-1: 7 //3DOF: costs 3x12, saves 3x3 symmetric effective mass and the 14 scalar M^-1: 16 //4DOF: costs 4x12, saves 4x4 symmetric effective mass and the 14 scalar M^-1: 24 //5DOF: costs 5x12, saves 5x5 symmetric effective mass and the 14 scalar M^-1: 31 //6DOF: costs 6x12, saves 6x6 symmetric effective mass and the 14 scalar M^-1: 37 //Net compute savings, opt vs no opt: //DOF savings = 1xDOF * DOFxDOF (DOF DOFdot products), 2 1x3 * scalar (6 multiplies), 2 1x3 * 3x3 (6 3dot products) // = (DOF*DOF multiplies + DOF*(DOF-1) adds) + (6 multiplies) + (18 multiplies + 12 adds) // = DOF*DOF + 24 multiplies, DOF*DOF-DOF + 12 adds //1DOF: 25 multiplies, 12 adds //2DOF: 28 multiplies, 14 adds //3DOF: 33 multiplies, 18 adds //4DOF: 40 multiplies, 24 adds //5DOF: 49 multiplies, 32 adds //6DOF: 60 multiplies, 42 adds //So does our 'optimization' actually do anything useful? //In 1 DOF constraints, it's often a win with no downsides. //2+ are difficult to determine. //This depends on heavily on the machine's SIMD width. You do every lane's ALU ops in parallel, but the loads are still fundamentally bound by memory bandwidth. //The loads are coherent, at least- no gathers on this stuff. But I wouldn't be surprised if 3DOF+ constraints end up being faster *without* the pretransformations on wide SIMD. //This is just something that will require case by case analysis. Constraints can have special structure which change the judgment. //(Also, note that large DOF jacobians are often very sparse. Consider the jacobians used by a 6DOF weld joint. You could likely do special case optimizations to reduce the //load further. It is unlikely that you could find a way to do the same to JT * effectiveMass. J * M^-1 might have some savings, though. But J*M^-1 isn't *sparser* //than J by itself, so the space savings are limited. As long as you precompute, the above load requirement offset will persist.) //Good news, though! There are a lot of constraints where this trick is applicable. //We'll start with the unsoftened effective mass, constructed from the contributions computed above: var effectiveMass = Vector <float> .One / (linearA + linearB + angularA + angularB); SpringSettingsWide.ComputeSpringiness(springSettings, dt, out var positionErrorToVelocity, out var effectiveMassCFMScale, out projection.SoftnessImpulseScale); var softenedEffectiveMass = effectiveMass * effectiveMassCFMScale; //Note that we use a bit of a hack when computing the bias velocity- even if our damping ratio/natural frequency implies a strongly springy response //that could cause a significant velocity overshoot, we apply an arbitrary clamping value to keep it reasonable. //This is useful for a variety of inequality constraints (like contacts) because you don't always want them behaving as true springs. var biasVelocity = Vector.Min(positionError * positionErrorToVelocity, maximumRecoveryVelocity); projection.BiasImpulse = biasVelocity * softenedEffectiveMass; //Precompute the wsv * (JT * softenedEffectiveMass) term. //Note that we store it in a Vector3Wide as if it's a row vector, but this is really a column (because JT is a column vector). //So we're really storing (JT * softenedEffectiveMass)T = softenedEffectiveMassT * J. //Since this constraint is 1DOF, the softenedEffectiveMass is a scalar and the order doesn't matter. //In the solve iterations, the WSVtoCSI term will be transposed during transformation, //resulting in the proper wsv * (softenedEffectiveMassT * J)T = wsv * (JT * softenedEffectiveMass). //You'll see this pattern repeated in higher DOF constraints. We explicitly compute softenedEffectiveMassT * J, and then apply the transpose in the solves. //(Why? Because creating a Matrix3x2 and Matrix2x3 and 4x3 and 3x4 and 5x3 and 3x5 and so on just doubles the number of representations with little value.) Vector3Wide.Scale(jacobians.LinearA, softenedEffectiveMass, out projection.WSVtoCSILinearA); Vector3Wide.Scale(jacobians.AngularA, softenedEffectiveMass, out projection.WSVtoCSIAngularA); Vector3Wide.Scale(jacobians.LinearB, softenedEffectiveMass, out projection.WSVtoCSILinearB); Vector3Wide.Scale(jacobians.AngularB, softenedEffectiveMass, out projection.WSVtoCSIAngularB); }