internal static (__m128, __m128) Inverse(__m128 p1, __m128 p2) { // s, t computed as in the normalization __m128 b2 = Detail.dp_bc(p1, p1); __m128 s = Detail.rsqrt_nr1(b2); __m128 bc = Detail.dp_bc(_mm_xor_ps(p1, _mm_set_ss(-0f)), p2); __m128 b2Inv = Detail.rcp_nr1(b2); __m128 t = _mm_mul_ps(_mm_mul_ps(bc, b2Inv), s); __m128 neg = _mm_set_ps(-0f, -0f, -0f, 0f); // p1 * (s + t e0123)^2 = (s * p1 - t P1perp) * (s + t e0123) // = s^2 p1 - s t P1perp - s t P1perp // = s^2 p1 - 2 s t P1perp // (the scalar component above needs to be negated) // p2 * (s + t e0123)^2 = s^2 p2 NOTE: s^2 = b2_inv __m128 st = _mm_mul_ps(s, t); st = _mm_mul_ps(p1, st); p2 = _mm_sub_ps(_mm_mul_ps(p2, b2Inv), _mm_xor_ps(_mm_add_ps(st, st), _mm_set_ss(-0f))); p2 = _mm_xor_ps(p2, neg); p1 = _mm_xor_ps(_mm_mul_ps(p1, b2Inv), neg); return(p1, p2); }
internal static (__m128, __m128) Normalized(__m128 p1, __m128 p2) { // m = b + c where b is p1 and c is p2 // // m * ~m = |b|^2 + 2(b0 c0 - b1 c1 - b2 c2 - b3 c3)e0123 // // The square root is given as: // |b| + (b0 c0 - b1 c1 - b2 c2 - b3 c3)/|b| e0123 // // The inverse of this is given by: // 1/|b| + (-b0 c0 + b1 c1 + b2 c2 + b3 c3)/|b|^3 e0123 = s + t e0123 // // Multiplying our original Motor by this inverse will give us a // normalized Motor. __m128 b2 = Detail.dp_bc(p1, p1); __m128 s = Detail.rsqrt_nr1(b2); __m128 bc = Detail.dp_bc(_mm_xor_ps(p1, _mm_set_ss(-0f)), p2); __m128 t = _mm_mul_ps(_mm_mul_ps(bc, Detail.rcp_nr1(b2)), s); // (s + t e0123) * Motor = // // s b0 + // s b1 e23 + // s b2 e31 + // s b3 e12 + // (s c0 + t b0) e0123 + // (s c1 - t b1) e01 + // (s c2 - t b2) e02 + // (s c3 - t b3) e03 __m128 tmp = _mm_mul_ps(p2, s); p2 = _mm_sub_ps(tmp, _mm_xor_ps(_mm_mul_ps(p1, t), _mm_set_ss(-0f))); p1 = _mm_mul_ps(p1, s); return(p1, p2); }
internal static __m128 Normalized(__m128 p1) { __m128 invNorm = Detail.rsqrt_nr1(Detail.dp_bc(p1, p1)); return(_mm_mul_ps(p1, invNorm)); }