/// <summary>
/// Solves the box-constrained system for a square matrix: finds x with lo &lt;= x &lt;= hi
/// such that the acceleration a = m * x - b is complementary to the bounds.
/// </summary>
/// <param name="o_m">System matrix. Its storage is shared with the solver and permuted while solving; the permutation is undone before the normal return.</param>
/// <param name="o_x">Receives the solution, written element by element in the caller's original variable order.</param>
/// <param name="o_b">Right hand side. Copied into solver scratch storage; not modified.</param>
/// <param name="o_lo">Lower bounds (float.NegativeInfinity for none). Copied; not modified.</param>
/// <param name="o_hi">Upper bounds (float.PositiveInfinity for none). Copied; not modified.</param>
/// <param name="o_boxIndex">Optional. For a bounded variable j with o_boxIndex[j] &gt;= 0 the finite bounds are rescaled by the solved value of x at that index. May be null.</param>
/// <returns>
/// false when the factorization of the unbounded variables fails; true otherwise. Note that true is
/// also returned when the bounded solve gives up (invalid step or max iterations): in that case the
/// remaining forces are zeroed and, if lcp_showFailures is set, a message is printed.
/// </returns>
public override bool Solve(ref idMatX o_m, ref idVecX o_x, ref idVecX o_b, ref idVecX o_lo, ref idVecX o_hi, int[] o_boxIndex)
        {
            // Loop indices live at method scope because the failure handling and the final
            // unpermute step need their values after the loops that set them have ended.
            int i, j;

            // true when the matrix rows are 16 byte padded
            padded = (((o_m.GetNumRows() + 3) & ~3) == o_m.GetNumColumns());
            Debug.Assert(padded || o_m.GetNumRows() == o_m.GetNumColumns());
            Debug.Assert(o_x.GetSize() == o_m.GetNumRows());
            Debug.Assert(o_b.GetSize() == o_m.GetNumRows());
            Debug.Assert(o_lo.GetSize() == o_m.GetNumRows());
            Debug.Assert(o_hi.GetSize() == o_m.GetNumRows());

            // allocate memory for permuted input
            f.SetData(o_m.GetNumRows(), VECX_ALLOCA(o_m.GetNumRows()));
            a.SetData(o_b.GetSize(), VECX_ALLOCA(o_b.GetSize()));
            b.SetData(o_b.GetSize(), VECX_ALLOCA(o_b.GetSize()));
            lo.SetData(o_lo.GetSize(), VECX_ALLOCA(o_lo.GetSize()));
            hi.SetData(o_hi.GetSize(), VECX_ALLOCA(o_hi.GetSize()));
            if (o_boxIndex != null)
            {
                // work on a private copy so permuting it does not disturb the caller's array
                boxIndex = new int[o_x.GetSize()];
                Array.Copy(o_boxIndex, boxIndex, o_x.GetSize());
            }
            else
            {
                boxIndex = null;
            }
            // we override the const on o_m here but on exit the matrix is unchanged
            m.SetData(o_m.GetNumRows(), o_m.GetNumColumns(), (float[])o_m[0]);
            f.Zero();
            a.Zero();
            // Copy the input values into the scratch vectors allocated above. Assigning the
            // vectors themselves would alias the caller's data, which is later permuted and
            // (for box-indexed variables) has its bounds rescaled.
            for (i = 0; i < o_b.GetSize(); i++)
            {
                b[i] = o_b[i];
            }
            for (i = 0; i < o_lo.GetSize(); i++)
            {
                lo[i] = o_lo[i];
            }
            for (i = 0; i < o_hi.GetSize(); i++)
            {
                hi[i] = o_hi[i];
            }
            // pointers to the rows of m
            rowPtrs = new float[m.GetNumRows()][];
            for (i = 0; i < m.GetNumRows(); i++)
            {
                rowPtrs[i] = m[i];
            }
            // tells if a variable is at the low boundary, high boundary or inbetween
            side = new int[m.GetNumRows()];
            // index to keep track of the permutation
            permuted = new int[m.GetNumRows()];
            for (i = 0; i < m.GetNumRows(); i++)
            {
                permuted[i] = i;
            }
            // permute input so all unbounded variables come first
            numUnbounded = 0;
            for (i = 0; i < m.GetNumRows(); i++)
            {
                if (lo[i] == float.NegativeInfinity && hi[i] == float.PositiveInfinity)
                {
                    if (numUnbounded != i)
                    {
                        Swap(numUnbounded, i);
                    }
                    numUnbounded++;
                }
            }
            // permute input so all variables using the boxIndex come last
            int boxStartIndex = m.GetNumRows();

            if (boxIndex != null)
            {
                for (i = m.GetNumRows() - 1; i >= numUnbounded; i--)
                {
                    if (boxIndex[i] >= 0 && (lo[i] != float.NegativeInfinity || hi[i] != float.PositiveInfinity))
                    {
                        boxStartIndex--;
                        if (boxStartIndex != i)
                        {
                            Swap(boxStartIndex, i);
                        }
                    }
                }
            }
            // sub matrix for factorization
            clamped.SetData(m.GetNumRows(), m.GetNumColumns(), MATX_ALLOCA(m.GetNumRows() * m.GetNumColumns()));
            diagonal.SetData(m.GetNumRows(), VECX_ALLOCA(m.GetNumRows()));
            // all unbounded variables are clamped
            numClamped = numUnbounded;
            // if there are unbounded variables
            if (numUnbounded != 0)
            {
                // factor and solve for unbounded variables
                if (!FactorClamped())
                {
                    // NOTE(review): the matrix may still be permuted on this path — confirm callers tolerate that
                    idLib.common.Printf("idLCP_Square::Solve: unbounded factorization failed\n");
                    return false;
                }
                SolveClamped(f, b.ToArray());
                // if there are no bounded variables we are done
                if (numUnbounded == m.GetNumRows())
                {
                    // the vector is not permuted (no swap happened), so copy the values straight
                    // across instead of rebinding the caller's vector to our scratch storage
                    for (i = 0; i < f.GetSize(); i++)
                    {
                        o_x[i] = f[i];
                    }
                    return true;
                }
            }
#if IGNORE_UNSATISFIABLE_VARIABLES
            int numIgnored = 0;
#endif
            // allocate for delta force and delta acceleration
            delta_f.SetData(m.GetNumRows(), VECX_ALLOCA(m.GetNumRows()));
            delta_a.SetData(m.GetNumRows(), VECX_ALLOCA(m.GetNumRows()));
            // solve for bounded variables
            string failed = null;
            float  dot;
            for (i = numUnbounded; i < m.GetNumRows(); i++)
            {
                // once we hit the box start index we can initialize the low and high boundaries of the variables using the box index
                if (i == boxStartIndex)
                {
                    for (j = 0; j < boxStartIndex; j++)
                    {
                        o_x[permuted[j]] = f[j];
                    }
                    for (j = boxStartIndex; j < m.GetNumRows(); j++)
                    {
                        float s = o_x[boxIndex[j]];
                        if (lo[j] != float.NegativeInfinity)
                        {
                            lo[j] = -idMath.Fabs(lo[j] * s);
                        }
                        if (hi[j] != float.PositiveInfinity)
                        {
                            hi[j] = idMath.Fabs(hi[j] * s);
                        }
                    }
                }
                // calculate acceleration for current variable
                SIMDProcessor.Dot(out dot, rowPtrs[i], f.ToArray(), i);
                a[i] = dot - b[i];
                // if already at the low boundary
                if (lo[i] >= -LCP_BOUND_EPSILON && a[i] >= -LCP_ACCEL_EPSILON)
                {
                    side[i] = -1;
                    continue;
                }
                // if already at the high boundary
                if (hi[i] <= LCP_BOUND_EPSILON && a[i] <= LCP_ACCEL_EPSILON)
                {
                    side[i] = 1;
                    continue;
                }
                // if inside the clamped region
                if (idMath.Fabs(a[i]) <= LCP_ACCEL_EPSILON)
                {
                    side[i] = 0;
                    AddClamped(i);
                    continue;
                }
                // drive the current variable into a valid region
                int n;
                for (n = 0; n < maxIterations; n++)
                {
                    // direction to move
                    float dir = (a[i] <= 0.0f ? 1.0f : -1.0f);
                    // calculate force delta
                    CalcForceDelta(i, dir);
                    // calculate acceleration delta: delta_a = m * delta_f;
                    CalcAccelDelta(i);
                    // maximum step we can take
                    float maxStep;
                    int   limit;
                    int   limitSide;
                    GetMaxStep(i, dir, out maxStep, out limit, out limitSide);
                    if (maxStep <= 0.0f)
                    {
#if IGNORE_UNSATISFIABLE_VARIABLES
                        // ignore the current variable completely
                        lo[i]   = hi[i] = 0.0f;
                        f[i]    = 0.0f;
                        side[i] = -1;
                        numIgnored++;
#else
                        // string.Format takes composite format items, not printf specifiers
                        failed = string.Format("invalid step size {0:F4}", maxStep);
#endif
                        break;
                    }
                    // change force
                    ChangeForce(i, maxStep);
                    // change acceleration
                    ChangeAccel(i, maxStep);
                    // clamp/unclamp the variable that limited this step
                    side[limit] = limitSide;
                    switch (limitSide)
                    {
                    case 0:
                        a[limit] = 0.0f;
                        AddClamped(limit);
                        break;

                    case -1:
                        f[limit] = lo[limit];
                        if (limit != i)
                        {
                            RemoveClamped(limit);
                        }
                        break;

                    case 1:
                        f[limit] = hi[limit];
                        if (limit != i)
                        {
                            RemoveClamped(limit);
                        }
                        break;
                    }
                    // if the current variable limited the step we can continue with the next variable
                    if (limit == i)
                    {
                        break;
                    }
                }
                if (n >= maxIterations)
                {
                    failed = string.Format("max iterations {0}", maxIterations);
                    break;
                }
                if (failed != null)
                {
                    break;
                }
            }

#if IGNORE_UNSATISFIABLE_VARIABLES
            if (numIgnored > 0)
            {
                if (lcp_showFailures.GetBool())
                {
                    idLib.common.Printf("idLCP_Square::Solve: %d of %d bounded variables ignored\n", numIgnored, m.GetNumRows() - numUnbounded);
                }
            }
#endif
            // if failed clear remaining forces; i still holds the index of the variable that failed
            if (failed != null)
            {
                if (lcp_showFailures.GetBool())
                {
                    idLib.common.Printf("idLCP_Square::Solve: %s (%d of %d bounded variables ignored)\n", failed, m.GetNumRows() - i, m.GetNumRows() - numUnbounded);
                }
                for (j = i; j < m.GetNumRows(); j++)
                {
                    f[j] = 0.0f;
                }
            }
#if _DEBUG && false
            if (failed == null)
            {
                // test whether or not the solution satisfies the complementarity conditions
                for (i = 0; i < m.GetNumRows(); i++)
                {
                    a[i] = -b[i];
                    for (j = 0; j < m.GetNumRows(); j++)
                    {
                        a[i] += rowPtrs[i][j] * f[j];
                    }
                    if (f[i] == lo[i])
                    {
                        if (lo[i] != hi[i] && a[i] < -LCP_ACCEL_EPSILON)
                        {
                            int bah1 = 1;
                        }
                    }
                    else if (f[i] == hi[i])
                    {
                        if (lo[i] != hi[i] && a[i] > LCP_ACCEL_EPSILON)
                        {
                            int bah2 = 1;
                        }
                    }
                    else if (f[i] < lo[i] || f[i] > hi[i] || idMath.Fabs(a[i]) > 1.0f)
                    {
                        int bah3 = 1;
                    }
                }
            }
#endif
            // unpermute result
            for (i = 0; i < f.GetSize(); i++)
            {
                o_x[permuted[i]] = f[i];
            }
            // unpermute original matrix
            for (i = 0; i < m.GetNumRows(); i++)
            {
                // find the position j that currently holds original variable i
                for (j = 0; j < m.GetNumRows(); j++)
                {
                    if (permuted[j] == i)
                    {
                        break;
                    }
                }
                if (i != j)
                {
                    m.SwapColumns(i, j);
                    // swap in place: array elements passed by value to a helper would not be exchanged
                    int tmp = permuted[i];
                    permuted[i] = permuted[j];
                    permuted[j] = tmp;
                }
            }
            return true;
        }