//CONCURRENT_OMIT_END

        /**
         * Writes into C the product of A with the transpose of B, scaled by alpha:
         * element (xA, xB) of C is {@code alpha} times the dot product of row xA of A and row xB of B.
         * The matrices are addressed directly through their row-major {@code data} arrays.
         *
         * @param alpha Scale factor applied to every output element.
         * @param A (Input) Left matrix. Must not be the same instance as C.
         * @param B (Input) Right matrix, used transposed. Must have as many columns as A and must not be C.
         * @param C (Output) Reshaped to A.numRows by B.numRows and overwritten.
         * @see CommonOps_DDRM#multTransB(double, org.ejml.data.DMatrix1Row, org.ejml.data.DMatrix1Row, org.ejml.data.DMatrix1Row)
         */
        public static void multTransB(double alpha, DMatrix1Row A, DMatrix1Row B, DMatrix1Row C)
        {
            UtilEjml.assertTrue(A != C && B != C, "Neither 'A' or 'B' can be the same matrix as 'C'");
            UtilEjml.assertShape(A.numCols, B.numCols, "The 'A' and 'B' matrices do not have compatible dimensions");
            C.reshape(A.numRows, B.numRows);

            //CONCURRENT_BELOW EjmlConcurrency.loopFor(0, A.numRows, xA -> {
            for (int xA = 0; xA < A.numRows; xA++)
            {
                // Row xA of A occupies [rowStartA, rowEndA) in A.data (A and B share a column count).
                int rowStartA = xA * B.numCols;
                int rowEndA   = rowStartA + B.numCols;
                int outIndex  = xA * B.numRows;

                // posB walks B.data front to back exactly once per row of A.
                int posB = 0;
                for (int xB = 0; xB < B.numRows; xB++, outIndex++)
                {
                    double dot = 0;
                    for (int posA = rowStartA; posA < rowEndA; posA++, posB++)
                    {
                        dot += A.data[posA] * B.data[posB];
                    }

                    C.set(outIndex, alpha * dot);
                }
            }
            //CONCURRENT_ABOVE });
        }
        /**
         * Computes the inner product of A times A and stores the results in B. The inner product is symmetric and this
         * function only accumulates into the lower triangle (including the diagonal). Callers should treat the
         * upper triangle as undefined.
         *
         * <p>B = A<sup>T</sup>*A</p>
         *
         * <p>B is reshaped to numCols by numCols and its backing array is zeroed before accumulation. If A has
         * no rows the result is therefore all zeros.</p>
         *
         * @param A (Input) Matrix
         * @param B (Output) Storage for output.
         */
        public static void inner_reorder_lower(DMatrix1Row A, DMatrix1Row B)
        {
            int cols = A.numCols;

            B.reshape(cols, cols);

            Arrays.Fill(B.data, 0);
            for (int i = 0; i < cols; i++)
            {
                int rowStartB = i * cols;

                // Every row of A, including the first, goes through the same accumulation. Starting at k = 0
                // (instead of special-casing row 0) keeps the summation order unchanged for non-empty input
                // while never touching A.data when A has zero rows.
                for (int k = 0; k < A.numRows; k++)
                {
                    int    indexRow = k * cols;
                    double valI     = A.data[indexRow + i];
                    int    indexB   = rowStartB;

                    // Only columns 0..i are updated: the lower triangle of row i in B.
                    for (int j = 0; j <= i; j++)
                    {
                        B.data[indexB++] += valI * A.data[indexRow++];
                    }
                }
            }
        }
        /**
         * Writes into C the product of the transpose of A with B, scaled by alpha:
         * element (i, j) of C is {@code alpha} times the dot product of column i of A and column j of B.
         * Both columns are traversed by striding through the row-major {@code data} arrays.
         *
         * @param alpha Scale factor applied to every output element.
         * @param A (Input) Left matrix, used transposed. Must not be the same instance as C.
         * @param B (Input) Right matrix. Must have as many rows as A and must not be C.
         * @param C (Output) Reshaped to A.numCols by B.numCols and overwritten.
         * @see CommonOps_DDRM#multTransA(double, org.ejml.data.DMatrix1Row, org.ejml.data.DMatrix1Row, org.ejml.data.DMatrix1Row)
         */
        public static void multTransA_small(double alpha, DMatrix1Row A, DMatrix1Row B, DMatrix1Row C)
        {
            UtilEjml.assertTrue(A != C && B != C, "Neither 'A' or 'B' can be the same matrix as 'C'");
            UtilEjml.assertShape(A.numRows, B.numRows, "The 'A' and 'B' matrices do not have compatible dimensions");
            C.reshape(A.numCols, B.numCols);

            //CONCURRENT_BELOW EjmlConcurrency.loopFor(0, A.numCols, i -> {
            for (int i = 0; i < A.numCols; i++)
            {
                int outIndex = i * B.numCols;
                for (int j = 0; j < B.numCols; j++, outIndex++)
                {
                    // Column i of A starts at offset i, column j of B at offset j;
                    // each step down a column advances by that matrix's row length.
                    int posA  = i;
                    int stopB = j + B.numRows * B.numCols;

                    double sum = 0;
                    for (int posB = j; posB < stopB; posB += B.numCols, posA += A.numCols)
                    {
                        sum += A.data[posA] * B.data[posB];
                    }

                    C.set(outIndex, alpha * sum);
                }
            }
            //CONCURRENT_ABOVE });
        }
        //CONCURRENT_OMIT_BEGIN

        /**
         * Writes into C the product A times B, scaled by alpha. Each column of B is first copied into a
         * contiguous scratch array so the innermost loop reads both operands sequentially.
         *
         * @param alpha Scale factor applied to every output element.
         * @param A (Input) Left matrix. Must not be the same instance as C.
         * @param B (Input) Right matrix. Must have as many rows as A has columns and must not be C.
         * @param C (Output) Reshaped to A.numRows by B.numCols and overwritten.
         * @param aux Scratch storage holding one column of B (B.numRows elements are written). If null,
         *            an array of length B.numRows is allocated internally.
         * @see CommonOps_DDRM#mult(double, org.ejml.data.DMatrix1Row, org.ejml.data.DMatrix1Row, org.ejml.data.DMatrix1Row)
         */
        public static void mult_aux(double alpha, DMatrix1Row A, DMatrix1Row B, DMatrix1Row C, double[] aux)
        {
            UtilEjml.assertTrue(A != C && B != C, "Neither 'A' or 'B' can be the same matrix as 'C'");
            UtilEjml.assertShape(A.numCols, B.numRows, "The 'A' and 'B' matrices do not have compatible dimensions");
            C.reshape(A.numRows, B.numCols);

            double[] column = (aux != null) ? aux : new double[B.numRows];
            int      inner  = B.numRows;

            for (int j = 0; j < B.numCols; j++)
            {
                // Gather column j of B so it can be read with unit stride below.
                for (int k = 0; k < inner; k++)
                {
                    column[k] = B.unsafe_get(k, j);
                }

                // posA sweeps all of A.data once per column of B, one row of A per output element.
                int posA = 0;
                for (int i = 0; i < A.numRows; i++)
                {
                    double sum = 0;
                    for (int k = 0; k < inner; k++, posA++)
                    {
                        sum += A.data[posA] * column[k];
                    }
                    C.set(i * C.numCols + j, alpha * sum);
                }
            }
        }
        /**
         * Writes into C the product A times B, scaled by alpha, using a loop order in which the innermost
         * loop runs along a row of B and the matching row of C. For each row of A, the first element
         * initialises the output row and every subsequent element accumulates into it.
         *
         * <p>If A has zero rows or zero columns, C is filled with zeros and the method returns.</p>
         *
         * @param alpha Scale factor folded into each element of A before it is multiplied with B.
         * @param A (Input) Left matrix. Must not be the same instance as C.
         * @param B (Input) Right matrix. Must have as many rows as A has columns and must not be C.
         * @param C (Output) Reshaped to A.numRows by B.numCols and overwritten.
         * @see CommonOps_DDRM#mult(double, org.ejml.data.DMatrix1Row, org.ejml.data.DMatrix1Row, org.ejml.data.DMatrix1Row)
         */
        public static void mult_reorder(double alpha, DMatrix1Row A, DMatrix1Row B, DMatrix1Row C)
        {
            UtilEjml.assertTrue(A != C && B != C, "Neither 'A' or 'B' can be the same matrix as 'C'");
            UtilEjml.assertShape(A.numCols, B.numRows, "The 'A' and 'B' matrices do not have compatible dimensions");
            C.reshape(A.numRows, B.numCols);

            if (A.numCols == 0 || A.numRows == 0)
            {
                CommonOps_DDRM.fill(C, 0);
                return;
            }
            int endOfKLoop = B.numRows * B.numCols;

            //CONCURRENT_BELOW EjmlConcurrency.loopFor(0, A.numRows, i -> {
            for (int i = 0; i < A.numRows; i++)
            {
                int rowStartC = i * C.numCols;
                int posA      = i * A.numCols;
                int posB      = 0;

                // First row of B: assign, so that whatever C held before is discarded.
                double scale = alpha * A.data[posA++];
                for (int posC = rowStartC; posB < B.numCols; posB++, posC++)
                {
                    C.set(posC, scale * B.data[posB]);
                }

                // Remaining rows of B: accumulate into the same row of C until B.data is exhausted.
                while (posB != endOfKLoop)
                {
                    scale = alpha * A.data[posA++];

                    int rowEndB = posB + B.numCols;
                    for (int posC = rowStartC; posB < rowEndB; posB++, posC++)
                    {
                        C.data[posC] += scale * B.data[posB];
                    }
                }
            }
            //CONCURRENT_ABOVE });
        }
        //CONCURRENT_OMIT_END

        /**
         * Writes into C the product of the transpose of A with B, scaled by alpha, using a loop order in
         * which the innermost loop runs along a row of B and the matching row of C. For output row i, the
         * element of A at (0, i) initialises the row and the elements at (k, i), k &gt;= 1, accumulate into it.
         *
         * <p>If A has zero rows or zero columns, C is filled with zeros and the method returns.</p>
         *
         * @param alpha Scale factor folded into each element of A before it is multiplied with B.
         * @param A (Input) Left matrix, used transposed. Must not be the same instance as C.
         * @param B (Input) Right matrix. Must have as many rows as A and must not be C.
         * @param C (Output) Reshaped to A.numCols by B.numCols and overwritten.
         * @see CommonOps_DDRM#multTransA(double, org.ejml.data.DMatrix1Row, org.ejml.data.DMatrix1Row, org.ejml.data.DMatrix1Row)
         */
        public static void multTransA_reorder(double alpha, DMatrix1Row A, DMatrix1Row B, DMatrix1Row C)
        {
            UtilEjml.assertTrue(A != C && B != C, "Neither 'A' or 'B' can be the same matrix as 'C'");
            UtilEjml.assertShape(A.numRows, B.numRows, "The 'A' and 'B' matrices do not have compatible dimensions");
            C.reshape(A.numCols, B.numCols);

            if (A.numCols == 0 || A.numRows == 0)
            {
                CommonOps_DDRM.fill(C, 0);
                return;
            }
            // The directive range must match the sequential loop it stands in for: one iteration per
            // column of A (i.e. per row of C), not per row of A.
            //CONCURRENT_BELOW EjmlConcurrency.loopFor(0, A.numCols, i -> {
            for (int i = 0; i < A.numCols; i++)
            {
                int indexC_start = i * C.numCols;

                // first assign R: row 0 of A, column i, times row 0 of B overwrites row i of C
                double valA   = alpha * A.data[i];
                int    indexB = 0;
                int    end    = indexB + B.numCols;
                int    indexC = indexC_start;
                while (indexB < end)
                {
                    C.set(indexC++, valA * B.data[indexB++]);
                }
                // now increment it: indexB keeps advancing, so iteration k consumes row k of B
                for (int k = 1; k < A.numRows; k++)
                {
                    valA   = alpha * A.unsafe_get(k, i);
                    end    = indexB + B.numCols;
                    indexC = indexC_start;
                    // this is the loop for j
                    while (indexB < end)
                    {
                        C.data[indexC++] += valA * B.data[indexB++];
                    }
                }
            }
            //CONCURRENT_ABOVE });
        }
        //CONCURRENT_OMIT_BEGIN

        /**
         * Writes into C the product of the transpose of A with the transpose of B, scaled by alpha:
         * element (i, j) of C is {@code alpha} times the dot product of column i of A and row j of B.
         * Each column of A is first copied into a contiguous scratch array.
         *
         * <p>If A has zero rows or zero columns, C is filled with zeros and the method returns.</p>
         *
         * @param alpha Scale factor applied to every output element.
         * @param A (Input) Left matrix, used transposed. Must not be the same instance as C.
         * @param B (Input) Right matrix, used transposed. Must have as many columns as A has rows and must not be C.
         * @param C (Output) Reshaped to A.numCols by B.numRows and overwritten.
         * @param aux Scratch storage holding one column of A (B.numCols elements are written). If null,
         *            an array of length A.numRows is allocated internally.
         * @see CommonOps_DDRM#multTransAB(double, org.ejml.data.DMatrix1Row, org.ejml.data.DMatrix1Row, org.ejml.data.DMatrix1Row)
         */
        public static void multTransAB_aux(double alpha, DMatrix1Row A, DMatrix1Row B, DMatrix1Row C, double[] aux)
        {
            UtilEjml.assertTrue(A != C && B != C, "Neither 'A' or 'B' can be the same matrix as 'C'");
            UtilEjml.assertShape(A.numRows, B.numCols, "The 'A' and 'B' matrices do not have compatible dimensions");
            C.reshape(A.numCols, B.numRows);

            // Nothing to multiply: the result is an all-zero matrix.
            if (A.numCols == 0 || A.numRows == 0)
            {
                CommonOps_DDRM.fill(C, 0);
                return;
            }

            double[] column   = (aux != null) ? aux : new double[A.numRows];
            int      inner    = B.numCols;
            int      outIndex = 0;

            for (int i = 0; i < A.numCols; i++)
            {
                // Gather column i of A so the dot products below read it with unit stride.
                for (int k = 0; k < inner; k++)
                {
                    column[k] = A.unsafe_get(k, i);
                }

                // C is filled in row-major order, so a single running index is enough.
                for (int j = 0; j < B.numRows; j++, outIndex++)
                {
                    double sum = 0;
                    for (int k = 0; k < inner; k++)
                    {
                        sum += column[k] * B.unsafe_get(j, k);
                    }
                    C.set(outIndex, alpha * sum);
                }
            }
        }