/// <summary>
        /// Linear algebraic matrix-matrix multiplication; <tt>C = alpha * A x B + beta*C</tt>.
        /// </summary>
        /// <param name="b">
        /// The second source matrix.
        /// </param>
        /// <param name="c">
        /// The matrix where results are to be stored. Set this parameter to <tt>null</tt> to indicate that a new result matrix shall be constructed.
        /// </param>
        /// <param name="alpha">
        /// The alpha.
        /// </param>
        /// <param name="beta">
        /// The beta.
        /// </param>
        /// <param name="transposeA">
        /// Whether A must be transposed.
        /// </param>
        /// <param name="transposeB">
        /// Whether B must be transposed.
        /// </param>
        /// <returns>
        /// C (for convenience only).
        /// </returns>
        /// <exception cref="ArgumentOutOfRangeException">
        /// If <tt>B.rows() != A.columns()</tt>.
        /// </exception>
        /// <exception cref="ArgumentException">
        /// If <tt>C.rows() != A.rows() || C.columns() != B.columns()</tt>.
        /// </exception>
        /// <exception cref="ArgumentException">
        /// If <tt>A == C || B == C</tt>.
        /// </exception>
        public override DoubleMatrix2D ZMult(DoubleMatrix2D b, DoubleMatrix2D c, double alpha, double beta, bool transposeA, bool transposeB)
        {
            // Overridden for performance only.
            if (transposeA)
            {
                // Handle A' by re-dispatching on the ViewDice() view of this matrix.
                return(ViewDice().ZMult(b, c, alpha, beta, false, transposeB));
            }
            if (b is SparseDoubleMatrix2D)
            {
                // Exploit the quick sparse multiplication of b: A*B = (B' * A')'.
                if (c == null)
                {
                    return(b.ZMult(this, null, alpha, beta, !transposeB, true).ViewDice());
                }

                b.ZMult(this, c.ViewDice(), alpha, beta, !transposeB, true);
                return(c);
            }

            if (transposeB)
            {
                return(ZMult(b.ViewDice(), c, alpha, beta, false, false));
            }

            int m = Rows;
            int n = Columns;
            int p = b.Columns;

            if (c == null)
            {
                c = new DenseDoubleMatrix2D(m, p);
            }

            // The fast path below works directly on the backing arrays of both operands,
            // so b and c must both be dense. Only c used to be checked, which made the cast
            // of a non-dense b fail with an InvalidCastException; any such operand is now
            // handed to the generic base implementation instead.
            if (!(b is DenseDoubleMatrix2D) || !(c is DenseDoubleMatrix2D))
            {
                return(base.ZMult(b, c, alpha, beta, false, false));
            }
            if (b.Rows != n)
            {
                throw new ArgumentOutOfRangeException("b", "Matrix2D inner dimensions must agree:" + this + ", " + b);
            }
            if (c.Rows != m || c.Columns != p)
            {
                throw new ArgumentException("Incompatible result matrix: " + this + ", " + b + ", " + c);
            }
            if (this == c || b == c)
            {
                throw new ArgumentException("Matrices must not be identical");
            }

            var bb = (DenseDoubleMatrix2D)b;
            var cc = (DenseDoubleMatrix2D)c;

            double[] aElems = Elements;
            double[] bElems = bb.Elements;
            double[] cElems = cc.Elements;
            if (aElems == null || bElems == null || cElems == null)
            {
                throw new ApplicationException();
            }

            int cA = ColumnStride;
            int cB = bb.ColumnStride;
            int cC = cc.ColumnStride;

            int rA = RowStride;
            int rB = bb.RowStride;
            int rC = cc.RowStride;

            /*
             * A is blocked to hide memory latency
             *      xxxxxxx B
             *      xxxxxxx
             *      xxxxxxx
             * A
             * xxx     xxxxxxx C
             * xxx     xxxxxxx
             * ---     -------
             * xxx     xxxxxxx
             * xxx     xxxxxxx
             * ---     -------
             * xxx     xxxxxxx
             */
            const int BlockSize = 30000;

            // Number of rows of A processed per block; always at least one.
            int rowsPerBlock = (BlockSize - n) / (n + 1);
            if (rowsPerBlock <= 0)
            {
                rowsPerBlock = 1;
            }

            // blocks = ceil(m / rowsPerBlock)
            int blocks  = m / rowsPerBlock;
            int nextRow = 0;
            if (m % rowsPerBlock != 0)
            {
                blocks++;
            }

            for (; --blocks >= 0;)
            {
                int jB     = bb.Index(0, 0);
                int indexA = Index(nextRow, 0);
                int jC     = cc.Index(nextRow, 0);
                nextRow += rowsPerBlock;
                if (blocks == 0)
                {
                    // Last block: nextRow has reached or passed m, so (m - nextRow) <= 0
                    // trims the block to the rows that actually remain.
                    rowsPerBlock += m - nextRow;
                }

                // For every column j of B / C ...
                for (int j = p; --j >= 0;)
                {
                    int iA = indexA;
                    int iC = jC;

                    // ... and every row i of the current block of A / C.
                    for (int i = rowsPerBlock; --i >= 0;)
                    {
                        int    kA = iA;
                        int    kB = jB;
                        double s  = 0;

                        /*
                         * // not unrolled:
                         * for (int k = n; --k >= 0;) {
                         *  //s += getQuick(i,k) * B.getQuick(k,j);
                         *  s += AElems[kA] * BElems[kB];
                         *  kB += rB;
                         *  kA += cA;
                         * }
                         */

                        // Loop unrolled by four. The offsets are moved one step back first
                        // so that every access can pre-increment them.
                        kA -= cA;
                        kB -= rB;

                        // Leading n % 4 terms of the dot product.
                        for (int k = n % 4; --k >= 0;)
                        {
                            s += aElems[kA += cA] * bElems[kB += rB];
                        }

                        // Remaining terms, four per iteration.
                        for (int k = n / 4; --k >= 0;)
                        {
                            s += (aElems[kA += cA] * bElems[kB += rB]) +
                                 (aElems[kA += cA] * bElems[kB += rB]) +
                                 (aElems[kA += cA] * bElems[kB += rB]) +
                                 (aElems[kA += cA] * bElems[kB += rB]);
                        }

                        cElems[iC] = (alpha * s) + (beta * cElems[iC]);
                        iA        += rA;
                        iC        += rC;
                    }

                    jB += cB;
                    jC += cC;
                }
            }

            return(c);
        }
        /// <summary>
        /// Linear algebraic matrix-matrix multiplication; <tt>C = alpha * A x B + beta*C</tt>, where A is this matrix.
        /// The product is accumulated row by row while visiting the cells reported by <c>ForEachNonZero</c>.
        /// </summary>
        /// <param name="B">
        /// The second source matrix.
        /// </param>
        /// <param name="C">
        /// The matrix where results are to be stored. Pass <tt>null</tt> to have a new <c>DenseDoubleMatrix2D</c> constructed.
        /// </param>
        /// <param name="alpha">
        /// The factor applied to <tt>A x B</tt>.
        /// </param>
        /// <param name="beta">
        /// The factor applied to the existing content of C; not applied when C is <tt>null</tt>.
        /// </param>
        /// <param name="transposeA">
        /// Whether A must be transposed.
        /// </param>
        /// <param name="transposeB">
        /// Whether B must be transposed.
        /// </param>
        /// <returns>
        /// C, or the newly constructed result matrix when C was <tt>null</tt>.
        /// </returns>
        /// <exception cref="ArgumentException">
        /// If the inner dimensions of A and B do not agree, if C does not have the shape of the product,
        /// or if C is the same object as this matrix or as B.
        /// </exception>
        public override DoubleMatrix2D ZMult(DoubleMatrix2D B, DoubleMatrix2D C, double alpha, double beta, Boolean transposeA, Boolean transposeB)
        {
            // Keep the caller's reference: once B is replaced by a transposed view, comparing
            // that view with C can no longer reveal that the same matrix was passed for both.
            DoubleMatrix2D originalB = B;

            if (transposeB)
            {
                B = B.ViewDice();
            }
            int m = Rows;
            int n = Columns;

            if (transposeA)
            {
                m = Columns;
                n = Rows;
            }
            int     p      = B.Columns;
            Boolean ignore = (C == null);

            if (C == null)
            {
                C = new DenseDoubleMatrix2D(m, p);
            }

            if (B.Rows != n)
            {
                throw new ArgumentException("Matrix2D inner dimensions must agree:" + ToStringShort() + ", " + (transposeB ? B.ViewDice() : B).ToStringShort());
            }
            if (C.Rows != m || C.Columns != p)
            {
                throw new ArgumentException("Incompatible result matrix: " + ToStringShort() + ", " + (transposeB ? B.ViewDice() : B).ToStringShort() + ", " + C.ToStringShort());
            }
            if (this == C || B == C || originalB == C)
            {
                throw new ArgumentException("Matrices must not be identical");
            }

            // A freshly constructed C needs no scaling; a caller-supplied C is scaled by beta first.
            if (!ignore)
            {
                C.Assign(F1.Mult(beta));
            }

            // cache views
            DoubleMatrix1D[] bRows = new DoubleMatrix1D[n];
            for (int i = n; --i >= 0;)
            {
                bRows[i] = B.ViewRow(i);
            }
            DoubleMatrix1D[] cRows = new DoubleMatrix1D[m];
            for (int i = m; --i >= 0;)
            {
                cRows[i] = C.ViewRow(i);
            }

            ForEachNonZero(
                new Cern.Colt.Function.IntIntDoubleFunction((i, j, value) =>
            {
                // Combine one row of B into one row of C with the factor value * alpha.
                var fun = F2.PlusMult(value * alpha);
                if (!transposeA)
                {
                    cRows[i].Assign(bRows[j], fun);
                }
                else
                {
                    // A' has this cell at (j, i), so the row roles are swapped.
                    cRows[j].Assign(bRows[i], fun);
                }

                // The visited cell value is handed back unchanged.
                return(value);
            }
                                                            ));

            return(C);
        }
        /// <summary>
        /// Linear algebraic matrix-matrix multiplication; <tt>C = alpha * A x B + beta*C</tt>, where A is this matrix.
        /// The product is accumulated row by row while visiting the cells reported by <c>ForEachNonZero</c>.
        /// </summary>
        /// <param name="B">
        /// The second source matrix.
        /// </param>
        /// <param name="C">
        /// The matrix where results are to be stored. Pass <tt>null</tt> to have a new <c>DenseDoubleMatrix2D</c> constructed.
        /// </param>
        /// <param name="alpha">
        /// The factor applied to <tt>A x B</tt>.
        /// </param>
        /// <param name="beta">
        /// The factor applied to the existing content of C; not applied when C is <tt>null</tt>.
        /// </param>
        /// <param name="transposeA">
        /// Whether A must be transposed.
        /// </param>
        /// <param name="transposeB">
        /// Whether B must be transposed.
        /// </param>
        /// <returns>
        /// C, or the newly constructed result matrix when C was <tt>null</tt>.
        /// </returns>
        /// <exception cref="ArgumentException">
        /// If the inner dimensions of A and B do not agree, if C does not have the shape of the product,
        /// or if C is the same object as this matrix or as B.
        /// </exception>
        public override DoubleMatrix2D ZMult(DoubleMatrix2D B, DoubleMatrix2D C, double alpha, double beta, Boolean transposeA, Boolean transposeB)
        {
            // Keep the caller's reference: once B is replaced by a transposed view, comparing
            // that view with C can no longer reveal that the same matrix was passed for both.
            DoubleMatrix2D originalB = B;

            if (transposeB)
            {
                B = B.ViewDice();
            }
            int m = Rows;
            int n = Columns;

            if (transposeA)
            {
                m = Columns;
                n = Rows;
            }
            int     p      = B.Columns;
            Boolean ignore = (C == null);

            if (C == null)
            {
                C = new DenseDoubleMatrix2D(m, p);
            }

            if (B.Rows != n)
            {
                throw new ArgumentException(String.Format(Cern.LocalizedResources.Instance().Exception_Matrix2DInnerDimensionMustAgree, ToStringShort(), (transposeB ? B.ViewDice() : B).ToStringShort()));
            }
            if (C.Rows != m || C.Columns != p)
            {
                throw new ArgumentException(String.Format(Cern.LocalizedResources.Instance().Exception_IncompatibleResultMatrix, ToStringShort(), (transposeB ? B.ViewDice() : B).ToStringShort(), C.ToStringShort()));
            }
            if (this == C || B == C || originalB == C)
            {
                throw new ArgumentException(Cern.LocalizedResources.Instance().Exception_MatricesMustNotBeIdentical);
            }

            // A freshly constructed C needs no scaling; a caller-supplied C is scaled by beta first.
            if (!ignore)
            {
                C.Assign(F1.Mult(beta));
            }

            // cache views
            DoubleMatrix1D[] bRows = new DoubleMatrix1D[n];
            for (int i = n; --i >= 0;)
            {
                bRows[i] = B.ViewRow(i);
            }
            DoubleMatrix1D[] cRows = new DoubleMatrix1D[m];
            for (int i = m; --i >= 0;)
            {
                cRows[i] = C.ViewRow(i);
            }

            ForEachNonZero(
                new Cern.Colt.Function.IntIntDoubleFunction((i, j, value) =>
            {
                // Combine one row of B into one row of C with the factor value * alpha.
                var fun = F2.PlusMult(value * alpha);
                if (!transposeA)
                {
                    cRows[i].Assign(bRows[j], fun);
                }
                else
                {
                    // A' has this cell at (j, i), so the row roles are swapped.
                    cRows[j].Assign(bRows[i], fun);
                }

                // The visited cell value is handed back unchanged.
                return(value);
            }
                                                            ));

            return(C);
        }
        /// <summary>
        /// Linear algebraic matrix-matrix multiplication; <tt>C = alpha * A x B + beta*C</tt>, where A is this matrix.
        /// The product is accumulated row by row while walking the stored entries described by
        /// <c>Starts</c>, <c>Indexes</c> and <c>Values</c>.
        /// </summary>
        /// <param name="B">
        /// The second source matrix.
        /// </param>
        /// <param name="C">
        /// The matrix where results are to be stored. Pass <tt>null</tt> to have a new <c>DenseDoubleMatrix2D</c> constructed.
        /// </param>
        /// <param name="alpha">
        /// The factor applied to <tt>A x B</tt>.
        /// </param>
        /// <param name="beta">
        /// The factor applied to the existing content of C; not applied when C is <tt>null</tt>.
        /// </param>
        /// <param name="transposeA">
        /// Whether A must be transposed.
        /// </param>
        /// <param name="transposeB">
        /// Whether B must be transposed.
        /// </param>
        /// <returns>
        /// C, or the newly constructed result matrix when C was <tt>null</tt>.
        /// </returns>
        /// <exception cref="ArgumentException">
        /// If the inner dimensions of A and B do not agree, if C does not have the shape of the product,
        /// or if C is the same object as this matrix or as B.
        /// </exception>
        public override DoubleMatrix2D ZMult(DoubleMatrix2D B, DoubleMatrix2D C, double alpha, double beta, Boolean transposeA, Boolean transposeB)
        {
            // Keep the caller's reference: once B is replaced by a transposed view, comparing
            // that view with C can no longer reveal that the same matrix was passed for both.
            DoubleMatrix2D originalB = B;

            if (transposeB)
            {
                B = B.ViewDice();
            }
            int m = Rows;
            int n = Columns;

            if (transposeA)
            {
                m = Columns;
                n = Rows;
            }
            int     p      = B.Columns;
            Boolean ignore = (C == null);

            if (C == null)
            {
                C = new DenseDoubleMatrix2D(m, p);
            }

            if (B.Rows != n)
            {
                throw new ArgumentException(String.Format(Cern.LocalizedResources.Instance().Exception_Matrix2DInnerDimensionMustAgree, ToStringShort(), (transposeB ? B.ViewDice() : B).ToStringShort()));
            }
            if (C.Rows != m || C.Columns != p)
            {
                throw new ArgumentException(String.Format(Cern.LocalizedResources.Instance().Exception_IncompatibleResultMatrix, ToStringShort(), (transposeB ? B.ViewDice() : B).ToStringShort(), C.ToStringShort()));
            }
            if (this == C || B == C || originalB == C)
            {
                throw new ArgumentException(Cern.LocalizedResources.Instance().Exception_MatricesMustNotBeIdentical);
            }

            // A freshly constructed C needs no scaling; a caller-supplied C is scaled by beta first.
            if (!ignore)
            {
                C.Assign(F1.Mult(beta));
            }

            // cache views
            DoubleMatrix1D[] bRows = new DoubleMatrix1D[n];
            for (int i = n; --i >= 0;)
            {
                bRows[i] = B.ViewRow(i);
            }
            DoubleMatrix1D[] cRows = new DoubleMatrix1D[m];
            for (int i = m; --i >= 0;)
            {
                cRows[i] = C.ViewRow(i);
            }

            int[]    idx  = Indexes.ToArray();
            double[] vals = Values.ToArray();

            // Entries k in [Starts[i], Starts[i + 1]) are treated as row i of this matrix,
            // with idx[k] as the column and vals[k] as the stored value.
            for (int i = Starts.Length - 1; --i >= 0;)
            {
                int low = Starts[i];
                for (int k = Starts[i + 1]; --k >= low;)
                {
                    int j = idx[k];

                    // Combine one row of B into one row of C with the factor vals[k] * alpha.
                    var fun = F2.PlusMult(vals[k] * alpha);
                    if (!transposeA)
                    {
                        cRows[i].Assign(bRows[j], fun);
                    }
                    else
                    {
                        // A' has this entry at (j, i), so the row roles are swapped.
                        cRows[j].Assign(bRows[i], fun);
                    }
                }
            }

            return(C);
        }