/// <summary>
        /// Decomposes a square block matrix by sweeping along its diagonal one
        /// block at a time. Each step processes the current row block with
        /// <c>TridiagonalHelper_DDRB.tridiagUpperRow</c> and, when rows remain
        /// below that block, updates the remaining portion of <c>A</c> with two
        /// block multiplications.
        /// </summary>
        /// <param name="orig">Matrix to decompose; it is handed to <c>init</c> before any work is done.</param>
        /// <returns>Always <c>true</c>.</returns>
        /// <exception cref="ArgumentException">Thrown when <paramref name="orig"/> is not square.</exception>
        public virtual bool decompose(DMatrixRBlock orig)
        {
            bool isSquare = orig.numCols == orig.numRows;
            if (!isSquare)
            {
                throw new ArgumentException("Input matrix must be square.");
            }

            init(orig);

            // blockA and blockU are two independent views over the same matrix A;
            // blockV is a view over the work matrix V.
            DSubmatrixD1 blockA = new DSubmatrixD1(A);
            DSubmatrixD1 blockV = new DSubmatrixD1(V);
            DSubmatrixD1 blockU = new DSubmatrixD1(A);

            int size = orig.numCols;

            int offset = 0;
            while (offset < size)
            {
                // the final block may be shorter than a full block
                int remaining = A.numRows - offset;
                int height = Math.Min(A.blockLength, remaining);

                // both views of A start on the diagonal at (offset, offset)
                blockU.col0 = offset;
                blockA.col0 = offset;
                blockU.row0 = offset;
                blockA.row0 = offset;

                // U only spans the rows of the current block
                blockU.row1 = blockU.row0 + height;

                blockV.col0 = offset;
                blockV.row1 = height;
                blockV.original.reshape(blockV.row1, blockV.col1, false);

                // process the top row block; gammas and V are filled in by the helper
                TridiagonalHelper_DDRB.tridiagUpperRow(A.blockLength, blockA, gammas, blockV);

                // apply Householder reflectors to the lower portion using block multiplication
                if (blockU.row1 < orig.numCols)
                {
                    int lastRow = A.blockLength - 1;
                    int nextCol = A.blockLength;

                    // take into account the 1 in the last row: temporarily write it
                    // into U and put the saved value back once the update is done
                    double saved = blockU.get(lastRow, nextCol);
                    blockU.set(lastRow, nextCol, 1);

                    // A = A + U*V^T + V*U^T
                    multPlusTransA(A.blockLength, blockU, blockV, blockA);
                    multPlusTransA(A.blockLength, blockV, blockU, blockA);

                    blockU.set(lastRow, nextCol, saved);
                }

                offset += A.blockLength;
            }

            return true;
        }
        /// <summary>
        /// Builds the Q matrix from the reflectors stored in <c>A</c> and the
        /// scalars in <c>gammas</c>, working from the last diagonal block back
        /// to the first.
        /// </summary>
        /// <param name="Q">
        /// Matrix forwarded to <c>QRDecompositionHouseholder_DDRB.initializeQ</c>;
        /// the value returned by that call is the one updated and returned here.
        /// </param>
        /// <param name="transposed">
        /// Selects which pair of block multiplications is applied per block
        /// (presumably yielding the transpose of Q when true; confirm against callers).
        /// </param>
        /// <returns>The matrix obtained from <c>initializeQ</c> after every block has been applied.</returns>
        public virtual DMatrixRBlock getQ(DMatrixRBlock Q, bool transposed)
        {
            Q = QRDecompositionHouseholder_DDRB.initializeQ(Q, A.numRows, A.numCols, A.blockLength, false);

            // size both work matrices to hold a single row block
            int workRows = Math.Min(A.blockLength, A.numRows);
            V.reshape(workRows, A.numCols, false);
            this.tmp.reshape(workRows, A.numCols, false);

            DSubmatrixD1 viewQ = new DSubmatrixD1(Q);
            DSubmatrixD1 viewU = new DSubmatrixD1(A);
            DSubmatrixD1 viewW = new DSubmatrixD1(V);
            DSubmatrixD1 viewF = new DSubmatrixD1(this.tmp);

            int size = A.numRows;

            // first index of the last block, clamped so that it is never negative
            int lastBlock = size - size % A.blockLength;
            if (lastBlock == size)
            {
                lastBlock -= A.blockLength;
            }
            lastBlock = Math.Max(lastBlock, 0);

            // (Q1^T * (Q2^T * (Q3^t * A)))
            for (int offset = lastBlock; offset >= 0; offset -= A.blockLength)
            {
                int blockSize = Math.Min(A.blockLength, size - offset);

                viewW.col0 = offset;
                viewW.row1 = blockSize;
                viewW.original.reshape(viewW.row1, viewW.col1, false);

                // the scratch view is laid out differently depending on the requested orientation
                if (transposed)
                {
                    viewF.row0 = offset;
                    viewF.row1 = A.numCols;
                    viewF.col0 = 0;
                    viewF.col1 = blockSize;
                }
                else
                {
                    viewF.col0 = offset;
                    viewF.row1 = blockSize;
                }
                viewF.original.reshape(viewF.row1, viewF.col1, false);

                viewU.col0 = offset;
                viewU.row0 = offset;
                viewU.row1 = viewU.row0 + blockSize;

                // zeros and ones are saved and overwritten in U so that standard
                // matrix multiplication can be used; replaceZeros undoes this below
                copyZeros(viewU);

                // compute W for Q(i) = ( I + W*Y^T)
                TridiagonalHelper_DDRB.computeW_row(A.blockLength, viewU, viewW, gammas, offset);

                viewQ.col0 = offset;
                viewQ.row0 = offset;

                // Apply Qi = I + W*U^T to Q.
                //
                // U and V are really row vectors, whereas the standard notation
                // assumes column vectors, which is why the functions called below
                // do not literally match the math.
                //
                // Step 1: F = U^T*Q(i)
                // Step 2: Q(i+1) = Q(i) + W*F
                if (transposed)
                {
                    MatrixMult_DDRB.multTransB(A.blockLength, viewQ, viewU, viewF);
                    MatrixMult_DDRB.multPlus(A.blockLength, viewF, viewW, viewQ);
                }
                else
                {
                    MatrixMult_DDRB.mult(A.blockLength, viewU, viewQ, viewF);
                    MatrixMult_DDRB.multPlusTransA(A.blockLength, viewW, viewF, viewQ);
                }

                replaceZeros(viewU);
            }

            return Q;
        }