Example #1
0
        /// <summary>
        /// Matrix-matrix multiply that cuts the work into blocks, hands each block to
        /// <c>seqBlas.Dgemm</c> and queues the block jobs on <c>smp.TaskGroup</c>.
        /// Presumably follows the usual BLAS contract <i>C = alpha*A*B + beta*C</i>; the arithmetic
        /// itself lives in <c>seqBlas</c> and is not visible here.
        /// </summary>
        /// <param name="transposeA">if set, the call is re-issued with <c>A.ViewDice()</c> in place of <c>A</c>.</param>
        /// <param name="transposeB">if set, the call is re-issued with <c>B.ViewDice()</c> in place of <c>B</c>.</param>
        /// <param name="alpha">scalar forwarded unchanged to the sequential routine.</param>
        /// <param name="A">left operand.</param>
        /// <param name="B">right operand; its row count must equal the column count of <c>A</c>.</param>
        /// <param name="beta">scalar forwarded unchanged to the sequential routine.</param>
        /// <param name="C">result matrix, sized <c>A.Rows x B.Columns</c>; must be neither <c>A</c> nor <c>B</c>.</param>
        /// <exception cref="ArgumentException">if the dimensions do not fit or <c>C</c> is identical to an operand.</exception>
        public void Dgemm(bool transposeA, bool transposeB, double alpha, DoubleMatrix2D A, DoubleMatrix2D B, double beta, DoubleMatrix2D C)
        {
            // Blocking strategy
            // -----------------
            // At least as many B columns as tasks: cut B (and C) into column stripes.
            //
            //                  xx|xx|xxx B
            //                  xx|xx|xxx
            //  A
            //  xxx             xx|xx|xxx C
            //  xxx             xx|xx|xxx
            //
            // Fewer B columns than tasks: cut A (and C) into row stripes.
            //
            //                  xxxxxxx B
            //                  xxxxxxx
            //  A
            //  xxx             xxxxxxx C
            //  ---             -------
            //  xxx             xxxxxxx
            //  ---             -------
            //  xxx             xxxxxxx

            // Resolve the transpose flags first, one at a time, by recursing on diced views.
            if (transposeA)
            {
                Dgemm(false, transposeB, alpha, A.ViewDice(), B, beta, C);
                return;
            }
            if (transposeB)
            {
                Dgemm(transposeA, false, alpha, A, B.ViewDice(), beta, C);
                return;
            }

            int rowsOfA  = A.Rows;
            int innerDim = A.Columns;
            int colsOfB  = B.Columns;

            // Argument validation.
            if (B.Rows != innerDim)
            {
                throw new ArgumentException(string.Concat("Matrix2D inner dimensions must agree:", A.ToStringShort(), ", ", B.ToStringShort()));
            }
            if (C.Rows != rowsOfA || C.Columns != colsOfB)
            {
                throw new ArgumentException(string.Concat("Incompatibel result matrix: ", A.ToStringShort(), ", ", B.ToStringShort(), ", ") + C.ToStringShort());
            }
            if (A == C || B == C)
            {
                throw new ArgumentException("Matrices must not be identical");
            }

            // Every task should get at least 30000 floating point operations.
            long flopCount = 2L * rowsOfA * innerDim * colsOfB;
            int  taskCount = (int)System.Math.Min(flopCount / 30000, this.maxThreads);

            bool byColumns = colsOfB >= taskCount;
            int  extent    = byColumns ? colsOfB : rowsOfA;

            // Never create more tasks than there are rows/columns to hand out.
            if (extent < taskCount)
            {
                taskCount = extent;
            }

            // With fewer than two tasks the start-up overhead outweighs the gain: run sequentially.
            if (taskCount < 2)
            {
                seqBlas.Dgemm(transposeA, transposeB, alpha, A, B, beta, C);
                return;
            }

            int regularLength = extent / taskCount;
            int finalTask     = taskCount - 1;

            for (int t = 0; t <= finalTask; t++)
            {
                int start = t * regularLength;

                // The last block also takes the remainder of the integer division.
                int length = (t == finalTask) ? extent - start : regularLength;

                // Declared inside the loop so that each queued lambda captures its own views.
                DoubleMatrix2D blockA, blockB, blockC;
                if (!byColumns)
                {
                    // row stripe of A and the matching row stripe of C
                    blockA = A.ViewPart(start, 0, length, innerDim);
                    blockB = B;
                    blockC = C.ViewPart(start, 0, length, colsOfB);
                }
                else
                {
                    // column stripe of B and the matching column stripe of C
                    blockA = A;
                    blockB = B.ViewPart(0, start, innerDim, length);
                    blockC = C.ViewPart(0, start, rowsOfA, length);
                }

                // Hand the block to the task group; a cancellation reported while queueing is ignored.
                // NOTE(review): no explicit wait for completion is visible in this method - confirm
                // that QueueTask blocks or that the caller synchronizes before reading C.
                try
                {
                    this.smp.TaskGroup.QueueTask(() =>
                    {
                        seqBlas.Dgemm(transposeA, transposeB, alpha, blockA, blockB, beta, blockC);
                    });
                }
                catch (TaskCanceledException)
                {
                }
            }
        }
Example #2
0
        /// <summary>
        /// Linear algebraic matrix power; <i>B = A<sup>p</sup> &lt;==> B = A*A*...*A</i>.
        /// <ul>
        /// <li><i>p &gt;= 1: B = A*A*...*A</i>.</li>
        /// <li><i>p == 0: B = identity matrix</i>.</li>
        /// <li><i>p &lt;  0: B = pow(inverse(A),-p)</i>.</li>
        /// </ul>
        /// Implementation: repeated squaring driven by the bits of <i>p</i>; the work matrices are
        /// swapped instead of reallocated on every multiplication.
        /// </summary>
        /// <param name="A">the source matrix; must be square; stays unaffected by this operation.</param>
        /// <param name="p">the exponent; may be negative, zero or positive.</param>
        /// <returns><i>B</i>, a newly constructed result matrix; storage-independent of <i>A</i>.</returns>
        ///<exception cref="ArgumentException">if <i>!property().isSquare(A)</i>.</exception>
        public static DoubleMatrix2D Pow(DoubleMatrix2D A, int p)
        {
            // matrix multiplication based on log2 method: A*A*....*A is slow, ((A * A)^2)^2 * ... is faster

            IBlas blas = SmpBlas.smpBlas; // for parallel matrix mult; if not initialized defaults to sequential blas

            Property.DEFAULT.CheckSquare(A);

            // Remember the caller's object so we can tell later whether A still refers to it.
            DoubleMatrix2D callerMatrix = A;

            if (p < 0)
            {
                A = Inverse(A);
                // NOTE(review): int.MinValue has no positive counterpart; in an unchecked context
                // -p wraps around to int.MinValue again - confirm this edge case is acceptable.
                p = -p;
            }
            if (p == 0)
            {
                return DoubleFactory2D.Dense.Identity(A.Rows);
            }

            DoubleMatrix2D T = A.Like(); // temporary

            if (p == 1)
            {
                return T.Assign(A);          // saves one auxiliary matrix allocation
            }
            if (p == 2)
            {
                blas.Dgemm(false, false, 1, A, A, 0, T); // mult(A,A); saves one auxiliary matrix allocation
                return T;
            }

            // The loops below swap A, T and B around and overwrite whichever matrix currently plays
            // the role of T. Without a private copy the caller's matrix ends up as that scratch
            // buffer (p = 4 overwrites it in the second squaring) or even as the returned result
            // (p = 3), which breaks the documented "A stays unaffected" contract.
            if (object.ReferenceEquals(A, callerMatrix))
            {
                A = A.Copy();
            }

            int k = Cern.Colt.Bitvector.QuickBitVector.MostSignificantBit(p); // index of highest bit in state "true"

            /*
             * this is the naive version:
             * DoubleMatrix2D B = A.Copy();
             * for (int i=0; i<p-1; i++) {
             *  B = mult(B,A);
             * }
             * return B;
             */

            // here comes the optimized version:
            DoubleMatrix2D swap;
            int i = 0;

            // Square A once for every trailing zero bit of p.
            while (i <= k && (p & (1 << i)) == 0)
            {
                // A = mult(A,A); would allocate a lot of temporary memory
                blas.Dgemm(false, false, 1, A, A, 0, T);
                swap = A; A = T; T = swap;               // swap A with T
                i++;
            }

            // Bit i is the lowest set bit of p: the result starts out as the current power of A.
            DoubleMatrix2D B = A.Copy();

            // Process the remaining higher bits: keep squaring A and multiply it into B
            // whenever the corresponding bit of p is set.
            for (i++; i <= k; i++)
            {
                // A = mult(A,A); would allocate a lot of temporary memory
                blas.Dgemm(false, false, 1, A, A, 0, T);
                swap = A; A = T; T = swap;               // swap A with T

                if ((p & (1 << i)) != 0)
                {
                    // B = mult(B,A); would allocate a lot of temporary memory
                    blas.Dgemm(false, false, 1, B, A, 0, T);
                    swap = B; B = T; T = swap;           // swap B with T
                }
            }

            return B;
        }