/// <summary>
/// Matrix-vector multiply that cuts <paramref name="A"/> into horizontal row blocks and
/// queues one sequential <c>seqBlas.Dgemv</c> call per block.
/// </summary>
/// <remarks>
/// The arithmetic itself is delegated to <c>seqBlas.Dgemv</c>; presumably that is the BLAS
/// operation <c>y = alpha * A * x + beta * y</c> (confirm against the sequential implementation).
/// <para>
/// A and y are split along the same row boundaries, x is shared by every block:
/// <code>
///    A       x     y
///   xxx      x     y   block 0
///   xxx      x     y
///   ---            -
///   xxx            y   block 1
///   xxx            y
///   ---            -
///   xxx            y   last block (takes any leftover rows)
/// </code>
/// </para>
/// </remarks>
/// <param name="transposeA">When true, the call is redirected to the non-transposed path on <c>A.ViewDice()</c>.</param>
/// <param name="alpha">Scalar forwarded unchanged to the sequential routine.</param>
/// <param name="A">Matrix operand; partitioned by rows via <c>ViewPart</c>.</param>
/// <param name="x">Vector operand; passed whole to every block.</param>
/// <param name="beta">Scalar forwarded unchanged to the sequential routine.</param>
/// <param name="y">Result vector; partitioned with the same offsets and lengths as the rows of <paramref name="A"/>.</param>
public void Dgemv(bool transposeA, double alpha, DoubleMatrix2D A, DoubleMatrix1D x, double beta, DoubleMatrix1D y)
{
    // The transposed case is handled by re-entering with a diced view of A.
    if (transposeA)
    {
        Dgemv(false, alpha, A.ViewDice(), x, beta, y);
        return;
    }

    // Each task should be given at least this many floating point operations.
    const int MinFlopsPerTask = 30000;

    int rowCount = A.Rows;
    int columnCount = A.Columns;
    long flops = 2L * rowCount * columnCount;

    // Bounded by the work available, the configured thread limit and the number of rows.
    int taskCount = (int)System.Math.Min(flops / MinFlopsPerTask, this.maxThreads);
    taskCount = System.Math.Min(rowCount, taskCount);

    if (taskCount < 2)
    {
        // Parallelization doesn't pay off (too much start up overhead).
        seqBlas.Dgemv(transposeA, alpha, A, x, beta, y);
        return;
    }

    int baseSpan = rowCount / taskCount;
    int lastBlock = taskCount - 1;

    for (int block = 0; block <= lastBlock; block++)
    {
        int firstRow = block * baseSpan;

        // The last block may be a bit larger: it takes every row that is left.
        int blockRows = block == lastBlock ? rowCount - firstRow : baseSpan;

        // Matching row slices of A and y; fresh locals per iteration so each closure
        // captures its own views.
        DoubleMatrix2D blockOfA = A.ViewPart(firstRow, 0, blockRows, columnCount);
        DoubleMatrix1D blockOfY = y.ViewPart(firstRow, blockRows);

        Action blockJob = () => { seqBlas.Dgemv(transposeA, alpha, blockOfA, x, beta, blockOfY); };

        // NOTE(review): no explicit wait for completion is visible in this method —
        // confirm that QueueTask (or the caller) blocks until the queued blocks have run.
        try
        {
            this.smp.TaskGroup.QueueTask(() => blockJob());
        }
        catch (TaskCanceledException)
        {
            // Deliberately ignored: a cancellation raised while queueing one block
            // does not stop the remaining blocks from being queued.
        }
    }
}