Example #1
0
        /// <summary>
        /// Replaces <c>bitmapResult</c> with a copy of <c>BitmapOrigin</c>, records its width and
        /// height in <c>x</c> and <c>y</c>, and runs a forward 2D Fourier transform over the red
        /// channel of every pixel.
        /// </summary>
        /// <remarks>
        /// The transformed samples only live in a local array; nothing is written back to
        /// <c>bitmapResult</c> yet.
        /// </remarks>
        private void valueChangeEvent()
        {
            bitmapResult = (Bitmap)BitmapOrigin.Clone();
            x = bitmapResult.Width;
            y = bitmapResult.Height;

            // One sample per pixel. Pixel (i, j) is stored at i * y + j so that every pixel gets
            // its own slot inside the x * y buffer; the previous i * x + j indexing ran past the
            // end of the array (or overwrote earlier samples) whenever width != height.
            // NOTE(review): this layout treats x as the row count and y as the column count,
            // matching the (x, y) argument order passed to Fourier.Forward2D below - confirm
            // against the library's documented sample layout.
            Complex32[] matrix = new Complex32[x * y];
            for (int i = 0; i < x; i++)
            {
                int offset = i * y;
                for (int j = 0; j < y; j++)
                {
                    matrix[offset + j] = bitmapResult.GetPixel(i, j).R;
                }
            }

            Fourier.Forward2D(matrix, x, y);

            // TODO: map the transformed samples back to gray levels and write them into
            // bitmapResult; until then the spectrum computed above is discarded.
        }
        /// <summary>
        /// Multiplies two matrices and updates another with the result. <c>c = alpha*op(a)*op(b) + beta*c</c>
        /// </summary>
        /// <param name="transposeA">How to transpose the <paramref name="a"/> matrix.</param>
        /// <param name="transposeB">How to transpose the <paramref name="b"/> matrix.</param>
        /// <param name="alpha">The value to scale <paramref name="a"/> matrix.</param>
        /// <param name="a">The a matrix.</param>
        /// <param name="rowsA">The number of rows in the <paramref name="a"/> matrix.</param>
        /// <param name="columnsA">The number of columns in the <paramref name="a"/> matrix.</param>
        /// <param name="b">The b matrix</param>
        /// <param name="rowsB">The number of rows in the <paramref name="b"/> matrix.</param>
        /// <param name="columnsB">The number of columns in the <paramref name="b"/> matrix.</param>
        /// <param name="beta">The value to scale the <paramref name="c"/> matrix.</param>
        /// <param name="c">The c matrix.</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="a"/>, <paramref name="b"/> or <paramref name="c"/> is <c>null</c>.
        /// </exception>
        /// <exception cref="ArgumentOutOfRangeException">
        /// The inner dimensions of <c>op(a)</c> and <c>op(b)</c> differ, or the length of
        /// <paramref name="c"/> does not equal rows of <c>op(a)</c> times columns of <c>op(b)</c>.
        /// </exception>
        /// <remarks>
        /// Element (row, column) of each matrix is read from / written to index
        /// <c>column * rows + row</c>. Any transpose value whose integer value is greater than 111
        /// is treated as a plain transpose; this routine never conjugates elements.
        /// </remarks>
        public void MatrixMultiplyWithUpdate(Transpose transposeA, Transpose transposeB, Complex32 alpha, Complex32[] a, int rowsA, int columnsA, Complex32[] b, int rowsB, int columnsB, Complex32 beta, Complex32[] c)
        {
            // First check some basic requirement on the parameters of the matrix multiplication.
            if (a == null)
            {
                throw new ArgumentNullException("a");
            }

            if (b == null)
            {
                throw new ArgumentNullException("b");
            }

            if (c == null)
            {
                throw new ArgumentNullException("c");
            }

            var transA = (int)transposeA > 111;
            var transB = (int)transposeB > 111;

            // Shape of the product: op(a) is rowsC x inner, op(b) is innerB x columnsC.
            var rowsC = transA ? columnsA : rowsA;
            var inner = transA ? rowsA : columnsA;
            var innerB = transB ? columnsB : rowsB;
            var columnsC = transB ? rowsB : columnsB;

            if (inner != innerB)
            {
                throw new ArgumentOutOfRangeException();
            }

            if (rowsC * columnsC != c.Length)
            {
                throw new ArgumentOutOfRangeException();
            }

            if (alpha.IsZero() && beta.IsZero())
            {
                Array.Clear(c, 0, c.Length);
                return;
            }

            // Check whether we will be overwriting any of our inputs and make copies if necessary.
            // TODO - we don't have to allocate a completely new matrix when a or b point to the same
            // memory as c, we can do it on a row wise basis. We should investigate this.
            var adata = ReferenceEquals(a, c) ? (Complex32[])a.Clone() : a;
            var bdata = ReferenceEquals(b, c) ? (Complex32[])b.Clone() : b;

            // Express the four transpose combinations as index steps so one loop serves them all:
            //   op(a)[i, l] = adata[(i * aRowStep) + (l * aInnerStep)]
            //   op(b)[l, j] = bdata[(l * bInnerStep) + (j * bColumnStep)]
            var aRowStep = transA ? rowsA : 1;
            var aInnerStep = transA ? 1 : rowsA;
            var bInnerStep = transB ? rowsB : 1;
            var bColumnStep = transB ? 1 : rowsB;

            // Hoisted so the special cases (which skip the multiplication by alpha and/or the
            // read of the old c value) are decided once rather than per element.
            var alphaIsOne = alpha.IsOne();
            var betaIsZero = beta.IsZero();

            // One parallel work item per column of c. The bounds are always the shape of c
            // (columnsC columns, rowsC rows); the previous both-transposed loops iterated
            // columnsA x rowsB instead, which read past the inputs for non-square operands.
            CommonParallel.For(
                0,
                columnsC,
                j =>
                {
                    var cOffset = j * rowsC;
                    var bOffset = j * bColumnStep;
                    for (var i = 0; i < rowsC; i++)
                    {
                        var aIndex = i * aRowStep;
                        var bIndex = bOffset;
                        Complex32 s = 0;
                        for (var l = 0; l < inner; l++)
                        {
                            s += adata[aIndex] * bdata[bIndex];
                            aIndex += aInnerStep;
                            bIndex += bInnerStep;
                        }

                        if (!alphaIsOne)
                        {
                            c[cOffset + i] = (alpha * s) + (c[cOffset + i] * beta);
                        }
                        else if (betaIsZero)
                        {
                            // Pure product: the previous content of c is ignored entirely.
                            c[cOffset + i] = s;
                        }
                        else
                        {
                            c[cOffset + i] = s + (c[cOffset + i] * beta);
                        }
                    }
                });
        }
        /// <summary>
        /// Multiplies two matrices. <c>result = x * y</c>
        /// </summary>
        /// <param name="x">The x matrix.</param>
        /// <param name="rowsX">The number of rows in the x matrix.</param>
        /// <param name="columnsX">The number of columns in the x matrix.</param>
        /// <param name="y">The y matrix.</param>
        /// <param name="rowsY">The number of rows in the y matrix.</param>
        /// <param name="columnsY">The number of columns in the y matrix.</param>
        /// <param name="result">Where to store the result of the multiplication.</param>
        /// <remarks>This is a simplified version of the BLAS GEMM routine with alpha
        /// set to 1.0 and beta set to 0.0, and x and y are not transposed.</remarks>
        public void MatrixMultiply(Complex32[] x, int rowsX, int columnsX, Complex32[] y, int rowsY, int columnsY, Complex32[] result)
        {
            // Null checks come first: the dimension checks below read the array lengths.
            if (x == null)
            {
                throw new ArgumentNullException("x");
            }

            if (y == null)
            {
                throw new ArgumentNullException("y");
            }

            if (result == null)
            {
                throw new ArgumentNullException("result");
            }

            // Each operand must hold exactly rows * columns elements.
            var expectedXLength = rowsX * columnsX;
            if (x.Length != expectedXLength)
            {
                throw new ArgumentException("x.Length != xRows * xColumns");
            }

            var expectedYLength = rowsY * columnsY;
            if (y.Length != expectedYLength)
            {
                throw new ArgumentException("y.Length != yRows * yColumns");
            }

            // The inner dimensions have to agree and the output has to fit the product.
            if (rowsY != columnsX)
            {
                throw new ArgumentException("xColumns != yRows");
            }

            var expectedResultLength = rowsX * columnsY;
            if (result.Length != expectedResultLength)
            {
                throw new ArgumentException("xRows * yColumns != result.Length");
            }

            // An operand that shares its storage with the output is copied so it is not
            // overwritten while the product is being computed.
            // TODO - we don't have to allocate a completely new matrix when x or y point to the same
            // memory as result, we can do it on a row wise basis. We should investigate this.
            var xdata = ReferenceEquals(x, result) ? (Complex32[])x.Clone() : x;
            var ydata = ReferenceEquals(y, result) ? (Complex32[])y.Clone() : y;

            // Delegate to the general update routine with alpha = 1, beta = 0 and no transposition.
            // TODO - For small matrices we should get rid of the parallelism because of startup costs.
            // Perhaps the following implementation would be a good one:
            // http://blog.feradz.com/2009/01/cache-efficient-matrix-multiplication/
            MatrixMultiplyWithUpdate(
                Transpose.DontTranspose,
                Transpose.DontTranspose,
                Complex32.One,
                xdata,
                rowsX,
                columnsX,
                ydata,
                rowsY,
                columnsY,
                Complex32.Zero,
                result);
        }
        /// <summary>
        /// Multiplies two matrices and updates another with the result. <c>c = alpha*op(a)*op(b) + beta*c</c>
        /// </summary>
        /// <param name="transposeA">How to transpose the <paramref name="a"/> matrix.</param>
        /// <param name="transposeB">How to transpose the <paramref name="b"/> matrix.</param>
        /// <param name="alpha">The value to scale <paramref name="a"/> matrix.</param>
        /// <param name="a">The a matrix.</param>
        /// <param name="rowsA">The number of rows in the <paramref name="a"/> matrix.</param>
        /// <param name="columnsA">The number of columns in the <paramref name="a"/> matrix.</param>
        /// <param name="b">The b matrix</param>
        /// <param name="rowsB">The number of rows in the <paramref name="b"/> matrix.</param>
        /// <param name="columnsB">The number of columns in the <paramref name="b"/> matrix.</param>
        /// <param name="beta">The value to scale the <paramref name="c"/> matrix.</param>
        /// <param name="c">The c matrix.</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="a"/>, <paramref name="b"/> or <paramref name="c"/> is <c>null</c>.
        /// </exception>
        /// <exception cref="ArgumentOutOfRangeException">
        /// The inner dimensions of <c>op(a)</c> and <c>op(b)</c> differ, or the length of
        /// <paramref name="c"/> does not equal rows of <c>op(a)</c> times columns of <c>op(b)</c>.
        /// </exception>
        public virtual void MatrixMultiplyWithUpdate(Transpose transposeA, Transpose transposeB, Complex32 alpha, Complex32[] a, int rowsA, int columnsA, Complex32[] b, int rowsB, int columnsB, Complex32 beta, Complex32[] c)
        {
            // First check some basic requirement on the parameters of the matrix multiplication.
            if (a == null)
            {
                throw new ArgumentNullException("a");
            }

            if (b == null)
            {
                throw new ArgumentNullException("b");
            }

            // c.Length is read below; fail with a descriptive exception instead of a
            // NullReferenceException.
            if (c == null)
            {
                throw new ArgumentNullException("c");
            }

            // Any transpose value above 111 selects the transposed operand.
            var transA = (int)transposeA > 111;
            var transB = (int)transposeB > 111;

            var m = transA ? columnsA : rowsA; // The number of rows of matrix op(A) and of the matrix C.
            var n = transB ? rowsB : columnsB; // The number of columns of matrix op(B) and of the matrix C.
            var k = transA ? rowsA : columnsA; // The number of columns of matrix op(A).
            var rowsOpB = transB ? columnsB : rowsB; // The number of rows of matrix op(B); must equal k.

            if (k != rowsOpB)
            {
                throw new ArgumentOutOfRangeException();
            }

            if (m * n != c.Length)
            {
                throw new ArgumentOutOfRangeException();
            }

            if (alpha.IsZero() && beta.IsZero())
            {
                Array.Clear(c, 0, c.Length);
                return;
            }

            // Check whether we will be overwriting any of our inputs and make copies if necessary.
            // The copies are taken before c is cleared or scaled below, so an operand that aliases
            // c still contributes its original values.
            // TODO - we don't have to allocate a completely new matrix when a or b point to the same
            // memory as c, we can do it on a row wise basis. We should investigate this.
            var adata = ReferenceEquals(a, c) ? (Complex32[])a.Clone() : a;
            var bdata = ReferenceEquals(b, c) ? (Complex32[])b.Clone() : b;

            // Apply the beta * c term up front; beta == 1 leaves c untouched.
            if (beta.IsZero())
            {
                Array.Clear(c, 0, c.Length);
            }
            else if (!beta.IsOne())
            {
                Control.LinearAlgebraProvider.ScaleArray(beta, c, c);
            }

            // With alpha == 0 the product term vanishes, so the scaled c is already the answer.
            if (alpha.IsZero())
            {
                return;
            }

            CacheObliviousMatrixMultiply(transposeA, transposeB, alpha, adata, 0, 0, bdata, 0, 0, c, 0, 0, m, n, k, m, n, k, true);
        }
        /// <summary>
        /// Multiplies two matrices. <c>result = x * y</c>
        /// </summary>
        /// <param name="x">The x matrix.</param>
        /// <param name="rowsX">The number of rows in the x matrix.</param>
        /// <param name="columnsX">The number of columns in the x matrix.</param>
        /// <param name="y">The y matrix.</param>
        /// <param name="rowsY">The number of rows in the y matrix.</param>
        /// <param name="columnsY">The number of columns in the y matrix.</param>
        /// <param name="result">Where to store the result of the multiplication.</param>
        /// <remarks>This is a simplified version of the BLAS GEMM routine with alpha
        /// set to 1.0 and beta set to 0.0, and x and y are not transposed.</remarks>
        public virtual void MatrixMultiply(Complex32[] x, int rowsX, int columnsX, Complex32[] y, int rowsY, int columnsY, Complex32[] result)
        {
            // Argument presence is verified before any length is inspected.
            if (x == null)
            {
                throw new ArgumentNullException("x");
            }

            if (y == null)
            {
                throw new ArgumentNullException("y");
            }

            if (result == null)
            {
                throw new ArgumentNullException("result");
            }

            // Declared dimensions must match the actual storage of both operands.
            var xElementCount = rowsX * columnsX;
            if (x.Length != xElementCount)
            {
                throw new ArgumentException("x.Length != xRows * xColumns");
            }

            var yElementCount = rowsY * columnsY;
            if (y.Length != yElementCount)
            {
                throw new ArgumentException("y.Length != yRows * yColumns");
            }

            // x * y is only defined when x has as many columns as y has rows.
            if (rowsY != columnsX)
            {
                throw new ArgumentException("xColumns != yRows");
            }

            var resultElementCount = rowsX * columnsY;
            if (result.Length != resultElementCount)
            {
                throw new ArgumentException("xRows * yColumns != result.Length");
            }

            // Work on a private copy of any operand that is also the output array, so its
            // values are not overwritten while they are still needed.
            // TODO - we don't have to allocate a completely new matrix when x or y point to the same
            // memory as result, we can do it on a row wise basis. We should investigate this.
            var xdata = ReferenceEquals(x, result) ? (Complex32[])x.Clone() : x;
            var ydata = ReferenceEquals(y, result) ? (Complex32[])y.Clone() : y;

            // result = 1 * x * y + 0 * result, with neither operand transposed.
            MatrixMultiplyWithUpdate(
                Transpose.DontTranspose,
                Transpose.DontTranspose,
                Complex32.One,
                xdata,
                rowsX,
                columnsX,
                ydata,
                rowsY,
                columnsY,
                Complex32.Zero,
                result);
        }
        /// <summary>
        /// Multiplies two matrices and updates another with the result. <c>c = alpha*op(a)*op(b) + beta*c</c>
        /// </summary>
        /// <param name="transposeA">How to transpose the <paramref name="a"/> matrix.</param>
        /// <param name="transposeB">How to transpose the <paramref name="b"/> matrix.</param>
        /// <param name="alpha">The value to scale <paramref name="a"/> matrix.</param>
        /// <param name="a">The a matrix.</param>
        /// <param name="aRows">The number of rows in the <paramref name="a"/> matrix.</param>
        /// <param name="aColumns">The number of columns in the <paramref name="a"/> matrix.</param>
        /// <param name="b">The b matrix</param>
        /// <param name="bRows">The number of rows in the <paramref name="b"/> matrix.</param>
        /// <param name="bColumns">The number of columns in the <paramref name="b"/> matrix.</param>
        /// <param name="beta">The value to scale the <paramref name="c"/> matrix.</param>
        /// <param name="c">The c matrix.</param>
        public void MatrixMultiplyWithUpdate(Transpose transposeA, Transpose transposeB, Complex32 alpha, Complex32[] a, 
            int aRows, int aColumns, Complex32[] b, int bRows, int bColumns, Complex32 beta, Complex32[] c)
        {
            // Choose nonsensical values for the number of rows and columns in c; fill them in depending
            // on the operations on a and b.
            int cRows = -1;
            int cColumns = -1;

            // First check some basic requirement on the parameters of the matrix multiplication.
            if (a == null)
            {
                throw new ArgumentNullException("a");
            }

            if (b == null)
            {
                throw new ArgumentNullException("b");
            }

            if ((int)transposeA > 111 && (int)transposeB > 111)
            {
                if (aRows != bColumns)
                {
                    throw new ArgumentOutOfRangeException();
                }

                if (aColumns * bRows != c.Length)
                {
                    throw new ArgumentOutOfRangeException();
                }

                cRows = aColumns;
                cColumns = bRows;
            }
            else if ((int)transposeA > 111)
            {
                if (aRows != bRows)
                {
                    throw new ArgumentOutOfRangeException();
                }

                if (aColumns * bColumns != c.Length)
                {
                    throw new ArgumentOutOfRangeException();
                }

                cRows = aColumns;
                cColumns = bColumns;
            }
            else if ((int)transposeB > 111)
            {
                if (aColumns != bColumns)
                {
                    throw new ArgumentOutOfRangeException();
                }

                if (aRows * bRows != c.Length)
                {
                    throw new ArgumentOutOfRangeException();
                }

                cRows = aRows;
                cColumns = bRows;
            }
            else
            {
                if (aColumns != bRows)
                {
                    throw new ArgumentOutOfRangeException();
                }

                if (aRows * bColumns != c.Length)
                {
                    throw new ArgumentOutOfRangeException();
                }

                cRows = aRows;
                cColumns = bColumns;
            }

            if (alpha.IsZero && beta.IsZero)
            {
                Array.Clear(c, 0, c.Length);
                return;
            }

            // Check whether we will be overwriting any of our inputs and make copies if necessary.
            // TODO - we can don't have to allocate a completely new matrix when x or y point to the same memory
            // as result, we can do it on a row wise basis. We should investigate this.
            Complex32[] adata;
            if (ReferenceEquals(a, c))
            {
                adata = (Complex32[])a.Clone();
            }
            else
            {
                adata = a;
            }

            Complex32[] bdata;
            if (ReferenceEquals(b, c))
            {
                bdata = (Complex32[])b.Clone();
            }
            else
            {
                bdata = b;
            }

            if (alpha.IsOne)
            {
                if (beta.IsZero)
                {
                    if ((int)transposeA > 111 && (int)transposeB > 111)
                    {
                        Parallel.For(0, aColumns, j =>
                        {
                            int jIndex = j * cRows;
                            for (int i = 0; i != bRows; i++)
                            {
                                int iIndex = i * aRows;
                                Complex32 s = 0;
                                for (int l = 0; l != bColumns; l++)
                                {
                                    s += adata[iIndex + l] * bdata[l * bRows + j];
                                }
                                c[jIndex + i] = s;
                            }
                        });
                    }
                    else if ((int)transposeA > 111)
                    {
                        Parallel.For(0, bColumns, j =>
                        {
                            int jcIndex = j * cRows;
                            int jbIndex = j * bRows;
                            for (int i = 0; i != aColumns; i++)
                            {
                                int iIndex = i * aRows;
                                Complex32 s = 0;
                                for (int l = 0; l != aRows; l++)
                                {
                                    s += adata[iIndex + l] * bdata[jbIndex + l];
                                }
                                c[jcIndex + i] = s;
                            }
                        });
                    }
                    else if ((int)transposeB > 111)
                    {
                        Parallel.For(0, bRows, j =>
                        {
                            int jIndex = j * cRows;
                            for (int i = 0; i != aRows; i++)
                            {
                                Complex32 s = 0;
                                for (int l = 0; l != aColumns; l++)
                                {
                                    s += adata[l * aRows + i] * bdata[l * bRows + j];
                                }
                                c[jIndex + i] = s;
                            }
                        });
                    }
                    else
                    {
                        Parallel.For(0, bColumns, j =>
                        {
                            int jcIndex = j * cRows;
                            int jbIndex = j * bRows;
                            for (int i = 0; i != aRows; i++)
                            {
                                Complex32 s = 0;
                                for (int l = 0; l != aColumns; l++)
                                {
                                    s += adata[l * aRows + i] * bdata[jbIndex + l];
                                }
                                c[jcIndex + i] = s;
                            }
                        });
                    }
                }
                else
                {
                    if ((int)transposeA > 111 && (int)transposeB > 111)
                    {
                        Parallel.For(0, aColumns, j =>
                        {
                            int jIndex = j * cRows;
                            for (int i = 0; i != bRows; i++)
                            {
                                int iIndex = i * aRows;
                                Complex32 s = 0;
                                for (int l = 0; l != bColumns; l++)
                                {
                                    s += adata[iIndex + l] * bdata[l * bRows + j];
                                }
                                c[jIndex + i] = c[jIndex + i] * beta + s;
                            }
                        });
                    }
                    else if ((int)transposeA > 111)
                    {
                        Parallel.For(0, bColumns, j =>
                        {
                            int jcIndex = j * cRows;
                            int jbIndex = j * bRows;
                            for (int i = 0; i != aColumns; i++)
                            {
                                int iIndex = i * aRows;
                                Complex32 s = 0;
                                for (int l = 0; l != aRows; l++)
                                {
                                    s += adata[iIndex + l] * bdata[jbIndex + l];
                                }
                                c[jcIndex + i] = s + c[jcIndex + i] * beta;
                            }
                        });
                    }
                    else if ((int)transposeB > 111)
                    {
                        Parallel.For(0, bRows, j =>
                        {
                            int jIndex = j * cRows;
                            for (int i = 0; i != aRows; i++)
                            {
                                Complex32 s = 0;
                                for (int l = 0; l != aColumns; l++)
                                {
                                    s += adata[l * aRows + i] * bdata[l * bRows + j];
                                }
                                c[jIndex + i] = s + c[jIndex + i] * beta;
                            }
                        });
                    }
                    else
                    {
                        Parallel.For(0, bColumns, j =>
                        {
                            int jcIndex = j * cRows;
                            int jbIndex = j * bRows;
                            for (int i = 0; i != aRows; i++)
                            {
                                Complex32 s = 0;
                                for (int l = 0; l != aColumns; l++)
                                {
                                    s += adata[l * aRows + i] * bdata[jbIndex + l];
                                }
                                c[jcIndex + i] = s + c[jcIndex + i] * beta;
                            }
                        });
                    }
                }
            }
            else
            {
                if ((int)transposeA > 111 && (int)transposeB > 111)
                {
                    Parallel.For(0, aColumns, j =>
                    {
                        int jIndex = j * cRows;
                        for (int i = 0; i != bRows; i++)
                        {
                            int iIndex = i * aRows;
                            Complex32 s = 0;
                            for (int l = 0; l != bColumns; l++)
                            {
                                s += adata[iIndex + l] * bdata[l * bRows + j];
                            }
                            c[jIndex + i] = c[jIndex + i] * beta + alpha * s;
                        }
                    });
                }
                else if ((int)transposeA > 111)
                {
                    Parallel.For(0, bColumns, j =>
                    {
                        int jcIndex = j * cRows;
                        int jbIndex = j * bRows;
                        for (int i = 0; i != aColumns; i++)
                        {
                            int iIndex = i * aRows;
                            Complex32 s = 0;
                            for (int l = 0; l != aRows; l++)
                            {
                                s += adata[iIndex + l] * bdata[jbIndex + l];
                            }
                            c[jcIndex + i] = alpha * s + c[jcIndex + i] * beta;
                        }
                    });
                }
                else if ((int)transposeB > 111)
                {
                    Parallel.For(0, bRows, j =>
                    {
                        int jIndex = j * cRows;
                        for (int i = 0; i != aRows; i++)
                        {
                            Complex32 s = 0;
                            for (int l = 0; l != aColumns; l++)
                            {
                                s += adata[l * aRows + i] * bdata[l * bRows + j];
                            }
                            c[jIndex + i] = alpha * s + c[jIndex + i] * beta;
                        }
                    });
                }
                else
                {
                    Parallel.For(0, bColumns, j =>
                    {
                        int jcIndex = j * cRows;
                        int jbIndex = j * bRows;
                        for (int i = 0; i != aRows; i++)
                        {
                            Complex32 s = 0;
                            for (int l = 0; l != aColumns; l++)
                            {
                                s += adata[l * aRows + i] * bdata[jbIndex + l];
                            }
                            c[jcIndex + i] = alpha * s + c[jcIndex + i] * beta;
                        }
                    });
                }
            }
        }