/// <summary>
/// Applies one Cholesky update step to a range of columns of the factor matrix.
/// For every column j in [firstCol, colLimit) and every row i in [j, rowDim) the value
/// multipliers[i] * conj(multipliers[j]) is subtracted from data(i, j).
/// </summary>
/// <param name="data">Factor matrix, updated in place.</param>
/// <param name="rowDim">Exclusive upper bound of the rows that are updated.</param>
/// <param name="firstCol">First column to update (inclusive).</param>
/// <param name="colLimit">Column at which the update stops (exclusive).</param>
/// <param name="multipliers">Multipliers calculated previously; indexed by both row and column.</param>
/// <param name="availableCores">Number of processors this call may spread its work over.</param>
static void DoCholeskyStep(Matrix<Complex32> data, int rowDim, int firstCol, int colLimit, Complex32[] multipliers, int availableCores)
{
    var columnSpan = colLimit - firstCol;
    var splitWork = (availableCores > 1) && (columnSpan > 200);

    if (!splitWork)
    {
        for (var col = firstCol; col < colLimit; col++)
        {
            // The conjugate of the column multiplier is the same for every row of this column.
            var conjugatedPivot = multipliers[col].Conjugate();
            for (var row = col; row < rowDim; row++)
            {
                var updated = data.At(row, col) - (multipliers[row] * conjugatedPivot);
                data.At(row, col, updated);
            }
        }

        return;
    }

    // The first task only gets a third of the columns: lower-numbered columns iterate over
    // more rows (row starts at col), so this presumably balances the two halves.
    var splitColumn = firstCol + (columnSpan / 3);
    var coresPerTask = availableCores / 2;

    CommonParallel.Invoke(
        () => DoCholeskyStep(data, rowDim, firstCol, splitColumn, multipliers, coresPerTask),
        () => DoCholeskyStep(data, rowDim, splitColumn, colLimit, multipliers, coresPerTask));
}
/// <summary>
/// Perform calculation of Q or R.
/// For every column j in [columnStart, columnDim) the dot product of <paramref name="u"/> with
/// rows [rowStart, rowDim) of that column is computed, and u times that dot product is then
/// subtracted from the same rows of the column.
/// </summary>
/// <param name="u">Work array; element 0 corresponds to row <paramref name="rowStart"/>.</param>
/// <param name="a">Q or R matrix, updated in place.</param>
/// <param name="rowStart">The first row (inclusive).</param>
/// <param name="rowDim">The row at which processing stops (exclusive).</param>
/// <param name="columnStart">The first column (inclusive).</param>
/// <param name="columnDim">The column at which processing stops (exclusive).</param>
/// <param name="availableCores">Number of available CPUs.</param>
private static void ComputeQR(double[] u, Matrix<double> a, int rowStart, int rowDim, int columnStart, int columnDim, int availableCores)
{
    // Nothing to do for an inverted row or column range.
    if (rowStart > rowDim || columnStart > columnDim)
    {
        return;
    }

    var columnSpan = columnDim - columnStart;
    var splitWork = (availableCores > 1) && (columnSpan > 200);

    if (splitWork)
    {
        // Columns are independent of each other, so each half of the range gets its own task.
        var middle = columnStart + (columnSpan / 2);
        var coresPerTask = availableCores / 2;

        CommonParallel.Invoke(
            () => ComputeQR(u, a, rowStart, rowDim, columnStart, middle, coresPerTask),
            () => ComputeQR(u, a, rowStart, rowDim, middle, columnDim, coresPerTask));
        return;
    }

    for (var col = columnStart; col < columnDim; col++)
    {
        // Dot product of u with the current column segment.
        var dot = 0.0;
        for (int row = rowStart, offset = 0; row < rowDim; row++, offset++)
        {
            dot += u[offset] * a.At(row, col);
        }

        // Subtract u scaled by that dot product from the column segment.
        for (int row = rowStart, offset = 0; row < rowDim; row++, offset++)
        {
            a.At(row, col, a.At(row, col) - (u[offset] * dot));
        }
    }
}
/// <summary>
/// Applies one Cholesky update step to a range of columns of the factor matrix.
/// For every column j in [firstCol, colLimit) and every row i in [j, rowDim) the product
/// multipliers[i] * multipliers[j] is subtracted from data(i, j).
/// </summary>
/// <param name="data">Factor matrix, updated in place.</param>
/// <param name="rowDim">Exclusive upper bound of the rows that are updated.</param>
/// <param name="firstCol">First column to update (inclusive).</param>
/// <param name="colLimit">Column at which the update stops (exclusive).</param>
/// <param name="multipliers">Multipliers calculated previously; indexed by both row and column.</param>
/// <param name="availableCores">Number of processors this call may spread its work over.</param>
private static void DoCholeskyStep(Matrix<double> data, int rowDim, int firstCol, int colLimit, double[] multipliers, int availableCores)
{
    var columnSpan = colLimit - firstCol;

    if (availableCores <= 1 || columnSpan <= 200)
    {
        // Small range or a single core: update the columns directly.
        for (var col = firstCol; col < colLimit; col++)
        {
            var pivot = multipliers[col];
            for (var row = col; row < rowDim; row++)
            {
                var current = data.At(row, col);
                data.At(row, col, current - multipliers[row] * pivot);
            }
        }

        return;
    }

    // The first task only gets a third of the columns: lower-numbered columns iterate over
    // more rows (row starts at col), so this presumably balances the two halves.
    var splitColumn = firstCol + columnSpan / 3;
    var coresPerTask = availableCores / 2;

    CommonParallel.Invoke(
        () => DoCholeskyStep(data, rowDim, firstCol, splitColumn, multipliers, coresPerTask),
        () => DoCholeskyStep(data, rowDim, splitColumn, colLimit, multipliers, coresPerTask));
}
/// <summary>
/// Applies one Cholesky update step to a range of columns of a factor matrix stored in a flat
/// array, where element (i, j) lives at index j * rowDim + i.
/// For every column j in [firstCol, colLimit) and every row i in [j, rowDim) the product
/// multipliers[i] * multipliers[j] is subtracted from that element.
/// </summary>
/// <param name="data">Factor matrix storage, updated in place.</param>
/// <param name="rowDim">Number of rows; also the stride between consecutive columns.</param>
/// <param name="firstCol">First column to update (inclusive).</param>
/// <param name="colLimit">Column at which the update stops (exclusive).</param>
/// <param name="multipliers">Multipliers calculated previously; indexed by both row and column.</param>
/// <param name="availableCores">Number of processors this call may spread its work over.</param>
private static void DoCholeskyStep(double[] data, int rowDim, int firstCol, int colLimit, double[] multipliers, int availableCores)
{
    var columnSpan = colLimit - firstCol;

    if (availableCores <= 1 || columnSpan <= Control.ParallelizeElements)
    {
        // Small range or a single core: update the columns directly.
        for (var col = firstCol; col < colLimit; col++)
        {
            var pivot = multipliers[col];
            var columnOffset = col * rowDim;
            for (var row = col; row < rowDim; row++)
            {
                data[columnOffset + row] -= multipliers[row] * pivot;
            }
        }

        return;
    }

    // The first task only gets a third of the columns: lower-numbered columns iterate over
    // more rows (row starts at col), so this presumably balances the two halves.
    var splitColumn = firstCol + columnSpan / 3;
    var coresPerTask = availableCores / 2;

    CommonParallel.Invoke(
        () => DoCholeskyStep(data, rowDim, firstCol, splitColumn, multipliers, coresPerTask),
        () => DoCholeskyStep(data, rowDim, splitColumn, colLimit, multipliers, coresPerTask));
}
/// <summary>
/// Perform calculation of Q or R.
/// For every column j in [columnStart, columnDim) the sum of u[i - rowStart] * a(i, j) over
/// rows [rowStart, rowDim) is computed, and conj(u[i - rowStart]) times that sum is then
/// subtracted from a(i, j) for the same rows.
/// </summary>
/// <param name="u">Work array; element 0 corresponds to row <paramref name="rowStart"/>.</param>
/// <param name="a">Q or R matrix, updated in place.</param>
/// <param name="rowStart">The first row (inclusive).</param>
/// <param name="rowDim">The row at which processing stops (exclusive).</param>
/// <param name="columnStart">The first column (inclusive).</param>
/// <param name="columnDim">The column at which processing stops (exclusive).</param>
/// <param name="availableCores">Number of available CPUs.</param>
static void ComputeQR(Complex32[] u, Matrix<Complex32> a, int rowStart, int rowDim, int columnStart, int columnDim, int availableCores)
{
    // Nothing to do for an inverted row or column range.
    if ((rowStart > rowDim) || (columnStart > columnDim))
    {
        return;
    }

    var columnSpan = columnDim - columnStart;
    var splitWork = (availableCores > 1) && (columnSpan > 200);

    if (splitWork)
    {
        // Columns are independent of each other, so each half of the range gets its own task.
        var middle = columnStart + (columnSpan / 2);
        var coresPerTask = availableCores / 2;

        CommonParallel.Invoke(
            () => ComputeQR(u, a, rowStart, rowDim, columnStart, middle, coresPerTask),
            () => ComputeQR(u, a, rowStart, rowDim, middle, columnDim, coresPerTask));
        return;
    }

    for (var col = columnStart; col < columnDim; col++)
    {
        // Accumulate u * column over the selected rows (u is not conjugated here).
        var accumulated = Complex32.Zero;
        for (int row = rowStart, offset = 0; row < rowDim; row++, offset++)
        {
            accumulated += u[offset] * a.At(row, col);
        }

        // Subtract conj(u) scaled by the accumulated value from the column segment.
        for (int row = rowStart, offset = 0; row < rowDim; row++, offset++)
        {
            a.At(row, col, a.At(row, col) - (u[offset].Conjugate() * accumulated));
        }
    }
}
/// <summary>
/// Recursively accumulates the product of an m-by-k block of <paramref name="matrixA"/> and a
/// k-by-n block of <paramref name="matrixB"/> into an m-by-n block of <paramref name="result"/>
/// (result += A * B). Elements are addressed as column * stride + row, with stride
/// <paramref name="constM"/> for A and the result and <paramref name="constK"/> for B.
/// </summary>
/// <param name="matrixA">Storage of the left operand.</param>
/// <param name="shiftArow">Row offset of the block inside A.</param>
/// <param name="shiftAcol">Column offset of the block inside A.</param>
/// <param name="matrixB">Storage of the right operand.</param>
/// <param name="shiftBrow">Row offset of the block inside B.</param>
/// <param name="shiftBcol">Column offset of the block inside B.</param>
/// <param name="result">Storage of the result; the products are added to its current content.</param>
/// <param name="shiftCrow">Row offset of the block inside the result.</param>
/// <param name="shiftCcol">Column offset of the block inside the result.</param>
/// <param name="m">Number of rows of the current block of A and of the result.</param>
/// <param name="n">Number of columns of the current block of B and of the result.</param>
/// <param name="k">Number of columns of the A block, equal to the number of rows of the B block.</param>
/// <param name="constM">Column stride used for A and for the result.</param>
/// <param name="constN">Not used for indexing here; only forwarded to the recursive calls.</param>
/// <param name="constK">Column stride used for B.</param>
/// <param name="level">Recursion depth of this call; the first two subdivisions run in parallel.</param>
static void CacheObliviousMatrixMultiply(double[] matrixA, int shiftArow, int shiftAcol, double[] matrixB, int shiftBrow, int shiftBcol, double[] result, int shiftCrow, int shiftCcol, int m, int n, int k, int constM, int constN, int constK, int level)
{
    // Base case: the block is small enough to be multiplied directly.
    if (m + n <= Control.ParallelizeOrder)
    {
        for (var row = 0; row < m; row++)
        {
            var aRow = shiftArow + row;
            var cRow = shiftCrow + row;
            for (var col = 0; col < n; col++)
            {
                var bColumnStart = shiftBrow + ((shiftBcol + col) * constK);
                var dot = 0.0;
                for (var inner = 0; inner < k; inner++)
                {
                    dot += matrixA[aRow + ((shiftAcol + inner) * constM)] * matrixB[bColumnStart + inner];
                }

                result[cRow + ((shiftCcol + col) * constM)] += dot;
            }
        }

        return;
    }

    // Divide and conquer: halve every dimension; the "high" part takes the remainder.
    var mLow = m / 2;
    var mHigh = m - mLow;
    var nLow = n / 2;
    var nHigh = n - nLow;
    var kLow = k / 2;
    var kHigh = k - kLow;
    var depth = level + 1;

    // Offsets of the second half along each dimension.
    var aRowHigh = shiftArow + mLow;
    var aColHigh = shiftAcol + kLow;
    var bRowHigh = shiftBrow + kLow;
    var bColHigh = shiftBcol + nLow;
    var cRowHigh = shiftCrow + mLow;
    var cColHigh = shiftCcol + nLow;

    if (depth > 2)
    {
        // Deep in the recursion: run the eight sub-products one after another.
        CacheObliviousMatrixMultiply(matrixA, shiftArow, shiftAcol, matrixB, shiftBrow, shiftBcol, result, shiftCrow, shiftCcol, mLow, nLow, kLow, constM, constN, constK, depth);
        CacheObliviousMatrixMultiply(matrixA, shiftArow, shiftAcol, matrixB, shiftBrow, bColHigh, result, shiftCrow, cColHigh, mLow, nHigh, kLow, constM, constN, constK, depth);
        CacheObliviousMatrixMultiply(matrixA, shiftArow, aColHigh, matrixB, bRowHigh, shiftBcol, result, shiftCrow, shiftCcol, mLow, nLow, kHigh, constM, constN, constK, depth);
        CacheObliviousMatrixMultiply(matrixA, shiftArow, aColHigh, matrixB, bRowHigh, bColHigh, result, shiftCrow, cColHigh, mLow, nHigh, kHigh, constM, constN, constK, depth);
        CacheObliviousMatrixMultiply(matrixA, aRowHigh, shiftAcol, matrixB, shiftBrow, shiftBcol, result, cRowHigh, shiftCcol, mHigh, nLow, kLow, constM, constN, constK, depth);
        CacheObliviousMatrixMultiply(matrixA, aRowHigh, shiftAcol, matrixB, shiftBrow, bColHigh, result, cRowHigh, cColHigh, mHigh, nHigh, kLow, constM, constN, constK, depth);
        CacheObliviousMatrixMultiply(matrixA, aRowHigh, aColHigh, matrixB, bRowHigh, shiftBcol, result, cRowHigh, shiftCcol, mHigh, nLow, kHigh, constM, constN, constK, depth);
        CacheObliviousMatrixMultiply(matrixA, aRowHigh, aColHigh, matrixB, bRowHigh, bColHigh, result, cRowHigh, cColHigh, mHigh, nHigh, kHigh, constM, constN, constK, depth);
        return;
    }

    // Near the top of the recursion: run in two parallel batches. Within one batch the four
    // calls target four different quadrants of the result; the first batch covers the low
    // half of the inner dimension, the second batch the high half.
    CommonParallel.Invoke(
        () => CacheObliviousMatrixMultiply(matrixA, shiftArow, shiftAcol, matrixB, shiftBrow, shiftBcol, result, shiftCrow, shiftCcol, mLow, nLow, kLow, constM, constN, constK, depth),
        () => CacheObliviousMatrixMultiply(matrixA, shiftArow, shiftAcol, matrixB, shiftBrow, bColHigh, result, shiftCrow, cColHigh, mLow, nHigh, kLow, constM, constN, constK, depth),
        () => CacheObliviousMatrixMultiply(matrixA, aRowHigh, shiftAcol, matrixB, shiftBrow, shiftBcol, result, cRowHigh, shiftCcol, mHigh, nLow, kLow, constM, constN, constK, depth),
        () => CacheObliviousMatrixMultiply(matrixA, aRowHigh, shiftAcol, matrixB, shiftBrow, bColHigh, result, cRowHigh, cColHigh, mHigh, nHigh, kLow, constM, constN, constK, depth));

    CommonParallel.Invoke(
        () => CacheObliviousMatrixMultiply(matrixA, shiftArow, aColHigh, matrixB, bRowHigh, shiftBcol, result, shiftCrow, shiftCcol, mLow, nLow, kHigh, constM, constN, constK, depth),
        () => CacheObliviousMatrixMultiply(matrixA, shiftArow, aColHigh, matrixB, bRowHigh, bColHigh, result, shiftCrow, cColHigh, mLow, nHigh, kHigh, constM, constN, constK, depth),
        () => CacheObliviousMatrixMultiply(matrixA, aRowHigh, aColHigh, matrixB, bRowHigh, shiftBcol, result, cRowHigh, shiftCcol, mHigh, nLow, kHigh, constM, constN, constK, depth),
        () => CacheObliviousMatrixMultiply(matrixA, aRowHigh, aColHigh, matrixB, bRowHigh, bColHigh, result, cRowHigh, cColHigh, mHigh, nHigh, kHigh, constM, constN, constK, depth));
}
/// <summary>
/// Convolution with the Bluestein sequence (parallel version). The samples are transformed in place.
/// </summary>
/// <param name="samples">Sample vector; overwritten with the result.</param>
private static void BluesteinConvolutionParallel(Complex[] samples)
{
    var sampleCount = samples.Length;
    var chirp = BluesteinSequence(sampleCount);

    // Pad to a power of two >= 2N-1 so that the radix-2 FFT can be applied.
    var paddedLength = ((sampleCount << 1) - 1).CeilingToPowerOfTwo();
    var kernel = new Complex[paddedLength];
    var signal = new Complex[paddedLength];

    // The two padded sequences are independent, so they are built and transformed concurrently.
    CommonParallel.Invoke(
        () =>
        {
            // Build and transform padded sequence b_k = exp(I*Pi*k^2/N):
            // the sequence goes at the front and is mirrored (without element 0) at the tail.
            for (var index = 0; index < sampleCount; index++)
            {
                kernel[index] = chirp[index];
            }

            for (var target = paddedLength - sampleCount + 1; target < kernel.Length; target++)
            {
                kernel[target] = chirp[paddedLength - target];
            }

            Radix2(kernel, -1);
        },
        () =>
        {
            // Build and transform padded sequence a_k = x_k * exp(-I*Pi*k^2/N)
            for (var index = 0; index < samples.Length; index++)
            {
                signal[index] = chirp[index].Conjugate() * samples[index];
            }

            Radix2(signal, -1);
        });

    // Element-wise product of the two transformed sequences.
    for (var index = 0; index < signal.Length; index++)
    {
        signal[index] = signal[index] * kernel[index];
    }

    Radix2Parallel(signal, 1);

    // Scale by 1/paddedLength and multiply by the conjugated sequence while writing back.
    var inverseLength = 1.0 / paddedLength;
    for (var index = 0; index < samples.Length; index++)
    {
        samples[index] = inverseLength * chirp[index].Conjugate() * signal[index];
    }
}
/// <summary>
/// Perform calculation of Q or R on a matrix stored in a flat array, where element (i, j)
/// lives at index j * rowCount + i.
/// For every column j in [columnStart, columnCount) the dot product of the selected work column
/// with rows [rowStart, rowCount) of that column is computed, and the work column times that dot
/// product is then subtracted from the same rows.
/// </summary>
/// <param name="work">Work array; its columns use the same stride <paramref name="rowCount"/>.</param>
/// <param name="workIndex">Index of the column in the work array.</param>
/// <param name="a">Q or R matrix storage, updated in place.</param>
/// <param name="rowStart">The first row (inclusive); it maps to element 0 of the work column.</param>
/// <param name="rowCount">The row at which processing stops (exclusive); also the column stride.</param>
/// <param name="columnStart">The first column (inclusive).</param>
/// <param name="columnCount">The column at which processing stops (exclusive).</param>
/// <param name="availableCores">Number of available CPUs.</param>
private static void ComputeQR(double[] work, int workIndex, double[] a, int rowStart, int rowCount, int columnStart, int columnCount, int availableCores)
{
    // Nothing to do for an inverted row or column range.
    if (rowCount < rowStart || columnCount < columnStart)
    {
        return;
    }

    var columnSpan = columnCount - columnStart;

    if (availableCores <= 1 || columnSpan <= 200)
    {
        var workBase = workIndex * rowCount;

        for (var col = columnStart; col < columnCount; col++)
        {
            var columnBase = col * rowCount;

            // Dot product of the work column with the current column segment.
            var dot = 0.0;
            for (var row = rowStart; row < rowCount; row++)
            {
                dot += work[workBase + row - rowStart] * a[columnBase + row];
            }

            // Subtract the work column scaled by that dot product.
            for (var row = rowStart; row < rowCount; row++)
            {
                a[columnBase + row] -= work[workBase + row - rowStart] * dot;
            }
        }

        return;
    }

    // Columns are independent of each other, so each half of the range gets its own task.
    var middle = columnStart + columnSpan / 2;
    var coresPerTask = availableCores / 2;

    CommonParallel.Invoke(
        () => ComputeQR(work, workIndex, a, rowStart, rowCount, columnStart, middle, coresPerTask),
        () => ComputeQR(work, workIndex, a, rowStart, rowCount, middle, columnCount, coresPerTask));
}
/// <summary>
/// Recursively accumulates the product of an m-by-k block of <paramref name="matrixA"/> and a
/// k-by-n block of <paramref name="matrixB"/> into an m-by-n block of <paramref name="result"/>
/// (result += A * B), using raw pointers in the base case. Elements are addressed as
/// column * stride + row, with stride <paramref name="constM"/> for A and the result and
/// <paramref name="constK"/> for B.
/// </summary>
/// <param name="matrixA">Storage of the left operand.</param>
/// <param name="shiftArow">Row offset of the block inside A.</param>
/// <param name="shiftAcol">Column offset of the block inside A.</param>
/// <param name="matrixB">Storage of the right operand.</param>
/// <param name="shiftBrow">Row offset of the block inside B.</param>
/// <param name="shiftBcol">Column offset of the block inside B.</param>
/// <param name="result">Storage of the result; the products are added to its current content.</param>
/// <param name="shiftCrow">Row offset of the block inside the result.</param>
/// <param name="shiftCcol">Column offset of the block inside the result.</param>
/// <param name="m">Number of rows of the current block of A and of the result.</param>
/// <param name="n">Number of columns of the current block of B and of the result.</param>
/// <param name="k">Number of columns of the A block, equal to the number of rows of the B block.</param>
/// <param name="constM">Column stride used for A and for the result.</param>
/// <param name="constN">Not used for indexing here; only forwarded to the recursive calls.</param>
/// <param name="constK">Column stride used for B.</param>
/// <param name="level">Recursion depth of this call; the first two subdivisions run in parallel.</param>
static void CacheObliviousMatrixMultiply(double[] matrixA, int shiftArow, int shiftAcol, double[] matrixB, int shiftBrow, int shiftBcol, double[] result, int shiftCrow, int shiftCcol, int m, int n, int k, int constM, int constN, int constK, int level)
{
    // Base case: the block is small enough to be multiplied directly.
    if (m + n <= Control.ParallelizeOrder)
    {
        // Pin the arrays themselves instead of taking "&array[0]": indexing element 0 throws
        // IndexOutOfRangeException for a zero-length array even when the loops below would not
        // touch any element (m, n or k equal to 0). Pinning the array expression yields a null
        // pointer in that case, which is never dereferenced because the loops do not run.
        // The callers remain responsible for passing arrays that match the given dimensions.
        fixed (double* resultPtr = result)
        fixed (double* aPtr = matrixA)
        fixed (double* bPtr = matrixB)
        {
            // a and c point at the current row of the A block and of the result block.
            double* a = aPtr + shiftArow;
            double* c = resultPtr + shiftCrow;
            for (var m1 = 0; m1 < m; m1++)
            {
                for (var n1 = 0; n1 < n; ++n1)
                {
                    // b points at the first element of the current column of the B block.
                    double* b = bPtr + (n1 + shiftBcol) * constK + shiftBrow;
                    double sum = 0;
                    for (var k1 = 0; k1 < k; ++k1)
                    {
                        sum += a[(k1 + shiftAcol) * constM] * b[k1];
                    }

                    c[(n1 + shiftCcol) * constM] += sum;
                }

                // Advance to the next row of A and of the result.
                a++;
                c++;
            }
        }

        return;
    }

    // divide and conquer
    int m2 = m / 2, n2 = n / 2, k2 = k / 2;

    level++;
    if (level <= 2)
    {
        // Two parallel batches. Within one batch the four calls target four different quadrants
        // of the result; the first batch covers the low half of the inner dimension, the second
        // batch the high half.
        CommonParallel.Invoke(
            () => CacheObliviousMatrixMultiply(matrixA, shiftArow, shiftAcol, matrixB, shiftBrow, shiftBcol, result, shiftCrow, shiftCcol, m2, n2, k2, constM, constN, constK, level),
            () => CacheObliviousMatrixMultiply(matrixA, shiftArow, shiftAcol, matrixB, shiftBrow, shiftBcol + n2, result, shiftCrow, shiftCcol + n2, m2, n - n2, k2, constM, constN, constK, level),
            () => CacheObliviousMatrixMultiply(matrixA, shiftArow + m2, shiftAcol, matrixB, shiftBrow, shiftBcol, result, shiftCrow + m2, shiftCcol, m - m2, n2, k2, constM, constN, constK, level),
            () => CacheObliviousMatrixMultiply(matrixA, shiftArow + m2, shiftAcol, matrixB, shiftBrow, shiftBcol + n2, result, shiftCrow + m2, shiftCcol + n2, m - m2, n - n2, k2, constM, constN, constK, level));

        CommonParallel.Invoke(
            () => CacheObliviousMatrixMultiply(matrixA, shiftArow, shiftAcol + k2, matrixB, shiftBrow + k2, shiftBcol, result, shiftCrow, shiftCcol, m2, n2, k - k2, constM, constN, constK, level),
            () => CacheObliviousMatrixMultiply(matrixA, shiftArow, shiftAcol + k2, matrixB, shiftBrow + k2, shiftBcol + n2, result, shiftCrow, shiftCcol + n2, m2, n - n2, k - k2, constM, constN, constK, level),
            () => CacheObliviousMatrixMultiply(matrixA, shiftArow + m2, shiftAcol + k2, matrixB, shiftBrow + k2, shiftBcol, result, shiftCrow + m2, shiftCcol, m - m2, n2, k - k2, constM, constN, constK, level),
            () => CacheObliviousMatrixMultiply(matrixA, shiftArow + m2, shiftAcol + k2, matrixB, shiftBrow + k2, shiftBcol + n2, result, shiftCrow + m2, shiftCcol + n2, m - m2, n - n2, k - k2, constM, constN, constK, level));
    }
    else
    {
        // Deep in the recursion: run the eight sub-products one after another.
        CacheObliviousMatrixMultiply(matrixA, shiftArow, shiftAcol, matrixB, shiftBrow, shiftBcol, result, shiftCrow, shiftCcol, m2, n2, k2, constM, constN, constK, level);
        CacheObliviousMatrixMultiply(matrixA, shiftArow, shiftAcol, matrixB, shiftBrow, shiftBcol + n2, result, shiftCrow, shiftCcol + n2, m2, n - n2, k2, constM, constN, constK, level);

        CacheObliviousMatrixMultiply(matrixA, shiftArow, shiftAcol + k2, matrixB, shiftBrow + k2, shiftBcol, result, shiftCrow, shiftCcol, m2, n2, k - k2, constM, constN, constK, level);
        CacheObliviousMatrixMultiply(matrixA, shiftArow, shiftAcol + k2, matrixB, shiftBrow + k2, shiftBcol + n2, result, shiftCrow, shiftCcol + n2, m2, n - n2, k - k2, constM, constN, constK, level);

        CacheObliviousMatrixMultiply(matrixA, shiftArow + m2, shiftAcol, matrixB, shiftBrow, shiftBcol, result, shiftCrow + m2, shiftCcol, m - m2, n2, k2, constM, constN, constK, level);
        CacheObliviousMatrixMultiply(matrixA, shiftArow + m2, shiftAcol, matrixB, shiftBrow, shiftBcol + n2, result, shiftCrow + m2, shiftCcol + n2, m - m2, n - n2, k2, constM, constN, constK, level);

        CacheObliviousMatrixMultiply(matrixA, shiftArow + m2, shiftAcol + k2, matrixB, shiftBrow + k2, shiftBcol, result, shiftCrow + m2, shiftCcol, m - m2, n2, k - k2, constM, constN, constK, level);
        CacheObliviousMatrixMultiply(matrixA, shiftArow + m2, shiftAcol + k2, matrixB, shiftBrow + k2, shiftBcol + n2, result, shiftCrow + m2, shiftCcol + n2, m - m2, n - n2, k - k2, constM, constN, constK, level);
    }
}