/// <summary>
/// Block-wise matrix multiply that stages <paramref name="bs"/> x <paramref name="bs"/> tiles of A, B and C
/// in scratch buffers via <c>CopyBlockWithPadding</c>, runs
/// <c>MatrixUtils.MultiplyBlockUnroll8xhPadded</c> on each tile triple and copies the C tile back.
/// Column tiles of B are distributed over <c>Parallel.For</c> iterations.
/// </summary>
/// <param name="Ap">Pointer to the A matrix data.</param>
/// <param name="AN">First dimension of A as stored; swapped with <paramref name="AM"/> when <paramref name="transposeA"/> is set.</param>
/// <param name="AM">Second dimension of A as stored; swapped with <paramref name="AN"/> when <paramref name="transposeA"/> is set.</param>
/// <param name="Bp">Pointer to the B matrix data.</param>
/// <param name="BN">First dimension of B as stored; swapped with <paramref name="BM"/> when <paramref name="transposeB"/> is set.</param>
/// <param name="BM">Second dimension of B as stored; swapped with <paramref name="BN"/> when <paramref name="transposeB"/> is set.</param>
/// <param name="Cp">Pointer to the C matrix data; tiles are read from it and written back to it.</param>
/// <param name="CN">First dimension of C, forwarded to the tile copy helpers.</param>
/// <param name="CM">Second dimension of C, forwarded to the tile copy helpers.</param>
/// <param name="bs">Tile edge length; asserted to be at least 8.</param>
/// <param name="transposeA">Forwarded to the A tile copy; also triggers the AN/AM swap.</param>
/// <param name="transposeB">Forwarded to the B tile copy; also triggers the BN/BM swap.</param>
public static unsafe void MultiplyBlockUnroll8xhParallelWithPadding(float *Ap, int AN, int AM, float *Bp, int BN, int BM, float *Cp, int CN, int CM, int bs, bool transposeA = false, bool transposeB = false)
{
    // From here on AN/AM and BN/BM describe the logical (post-transpose) shapes.
    if (transposeA)
    {
        var tmp = AN;
        AN = AM;
        AM = tmp;
    }

    if (transposeB)
    {
        var tmp = BN;
        BN = BM;
        BM = tmp;
    }

    Assert.IsTrue(bs >= 8, "Matrix Mul block size should be >= 8");

    // Number of column tiles of B, rounded up so a trailing partial tile is still processed.
    int colBlockCount = (BM / bs) + (BM % bs > 0 ? 1 : 0);

    Parallel.For(0, colBlockCount, colB =>
    {
        // Scratch tiles are allocated per parallel iteration, so iterations never share buffers.
        float[] blockA = new float[bs * bs];
        float[] blockB = new float[bs * bs];
        float[] blockC = new float[bs * bs];

        // colB is a tile index; the copy helpers take an element offset.
        int colStart = colB * bs;

        for (int rowA = 0; rowA < AN; rowA += bs)
        {
            for (int l = 0; l < AM; l += bs)
            {
                CopyBlockWithPadding(Ap, rowA, AN, l, AM, blockA, bs, transposeA);
                CopyBlockWithPadding(Bp, l, BN, colStart, BM, blockB, bs, transposeB);

                // NOTE(review): the C tile is re-read and written back on every step of the
                // inner loop; presumably the kernel accumulates into blockC, in which case the
                // load/store could be hoisted around this loop - confirm against the helpers.
                CopyBlockWithPadding(Cp, rowA, CN, colStart, CM, blockC, bs);

                fixed (float *blockAp = blockA, blockBp = blockB, blockCp = blockC)
                {
                    MatrixUtils.MultiplyBlockUnroll8xhPadded(blockAp, blockBp, blockCp, bs);
                }

                CopyBlockWithPadding(blockC, Cp, rowA, CN, colStart, CM, bs);
            }
        }
    });
}
/// <summary>
/// Handles a single row offset of A: walks every column offset of B and every offset along
/// the A column dimension in steps of <c>bs</c>, staging tiles in the supplied scratch buffers,
/// invoking <c>MatrixUtils.MultiplyBlockUnroll8xhPadded</c> and copying the C tile back.
/// </summary>
/// <remarks>
/// The matrices, their dimensions, the step <c>bs</c> and the transpose flags are members
/// declared outside this method.
/// </remarks>
/// <param name="blockA">Scratch buffer that receives the current A tile.</param>
/// <param name="blockB">Scratch buffer that receives the current B tile.</param>
/// <param name="blockC">Scratch buffer that receives the current C tile and is copied back into C.</param>
/// <param name="rowA">Row offset into A (and C) handled by this call.</param>
private unsafe void ExecutOverRowA(float *blockA, float *blockB, float *blockC, int rowA)
{
    int columnOffset = 0;
    while (columnOffset < BM)
    {
        int depthOffset = 0;
        while (depthOffset < AM)
        {
            // Stage the three tiles taking part in this step.
            MatrixUtils.CopyBlockWithPadding(A, rowA, AN, depthOffset, AM, blockA, bs, transposeA);
            MatrixUtils.CopyBlockWithPadding(B, depthOffset, BN, columnOffset, BM, blockB, bs, transposeB);
            MatrixUtils.CopyBlockWithPadding(C, rowA, CN, columnOffset, CM, blockC, bs);

            MatrixUtils.MultiplyBlockUnroll8xhPadded(blockA, blockB, blockC, bs);

            // Write the updated C tile back to its place in the full matrix.
            MatrixUtils.CopyBlockWithPadding(blockC, C, rowA, CN, columnOffset, CM, bs);

            depthOffset += bs;
        }

        columnOffset += bs;
    }
}