/// <summary>
/// Decodes the 4-bit packed rank/loop stores of this object and accumulates the
/// corresponding cycle values of <paramref name="A"/> and <paramref name="B"/>
/// into the caller-provided buffers.
/// </summary>
/// <param name="cycRunA">
/// Accumulator indexed by the loop index decoded from <c>RunLoopAStore</c>; receives
/// <c>A.GetCycle(rank)</c> for every one of the first <c>RunACount</c> slots.
/// Debug builds assert that the first <c>DT</c> entries are zero on entry.
/// </param>
/// <param name="cycRunB">
/// Same as <paramref name="cycRunA"/>, but driven by <c>RunRankBStore</c>, <c>RunLoopBStore</c>,
/// <c>RunBCount</c> and <paramref name="B"/>.
/// </param>
/// <param name="lenSum">
/// Overwritten (not accumulated) with <c>A.GetLength(rank)</c> for every slot of the A-side sum stores.
/// Only <paramref name="A"/> contributes here; NOTE(review): presumably the matching rank of
/// <paramref name="B"/> has the same length - confirm against the caller.
/// </param>
/// <param name="cycSumA">
/// Accumulator indexed by the loop index decoded from <c>SumLoopAStore</c>; receives
/// <c>A.GetCycle(rank)</c>. Debug builds assert that the first <c>NoOfSumCycles</c> entries are zero on entry.
/// </param>
/// <param name="cycSumB">
/// Same as <paramref name="cycSumA"/>, but driven by <c>SumRankBStore</c>, <c>SumLoopBStore</c>,
/// <c>SumBCount</c> and <paramref name="B"/>.
/// </param>
/// <param name="A">Array queried via <c>GetCycle</c> and <c>GetLength</c>.</param>
/// <param name="B">Array queried via <c>GetCycle</c>.</param>
unsafe internal void GetCyclesAndRanks(int *cycRunA, int *cycRunB, int *lenSum, int *cycSumA, int *cycSumB, MultidimensionalArray A, MultidimensionalArray B)
            {
#if DEBUG
                // All cycle buffers are only ever added to below, so they are expected to arrive zeroed.
                for (int s = 0; s < NoOfSumCycles; s++)
                {
                    Debug.Assert(cycSumA[s] == 0);
                    Debug.Assert(cycSumB[s] == 0);
                }

                for (int r = 0; r < DT; r++)
                {
                    Debug.Assert(cycRunA[r] == 0);
                    Debug.Assert(cycRunB[r] == 0);
                }
#endif

                // Each store packs one 4-bit entry per slot; slot k occupies bits [4k, 4k+3].

                // running loops, operand A
                for (int slot = 0, bitPos = 0; slot < RunACount; slot++, bitPos += 4)
                {
                    uint nibble = 0xFu << bitPos;

                    int rank   = (int)((RunRankAStore & nibble) >> bitPos);
                    int target = (int)((RunLoopAStore & nibble) >> bitPos);
                    Debug.Assert(0 <= target && target < DT);

                    cycRunA[target] += A.GetCycle(rank);
                }

                // running loops, operand B
                for (int slot = 0, bitPos = 0; slot < RunBCount; slot++, bitPos += 4)
                {
                    uint nibble = 0xFu << bitPos;

                    int rank   = (int)((RunRankBStore & nibble) >> bitPos);
                    int target = (int)((RunLoopBStore & nibble) >> bitPos);
                    Debug.Assert(0 <= target && target < DT);

                    cycRunB[target] += B.GetCycle(rank);
                }

                // summation loops, operand A (also records the loop length)
                for (int slot = 0, bitPos = 0; slot < SumACount; slot++, bitPos += 4)
                {
                    uint nibble = 0xFu << bitPos;

                    int rank   = (int)((SumRankAStore & nibble) >> bitPos);
                    int target = (int)((SumLoopAStore & nibble) >> bitPos);
                    Debug.Assert(0 <= target && target < NoOfSumCycles);

                    cycSumA[target] += A.GetCycle(rank);
                    lenSum[target]   = A.GetLength(rank);
                }

                // summation loops, operand B
                for (int slot = 0, bitPos = 0; slot < SumBCount; slot++, bitPos += 4)
                {
                    uint nibble = 0xFu << bitPos;

                    int rank   = (int)((SumRankBStore & nibble) >> bitPos);
                    int target = (int)((SumLoopBStore & nibble) >> bitPos);
                    Debug.Assert(0 <= target && target < NoOfSumCycles);

                    cycSumB[target] += B.GetCycle(rank);
                }
            }