/// <summary>
/// Collects, per running loop and per summation loop, the memory cycles of the
/// ranks of <paramref name="A"/> and <paramref name="B"/> that take part in that loop,
/// and the length of each summation loop.
/// </summary>
/// <remarks>
/// The rank/loop assignments are read as packed 4-bit entries (entry <c>n</c> occupies
/// bits <c>4n .. 4n+3</c>) from the <c>*Store</c> members. All cycle outputs are
/// accumulated with <c>+=</c>, so the caller must pass zero-initialised memory; DEBUG
/// builds assert this.
/// </remarks>
/// <param name="cycRunA">output, <c>DT</c> entries: cycles of A per running loop</param>
/// <param name="cycRunB">output, <c>DT</c> entries: cycles of B per running loop</param>
/// <param name="lenSum">output: length of each summation loop, taken from A</param>
/// <param name="cycSumA">output: cycles of A per summation loop</param>
/// <param name="cycSumB">output: cycles of B per summation loop</param>
/// <param name="A">first operand</param>
/// <param name="B">second operand</param>
unsafe internal void GetCyclesAndRanks(int *cycRunA, int *cycRunB, int *lenSum, int *cycSumA, int *cycSumB, MultidimensionalArray A, MultidimensionalArray B) {
#if DEBUG
    // the accumulators below are only correct if they start out as zero
    for (int s = 0; s < NoOfSumCycles; s++) {
        Debug.Assert(cycSumA[s] == 0);
        Debug.Assert(cycSumB[s] == 0);
    }
    for (int r = 0; r < DT; r++) {
        Debug.Assert(cycRunA[r] == 0);
        Debug.Assert(cycRunB[r] == 0);
    }
#endif

    // running loops, operand A
    for (int slot = 0; slot < RunACount; slot++) {
        int bitPos = slot * 4;
        uint nibble = 0xFu << bitPos;
        int rankOfA = (int)((RunRankAStore & nibble) >> bitPos);
        int loopIdx = (int)((RunLoopAStore & nibble) >> bitPos);
        Debug.Assert(0 <= loopIdx && loopIdx < DT);

        cycRunA[loopIdx] += A.GetCycle(rankOfA);
    }

    // running loops, operand B
    for (int slot = 0; slot < RunBCount; slot++) {
        int bitPos = slot * 4;
        uint nibble = 0xFu << bitPos;
        int rankOfB = (int)((RunRankBStore & nibble) >> bitPos);
        int loopIdx = (int)((RunLoopBStore & nibble) >> bitPos);
        Debug.Assert(0 <= loopIdx && loopIdx < DT);

        cycRunB[loopIdx] += B.GetCycle(rankOfB);
    }

    // summation loops, operand A (this one also defines the loop length)
    for (int slot = 0; slot < SumACount; slot++) {
        int bitPos = slot * 4;
        uint nibble = 0xFu << bitPos;
        int rankOfA = (int)((SumRankAStore & nibble) >> bitPos);
        int loopIdx = (int)((SumLoopAStore & nibble) >> bitPos);
        Debug.Assert(0 <= loopIdx && loopIdx < NoOfSumCycles);

        cycSumA[loopIdx] += A.GetCycle(rankOfA);
        lenSum[loopIdx] = A.GetLength(rankOfA);
    }

    // summation loops, operand B
    for (int slot = 0; slot < SumBCount; slot++) {
        int bitPos = slot * 4;
        uint nibble = 0xFu << bitPos;
        int rankOfB = (int)((SumRankBStore & nibble) >> bitPos);
        int loopIdx = (int)((SumLoopBStore & nibble) >> bitPos);
        Debug.Assert(0 <= loopIdx && loopIdx < NoOfSumCycles);

        cycSumB[loopIdx] += B.GetCycle(rankOfB);
    }
}
/// <summary>
/// Generalized multiplication, triple product: computes an intermediate result
/// <c>Q = A*B</c> in a temporary buffer and then
/// <c>this = scale*Q*C + this*thisscale</c>.
/// </summary>
/// <param name="scale">scaling of the product</param>
/// <param name="A">first factor</param>
/// <param name="B">second factor</param>
/// <param name="C">third factor</param>
/// <param name="thisscale">scaling applied to the previous content of this array</param>
/// <param name="Tindex">index names of this array, one character per dimension</param>
/// <param name="Aindex">index names of <paramref name="A"/>, one character per dimension</param>
/// <param name="Bindex">index names of <paramref name="B"/>, one character per dimension</param>
/// <param name="Cindex">index names of <paramref name="C"/>, one character per dimension</param>
/// <exception cref="ArgumentException">
/// an index string does not have one character per dimension of its array
/// </exception>
public void Multiply(double scale, MultidimensionalArray A, MultidimensionalArray B, MultidimensionalArray C, double thisscale, string Tindex, string Aindex, string Bindex, string Cindex) {
    // check arguments
    // ===============
    if (Tindex.Length != this.Dimension) {
        throw new ArgumentException("Number of indices must match the dimension of the result array.", nameof(Tindex));
    }
    if (Aindex.Length != A.Dimension) {
        throw new ArgumentException("Number of indices must match the dimension of array A.", nameof(Aindex));
    }
    if (Bindex.Length != B.Dimension) {
        throw new ArgumentException("Number of indices must match the dimension of array B.", nameof(Bindex));
    }
    if (Cindex.Length != C.Dimension) {
        throw new ArgumentException("Number of indices must match the dimension of array C.", nameof(Cindex));
    }

    // determine indices for intermediate result
    // =========================================
    // An index of A or B is kept in Q only if the result or C refers to it.
    char[] _Aindex = Aindex.ToCharArray();
    char[] _Bindex = Bindex.ToCharArray();
    char[] _Cindex = Cindex.ToCharArray();
    char[] _Tindex = Tindex.ToCharArray();

    List<char> _Qindex = new List<char>();
    List<int> _Qlength = new List<int>();

    for (int i = 0; i < _Aindex.Length; i++) {
        char g = _Aindex[i];
        if (Array.IndexOf<char>(_Tindex, g) >= 0 || Array.IndexOf<char>(_Cindex, g) >= 0) {
            _Qindex.Add(g);
            _Qlength.Add(A.GetLength(i));
        }
    }

    for (int i = 0; i < _Bindex.Length; i++) {
        char g = _Bindex[i];
        if (_Qindex.Contains(g)) {
            // already contributed by A
            continue;
        }
        if (Array.IndexOf<char>(_Tindex, g) >= 0 || Array.IndexOf<char>(_Cindex, g) >= 0) {
            _Qindex.Add(g);
            _Qlength.Add(B.GetLength(i));
        }
    }

    // execute multiplication
    // ======================
    int iBuf;
    var Q = TempBuffer.GetTempMultidimensionalarray(out iBuf, _Qlength.ToArray());
    try {
        string Qindex = new string(_Qindex.ToArray());

        // Q = A*B
        Q.Multiply(1.0, A, B, 0.0, Qindex, Aindex, Bindex);

        // this = scale*Q*C + this*thisscale
        this.Multiply(scale, Q, C, thisscale, Tindex, Qindex, Cindex);
    } finally {
        // hand the temporary buffer back even if one of the multiplications throws
        TempBuffer.FreeTempBuffer(iBuf);
    }
}
/// <summary>
/// Verifies that the result array <paramref name="T"/> and the operands
/// <paramref name="A"/>, <paramref name="B"/> have the dimensions and rank lengths
/// this program expects.
/// </summary>
/// <remarks>
/// Aliasing of <paramref name="T"/> with an operand is not rejected.
/// For running loops, a length mismatch is tolerated on the loop <c>iTrafoIdx</c>.
/// </remarks>
/// <exception cref="ArgumentException">a dimension or rank length does not match</exception>
internal void CheckArgs(MultidimensionalArray T, MultidimensionalArray A, MultidimensionalArray B) {
    // number of dimensions
    if (T.Dimension != DT) {
        throw new ArgumentException("Wrong dimension of result array.");
    }
    if (A.Dimension != DA) {
        throw new ArgumentException("Wrong dimension of array A.");
    }
    if (B.Dimension != DB) {
        throw new ArgumentException("Wrong dimension of array B.");
    }

    // running loops: rank lengths of A must agree with the result
    for (int slot = 0; slot < RunACount; slot++) {
        int bitPos = slot * 4;
        uint nibble = 0xFu << bitPos;
        int rankOfA = (int)((RunRankAStore & nibble) >> bitPos);
        int runLoop = (int)((RunLoopAStore & nibble) >> bitPos);
        Debug.Assert(0 <= runLoop && runLoop < DT);

        bool lengthDiffers = T.GetLength(runLoop) != A.GetLength(rankOfA);
        if (lengthDiffers && iTrafoIdx != runLoop) {
            throw new ArgumentException(string.Format("Wrong length of {0}-th rank of array A.", rankOfA));
        }
    }

    // running loops: rank lengths of B must agree with the result
    for (int slot = 0; slot < RunBCount; slot++) {
        int bitPos = slot * 4;
        uint nibble = 0xFu << bitPos;
        int rankOfB = (int)((RunRankBStore & nibble) >> bitPos);
        int runLoop = (int)((RunLoopBStore & nibble) >> bitPos);
        Debug.Assert(0 <= runLoop && runLoop < DT);

        bool lengthDiffers = T.GetLength(runLoop) != B.GetLength(rankOfB);
        if (lengthDiffers && iTrafoIdx != runLoop) {
            throw new ArgumentException(string.Format("Wrong length of {0}-th rank of array B.", rankOfB));
        }
    }

    // summation loops: record the lengths found in A ...
    int[] sumLengths = new int[NoOfSumCycles];
    for (int slot = 0; slot < SumACount; slot++) {
        int bitPos = slot * 4;
        uint nibble = 0xFu << bitPos;
        int rankOfA = (int)((SumRankAStore & nibble) >> bitPos);
        int sumLoop = (int)((SumLoopAStore & nibble) >> bitPos);
        Debug.Assert(0 <= sumLoop && sumLoop < NoOfSumCycles);

        sumLengths[sumLoop] = A.GetLength(rankOfA);
    }

    // ... and require B to match them
    for (int slot = 0; slot < SumBCount; slot++) {
        int bitPos = slot * 4;
        uint nibble = 0xFu << bitPos;
        int rankOfB = (int)((SumRankBStore & nibble) >> bitPos);
        int sumLoop = (int)((SumLoopBStore & nibble) >> bitPos);
        Debug.Assert(0 <= sumLoop && sumLoop < NoOfSumCycles);

        if (sumLengths[sumLoop] != B.GetLength(rankOfB)) {
            throw new ArgumentException(string.Format("Wrong length of {0}-th rank of array B.", rankOfB));
        }
    }
}
/// <summary>
/// Adds <paramref name="alpha"/>*<paramref name="x"/> to the sub-section of this
/// array selected by <paramref name="Istart"/> and <paramref name="Iend"/>.
/// </summary>
/// <param name="alpha">scaling applied to <paramref name="x"/></param>
/// <param name="x">values to accumulate</param>
/// <param name="Istart">start indices (including)</param>
/// <param name="Iend">end indices (including)</param>
public void AccSubArray(double alpha, MultidimensionalArray x, int[] Istart, int[] Iend) {
    // the shallow sub-array is accumulated into directly
    this.ExtractSubArrayShallow(Istart, Iend).Acc(alpha, x);
}
/// <summary>
/// Adds <paramref name="alpha"/>*<paramref name="x"/> to the sub-section of this
/// array selected by <paramref name="SubArrayIdx"/>.
/// </summary>
/// <param name="alpha">scaling applied to <paramref name="x"/></param>
/// <param name="x">values to accumulate</param>
/// <param name="SubArrayIdx">
/// sub-array selector, passed unchanged to <c>ExtractSubArrayShallow</c>
/// </param>
public void AccSubArray(double alpha, MultidimensionalArray x, params int[] SubArrayIdx) {
    // the shallow sub-array is accumulated into directly
    this.ExtractSubArrayShallow(SubArrayIdx).Acc(alpha, x);
}
/// <summary>
/// Generalized tensor multiplication with index-transformation.
/// </summary>
/// <param name="scale">scaling factor, passed on to the multiplication kernel</param>
/// <param name="A">first operand</param>
/// <param name="B">second operand</param>
/// <param name="thisscale">
/// scaling factor for this array, passed on to the multiplication kernel
/// </param>
/// <param name="mp">
/// compiled multiplication program; its <c>DT</c>, <c>DA</c>, <c>DB</c> must equal the
/// dimensions of this array, <paramref name="A"/> and <paramref name="B"/>
/// </param>
/// <param name="IndexTrafo">
/// index transformation table; must not be null if <c>mp.iTrafoIdx</c> is non-negative
/// </param>
/// <param name="IndexTrafo_Length">number of entries of <paramref name="IndexTrafo"/></param>
/// <param name="trfPreOffset_A">passed on to the transformed-multiplication kernel (operand A)</param>
/// <param name="trfCycle_A">passed on to the transformed-multiplication kernel (operand A)</param>
/// <param name="trfPostOffset_A">passed on to the transformed-multiplication kernel (operand A)</param>
/// <param name="trfPreOffset_B">passed on to the transformed-multiplication kernel (operand B)</param>
/// <param name="trfCycle_B">passed on to the transformed-multiplication kernel (operand B)</param>
/// <param name="trfPostOffset_B">passed on to the transformed-multiplication kernel (operand B)</param>
/// <exception cref="ArgumentException">
/// dimension mismatch between <paramref name="mp"/> and the arrays, or a required
/// index transformation is missing
/// </exception>
unsafe public void Multiply(double scale, MultidimensionalArray A, MultidimensionalArray B, double thisscale, ref MultiplyProgram mp, int *IndexTrafo, int IndexTrafo_Length, int trfPreOffset_A = 0, int trfCycle_A = 1, int trfPostOffset_A = 0, int trfPreOffset_B = 0, int trfCycle_B = 1, int trfPostOffset_B = 0) {
    if (mp.DT != this.Dimension) {
        throw new ArgumentException("Dimension of result array does not match the multiplication program.");
    }
    if (mp.DA != A.Dimension) {
        throw new ArgumentException("Dimension of array A does not match the multiplication program.", nameof(A));
    }
    if (mp.DB != B.Dimension) {
        throw new ArgumentException("Dimension of array B does not match the multiplication program.", nameof(B));
    }

    int DT = mp.DT;

    // running cycles: one stack block holds the cycles of T, A and B back to back
    int *cycRunT = stackalloc int[3 * DT];
    int *cycRunA = cycRunT + DT;
    int *cycRunB = cycRunA + DT;
    int *lenRun = stackalloc int[DT];
    for (int i = 0; i < DT; i++) {
        cycRunT[i] = this.GetCycle(i);
        lenRun[i] = this.GetLength(i);
        // stackalloc memory has undefined content, but GetCyclesAndRanks
        // accumulates into these with '+=' - they must start at zero.
        cycRunA[i] = 0;
        cycRunB[i] = 0;
    }

    // summation cycles
    int *cycSumA = stackalloc int[MultiplyProgram.MAX_SUM_LOOPS];
    int *cycSumB = stackalloc int[MultiplyProgram.MAX_SUM_LOOPS];
    int *lenSum = stackalloc int[MultiplyProgram.MAX_SUM_LOOPS];
    for (int i = 0; i < MultiplyProgram.MAX_SUM_LOOPS; i++) {
        // same reason as above: accumulated with '+=' later on
        cycSumA[i] = 0;
        cycSumB[i] = 0;
        lenSum[i] = 0;
    }

#if DEBUG
    mp.CheckArgs(this, A, B);
#endif

    mp.GetCyclesAndRanks(cycRunA, cycRunB, lenSum, cycSumA, cycSumB, A, B);
    if (mp.NoOfSumCycles == 2) {
        // for better loop unrolling, make sure the inner loop is the smaller one
        if (lenSum[1] > lenSum[0]) {
            SwapInt(cycSumA + 0, cycSumA + 1);
            SwapInt(cycSumB + 0, cycSumB + 1);
            SwapInt(lenSum + 0, lenSum + 1);
        }
    }

    // Execute Tensor Multiplication
    // =============================
    fixed (double *pTstor = this.m_Storage, pAstor = A.m_Storage, pBstor = B.m_Storage) {
        double *pT = pTstor + this.m_Offset;
        double *pA = pAstor + A.m_Offset;
        double *pB = pBstor + B.m_Offset;

        if (mp.iTrafoIdx >= 0) {
            if (mp.iTrafoIdx > 0) {
                // transformed cycle MUST be the outer-most, i.e. the first one.
                // => need to shift some cycles.
                int kk = mp.iTrafoIdx;
                SwapInt(lenRun + 0, lenRun + kk);
                SwapInt(cycRunT + 0, cycRunT + kk);
                SwapInt(cycRunA + 0, cycRunA + kk);
                SwapInt(cycRunB + 0, cycRunB + kk);
            }

            if (IndexTrafo == null) {
                throw new ArgumentException("Index transformation required.");
            }
            int *pIndexTrafo = IndexTrafo;

            // disabled remnants of an earlier, specialised kernel call:
            //Debug.Assert(mp.NoOfSumCycles == 1);
            //Debug.Assert(DT == 2);
            //__MultiplyWTrafo_Sum1_FOR2(
            MultiplyWTrafo_Dispatch(
                DT, mp.NoOfSumCycles,
                pT, pA, pB,
                lenRun, cycRunT, cycRunA, cycRunB,
                lenSum, cycSumA, cycSumB,
                scale, thisscale,
                pIndexTrafo, IndexTrafo_Length,
                mp.TrfT0Sw, mp.TrfA0Sw, mp.TrfB0Sw,
                0, 1, 0,
                trfPreOffset_A, trfCycle_A, trfPostOffset_A,
                trfPreOffset_B, trfCycle_B, trfPostOffset_B);
        } else {
            Multiply_Dispatch(DT, mp.NoOfSumCycles, pT, pA, pB, lenRun, cycRunT, cycRunA, cycRunB, lenSum, cycSumA, cycSumB, scale, thisscale);
        }
    }
}
/// <summary>
/// adds <paramref name="other"/>*<paramref name="scl"/> to this array
/// overwriting this array; this array must have the same dimension as
/// <paramref name="other"/> and the length of each dimension must match.
/// </summary>
/// <param name="other">array to accumulate; not modified</param>
/// <param name="scl">
/// scaling of <paramref name="other"/> during accumulation
/// </param>
/// <exception cref="ApplicationException">this array is locked</exception>
/// <exception cref="ArgumentException">
/// number of dimensions or length of some dimension differs between this array and
/// <paramref name="other"/>
/// </exception>
public void Acc(double scl, MultidimensionalArray other) {
    if (m_LockedForever) {
        throw new ApplicationException("illegal call - object is locked.");
    }
    if (this.Dimension != other.Dimension) {
        throw new ArgumentException("mismatch in number of dimensions.", "other");
    }
    if (other.m_Dimension != this.m_Dimension) {
        throw new ArgumentException("Number of dimensions must be equal.");
    }
    for (int i = this.m_Dimension - 1; i >= 0; i--) {
        if (other.GetLength(i) != this.GetLength(i)) {
            throw new ArgumentException("Mismatch in dimension " + i);
        }
    }

    // Empty arrays: nothing to accumulate. Returning here also keeps the fast path
    // below from taking the address of an element of a possibly empty storage
    // array, which would throw an IndexOutOfRangeException.
    if (this.Length <= 0) {
        return;
    }

    // Optimized accumulation for continuous pieces of memory
    if (this.IsContinious && other.IsContinious) {
        // the all-zero index yields the storage position of the first entry
        int[] index = new int[Dimension];
        int thisOffset = this.Index(index);
        int otherOffset = other.Index(index);

        unsafe {
            fixed (double *pThis = &this.Storage[thisOffset], pOther = &other.Storage[otherOffset]) {
                // pThis[i] += scl * pOther[i], for all i < Length
                BLAS.daxpy(this.Length, scl, pOther, 1, pThis, 1);
            }
        }
        return;
    }

    // Standard versions
    switch (this.Dimension) {
        case 2: {
            int L0 = this.GetLength(0);
            int L1 = this.GetLength(1);

            for (int i0 = 0; i0 < L0; i0++) {
                for (int i1 = 0; i1 < L1; i1++) {
                    int ind_this = this.Index(i0, i1);
                    int ind_othr = other.Index(i0, i1);
                    this.m_Storage[ind_this] += other.m_Storage[ind_othr] * scl;
                }
            }
            return;
        }

        default: {
            // generic fallback: visit every entry of this array by its index
            double[] other_stor = other.m_Storage;
            ApplyAll(delegate(int[] idx, ref double entry) {
                int ind_other = other.Index(idx);
                entry += scl * other_stor[ind_other];
            });
            return;
        }
    }
}
/// <summary>
/// Computes the eigenvalues, and optionally the eigenvectors, of the square matrix
/// <paramref name="Inp"/> through the LAPACK routine DSYEV (lower triangle is used).
/// </summary>
/// <param name="Inp">input matrix; must be square</param>
/// <param name="ComputeVectors">
/// true: eigenvalues and eigenvectors are computed; false: eigenvalues only
/// </param>
/// <returns>
/// the eigenvalues, and the eigenvectors as an N-by-N array, or null for the latter if
/// <paramref name="ComputeVectors"/> is false
/// </returns>
/// <exception cref="ArgumentException"><paramref name="Inp"/> is not square</exception>
/// <exception cref="ArithmeticException">LAPACK reported a non-zero info code</exception>
static (double[] EigenVals, MultidimensionalArray EigenVect) EigenspaceSymmInternal(this IMatrix Inp, bool ComputeVectors) {
    if (Inp.NoOfCols != Inp.NoOfRows) {
        throw new ArgumentException("Not supported for non-symmetrical matrices.");
    }

    int N = Inp.NoOfRows;
    int JOBZ = ComputeVectors ? 'V' : 'N';
    // 'N': Compute eigenvalues only;
    // 'V': Compute eigenvalues and eigenvectors.
    int UPLO = 'L';
    // 'U': Upper triangle of A is stored;
    // 'L': Lower triangle of A is stored.

    unsafe {
        double[] InpBuffer = TempBuffer.GetTempBuffer(out int RefInp, N * N);
        try {
            double[] Eigis = new double[N];
            MultidimensionalArray EigiVect = ComputeVectors ? MultidimensionalArray.Create(N, N) : null;

            fixed (double *pInp = InpBuffer, pEigis = Eigis) {
                CopyToUnsafeBuffer(Inp, pInp, true);

                int LDA = N;
                int info;

                // phase 1: work size estimation
                double WorkSize;
                int LWORK = -1; // triggers estimation
                LAPACK.F77_LAPACK.DSYEV_(ref JOBZ, ref UPLO, ref N, pInp, ref LDA, pEigis, &WorkSize, ref LWORK, out info);
                if (info != 0) {
                    throw new ArithmeticException("LAPACK DSYEV (symmetrical matrix eigenvalues) returned info " + info);
                }
                LWORK = (int)WorkSize;

                // phase 2: computation
                double[] WorkBuffer = TempBuffer.GetTempBuffer(out int RefWork, LWORK * 1);
                try {
                    fixed (double *pWork = WorkBuffer) {
                        LAPACK.F77_LAPACK.DSYEV_(ref JOBZ, ref UPLO, ref N, pInp, ref LDA, pEigis, pWork, ref LWORK, out info);
                    }
                } finally {
                    // the work buffer is released before the input buffer, on every path
                    TempBuffer.FreeTempBuffer(RefWork);
                }

                if (info != 0) {
                    throw new ArithmeticException("LAPACK DSYEV (symmetrical matrix eigenvalues) returned info " + info);
                }

                if (EigiVect != null) {
                    // the buffer is copied into the eigenvector array after the call
                    CopyFromUnsafeBuffer(EigiVect, pInp, true);
                }
            }

            return (Eigis, EigiVect);
        } finally {
            // released on success and on every exception path
            TempBuffer.FreeTempBuffer(RefInp);
        }
    }
}
/// <summary>
/// Tensor multiplication with one summation index (c): compares
/// <c>Multiply</c>, called with both operand orders, against plain nested loops.
/// </summary>
public static void MultiplyTest0()
{
    // two lengths of the summation index, intended to hit the unrolled and the standard path
    foreach (int lenC in new int[] { 2, 60 })
    {
        int lenI = 120;
        int lenJ = 21;
        int lenK = 40;

        MultidimensionalArray opA = MultidimensionalArray.Create(lenI, lenC, lenK);
        MultidimensionalArray opB = MultidimensionalArray.Create(lenK, lenC, lenJ);
        Console.WriteLine($"number of operands in A: {opA.Length}");
        Console.WriteLine($"number of operands in B: {opB.Length}");

        MultidimensionalArray resultAB = MultidimensionalArray.Create(lenJ, lenK, lenI);
        MultidimensionalArray resultBA = MultidimensionalArray.Create(lenJ, lenK, lenI);
        MultidimensionalArray reference = MultidimensionalArray.Create(lenJ, lenK, lenI);

        // random operands
        Random rng = new Random();
        opA.ApplyAll(x => rng.NextDouble());
        opB.ApplyAll(x => rng.NextDouble());

        double alpha = 0.99;
        double beta = 0;

        // tensorized multiplication, both operand orders inside the timed section
        Stopwatch timer = Stopwatch.StartNew();
        resultAB.Multiply(alpha, opA, opB, beta, "jki", "ick", "kcj");
        resultBA.Multiply(alpha, opB, opA, beta, "jki", "kcj", "ick");
        timer.Stop();
        Console.WriteLine($"runtime of tensorized multiplication: {timer.ElapsedMilliseconds} millisec.");

        // reference computation with explicit loops
        double errsum = 0;
        timer.Restart();
        for (int i = 0; i < lenI; i++)
        {
            for (int j = 0; j < lenJ; j++)
            {
                for (int k = 0; k < lenK; k++)
                {
                    double sum = 0;
                    for (int c = 0; c < lenC; c++)
                    {
                        sum += opA[i, c, k] * opB[k, c, j];
                    }

                    reference[j, k, i] = sum * alpha + reference[j, k, i] * beta;

                    errsum += Math.Abs(resultAB[j, k, i] - reference[j, k, i]);
                    errsum += Math.Abs(resultBA[j, k, i] - reference[j, k, i]);
                }
            }
        }
        timer.Stop();

        Console.WriteLine($"runtime of loop multiplication: {timer.ElapsedMilliseconds} millisec.");
        Console.WriteLine($"total error: {errsum}");

        double thres = 1.0e-13;
        Assert.IsTrue(errsum < thres);
    }
}
/// <summary>
/// Tensor multiplication with an index transformation T(m) and summation over k:
/// compares the compiled <c>MultiplyProgram</c> variants, for both operand orders,
/// against plain nested loops.
/// </summary>
public static void MultiplyTrafoTest0()
{
    foreach (int lenK in new int[] { 210 })
    {
        int lenI = 120;
        int lenM = 43;

        MultidimensionalArray opA = MultidimensionalArray.Create(lenI, lenK, 2 * lenM);
        MultidimensionalArray opB = MultidimensionalArray.Create(2 * lenM, lenK);
        Console.WriteLine($"number of operands in A: {opA.Length}");
        Console.WriteLine($"number of operands in B: {opB.Length}");

        int[] mTrafo = new int[lenM];

        MultidimensionalArray resultAB = MultidimensionalArray.Create(lenI, lenM);
        MultidimensionalArray resultBA = MultidimensionalArray.Create(lenI, lenM);
        MultidimensionalArray reference = MultidimensionalArray.Create(lenI, lenM);

        // random operands; all three result arrays start from the same random content
        Random rng = new Random();
        opA.ApplyAll(x => rng.NextDouble());
        opB.ApplyAll(x => rng.NextDouble());
        resultAB.ApplyAll(x => rng.NextDouble());
        reference.Set(resultAB);
        resultBA.Set(resultAB);

        // random index transformation m -> [0, 2*M)
        for (int m = 0; m < lenM; m++)
        {
            mTrafo[m] = rng.Next(2 * lenM);
            Debug.Assert(mTrafo[m] < 2 * lenM);
        }

        double alpha = 0.67;
        double beta = 1.3;

        var progAB = MultidimensionalArray.MultiplyProgram.Compile("im", "ikT(m)", "T(m)k", true);
        var progBA = MultidimensionalArray.MultiplyProgram.Compile("im", "T(m)k", "ikT(m)", true);

        // tensorized multiplication; only the first call is timed
        Stopwatch tensorTimer = Stopwatch.StartNew();
        resultAB.Multiply(alpha, opA, opB, beta, ref progAB, mTrafo);
        tensorTimer.Stop();
        resultBA.Multiply(alpha, opB, opA, beta, ref progBA, mTrafo);
        Console.WriteLine($"runtime of tensorized multiplication: {tensorTimer.ElapsedMilliseconds} millisec.");

        // reference computation with explicit loops
        Stopwatch loopTimer = Stopwatch.StartNew();
        double errSum = 0;
        for (int i = 0; i < lenI; i++)
        {
            for (int m = 0; m < lenM; m++)
            {
                int mTransformed = mTrafo[m];

                double sum = 0;
                for (int k = 0; k < lenK; k++)
                {
                    sum += opA[i, k, mTransformed] * opB[mTransformed, k];
                }

                reference[i, m] = sum * alpha + reference[i, m] * beta;

                errSum += Math.Abs(resultAB[i, m] - reference[i, m]);
                errSum += Math.Abs(resultBA[i, m] - reference[i, m]);
            }
        }
        loopTimer.Stop();

        Console.WriteLine($"runtime of loop multiplication: {loopTimer.ElapsedMilliseconds} millisec.");
        Console.WriteLine($"total error: {errSum}");

        double thres = 1.0e-6;
        Assert.IsTrue(errSum < thres);
    }
}
/// <summary>
/// Tensor multiplication with two summation indices (k, r): compares
/// <c>Multiply</c>, called with both operand orders, against plain nested loops.
/// </summary>
public static void MultiplyTest3()
{
    // two lengths of the summation index k, intended to hit the unrolled and the standard path
    foreach (int lenK in new int[] { 2, 21 })
    {
        int lenI = 12;
        int lenM = 43;
        int lenN = 63;
        int lenR = 21;

        MultidimensionalArray opA = MultidimensionalArray.Create(lenI, lenR, lenK, lenM);
        MultidimensionalArray opB = MultidimensionalArray.Create(lenI, lenK, lenN, lenR);
        Console.WriteLine($"number of operands in A: {opA.Length}");
        Console.WriteLine($"number of operands in B: {opB.Length}");

        MultidimensionalArray resultAB = MultidimensionalArray.Create(lenI, lenM, lenN);
        MultidimensionalArray resultBA = MultidimensionalArray.Create(lenI, lenM, lenN);
        MultidimensionalArray reference = MultidimensionalArray.Create(lenI, lenM, lenN);

        // random operands; all three result arrays start from the same random content
        Random rng = new Random();
        opA.ApplyAll(x => rng.NextDouble());
        opB.ApplyAll(x => rng.NextDouble());
        resultAB.ApplyAll(x => rng.NextDouble());
        reference.Set(resultAB);
        resultBA.Set(resultAB);

        double alpha = 0.67;
        double beta = 1.3;

        // tensorized multiplication; only the first call is timed
        Stopwatch tensorTimer = Stopwatch.StartNew();
        resultAB.Multiply(alpha, opA, opB, beta, "imn", "irkm", "iknr");
        tensorTimer.Stop();
        resultBA.Multiply(alpha, opB, opA, beta, "imn", "iknr", "irkm");
        Console.WriteLine($"runtime of tensorized multiplication: {tensorTimer.ElapsedMilliseconds} millisec.");

        // reference computation with explicit loops
        Stopwatch loopTimer = Stopwatch.StartNew();
        double errSum = 0;
        for (int i = 0; i < lenI; i++)
        {
            for (int n = 0; n < lenN; n++)
            {
                for (int m = 0; m < lenM; m++)
                {
                    double sum = 0;
                    for (int r = 0; r < lenR; r++)
                    {
                        for (int k = 0; k < lenK; k++)
                        {
                            sum += opA[i, r, k, m] * opB[i, k, n, r];
                        }
                    }

                    reference[i, m, n] = sum * alpha + reference[i, m, n] * beta;

                    errSum += Math.Abs(resultAB[i, m, n] - reference[i, m, n]);
                    errSum += Math.Abs(resultBA[i, m, n] - reference[i, m, n]);
                }
            }
        }
        loopTimer.Stop();

        Console.WriteLine($"runtime of loop multiplication: {loopTimer.ElapsedMilliseconds} millisec.");
        Console.WriteLine($"total error: {errSum}");

        double thres = 1.0e-6;
        Assert.IsTrue(errSum < thres);
    }
}
/// <summary>
/// Tensor multiplication without any summation index (pure tensorization):
/// compares <c>Multiply</c>, called with both operand orders, against plain nested loops.
/// </summary>
public static void MultiplyTest2()
{
    int lenI = 125;
    int lenK = 21;
    int lenM = 43;
    int lenN = 63;

    MultidimensionalArray opA = MultidimensionalArray.Create(lenI, lenK, lenM);
    MultidimensionalArray opB = MultidimensionalArray.Create(lenI, lenK, lenN);
    Console.WriteLine($"number of operands in A: {opA.Length}");
    Console.WriteLine($"number of operands in B: {opB.Length}");

    MultidimensionalArray resultAB = MultidimensionalArray.Create(lenI, lenK, lenM, lenN);
    MultidimensionalArray resultBA = MultidimensionalArray.Create(lenI, lenK, lenM, lenN);
    MultidimensionalArray reference = MultidimensionalArray.Create(lenI, lenK, lenM, lenN);

    // random operands
    Random rng = new Random();
    opA.ApplyAll(x => rng.NextDouble());
    opB.ApplyAll(x => rng.NextDouble());

    // tensorized multiplication; only the first call is timed
    Stopwatch tensorTimer = Stopwatch.StartNew();
    resultAB.Multiply(1.0, opA, opB, 0.0, "ikmn", "ikm", "ikn");
    tensorTimer.Stop();
    resultBA.Multiply(1.0, opB, opA, 0.0, "ikmn", "ikn", "ikm");
    Console.WriteLine($"runtime of tensorized multiplication: {tensorTimer.ElapsedMilliseconds} millisec.");

    // reference computation with explicit loops
    Stopwatch loopTimer = Stopwatch.StartNew();
    double errSum = 0;
    for (int i = 0; i < lenI; i++)
    {
        for (int k = 0; k < lenK; k++)
        {
            for (int n = 0; n < lenN; n++)
            {
                for (int m = 0; m < lenM; m++)
                {
                    reference[i, k, m, n] = opA[i, k, m] * opB[i, k, n];

                    errSum += Math.Abs(reference[i, k, m, n] - resultAB[i, k, m, n]);
                    errSum += Math.Abs(reference[i, k, m, n] - resultBA[i, k, m, n]);
                }
            }
        }
    }
    loopTimer.Stop();

    Console.WriteLine($"runtime of loop multiplication: {loopTimer.ElapsedMilliseconds} millisec.");
    Console.WriteLine($"total error: {errSum}");

    double thres = 1.0e-13;
    Assert.IsTrue(errSum < thres);
}