Example #1
		// z = x + y elementwise over stored values; z and x must match in Dim and nNonzero.
		public static void ScalarAddVector(SparseColumnVector z, SparseColumnVector x, float y)
		{
			if (z.Dim != x.Dim || z.nNonzero != x.nNonzero)
			{
				throw new Exception("Dimension mismatch.");
			}
			var zVal = z.Val;
			var xVal = x.Val;
			int nz = z.nNonzero;
			for (int IdxRow = 0; IdxRow < nz; ++IdxRow)
			{
				zVal[IdxRow] = xVal[IdxRow] + y;
			}
		}
Example #2
		// z = x * y over stored values. Note: unlike ScalarAddVector, no dimension
		// check is performed here; z and x are assumed to match in nNonzero.
		public static void ScalarMultiplyVector(SparseColumnVector z, SparseColumnVector x, float y)
		{
			int zNz = z.nNonzero;
			var zVal = z.Val;
			var xVal = x.Val;
			for (int IdxRow = 0; IdxRow < zNz; ++IdxRow)
			{
				zVal[IdxRow] = xVal[IdxRow] * y;
			}
		}
Example #3
		// In-place variant: z += y over stored values.
		public static void ScalarAddVector(SparseColumnVector z, float y)
		{
			var zVal = z.Val;
			int nNonzero = z.nNonzero;
			for (int IdxRow = 0; IdxRow < nNonzero; ++IdxRow)
			{
				zVal[IdxRow] += y;
			}
		}
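A minimal usage sketch for the two ScalarAddVector overloads above; the enclosing class name MatrixOperation is inferred from the call sites in Example #16, and the vectors z and x are hypothetical, pre-allocated with matching Dim and nNonzero.

		// Sketch (hypothetical vectors): the three-argument form writes x + 0.5f into z,
		// throwing if z and x differ in Dim or nNonzero.
		MatrixOperation.ScalarAddVector(z, x, 0.5f);
		// In-place form from Example #3: z += 0.5f over stored values.
		MatrixOperation.ScalarAddVector(z, 0.5f);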
Example #4
		// In-place: x = x .* y over stored values; x and y must match in Dim and nNonzero.
		public static void ElementwiseVectorMultiplyVector(SparseColumnVector x, SparseColumnVector y)
		{
			if (x.Dim != y.Dim || x.nNonzero != y.nNonzero)
			{
				throw new Exception("Dimension mismatch.");
			}
			int nz = x.nNonzero;
			var xVal = x.Val;
			var yVal = y.Val;
			for (int IdxRow = 0; IdxRow < nz; ++IdxRow)
			{
				xVal[IdxRow] *= yVal[IdxRow];
			}
		}
Example #5
		/*
		 * z = x ./ y, where x and y are vectors
		 */
		public static void ElementwiseVectorDivideVector(SparseColumnVector z, SparseColumnVector x, SparseColumnVector y)
		{
			if (z.Dim != x.Dim || z.Dim != y.Dim || z.nNonzero != x.nNonzero || z.nNonzero != y.nNonzero)
			{
				throw new Exception("Dimension mismatch.");
			}
			int nz = z.nNonzero;
			var zVal = z.Val;
			var xVal = x.Val;
			var yVal = y.Val;
			for (int IdxRow = 0; IdxRow < nz; ++IdxRow)
			{
				zVal[IdxRow] = xVal[IdxRow] / (yVal[IdxRow] + 1e-12f); // epsilon guards against stored zeros in y
			}
		}
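The 1e-12f term above guards against stored zeros in y; a minimal sketch of the failure it prevents (illustrative values, not from the source):

		// Without the guard, a stored zero in y yields an infinite quotient:
		float unguarded = 1.0f / 0.0f;             // float.IsInfinity(unguarded) == true
		// With the guard, the quotient stays finite, at the cost of a tiny bias:
		float guarded = 1.0f / (0.0f + 1e-12f);    // ~1e12, finite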
Example #6
		// Constructs an nRows-by-nCols sparse matrix; when SameSparsePatternForAllColumn
		// is true, a shared sparsity-pattern buffer is allocated for all columns.
		public SparseMatrix(int NumRows, int NumCols, bool SameSparsePatternForAllColumn)
		{
			nRows = NumRows;
			nCols = NumCols;
			SparseColumnVectors = new SparseColumnVector[nCols];
			for (int IdxCol = 0; IdxCol < nCols; IdxCol++)
			{
				SparseColumnVectors[IdxCol] = new SparseColumnVector(nRows);
			}
			flag_SameSparsePatterForAllColumns = SameSparsePatternForAllColumn;
			if (flag_SameSparsePatterForAllColumns)
			{
				SparsePatternOfEachColumn = new int[nRows];
			}
		}
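A minimal usage sketch of the shared-pattern constructor, mirroring how Example #16 below builds its gradient buffer; nInput, nHid, and sparsePattern are hypothetical names.

		// Allocate a matrix whose columns all share one sparsity pattern, then
		// install that pattern (SetSparsePatternForAllColumn appears in Example #16):
		SparseMatrix grad = new SparseMatrix(nInput, nHid, true);
		grad.SetSparsePatternForAllColumn(sparsePattern);   // sparsePattern: int[] of row indices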
Example #7
		// This constructor performs a deep copy of the source sparse matrix
		public SparseMatrix(SparseMatrix SourceSparseMatrix)
		{
			nRows = SourceSparseMatrix.nRows;
			nCols = SourceSparseMatrix.nCols;
			SparseColumnVectors = new SparseColumnVector[nCols];
			for (int IdxCol = 0; IdxCol < nCols; IdxCol++)
			{
				SparseColumnVectors[IdxCol] = new SparseColumnVector(SourceSparseMatrix.SparseColumnVectors[IdxCol]);                
			}
			flag_SameSparsePatterForAllColumns = SourceSparseMatrix.flag_SameSparsePatterForAllColumns;
			if (flag_SameSparsePatterForAllColumns)
			{
				SparsePatternOfEachColumn = new int[nRows];
				Array.Copy(SourceSparseMatrix.SparsePatternOfEachColumn, SparsePatternOfEachColumn, nRows);
			}
		}
Example #8
		// Copies Key and Val from the source; requires matching Dim and nNonzero.
		public void DeepCopySparseColumnVectorFrom(SparseColumnVector SourceSparseColumnVector)
		{
			if (Dim != SourceSparseColumnVector.Dim || nNonzero != SourceSparseColumnVector.nNonzero)
			{
				throw new Exception("Dimension or nNonzero mismatch between the source and target SparseColumnVectors");
			}

			Array.Copy(SourceSparseColumnVector.Key, Key, nNonzero);
			Array.Copy(SourceSparseColumnVector.Val, Val, nNonzero);
		}
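A minimal usage sketch contrasting the two copy paths; v is a hypothetical, already-populated vector.

		// Fresh deep copy via the copy constructor (Example #11 below):
		SparseColumnVector w = new SparseColumnVector(v);
		// Or reuse an existing, identically-shaped vector's storage instead:
		w.DeepCopySparseColumnVectorFrom(v);   // throws on Dim or nNonzero mismatch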
Example #9
		// Constructs an nRows-by-nCols sparse matrix with one SparseColumnVector per column.
		public SparseMatrix(int NumRows, int NumCols)
		{
			nRows = NumRows;
			nCols = NumCols;
			SparseColumnVectors = new SparseColumnVector[nCols];
			for (int IdxCol = 0; IdxCol < nCols; IdxCol++)
			{
				SparseColumnVectors[IdxCol] = new SparseColumnVector(nRows);
			}            
		}
Example #10
		/*
		 * z = x^T y
		 * Inner product
		 */
		public static float InnerProduct(SparseColumnVector x, SparseColumnVector y)
		{
			if (x.Dim != y.Dim || x.nNonzero != y.nNonzero)
			{
				throw new Exception("Dimension mismatch.");
			}
			float z = 0.0f;
			int nNonzero = x.nNonzero;
			var xVal = x.Val;
			var yVal = y.Val;
			for (int Idx = 0; Idx < nNonzero; ++Idx)
			{
				z += xVal[Idx] * yVal[Idx];
			}
			return z;
		}
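The guard above checks only dimensions and nonzero counts, and the loop pairs stored values by position, so the result equals x^T y only when x.Key and y.Key are identical. A hedged sketch of a pattern-aware variant (an assumption, not part of the source; assumes Key arrays are sorted ascending):

		// Sketch: inner product via a two-pointer merge over sorted Key arrays,
		// correct even when x and y have different sparsity patterns.
		public static float InnerProductMerged(SparseColumnVector x, SparseColumnVector y)
		{
			float z = 0.0f;
			int i = 0, j = 0;
			while (i < x.nNonzero && j < y.nNonzero)
			{
				if (x.Key[i] == y.Key[j]) { z += x.Val[i] * y.Val[j]; ++i; ++j; }
				else if (x.Key[i] < y.Key[j]) { ++i; }
				else { ++j; }
			}
			return z;
		}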
Example #11
		// This constructor initializes the sparse column vector by deep copying the source
		public SparseColumnVector(SparseColumnVector SourceSparseColumnVector)
		{
			Dim = SourceSparseColumnVector.Dim;
			nNonzero = SourceSparseColumnVector.nNonzero;
			Key = new int[nNonzero];
			Val = new float[nNonzero];
			DeepCopySparseColumnVectorFrom(SourceSparseColumnVector);
		}
Example #12
		// z = X^T y; X is stored column-major (DenseMatrixValue[c] is column c of X).
		public static void MatrixTransposeMultiplyVector(DenseColumnVector z, DenseMatrix X, SparseColumnVector y)
		{
			if (z.Dim != X.nCols || X.nRows != y.Dim)
			{
				throw new Exception("Dimension mismatch.");
			}
			int zDim = z.Dim;
			int yNz = y.nNonzero;
			var XMat = X.DenseMatrixValue;
			var zVal = z.VectorValue;
			var yKey = y.Key;
			var yVal = y.Val;
			for (int IdxRow = 0; IdxRow < zDim; ++IdxRow)
			{
				float sum = 0.0f;
				var XCol = XMat[IdxRow].VectorValue;
				for (int Idx = 0; Idx < yNz; ++Idx)
				{
					sum += XCol[yKey[Idx]] * yVal[Idx];
				}
				zVal[IdxRow] = sum;
			}
		}
Example #13
		// z = X y, evaluated only at the rows listed in z.Key; z's sparsity
		// pattern acts as an output mask.
		public static void MatrixMultiplyVector(SparseColumnVector z, DenseMatrix X, DenseColumnVector y)
		{
			// Dimension check
			if (X.nCols != y.Dim || z.Dim != X.nRows)
			{
				throw new Exception("Dimension mismatch.");
			}
			// Computation
			var zVal = z.Val;
			var zKey = z.Key;
			Array.Clear(zVal, 0, zVal.Length);
			var zNNonzero = z.nNonzero;
			var xnCols = X.nCols;
			for (int Idx = 0; Idx < xnCols; ++Idx)
			{
				var xColumn = X.DenseMatrixValue[Idx].VectorValue;
				float yValue = y.VectorValue[Idx];
				for (int IdxRow = 0; IdxRow < zNNonzero; ++IdxRow)
				{
					zVal[IdxRow] += xColumn[zKey[IdxRow]] * yValue;
				}
			}
		}
Example #14
		// Z += a * x * y^T, accumulated with compare-and-swap so concurrent callers
		// can safely update the same matrix (requires System.Threading).
		public static void AtomicAddVectorMultiplyVectorTranspose(DenseMatrix Z, SparseColumnVector x, DenseColumnVector y, float a)
		{
			if (Z.nRows != x.Dim || Z.nCols != y.Dim)
			{
				throw new Exception("Dimension mismatch.");
			}
			float product = 0.0f;
			float InitVal = 0.0f;
			float ComputedVal = 0.0f;
			int ZnCols = Z.nCols;
			int xNz = x.nNonzero;
			var xVal = x.Val;
			var xKey = x.Key;
			var yVal = y.VectorValue;
			for (int IdxCol = 0; IdxCol < ZnCols; ++IdxCol)
			{
				var ZVal = Z.DenseMatrixValue[IdxCol].VectorValue;
				for (int IdxRow = 0; IdxRow < xNz; ++IdxRow)
				{
					product = xVal[IdxRow] * yVal[IdxCol] * a;
					// Lock-free accumulate: snapshot the cell, compute the update, and
					// publish it only if no other thread wrote in between; otherwise retry.
					do
					{
						InitVal = ZVal[xKey[IdxRow]];
						ComputedVal = InitVal + product;
					} while (InitVal != Interlocked.CompareExchange(ref ZVal[xKey[IdxRow]], ComputedVal, InitVal));
				}
			}
		}
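The do/while above is the standard lock-free read-modify-write loop for floats; a minimal sketch factoring it into a reusable helper (the helper name is hypothetical; requires System.Threading):

		// Sketch: atomically add increment to target, retrying whenever another
		// thread wins the race between the read and the CompareExchange.
		private static void AtomicAddFloat(ref float target, float increment)
		{
			float initial, computed;
			do
			{
				initial = target;                 // snapshot the current value
				computed = initial + increment;   // the value we want to publish
			} while (initial != Interlocked.CompareExchange(ref target, computed, initial));
		}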
Example #15
		// In-place natural log over stored values; stored values are assumed positive
		// (zero or negative entries would produce -Infinity or NaN).
		public static void Log(SparseColumnVector z)
		{
			var zVal = z.Val;
			int nNonzero = z.nNonzero;
			for (int IdxRow = 0; IdxRow < nNonzero; ++IdxRow)
			{
				zVal[IdxRow] = (float)Math.Log((double)zVal[IdxRow]);
			}
		}
Example #16
		/*
		 * Back propagation of the unfolded LDA model (mirror descent approach)
		 */
		// Implemented without atomic operations
		public static void BackPropagation_LDA(SparseMatrix Xt, SparseMatrix Dt, DNNRun_t DNNRun, paramModel_t paramModel, Grad_t Grad)
		{
			// -------- Extract parameters --------
			int nHid = paramModel.nHid;
			int nHidLayer = paramModel.nHidLayer;
			int nOutput = paramModel.nOutput;
			float To = paramModel.To;
			string OutputType = paramModel.OutputType;
			int BatchSize = Xt.nCols;
			int nInput = paramModel.nInput;

			// -------- Back propagation --------
			DenseMatrix grad_Q_po = new DenseMatrix(DNNRun.y);
			SparseMatrix TmpSparseMat = new SparseMatrix(Xt);
			SparseMatrix grad_Q_po_Sparse = new SparseMatrix(Xt);
			DenseMatrix xi = new DenseMatrix(nHid, BatchSize);
			DenseMatrix TmpDenseMat = new DenseMatrix(nHid, BatchSize);
			DenseMatrix ThetaRatio = new DenseMatrix(nHid, BatchSize);
			DenseRowVector TmpDenseRowVec = new DenseRowVector(BatchSize);            
			DenseMatrix tmp_theta_xi_b_T_OVER_theta_lm1_2 = new DenseMatrix(nHid, BatchSize);
			SparseMatrix tmp_Xt_OVER_Phitheta = new SparseMatrix(Xt);
			SparseMatrix tmp_Phi_theta_xi = new SparseMatrix(Xt);
			Grad.grad_Q_Phi.ClearValue();
			// ---- Offsets into pooled per-layer storage (exclusive prefix sum of effective layer counts) ----
			int[] OffsetEffNumLayer = new int[BatchSize];
			OffsetEffNumLayer[0] = 0;
			int NumTotalLayer = DNNRun.nHidLayerEffective[0];
			for (int IdxSample = 1; IdxSample < BatchSize; ++IdxSample)
			{
				OffsetEffNumLayer[IdxSample] = OffsetEffNumLayer[IdxSample - 1] + DNNRun.nHidLayerEffective[IdxSample-1];
				NumTotalLayer += DNNRun.nHidLayerEffective[IdxSample];
			}
			// ---- Temporary variables that store the intermediate results for computing the gradients ----
			DenseMatrix tmp_theta_xi_pool = new DenseMatrix(nHid, NumTotalLayer, 0.0f);
			DenseMatrix tmp_theta_xi = new DenseMatrix(nHid, BatchSize, 0.0f);
			DenseMatrix theta_l_minus_one = new DenseMatrix(nHid, NumTotalLayer, 0.0f);
			SparseMatrix tmp_Xt_OVER_Phitheta_pool = new SparseMatrix(nInput, NumTotalLayer);
			SparseMatrix TmpSparseMat_pool = new SparseMatrix(nInput, NumTotalLayer);
			int NumTotalNz = 0;
			for (int IdxSample = 0; IdxSample < BatchSize; ++IdxSample)
			{
				int Layer_begin = OffsetEffNumLayer[IdxSample];
				int Layer_end = Layer_begin + DNNRun.nHidLayerEffective[IdxSample];
				SparseColumnVector[] tmp1 = tmp_Xt_OVER_Phitheta_pool.SparseColumnVectors;
				SparseColumnVector[] tmp2 = TmpSparseMat_pool.SparseColumnVectors;
				SparseColumnVector xt = Xt.SparseColumnVectors[IdxSample];
				NumTotalNz += xt.nNonzero;
				for (int IdxLayer = Layer_begin; IdxLayer < Layer_end; ++IdxLayer)
				{
					tmp1[IdxLayer] = new SparseColumnVector(xt);
					tmp2[IdxLayer] = new SparseColumnVector(xt);
				}
			}
			int[] SparsePatternGradPhi = Xt.GetHorizontalUnionSparsePattern();
			SparseMatrix TmpGrad = new SparseMatrix(nInput, nHid, true);
			TmpGrad.SetSparsePatternForAllColumn(SparsePatternGradPhi);
			// ---- Compute grad Q wrt po if possible ----
			switch (OutputType)
			{
			case "softmaxCE":
				MatrixOperation.MatrixSubtractMatrix(grad_Q_po, Dt);
				MatrixOperation.ScalarMultiplyMatrix(grad_Q_po, To);
				Grad.grad_Q_U.ClearValue();
				break;
			case "linearQuad":
				MatrixOperation.MatrixSubtractMatrix(grad_Q_po, Dt);
				MatrixOperation.ScalarMultiplyMatrix(grad_Q_po, 2.0f);
				Grad.grad_Q_U.ClearValue();
				break;
			case "unsupLDA":
				Grad.grad_Q_TopPhi.SetAllValuesToZero();
				break;
			case "linearCE":
				throw new Exception("linearCE is not implemented.");
			default:
				throw new Exception("Unknown OutputType");
			}
			Parallel.For(0, BatchSize, new ParallelOptions { MaxDegreeOfParallelism = MatrixOperation.MaxMultiThreadDegree }, IdxSample =>
				{
					// ***************************************************************************

					// -------- Back propagation: top layer --------                    
					switch (OutputType)
					{
					case "softmaxCE":
						// ---- grad Q wrt pL (x_L) ----
						MatrixOperation.MatrixTransposeMultiplyVector(
							xi.DenseMatrixValue[IdxSample],
							paramModel.U,
							grad_Q_po.DenseMatrixValue[IdxSample]
						);
						MatrixOperation.ElementwiseVectorMultiplyVector(
							TmpDenseMat.DenseMatrixValue[IdxSample],
							DNNRun.theta_pool[DNNRun.nHidLayerEffective[IdxSample] - 1].DenseMatrixValue[IdxSample],
							xi.DenseMatrixValue[IdxSample]
						);
						TmpDenseRowVec.VectorValue[IdxSample] = TmpDenseMat.DenseMatrixValue[IdxSample].Sum();
						MatrixOperation.ScalarAddVector(
							xi.DenseMatrixValue[IdxSample],
							xi.DenseMatrixValue[IdxSample],
							TmpDenseRowVec.VectorValue[IdxSample] * (-1.0f)
						);
						break;
					case "linearQuad":
						// ---- grad Q wrt pL (x_L) ----
						MatrixOperation.MatrixTransposeMultiplyVector(
							xi.DenseMatrixValue[IdxSample],
							paramModel.U,
							grad_Q_po.DenseMatrixValue[IdxSample]
						);
						MatrixOperation.ElementwiseVectorMultiplyVector(
							TmpDenseMat.DenseMatrixValue[IdxSample],
							DNNRun.theta_pool[DNNRun.nHidLayerEffective[IdxSample] - 1].DenseMatrixValue[IdxSample],
							xi.DenseMatrixValue[IdxSample]
						);
						TmpDenseRowVec.VectorValue[IdxSample] = TmpDenseMat.DenseMatrixValue[IdxSample].Sum();
						MatrixOperation.ScalarAddVector(
							xi.DenseMatrixValue[IdxSample],
							xi.DenseMatrixValue[IdxSample],
							(-1.0f) * TmpDenseRowVec.VectorValue[IdxSample]
						);
						break;
					case "unsupLDA":
						// ---- grad Q wrt po ----
						MatrixOperation.MatrixMultiplyVector(
							grad_Q_po_Sparse.SparseColumnVectors[IdxSample],
							paramModel.Phi,
							DNNRun.theta_pool[DNNRun.nHidLayerEffective[IdxSample] - 1].DenseMatrixValue[IdxSample]
						);
						MatrixOperation.ElementwiseVectorDivideVector(
							grad_Q_po_Sparse.SparseColumnVectors[IdxSample],
							Xt.SparseColumnVectors[IdxSample],
							grad_Q_po_Sparse.SparseColumnVectors[IdxSample]
						);
						// ---- grad Q wrt pL (x_L) ----
						MatrixOperation.MatrixTransposeMultiplyVector(
							xi.DenseMatrixValue[IdxSample],
							paramModel.Phi,
							grad_Q_po_Sparse.SparseColumnVectors[IdxSample]
						);
						MatrixOperation.ScalarMultiplyVector(
							xi.DenseMatrixValue[IdxSample],
							-1.0f
						);
						MatrixOperation.ElementwiseVectorMultiplyVector(
							TmpDenseMat.DenseMatrixValue[IdxSample],
							xi.DenseMatrixValue[IdxSample],
							DNNRun.theta_pool[DNNRun.nHidLayerEffective[IdxSample] - 1].DenseMatrixValue[IdxSample]
						);
						TmpDenseRowVec.VectorValue[IdxSample] = TmpDenseMat.DenseMatrixValue[IdxSample].Sum();
						MatrixOperation.ScalarAddVector(
							xi.DenseMatrixValue[IdxSample],
							xi.DenseMatrixValue[IdxSample],
							(-1.0f) * TmpDenseRowVec.VectorValue[IdxSample]
						);
						break;
					case "linearCE":
						throw new Exception("linearCE is not implemented.");
						//break;
					default:
						throw new Exception("Unknown OutputType");
					}


					// ***************************************************************************

					// -------- Back propagation: hidden layers --------
					for (int IdxLayer = DNNRun.nHidLayerEffective[IdxSample] - 1; IdxLayer >= 0; IdxLayer--)
					{
						// ---- Compute the position in the temporary variable for the current layer at the current sample ----
						int IdxTmpVar = OffsetEffNumLayer[IdxSample] + IdxLayer;
						// ---- grad wrt b ---
						// Not implemented at the moment. (Can be used to update the Dirichlet parameter automatically.)
						// ---- Compute the intermediate variables ----
						MatrixOperation.ElementwiseVectorMultiplyVector(
							tmp_theta_xi_pool.DenseMatrixValue[IdxTmpVar],
							DNNRun.theta_pool[IdxLayer].DenseMatrixValue[IdxSample],
							xi.DenseMatrixValue[IdxSample]
						);
						if (IdxLayer == 0)
						{
							MatrixOperation.ElementwiseVectorDivideVector(
								tmp_theta_xi_b_T_OVER_theta_lm1_2.DenseMatrixValue[IdxSample],
								tmp_theta_xi_pool.DenseMatrixValue[IdxTmpVar],
								DNNRun.theta0.DenseMatrixValue[IdxSample]
							);
						}
						else
						{
							MatrixOperation.ElementwiseVectorDivideVector(
								tmp_theta_xi_b_T_OVER_theta_lm1_2.DenseMatrixValue[IdxSample],
								tmp_theta_xi_pool.DenseMatrixValue[IdxTmpVar],
								DNNRun.theta_pool[IdxLayer - 1].DenseMatrixValue[IdxSample]
							);
						}
						if (IdxLayer == 0)
						{
							MatrixOperation.ElementwiseVectorDivideVector(
								tmp_theta_xi_b_T_OVER_theta_lm1_2.DenseMatrixValue[IdxSample],
								tmp_theta_xi_b_T_OVER_theta_lm1_2.DenseMatrixValue[IdxSample],
								DNNRun.theta0.DenseMatrixValue[IdxSample]
							);
						}
						else
						{
							MatrixOperation.ElementwiseVectorDivideVector(
								tmp_theta_xi_b_T_OVER_theta_lm1_2.DenseMatrixValue[IdxSample],
								tmp_theta_xi_b_T_OVER_theta_lm1_2.DenseMatrixValue[IdxSample],
								DNNRun.theta_pool[IdxLayer - 1].DenseMatrixValue[IdxSample]
							);
						}
						MatrixOperation.ElementwiseVectorMultiplyVector(
							tmp_theta_xi_b_T_OVER_theta_lm1_2.DenseMatrixValue[IdxSample],
							paramModel.b
						);
						MatrixOperation.ScalarMultiplyVector(
							tmp_theta_xi_b_T_OVER_theta_lm1_2.DenseMatrixValue[IdxSample],
							DNNRun.T_pool.DenseMatrixValuePerRow[IdxLayer].VectorValue[IdxSample]
						);
						// Reset elements to zero wherever theta_{l-1} is zero (mainly for the alpha < 1 case)
						if (IdxLayer > 0)
						{
							MatrixOperation.ResetVectorSparsePattern(
								tmp_theta_xi_b_T_OVER_theta_lm1_2.DenseMatrixValue[IdxSample],
								DNNRun.theta_pool[IdxLayer - 1].DenseMatrixValue[IdxSample]
							);
						}
						// Continue to intermediate variable computation
						if (IdxLayer == 0) // TmpSparseMat is Phitheta_lm1
						{
							MatrixOperation.MatrixMultiplyVector(
								TmpSparseMat.SparseColumnVectors[IdxSample],
								paramModel.Phi,
								DNNRun.theta0.DenseMatrixValue[IdxSample]
							);
						}
						else
						{
							MatrixOperation.MatrixMultiplyVector(
								TmpSparseMat.SparseColumnVectors[IdxSample],
								paramModel.Phi,
								DNNRun.theta_pool[IdxLayer - 1].DenseMatrixValue[IdxSample]
							);
						}
						MatrixOperation.ElementwiseVectorDivideVector(
							tmp_Xt_OVER_Phitheta_pool.SparseColumnVectors[IdxTmpVar],
							Xt.SparseColumnVectors[IdxSample],
							TmpSparseMat.SparseColumnVectors[IdxSample]
						);
						MatrixOperation.ElementwiseVectorDivideVector(
							TmpSparseMat.SparseColumnVectors[IdxSample],
							tmp_Xt_OVER_Phitheta_pool.SparseColumnVectors[IdxTmpVar],
							TmpSparseMat.SparseColumnVectors[IdxSample]
						); // TmpSparseMat is tmp_Xt_OVER_Phitheta2
						MatrixOperation.MatrixMultiplyVector(
							tmp_Phi_theta_xi.SparseColumnVectors[IdxSample],
							paramModel.Phi,
							tmp_theta_xi_pool.DenseMatrixValue[IdxTmpVar]
						);
						MatrixOperation.ElementwiseVectorMultiplyVector(
							TmpSparseMat.SparseColumnVectors[IdxSample],
							tmp_Phi_theta_xi.SparseColumnVectors[IdxSample]
						); // TmpSparseMat is ( tmp_Phi_theta_xi.*tmp_Xt_OVER_Phitheta2 )
						MatrixOperation.MatrixTransposeMultiplyVector(
							TmpDenseMat.DenseMatrixValue[IdxSample],
							paramModel.Phi,
							TmpSparseMat.SparseColumnVectors[IdxSample]
						);
						MatrixOperation.ScalarMultiplyVector(
							TmpDenseMat.DenseMatrixValue[IdxSample],
							DNNRun.T_pool.DenseMatrixValuePerRow[IdxLayer].VectorValue[IdxSample]
						); // TmpDenseMat is tmp_Tl_Phit_xtPhiTheta2_Phi_theta_xi
						// ---- Compute the gradient wrt Phi ----     
						MatrixOperation.ScalarMultiplyVector(
							tmp_Xt_OVER_Phitheta_pool.SparseColumnVectors[IdxTmpVar],
							DNNRun.T_pool.DenseMatrixValuePerRow[IdxLayer].VectorValue[IdxSample]
						);
						MatrixOperation.ScalarMultiplyVector(
							TmpSparseMat_pool.SparseColumnVectors[IdxTmpVar],
							TmpSparseMat.SparseColumnVectors[IdxSample],
							DNNRun.T_pool.DenseMatrixValuePerRow[IdxLayer].VectorValue[IdxSample]*(-1.0f)
						);                      
						if (IdxLayer == 0)
						{
							theta_l_minus_one.DenseMatrixValue[IdxTmpVar] = DNNRun.theta0.DenseMatrixValue[IdxSample];
						}
						else
						{
							theta_l_minus_one.DenseMatrixValue[IdxTmpVar] = DNNRun.theta_pool[IdxLayer - 1].DenseMatrixValue[IdxSample];
						}                    
						// ---- Compute xi_{l-1} via back propagation ----
						if (IdxLayer > 0)
						{
							// Reset elements to zero wherever theta_{l-1} is zero (mainly for the alpha < 1 case)
							MatrixOperation.ElementwiseVectorDivideVector(
								ThetaRatio.DenseMatrixValue[IdxSample],
								DNNRun.theta_pool[IdxLayer].DenseMatrixValue[IdxSample],
								DNNRun.theta_pool[IdxLayer - 1].DenseMatrixValue[IdxSample]
							);
							MatrixOperation.ResetVectorSparsePattern(
								ThetaRatio.DenseMatrixValue[IdxSample],
								DNNRun.theta_pool[IdxLayer - 1].DenseMatrixValue[IdxSample]
							);
							MatrixOperation.ElementwiseVectorMultiplyVector(
								xi.DenseMatrixValue[IdxSample],
								xi.DenseMatrixValue[IdxSample],
								ThetaRatio.DenseMatrixValue[IdxSample]
							);
							// Compute xi_{l-1} now
							MatrixOperation.VectorSubtractVector(
								TmpDenseMat.DenseMatrixValue[IdxSample],
								xi.DenseMatrixValue[IdxSample],
								TmpDenseMat.DenseMatrixValue[IdxSample]
							);
							MatrixOperation.VectorSubtractVector(
								TmpDenseMat.DenseMatrixValue[IdxSample],
								TmpDenseMat.DenseMatrixValue[IdxSample],
								tmp_theta_xi_b_T_OVER_theta_lm1_2.DenseMatrixValue[IdxSample]
							);
							MatrixOperation.ElementwiseVectorMultiplyVector(
								tmp_theta_xi.DenseMatrixValue[IdxSample],
								DNNRun.theta_pool[IdxLayer - 1].DenseMatrixValue[IdxSample],
								TmpDenseMat.DenseMatrixValue[IdxSample]
							); // tmp_theta_xi is tmp1 in matlab code
							TmpDenseRowVec.VectorValue[IdxSample] = tmp_theta_xi.DenseMatrixValue[IdxSample].Sum();
							MatrixOperation.ScalarAddVector(
								xi.DenseMatrixValue[IdxSample],
								TmpDenseMat.DenseMatrixValue[IdxSample],
								TmpDenseRowVec.VectorValue[IdxSample] * (-1.0f)
							);
						}

					}
				});


			// -------- Compute the gradients --------
			// ---- Gradient with respect to U ----
			DenseMatrix Theta_Top = new DenseMatrix(nHid, BatchSize);
			for (int IdxSample = 0; IdxSample < BatchSize; ++IdxSample)
			{
				Theta_Top.DenseMatrixValue[IdxSample] = DNNRun.theta_pool[DNNRun.nHidLayerEffective[IdxSample] - 1].DenseMatrixValue[IdxSample];
			}
			switch (OutputType)
			{
			case "softmaxCE":
				// ---- grad Q wrt U ----
				MatrixOperation.MatrixMultiplyMatrixTranspose(Grad.grad_Q_U, grad_Q_po, Theta_Top);
				MatrixOperation.ScalarMultiplyMatrix(Grad.grad_Q_U, (1.0f / (float)BatchSize));
				break;
			case "linearQuad":
				// ---- grad Q wrt U ----
				MatrixOperation.MatrixMultiplyMatrixTranspose(Grad.grad_Q_U, grad_Q_po, Theta_Top);
				MatrixOperation.ScalarMultiplyMatrix(Grad.grad_Q_U, (1.0f / (float)BatchSize));
				break;
			case "unsupLDA":
				// ---- grad Q wrt Phi on top ----
				MatrixOperation.MatrixMultiplyMatrixTranspose(Grad.grad_Q_TopPhi, grad_Q_po_Sparse, Theta_Top, false);
				MatrixOperation.ScalarMultiplyMatrix(Grad.grad_Q_TopPhi, Grad.grad_Q_TopPhi, (-1.0f / (float)BatchSize));
				break;
			case "linearCE":
				throw new Exception("linearCE is not implemented.");
				//break;
			default:
				throw new Exception("Unknown OutputType");
			}
			// ---- Gradient with respect to Phi ----
			TmpGrad.SetAllValuesToZero();
			MatrixOperation.MatrixMultiplyMatrixTranspose(TmpGrad, tmp_Xt_OVER_Phitheta_pool, tmp_theta_xi_pool, true);
			MatrixOperation.MatrixMultiplyMatrixTranspose(TmpGrad, TmpSparseMat_pool, theta_l_minus_one, true);
			MatrixOperation.ScalarMultiplyMatrix(TmpGrad, TmpGrad, (1.0f / (float)BatchSize));
			MatrixOperation.MatrixAddMatrix(Grad.grad_Q_Phi, TmpGrad);
		}
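The pooled temporaries above pack each sample's effective layers contiguously, with OffsetEffNumLayer holding an exclusive prefix sum of DNNRun.nHidLayerEffective; a minimal standalone sketch of that layout computation (helper name hypothetical, batch size assumed to be at least one, as in the original loop):

		// Sketch: exclusive prefix sum giving, per sample, the first pooled column
		// it owns, plus the total number of pooled columns to allocate.
		static int[] ComputeLayerOffsets(int[] nHidLayerEffective, out int numTotalLayer)
		{
			int batchSize = nHidLayerEffective.Length;
			int[] offsets = new int[batchSize];   // offsets[0] stays 0 by default
			numTotalLayer = nHidLayerEffective[0];
			for (int i = 1; i < batchSize; ++i)
			{
				offsets[i] = offsets[i - 1] + nHidLayerEffective[i - 1];
				numTotalLayer += nHidLayerEffective[i];
			}
			return offsets;
		}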