Code example #1
File: LDA_Learn.cs Project: jvking/bp-lda
		/*
         * Back propagation of the unfolded LDA model (Mirror descent approach)
         */
		// Implemented without atomic operation
		public static void BackPropagation_LDA(SparseMatrix Xt, SparseMatrix Dt, DNNRun_t DNNRun, paramModel_t paramModel, Grad_t Grad)
		{
			// -------- Extract parameters --------
			int nHid = paramModel.nHid;
			int nHidLayer = paramModel.nHidLayer;
			int nOutput = paramModel.nOutput;
			float To = paramModel.To;
			string OutputType = paramModel.OutputType;
			int BatchSize = Xt.nCols;
			int nInput = paramModel.nInput;



			// -------- Back propagation --------
			DenseMatrix grad_Q_po = new DenseMatrix(DNNRun.y);
			SparseMatrix TmpSparseMat = new SparseMatrix(Xt);
			SparseMatrix grad_Q_po_Sparse = new SparseMatrix(Xt);
			DenseMatrix xi = new DenseMatrix(nHid, BatchSize);
			DenseMatrix TmpDenseMat = new DenseMatrix(nHid, BatchSize);
			DenseMatrix ThetaRatio = new DenseMatrix(nHid, BatchSize);
			DenseRowVector TmpDenseRowVec = new DenseRowVector(BatchSize);            
			DenseMatrix tmp_theta_xi_b_T_OVER_theta_lm1_2 = new DenseMatrix(nHid, BatchSize);
			SparseMatrix tmp_Xt_OVER_Phitheta = new SparseMatrix(Xt);
			SparseMatrix tmp_Phi_theta_xi = new SparseMatrix(Xt);
			Grad.grad_Q_Phi.ClearValue();
			// ---- Offset of effective number of layers ----
			int[] OffsetEffNumLayer = new int[BatchSize];
			OffsetEffNumLayer[0] = 0;
			int NumTotalLayer = DNNRun.nHidLayerEffective[0];
			for (int IdxSample = 1; IdxSample < BatchSize; ++IdxSample)
			{
				OffsetEffNumLayer[IdxSample] = OffsetEffNumLayer[IdxSample - 1] + DNNRun.nHidLayerEffective[IdxSample-1];
				NumTotalLayer += DNNRun.nHidLayerEffective[IdxSample];
			}
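			// For example, if DNNRun.nHidLayerEffective = {3, 2, 4}, then OffsetEffNumLayer = {0, 3, 5} and
			// NumTotalLayer = 9, so each sample's per-layer temporaries occupy a contiguous block of
			// columns in the *_pool matrices allocated below.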
			// ---- Temporary variables that store the intermediate results for computing the gradients ----
			DenseMatrix tmp_theta_xi_pool = new DenseMatrix(nHid, NumTotalLayer, 0.0f);
			DenseMatrix tmp_theta_xi = new DenseMatrix(nHid, BatchSize, 0.0f);
			DenseMatrix theta_l_minus_one = new DenseMatrix(nHid, NumTotalLayer, 0.0f);
			SparseMatrix tmp_Xt_OVER_Phitheta_pool = new SparseMatrix(nInput, NumTotalLayer);
			SparseMatrix TmpSparseMat_pool = new SparseMatrix(nInput, NumTotalLayer);
			int NumTotalNz = 0;
			for (int IdxSample = 0; IdxSample < BatchSize; ++IdxSample)
			{
				int Layer_begin = OffsetEffNumLayer[IdxSample];
				int Layer_end = Layer_begin + DNNRun.nHidLayerEffective[IdxSample];
				SparseColumnVector[] tmp1 = tmp_Xt_OVER_Phitheta_pool.SparseColumnVectors;
				SparseColumnVector[] tmp2 = TmpSparseMat_pool.SparseColumnVectors;
				SparseColumnVector xt = Xt.SparseColumnVectors[IdxSample];
				NumTotalNz += xt.nNonzero;
				for (int IdxLayer = Layer_begin; IdxLayer < Layer_end; ++IdxLayer)
				{
					tmp1[IdxLayer] = new SparseColumnVector(xt);
					tmp2[IdxLayer] = new SparseColumnVector(xt);
				}
			}
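			// GetHorizontalUnionSparsePattern returns the union of the word (row) indices that occur in the
			// batch; the gradient wrt Phi can only be nonzero on these rows, so every column of TmpGrad is
			// restricted to that sparse pattern.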
			int[] SparsePatternGradPhi = Xt.GetHorizontalUnionSparsePattern();
			SparseMatrix TmpGrad = new SparseMatrix(nInput, nHid, true);
			TmpGrad.SetSparsePatternForAllColumn(SparsePatternGradPhi);
			// ---- Compute grad Q wrt po if possible ----
			switch (OutputType)
			{
			case "softmaxCE":
				MatrixOperation.MatrixSubtractMatrix(grad_Q_po, Dt);
				MatrixOperation.ScalarMultiplyMatrix(grad_Q_po, To);
				Grad.grad_Q_U.ClearValue();
				break;
			case "linearQuad":
				MatrixOperation.MatrixSubtractMatrix(grad_Q_po, Dt);
				MatrixOperation.ScalarMultiplyMatrix(grad_Q_po, 2.0f);
				Grad.grad_Q_U.ClearValue();
				break;
			case "unsupLDA":
				Grad.grad_Q_TopPhi.SetAllValuesToZero();
				break;
			case "linearCE":
				throw new Exception("linearCE is not implemented.");
			default:
				throw new Exception("Unknown OutputType");
			}
			Parallel.For(0, BatchSize, new ParallelOptions { MaxDegreeOfParallelism = MatrixOperation.MaxMultiThreadDegree }, IdxSample =>
				{
					// ***************************************************************************

					// -------- Back propagation: top layer --------                    
					switch (OutputType)
					{
					case "softmaxCE":
						// ---- grad Q wrt pL (x_L) ----
						MatrixOperation.MatrixTransposeMultiplyVector(
							xi.DenseMatrixValue[IdxSample],
							paramModel.U,
							grad_Q_po.DenseMatrixValue[IdxSample]
						);
						MatrixOperation.ElementwiseVectorMultiplyVector(
							TmpDenseMat.DenseMatrixValue[IdxSample],
							DNNRun.theta_pool[DNNRun.nHidLayerEffective[IdxSample] - 1].DenseMatrixValue[IdxSample],
							xi.DenseMatrixValue[IdxSample]
						);
						TmpDenseRowVec.VectorValue[IdxSample] = TmpDenseMat.DenseMatrixValue[IdxSample].Sum();
						MatrixOperation.ScalarAddVector(
							xi.DenseMatrixValue[IdxSample],
							xi.DenseMatrixValue[IdxSample],
							TmpDenseRowVec.VectorValue[IdxSample] * (-1.0f)
						);
						break;
					case "linearQuad":
						// ---- grad Q wrt pL (x_L) ----
						MatrixOperation.MatrixTransposeMultiplyVector(
							xi.DenseMatrixValue[IdxSample],
							paramModel.U,
							grad_Q_po.DenseMatrixValue[IdxSample]
						);
						MatrixOperation.ElementwiseVectorMultiplyVector(
							TmpDenseMat.DenseMatrixValue[IdxSample],
							DNNRun.theta_pool[DNNRun.nHidLayerEffective[IdxSample] - 1].DenseMatrixValue[IdxSample],
							xi.DenseMatrixValue[IdxSample]
						);
						TmpDenseRowVec.VectorValue[IdxSample] = TmpDenseMat.DenseMatrixValue[IdxSample].Sum();
						MatrixOperation.ScalarAddVector(
							xi.DenseMatrixValue[IdxSample],
							xi.DenseMatrixValue[IdxSample],
							(-1.0f) * TmpDenseRowVec.VectorValue[IdxSample]
						);
						break;
					case "unsupLDA":
						// ---- grad Q wrt po ----
						MatrixOperation.MatrixMultiplyVector(
							grad_Q_po_Sparse.SparseColumnVectors[IdxSample],
							paramModel.Phi,
							DNNRun.theta_pool[DNNRun.nHidLayerEffective[IdxSample] - 1].DenseMatrixValue[IdxSample]
						);
						MatrixOperation.ElementwiseVectorDivideVector(
							grad_Q_po_Sparse.SparseColumnVectors[IdxSample],
							Xt.SparseColumnVectors[IdxSample],
							grad_Q_po_Sparse.SparseColumnVectors[IdxSample]
						);
						// ---- grad Q wrt pL (x_L) ----
						MatrixOperation.MatrixTransposeMultiplyVector(
							xi.DenseMatrixValue[IdxSample],
							paramModel.Phi,
							grad_Q_po_Sparse.SparseColumnVectors[IdxSample]
						);
						MatrixOperation.ScalarMultiplyVector(
							xi.DenseMatrixValue[IdxSample],
							-1.0f
						);
						MatrixOperation.ElementwiseVectorMultiplyVector(
							TmpDenseMat.DenseMatrixValue[IdxSample],
							xi.DenseMatrixValue[IdxSample],
							DNNRun.theta_pool[DNNRun.nHidLayerEffective[IdxSample] - 1].DenseMatrixValue[IdxSample]
						);
						TmpDenseRowVec.VectorValue[IdxSample] = TmpDenseMat.DenseMatrixValue[IdxSample].Sum();
						MatrixOperation.ScalarAddVector(
							xi.DenseMatrixValue[IdxSample],
							xi.DenseMatrixValue[IdxSample],
							(-1.0f) * TmpDenseRowVec.VectorValue[IdxSample]
						);
						break;
					case "linearCE":
						throw new Exception("linearCE is not implemented.");
						//break;
					default:
						throw new Exception("Unknown OutputType");
					}


					// ***************************************************************************

					// -------- Back propagation: hidden layers --------
					for (int IdxLayer = DNNRun.nHidLayerEffective[IdxSample] - 1; IdxLayer >= 0; IdxLayer--)
					{
						// ---- Compute the position in the temporary variable for the current layer at the current sample ----
						int IdxTmpVar = OffsetEffNumLayer[IdxSample] + IdxLayer;
						// ---- grad wrt b ---
						// Not implemented at the moment. (Can be used to update the Dirichlet parameter automatically.)
						// ---- Compute the intermediate variables ----
						MatrixOperation.ElementwiseVectorMultiplyVector(
							tmp_theta_xi_pool.DenseMatrixValue[IdxTmpVar],
							DNNRun.theta_pool[IdxLayer].DenseMatrixValue[IdxSample],
							xi.DenseMatrixValue[IdxSample]
						);
						if (IdxLayer == 0)
						{
							MatrixOperation.ElementwiseVectorDivideVector(
								tmp_theta_xi_b_T_OVER_theta_lm1_2.DenseMatrixValue[IdxSample],
								tmp_theta_xi_pool.DenseMatrixValue[IdxTmpVar],
								DNNRun.theta0.DenseMatrixValue[IdxSample]
							);
						}
						else
						{
							MatrixOperation.ElementwiseVectorDivideVector(
								tmp_theta_xi_b_T_OVER_theta_lm1_2.DenseMatrixValue[IdxSample],
								tmp_theta_xi_pool.DenseMatrixValue[IdxTmpVar],
								DNNRun.theta_pool[IdxLayer - 1].DenseMatrixValue[IdxSample]
							);
						}
						if (IdxLayer == 0)
						{
							MatrixOperation.ElementwiseVectorDivideVector(
								tmp_theta_xi_b_T_OVER_theta_lm1_2.DenseMatrixValue[IdxSample],
								tmp_theta_xi_b_T_OVER_theta_lm1_2.DenseMatrixValue[IdxSample],
								DNNRun.theta0.DenseMatrixValue[IdxSample]
							);
						}
						else
						{
							MatrixOperation.ElementwiseVectorDivideVector(
								tmp_theta_xi_b_T_OVER_theta_lm1_2.DenseMatrixValue[IdxSample],
								tmp_theta_xi_b_T_OVER_theta_lm1_2.DenseMatrixValue[IdxSample],
								DNNRun.theta_pool[IdxLayer - 1].DenseMatrixValue[IdxSample]
							);
						}
						MatrixOperation.ElementwiseVectorMultiplyVector(
							tmp_theta_xi_b_T_OVER_theta_lm1_2.DenseMatrixValue[IdxSample],
							paramModel.b
						);
						MatrixOperation.ScalarMultiplyVector(
							tmp_theta_xi_b_T_OVER_theta_lm1_2.DenseMatrixValue[IdxSample],
							DNNRun.T_pool.DenseMatrixValuePerRow[IdxLayer].VectorValue[IdxSample]
						);
						// Reset the elements to zero if theta_{l-1} is zero at these positions (mainly for the alpha < 1 case)
						if (IdxLayer > 0)
						{
							MatrixOperation.ResetVectorSparsePattern(
								tmp_theta_xi_b_T_OVER_theta_lm1_2.DenseMatrixValue[IdxSample],
								DNNRun.theta_pool[IdxLayer - 1].DenseMatrixValue[IdxSample]
							);
						}
						// Continue computing the intermediate variables
						if (IdxLayer == 0) // TmpSparseMat is Phitheta_lm1
						{
							MatrixOperation.MatrixMultiplyVector(
								TmpSparseMat.SparseColumnVectors[IdxSample],
								paramModel.Phi,
								DNNRun.theta0.DenseMatrixValue[IdxSample]
							);
						}
						else
						{
							MatrixOperation.MatrixMultiplyVector(
								TmpSparseMat.SparseColumnVectors[IdxSample],
								paramModel.Phi,
								DNNRun.theta_pool[IdxLayer - 1].DenseMatrixValue[IdxSample]
							);
						}
						MatrixOperation.ElementwiseVectorDivideVector(
							tmp_Xt_OVER_Phitheta_pool.SparseColumnVectors[IdxTmpVar],
							Xt.SparseColumnVectors[IdxSample],
							TmpSparseMat.SparseColumnVectors[IdxSample]
						);
						MatrixOperation.ElementwiseVectorDivideVector(
							TmpSparseMat.SparseColumnVectors[IdxSample],
							tmp_Xt_OVER_Phitheta_pool.SparseColumnVectors[IdxTmpVar],
							TmpSparseMat.SparseColumnVectors[IdxSample]
						); // TmpSparseMat is tmp_Xt_OVER_Phitheta2
						MatrixOperation.MatrixMultiplyVector(
							tmp_Phi_theta_xi.SparseColumnVectors[IdxSample],
							paramModel.Phi,
							tmp_theta_xi_pool.DenseMatrixValue[IdxTmpVar]
						);
						MatrixOperation.ElementwiseVectorMultiplyVector(
							TmpSparseMat.SparseColumnVectors[IdxSample],
							tmp_Phi_theta_xi.SparseColumnVectors[IdxSample]
						); // TmpSparseMat is ( tmp_Phi_theta_xi.*tmp_Xt_OVER_Phitheta2 )
						MatrixOperation.MatrixTransposeMultiplyVector(
							TmpDenseMat.DenseMatrixValue[IdxSample],
							paramModel.Phi,
							TmpSparseMat.SparseColumnVectors[IdxSample]
						);
						MatrixOperation.ScalarMultiplyVector(
							TmpDenseMat.DenseMatrixValue[IdxSample],
							DNNRun.T_pool.DenseMatrixValuePerRow[IdxLayer].VectorValue[IdxSample]
						); // TmpDenseMat is tmp_Tl_Phit_xtPhiTheta2_Phi_theta_xi
						// ---- Compute the gradient wrt Phi ----     
						MatrixOperation.ScalarMultiplyVector(
							tmp_Xt_OVER_Phitheta_pool.SparseColumnVectors[IdxTmpVar],
							DNNRun.T_pool.DenseMatrixValuePerRow[IdxLayer].VectorValue[IdxSample]
						);
						MatrixOperation.ScalarMultiplyVector(
							TmpSparseMat_pool.SparseColumnVectors[IdxTmpVar],
							TmpSparseMat.SparseColumnVectors[IdxSample],
							DNNRun.T_pool.DenseMatrixValuePerRow[IdxLayer].VectorValue[IdxSample]*(-1.0f)
						);                      
						if (IdxLayer == 0)
						{
							theta_l_minus_one.DenseMatrixValue[IdxTmpVar] = DNNRun.theta0.DenseMatrixValue[IdxSample];
						}
						else
						{
							theta_l_minus_one.DenseMatrixValue[IdxTmpVar] = DNNRun.theta_pool[IdxLayer - 1].DenseMatrixValue[IdxSample];
						}                    
						// ---- Compute xi_{l-1} via back propagation ----
						if (IdxLayer > 0)
						{
							// Reset the elements to zero if theta_{l-1} is zero at these positions (mainly for the alpha < 1 case)
							MatrixOperation.ElementwiseVectorDivideVector(
								ThetaRatio.DenseMatrixValue[IdxSample],
								DNNRun.theta_pool[IdxLayer].DenseMatrixValue[IdxSample],
								DNNRun.theta_pool[IdxLayer - 1].DenseMatrixValue[IdxSample]
							);
							MatrixOperation.ResetVectorSparsePattern(
								ThetaRatio.DenseMatrixValue[IdxSample],
								DNNRun.theta_pool[IdxLayer - 1].DenseMatrixValue[IdxSample]
							);
							MatrixOperation.ElementwiseVectorMultiplyVector(
								xi.DenseMatrixValue[IdxSample],
								xi.DenseMatrixValue[IdxSample],
								ThetaRatio.DenseMatrixValue[IdxSample]
							);
							// Compute xi_{l-1} now
							MatrixOperation.VectorSubtractVector(
								TmpDenseMat.DenseMatrixValue[IdxSample],
								xi.DenseMatrixValue[IdxSample],
								TmpDenseMat.DenseMatrixValue[IdxSample]
							);
							MatrixOperation.VectorSubtractVector(
								TmpDenseMat.DenseMatrixValue[IdxSample],
								TmpDenseMat.DenseMatrixValue[IdxSample],
								tmp_theta_xi_b_T_OVER_theta_lm1_2.DenseMatrixValue[IdxSample]
							);
							MatrixOperation.ElementwiseVectorMultiplyVector(
								tmp_theta_xi.DenseMatrixValue[IdxSample],
								DNNRun.theta_pool[IdxLayer - 1].DenseMatrixValue[IdxSample],
								TmpDenseMat.DenseMatrixValue[IdxSample]
							); // tmp_theta_xi is tmp1 in matlab code
							TmpDenseRowVec.VectorValue[IdxSample] = tmp_theta_xi.DenseMatrixValue[IdxSample].Sum();
							MatrixOperation.ScalarAddVector(
								xi.DenseMatrixValue[IdxSample],
								TmpDenseMat.DenseMatrixValue[IdxSample],
								TmpDenseRowVec.VectorValue[IdxSample] * (-1.0f)
							);
						}

					}
				});


			// -------- Compute the gradients --------
			// ---- Gradient with respect to U ----
			DenseMatrix Theta_Top = new DenseMatrix(nHid, BatchSize);
			for (int IdxSample = 0; IdxSample < BatchSize; ++IdxSample )
			{
				Theta_Top.DenseMatrixValue[IdxSample] = DNNRun.theta_pool[DNNRun.nHidLayerEffective[IdxSample] - 1].DenseMatrixValue[IdxSample];
			}
			switch (OutputType)
			{
			case "softmaxCE":
				// ---- grad Q wrt U ----
				MatrixOperation.MatrixMultiplyMatrixTranspose(Grad.grad_Q_U, grad_Q_po, Theta_Top);
				MatrixOperation.ScalarMultiplyMatrix(Grad.grad_Q_U, (1.0f / (float)BatchSize));
				break;
			case "linearQuad":
				// ---- grad Q wrt U ----
				MatrixOperation.MatrixMultiplyMatrixTranspose(Grad.grad_Q_U, grad_Q_po, Theta_Top);
				MatrixOperation.ScalarMultiplyMatrix(Grad.grad_Q_U, (1.0f / (float)BatchSize));
				break;
			case "unsupLDA":
				// ---- grad Q wrt Phi on top ----
				MatrixOperation.MatrixMultiplyMatrixTranspose(Grad.grad_Q_TopPhi, grad_Q_po_Sparse, Theta_Top, false);
				MatrixOperation.ScalarMultiplyMatrix(Grad.grad_Q_TopPhi, Grad.grad_Q_TopPhi, (-1.0f / (float)BatchSize));
				break;
			case "linearCE":
				throw new Exception("linearCE is not implemented.");
				//break;
			default:
				throw new Exception("Unknown OutputType");
			}
			// ---- Gradient with respect to Phi ----
			TmpGrad.SetAllValuesToZero();
			MatrixOperation.MatrixMultiplyMatrixTranspose(TmpGrad, tmp_Xt_OVER_Phitheta_pool, tmp_theta_xi_pool, true);
			MatrixOperation.MatrixMultiplyMatrixTranspose(TmpGrad, TmpSparseMat_pool, theta_l_minus_one, true);
			MatrixOperation.ScalarMultiplyMatrix(TmpGrad, TmpGrad, (1.0f / (float)BatchSize));
			MatrixOperation.MatrixAddMatrix(Grad.grad_Q_Phi, TmpGrad);

		}
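A minimal call sketch (hypothetical driver: Xt, DNNRun, paramModel and Grad are assumed to be allocated elsewhere, mirroring how TrainingBP_LDA below uses this routine). ForwardActivation_LDA must run first so that DNNRun holds the per-layer activations, and Dt may be null in the unsupervised ("unsupLDA") case:

		// Hypothetical mini-batch driver; all variables are placeholders.
		Grad.SetSparsePatternForAllGradPhi(Xt.GetHorizontalUnionSparsePattern());
		LDA_Learn.ForwardActivation_LDA(Xt, DNNRun, paramModel, true);      // forward pass fills DNNRun
		LDA_Learn.BackPropagation_LDA(Xt, null, DNNRun, paramModel, Grad);  // Dt == null for "unsupLDA"
		// Grad.grad_Q_Phi (plus grad_Q_TopPhi or grad_Q_U, depending on OutputType) now holds the batch gradients.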
Code example #2
File: LinearAlgebra.cs Project: jvking/bp-lda
		public static void ElementwiseVectorDivideVector(DenseRowVector z, DenseRowVector x, DenseRowVector y)
		{
			if (z.Dim != x.Dim || z.Dim != y.Dim)
			{
				throw new Exception("Dimension mismatch.");
			}
			var zVal = z.VectorValue;
			var xVal = x.VectorValue;
			var yVal = y.VectorValue;
			int zDim = z.Dim;
			for (int IdxRow = 0; IdxRow < zDim; ++IdxRow)
			{
				zVal[IdxRow] = xVal[IdxRow] / (yVal[IdxRow]+1e-12f);
			}
		}
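A small usage sketch (values chosen only for illustration, and assuming the single-argument DenseRowVector constructor allocates VectorValue as the copy constructor below does). Note the 1e-12f added to the denominator: it avoids division by zero at the cost of a tiny bias, so a zero entry in y produces a very large finite quotient instead of infinity:

		DenseRowVector x = new DenseRowVector(3);
		DenseRowVector y = new DenseRowVector(3);
		DenseRowVector z = new DenseRowVector(3);
		x.VectorValue[0] = 1.0f; x.VectorValue[1] = 4.0f; x.VectorValue[2] = 9.0f;
		y.VectorValue[0] = 2.0f; y.VectorValue[1] = 4.0f; y.VectorValue[2] = 0.0f;
		MatrixOperation.ElementwiseVectorDivideVector(z, x, y);   // z ~ {0.5f, 1.0f, 9e12f}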
Code example #3
File: LinearAlgebra.cs Project: jvking/bp-lda
		/*
         * z = z - x: vector subtracts vector
         */
		public static void VectorSubtractVector(DenseRowVector z, DenseRowVector x)
		{
			// Dimension check
			if (z.Dim != x.Dim)
			{
				throw new Exception("Dimension mismatch.");
			}
			// Computation
			var zVal = z.VectorValue;
			var xVal = x.VectorValue;
			int Dim = z.Dim;
			for (int IdxCol=0; IdxCol<Dim; ++IdxCol)
			{
				zVal[IdxCol] -= xVal[IdxCol];
			}
		}
Code example #4
File: LinearAlgebra.cs Project: jvking/bp-lda
		public DenseRowVector(DenseRowVector SourceVector)
		{
			Dim = SourceVector.Dim;
			VectorValue = new float[Dim];
			DeepCopyFrom(SourceVector);
		}
Code example #5
File: LinearAlgebra.cs Project: jvking/bp-lda
		/*
         * z = x.^{1/2}
         */
		public static void ElementwiseSquareRoot(DenseRowVector z, DenseRowVector x)
		{
			if (z.Dim != x.Dim)
			{
				throw new Exception("Dimension mismatch.");
			}
			int zDim = z.Dim;
			var zVal = z.VectorValue;
			var xVal = x.VectorValue;
			for (int Idx = 0; Idx < zDim; ++Idx )
			{
				zVal[Idx] = (float)Math.Sqrt(xVal[Idx]);
			}
		}
Code example #6
File: LinearAlgebra.cs Project: jvking/bp-lda
		/*
         * Count the number of values below a given threshold in each column of a matrix
         */
		public static void CountValuesLessThanThreshold(DenseRowVector NumSpecialElementPerCol, DenseMatrix X, float Threshold)
		{
			if (NumSpecialElementPerCol.Dim != X.nCols)
			{
				throw new Exception("Dimension mismatch.");
			}

			for (int IdxCol = 0; IdxCol < X.nCols; IdxCol++ )
			{
				NumSpecialElementPerCol.VectorValue[IdxCol] = 0.0f;
				for (int IdxRow = 0; IdxRow < X.nRows; IdxRow++)
				{                    
					if (X.DenseMatrixValue[IdxCol].VectorValue[IdxRow]<Threshold)
					{
						NumSpecialElementPerCol.VectorValue[IdxCol]++;
					}
				}
			}
		}
Code example #7
File: LinearAlgebra.cs Project: jvking/bp-lda
		public static void ProjCols2SimplexPlane(DenseMatrix Z, DenseMatrix X)
		{
			if (Z.nCols != X.nCols || Z.nRows != X.nRows)
			{
				throw new Exception("Dimension mismatch.");
			}
			DenseRowVector TmpDenseRowVec = new DenseRowVector(X.nCols);
			MatrixOperation.VerticalSumMatrix(TmpDenseRowVec, X);
			MatrixOperation.ScalarMultiplyVector(TmpDenseRowVec, 1.0f / ((float)X.nRows));
			MatrixOperation.bsxfunMatrixSubtractVector(Z, X, TmpDenseRowVec);
			MatrixOperation.ScalarAddMatrix(Z, 1.0f / ((float)X.nRows));
		}
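Reading the code, each column x of X is mapped to z = x - (sum(x)/nRows)*1 + (1/nRows)*1, so every column of Z sums to exactly 1 afterwards (the affine plane containing the simplex; entries may still be negative). A quick sanity-check sketch with hypothetical sizes, assuming the DenseMatrix constructor zero-fills its storage:

		DenseMatrix X = new DenseMatrix(4, 2);
		X.DenseMatrixValue[0].VectorValue[0] = 2.0f;    // put some mass into column 0
		DenseMatrix Z = new DenseMatrix(4, 2);
		MatrixOperation.ProjCols2SimplexPlane(Z, X);
		DenseRowVector ColSums = new DenseRowVector(2);
		MatrixOperation.VerticalSumMatrix(ColSums, Z);  // each entry should now be ~1.0f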
Code example #8
File: LDA_Learn.cs Project: jvking/bp-lda
		/*
         * Training: unsupervised learning of feedforward (unfolding) LDA by back propagation
         */
		public static void TrainingBP_LDA(
			SparseMatrix TrainData,
			SparseMatrix TestData,
			paramModel_t paramModel,
			paramTrain_t paramTrain,
			string ModelFile,
			string ResultFile
		)
		{
			// ---- Extract the parameters ----
			// Model parameters
			int nInput = paramModel.nInput;
			int nHid = paramModel.nHid;
			int nHidLayer = paramModel.nHidLayer;
			int nOutput = paramModel.nOutput;
			float eta = paramModel.eta;
			float T_value = paramModel.T_value;
			string OutputType = paramModel.OutputType;
			float beta = paramModel.beta;
			// Training parameters
			int nEpoch = paramTrain.nEpoch;
			float mu_Phi = paramTrain.mu_Phi;
			float mu_U = paramTrain.mu_U;
			int nTrain = paramTrain.nTrain;
			float mu_Phi_ReduceFactor = paramTrain.mu_Phi_ReduceFactor;
			string LearnRateSchedule = paramTrain.LearnRateSchedule;
			int nSamplesPerDisplay = paramTrain.nSamplesPerDisplay;
			int nEpochPerSave = paramTrain.nEpochPerSave;
			int nEpochPerTest = paramTrain.nEpochPerTest;
			int nEpochPerDump = paramTrain.nEpochPerDump;

			// ---- Initialize the model ----
			ModelInit_LDA_Feedforward(paramModel);

			// ---- Initialize the training algorithm ----
			Console.WriteLine("#################################################################");
			Console.WriteLine("jvking version of  BP-LDA: Mirror-Descent Back Propagation");
			Console.WriteLine("#################################################################");
			float TotLoss = 0.0f;
			float TotCE = 0.0f;
			double TotTime = 0.0f;
			double TotTimeThisEpoch = 0.0f;
			int TotSamples = 0;
			int TotSamplesThisEpoch = 0;
			double AvgnHidLayerEffective = 0.0;
			int CntRunningAvg = 0;
			int CntModelUpdate = 0;
			DenseRowVector mu_phi_search = new DenseRowVector(nHid, mu_Phi);
			DenseRowVector TestLoss_pool = new DenseRowVector(nEpoch / nEpochPerTest, 0.0f);
			DenseRowVector TestLoss_epoch = new DenseRowVector(nEpoch / nEpochPerTest, 0.0f);
			DenseRowVector TestLoss_time = new DenseRowVector(nEpoch / nEpochPerTest, 0.0f);
			int CountTest = 0;
			DenseRowVector G_Phi_pool = new DenseRowVector(paramModel.nHidLayer);
			DenseRowVector G_Phi_trunc_pool = new DenseRowVector(paramModel.nHidLayer, 0.0f);
			DenseRowVector AdaGradSum = new DenseRowVector(nHid, 0.0f);
			DenseRowVector TmpDenseRowVec = new DenseRowVector(nHid, 0.0f);
			int[] SparsePatternGradPhi = null;
			float nLearnLineSearch = 0.0f;
			int[] IdxPerm = null;
			int BatchSize_NormalBatch = paramTrain.BatchSize;
			int BatchSize_tmp = paramTrain.BatchSize;
			int nBatch = (int)Math.Ceiling(((float)nTrain) / ((float)BatchSize_NormalBatch));
			DNNRun_t DNNRun_NormalBatch = new DNNRun_t(nHid, BatchSize_NormalBatch, paramModel.nHidLayer, nOutput);
			DNNRun_t DNNRun_EndBatch = new DNNRun_t(nHid, nTrain - (nBatch - 1) * BatchSize_NormalBatch, paramModel.nHidLayer, nOutput);
			DNNRun_t DNNRun = null;
			Grad_t Grad = new Grad_t(nHid, nOutput, nInput, paramModel.nHidLayer, OutputType);
			DenseMatrix TmpGradDense = new DenseMatrix(nInput, nHid);
			DenseMatrix TmpMatDensePhi = new DenseMatrix(nInput, nHid);
			paramModel_t paramModel_avg = new paramModel_t(paramModel);
			Stopwatch stopWatch = new Stopwatch();
			// ---- Compute the schedule of the learning rate ----
			double[] stepsize_pool = null;
			switch (LearnRateSchedule)
			{
			case "PreCompute":
				stepsize_pool = PrecomputeLearningRateSchedule(nBatch, nEpoch, mu_Phi, mu_Phi / mu_Phi_ReduceFactor, 1e-8f);
				break;
			case "Constant":
				stepsize_pool = new double[nEpoch];
				for (int Idx = 0; Idx < nEpoch; Idx++)
				{
					stepsize_pool[Idx] = mu_Phi;
				}
				break;
			default:
				throw new Exception("Unknown type of LearnRateSchedule");
			}
			// Now start training.........................
			for (int epoch = 0; epoch < nEpoch; epoch++)
			{
				TotSamplesThisEpoch = 0;
				TotTimeThisEpoch = 0.0;
				AvgnHidLayerEffective = 0.0;
				// -- Set the batch size if there is a schedule --
				if (paramTrain.flag_BachSizeSchedule)
				{
					if (paramTrain.BachSizeSchedule.TryGetValue(epoch + 1, out BatchSize_tmp))
					{
						BatchSize_NormalBatch = BatchSize_tmp;
						nBatch = (int)Math.Ceiling(((float)nTrain) / ((float)BatchSize_NormalBatch));
						DNNRun_NormalBatch = new DNNRun_t(nHid, BatchSize_NormalBatch, paramModel.nHidLayer, nOutput);
						DNNRun_EndBatch = new DNNRun_t(nHid, nTrain - (nBatch - 1) * BatchSize_NormalBatch, paramModel.nHidLayer, nOutput);
					}
				}

				// -- Shuffle the data (generating shuffled index) --
				IdxPerm = Statistics.RandPerm(nTrain);
				// -- Reset the (MDA) inference step-sizes --
				if (epoch > 0)
				{
					for (int Idx = 0; Idx < paramModel.nHidLayer; Idx++)
					{
						paramModel.T[Idx] = T_value;
					}
				}
				// -- Take the learning rate for the current epoch --
				mu_Phi = (float)stepsize_pool[epoch];
				// -- Start this epoch --
				Console.WriteLine("############## Epoch #{0}. BatchSize: {1} Learning Rate: {2} ##################", epoch + 1, BatchSize_NormalBatch, mu_Phi);
				for (int IdxBatch = 0; IdxBatch < nBatch; IdxBatch++)
				{
					stopWatch.Start();
					// Extract the batch
					int BatchSize = 0;
					if (IdxBatch < nBatch - 1)
					{
						BatchSize = BatchSize_NormalBatch;
						DNNRun = DNNRun_NormalBatch;
					}
					else
					{
						BatchSize = nTrain - IdxBatch * BatchSize_NormalBatch;
						DNNRun = DNNRun_EndBatch;
					}
					SparseMatrix Xt = new SparseMatrix(nInput, BatchSize);
					SparseMatrix Dt = null;
					int[] IdxSample = new int[BatchSize];
					Array.Copy(IdxPerm, IdxBatch * BatchSize_NormalBatch, IdxSample, 0, BatchSize);
					TrainData.GetColumns(Xt, IdxSample);

					// Set the sparse pattern for the gradient
					SparsePatternGradPhi = Xt.GetHorizontalUnionSparsePattern();
					Grad.SetSparsePatternForAllGradPhi(SparsePatternGradPhi);

					// Forward activation
					LDA_Learn.ForwardActivation_LDA(Xt, DNNRun, paramModel, true);

					// Back propagation
					LDA_Learn.BackPropagation_LDA(Xt, Dt, DNNRun, paramModel, Grad);

					// Compute the gradient and update the model (All gradients of Phi are accumulated into Grad.grad_Q_Phi)
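					// The ScalarDivideMatrix call below presumably adds the gradient of the Dirichlet(beta)
					// prior on Phi, i.e. -(beta - 1)/nTrain * (1 ./ Phi), spread evenly over the nTrain samples,
					// while grad_Q_TopPhi is the data term computed in BackPropagation_LDA.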
					MatrixOperation.ScalarDivideMatrix(Grad.grad_Q_Phi, (-1.0f) * ((beta - 1) / ((float)nTrain)), paramModel.Phi, true);
					MatrixOperation.MatrixAddMatrix(Grad.grad_Q_Phi, Grad.grad_Q_TopPhi);                  
					mu_phi_search.FillValue(mu_Phi);
					// Different learning rate for different columns of Phi: Similar to AdaGrad but does not decay with time
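					// Concretely: AdaGradSum keeps a running average (over model updates) of the per-column mean
					// squared gradient, and the lines below set the step size of column k to
					// mu_Phi / (sqrt(AdaGradSum[k]) + mu_Phi).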
					++CntModelUpdate;
					MatrixOperation.ElementwiseMatrixMultiplyMatrix(TmpMatDensePhi, Grad.grad_Q_Phi, Grad.grad_Q_Phi);
					MatrixOperation.VerticalSumMatrix(TmpDenseRowVec, TmpMatDensePhi);
					MatrixOperation.ScalarMultiplyVector(TmpDenseRowVec, 1.0f / ((float)nInput));
					MatrixOperation.VectorSubtractVector(TmpDenseRowVec, AdaGradSum);
					MatrixOperation.ScalarMultiplyVector(TmpDenseRowVec, 1.0f / CntModelUpdate);
					MatrixOperation.VectorAddVector(AdaGradSum, TmpDenseRowVec);
					MatrixOperation.ElementwiseSquareRoot(TmpDenseRowVec, AdaGradSum);
					MatrixOperation.ScalarAddVector(TmpDenseRowVec, mu_Phi);
					MatrixOperation.ElementwiseVectorDivideVector(mu_phi_search, mu_phi_search, TmpDenseRowVec);
					nLearnLineSearch = SMD_Update(paramModel.Phi, Grad.grad_Q_Phi, mu_phi_search, eta);
					// Running average of the model
					if (paramTrain.flag_RunningAvg && epoch >= (int)Math.Ceiling(((float)nEpoch) / 2.0f))
					{
						++CntRunningAvg;
						MatrixOperation.MatrixSubtractMatrix(TmpMatDensePhi, paramModel.Phi, paramModel_avg.Phi);
						MatrixOperation.ScalarMultiplyMatrix(TmpMatDensePhi, 1.0f / CntRunningAvg);
						MatrixOperation.MatrixAddMatrix(paramModel_avg.Phi, TmpMatDensePhi);
					}


					// Display the result
					TotCE += ComputeCrossEntropy(Xt, paramModel.Phi,DNNRun.theta_pool, DNNRun.nHidLayerEffective);
					TotLoss = TotCE;
					TotSamples += BatchSize;
					TotSamplesThisEpoch += BatchSize;
					AvgnHidLayerEffective = (((float)(TotSamplesThisEpoch-BatchSize))/((float)TotSamplesThisEpoch))*AvgnHidLayerEffective
						+ (1.0/((float)TotSamplesThisEpoch))*( DNNRun.nHidLayerEffective.Sum());
					stopWatch.Stop();
					TimeSpan ts = stopWatch.Elapsed;
					TotTime += ts.TotalSeconds;
					TotTimeThisEpoch += ts.TotalSeconds;
					stopWatch.Reset();
					if (TotSamplesThisEpoch % nSamplesPerDisplay == 0)
					{
						// Display results
						Console.WriteLine(
							"* Ep#{0}/{1} Bat#{2}/{3}. Loss={4:F3}. CE={5:F3}.  Speed={6} Samples/Sec.",
							epoch + 1, nEpoch,
							IdxBatch + 1, nBatch,
							TotLoss / TotSamples, TotCE / TotSamples,
							(int)((double)TotSamplesThisEpoch / TotTimeThisEpoch)
						);
						if (paramTrain.DebugLevel == DebugLevel_t.medium)
						{
							Console.WriteLine(
								"  muPhiMax={0} \n  muPhiMin={1}",
								mu_phi_search.VectorValue.Max(), mu_phi_search.VectorValue.Min()
							);
							Console.WriteLine();
						}
						if (paramTrain.DebugLevel == DebugLevel_t.high)
						{
							Console.WriteLine(
								"  muPhiMax={0} \n  muPhiMin={1}",
								mu_phi_search.VectorValue.Max(), mu_phi_search.VectorValue.Min()
							);
							Console.WriteLine(
								"  AvgnHidLayerEff={0:F1}. G_Phi={1:F3}.",
								AvgnHidLayerEffective,
								Grad.grad_Q_Phi.MaxAbsValue()
							);
							Console.WriteLine();
						}


					}
				}
				// -- Test --
				if ((epoch + 1) % nEpochPerTest == 0)
				{
					TestLoss_epoch.VectorValue[(epoch + 1) / nEpochPerTest - 1] = epoch + 1;
					TestLoss_time.VectorValue[(epoch + 1) / nEpochPerTest - 1] = (float)TotTime;
					if (paramTrain.flag_RunningAvg && epoch >= (int)Math.Ceiling(((float)nEpoch) / 2.0f))
					{
						TestLoss_pool.VectorValue[(epoch + 1) / nEpochPerTest - 1] = Testing_BP_LDA(TestData, paramModel_avg, paramTrain.BatchSize_Test);
					}
					else
					{
						TestLoss_pool.VectorValue[(epoch + 1) / nEpochPerTest - 1] = Testing_BP_LDA(TestData, paramModel, paramTrain.BatchSize_Test);
					}
					CountTest++;
				}

				// -- Save --
				if ((epoch + 1) % nEpochPerSave == 0)
				{
					// Save model
					if (paramTrain.flag_RunningAvg && epoch >= (int)Math.Ceiling(((float)nEpoch) / 2.0f))
					{
						string PhiCol = null;
						(new FileInfo(ResultFile + ".model.Phi")).Directory.Create();
						StreamWriter FileSaveModel = new StreamWriter(ResultFile + ".model.Phi", false);
						for (int IdxCol = 0; IdxCol < paramModel_avg.Phi.nCols; IdxCol++)
						{
							PhiCol = String.Join("\t", paramModel_avg.Phi.DenseMatrixValue[IdxCol].VectorValue);
							FileSaveModel.WriteLine(PhiCol);
						}
						FileSaveModel.Close();
						// Save the final learning curves
						StreamWriter FileSavePerf = new StreamWriter(ResultFile + ".perf", false);
						FileSavePerf.WriteLine(String.Join("\t", TestLoss_epoch.VectorValue));
						FileSavePerf.WriteLine(String.Join("\t", TestLoss_time.VectorValue));
						FileSavePerf.WriteLine(String.Join("\t", TestLoss_pool.VectorValue));
						FileSavePerf.Close();
					}
					else
					{
						string PhiCol = null;
						(new FileInfo(ResultFile + ".model.Phi")).Directory.Create();
						StreamWriter FileSaveModel = new StreamWriter(ResultFile + ".model.Phi", false);
						for (int IdxCol = 0; IdxCol < paramModel.Phi.nCols; IdxCol++)
						{
							PhiCol = String.Join("\t", paramModel.Phi.DenseMatrixValue[IdxCol].VectorValue);
							FileSaveModel.WriteLine(PhiCol);
						}
						FileSaveModel.Close();
						// Save the final learning curves
						StreamWriter FileSavePerf = new StreamWriter(ResultFile + ".perf", false);
						FileSavePerf.WriteLine(String.Join("\t", TestLoss_epoch.VectorValue));
						FileSavePerf.WriteLine(String.Join("\t", TestLoss_time.VectorValue));
						FileSavePerf.WriteLine(String.Join("\t", TestLoss_pool.VectorValue));
						FileSavePerf.Close();
					}
				}

				// -- Dump feature --
				if (paramTrain.flag_DumpFeature && (epoch + 1) % nEpochPerDump == 0)
				{
					if (paramTrain.flag_RunningAvg && epoch >= (int)Math.Ceiling(((float)nEpoch) / 2.0f))
					{
						DumpingFeature_BP_LDA(TrainData, paramModel_avg, paramTrain.BatchSize_Test, ResultFile + ".train.fea", "Train");
						DumpingFeature_BP_LDA(TestData, paramModel_avg, paramTrain.BatchSize_Test, ResultFile + ".test.fea", "Test");
					}
					else
					{
						DumpingFeature_BP_LDA(TrainData, paramModel, paramTrain.BatchSize_Test, ResultFile + ".train.fea", "Train");
						DumpingFeature_BP_LDA(TestData, paramModel, paramTrain.BatchSize_Test, ResultFile + ".test.fea", "Test");
					}
				}


			}
		}
Code example #9
File: LDA_Learn.cs Project: jvking/bp-lda
		public static float ComputeSupervisedLoss(SparseMatrix Dt, SparseMatrix y, string OutputType)
		{
			if (Dt.nCols != y.nCols || Dt.nRows != y.nRows)
			{
				throw new Exception("The numbers of samples from label and prediction do not match.");
			}
			SparseMatrix SparseMat = new SparseMatrix(y);
			SparseMatrix TmpSparseMat = new SparseMatrix(Dt);
			DenseRowVector TmpDenseRowVec = new DenseRowVector(Dt.nCols);
			float TrainingLoss = 0.0f;
			switch (OutputType)
			{
			case "softmaxCE":
				MatrixOperation.ScalarAddMatrix(SparseMat, y, 1e-20f);
				MatrixOperation.Log(SparseMat);
				MatrixOperation.ElementwiseMatrixMultiplyMatrix(TmpSparseMat, Dt, SparseMat);
				MatrixOperation.VerticalSumMatrix(TmpDenseRowVec, TmpSparseMat);
				TrainingLoss = TmpDenseRowVec.Sum() * (-1.0f);
				break;
			case "linearQuad":
				MatrixOperation.MatrixSubtractMatrix(SparseMat, Dt);
				MatrixOperation.ElementwiseSquare(SparseMat);
				MatrixOperation.VerticalSumMatrix(TmpDenseRowVec, SparseMat);
				TrainingLoss = TmpDenseRowVec.Sum();
				break;
			case "linearCE":
				MatrixOperation.ScalarAddMatrix(SparseMat, y, 1e-20f);
				MatrixOperation.Log(SparseMat);
				MatrixOperation.ElementwiseMatrixMultiplyMatrix(TmpSparseMat, Dt, SparseMat);
				MatrixOperation.VerticalSumMatrix(TmpDenseRowVec, TmpSparseMat);
				TrainingLoss = TmpDenseRowVec.Sum() * (-1.0f);
				break;
			default:
				throw new Exception("Unknown OutputType.");
			}

			return TrainingLoss;
		}
Code example #10
File: LDA_Learn.cs Project: jvking/bp-lda
		/*
         * Compute Regularized Cross Entropy between the reconstructed input and the actual input. (Loss function for the unsupervised learning case)
         */
		public static float ComputeRegularizedCrossEntropy(SparseMatrix Xt, DenseMatrix Phi, DenseMatrix theta_top, DenseColumnVector b)
		{
			SparseMatrix TmpSparseMat = new SparseMatrix(Xt);
			DenseRowVector TmpDenseRowVec = new DenseRowVector(Xt.nCols);
			MatrixOperation.MatrixMultiplyMatrix(TmpSparseMat, Phi, theta_top);
			MatrixOperation.Log(TmpSparseMat);
			MatrixOperation.ElementwiseMatrixMultiplyMatrix(TmpSparseMat, Xt);
			MatrixOperation.VerticalSumMatrix(TmpDenseRowVec, TmpSparseMat);
			float CE = (-1.0f) * TmpDenseRowVec.VectorValue.Sum();
			DenseMatrix TmpDenseMat = new DenseMatrix(theta_top.nRows, theta_top.nCols);
			MatrixOperation.Log(TmpDenseMat, theta_top);
			MatrixOperation.bsxfunVectorMultiplyMatrix(TmpDenseMat, b);
			MatrixOperation.VerticalSumMatrix(TmpDenseRowVec, TmpDenseMat);
			CE = CE - TmpDenseRowVec.VectorValue.Sum();
			return CE;
		}
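Reading the code, this returns Loss = -sum_n [ x_n' * log(Phi * theta_n) + b' * log(theta_n) ]: the reconstruction cross entropy of each input column plus a regularization term weighted by the prior parameter vector b.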
Code example #11
File: LDA_Learn.cs Project: jvking/bp-lda
		/*
         * Training: supervised learning of feedforward (unfolding) LDA by back propagation
         */
		public static void TrainingBP_sLDA(
			SparseMatrix TrainData,
			SparseMatrix TrainLabel,
			SparseMatrix TestData,
			SparseMatrix TestLabel,
			SparseMatrix ValidData,
			SparseMatrix ValidLabel,
			paramModel_t paramModel,
			paramTrain_t paramTrain,
			string ModelFile,
			string ResultFile
		)
		{
			Console.WriteLine("*****************************************************************");
			Console.WriteLine("jvking version of BP-sLDA: Mirror-Descent Back Propagation");
			Console.WriteLine("*****************************************************************");
			// ---- Extract the parameters ----
			// Model parameters
			int nInput = paramModel.nInput;
			int nHid = paramModel.nHid;
			int nHidLayer = paramModel.nHidLayer;
			int nOutput = paramModel.nOutput;
			float eta = paramModel.eta;
			float T_value = paramModel.T_value;
			string OutputType = paramModel.OutputType;
			float beta = paramModel.beta;
			// Training parameters
			int nEpoch = paramTrain.nEpoch;
			float mu_Phi = paramTrain.mu_Phi;
			float mu_U = paramTrain.mu_U;
			int nTrain = paramTrain.nTrain;
			float mu_ReduceFactor = paramTrain.mu_Phi_ReduceFactor;
			string LearnRateSchedule = paramTrain.LearnRateSchedule;
			int nSamplesPerDisplay = paramTrain.nSamplesPerDisplay;
			int nEpochPerSave = paramTrain.nEpochPerSave;
			int nEpochPerTest = paramTrain.nEpochPerTest;
			int nEpochPerDump = paramTrain.nEpochPerDump;


			// ---- Initialize the model ----            
			ModelInit_LDA_Feedforward(paramModel);

			// ---- Initialize the training algorithm ----
			float TotLoss = 0.0f;
			float TotTrErr = 0.0f;
			double TotTime = 0.0f;
			double TotTimeThisEpoch = 0.0f;
			int TotSamples = 0;
			int TotSamplesThisEpoch = 0;
			float CntRunningAvg = 0.0f;
			float CntModelUpdate = 0.0f;
			double AvgnHidLayerEffective = 0.0f;
			DenseRowVector mu_phi_search = new DenseRowVector(nHid, mu_Phi);
			DenseRowVector mu_U_search = new DenseRowVector(nHid, mu_U);
			DenseRowVector AdaGradSum = new DenseRowVector(nHid, 0.0f);
			DenseRowVector TmpDenseRowVec = new DenseRowVector(nHid, 0.0f);
			DenseRowVector TestError_pool = new DenseRowVector(nEpoch / nEpochPerTest, 0.0f);
			DenseRowVector ValidError_pool = new DenseRowVector(nEpoch / nEpochPerTest, 0.0f);
			DenseRowVector TrainError_pool = new DenseRowVector(nEpoch / nEpochPerTest, 0.0f);
			DenseRowVector TrainLoss_pool = new DenseRowVector(nEpoch / nEpochPerTest, 0.0f);
			DenseRowVector TestError_epoch = new DenseRowVector(nEpoch / nEpochPerTest, 0.0f);
			DenseRowVector TestError_time = new DenseRowVector(nEpoch / nEpochPerTest, 0.0f);
			int CountTest = 0;
			float nLearnLineSearch = 0.0f;
			int[] IdxPerm = null;
			int BatchSize_NormalBatch = paramTrain.BatchSize;
			int BatchSize_tmp = paramTrain.BatchSize;
			int nBatch = (int)Math.Ceiling(((float)nTrain) / ((float)BatchSize_NormalBatch));
			DNNRun_t DNNRun_NormalBatch = new DNNRun_t(nHid, BatchSize_NormalBatch, paramModel.nHidLayer, nOutput);
			DNNRun_t DNNRun_EndBatch = new DNNRun_t(nHid, nTrain - (nBatch - 1) * BatchSize_NormalBatch, paramModel.nHidLayer, nOutput);
			DNNRun_t DNNRun = null;
			Grad_t Grad = new Grad_t(nHid, nOutput, nInput, paramModel.nHidLayer, OutputType);
			SparseMatrix TmpGrad = new SparseMatrix(nInput, nHid, true);
			DenseMatrix TmpMatDensePhi = new DenseMatrix(nInput, nHid);
			DenseMatrix TmpMatDenseU = new DenseMatrix(nOutput, nHid);
			paramModel_t paramModel_avg = new paramModel_t(paramModel);          
			Stopwatch stopWatch = new Stopwatch();            
			// ---- Compute the schedule of the learning rate ----
			double[] stepsize_pool_Phi = null;
			double[] stepsize_pool_U = null;
			switch (LearnRateSchedule)
			{
			case "PreCompute":
				stepsize_pool_Phi = PrecomputeLearningRateSchedule(nBatch, nEpoch, mu_Phi, mu_Phi / mu_ReduceFactor, 1e-8f);
				stepsize_pool_U = PrecomputeLearningRateSchedule(nBatch, nEpoch, mu_U, mu_U / mu_ReduceFactor, 1e-8f);
				break;
			case "Constant":
				stepsize_pool_Phi = new double[nEpoch];
				stepsize_pool_U = new double[nEpoch];
				for (int Idx = 0; Idx < nEpoch; Idx++)
				{
					stepsize_pool_Phi[Idx] = mu_Phi;
					stepsize_pool_U[Idx] = mu_U;
				}
				break;
			default:
				throw new Exception("Unknown type of LearnRateSchedule");
			}
			// Now start training.........................
			for (int epoch = 0; epoch < nEpoch; epoch++)
			{
				TotSamplesThisEpoch = 0;
				TotTimeThisEpoch = 0.0;
				AvgnHidLayerEffective = 0.0f;
				// -- Set the batch size if there is a schedule --
				if (paramTrain.flag_BachSizeSchedule)
				{
					if (paramTrain.BachSizeSchedule.TryGetValue(epoch + 1, out BatchSize_tmp))
					{
						BatchSize_NormalBatch = BatchSize_tmp;
						nBatch = (int)Math.Ceiling(((float)nTrain) / ((float)BatchSize_NormalBatch));
						DNNRun_NormalBatch = new DNNRun_t(nHid, BatchSize_NormalBatch, paramModel.nHidLayer, nOutput);
						DNNRun_EndBatch = new DNNRun_t(nHid, nTrain - (nBatch - 1) * BatchSize_NormalBatch, paramModel.nHidLayer, nOutput);
					}
				}

				// -- Shuffle the data (generating shuffled index) --
				IdxPerm = Statistics.RandPerm(nTrain);
				// -- Reset the (MDA) inference step-sizes --
				if (epoch > 0)
				{
					for (int Idx = 0; Idx < paramModel.nHidLayer; Idx++)
					{
						paramModel.T[Idx] = T_value;
					}
				}
				// -- Take the learning rate for the current epoch --
				mu_Phi = (float)stepsize_pool_Phi[epoch];
				mu_U = (float)stepsize_pool_U[epoch];
				// -- Start this epoch --
				Console.WriteLine("############## Epoch #{0}. BatchSize: {1} Learning Rate: Phi:{2}, U:{3} ##################",
					epoch + 1, BatchSize_NormalBatch, mu_Phi, mu_U);
				for (int IdxBatch = 0; IdxBatch < nBatch; IdxBatch++)
				{
					stopWatch.Start();
					// Extract the batch
					int BatchSize = 0;
					if (IdxBatch < nBatch - 1)
					{
						BatchSize = BatchSize_NormalBatch;
						DNNRun = DNNRun_NormalBatch;
					}
					else
					{
						BatchSize = nTrain - IdxBatch * BatchSize_NormalBatch;
						DNNRun = DNNRun_EndBatch;
					}
					SparseMatrix Xt = new SparseMatrix(nInput, BatchSize);
					SparseMatrix Dt = new SparseMatrix(nOutput, BatchSize);
					int[] IdxSample = new int[BatchSize];
					Array.Copy(IdxPerm, IdxBatch * BatchSize_NormalBatch, IdxSample, 0, BatchSize);
					TrainData.GetColumns(Xt, IdxSample);
					TrainLabel.GetColumns(Dt, IdxSample);

					// Forward activation
					LDA_Learn.ForwardActivation_LDA(Xt, DNNRun, paramModel, true);

					// Back propagation
					LDA_Learn.BackPropagation_LDA(Xt, Dt, DNNRun, paramModel, Grad);

					// Compute the gradient and update the model (All gradients of Phi are accumulated into Grad.grad_Q_Phi)
					// (i) Update Phi
					MatrixOperation.ScalarDivideMatrix(Grad.grad_Q_Phi, (-1.0f) * ((beta - 1) / ((float)nTrain)), paramModel.Phi, true);       
					mu_phi_search.FillValue(mu_Phi);
					// Different learning rate for different columns of Phi: Similar to AdaGrad but does not decay with time
					++CntModelUpdate;
					MatrixOperation.ElementwiseMatrixMultiplyMatrix(TmpMatDensePhi, Grad.grad_Q_Phi, Grad.grad_Q_Phi);
					MatrixOperation.VerticalSumMatrix(TmpDenseRowVec, TmpMatDensePhi);
					MatrixOperation.ScalarMultiplyVector(TmpDenseRowVec, 1.0f / ((float)nInput));
					MatrixOperation.VectorSubtractVector(TmpDenseRowVec, AdaGradSum);
					MatrixOperation.ScalarMultiplyVector(TmpDenseRowVec, 1.0f / CntModelUpdate);
					MatrixOperation.VectorAddVector(AdaGradSum, TmpDenseRowVec);
					MatrixOperation.ElementwiseSquareRoot(TmpDenseRowVec, AdaGradSum);
					MatrixOperation.ScalarAddVector(TmpDenseRowVec, mu_Phi);
					MatrixOperation.ElementwiseVectorDivideVector(mu_phi_search, mu_phi_search, TmpDenseRowVec);
					nLearnLineSearch = SMD_Update(paramModel.Phi, Grad.grad_Q_Phi, mu_phi_search, eta);
					// (ii) Update U                    
					MatrixOperation.ScalarMultiplyMatrix(Grad.grad_Q_U, (-1.0f) * mu_U);
					MatrixOperation.MatrixAddMatrix(paramModel.U, Grad.grad_Q_U);
					// (iii) Running average of the model
					if (paramTrain.flag_RunningAvg && epoch >= (int)Math.Ceiling(((float)nEpoch)/2.0f))
					{
						++CntRunningAvg;
						MatrixOperation.MatrixSubtractMatrix(TmpMatDensePhi, paramModel.Phi, paramModel_avg.Phi);
						MatrixOperation.MatrixSubtractMatrix(TmpMatDenseU, paramModel.U, paramModel_avg.U);
						MatrixOperation.ScalarMultiplyMatrix(TmpMatDensePhi, 1.0f / CntRunningAvg);
						MatrixOperation.ScalarMultiplyMatrix(TmpMatDenseU, 1.0f / CntRunningAvg);
						MatrixOperation.MatrixAddMatrix(paramModel_avg.Phi, TmpMatDensePhi);
						MatrixOperation.MatrixAddMatrix(paramModel_avg.U, TmpMatDenseU);
					}

					// Display the result
					TotTrErr += 100 * ComputeNumberOfErrors(Dt, DNNRun.y);
					TotLoss += ComputeSupervisedLoss(Dt, DNNRun.y, paramModel.OutputType);
					TotSamples += BatchSize;
					TotSamplesThisEpoch += BatchSize;
					AvgnHidLayerEffective =
						(((double)(TotSamplesThisEpoch - BatchSize)) / ((double)TotSamplesThisEpoch)) * AvgnHidLayerEffective 
						+
						1.0 / ((double)TotSamplesThisEpoch) * DNNRun.nHidLayerEffective.Sum();
					stopWatch.Stop();
					TimeSpan ts = stopWatch.Elapsed;
					TotTime += ts.TotalSeconds;
					TotTimeThisEpoch += ts.TotalSeconds;
					stopWatch.Reset();
					if (TotSamplesThisEpoch % nSamplesPerDisplay == 0)
					{
						// Display results
						Console.WriteLine(
							"* Ep#{0}/{1} Bat#{2}/{3}. Loss={4:F3}. TrErr={5:F3}%. Speed={6} Samples/Sec.",
							epoch + 1, nEpoch,
							IdxBatch + 1, nBatch,
							TotLoss / TotSamples, TotTrErr / TotSamples,
							(int)((double)TotSamplesThisEpoch / TotTimeThisEpoch)
						);
						if (paramTrain.DebugLevel == DebugLevel_t.medium)
						{
							Console.WriteLine(
								"  muPhiMax={0} \n  muPhiMin={1}",
								mu_phi_search.VectorValue.Max(), mu_phi_search.VectorValue.Min()
							);
							Console.WriteLine();
						}
						if (paramTrain.DebugLevel == DebugLevel_t.high)
						{
							Console.WriteLine(
								"  muPhiMax={0} \n  muPhiMin={1}",
								mu_phi_search.VectorValue.Max(), mu_phi_search.VectorValue.Min()
							);
							float MaxAbsVal_Grad_Q_Phi = Grad.grad_Q_Phi.MaxAbsValue();
							float MaxAbsVal_Grad_Q_U = Grad.grad_Q_U.MaxAbsValue();
							Console.WriteLine(
								"  AvgnHidLayerEff={0:F1}. G_Phi={1:F3}. G_U={2:F3}",
								AvgnHidLayerEffective,
								MaxAbsVal_Grad_Q_Phi,
								MaxAbsVal_Grad_Q_U
							);
							// Save the screen into a log file
							(new FileInfo(ResultFile + ".log")).Directory.Create();
							using (StreamWriter LogFile = File.AppendText(ResultFile + ".log"))
							{
								LogFile.WriteLine(
									"- Ep#{0}/{1} Bat#{2}/{3}. Loss={4:F3}. TrErr={5:F3}%. Speed={6} Samples/Sec.",
									epoch + 1, nEpoch,
									IdxBatch + 1, nBatch,
									TotLoss / TotSamples, TotTrErr / TotSamples,
									(int)((double)TotSamplesThisEpoch / TotTimeThisEpoch)
								);
								LogFile.WriteLine(
									"  muPhiMax={0} \n  muPhiMin={1}",
									mu_phi_search.VectorValue.Max(), mu_phi_search.VectorValue.Min()
								);
								LogFile.WriteLine(
									"  AvgnHidLayerEff={0:F1}. G_Phi={1:F3}. G_U={2:F3}",
									AvgnHidLayerEffective,
									MaxAbsVal_Grad_Q_Phi,
									MaxAbsVal_Grad_Q_U
								);
								Console.WriteLine();
							}
							Console.WriteLine();
						}

					}
				}
				// -- Test --
				if ((epoch + 1) % nEpochPerTest == 0)
				{
					// Standard performance metric
					TestError_epoch.VectorValue[(epoch + 1) / nEpochPerTest - 1] = epoch + 1;
					TestError_time.VectorValue[(epoch + 1) / nEpochPerTest - 1] = (float)TotTime;
					if (paramTrain.flag_RunningAvg && epoch >= (int)Math.Ceiling(((float)nEpoch) / 2.0f))
					{
						if (paramTrain.flag_HasValidSet)
						{
							ValidError_pool.VectorValue[(epoch + 1) / nEpochPerTest - 1]
							= Testing_BP_sLDA(
								ValidData, 
								ValidLabel, 
								paramModel_avg, 
								paramTrain.BatchSize_Test, 
								ResultFile + ".validscore", 
								"Validation Set"
							);
						}
						TestError_pool.VectorValue[(epoch + 1) / nEpochPerTest - 1]
						= Testing_BP_sLDA(
							TestData, 
							TestLabel, 
							paramModel_avg, 
							paramTrain.BatchSize_Test, 
							ResultFile + ".testscore", 
							"Test Set"
						);                        
					}
					else
					{
						if (paramTrain.flag_HasValidSet)
						{
							ValidError_pool.VectorValue[(epoch + 1) / nEpochPerTest - 1]
							= Testing_BP_sLDA(
								ValidData, 
								ValidLabel, 
								paramModel, 
								paramTrain.BatchSize_Test, 
								ResultFile + ".validscore", 
								"Validation Set"
							);
						}
						TestError_pool.VectorValue[(epoch + 1) / nEpochPerTest - 1]
						= Testing_BP_sLDA(
							TestData, 
							TestLabel, 
							paramModel, 
							paramTrain.BatchSize_Test, 
							ResultFile + ".testscore", 
							"Test Set"
						);
					}
					TrainError_pool.VectorValue[(epoch + 1) / nEpochPerTest - 1]
					= TotTrErr / TotSamples;
					TrainLoss_pool.VectorValue[(epoch + 1) / nEpochPerTest - 1]
					= TotLoss / TotSamples;

					// Performance metric evaluated using external evaluation tools, e.g., AUC, Top@K accuracy, etc.
					if (paramTrain.flag_ExternalEval)
					{
						ExternalEvaluation(
							paramTrain.ExternalEval, 
							ResultFile, 
							paramTrain.TestLabelFile, 
							epoch, 
							"Test Set"
						);
						if (paramTrain.flag_HasValidSet)
						{
							ExternalEvaluation(
								paramTrain.ExternalEval, 
								ResultFile, 
								paramTrain.ValidLabelFile, 
								epoch, 
								"Validation Set"
							);
						}
					}

					CountTest++;
				}

				// -- Save --
				if ((epoch + 1) % nEpochPerSave == 0)
				{
					// Save model
					string PhiCol = null;
					string UCol = null;
					(new FileInfo(ResultFile + ".model.Phi")).Directory.Create();
					string ModelName_Phi;
					string ModelName_U;
					if (paramTrain.flag_SaveAllModels)
					{
						ModelName_Phi = ResultFile + ".model.Phi" + ".iter" + (epoch + 1).ToString();
						ModelName_U = ResultFile + ".model.U" + ".iter" + (epoch + 1).ToString();
					}
					else
					{
						ModelName_Phi = ResultFile + ".model.Phi";
						ModelName_U = ResultFile + ".model.U";
					}
					if (paramTrain.flag_RunningAvg && epoch >= (int)Math.Ceiling(((float)nEpoch) / 2.0f))
					{
						using (StreamWriter FileSaveModel_Phi = new StreamWriter(ModelName_Phi, false))
						{
							for (int IdxCol = 0; IdxCol < paramModel_avg.Phi.nCols; IdxCol++)
							{
								PhiCol = String.Join("\t", paramModel_avg.Phi.DenseMatrixValue[IdxCol].VectorValue);
								FileSaveModel_Phi.WriteLine(PhiCol);
							}
						}
						using (StreamWriter FileSaveModel_U = new StreamWriter(ModelName_U, false))
						{
							for (int IdxCol = 0; IdxCol < paramModel_avg.U.nCols; IdxCol++)
							{
								UCol = String.Join("\t", paramModel_avg.U.DenseMatrixValue[IdxCol].VectorValue);
								FileSaveModel_U.WriteLine(UCol);
							}
						}
					}
					else
					{
						using (StreamWriter FileSaveModel_Phi = new StreamWriter(ModelName_Phi, false))
						{
							for (int IdxCol = 0; IdxCol < paramModel.Phi.nCols; IdxCol++)
							{
								PhiCol = String.Join("\t", paramModel.Phi.DenseMatrixValue[IdxCol].VectorValue);
								FileSaveModel_Phi.WriteLine(PhiCol);
							}
						}
						using (StreamWriter FileSaveModel_U = new StreamWriter(ModelName_U, false))
						{
							for (int IdxCol = 0; IdxCol < paramModel.U.nCols; IdxCol++)
							{
								UCol = String.Join("\t", paramModel.U.DenseMatrixValue[IdxCol].VectorValue);
								FileSaveModel_U.WriteLine(UCol);
							}
						}
					}
					// Save the final learning curves
					using (StreamWriter FileSavePerf = new StreamWriter(ResultFile + ".perf", false))
					{
						FileSavePerf.Write("Epoch:\t");
						FileSavePerf.WriteLine(String.Join("\t", TestError_epoch.VectorValue));
						FileSavePerf.Write("TrainTime:\t");
						FileSavePerf.WriteLine(String.Join("\t", TestError_time.VectorValue));
						if (paramTrain.flag_HasValidSet)
						{
							FileSavePerf.Write("Validation:\t");
							FileSavePerf.WriteLine(String.Join("\t", ValidError_pool.VectorValue));
						}
						FileSavePerf.Write("Test:\t");
						FileSavePerf.WriteLine(String.Join("\t", TestError_pool.VectorValue));
						FileSavePerf.Write("TrainError:\t");
						FileSavePerf.WriteLine(String.Join("\t", TrainError_pool.VectorValue));
						FileSavePerf.Write("TrainLoss:\t");
						FileSavePerf.WriteLine(String.Join("\t", TrainLoss_pool.VectorValue));
					}
				}

				// -- Dump feature --
				if (paramTrain.flag_DumpFeature && (epoch + 1) % nEpochPerDump == 0)
				{
					if (paramTrain.flag_RunningAvg && epoch >= (int)Math.Ceiling(((float)nEpoch) / 2.0f))
					{
						DumpingFeature_BP_LDA(TrainData, paramModel_avg, paramTrain.BatchSize_Test, ResultFile + ".train.fea", "Train");
						DumpingFeature_BP_LDA(TestData, paramModel_avg, paramTrain.BatchSize_Test, ResultFile + ".test.fea", "Test");
						if (paramTrain.flag_HasValidSet)
						{
							DumpingFeature_BP_LDA(ValidData, paramModel_avg, paramTrain.BatchSize_Test, ResultFile + ".valid.fea", "Validation");
						}
					}
					else
					{
						DumpingFeature_BP_LDA(TrainData, paramModel, paramTrain.BatchSize_Test, ResultFile + ".train.fea", "Train");
						DumpingFeature_BP_LDA(TestData, paramModel, paramTrain.BatchSize_Test, ResultFile + ".test.fea", "Test");
						if (paramTrain.flag_HasValidSet)
						{
							DumpingFeature_BP_LDA(ValidData, paramModel, paramTrain.BatchSize_Test, ResultFile + ".valid.fea", "Validation");
						}
					}
				}


			}


		}
Code example #12
File: LDA_Learn.cs Project: jvking/bp-lda
		public static float ComputeCrossEntropy(SparseMatrix Xt, DenseMatrix Phi, DenseMatrix[] theta_pool, int[] nHidLayerEffective)
		{
			SparseMatrix TmpSparseMat = new SparseMatrix(Xt);
			DenseRowVector TmpDenseRowVec = new DenseRowVector(Xt.nCols);
			Parallel.For(0, Xt.nCols, IdxSample =>
				{
					MatrixOperation.MatrixMultiplyVector(
						TmpSparseMat.SparseColumnVectors[IdxSample], 
						Phi, 
						theta_pool[nHidLayerEffective[IdxSample] - 1].DenseMatrixValue[IdxSample]
					);
				});
			MatrixOperation.Log(TmpSparseMat);
			MatrixOperation.ElementwiseMatrixMultiplyMatrix(TmpSparseMat, Xt);
			MatrixOperation.VerticalSumMatrix(TmpDenseRowVec, TmpSparseMat);
			return (-1.0f) * TmpDenseRowVec.VectorValue.Sum();
		}
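This returns CE = -sum_n x_n' * log(Phi * theta_n), i.e. the reconstruction cross entropy of ComputeRegularizedCrossEntropy above without the b-regularization term, except that theta_n is read from the last effective layer of each sample n (index nHidLayerEffective[n] - 1) and the per-column products are computed in parallel.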
Code example #13
File: LDA_Learn.cs Project: jvking/bp-lda
		/*
         * Compute Cross Entropy between the reconstructed input and the actual input. (Unsupervised learning case)
         */
		public static float ComputeCrossEntropy(SparseMatrix Xt, DenseMatrix Phi, DenseMatrix theta_top)
		{
			SparseMatrix TmpSparseMat = new SparseMatrix(Xt);
			DenseRowVector TmpDenseRowVec = new DenseRowVector(Xt.nCols);
			MatrixOperation.MatrixMultiplyMatrix(TmpSparseMat, Phi, theta_top);
			MatrixOperation.Log(TmpSparseMat);
			MatrixOperation.ElementwiseMatrixMultiplyMatrix(TmpSparseMat, Xt);
			MatrixOperation.VerticalSumMatrix(TmpDenseRowVec, TmpSparseMat);
			return (-1.0f) * TmpDenseRowVec.VectorValue.Sum();
		}
Code example #14
File: LDA_Learn.cs Project: jvking/bp-lda
		public static float SMD_Update(DenseMatrix X, DenseMatrix Grad, DenseRowVector LearningRatePerCol, float eta)
		{
			if (X.nCols != Grad.nCols || X.nRows != Grad.nRows)
			{
				throw new Exception("Dimension mismatch.");
			}
			DenseRowVector nLearnLineSearchPerCol = new DenseRowVector(X.nCols, 0.0f);
			DenseMatrix Update = new DenseMatrix(Grad.nRows, Grad.nCols);
			DenseRowVector TmpRowVec = new DenseRowVector(LearningRatePerCol);
			MatrixOperation.ScalarMultiplyVector(TmpRowVec, -1.0f);
			MatrixOperation.bsxfunVectorMultiplyMatrix(Update, Grad, TmpRowVec);
			MatrixOperation.VerticalMaxMatrix(TmpRowVec, Update);
			MatrixOperation.bsxfunMatrixSubtractVector(Update, Update, TmpRowVec);
			MatrixOperation.Exp(Update);
			MatrixOperation.ElementwiseMatrixMultiplyMatrix(X, X, Update);
			MatrixOperation.VerticalSumMatrix(TmpRowVec, X);
			MatrixOperation.bsxfunMatrixRightDivideVector(X, TmpRowVec);

			return 0.0f;
		}
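Despite returning 0.0f, the useful work of SMD_Update is the in-place update of X: each column is updated as X(:,k) <- X(:,k) .* exp(-mu_k * Grad(:,k)) and then renormalized to sum to 1, i.e. an exponentiated-gradient (mirror descent) step that keeps every column of X on the probability simplex. Subtracting the per-column maximum before Exp is only for numerical stability and cancels in the normalization; eta and nLearnLineSearchPerCol are unused in this version.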
Code example #15
File: LinearAlgebra.cs Project: jvking/bp-lda
		public static void bsxfunMatrixSubtractVector(SparseMatrix Z, SparseMatrix X, DenseRowVector y)
		{
			if (Z.nCols != X.nCols || Z.nRows != X.nRows || Z.nCols != y.Dim)
			{
				throw new Exception("Dimension mismatch.");
			}

			int total = Z.nCols;
			int process_len = (total + THREADNUM - 1) / THREADNUM;
			Parallel.For(0, THREADNUM, new ParallelOptions{ MaxDegreeOfParallelism = MaxMultiThreadDegree}, thread_idx =>
				{
					for (int t = 0; t < process_len; t++)
					{
						int IdxCol = thread_idx * process_len + t;
						if (IdxCol < total)
						{
							var zVal = Z.SparseColumnVectors[IdxCol].Val;
							var xVal = X.SparseColumnVectors[IdxCol].Val;
							var yVal = y.VectorValue[IdxCol];
							int nNonzero = Z.SparseColumnVectors[IdxCol].nNonzero;
							for (int IdxRow = 0; IdxRow < nNonzero; ++IdxRow)
							{
								zVal[IdxRow] = xVal[IdxRow] - yVal;
							}
						}
					}
				});
		}
Code example #16
File: LinearAlgebra.cs Project: jvking/bp-lda
		public static void VerticalSumMatrix(DenseRowVector z, DenseMatrix X)
		{
			z.FillValue(0.0f);
			int nRows = X.nRows;
			Parallel.For(0, X.nCols, new ParallelOptions { MaxDegreeOfParallelism = MaxMultiThreadDegree }, IdxCol =>
				{
					var zVal = z.VectorValue;
					var xVal = X.DenseMatrixValue[IdxCol].VectorValue;
					for (int IdxRow = 0; IdxRow < nRows; ++IdxRow)
					{
						zVal[IdxCol] += xVal[IdxRow];
					}
				});
		}
Code example #17
File: LinearAlgebra.cs Project: jvking/bp-lda
		public static void VerticalMaxMatrix(DenseRowVector z, SparseMatrix X)
		{
			int zDim = z.Dim;
			var zVal = z.VectorValue;
			var XMat = X.SparseColumnVectors;
			Parallel.For(0, zDim, new ParallelOptions { MaxDegreeOfParallelism = MaxMultiThreadDegree }, IdxCol =>
				{
					zVal[IdxCol] = XMat[IdxCol].Val.Max();
				});
		}
Code example #18
File: LinearAlgebra.cs Project: jvking/bp-lda
		public static void VerticalSumMatrix(DenseRowVector z, SparseMatrix X)
		{
			Array.Clear(z.VectorValue, 0, z.VectorValue.Length);
			Parallel.For(0, X.nCols, new ParallelOptions { MaxDegreeOfParallelism = MaxMultiThreadDegree }, IdxCol =>
				{
					var zVal = z.VectorValue;
					var xVal = X.SparseColumnVectors[IdxCol].Val;
					int nNonzero = X.SparseColumnVectors[IdxCol].nNonzero;
					for (int IdxRow = 0; IdxRow < nNonzero; ++IdxRow)
					{
						zVal[IdxCol] += xVal[IdxRow];
					}
				});
		}
Code Example #19
0
File: LinearAlgebra.cs Project: jvking/bp-lda
		/*
         * Project each column of the input matrix X onto the affine space defined by 1^T x = 1
         */
		public static void ProjCols2SimplexPlane(DenseMatrix X)
		{
			DenseRowVector TmpDenseRowVec = new DenseRowVector(X.nCols);
			MatrixOperation.VerticalSumMatrix(TmpDenseRowVec, X);
			MatrixOperation.ScalarMultiplyVector(TmpDenseRowVec, 1.0f / ((float)X.nRows));
			MatrixOperation.bsxfunMatrixSubtractVector(X, X, TmpDenseRowVec);
			MatrixOperation.ScalarAddMatrix(X, 1.0f / ((float)X.nRows));
		}
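Note: the four calls above implement the closed-form Euclidean projection of each column x (of length n = X.nRows) onto the hyperplane \(\{x : \mathbf{1}^{\top}x = 1\}\): compute the column sum, subtract its mean from every entry, then add \(1/n\), i.e.

\[
x \;\leftarrow\; x - \frac{\mathbf{1}^{\top}x - 1}{n}\,\mathbf{1}.
\]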
Code Example #20
0
File: LinearAlgebra.cs Project: jvking/bp-lda
		/*
         * X = bsxfun(@times, X, y): scale each column of X, in place, by the corresponding entry of the dense row vector y
         */
		public static void bsxfunVectorMultiplyMatrix(DenseMatrix X, DenseRowVector y)
		{
			if (X.nCols != y.Dim)
			{
				throw new Exception("The Number of columns in the two inputs does not match!");
			}
			Parallel.For(0, X.nCols, new ParallelOptions { MaxDegreeOfParallelism = MaxMultiThreadDegree }, IdxCol =>
				{
					var xVal = X.DenseMatrixValue[IdxCol].VectorValue;
					var yVal = y.VectorValue[IdxCol];
					int nRows = X.nRows;
					for (int IdxRow = 0; IdxRow < nRows; ++IdxRow)
					{
						xVal[IdxRow] *= yVal;
					}
				});
		}
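A minimal usage sketch of this in-place overload (the sizes below are hypothetical; only the constructors and the call shown in this file are assumed):

// Scale each column of a 3x2 column-major matrix by the matching entry of y.
var X = new DenseMatrix(3, 2, true);               // 2 columns of length 3
var y = new DenseRowVector(2, 2.0f);               // per-column scale factors
MatrixOperation.bsxfunVectorMultiplyMatrix(X, y);  // afterwards column j of X has been multiplied by y[j]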
Code Example #21
0
File: LinearAlgebra.cs Project: jvking/bp-lda
		public DenseMatrix(int NumRows, int NumCols, bool IsPerColumn)
		{
			if (IsPerColumn)
			{
				nRows = NumRows;
				nCols = NumCols;
				isPerColumn = true;
				DenseMatrixValue = new DenseColumnVector[nCols];
				for (int IdxCol = 0; IdxCol < nCols; IdxCol++)
				{
					DenseMatrixValue[IdxCol] = new DenseColumnVector(nRows);
				}
			}
			else
			{
				nRows = NumRows;
				nCols = NumCols;
				isPerColumn = false;
				DenseMatrixValuePerRow = new DenseRowVector[nRows];
				for (int IdxRow = 0; IdxRow < nRows; IdxRow++)
				{
					DenseMatrixValuePerRow[IdxRow] = new DenseRowVector(nCols);
				}
			}
		}
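A short sketch of the two storage layouts this constructor selects (the indices are illustrative only):

var perCol = new DenseMatrix(3, 4, true);                // stored as 4 DenseColumnVector objects of length 3
perCol.DenseMatrixValue[1].VectorValue[2] = 1.0f;        // element at (row 2, column 1)

var perRow = new DenseMatrix(3, 4, false);               // stored as 3 DenseRowVector objects of length 4
perRow.DenseMatrixValuePerRow[2].VectorValue[1] = 1.0f;  // the same logical element, addressed row-first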
Code Example #22
0
File: LinearAlgebra.cs Project: jvking/bp-lda
		public static void bsxfunVectorMultiplyMatrix(SparseMatrix X, DenseRowVector y)
		{
			if (X.nCols != y.Dim)
			{
				throw new Exception("Dimension mismatch.");
			}
			Parallel.For(0, X.nCols, new ParallelOptions { MaxDegreeOfParallelism = MaxMultiThreadDegree }, IdxCol =>
				{
					var xVal = X.SparseColumnVectors[IdxCol].Val;
					var yVal = y.VectorValue[IdxCol];
					var nNonzero = X.SparseColumnVectors[IdxCol].nNonzero;
					for (int IdxRow = 0; IdxRow < nNonzero; ++IdxRow)
					{
						xVal[IdxRow] *= yVal;
					}
				});
		}
Code Example #23
0
File: LinearAlgebra.cs Project: jvking/bp-lda
		public void DeepCopyFrom(DenseRowVector SourceVector)
		{
			// Check dimension
			if (Dim != SourceVector.Dim)
			{
				throw new Exception("Dimension mismatch during deep copy of DenseRowVector.");
			}
			// Deep copy of the float array
			Array.Copy(SourceVector.VectorValue,VectorValue,Dim);
		}
Code Example #24
0
File: LinearAlgebra.cs Project: jvking/bp-lda
		/*
         * Z = bsxfun(@times, X, y): Z[:, j] = X[:, j] * y[j], where y is a dense row vector
         */
		public static void bsxfunVectorMultiplyMatrix(SparseMatrix Z, SparseMatrix X, DenseRowVector y)
		{
			if (Z.nCols != X.nCols || Z.nRows != X.nRows || Z.nCols != y.Dim)
			{
				throw new Exception("Dimension mismatch!");
			}
			int ZnCols = Z.nCols;
			Parallel.For(0, ZnCols, new ParallelOptions { MaxDegreeOfParallelism = MaxMultiThreadDegree }, IdxCol =>
				{
					int nNz = Z.SparseColumnVectors[IdxCol].nNonzero;
					var ZVal = Z.SparseColumnVectors[IdxCol].Val;
					var XVal = X.SparseColumnVectors[IdxCol].Val;
					var yVal = y.VectorValue;
					for (int IdxRow = 0; IdxRow < nNz; ++IdxRow)
					{
						ZVal[IdxRow] = XVal[IdxRow] * yVal[IdxCol];
					}
				});
		}
Code Example #25
0
File: LinearAlgebra.cs Project: jvking/bp-lda
		public static DenseRowVector ElementwiseVectorMultiplyVector(DenseRowVector x, DenseRowVector y)
		{
			if (x.Dim != y.Dim)
			{
				throw new Exception("Dimension mismatch.");
			}
			DenseRowVector z = new DenseRowVector(x.Dim);
			for (int IdxCol = 0; IdxCol < z.Dim; IdxCol++ )
			{
				z.VectorValue[IdxCol] = x.VectorValue[IdxCol] * y.VectorValue[IdxCol];
			}
			return z;
		}
Code Example #26
0
File: LinearAlgebra.cs Project: jvking/bp-lda
		public static void bsxfunVectorMultiplyMatrix(DenseMatrix Z, DenseMatrix X, DenseRowVector y)
		{
			if (X.nCols != y.Dim || Z.nCols != X.nCols || Z.nRows != X.nRows)
			{
				throw new Exception("The Number of columns in the two inputs does not match!");
			}

			int total = Z.nCols * Z.nRows;
			int process_len = (total + THREADNUM - 1) / THREADNUM;
			Parallel.For(0, THREADNUM, new ParallelOptions{ MaxDegreeOfParallelism = MaxMultiThreadDegree}, thread_idx =>
				{
					for (int t = 0; t < process_len; t++)
					{
						int id = thread_idx * process_len + t;
						if (id < total)
						{
							int IdxCol = id / Z.nRows;
							int IdxRow = id % Z.nRows;
							Z.DenseMatrixValue[IdxCol].VectorValue[IdxRow] = X.DenseMatrixValue[IdxCol].VectorValue[IdxRow] * y.VectorValue[IdxCol];
						}
						else
						{
							break;
						}
					}
				});
		}
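The THREADNUM / process_len pattern used here (and in several other kernels in this file) gives each worker one contiguous chunk of a flat element index and recovers (row, column) from that index. A stripped-down sketch of just this partitioning, filling Z[r, c] = r + c for illustration (THREADNUM and MaxMultiThreadDegree are the static fields already used above; the method name is made up, and Z is assumed to use the per-column layout):

public static void FlatIndexFillDemo(DenseMatrix Z)
{
	int total = Z.nCols * Z.nRows;
	int process_len = (total + THREADNUM - 1) / THREADNUM;   // ceiling division: chunk size per worker
	Parallel.For(0, THREADNUM, new ParallelOptions { MaxDegreeOfParallelism = MaxMultiThreadDegree }, thread_idx =>
		{
			for (int t = 0; t < process_len; t++)
			{
				int id = thread_idx * process_len + t;        // flat element index
				if (id >= total)
					break;                                    // the last chunk may be shorter
				int IdxCol = id / Z.nRows;                    // columns are contiguous in the flat order
				int IdxRow = id % Z.nRows;
				Z.DenseMatrixValue[IdxCol].VectorValue[IdxRow] = IdxRow + IdxCol;
			}
		});
}

Chunking a flat index this way keeps the parallel loop coarse-grained (THREADNUM delegates in total) rather than scheduling one delegate per element or per column.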
Code Example #27
0
File: LinearAlgebra.cs Project: jvking/bp-lda
		/*
         * z = x * y, where y is a scalar
         */
		public static void ScalarMultiplyVector(DenseRowVector z, DenseRowVector x, float y)
		{
			var zVal = z.VectorValue;
			var xVal = x.VectorValue;
			int Dim = z.Dim;
			for (int IdxCol = 0; IdxCol < Dim; ++IdxCol)
			{
				zVal[IdxCol] = xVal[IdxCol] * y;
			}
		}
Code Example #28
0
File: LinearAlgebra.cs Project: jvking/bp-lda
		/*
         * Z = bsxfun(@minus, X, y)
         */
		public static void bsxfunMatrixSubtractVector(DenseMatrix Z, DenseMatrix X, DenseRowVector y)
		{
			if (Z.nCols != X.nCols || Z.nRows != X.nRows || Z.nCols != y.Dim)
			{
				throw new Exception("Dimension mismatch.");
			}

			int total = Z.nCols * Z.nRows;
			int process_len = (total + THREADNUM - 1) / THREADNUM;
			Parallel.For(0, THREADNUM, new ParallelOptions{ MaxDegreeOfParallelism = MaxMultiThreadDegree}, thread_idx =>
				{
					for (int t = 0; t < process_len; t++)
					{
						int id = thread_idx * process_len + t;
						if (id < total)
						{
							int IdxCol = id / Z.nRows;
							int IdxRow = id % Z.nRows;
							Z.DenseMatrixValue[IdxCol].VectorValue[IdxRow] = X.DenseMatrixValue[IdxCol].VectorValue[IdxRow] - y.VectorValue[IdxCol];
						}
						else
							break;
					}
				});
		}
Code Example #29
0
File: LinearAlgebra.cs Project: jvking/bp-lda
		public static void ScalarAddVector(DenseRowVector z, float y)
		{
			var zVal = z.VectorValue;
			for (int IdxCol = 0; IdxCol < z.Dim; IdxCol++)
			{
				zVal[IdxCol] += y;
			}
		}
Code Example #30
0
File: LDA_Learn.cs Project: jvking/bp-lda
		/*
         * Forward activation of Latent Dirichlet Allocation model (Mirror descent approach)
         */
		public static void ForwardActivation_LDA(SparseMatrix Xt, DNNRun_t DNNRun, paramModel_t paramModel, bool flag_IsTraining)
		{
			// -------- Extract parameters --------
			int nHid = paramModel.nHid;
			int nHidLayer = paramModel.nHidLayer;
			float eta = paramModel.eta;
			float T_value = paramModel.T_value;
			string OutputType = paramModel.OutputType;
			float To = paramModel.To;
			int BatchSize = Xt.nCols;

			// -------- Hidden activations --------
			// ---- Reset the effective number of hidden layers (mainly for alpha<1 case) ----
			Array.Clear(DNNRun.nHidLayerEffective,0,DNNRun.nHidLayerEffective.Length);
			// ---- T is different over layers (adaptive step-size MDA) ----
			DenseRowVector T = new DenseRowVector(BatchSize, T_value);
			SparseMatrix Phitheta = new SparseMatrix(Xt);
			DenseRowVector loss_pre = new DenseRowVector(BatchSize);
			DenseRowVector loss_post = new DenseRowVector(BatchSize);
			DenseRowVector loss_gap = new DenseRowVector(BatchSize);
			DenseRowVector loss_gap_thresh = new DenseRowVector(BatchSize);
			DenseRowVector gradproj = new DenseRowVector(BatchSize);
			SparseMatrix TmpSparseMat = new SparseMatrix(Xt);
			DenseMatrix TmpDenseMat = new DenseMatrix(nHid, BatchSize);
			DenseMatrix LogTheta = new DenseMatrix(nHid, BatchSize);
			DenseRowVector TmpDenseRowVec = new DenseRowVector(BatchSize);
			DenseMatrix NegGrad = new DenseMatrix(nHid, BatchSize);
			DenseMatrix LLR = new DenseMatrix(nHid, BatchSize);            
			//for (int IdxSample = 0; IdxSample < BatchSize; IdxSample++)
			Parallel.For(0, BatchSize, new ParallelOptions { MaxDegreeOfParallelism = MatrixOperation.MaxMultiThreadDegree }, IdxSample =>
				{
					float KLDivergence = 0.0f;
					// The forward activation for each data sample
					for (int IdxLayer = 0; IdxLayer < nHidLayer; IdxLayer++)
					{
						// Compute the loss before unfolding the current layer
						if (IdxLayer == 0)
						{
							MatrixOperation.MatrixMultiplyVector(
								Phitheta.SparseColumnVectors[IdxSample], 
								paramModel.Phi, 
								DNNRun.theta0.DenseMatrixValue[IdxSample]
							);
						}
						else
						{
							MatrixOperation.MatrixMultiplyVector(
								Phitheta.SparseColumnVectors[IdxSample], 
								paramModel.Phi, 
								DNNRun.theta_pool[IdxLayer - 1].DenseMatrixValue[IdxSample]
							);
						}
						if (IdxLayer > 1)
						{
							loss_pre.VectorValue[IdxSample] = loss_post.VectorValue[IdxSample];
						}
						else
						{
							MatrixOperation.ScalarAddVector(TmpSparseMat.SparseColumnVectors[IdxSample], Phitheta.SparseColumnVectors[IdxSample], 1e-12f);
							MatrixOperation.Log(TmpSparseMat.SparseColumnVectors[IdxSample]);
							MatrixOperation.ElementwiseVectorMultiplyVector(TmpSparseMat.SparseColumnVectors[IdxSample], Xt.SparseColumnVectors[IdxSample]);
							loss_pre.VectorValue[IdxSample] = (-1.0f)*TmpSparseMat.SparseColumnVectors[IdxSample].Sum();
							if (IdxLayer == 0)
							{
								MatrixOperation.ScalarAddVector(TmpDenseMat.DenseMatrixValue[IdxSample], DNNRun.theta0.DenseMatrixValue[IdxSample], 1e-12f);
							}
							else
							{
								MatrixOperation.ScalarAddVector(TmpDenseMat.DenseMatrixValue[IdxSample], DNNRun.theta_pool[IdxLayer - 1].DenseMatrixValue[IdxSample], 1e-12f);
							}
							MatrixOperation.Log(TmpDenseMat.DenseMatrixValue[IdxSample]);
							MatrixOperation.ElementwiseVectorMultiplyVector(TmpDenseMat.DenseMatrixValue[IdxSample], paramModel.b);
							TmpDenseRowVec.VectorValue[IdxSample] = TmpDenseMat.DenseMatrixValue[IdxSample].Sum();
							loss_pre.VectorValue[IdxSample] -= TmpDenseRowVec.VectorValue[IdxSample];
						}
						// Compute the hidden activation of the current layer
						MatrixOperation.ScalarAddVector(TmpSparseMat.SparseColumnVectors[IdxSample], Phitheta.SparseColumnVectors[IdxSample], 1e-12f);
						MatrixOperation.ElementwiseVectorDivideVector(
							TmpSparseMat.SparseColumnVectors[IdxSample], 
							Xt.SparseColumnVectors[IdxSample], 
							TmpSparseMat.SparseColumnVectors[IdxSample]
						);
						MatrixOperation.MatrixTransposeMultiplyVector(
							TmpDenseMat.DenseMatrixValue[IdxSample], 
							paramModel.Phi, 
							TmpSparseMat.SparseColumnVectors[IdxSample]
						);
						if (IdxLayer == 0)
						{
							MatrixOperation.ScalarAddVector(
								NegGrad.DenseMatrixValue[IdxSample], 
								DNNRun.theta0.DenseMatrixValue[IdxSample], 
								1e-12f
							);
						}
						else
						{
							MatrixOperation.ScalarAddVector(
								NegGrad.DenseMatrixValue[IdxSample], 
								DNNRun.theta_pool[IdxLayer - 1].DenseMatrixValue[IdxSample], 
								1e-12f
							);
						}
						MatrixOperation.ElementwiseVectorDivideVector(NegGrad.DenseMatrixValue[IdxSample], paramModel.b, NegGrad.DenseMatrixValue[IdxSample]);
						MatrixOperation.VectorAddVector(NegGrad.DenseMatrixValue[IdxSample], TmpDenseMat.DenseMatrixValue[IdxSample]);
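						// NegGrad now holds Phi^T (x ./ (Phi*theta)) + b ./ theta, i.e. the negative gradient of the
						// per-document loss -x^T log(Phi*theta) - b^T log(theta) at theta = theta_{l-1} (theta0 for the first layer).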
						// Line search for the parameter T
						if (paramModel.alpha >= 1)
						{
							T.VectorValue[IdxSample] *= (1.0f / eta);
						} // only perform line search for alpha>=1 case (convex)
						loss_post.VectorValue[IdxSample] = loss_pre.VectorValue[IdxSample];
						if (IdxLayer == 0)
						{
							MatrixOperation.Log(LogTheta.DenseMatrixValue[IdxSample], DNNRun.theta0.DenseMatrixValue[IdxSample]);
						}
						else
						{
							MatrixOperation.Log(LogTheta.DenseMatrixValue[IdxSample], DNNRun.theta_pool[IdxLayer - 1].DenseMatrixValue[IdxSample]);
						}
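						// Backtracking line search: attempt a mirror-descent step with the current step size T and,
						// while the sufficient-decrease test below fails (alpha >= 1 case), scale T by eta and retry.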
						while (true)
						{
							MatrixOperation.ScalarMultiplyVector(DNNRun.theta_pool[IdxLayer].DenseMatrixValue[IdxSample],
								NegGrad.DenseMatrixValue[IdxSample], T.VectorValue[IdxSample]);
							MatrixOperation.VectorAddVector(DNNRun.theta_pool[IdxLayer].DenseMatrixValue[IdxSample],
								LogTheta.DenseMatrixValue[IdxSample]);
							MatrixOperation.ScalarAddVector(DNNRun.theta_pool[IdxLayer].DenseMatrixValue[IdxSample],
								(-1.0f) * DNNRun.theta_pool[IdxLayer].DenseMatrixValue[IdxSample].MaxValue());
							MatrixOperation.Exp(DNNRun.theta_pool[IdxLayer].DenseMatrixValue[IdxSample]);
							MatrixOperation.ScalarMultiplyVector(DNNRun.theta_pool[IdxLayer].DenseMatrixValue[IdxSample],
								(1.0f / DNNRun.theta_pool[IdxLayer].DenseMatrixValue[IdxSample].Sum()));
							// Compute the loss after unfolding the current layer
							MatrixOperation.MatrixMultiplyVector(Phitheta.SparseColumnVectors[IdxSample],
								paramModel.Phi, DNNRun.theta_pool[IdxLayer].DenseMatrixValue[IdxSample]);
							MatrixOperation.Log(Phitheta.SparseColumnVectors[IdxSample]);
							loss_post.VectorValue[IdxSample]
							= (-1.0f) * MatrixOperation.InnerProduct(Xt.SparseColumnVectors[IdxSample], Phitheta.SparseColumnVectors[IdxSample]);
							MatrixOperation.ScalarAddVector(TmpDenseMat.DenseMatrixValue[IdxSample], DNNRun.theta_pool[IdxLayer].DenseMatrixValue[IdxSample], 1e-12f);
							MatrixOperation.Log(TmpDenseMat.DenseMatrixValue[IdxSample]);
							loss_post.VectorValue[IdxSample] -= MatrixOperation.InnerProduct(TmpDenseMat.DenseMatrixValue[IdxSample], paramModel.b);
							if (IdxLayer == 0)
							{
								MatrixOperation.VectorSubtractVector(TmpDenseMat.DenseMatrixValue[IdxSample],
									DNNRun.theta_pool[IdxLayer].DenseMatrixValue[IdxSample],
									DNNRun.theta0.DenseMatrixValue[IdxSample]);
							}
							else
							{
								MatrixOperation.VectorSubtractVector(TmpDenseMat.DenseMatrixValue[IdxSample],
									DNNRun.theta_pool[IdxLayer].DenseMatrixValue[IdxSample],
									DNNRun.theta_pool[IdxLayer - 1].DenseMatrixValue[IdxSample]);
							}
							loss_gap.VectorValue[IdxSample] = loss_post.VectorValue[IdxSample] - loss_pre.VectorValue[IdxSample];
							gradproj.VectorValue[IdxSample]
							= (-1.0f) * MatrixOperation.InnerProduct(NegGrad.DenseMatrixValue[IdxSample],
								TmpDenseMat.DenseMatrixValue[IdxSample]);
							loss_gap_thresh.VectorValue[IdxSample] = gradproj.VectorValue[IdxSample]
								+ (0.5f / T.VectorValue[IdxSample]) * (float)Math.Pow((double)TmpDenseMat.DenseMatrixValue[IdxSample].L1Norm(), 2.0);
							if (loss_gap.VectorValue[IdxSample] > loss_gap_thresh.VectorValue[IdxSample] + 1e-12 && paramModel.alpha>=1)
							{
								T.VectorValue[IdxSample] *= eta;
							} // Only perform line search for alpha>=1 case (convex)
							else
							{
								DNNRun.T_pool.DenseMatrixValuePerRow[IdxLayer].VectorValue[IdxSample] = T.VectorValue[IdxSample];
								break;
							}
						}
						// Count the effective number of hidden layers
						++DNNRun.nHidLayerEffective[IdxSample];
						// stop MDA if termination condition holds
						if (paramModel.flag_AdaptivenHidLayer)
						{
							if (IdxLayer == 0)
							{
								MatrixOperation.ElementwiseVectorDivideVector(
									LLR.DenseMatrixValue[IdxSample],
									DNNRun.theta_pool[IdxLayer].DenseMatrixValue[IdxSample],
									DNNRun.theta0.DenseMatrixValue[IdxSample]
								);
								MatrixOperation.Log(LLR.DenseMatrixValue[IdxSample]);
							}
							else
							{
								MatrixOperation.ElementwiseVectorDivideVector(
									LLR.DenseMatrixValue[IdxSample],
									DNNRun.theta_pool[IdxLayer].DenseMatrixValue[IdxSample],
									DNNRun.theta_pool[IdxLayer - 1].DenseMatrixValue[IdxSample]
								);
								MatrixOperation.Log(LLR.DenseMatrixValue[IdxSample]);
								MatrixOperation.ResetVectorSparsePattern(
									LLR.DenseMatrixValue[IdxSample], 
									DNNRun.theta_pool[IdxLayer].DenseMatrixValue[IdxSample]
								);
							}
							KLDivergence = MatrixOperation.InnerProduct(
								LLR.DenseMatrixValue[IdxSample], 
								DNNRun.theta_pool[IdxLayer].DenseMatrixValue[IdxSample]
							);
							if (KLDivergence < 1e-12f)
							{
								break;
							}
						}
					}
					// ---- Generate output ----
					switch (OutputType)
					{
					case "softmaxCE":
						MatrixOperation.MatrixMultiplyVector(
							DNNRun.y.DenseMatrixValue[IdxSample],
							paramModel.U,
							DNNRun.theta_pool[DNNRun.nHidLayerEffective[IdxSample] - 1].DenseMatrixValue[IdxSample]
						);
						MatrixOperation.ScalarAddVector(DNNRun.y.DenseMatrixValue[IdxSample], To);
						TmpDenseRowVec.VectorValue[IdxSample] = DNNRun.y.DenseMatrixValue[IdxSample].MaxValue();
						MatrixOperation.ScalarAddVector(DNNRun.y.DenseMatrixValue[IdxSample], (-1.0f) * TmpDenseRowVec.VectorValue[IdxSample]);
						MatrixOperation.Exp(DNNRun.y.DenseMatrixValue[IdxSample]);
						TmpDenseRowVec.VectorValue[IdxSample] = DNNRun.y.DenseMatrixValue[IdxSample].Sum();
						MatrixOperation.ScalarMultiplyVector(DNNRun.y.DenseMatrixValue[IdxSample], (1.0f) / TmpDenseRowVec.VectorValue[IdxSample]);
						break;
					case "unsupLDA":
						// Will not compute the reconstructed input at forward activation to save time during training.
						break;
					case "linearQuad":
						MatrixOperation.MatrixMultiplyVector(
							DNNRun.y.DenseMatrixValue[IdxSample],
							paramModel.U,
							DNNRun.theta_pool[DNNRun.nHidLayerEffective[IdxSample] - 1].DenseMatrixValue[IdxSample]
						);
						break;
					case "linearCE":
						throw new Exception("linearCE not implemented.");
					default:
						throw new Exception("Unknown OutputType.");
					}
				});            
		}
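Reading the layer loop above as math (with theta_{l-1} the previous layer's output, or theta0 for the first layer, Phi = paramModel.Phi, x the input word-count column of Xt, and b = paramModel.b), each unfolded layer applies one normalized exponentiated-gradient (mirror-descent) step on the per-document objective

\[
L(\theta) = -x^{\top}\log(\Phi\theta) - b^{\top}\log\theta,
\qquad
\theta_{l} \;\propto\; \theta_{l-1} \odot \exp\!\Big(T_{l}\,\big[\Phi^{\top}\!\big(x \oslash \Phi\theta_{l-1}\big) + b \oslash \theta_{l-1}\big]\Big),
\]

where T_l is chosen by the backtracking test in the inner while-loop and, when flag_AdaptivenHidLayer is set, the loop over layers terminates early once the KL divergence between consecutive theta's falls below 1e-12.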