Пример #1
0
		/// <summary>
		/// Fills both parameter objects with the built-in defaults for the
		/// supervised setup (output type "linearQuad").
		/// </summary>
		/// <param name="paramModel">Model parameter object whose members are overwritten.</param>
		/// <param name="paramTrain">Training parameter object whose members are overwritten.</param>
		public static void SetupDefaultParams(paramModel_t paramModel, paramTrain_t paramTrain)
		{
			// ---- Model: dimensions ----
			paramModel.nInput = 5000;
			paramModel.nHid = 5;
			paramModel.nHidLayer = 10;

			// ---- Model: scalar hyper-parameters and output type ----
			paramModel.To = 1;
			paramModel.eta = 0.5f;
			paramModel.alpha = 1.001f;
			paramModel.beta = 1.0001f;
			paramModel.OutputType = "linearQuad";

			// ---- Training: epochs and batch sizes ----
			paramTrain.nEpoch = 100;
			paramTrain.BatchSize = 1000;
			paramTrain.BatchSize_Test = 10000;
			paramTrain.flag_BachSizeSchedule = false;

			// ---- Training: learning rates ----
			paramTrain.mu_Phi = 0.01f;
			paramTrain.mu_Phi_ReduceFactor = 10.0f;
			paramTrain.mu_U = 1.0f;
			paramTrain.LearnRateSchedule = "Constant";

			// ---- Training: reporting / saving / dumping cadence ----
			paramTrain.nSamplesPerDisplay = 1000;
			paramTrain.nEpochPerSave = 1;
			paramTrain.nEpochPerTest = 1;
			paramTrain.nEpochPerDump = 5;
			paramTrain.DebugLevel = DebugLevel_t.high;

			// ---- Training: threading ----
			paramTrain.ThreadNum = 32;
			paramTrain.MaxMultiThreadDegree = 32;

			// ---- Training: feature switches ----
			paramTrain.flag_DumpFeature = false;
			paramTrain.flag_ExternalEval = false;
			paramTrain.flag_SaveAllModels = false;
			paramTrain.flag_HasValidSet = false;
			paramTrain.flag_RunningAvg = true;
		}
Пример #2
0
 /// <summary>
 /// Fills both parameter objects with the built-in defaults for the
 /// unsupervised setup (output type "unsupLDA").
 /// </summary>
 /// <param name="paramModel">Model parameter object whose members are overwritten.</param>
 /// <param name="paramTrain">Training parameter object whose members are overwritten.</param>
 public static void SetupDefaultParams(paramModel_t paramModel, paramTrain_t paramTrain)
 {
     // ---- Model: dimensions ----
     paramModel.nInput = 5000;
     paramModel.nHid = 5;
     paramModel.nHidLayer = 10;

     // ---- Model: scalar hyper-parameters and output type ----
     paramModel.To = 1;
     paramModel.eta = 0.5f;
     paramModel.alpha = 1.001f;
     paramModel.beta = 1.0001f;
     paramModel.OutputType = "unsupLDA";

     // ---- Training: epochs and batch sizes ----
     paramTrain.nEpoch = 100;
     paramTrain.BatchSize = 1000;
     paramTrain.BatchSize_Test = 10000;
     paramTrain.flag_BachSizeSchedule = false;

     // ---- Training: learning rates ----
     paramTrain.mu_Phi = 0.01f;
     paramTrain.mu_Phi_ReduceFactor = 10.0f;
     paramTrain.mu_U = 1.0f;
     paramTrain.LearnRateSchedule = "Constant";

     // ---- Training: reporting / saving / dumping cadence ----
     paramTrain.nSamplesPerDisplay = 10000;
     paramTrain.nEpochPerSave = 1;
     paramTrain.nEpochPerTest = 1;
     paramTrain.nEpochPerDump = 5;
     paramTrain.DebugLevel = DebugLevel_t.high;

     // ---- Training: threading ----
     paramTrain.ThreadNum = 32;
     paramTrain.MaxMultiThreadDegree = 32;

     // ---- Training: feature switches ----
     paramTrain.flag_DumpFeature = false;
     paramTrain.flag_RunningAvg = true;
 }
Пример #3
0
		/// <summary>
		/// Entry point for the supervised (BP-sLDA) trainer: applies defaults, parses the
		/// command line, loads the train/test (and optional validation) data, and starts training.
		/// </summary>
		/// <param name="args">Command-line tokens forwarded to ParseArgument.</param>
		static void Main(string[] args)
		{
			// Defaults first; the command line may override them afterwards.
			paramModel_t paramModel = new paramModel_t();
			paramTrain_t paramTrain = new paramTrain_t();
			SetupDefaultParams(paramModel, paramTrain);

			// File names handed to the parser by reference.
			string ModelFile = "";
			string ResultFile = "";

			bool parsedOk = ParseArgument(args, paramModel, paramTrain, ref ModelFile, ref ResultFile);
			if (!parsedOk)
			{
				return;
			}

			// Every hidden layer gets the same initial T entry (T_value).
			float[] perLayerT = new float[paramModel.nHidLayer];
			for (int layer = 0; layer < perLayerT.Length; layer++)
			{
				perLayerT[layer] = paramModel.T_value;
			}
			paramModel.T = perLayerT;

			// Thread configuration for the matrix routines.
			MatrixOperation.THREADNUM = paramTrain.ThreadNum;
			MatrixOperation.MaxMultiThreadDegree = paramTrain.MaxMultiThreadDegree;

			// Training and test sets are always loaded.
			SparseMatrix TrainData = DataLoader.InputDataLoader(paramTrain.TrainInputFile, paramModel.nInput);
			SparseMatrix TrainLabel = DataLoader.LabelDataLoader(paramTrain.TrainLabelFile, paramModel.nOutput, paramModel.OutputType);
			SparseMatrix TestData = DataLoader.InputDataLoader(paramTrain.TestInputFile, paramModel.nInput);
			SparseMatrix TestLabel = DataLoader.LabelDataLoader(paramTrain.TestLabelFile, paramModel.nOutput, paramModel.OutputType);

			// The validation set is optional; both references stay null without it.
			SparseMatrix ValidData = null;
			SparseMatrix ValidLabel = null;
			if (paramTrain.flag_HasValidSet)
			{
				ValidData = DataLoader.InputDataLoader(paramTrain.ValidInputFile, paramModel.nInput);
				ValidLabel = DataLoader.LabelDataLoader(paramTrain.ValidLabelFile, paramModel.nOutput, paramModel.OutputType);
				paramTrain.nValid = ValidData.nCols;
			}

			// Sample counts are taken from the column counts of the loaded matrices.
			paramTrain.nTrain = TrainData.nCols;
			paramTrain.nTest = TestData.nCols;

			// Supervised learning of the BP-sLDA model (mirror-descent back-propagation):
			//   (i)  inference: feedforward network via MDA unfolding
			//   (ii) learning:  projected mini-batch SGD (P-SGD) using back propagation
			LDA_Learn.TrainingBP_sLDA(
				TrainData, TrainLabel,
				TestData, TestLabel,
				ValidData, ValidLabel,
				paramModel, paramTrain,
				ModelFile, ResultFile);
		}
Пример #4
0
        /// <summary>
        /// Entry point for the unsupervised (BP-LDA) trainer: applies defaults, parses the
        /// command line, loads the train/test data, and starts training.
        /// </summary>
        /// <param name="args">Command-line tokens forwarded to ParseArgument.</param>
        static void Main(string[] args)
        {
            // Defaults first; the command line may override them afterwards.
            paramModel_t paramModel = new paramModel_t();
            paramTrain_t paramTrain = new paramTrain_t();
            SetupDefaultParams(paramModel, paramTrain);

            // File names handed to the parser by reference.
            string TrainInputFile = "";
            string TestInputFile = "";
            string ModelFile = "";
            string ResultFile = "";

            bool parsedOk = ParseArgument(
                args, paramModel, paramTrain,
                ref TrainInputFile, ref TestInputFile, ref ModelFile, ref ResultFile);
            if (!parsedOk)
            {
                return;
            }

            // Every hidden layer gets the same initial T entry (T_value).
            float[] perLayerT = new float[paramModel.nHidLayer];
            for (int layer = 0; layer < perLayerT.Length; layer++)
            {
                perLayerT[layer] = paramModel.T_value;
            }
            paramModel.T = perLayerT;

            // Thread configuration for the matrix routines.
            MatrixOperation.THREADNUM = paramTrain.ThreadNum;
            MatrixOperation.MaxMultiThreadDegree = paramTrain.MaxMultiThreadDegree;

            // Load the data; sample counts come from the column counts of the matrices.
            SparseMatrix TrainData = DataLoader.InputDataLoader(TrainInputFile, paramModel.nInput);
            SparseMatrix TestData = DataLoader.InputDataLoader(TestInputFile, paramModel.nInput);
            paramTrain.nTrain = TrainData.nCols;
            paramTrain.nTest = TestData.nCols;

            // Unsupervised learning of the LDA model (unfolding and back-propagation):
            //   (i)  inference: feedforward network via MDA unfolding
            //   (ii) learning:  projected mini-batch SGD (P-SGD) using back propagation
            LDA_Learn.TrainingBP_LDA(TrainData, TestData, paramModel, paramTrain, ModelFile, ResultFile);
        }
Пример #5
0
		/// <summary>
		/// Parses "--key value" command-line pairs into the model and training parameter objects.
		/// </summary>
		/// <param name="args">Command-line tokens, expected as alternating key/value pairs.</param>
		/// <param name="paramModel">Receives the model options.</param>
		/// <param name="paramTrain">Receives the training options and the data file paths.</param>
		/// <param name="ModelFile">Passed through untouched; no option handled here assigns it.</param>
		/// <param name="ResultFile">Set from <c>--ResultFile</c>.</param>
		/// <returns>
		/// <c>true</c> when all arguments were accepted; <c>false</c> (after printing a message) when a
		/// key is unknown, the last key has no value, only one of the two validation files was given,
		/// or one of the four mandatory train/test files is missing.
		/// </returns>
		/// <exception cref="ArgumentException">
		/// Unsupported <c>--OutputType</c>, a malformed <c>--BatchSizeSchedule</c> entry, or an alpha
		/// that is not greater than zero.
		/// </exception>
		/// <remarks>
		/// Numeric, boolean and enum values go through <c>int.Parse</c>, <c>float.Parse</c>,
		/// <c>bool.Parse</c> and <c>Enum.Parse</c>; their exceptions propagate to the caller.
		/// </remarks>
		public static bool ParseArgument(
			string[] args,
			paramModel_t paramModel,
			paramTrain_t paramTrain,
			ref string ModelFile,
			ref string ResultFile
		)
		{
			string ArgKey;
			string ArgValue;
			for (int IdxArg = 0; IdxArg < args.Length - 1; IdxArg += 2)
			{
				ArgKey = args[IdxArg];
				ArgValue = args[IdxArg + 1];
				switch (ArgKey)
				{
					case "--nHid":
						paramModel.nHid = int.Parse(ArgValue);
						break;
					case "--nHidLayer":
						paramModel.nHidLayer = int.Parse(ArgValue);
						break;
					case "--To":
						paramModel.To = float.Parse(ArgValue);
						break;
					case "--alpha":
						paramModel.alpha = float.Parse(ArgValue);
						break;
					case "--beta":
						paramModel.beta = float.Parse(ArgValue);
						break;
					case "--nEpoch":
						paramTrain.nEpoch = int.Parse(ArgValue);
						break;
					case "--BatchSize":
						paramTrain.BatchSize = int.Parse(ArgValue);
						break;
					case "--BatchSize_Test":
						paramTrain.BatchSize_Test = int.Parse(ArgValue);
						break;
					case "--mu_Phi":
						paramTrain.mu_Phi = float.Parse(ArgValue);
						break;
					case "--mu_U":
						paramTrain.mu_U = float.Parse(ArgValue);
						break;
					case "--nSamplesPerDisplay":
						paramTrain.nSamplesPerDisplay = int.Parse(ArgValue);
						break;
					case "--nEpochPerSave":
						paramTrain.nEpochPerSave = int.Parse(ArgValue);
						break;
					case "--nEpochPerTest":
						paramTrain.nEpochPerTest = int.Parse(ArgValue);
						break;
					case "--TrainInputFile":
						paramTrain.TrainInputFile = ArgValue;
						break;
					case "--TestInputFile":
						paramTrain.TestInputFile = ArgValue;
						break;
					case "--TrainLabelFile":
						paramTrain.TrainLabelFile = ArgValue;
						break;
					case "--TestLabelFile":
						paramTrain.TestLabelFile = ArgValue;
						break;
					case "--ResultFile":
						ResultFile = ArgValue;
						break;
					case "--nInput":
						paramModel.nInput = int.Parse(ArgValue);
						break;
					case "--nOutput":
						paramModel.nOutput = int.Parse(ArgValue);
						break;
					case "--OutputType":
						paramModel.OutputType = ArgValue;
						if (paramModel.OutputType != "softmaxCE" && paramModel.OutputType != "linearQuad" && paramModel.OutputType != "linearCE")
						{
							throw new ArgumentException("Unknown OutputType for supervised learning. Only softmaxCE/linearQuad/linearCE is supported.");
						}
						break;
					case "--LearnRateSchedule":
						paramTrain.LearnRateSchedule = ArgValue;
						break;
					case "--flag_DumpFeature":
						paramTrain.flag_DumpFeature = bool.Parse(ArgValue);
						break;
					case "--nEpochPerDump":
						paramTrain.nEpochPerDump = int.Parse(ArgValue);
						break;
					case "--BatchSizeSchedule":
						// Value format: "epoch:batchSize,epoch:batchSize,..."
						paramTrain.flag_BachSizeSchedule = true;
						paramTrain.BachSizeSchedule = new Dictionary<int, int>();
						string[] StrBatSched = ArgValue.Split(',');
						for (int Idx = 0; Idx < StrBatSched.Length; Idx++)
						{
							string[] KeyValPair = StrBatSched[Idx].Split(':');
							if (KeyValPair.Length < 2)
							{
								// Without this check a missing ':' surfaced as a bare IndexOutOfRangeException.
								throw new ArgumentException("Invalid BatchSizeSchedule entry (expected epoch:batchSize): " + StrBatSched[Idx]);
							}
							paramTrain.BachSizeSchedule.Add(int.Parse(KeyValPair[0]), int.Parse(KeyValPair[1]));
						}
						break;
					case "--ThreadNum":
						paramTrain.ThreadNum = int.Parse(ArgValue);
						break;
					case "--MaxThreadDeg":
						paramTrain.MaxMultiThreadDegree = int.Parse(ArgValue);
						break;
					case "--ExternalEval":
						paramTrain.flag_ExternalEval = true;
						paramTrain.ExternalEval = ArgValue;
						break;
					case "--flag_SaveAllModels":
						paramTrain.flag_SaveAllModels = bool.Parse(ArgValue);
						break;
					case "--ValidLabelFile":
						paramTrain.ValidLabelFile = ArgValue;
						paramTrain.flag_HasValidSet = true;
						break;
					case "--ValidInputFile":
						paramTrain.ValidInputFile = ArgValue;
						paramTrain.flag_HasValidSet = true;
						break;
					case "--T_value":
						paramModel.T_value = float.Parse(ArgValue);
						break;
					case "--eta":
						paramModel.eta = float.Parse(ArgValue);
						break;
					case "--DebugLevel":
						paramTrain.DebugLevel = (DebugLevel_t)Enum.Parse(typeof(DebugLevel_t), ArgValue, true);
						break;
					case "--flag_AdaptivenHidLayer":
						paramModel.flag_AdaptivenHidLayer = bool.Parse(ArgValue);
						break;
					case "--flag_RunningAvg":
						paramTrain.flag_RunningAvg = bool.Parse(ArgValue);
						break;
					default:
						Console.WriteLine("Unknown ArgKey: {0}", ArgKey);
						Program.DispHelp();
						return false;
				}
			}

			// An odd token count means the last key has no value; the pair loop above never
			// looks at it, so report it instead of silently dropping the option.
			if (args.Length % 2 != 0)
			{
				Console.WriteLine("Missing value for ArgKey: {0}", args[args.Length - 1]);
				Program.DispHelp();
				return false;
			}

			// NOTE(review): T_value and flag_AdaptivenHidLayer are derived from alpha here, which
			// overwrites whatever --T_value / --flag_AdaptivenHidLayer set above. Kept as is so
			// existing runs behave the same; confirm whether the override is intended.
			if (paramModel.alpha >= 1.0f)
			{
				paramModel.T_value = 1.0f;
				paramModel.flag_AdaptivenHidLayer = false;
			}
			else if (paramModel.alpha > 0.0f)
			{
				paramModel.T_value = 0.01f;
				paramModel.flag_AdaptivenHidLayer = true;
			}
			else
			{
				// Also reached for NaN, since both comparisons above are false for it.
				throw new ArgumentException("Invalid alpha.");
			}

			if (String.IsNullOrEmpty(paramTrain.TrainInputFile) || String.IsNullOrEmpty(paramTrain.TestInputFile)
				|| String.IsNullOrEmpty(paramTrain.TrainLabelFile) || String.IsNullOrEmpty(paramTrain.TestLabelFile))
			{
				Console.WriteLine("Empty TrainInputFile, TestInputFile, TrainLabelFile, or TestLabelFile!");
				return false;
			}

			// Either validation option switches flag_HasValidSet on, so make sure the other
			// file was supplied too before anything tries to load both.
			if (paramTrain.flag_HasValidSet
				&& (String.IsNullOrEmpty(paramTrain.ValidInputFile) || String.IsNullOrEmpty(paramTrain.ValidLabelFile)))
			{
				Console.WriteLine("ValidInputFile and ValidLabelFile must be given together!");
				return false;
			}
			return true;
		}
Пример #6
0
		/// <summary>
		/// Training: unsupervised learning of feedforward (unfolding) LDA by back propagation.
		/// </summary>
		/// <remarks>
		/// Per epoch: shuffle the training columns, run forward activation and back propagation on
		/// each mini-batch, update <c>paramModel.Phi</c> with per-column step sizes via
		/// <c>SMD_Update</c>, and optionally maintain a running average of Phi during the second
		/// half of the epochs. Every <c>nEpochPerTest</c> epochs the test loss is recorded, every
		/// <c>nEpochPerSave</c> epochs "ResultFile.model.Phi" and "ResultFile.perf" are rewritten,
		/// and every <c>nEpochPerDump</c> epochs (when enabled) features are dumped.
		/// While the running average is active, testing, saving and dumping all use the averaged
		/// model; otherwise they use the current model.
		/// </remarks>
		/// <param name="TrainData">Training inputs; mini-batches are taken from its columns.</param>
		/// <param name="TestData">Test inputs passed to <c>Testing_BP_LDA</c> and the feature dump.</param>
		/// <param name="paramModel">Model parameters; Phi and T are modified during training.</param>
		/// <param name="paramTrain">Training parameters; <c>nTrain</c> is used as the number of training columns.</param>
		/// <param name="ModelFile">Not used by this method.</param>
		/// <param name="ResultFile">Path prefix for the ".model.Phi", ".perf", ".train.fea" and ".test.fea" outputs.</param>
		/// <exception cref="Exception">The learning-rate schedule is neither "PreCompute" nor "Constant".</exception>
		public static void TrainingBP_LDA(
			SparseMatrix TrainData,
			SparseMatrix TestData,
			paramModel_t paramModel,
			paramTrain_t paramTrain,
			string ModelFile,
			string ResultFile
		)
		{
			// ---- Extract the parameters ----
			// Model parameters
			int nInput = paramModel.nInput;
			int nHid = paramModel.nHid;
			int nOutput = paramModel.nOutput;
			float eta = paramModel.eta;
			float T_value = paramModel.T_value;
			string OutputType = paramModel.OutputType;
			float beta = paramModel.beta;
			// Training parameters
			int nEpoch = paramTrain.nEpoch;
			float mu_Phi = paramTrain.mu_Phi;
			int nTrain = paramTrain.nTrain;
			float mu_Phi_ReduceFactor = paramTrain.mu_Phi_ReduceFactor;
			string LearnRateSchedule = paramTrain.LearnRateSchedule;
			int nSamplesPerDisplay = paramTrain.nSamplesPerDisplay;
			int nEpochPerSave = paramTrain.nEpochPerSave;
			int nEpochPerTest = paramTrain.nEpochPerTest;
			int nEpochPerDump = paramTrain.nEpochPerDump;

			// ---- Initialize the model ----
			ModelInit_LDA_Feedforward(paramModel);

			// ---- Initialize the training algorithm ----
			Console.WriteLine("#################################################################");
			Console.WriteLine("jvking version of  BP-LDA: Mirror-Descent Back Propagation");
			Console.WriteLine("#################################################################");
			float TotLoss = 0.0f;
			float TotCE = 0.0f;
			double TotTime = 0.0f;
			double TotTimeThisEpoch = 0.0f;
			int TotSamples = 0;
			int TotSamplesThisEpoch = 0;
			double AvgnHidLayerEffective = 0.0;
			int CntRunningAvg = 0;
			int CntModelUpdate = 0;
			DenseRowVector mu_phi_search = new DenseRowVector(nHid, mu_Phi);
			DenseRowVector TestLoss_pool = new DenseRowVector(nEpoch / nEpochPerTest, 0.0f);
			DenseRowVector TestLoss_epoch = new DenseRowVector(nEpoch / nEpochPerTest, 0.0f);
			DenseRowVector TestLoss_time = new DenseRowVector(nEpoch / nEpochPerTest, 0.0f);
			DenseRowVector AdaGradSum = new DenseRowVector(nHid, 0.0f);
			DenseRowVector TmpDenseRowVec = new DenseRowVector(nHid, 0.0f);
			int BatchSize_NormalBatch = paramTrain.BatchSize;
			int BatchSize_tmp = paramTrain.BatchSize;
			int nBatch = (int)Math.Ceiling(((float)nTrain) / ((float)BatchSize_NormalBatch));
			// The last batch of an epoch may be smaller, so it gets its own run buffers.
			DNNRun_t DNNRun_NormalBatch = new DNNRun_t(nHid, BatchSize_NormalBatch, paramModel.nHidLayer, nOutput);
			DNNRun_t DNNRun_EndBatch = new DNNRun_t(nHid, nTrain - (nBatch - 1) * BatchSize_NormalBatch, paramModel.nHidLayer, nOutput);
			Grad_t Grad = new Grad_t(nHid, nOutput, nInput, paramModel.nHidLayer, OutputType);
			DenseMatrix TmpMatDensePhi = new DenseMatrix(nInput, nHid);
			paramModel_t paramModel_avg = new paramModel_t(paramModel);
			Stopwatch stopWatch = new Stopwatch();
			// First (0-based) epoch from which the running average is maintained and used.
			int RunningAvgStartEpoch = (int)Math.Ceiling(((float)nEpoch) / 2.0f);

			// ---- Compute the schedule of the learning rate
			double[] stepsize_pool = null;
			switch (LearnRateSchedule)
			{
				case "PreCompute":
					stepsize_pool = PrecomputeLearningRateSchedule(nBatch, nEpoch, mu_Phi, mu_Phi / mu_Phi_ReduceFactor, 1e-8f);
					break;
				case "Constant":
					stepsize_pool = new double[nEpoch];
					for (int Idx = 0; Idx < nEpoch; Idx++)
					{
						stepsize_pool[Idx] = mu_Phi;
					}
					break;
				default:
					throw new Exception("Unknown type of LearnRateSchedule");
			}

			// Now start training.........................
			for (int epoch = 0; epoch < nEpoch; epoch++)
			{
				TotSamplesThisEpoch = 0;
				TotTimeThisEpoch = 0.0;
				AvgnHidLayerEffective = 0.0;
				bool UseRunningAvg = paramTrain.flag_RunningAvg && epoch >= RunningAvgStartEpoch;

				// -- Set the batch size if there is schedule --
				if (paramTrain.flag_BachSizeSchedule)
				{
					if (paramTrain.BachSizeSchedule.TryGetValue(epoch + 1, out BatchSize_tmp))
					{
						BatchSize_NormalBatch = BatchSize_tmp;
						nBatch = (int)Math.Ceiling(((float)nTrain) / ((float)BatchSize_NormalBatch));
						DNNRun_NormalBatch = new DNNRun_t(nHid, BatchSize_NormalBatch, paramModel.nHidLayer, nOutput);
						DNNRun_EndBatch = new DNNRun_t(nHid, nTrain - (nBatch - 1) * BatchSize_NormalBatch, paramModel.nHidLayer, nOutput);
					}
				}

				// -- Shuffle the data (generating shuffled index) --
				int[] IdxPerm = Statistics.RandPerm(nTrain);
				// -- Reset the (MDA) inference step-sizes --
				if (epoch > 0)
				{
					for (int Idx = 0; Idx < paramModel.nHidLayer; Idx++)
					{
						paramModel.T[Idx] = T_value;
					}
				}
				// -- Take the learning rate for the current epoch --
				mu_Phi = (float)stepsize_pool[epoch];
				// -- Start this epoch --
				Console.WriteLine("############## Epoch #{0}. BatchSize: {1} Learning Rate: {2} ##################", epoch + 1, BatchSize_NormalBatch, mu_Phi);
				for (int IdxBatch = 0; IdxBatch < nBatch; IdxBatch++)
				{
					stopWatch.Start();
					// Extract the batch
					int BatchSize;
					DNNRun_t DNNRun;
					if (IdxBatch < nBatch - 1)
					{
						BatchSize = BatchSize_NormalBatch;
						DNNRun = DNNRun_NormalBatch;
					}
					else
					{
						BatchSize = nTrain - IdxBatch * BatchSize_NormalBatch;
						DNNRun = DNNRun_EndBatch;
					}
					SparseMatrix Xt = new SparseMatrix(nInput, BatchSize);
					SparseMatrix Dt = null; // no labels in the unsupervised case
					int[] IdxSample = new int[BatchSize];
					Array.Copy(IdxPerm, IdxBatch * BatchSize_NormalBatch, IdxSample, 0, BatchSize);
					TrainData.GetColumns(Xt, IdxSample);

					// Set the sparse pattern for the gradient
					int[] SparsePatternGradPhi = Xt.GetHorizontalUnionSparsePattern();
					Grad.SetSparsePatternForAllGradPhi(SparsePatternGradPhi);

					// Forward activation
					LDA_Learn.ForwardActivation_LDA(Xt, DNNRun, paramModel, true);

					// Back propagation
					LDA_Learn.BackPropagation_LDA(Xt, Dt, DNNRun, paramModel, Grad);

					// Compute the gradient and update the model (All gradients of Phi are accumulated into Grad.grad_Q_Phi)
					MatrixOperation.ScalarDivideMatrix(Grad.grad_Q_Phi, (-1.0f) * ((beta - 1) / ((float)nTrain)), paramModel.Phi, true);
					MatrixOperation.MatrixAddMatrix(Grad.grad_Q_Phi, Grad.grad_Q_TopPhi);
					mu_phi_search.FillValue(mu_Phi);
					// Different learning rate for different columns of Phi: Similar to AdaGrad but does not decay with time
					++CntModelUpdate;
					MatrixOperation.ElementwiseMatrixMultiplyMatrix(TmpMatDensePhi, Grad.grad_Q_Phi, Grad.grad_Q_Phi);
					MatrixOperation.VerticalSumMatrix(TmpDenseRowVec, TmpMatDensePhi);
					MatrixOperation.ScalarMultiplyVector(TmpDenseRowVec, 1.0f / ((float)nInput));
					MatrixOperation.VectorSubtractVector(TmpDenseRowVec, AdaGradSum);
					MatrixOperation.ScalarMultiplyVector(TmpDenseRowVec, 1.0f / CntModelUpdate);
					MatrixOperation.VectorAddVector(AdaGradSum, TmpDenseRowVec);
					MatrixOperation.ElementwiseSquareRoot(TmpDenseRowVec, AdaGradSum);
					MatrixOperation.ScalarAddVector(TmpDenseRowVec, mu_Phi);
					MatrixOperation.ElementwiseVectorDivideVector(mu_phi_search, mu_phi_search, TmpDenseRowVec);
					SMD_Update(paramModel.Phi, Grad.grad_Q_Phi, mu_phi_search, eta);

					// Running average of the model: avg += (Phi - avg) / count
					if (UseRunningAvg)
					{
						++CntRunningAvg;
						MatrixOperation.MatrixSubtractMatrix(TmpMatDensePhi, paramModel.Phi, paramModel_avg.Phi);
						MatrixOperation.ScalarMultiplyMatrix(TmpMatDensePhi, 1.0f / CntRunningAvg);
						MatrixOperation.MatrixAddMatrix(paramModel_avg.Phi, TmpMatDensePhi);
					}

					// Accumulate statistics for the progress display
					TotCE += ComputeCrossEntropy(Xt, paramModel.Phi, DNNRun.theta_pool, DNNRun.nHidLayerEffective);
					TotLoss = TotCE;
					TotSamples += BatchSize;
					TotSamplesThisEpoch += BatchSize;
					AvgnHidLayerEffective = (((float)(TotSamplesThisEpoch - BatchSize)) / ((float)TotSamplesThisEpoch)) * AvgnHidLayerEffective
						+ (1.0 / ((float)TotSamplesThisEpoch)) * (DNNRun.nHidLayerEffective.Sum());
					stopWatch.Stop();
					TimeSpan ts = stopWatch.Elapsed;
					TotTime += ts.TotalSeconds;
					TotTimeThisEpoch += ts.TotalSeconds;
					stopWatch.Reset();
					if (TotSamplesThisEpoch % nSamplesPerDisplay == 0)
					{
						// Display results
						Console.WriteLine(
							"* Ep#{0}/{1} Bat#{2}/{3}. Loss={4:F3}. CE={5:F3}.  Speed={6} Samples/Sec.",
							epoch + 1, nEpoch,
							IdxBatch + 1, nBatch,
							TotLoss / TotSamples, TotCE / TotSamples,
							(int)((double)TotSamplesThisEpoch / TotTimeThisEpoch)
						);
						bool IsHighDebug = paramTrain.DebugLevel == DebugLevel_t.high;
						if (IsHighDebug || paramTrain.DebugLevel == DebugLevel_t.medium)
						{
							Console.WriteLine(
								"  muPhiMax={0} \n  muPhiMin={1}",
								mu_phi_search.VectorValue.Max(), mu_phi_search.VectorValue.Min()
							);
							if (IsHighDebug)
							{
								Console.WriteLine(
									"  AvgnHidLayerEff={0:F1}. G_Phi={1:F3}.",
									AvgnHidLayerEffective,
									Grad.grad_Q_Phi.MaxAbsValue()
								);
							}
							Console.WriteLine();
						}
					}
				}

				// Model used for testing, saving and dumping in this epoch.
				paramModel_t paramModel_eval = UseRunningAvg ? paramModel_avg : paramModel;

				// -- Test --
				if ((epoch + 1) % nEpochPerTest == 0)
				{
					int IdxTest = (epoch + 1) / nEpochPerTest - 1;
					TestLoss_epoch.VectorValue[IdxTest] = epoch + 1;
					TestLoss_time.VectorValue[IdxTest] = (float)TotTime;
					TestLoss_pool.VectorValue[IdxTest] = Testing_BP_LDA(TestData, paramModel_eval, paramTrain.BatchSize_Test);
				}

				// -- Save --
				if ((epoch + 1) % nEpochPerSave == 0)
				{
					// Save model. Previously the averaged model was written and then immediately
					// overwritten by the non-averaged one (missing else); now exactly one is written.
					(new FileInfo(ResultFile + ".model.Phi")).Directory.Create();
					using (StreamWriter FileSaveModel = new StreamWriter(ResultFile + ".model.Phi", false))
					{
						for (int IdxCol = 0; IdxCol < paramModel_eval.Phi.nCols; IdxCol++)
						{
							string PhiCol = String.Join("\t", paramModel_eval.Phi.DenseMatrixValue[IdxCol].VectorValue);
							FileSaveModel.WriteLine(PhiCol);
						}
					}
					// Save the learning curves recorded so far
					using (StreamWriter FileSavePerf = new StreamWriter(ResultFile + ".perf", false))
					{
						FileSavePerf.WriteLine(String.Join("\t", TestLoss_epoch.VectorValue));
						FileSavePerf.WriteLine(String.Join("\t", TestLoss_time.VectorValue));
						FileSavePerf.WriteLine(String.Join("\t", TestLoss_pool.VectorValue));
					}
				}

				// -- Dump feature --
				if (paramTrain.flag_DumpFeature && (epoch + 1) % nEpochPerDump == 0)
				{
					// Same missing-else fix as in the save step: dump once, with the evaluation model.
					DumpingFeature_BP_LDA(TrainData, paramModel_eval, paramTrain.BatchSize_Test, ResultFile + ".train.fea", "Train");
					DumpingFeature_BP_LDA(TestData, paramModel_eval, paramTrain.BatchSize_Test, ResultFile + ".test.fea", "Test");
				}
			}
		}
Пример #7
0
		/*
         * Training: supervised learning of feedforward (unfolding) LDA by back propagation
         */
		public static void TrainingBP_sLDA(
			SparseMatrix TrainData,
			SparseMatrix TrainLabel,
			SparseMatrix TestData,
			SparseMatrix TestLabel,
			SparseMatrix ValidData,
			SparseMatrix ValidLabel,
			paramModel_t paramModel,
			paramTrain_t paramTrain,
			string ModelFile,
			string ResultFile
		)
		{
			Console.WriteLine("*****************************************************************");
			Console.WriteLine("jvking version of BP-sLDA: Mirror-Descent Back Propagation");
			Console.WriteLine("*****************************************************************");
			// ---- Extract the parameters ----
			// Model parameters
			int nInput = paramModel.nInput;
			int nHid = paramModel.nHid;
			int nHidLayer = paramModel.nHidLayer;
			int nOutput = paramModel.nOutput;
			float eta = paramModel.eta;
			float T_value = paramModel.T_value;
			string OutputType = paramModel.OutputType;
			float beta = paramModel.beta;
			// Training parameters
			int nEpoch = paramTrain.nEpoch;
			float mu_Phi = paramTrain.mu_Phi;
			float mu_U = paramTrain.mu_U;
			int nTrain = paramTrain.nTrain;
			float mu_ReduceFactor = paramTrain.mu_Phi_ReduceFactor;
			string LearnRateSchedule = paramTrain.LearnRateSchedule;
			int nSamplesPerDisplay = paramTrain.nSamplesPerDisplay;
			int nEpochPerSave = paramTrain.nEpochPerSave;
			int nEpochPerTest = paramTrain.nEpochPerTest;
			int nEpochPerDump = paramTrain.nEpochPerDump;


			// ---- Initialize the model ----            
			ModelInit_LDA_Feedforward(paramModel);

			// ---- Initialize the training algorithm ----
			float TotLoss = 0.0f;
			float TotTrErr = 0.0f;
			double TotTime = 0.0f;
			double TotTimeThisEpoch = 0.0f;
			int TotSamples = 0;
			int TotSamplesThisEpoch = 0;
			float CntRunningAvg = 0.0f;
			float CntModelUpdate = 0.0f;
			double AvgnHidLayerEffective = 0.0f;
			DenseRowVector mu_phi_search = new DenseRowVector(nHid, mu_Phi);
			DenseRowVector mu_U_search = new DenseRowVector(nHid, mu_U);
			DenseRowVector AdaGradSum = new DenseRowVector(nHid, 0.0f);
			DenseRowVector TmpDenseRowVec = new DenseRowVector(nHid, 0.0f);
			DenseRowVector TestError_pool = new DenseRowVector(nEpoch / nEpochPerTest, 0.0f);
			DenseRowVector ValidError_pool = new DenseRowVector(nEpoch / nEpochPerTest, 0.0f);
			DenseRowVector TrainError_pool = new DenseRowVector(nEpoch / nEpochPerTest, 0.0f);
			DenseRowVector TrainLoss_pool = new DenseRowVector(nEpoch / nEpochPerTest, 0.0f);
			DenseRowVector TestError_epoch = new DenseRowVector(nEpoch / nEpochPerTest, 0.0f);
			DenseRowVector TestError_time = new DenseRowVector(nEpoch / nEpochPerTest, 0.0f);
			int CountTest = 0;
			float nLearnLineSearch = 0.0f;
			int[] IdxPerm = null;
			int BatchSize_NormalBatch = paramTrain.BatchSize;
			int BatchSize_tmp = paramTrain.BatchSize;
			int nBatch = (int)Math.Ceiling(((float)nTrain) / ((float)BatchSize_NormalBatch));
			DNNRun_t DNNRun_NormalBatch = new DNNRun_t(nHid, BatchSize_NormalBatch, paramModel.nHidLayer, nOutput);
			DNNRun_t DNNRun_EndBatch = new DNNRun_t(nHid, nTrain - (nBatch - 1) * BatchSize_NormalBatch, paramModel.nHidLayer, nOutput);
			DNNRun_t DNNRun = null;
			Grad_t Grad = new Grad_t(nHid, nOutput, nInput, paramModel.nHidLayer, OutputType);
			SparseMatrix TmpGrad = new SparseMatrix(nInput, nHid, true);
			DenseMatrix TmpMatDensePhi = new DenseMatrix(nInput, nHid);
			DenseMatrix TmpMatDenseU = new DenseMatrix(nOutput, nHid);
			paramModel_t paramModel_avg = new paramModel_t(paramModel);          
			Stopwatch stopWatch = new Stopwatch();            
			// ---- Compute the schedule of the learning rate
			double[] stepsize_pool_Phi = null;
			double[] stepsize_pool_U = null;
			switch (LearnRateSchedule)
			{
			case "PreCompute":
				stepsize_pool_Phi = PrecomputeLearningRateSchedule(nBatch, nEpoch, mu_Phi, mu_Phi / mu_ReduceFactor, 1e-8f);
				stepsize_pool_U = PrecomputeLearningRateSchedule(nBatch, nEpoch, mu_U, mu_U / mu_ReduceFactor, 1e-8f);
				break;
			case "Constant":
				stepsize_pool_Phi = new double[nEpoch];
				stepsize_pool_U = new double[nEpoch];
				for (int Idx = 0; Idx < nEpoch; Idx++)
				{
					stepsize_pool_Phi[Idx] = mu_Phi;
					stepsize_pool_U[Idx] = mu_U;
				}
				break;
			default:
				throw new Exception("Unknown type of LearnRateSchedule");
			}
			// Now start training.........................
			for (int epoch = 0; epoch < nEpoch; epoch++)
			{
				TotSamplesThisEpoch = 0;
				TotTimeThisEpoch = 0.0;
				AvgnHidLayerEffective = 0.0f;
				// -- Set the batch size if there is schedule --
				if (paramTrain.flag_BachSizeSchedule)
				{
					if (paramTrain.BachSizeSchedule.TryGetValue(epoch + 1, out BatchSize_tmp))
					{
						BatchSize_NormalBatch = BatchSize_tmp;
						nBatch = (int)Math.Ceiling(((float)nTrain) / ((float)BatchSize_NormalBatch));
						DNNRun_NormalBatch = new DNNRun_t(nHid, BatchSize_NormalBatch, paramModel.nHidLayer, nOutput);
						DNNRun_EndBatch = new DNNRun_t(nHid, nTrain - (nBatch - 1) * BatchSize_NormalBatch, paramModel.nHidLayer, nOutput);
					}
				}

				// -- Shuffle the data (generating shuffled index) --
				IdxPerm = Statistics.RandPerm(nTrain);
				// -- Reset the (MDA) inference step-sizes --
				if (epoch > 0)
				{
					for (int Idx = 0; Idx < paramModel.nHidLayer; Idx++)
					{
						paramModel.T[Idx] = T_value;
					}
				}
				// -- Take the learning rate for the current epoch --
				mu_Phi = (float)stepsize_pool_Phi[epoch];
				mu_U = (float)stepsize_pool_U[epoch];
				// -- Start this epoch --
				Console.WriteLine("############## Epoch #{0}. BatchSize: {1} Learning Rate: Phi:{2}, U:{3} ##################",
					epoch + 1, BatchSize_NormalBatch, mu_Phi, mu_U);
				for (int IdxBatch = 0; IdxBatch < nBatch; IdxBatch++)
				{
					stopWatch.Start();
					// Extract the batch
					int BatchSize = 0;
					if (IdxBatch < nBatch - 1)
					{
						BatchSize = BatchSize_NormalBatch;
						DNNRun = DNNRun_NormalBatch;
					}
					else
					{
						BatchSize = nTrain - IdxBatch * BatchSize_NormalBatch;
						DNNRun = DNNRun_EndBatch;
					}
					SparseMatrix Xt = new SparseMatrix(nInput, BatchSize);
					SparseMatrix Dt = new SparseMatrix(nOutput, BatchSize);
					int[] IdxSample = new int[BatchSize];
					Array.Copy(IdxPerm, IdxBatch * BatchSize_NormalBatch, IdxSample, 0, BatchSize);
					TrainData.GetColumns(Xt, IdxSample);
					TrainLabel.GetColumns(Dt, IdxSample);

					// Forward activation
					LDA_Learn.ForwardActivation_LDA(Xt, DNNRun, paramModel, true);

					// Back propagation
					LDA_Learn.BackPropagation_LDA(Xt, Dt, DNNRun, paramModel, Grad);

					// Compute the gradient and update the model (All gradients of Phi are accumulated into Grad.grad_Q_Phi)
					// (i) Update Phi
					MatrixOperation.ScalarDivideMatrix(Grad.grad_Q_Phi, (-1.0f) * ((beta - 1) / ((float)nTrain)), paramModel.Phi, true);       
					mu_phi_search.FillValue(mu_Phi);
					// Different learning rate for different columns of Phi: Similar to AdaGrad but does not decay with time
					++CntModelUpdate;
					MatrixOperation.ElementwiseMatrixMultiplyMatrix(TmpMatDensePhi, Grad.grad_Q_Phi, Grad.grad_Q_Phi);
					MatrixOperation.VerticalSumMatrix(TmpDenseRowVec, TmpMatDensePhi);
					MatrixOperation.ScalarMultiplyVector(TmpDenseRowVec, 1.0f / ((float)nInput));
					MatrixOperation.VectorSubtractVector(TmpDenseRowVec, AdaGradSum);
					MatrixOperation.ScalarMultiplyVector(TmpDenseRowVec, 1.0f / CntModelUpdate);
					MatrixOperation.VectorAddVector(AdaGradSum, TmpDenseRowVec);
					MatrixOperation.ElementwiseSquareRoot(TmpDenseRowVec, AdaGradSum);
					MatrixOperation.ScalarAddVector(TmpDenseRowVec, mu_Phi);
					MatrixOperation.ElementwiseVectorDivideVector(mu_phi_search, mu_phi_search, TmpDenseRowVec);
					nLearnLineSearch = SMD_Update(paramModel.Phi, Grad.grad_Q_Phi, mu_phi_search, eta);
					// (ii) Update U                    
					MatrixOperation.ScalarMultiplyMatrix(Grad.grad_Q_U, (-1.0f) * mu_U);
					MatrixOperation.MatrixAddMatrix(paramModel.U, Grad.grad_Q_U);
					// (iii) Running average of the model
					if (paramTrain.flag_RunningAvg && epoch >= (int)Math.Ceiling(((float)nEpoch)/2.0f))
					{
						++CntRunningAvg;
						MatrixOperation.MatrixSubtractMatrix(TmpMatDensePhi, paramModel.Phi, paramModel_avg.Phi);
						MatrixOperation.MatrixSubtractMatrix(TmpMatDenseU, paramModel.U, paramModel_avg.U);
						MatrixOperation.ScalarMultiplyMatrix(TmpMatDensePhi, 1.0f / CntRunningAvg);
						MatrixOperation.ScalarMultiplyMatrix(TmpMatDenseU, 1.0f / CntRunningAvg);
						MatrixOperation.MatrixAddMatrix(paramModel_avg.Phi, TmpMatDensePhi);
						MatrixOperation.MatrixAddMatrix(paramModel_avg.U, TmpMatDenseU);
					}

					// Display the result
					TotTrErr += 100 * ComputeNumberOfErrors(Dt, DNNRun.y);
					TotLoss += ComputeSupervisedLoss(Dt, DNNRun.y, paramModel.OutputType);
					TotSamples += BatchSize;
					TotSamplesThisEpoch += BatchSize;
					AvgnHidLayerEffective =
						(((double)(TotSamplesThisEpoch - BatchSize)) / ((double)TotSamplesThisEpoch)) * AvgnHidLayerEffective 
						+
						1.0 / ((double)TotSamplesThisEpoch) * DNNRun.nHidLayerEffective.Sum();
					stopWatch.Stop();
					TimeSpan ts = stopWatch.Elapsed;
					TotTime += ts.TotalSeconds;
					TotTimeThisEpoch += ts.TotalSeconds;
					stopWatch.Reset();
					if (TotSamplesThisEpoch % nSamplesPerDisplay == 0)
					{
						// Display results
						Console.WriteLine(
							"* Ep#{0}/{1} Bat#{2}/{3}. Loss={4:F3}. TrErr={5:F3}%. Speed={6} Samples/Sec.",
							epoch + 1, nEpoch,
							IdxBatch + 1, nBatch,
							TotLoss / TotSamples, TotTrErr / TotSamples,
							(int)((double)TotSamplesThisEpoch / TotTimeThisEpoch)
						);
						if (paramTrain.DebugLevel == DebugLevel_t.medium)
						{
							Console.WriteLine(
								"  muPhiMax={0} \n  muPhiMin={1}",
								mu_phi_search.VectorValue.Max(), mu_phi_search.VectorValue.Min()
							);
							Console.WriteLine();
						}
						if (paramTrain.DebugLevel == DebugLevel_t.high)
						{
							Console.WriteLine(
								"  muPhiMax={0} \n  muPhiMin={1}",
								mu_phi_search.VectorValue.Max(), mu_phi_search.VectorValue.Min()
							);
							float MaxAbsVal_Grad_Q_Phi = Grad.grad_Q_Phi.MaxAbsValue();
							float MaxAbsVal_Grad_Q_U = Grad.grad_Q_U.MaxAbsValue();
							Console.WriteLine(
								"  AvgnHidLayerEff={0:F1}. G_Phi={1:F3}. G_U={2:F3}",
								AvgnHidLayerEffective,
								MaxAbsVal_Grad_Q_Phi,
								MaxAbsVal_Grad_Q_U
							);
							// Save the screen into a log file
							(new FileInfo(ResultFile + ".log")).Directory.Create();
							using (StreamWriter LogFile = File.AppendText(ResultFile + ".log"))
							{
								LogFile.WriteLine(
									"- Ep#{0}/{1} Bat#{2}/{3}. Loss={4:F3}. TrErr={5:F3}%. Speed={6} Samples/Sec.",
									epoch + 1, nEpoch,
									IdxBatch + 1, nBatch,
									TotLoss / TotSamples, TotTrErr / TotSamples,
									(int)((double)TotSamplesThisEpoch / TotTimeThisEpoch)
								);
								LogFile.WriteLine(
									"  muPhiMax={0} \n  muPhiMin={1}",
									mu_phi_search.VectorValue.Max(), mu_phi_search.VectorValue.Min()
								);
								LogFile.WriteLine(
									"  AvgnHidLayerEff={0:F1}. G_Phi={1:F3}. G_U={2:F3}",
									AvgnHidLayerEffective,
									MaxAbsVal_Grad_Q_Phi,
									MaxAbsVal_Grad_Q_U
								);
								Console.WriteLine();
							}
							Console.WriteLine();
						}

					}
				}
				// -- Test --
				if ((epoch + 1) % nEpochPerTest == 0)
				{
					// Standard performance metric
					TestError_epoch.VectorValue[(epoch + 1) / nEpochPerTest - 1] = epoch + 1;
					TestError_time.VectorValue[(epoch + 1) / nEpochPerTest - 1] = (float)TotTime;
					if (paramTrain.flag_RunningAvg && epoch >= (int)Math.Ceiling(((float)nEpoch) / 2.0f))
					{
						if (paramTrain.flag_HasValidSet)
						{
							ValidError_pool.VectorValue[(epoch + 1) / nEpochPerTest - 1]
							= Testing_BP_sLDA(
								ValidData, 
								ValidLabel, 
								paramModel_avg, 
								paramTrain.BatchSize_Test, 
								ResultFile + ".validscore", 
								"Validation Set"
							);
						}
						TestError_pool.VectorValue[(epoch + 1) / nEpochPerTest - 1]
						= Testing_BP_sLDA(
							TestData, 
							TestLabel, 
							paramModel_avg, 
							paramTrain.BatchSize_Test, 
							ResultFile + ".testscore", 
							"Test Set"
						);                        
					}
					else
					{
						if (paramTrain.flag_HasValidSet)
						{
							ValidError_pool.VectorValue[(epoch + 1) / nEpochPerTest - 1]
							= Testing_BP_sLDA(
								ValidData, 
								ValidLabel, 
								paramModel, 
								paramTrain.BatchSize_Test, 
								ResultFile + ".validscore", 
								"Validation Set"
							);
						}
						TestError_pool.VectorValue[(epoch + 1) / nEpochPerTest - 1]
						= Testing_BP_sLDA(
							TestData, 
							TestLabel, 
							paramModel, 
							paramTrain.BatchSize_Test, 
							ResultFile + ".testscore", 
							"Test Set"
						);
					}
					TrainError_pool.VectorValue[(epoch + 1) / nEpochPerTest - 1]
					= TotTrErr / TotSamples;
					TrainLoss_pool.VectorValue[(epoch + 1) / nEpochPerTest - 1]
					= TotLoss / TotSamples;

					// Performance metric evaluated using external evaluation tools, e.g., AUC, Top@K accuracy, etc.
					if (paramTrain.flag_ExternalEval)
					{
						ExternalEvaluation(
							paramTrain.ExternalEval, 
							ResultFile, 
							paramTrain.TestLabelFile, 
							epoch, 
							"Test Set"
						);
						if (paramTrain.flag_HasValidSet)
						{
							ExternalEvaluation(
								paramTrain.ExternalEval, 
								ResultFile, 
								paramTrain.ValidLabelFile, 
								epoch, 
								"Validation Set"
							);
						}
					}

					CountTest++;
				}

				// -- Save --
				if ((epoch + 1) % nEpochPerSave == 0)
				{
					// Save model
					string PhiCol = null;
					string UCol = null;
					(new FileInfo(ResultFile + ".model.Phi")).Directory.Create();
					string ModelName_Phi;
					string ModelName_U;
					if (paramTrain.flag_SaveAllModels)
					{
						ModelName_Phi = ResultFile + ".model.Phi" + ".iter" + (epoch + 1).ToString();
						ModelName_U = ResultFile + ".model.U" + ".iter" + (epoch + 1).ToString();
					}
					else
					{
						ModelName_Phi = ResultFile + ".model.Phi";
						ModelName_U = ResultFile + ".model.U";
					}
					if (paramTrain.flag_RunningAvg && epoch >= (int)Math.Ceiling(((float)nEpoch) / 2.0f))
					{
						using (StreamWriter FileSaveModel_Phi = new StreamWriter(ModelName_Phi, false))
						{
							for (int IdxCol = 0; IdxCol < paramModel_avg.Phi.nCols; IdxCol++)
							{
								PhiCol = String.Join("\t", paramModel_avg.Phi.DenseMatrixValue[IdxCol].VectorValue);
								FileSaveModel_Phi.WriteLine(PhiCol);
							}
						}
						using (StreamWriter FileSaveModel_U = new StreamWriter(ModelName_U, false))
						{
							for (int IdxCol = 0; IdxCol < paramModel_avg.U.nCols; IdxCol++)
							{
								UCol = String.Join("\t", paramModel_avg.U.DenseMatrixValue[IdxCol].VectorValue);
								FileSaveModel_U.WriteLine(UCol);
							}
						}
					}
					else
					{
						using (StreamWriter FileSaveModel_Phi = new StreamWriter(ModelName_Phi, false))
						{
							for (int IdxCol = 0; IdxCol < paramModel.Phi.nCols; IdxCol++)
							{
								PhiCol = String.Join("\t", paramModel.Phi.DenseMatrixValue[IdxCol].VectorValue);
								FileSaveModel_Phi.WriteLine(PhiCol);
							}
						}
						using (StreamWriter FileSaveModel_U = new StreamWriter(ModelName_U, false))
						{
							for (int IdxCol = 0; IdxCol < paramModel.U.nCols; IdxCol++)
							{
								UCol = String.Join("\t", paramModel.U.DenseMatrixValue[IdxCol].VectorValue);
								FileSaveModel_U.WriteLine(UCol);
							}
						}
					}
					// Save the final learning curves
					using (StreamWriter FileSavePerf = new StreamWriter(ResultFile + ".perf", false))
					{
						FileSavePerf.Write("Epoch:\t");
						FileSavePerf.WriteLine(String.Join("\t", TestError_epoch.VectorValue));
						FileSavePerf.Write("TrainTime:\t");
						FileSavePerf.WriteLine(String.Join("\t", TestError_time.VectorValue));
						if (paramTrain.flag_HasValidSet)
						{
							FileSavePerf.Write("Validation:\t");
							FileSavePerf.WriteLine(String.Join("\t", ValidError_pool.VectorValue));
						}
						FileSavePerf.Write("Test:\t");
						FileSavePerf.WriteLine(String.Join("\t", TestError_pool.VectorValue));
						FileSavePerf.Write("TrainError:\t");
						FileSavePerf.WriteLine(String.Join("\t", TrainError_pool.VectorValue));
						FileSavePerf.Write("TrainLoss:\t");
						FileSavePerf.WriteLine(String.Join("\t", TrainLoss_pool.VectorValue));
					}
				}

				// -- Dump feature --
				if (paramTrain.flag_DumpFeature && (epoch + 1) % nEpochPerDump == 0)
				{
					if (paramTrain.flag_RunningAvg && epoch >= (int)Math.Ceiling(((float)nEpoch) / 2.0f))
					{
						DumpingFeature_BP_LDA(TrainData, paramModel_avg, paramTrain.BatchSize_Test, ResultFile + ".train.fea", "Train");
						DumpingFeature_BP_LDA(TestData, paramModel_avg, paramTrain.BatchSize_Test, ResultFile + ".test.fea", "Test");
						if (paramTrain.flag_HasValidSet)
						{
							DumpingFeature_BP_LDA(ValidData, paramModel_avg, paramTrain.BatchSize_Test, ResultFile + ".valid.fea", "Validation");
						}
					}
					{
						DumpingFeature_BP_LDA(TrainData, paramModel, paramTrain.BatchSize_Test, ResultFile + ".train.fea", "Train");
						DumpingFeature_BP_LDA(TestData, paramModel, paramTrain.BatchSize_Test, ResultFile + ".test.fea", "Test");
						if (paramTrain.flag_HasValidSet)
						{
							DumpingFeature_BP_LDA(ValidData, paramModel, paramTrain.BatchSize_Test, ResultFile + ".valid.fea", "Validation");
						}
					}
				}


			}


		}
Пример #8
0
        /*
         * Parse the command-line arguments into the model / training parameters.
         *
         * args is read as consecutive "--key value" pairs. Every recognised key
         * stores its parsed value into paramModel, paramTrain or one of the
         * ref string outputs (TrainInputFile, TestInputFile, ResultFile).
         *
         * Returns true when all pairs were recognised and both TrainInputFile and
         * TestInputFile are non-empty. Returns false (after printing a message) on
         * an unknown key, a trailing key without a value, a malformed
         * --BatchSizeSchedule entry, or a missing train/test input file.
         *
         * Throws ArgumentException when alpha is not a positive number. The
         * numeric/bool/enum Parse calls still throw their usual exceptions on
         * text they cannot parse.
         *
         * Notes:
         *  - T_value and flag_AdaptivenHidLayer are always re-derived from alpha
         *    after the loop, so values given through --T_value or
         *    --flag_AdaptivenHidLayer do not survive this method.
         *  - ModelFile is part of the signature but no key handled here assigns it.
         */
        public static bool ParseArgument(
            string[] args,
            paramModel_t paramModel,
            paramTrain_t paramTrain,
            ref string TrainInputFile,
            ref string TestInputFile,
            ref string ModelFile,
            ref string ResultFile
            )
        {
            string ArgKey;
            string ArgValue;

            for (int IdxArg = 0; IdxArg < args.Length - 1; IdxArg += 2)
            {
                ArgKey   = args[IdxArg];
                ArgValue = args[IdxArg + 1];
                switch (ArgKey)
                {
                case "--nHid":
                    paramModel.nHid = int.Parse(ArgValue);
                    break;

                case "--nHidLayer":
                    paramModel.nHidLayer = int.Parse(ArgValue);
                    break;

                case "--alpha":
                    paramModel.alpha = float.Parse(ArgValue);
                    break;

                case "--beta":
                    paramModel.beta = float.Parse(ArgValue);
                    break;

                case "--nEpoch":
                    paramTrain.nEpoch = int.Parse(ArgValue);
                    break;

                case "--BatchSize":
                    paramTrain.BatchSize = int.Parse(ArgValue);
                    break;

                case "--BatchSize_Test":
                    paramTrain.BatchSize_Test = int.Parse(ArgValue);
                    break;

                case "--mu_Phi":
                    paramTrain.mu_Phi = float.Parse(ArgValue);
                    break;

                case "--mu_U":
                    paramTrain.mu_U = float.Parse(ArgValue);
                    break;

                case "--nSamplesPerDisplay":
                    paramTrain.nSamplesPerDisplay = int.Parse(ArgValue);
                    break;

                case "--nEpochPerSave":
                    paramTrain.nEpochPerSave = int.Parse(ArgValue);
                    break;

                case "--nEpochPerTest":
                    paramTrain.nEpochPerTest = int.Parse(ArgValue);
                    break;

                case "--TrainInputFile":
                    TrainInputFile            = ArgValue;
                    paramTrain.TrainInputFile = TrainInputFile;
                    break;

                case "--TestInputFile":
                    TestInputFile            = ArgValue;
                    paramTrain.TestInputFile = TestInputFile;
                    break;

                case "--ResultFile":
                    ResultFile = ArgValue;
                    break;

                case "--nInput":
                    paramModel.nInput = int.Parse(ArgValue);
                    break;

                case "--nOutput":
                    paramModel.nOutput = int.Parse(ArgValue);
                    break;

                case "--LearnRateSchedule":
                    paramTrain.LearnRateSchedule = ArgValue;
                    break;

                case "--flag_DumpFeature":
                    paramTrain.flag_DumpFeature = bool.Parse(ArgValue);
                    break;

                case "--nEpochPerDump":
                    paramTrain.nEpochPerDump = int.Parse(ArgValue);
                    break;

                case "--BatchSizeSchedule":
                    // Value format: "key:value,key:value,...". Both parts are integers.
                    // Build the schedule locally so paramTrain is only touched once
                    // the whole value has been validated.
                    Dictionary <int, int> BatSched = new Dictionary <int, int>();
                    string[] StrBatSched = ArgValue.Split(',');
                    for (int Idx = 0; Idx < StrBatSched.Length; Idx++)
                    {
                        string[] KeyValPair = StrBatSched[Idx].Split(':');
                        if (KeyValPair.Length != 2)
                        {
                            // Previously an entry without ':' crashed with an
                            // IndexOutOfRangeException; report it instead.
                            Console.WriteLine("Invalid BatchSizeSchedule entry: {0}", StrBatSched[Idx]);
                            Program.DispHelp();
                            return(false);
                        }
                        BatSched.Add(int.Parse(KeyValPair[0]), int.Parse(KeyValPair[1]));
                    }
                    paramTrain.flag_BachSizeSchedule = true;
                    paramTrain.BachSizeSchedule      = BatSched;
                    break;

                case "--ThreadNum":
                    paramTrain.ThreadNum = int.Parse(ArgValue);
                    break;

                case "--MaxThreadDeg":
                    paramTrain.MaxMultiThreadDegree = int.Parse(ArgValue);
                    break;

                case "--T_value":
                    paramModel.T_value = float.Parse(ArgValue);
                    break;

                case "--DebugLevel":
                    // Case-insensitive match against the DebugLevel_t member names.
                    paramTrain.DebugLevel = (DebugLevel_t)Enum.Parse(typeof(DebugLevel_t), ArgValue, true);
                    break;

                case "--flag_AdaptivenHidLayer":
                    paramModel.flag_AdaptivenHidLayer = bool.Parse(ArgValue);
                    break;

                case "--flag_RunningAvg":
                    paramTrain.flag_RunningAvg = bool.Parse(ArgValue);
                    break;

                default:
                    Console.WriteLine("Unknown ArgKey: {0}", ArgKey);
                    Program.DispHelp();
                    return(false);
                }
            }

            // The loop consumes arguments two at a time, so an odd count means the
            // last key has no value. Report it rather than silently dropping it.
            if (args.Length % 2 != 0)
            {
                Console.WriteLine("Missing value for ArgKey: {0}", args[args.Length - 1]);
                Program.DispHelp();
                return(false);
            }

            // T_value and flag_AdaptivenHidLayer are derived from alpha.
            if (paramModel.alpha >= 1.0f)
            {
                paramModel.T_value = 1.0f;
                paramModel.flag_AdaptivenHidLayer = false;
            }
            else if (paramModel.alpha > 0.0f)
            {
                paramModel.T_value = 0.001f;
                paramModel.flag_AdaptivenHidLayer = true;
            }
            else
            {
                // Reached for alpha <= 0 and for NaN (both comparisons above are false).
                throw new ArgumentException("Invalid alpha.");
            }

            if (String.IsNullOrEmpty(TrainInputFile) || String.IsNullOrEmpty(TestInputFile))
            {
                Console.WriteLine("Empty TrainInputFile or TestInputFile!");
                return(false);
            }
            return(true);
        }