/// <summary>
/// The constructor.
/// </summary>
/// <param name="mycaffe">Specifies the MyCaffe instance associated with the open project - when using more than one Brain, this is the master project.</param>
/// <param name="properties">Specifies the properties passed into the trainer.</param>
/// <param name="random">Specifies the random number generator used.</param>
/// <param name="phase">Specifies the phase under which to run.</param>
public Brain(MyCaffeControl<T> mycaffe, PropertySet properties, CryptoRandom random, Phase phase)
{
    m_mycaffe = mycaffe;
    m_solver = mycaffe.GetInternalSolver();
    m_netOutput = mycaffe.GetInternalNet(phase);
    m_netTarget = new Net<T>(m_mycaffe.Cuda, m_mycaffe.Log, m_netOutput.net_param, m_mycaffe.CancelEvent, null, phase);
    m_properties = properties;
    m_random = random;

    // The 'data' input blob defines the batch size used during training.
    Blob<T> blobData = m_netOutput.blob_by_name("data");
    if (blobData == null)
    {
        m_mycaffe.Log.FAIL("Missing the expected input 'data' blob!");
    }

    m_nBatchSize = blobData.num;

    // The 'logits' blob defines the action count (one action per channel).
    Blob<T> blobLogits = m_netOutput.blob_by_name("logits");
    if (blobLogits == null)
    {
        m_mycaffe.Log.FAIL("Missing the expected input 'logits' blob!");
    }

    m_nActionCount = blobLogits.channels;

    m_transformer = m_mycaffe.DataTransformer;

    // Working blobs used while computing the Q-learning updates.
    m_blobActions = new Blob<T>(m_mycaffe.Cuda, m_mycaffe.Log, false);
    m_blobQValue = new Blob<T>(m_mycaffe.Cuda, m_mycaffe.Log);
    m_blobNextQValue = new Blob<T>(m_mycaffe.Cuda, m_mycaffe.Log);
    m_blobExpectedQValue = new Blob<T>(m_mycaffe.Cuda, m_mycaffe.Log);
    m_blobDone = new Blob<T>(m_mycaffe.Cuda, m_mycaffe.Log, false);
    m_blobLoss = new Blob<T>(m_mycaffe.Cuda, m_mycaffe.Log);
    m_blobWeights = new Blob<T>(m_mycaffe.Cuda, m_mycaffe.Log, false);

    // Discount factor for future rewards ('Gamma' property, defaults to the field value).
    m_fGamma = (float)properties.GetPropertyAsDouble("Gamma", m_fGamma);

    m_memLoss = m_netOutput.FindLastLayer(LayerParameter.LayerType.MEMORY_LOSS) as MemoryLossLayer<T>;
    if (m_memLoss == null)
    {
        m_mycaffe.Log.FAIL("Missing the expected MEMORY_LOSS layer!");
    }

    // Seed the learning rate from the project's solver 'base_lr' setting when present.
    double? dfBaseLr = mycaffe.CurrentProject.GetSolverSettingAsNumeric("base_lr");
    if (dfBaseLr.HasValue)
    {
        m_dfLearningRate = dfBaseLr.Value;
    }

    m_nMiniBatch = m_properties.GetPropertyAsInt("MiniBatch", m_nMiniBatch);
    m_bUseAcceleratedTraining = properties.GetPropertyAsBool("UseAcceleratedTraining", false);

    // When accumulating gradients across more than one mini-batch, clone the
    // learnable parameters to hold the running gradient sum (diffs start at zero).
    if (m_nMiniBatch > 1)
    {
        m_colAccumulatedGradients = m_netOutput.learnable_parameters.Clone();
        m_colAccumulatedGradients.SetDiff(0);
    }
}
/// <summary>
/// The constructor.
/// </summary>
/// <param name="mycaffe">Specifies the MyCaffe instance associated with the open project.</param>
/// <param name="properties">Specifies the properties passed into the trainer.</param>
/// <param name="random">Specifies the random number generator used.</param>
/// <param name="phase">Specifies the phase under which to run.</param>
public Brain(MyCaffeControl<T> mycaffe, PropertySet properties, CryptoRandom random, Phase phase)
{
    m_mycaffe = mycaffe;
    m_net = mycaffe.GetInternalNet(phase);
    m_solver = mycaffe.GetInternalSolver();
    m_properties = properties;
    m_random = random;

    m_memData = m_net.FindLayer(LayerParameter.LayerType.MEMORYDATA, null) as MemoryDataLayer<T>;
    m_memLoss = m_net.FindLayer(LayerParameter.LayerType.MEMORY_LOSS, null) as MemoryLossLayer<T>;

    // The SIMPLE trainer computes the loss directly via the MemoryLoss layer and
    // does not support a model containing a Softmax layer.
    SoftmaxLayer<T> softmax = m_net.FindLayer(LayerParameter.LayerType.SOFTMAX, null) as SoftmaxLayer<T>;
    if (softmax != null)
    {
        throw new Exception("The PG.SIMPLE trainer does not support the Softmax layer, use the 'PG.ST' or 'PG.MT' trainer instead.");
    }

    if (m_memData == null)
    {
        throw new Exception("Could not find the MemoryData Layer!");
    }

    if (m_memLoss == null)
    {
        throw new Exception("Could not find the MemoryLoss Layer!");
    }

    m_memLoss.OnGetLoss += memLoss_OnGetLoss;

    m_blobDiscountedR = new Blob<T>(mycaffe.Cuda, mycaffe.Log);
    m_blobPolicyGradient = new Blob<T>(mycaffe.Cuda, mycaffe.Log);

    // FIX: only take the project batch size when it is non-zero, matching the other
    // trainer constructors - previously a zero batch size would overwrite the
    // field's default and disable mini-batch updates.
    int nMiniBatch = mycaffe.CurrentProject.GetBatchSize(phase);
    if (nMiniBatch != 0)
    {
        m_nMiniBatch = nMiniBatch;
    }
}
/// <summary>
/// The constructor.
/// </summary>
/// <param name="mycaffe">Specifies the MyCaffe instance associated with the open project.</param>
/// <param name="properties">Specifies the properties passed into the trainer.</param>
/// <param name="random">Specifies the random number generator used.</param>
/// <param name="phase">Specifies the phase under which to run.</param>
public Brain(MyCaffeControl<T> mycaffe, PropertySet properties, CryptoRandom random, Phase phase)
{
    m_mycaffe = mycaffe;
    m_net = mycaffe.GetInternalNet(phase);
    m_solver = mycaffe.GetInternalSolver();
    m_properties = properties;
    m_random = random;

    // Locate the required layers; the softmax layer is optional.
    m_memData = m_net.FindLayer(LayerParameter.LayerType.MEMORYDATA, null) as MemoryDataLayer<T>;
    m_memLoss = m_net.FindLayer(LayerParameter.LayerType.MEMORY_LOSS, null) as MemoryLossLayer<T>;
    m_softmax = m_net.FindLayer(LayerParameter.LayerType.SOFTMAX, null) as SoftmaxLayer<T>;

    if (m_memData == null)
    {
        throw new Exception("Could not find the MemoryData Layer!");
    }

    if (m_memLoss == null)
    {
        throw new Exception("Could not find the MemoryLoss Layer!");
    }

    m_memData.OnDataPack += memData_OnDataPack;
    m_memLoss.OnGetLoss += memLoss_OnGetLoss;

    // Working blobs used when computing the policy gradient updates.
    m_blobDiscountedR = new Blob<T>(mycaffe.Cuda, mycaffe.Log);
    m_blobPolicyGradient = new Blob<T>(mycaffe.Cuda, mycaffe.Log);
    m_blobActionOneHot = new Blob<T>(mycaffe.Cuda, mycaffe.Log);
    m_blobDiscountedR1 = new Blob<T>(mycaffe.Cuda, mycaffe.Log);
    m_blobPolicyGradient1 = new Blob<T>(mycaffe.Cuda, mycaffe.Log);
    m_blobActionOneHot1 = new Blob<T>(mycaffe.Cuda, mycaffe.Log);
    m_blobLoss = new Blob<T>(mycaffe.Cuda, mycaffe.Log);
    m_blobAprobLogit = new Blob<T>(mycaffe.Cuda, mycaffe.Log);

    // When a softmax is present, build a softmax cross-entropy loss layer to pair with it.
    if (m_softmax != null)
    {
        LayerParameter pCeLoss = new LayerParameter(LayerParameter.LayerType.SOFTMAXCROSSENTROPY_LOSS);
        pCeLoss.loss_weight.Add(1);
        pCeLoss.loss_weight.Add(0);
        pCeLoss.loss_param.normalization = LossParameter.NormalizationMode.NONE;
        m_softmaxCe = new SoftmaxCrossEntropyLossLayer<T>(mycaffe.Cuda, mycaffe.Log, pCeLoss);
    }

    // Gradients are always accumulated across mini-batches for this trainer.
    m_colAccumulatedGradients = m_net.learnable_parameters.Clone();
    m_colAccumulatedGradients.SetDiff(0);

    // Mini-batch size: project batch size when non-zero, then the 'MiniBatch'
    // property may override it.
    int nProjectBatch = mycaffe.CurrentProject.GetBatchSize(phase);
    if (nProjectBatch != 0)
    {
        m_nMiniBatch = nProjectBatch;
    }

    m_nMiniBatch = m_properties.GetPropertyAsInt("MiniBatch", m_nMiniBatch);
}
/// <summary>
/// The DoWork thread is the main thread used to train or run the model depending on the operation selected.
/// </summary>
/// <param name="sender">Specifies the sender (the BackgroundWorker running this handler).</param>
/// <param name="e">Specifies the arguments whose Argument carries the InputData.</param>
private void m_bw_DoWork(object sender, DoWorkEventArgs e)
{
    BackgroundWorker bw = sender as BackgroundWorker;
    m_input = e.Argument as InputData;
    SettingsCaffe s = new SettingsCaffe();
    s.ImageDbLoadMethod = IMAGEDB_LOAD_METHOD.LOAD_ALL;

    // FIX: the previous 'catch (Exception excpt) { throw excpt; }' reset the stack
    // trace on rethrow; the catch added nothing, so exceptions now propagate
    // unmodified and the 'finally' alone handles cleanup.
    try
    {
        m_model.Batch = m_input.Batch;
        m_mycaffe = new MyCaffeControl<float>(s, m_log, m_evtCancel);

        // Train the model.
        if (m_input.Operation == InputData.OPERATION.TRAIN)
        {
            // NOTE(review): 7000 appears to be the assumed sample count per epoch - confirm.
            m_model.Iterations = (int)((m_input.Epochs * 7000) / m_model.Batch);
            m_log.WriteLine("Training for " + m_input.Epochs.ToString() + " epochs (" + m_model.Iterations.ToString("N0") + " iterations).", true);
            m_log.WriteLine("INFO: " + m_model.Iterations.ToString("N0") + " iterations.", true);
            m_log.WriteLine("Using hidden = " + m_input.HiddenSize.ToString() + ", and word size = " + m_input.WordSize.ToString() + ".", true);

            // Load the Seq2Seq training model.
            NetParameter netParam = m_model.CreateModel(m_input.InputFileName, m_input.TargetFileName, m_input.HiddenSize, m_input.WordSize, m_input.UseSoftmax, m_input.UseExternalIp);
            string strModel = netParam.ToProto("root").ToString();
            SolverParameter solverParam = m_model.CreateSolver(m_input.LearningRate);
            string strSolver = solverParam.ToProto("root").ToString();
            byte[] rgWts = loadWeights("sequence");

            m_strModel = strModel;
            m_strSolver = strSolver;

            m_mycaffe.OnTrainingIteration += m_mycaffe_OnTrainingIteration;
            m_mycaffe.OnTestingIteration += m_mycaffe_OnTestingIteration;
            m_mycaffe.LoadLite(Phase.TRAIN, strSolver, strModel, rgWts, false, false);

            // When not using softmax, the MemoryLoss layers compute the loss instead.
            if (!m_input.UseSoftmax)
            {
                MemoryLossLayer<float> lossLayerTraining = m_mycaffe.GetInternalNet(Phase.TRAIN).FindLayer(LayerParameter.LayerType.MEMORY_LOSS, "loss") as MemoryLossLayer<float>;
                if (lossLayerTraining != null)
                {
                    lossLayerTraining.OnGetLoss += LossLayer_OnGetLossTraining;
                }

                MemoryLossLayer<float> lossLayerTesting = m_mycaffe.GetInternalNet(Phase.TEST).FindLayer(LayerParameter.LayerType.MEMORY_LOSS, "loss") as MemoryLossLayer<float>;
                if (lossLayerTesting != null)
                {
                    lossLayerTesting.OnGetLoss += LossLayer_OnGetLossTesting;
                }
            }

            m_blobProbs = new Blob<float>(m_mycaffe.Cuda, m_mycaffe.Log);
            m_blobScale = new Blob<float>(m_mycaffe.Cuda, m_mycaffe.Log);

            TextDataLayer<float> dataLayerTraining = m_mycaffe.GetInternalNet(Phase.TRAIN).FindLayer(LayerParameter.LayerType.TEXT_DATA, "data") as TextDataLayer<float>;
            if (dataLayerTraining != null)
            {
                dataLayerTraining.OnGetData += DataLayerTraining_OnGetDataTraining;
            }

            // Train the Seq2Seq model.
            m_plotsSequenceLoss = new PlotCollection("Sequence Loss");
            m_plotsSequenceAccuracyTest = new PlotCollection("Sequence Accuracy Test");
            m_plotsSequenceAccuracyTrain = new PlotCollection("Sequence Accuracy Train");
            m_mycaffe.Train(m_model.Iterations);
            saveWeights("sequence", m_mycaffe);
        }
        // Run a trained model.
        else
        {
            NetParameter netParam = m_model.CreateModel(m_input.InputFileName, m_input.TargetFileName, m_input.HiddenSize, m_input.WordSize, m_input.UseSoftmax, m_input.UseExternalIp, Phase.RUN);
            string strModel = netParam.ToProto("root").ToString();
            byte[] rgWts = loadWeights("sequence");

            strModel = m_model.PrependInput(strModel);
            m_strModelRun = strModel;

            int nN = m_model.TimeSteps;
            m_mycaffe.LoadToRun(strModel, rgWts, new BlobShape(new List<int>() { nN, 1, 1, 1 }), null, null, false, false);

            m_blobProbs = new Blob<float>(m_mycaffe.Cuda, m_mycaffe.Log);
            m_blobScale = new Blob<float>(m_mycaffe.Cuda, m_mycaffe.Log);

            runModel(m_mycaffe, bw, m_input.InputText);
        }
    }
    finally
    {
        // Cleanup.
        if (m_mycaffe != null)
        {
            m_mycaffe.Dispose();
            m_mycaffe = null;
        }
    }
}
/// <summary>
/// The constructor.
/// </summary>
/// <param name="mycaffe">Specifies the MyCaffe instance associated with the open project - when using more than one Brain, this is the master project.</param>
/// <param name="properties">Specifies the properties passed into the trainer.</param>
/// <param name="random">Specifies the random number generator used.</param>
/// <param name="phase">Specifies the phase under which to run.</param>
public Brain(MyCaffeControl<T> mycaffe, PropertySet properties, CryptoRandom random, Phase phase)
{
    m_mycaffe = mycaffe;
    m_solver = mycaffe.GetInternalSolver();
    m_netOutput = mycaffe.GetInternalNet(phase);
    m_netTarget = new Net<T>(m_mycaffe.Cuda, m_mycaffe.Log, m_netOutput.net_param, m_mycaffe.CancelEvent, null, phase);
    m_properties = properties;
    m_random = random;

    // The 'data' blob defines the frames per input (channels) and batch size (num).
    Blob<T> data = m_netOutput.blob_by_name("data");
    if (data == null)
    {
        m_mycaffe.Log.FAIL("Missing the expected input 'data' blob!");
    }

    m_nFramesPerX = data.channels;
    m_nBatchSize = data.num;

    // The 'logits' blob defines the action count (one action per channel).
    Blob<T> logits = m_netOutput.blob_by_name("logits");
    if (logits == null)
    {
        m_mycaffe.Log.FAIL("Missing the expected input 'logits' blob!");
    }

    m_nActionCount = logits.channels;

    // Use the project's data transformer when available; otherwise build one
    // sized from the training source dimensions.
    m_transformer = m_mycaffe.DataTransformer;
    if (m_transformer == null)
    {
        TransformationParameter trans_param = new TransformationParameter();
        int nC = m_mycaffe.CurrentProject.Dataset.TrainingSource.ImageChannels;
        int nH = m_mycaffe.CurrentProject.Dataset.TrainingSource.ImageHeight;
        int nW = m_mycaffe.CurrentProject.Dataset.TrainingSource.ImageWidth;
        m_transformer = new DataTransformer<T>(m_mycaffe.Cuda, m_mycaffe.Log, trans_param, phase, nC, nH, nW);
    }

    // FIX: 255 / 2 used integer division yielding 127; the true midpoint of the
    // 0-255 pixel range is 127.5, which matches the 'center each frame' intent.
    // NOTE(review): when m_transformer is the shared DataTransformer, these Adds
    // accumulate if the Brain is constructed more than once - confirm lifetime.
    for (int i = 0; i < m_nFramesPerX; i++)
    {
        m_transformer.param.mean_value.Add(255.0 / 2.0); // center each frame
    }

    m_transformer.param.scale = 1.0 / 255; // normalize
    m_transformer.Update();

    // Working blobs used while computing the Q-learning updates.
    m_blobActions = new Blob<T>(m_mycaffe.Cuda, m_mycaffe.Log, false);
    m_blobQValue = new Blob<T>(m_mycaffe.Cuda, m_mycaffe.Log);
    m_blobNextQValue = new Blob<T>(m_mycaffe.Cuda, m_mycaffe.Log);
    m_blobExpectedQValue = new Blob<T>(m_mycaffe.Cuda, m_mycaffe.Log);
    m_blobDone = new Blob<T>(m_mycaffe.Cuda, m_mycaffe.Log, false);
    m_blobLoss = new Blob<T>(m_mycaffe.Cuda, m_mycaffe.Log);
    m_blobWeights = new Blob<T>(m_mycaffe.Cuda, m_mycaffe.Log, false);

    // Discount factor for future rewards ('Gamma' property, defaults to the field value).
    m_fGamma = (float)properties.GetPropertyAsDouble("Gamma", m_fGamma);

    m_memLoss = m_netOutput.FindLastLayer(LayerParameter.LayerType.MEMORY_LOSS) as MemoryLossLayer<T>;
    if (m_memLoss == null)
    {
        m_mycaffe.Log.FAIL("Missing the expected MEMORY_LOSS layer!");
    }

    // Seed the learning rate from the project's solver 'base_lr' setting when present.
    double? dfRate = mycaffe.CurrentProject.GetSolverSettingAsNumeric("base_lr");
    if (dfRate.HasValue)
    {
        m_dfLearningRate = dfRate.Value;
    }

    m_nMiniBatch = m_properties.GetPropertyAsInt("MiniBatch", m_nMiniBatch);
    m_bUseAcceleratedTraining = properties.GetPropertyAsBool("UseAcceleratedTraining", false);

    // When accumulating gradients across more than one mini-batch, clone the
    // learnable parameters to hold the running gradient sum (diffs start at zero).
    if (m_nMiniBatch > 1)
    {
        m_colAccumulatedGradients = m_netOutput.learnable_parameters.Clone();
        m_colAccumulatedGradients.SetDiff(0);
    }
}