/// <summary>
/// Create the base solver to use.
/// </summary>
/// <returns>
/// The solver parameter created is returned.
/// </returns>
public override SolverParameter CreateSolver()
{
    m_solver = new SolverParameter();
    m_solver.type = SolverParameter.SolverType.SGD;
    m_solver.base_lr = m_dfBaseLr;
    m_solver.weight_decay = 0.0005;
    m_solver.LearningRatePolicy = SolverParameter.LearningRatePolicyType.MULTISTEP;
    m_solver.stepvalue = new List<int>() { 80000, 100000, 120000 };
    m_solver.gamma = 0.1;
    m_solver.momentum = 0.9;
    m_solver.iter_size = m_nIterSize;
    m_solver.max_iter = 120000;
    m_solver.snapshot = 80000;
    m_solver.display = 10;
    m_solver.average_loss = 10;
    m_solver.device_id = m_nGpuID;
    m_solver.debug_info = false;
    m_solver.snapshot_after_train = true;
    m_solver.clip_gradients = 1;

    // Test parameters.
    m_solver.test_iter.Add(m_nTestIter);
    m_solver.test_interval = 10000;
    m_solver.test_initialization = false;
    m_solver.eval_type = SolverParameter.EvaluationType.CLASSIFICATION;

    return(m_solver);
}
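A minimal usage sketch, assuming a builder exposing the CreateSolver above: the SolverParameter is typically serialized to its prototxt text form before being passed to MyCaffeControl.LoadLite, as the later snippets in this section do.

// Build the SGD solver settings and serialize them for loading ('builder' is an assumed instance).
SolverParameter solver = builder.CreateSolver();
string strSolver = solver.ToProto("root").ToString();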
/// <summary>
/// The RmsPropSolver constructor.
/// </summary>
/// <param name="cuda">Specifies the instance of CudaDnn to use.</param>
/// <param name="log">Specifies the Log for output.</param>
/// <param name="p">Specifies the SolverParameter.</param>
/// <param name="evtCancel">Specifies a CancelEvent used to cancel the current operation (e.g. training, testing) that the Solver is performing.</param>
/// <param name="evtForceSnapshot">Specifies an automatic reset event that causes the Solver to perform a Snapshot when set.</param>
/// <param name="evtForceTest">Specifies an automatic reset event that causes the Solver to run a testing cycle when set.</param>
/// <param name="imgDb">Specifies the CaffeImageDatabase.</param>
/// <param name="persist">Specifies the persistence used for loading and saving weights.</param>
/// <param name="nSolverCount">Specifies the number of Solvers participating in a multi-GPU session.</param>
/// <param name="nSolverRank">Specifies the rank of this Solver in a multi-GPU session.</param>
public RmsPropSolver(CudaDnn<T> cuda, Log log, SolverParameter p, CancelEvent evtCancel, AutoResetEvent evtForceSnapshot, AutoResetEvent evtForceTest, IXImageDatabase imgDb, IXPersist<T> persist, int nSolverCount = 1, int nSolverRank = 0)
    : base(cuda, log, p, evtCancel, evtForceSnapshot, evtForceTest, imgDb, persist, nSolverCount, nSolverRank)
{
    m_log.CHECK_EQ(0, m_param.momentum, "Momentum cannot be used with RmsProp.");
    m_log.CHECK_GE(m_param.rms_decay, 0, "rms_decay should lie between 0 and 1.");
    m_log.CHECK_LT(m_param.rms_decay, 1, "rms_decay should lie between 0 and 1.");
}
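A minimal sketch of a SolverParameter that passes the constructor checks above; only the momentum and rms_decay constraints come from the code, the remaining values are illustrative.

SolverParameter p = new SolverParameter();
p.type = SolverParameter.SolverType.RMSPROP;
p.momentum = 0;       // Required: RmsProp rejects non-zero momentum.
p.rms_decay = 0.999;  // Required: must lie in [0, 1).
p.base_lr = 0.001;    // Illustrative value.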
/// <summary>
/// Create the RMSProp solver used for training.
/// </summary>
/// <param name="dfLr">Specifies the learning rate.</param>
/// <returns>The SolverParameter is returned.</returns>
public SolverParameter CreateSolver(double dfLr)
{
    SolverParameter solver = new SolverParameter();

    m_dfLearningRate = dfLr;

    solver.random_seed = 0xCAFFE;
    solver.test_interval = 100;
    solver.test_iter[0] = 1;
    solver.max_iter = m_nIterations;
    solver.snapshot = m_nIterations;
    solver.test_initialization = false;
    solver.display = m_nDisplay;
    solver.momentum = 0;
    solver.rms_decay = 0.999;
    solver.weight_decay = m_dfDecayRate;
    solver.clip_gradients = 5;
    solver.regularization_type = "L2";
    solver.type = SolverParameter.SolverType.RMSPROP;
    solver.lr_policy = "multistep";
    solver.stepvalue = new List<int>() { 100000, 200000 };
    solver.gamma = 0.5;
    solver.base_lr = m_dfLearningRate;

    return(solver);
}
public void TestCreateTrainingModel()
{
    ModelBuilder builder = create();

    NetParameter net_param = builder.CreateModel();
    RawProto proto = net_param.ToProto("root");
    string strNet = proto.ToString();

    RawProto proto2 = RawProto.Parse(strNet);
    NetParameter net_param2 = NetParameter.FromProto(proto2);

    m_log.CHECK(net_param2.Compare(net_param), "The two net parameters should be the same!");

    // Verify creating the model.
    SolverParameter solver = builder.CreateSolver();
    RawProto protoSolver = solver.ToProto("root");
    string strSolver = protoSolver.ToString();

    SettingsCaffe settings = new SettingsCaffe();
    CancelEvent evtCancel = new CancelEvent();
    MyCaffeControl<T> mycaffe = new MyCaffeControl<T>(settings, m_log, evtCancel);

    save(strNet, strSolver, false);

    // mycaffe.LoadLite(Phase.TRAIN, strSolver, strNet, null);
    mycaffe.Dispose();
}
/// <summary>
/// Load and return the solver used with the MNIST LeNet input model.
/// </summary>
/// <returns>The SolverParameter is returned.</returns>
public SolverParameter CreateMnistSolver()
{
    string str = System.Text.Encoding.Default.GetString(Properties.Resources.lenet_solver);
    RawProto proto = RawProto.Parse(str);
    return(SolverParameter.FromProto(proto));
}
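A usage sketch, assuming CreateMnistSolver is in scope: settings parsed from the embedded lenet_solver resource can still be overridden after parsing, following the same parse/modify/serialize pattern as fixup_solver below; the override value is illustrative.

SolverParameter solverParam = CreateMnistSolver();

// Override a setting parsed from the embedded resource (illustrative value).
solverParam.test_interval = 500;

// Serialize back to descriptor text when a prototxt string is needed.
string strSolver = solverParam.ToProto("root").ToString();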
/// <summary>
/// The LBFGSSolver constructor.
/// </summary>
/// <param name="cuda">Specifies the instance of CudaDnn to use.</param>
/// <param name="log">Specifies the Log for output.</param>
/// <param name="p">Specifies the SolverParameter.</param>
/// <param name="evtCancel">Specifies a CancelEvent used to cancel the current operation (e.g. training, testing) that the Solver is performing.</param>
/// <param name="evtForceSnapshot">Specifies an automatic reset event that causes the Solver to perform a Snapshot when set.</param>
/// <param name="evtForceTest">Specifies an automatic reset event that causes the Solver to run a testing cycle when set.</param>
/// <param name="imgDb">Specifies the MyCaffeImageDatabase.</param>
/// <param name="persist">Specifies the persistence used for loading and saving weights.</param>
/// <param name="nSolverCount">Specifies the number of Solvers participating in a multi-GPU session.</param>
/// <param name="nSolverRank">Specifies the rank of this Solver in a multi-GPU session.</param>
/// <param name="shareNet">Optionally, specifies the net to share when creating the training network (default = null, meaning no share net is used).</param>
/// <param name="getws">Optionally, specifies the handler for getting the workspace.</param>
/// <param name="setws">Optionally, specifies the handler for setting the workspace.</param>
public LBFGSSolver(CudaDnn<T> cuda, Log log, SolverParameter p, CancelEvent evtCancel, AutoResetEvent evtForceSnapshot, AutoResetEvent evtForceTest, IXImageDatabaseBase imgDb, IXPersist<T> persist, int nSolverCount = 1, int nSolverRank = 0, Net<T> shareNet = null, onGetWorkspace getws = null, onSetWorkspace setws = null)
    : base(cuda, log, p, evtCancel, evtForceSnapshot, evtForceTest, imgDb, persist, nSolverCount, nSolverRank, shareNet, getws, setws)
{
    m_tZero = (T)Convert.ChangeType(0, typeof(T));
    m_tOne = (T)Convert.ChangeType(1, typeof(T));
    m_tMinusOne = (T)Convert.ChangeType(-1, typeof(T));
    PreSolve();
}
/// <summary>
/// The LBFGSSolver constructor.
/// </summary>
/// <param name="cuda">Specifies the instance of CudaDnn to use.</param>
/// <param name="log">Specifies the Log for output.</param>
/// <param name="p">Specifies the SolverParameter.</param>
/// <param name="evtCancel">Specifies a CancelEvent used to cancel the current operation (e.g. training, testing) that the Solver is performing.</param>
/// <param name="evtForceSnapshot">Specifies an automatic reset event that causes the Solver to perform a Snapshot when set.</param>
/// <param name="evtForceTest">Specifies an automatic reset event that causes the Solver to run a testing cycle when set.</param>
/// <param name="imgDb">Specifies the CaffeImageDatabase.</param>
/// <param name="persist">Specifies the persistence used for loading and saving weights.</param>
/// <param name="nSolverCount">Specifies the number of Solvers participating in a multi-GPU session.</param>
/// <param name="nSolverRank">Specifies the rank of this Solver in a multi-GPU session.</param>
public LBFGSSolver(CudaDnn<T> cuda, Log log, SolverParameter p, CancelEvent evtCancel, AutoResetEvent evtForceSnapshot, AutoResetEvent evtForceTest, IXImageDatabase imgDb, IXPersist<T> persist, int nSolverCount = 1, int nSolverRank = 0)
    : base(cuda, log, p, evtCancel, evtForceSnapshot, evtForceTest, imgDb, persist, nSolverCount, nSolverRank)
{
    m_tZero = (T)Convert.ChangeType(0, typeof(T));
    m_tOne = (T)Convert.ChangeType(1, typeof(T));
    m_tMinusOne = (T)Convert.ChangeType(-1, typeof(T));
    PreSolve();
}
/// <summary>
/// Set the solver testing interval.
/// </summary>
/// <param name="strSolver">Specifies the solver description text.</param>
/// <param name="nInterval">Specifies the testing interval to set.</param>
/// <returns>The updated solver description is returned.</returns>
private string fixup_solver(string strSolver, int nInterval)
{
    RawProto proto = RawProto.Parse(strSolver);
    SolverParameter solver_param = SolverParameter.FromProto(proto);

    // Set the testing interval used during training.
    solver_param.test_interval = nInterval;
    solver_param.test_initialization = false;

    return(solver_param.ToProto("root").ToString());
}
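A short usage sketch of the parse/modify/serialize round-trip above; strSolver stands for any solver prototxt text already in hand (for example, one produced by SolverParameter.ToProto), and the interval value is illustrative.

// Retarget the testing interval of an existing solver description (value illustrative).
string strFixed = fixup_solver(strSolver, 1000);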
public void TestCreateSolver()
{
    ModelBuilder builder = create();

    SolverParameter solverParam = builder.CreateSolver();
    RawProto proto = solverParam.ToProto("root");
    string strSolver = proto.ToString();

    RawProto proto2 = RawProto.Parse(strSolver);
    SolverParameter solverParam2 = SolverParameter.FromProto(proto2);

    m_log.CHECK(solverParam2.Compare(solverParam), "The two solver parameters should be the same!");
}
/// <summary>
/// Train the model.
/// </summary>
/// <param name="bNewWts">Specifies whether to use new weights or load existing ones (if they exist).</param>
public void Train(bool bNewWts)
{
    if (m_mycaffeTrain == null)
        return;

    byte[] rgWts = null;

    if (!bNewWts)
        rgWts = loadWeights();

    if (rgWts == null)
        Console.WriteLine("Starting with new weights...");

    SolverParameter solver = createSolver();
    NetParameter model = createModel();
    string strModel = model.ToProto("root").ToString();

    Console.WriteLine("Using Train Model:");
    Console.WriteLine(strModel);
    Console.WriteLine("Starting training...");

    m_mycaffeTrain.LoadLite(Phase.TRAIN, solver.ToProto("root").ToString(), model.ToProto("root").ToString(), rgWts, false, false);
    m_mycaffeTrain.SetOnTrainingStartOverride(new EventHandler(onTrainingStart));
    m_mycaffeTrain.SetOnTestingStartOverride(new EventHandler(onTestingStart));

    // Set clockwork weights.
    if (m_param.LstmEngine != EngineParameter.Engine.CUDNN)
    {
        Net<float> net = m_mycaffeTrain.GetInternalNet(Phase.TRAIN);
        Blob<float> lstm1 = net.parameters[2];
        lstm1.SetData(1, m_param.Hidden, m_param.Hidden);
    }

    m_mycaffeTrain.Train(m_param.Iterations);
    saveLstmState(m_mycaffeTrain);

    Image img = SimpleGraphingControl.QuickRender(m_plots, 1000, 600);
    showImage(img, "training.png");

    saveWeights(m_mycaffeTrain.GetWeights());
}
/// <summary>
/// Create the ADAM solver used, setting the test interval greater than the
/// number of iterations to avoid testing.
/// </summary>
/// <returns>The SolverParameter is returned.</returns>
public SolverParameter CreateSolver()
{
    SolverParameter solver = new SolverParameter();

    solver.random_seed = 0xCAFFE;
    solver.test_interval = m_nIterations + 1;
    solver.test_iter[0] = 100;
    solver.max_iter = m_nIterations;
    solver.snapshot = m_nIterations;
    solver.test_initialization = false;
    solver.display = m_nDisplay;
    solver.type = SolverParameter.SolverType.ADAM;
    solver.lr_policy = "fixed";
    solver.base_lr = m_dfLearningRate;

    return(solver);
}
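A note on the test-skip idiom above: the solver runs a testing cycle every test_interval iterations, so an interval of max_iter + 1 is never reached before training ends. A minimal sketch of the same idiom, with illustrative values:

SolverParameter solver = new SolverParameter();
solver.max_iter = 1000;                      // Illustrative iteration count.
solver.test_interval = solver.max_iter + 1;  // The first test falls past the end of training.
solver.test_initialization = false;          // Also skip the test pass run before training.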
/// <summary>
/// Create the LeNet solver prototxt programmatically.
/// </summary>
/// <param name="nIterations">Specifies the number of iterations to train.</param>
/// <returns>The solver descriptor is returned as text.</returns>
private string create_solver_descriptor_programmatically(int nIterations)
{
    SolverParameter solver_param = new SolverParameter();

    solver_param.max_iter = nIterations;
    solver_param.test_iter = new List<int>();
    solver_param.test_iter.Add(100);
    solver_param.test_initialization = false;
    solver_param.test_interval = 500;
    solver_param.base_lr = 0.01;
    solver_param.momentum = 0.9;
    solver_param.weight_decay = 0.0005;
    solver_param.LearningRatePolicy = SolverParameter.LearningRatePolicyType.INV;
    solver_param.gamma = 0.0001;
    solver_param.power = 0.75;
    solver_param.display = 100;
    solver_param.snapshot = 5000;

    // Convert the solver to a text descriptor.
    RawProto proto = solver_param.ToProto("root");
    return(proto.ToString());
}
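A usage sketch verifying the descriptor round-trips back into an equivalent SolverParameter, mirroring the TestCreateSolver test earlier in this section; the iteration count is illustrative and the m_log used by the other snippets here is assumed available.

string strSolver = create_solver_descriptor_programmatically(10000);

// Parse the text descriptor back into a SolverParameter and spot-check a field.
SolverParameter solver_param2 = SolverParameter.FromProto(RawProto.Parse(strSolver));
m_log.CHECK_EQ(10000, solver_param2.max_iter, "The max_iter setting should survive the round-trip.");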
/// <summary>
/// The AdaGradSolver constructor.
/// </summary>
/// <param name="cuda">Specifies the instance of CudaDnn to use.</param>
/// <param name="log">Specifies the Log for output.</param>
/// <param name="p">Specifies the SolverParameter.</param>
/// <param name="evtCancel">Specifies a CancelEvent used to cancel the current operation (e.g. training, testing) that the Solver is performing.</param>
/// <param name="evtForceSnapshot">Specifies an automatic reset event that causes the Solver to perform a Snapshot when set.</param>
/// <param name="evtForceTest">Specifies an automatic reset event that causes the Solver to run a testing cycle when set.</param>
/// <param name="imgDb">Specifies the CaffeImageDatabase.</param>
/// <param name="persist">Specifies the persistence used for loading and saving weights.</param>
/// <param name="nSolverCount">Specifies the number of Solvers participating in a multi-GPU session.</param>
/// <param name="nSolverRank">Specifies the rank of this Solver in a multi-GPU session.</param>
public AdaGradSolver(CudaDnn<T> cuda, Log log, SolverParameter p, CancelEvent evtCancel, AutoResetEvent evtForceSnapshot, AutoResetEvent evtForceTest, IXImageDatabase imgDb, IXPersist<T> persist, int nSolverCount = 1, int nSolverRank = 0)
    : base(cuda, log, p, evtCancel, evtForceSnapshot, evtForceTest, imgDb, persist, nSolverCount, nSolverRank)
{
    m_log.CHECK_EQ(0, m_param.momentum, "Momentum cannot be used with AdaGrad.");
}
/// <summary>
/// The AdaGradSolver constructor.
/// </summary>
/// <param name="cuda">Specifies the instance of CudaDnn to use.</param>
/// <param name="log">Specifies the Log for output.</param>
/// <param name="p">Specifies the SolverParameter.</param>
/// <param name="evtCancel">Specifies a CancelEvent used to cancel the current operation (e.g. training, testing) that the Solver is performing.</param>
/// <param name="evtForceSnapshot">Specifies an automatic reset event that causes the Solver to perform a Snapshot when set.</param>
/// <param name="evtForceTest">Specifies an automatic reset event that causes the Solver to run a testing cycle when set.</param>
/// <param name="imgDb">Specifies the MyCaffeImageDatabase.</param>
/// <param name="persist">Specifies the persistence used for loading and saving weights.</param>
/// <param name="nSolverCount">Specifies the number of Solvers participating in a multi-GPU session.</param>
/// <param name="nSolverRank">Specifies the rank of this Solver in a multi-GPU session.</param>
/// <param name="shareNet">Optionally, specifies the net to share when creating the training network (default = null, meaning no share net is used).</param>
/// <param name="getws">Optionally, specifies the handler for getting the workspace.</param>
/// <param name="setws">Optionally, specifies the handler for setting the workspace.</param>
public AdaGradSolver(CudaDnn<T> cuda, Log log, SolverParameter p, CancelEvent evtCancel, AutoResetEvent evtForceSnapshot, AutoResetEvent evtForceTest, IXImageDatabaseBase imgDb, IXPersist<T> persist, int nSolverCount = 1, int nSolverRank = 0, Net<T> shareNet = null, onGetWorkspace getws = null, onSetWorkspace setws = null)
    : base(cuda, log, p, evtCancel, evtForceSnapshot, evtForceTest, imgDb, persist, nSolverCount, nSolverRank, shareNet, getws, setws)
{
    m_log.CHECK_EQ(0, m_param.momentum, "Momentum cannot be used with AdaGrad.");
}
/// <summary>
/// Process the content image by applying the style to it that was learned from the style image.
/// </summary>
/// <param name="bmpStyle">Specifies the image from which the style to apply to the content is learned.</param>
/// <param name="bmpContent">Specifies the content image to which the style is to be applied.</param>
/// <param name="nIterations">Specifies the number of training iterations.</param>
/// <param name="strResultDir">Optionally, specifies an output directory where intermediate images are stored.</param>
/// <param name="nIntermediateOutput">Optionally, specifies how often to output an intermediate image.</param>
/// <param name="dfTvLoss">Optionally, specifies the TV-Loss weight for smoothing (default = 0, which disables this loss).</param>
/// <returns>The resulting image is returned.</returns>
public Bitmap Process(Bitmap bmpStyle, Bitmap bmpContent, int nIterations, string strResultDir = null, int nIntermediateOutput = -1, double dfTvLoss = 0)
{
    Solver<T> solver = null;
    Net<T> net = null;
    BlobCollection<T> colContentActivations = new BlobCollection<T>();
    BlobCollection<T> colGramActivations = new BlobCollection<T>();
    double dfLoss;

    try
    {
        m_dfTVLossWeight = dfTvLoss;
        m_nIterations = nIterations;

        if (bmpStyle.Width != bmpContent.Width || bmpStyle.Height != bmpContent.Height)
            bmpStyle = ImageTools.ResizeImage(bmpStyle, bmpContent.Width, bmpContent.Height);

        m_log.WriteLine("Creating input network...");
        m_log.Enable = false;
        net = new Net<T>(m_cuda, m_log, m_param, m_evtCancel, null, Phase.TEST);
        m_log.Enable = true;

        if (m_rgWeights != null)
            net.LoadWeights(m_rgWeights, m_persist);

        //-----------------------------------------
        //  Get style and content activations.
        //-----------------------------------------

        prepare_data_blob(net, bmpStyle);
        net.Forward(out dfLoss);

        foreach (KeyValuePair<string, double> kvGram in m_rgLayers["gram"])
        {
            string strGram = kvGram.Key;
            Blob<T> blobGram = net.blob_by_name(strGram);
            colGramActivations.Add(blobGram.Clone());
        }

        prepare_data_blob(net, bmpContent);
        net.Forward(out dfLoss);

        foreach (KeyValuePair<string, double> kvContent in m_rgLayers["content"])
        {
            string strContent = kvContent.Key;
            Blob<T> blobContent = net.blob_by_name(strContent);
            colContentActivations.Add(blobContent.Clone());
        }

        //-----------------------------------------
        //  Prepare the network by adding new layers.
        //-----------------------------------------

        NetParameter net_param = m_param;

        foreach (KeyValuePair<string, double> kvInput in m_rgLayers["input"])
        {
            string strName = kvInput.Key;
            LayerParameter p = new LayerParameter(LayerParameter.LayerType.INPUT);
            p.name = "input_" + strName;
            p.top.Add(p.name);

            Blob<T> blob = net.blob_by_name(strName);
            p.input_param.shape.Add(new BlobShape(blob.shape()));
            net_param.layer.Add(p);
        }

        foreach (KeyValuePair<string, double> kvContent in m_rgLayers["content"])
        {
            string strName = kvContent.Key;
            string strScale1 = "input_" + strName;
            string strScale2 = strName;

            if (m_dfContentDataScale != 1.0)
            {
                strScale1 += "b";
                LayerParameter ps1 = new LayerParameter(LayerParameter.LayerType.SCALAR);
                ps1.scalar_param.value = m_dfContentDataScale;
                ps1.scalar_param.operation = ScalarParameter.ScalarOp.MUL;
                ps1.scalar_param.passthrough_gradient = true;
                ps1.bottom.Add("input_" + strName);
                ps1.top.Add(strScale1);
                net_param.layer.Add(ps1);

                strScale2 += "b";
                LayerParameter ps2 = new LayerParameter(LayerParameter.LayerType.SCALAR);
                ps2.scalar_param.value = m_dfContentDataScale;
                ps2.scalar_param.operation = ScalarParameter.ScalarOp.MUL;
                ps2.scalar_param.passthrough_gradient = true;
                ps2.bottom.Add(strName);
                ps2.top.Add(strScale2);
                net_param.layer.Add(ps2);
            }

            LayerParameter event_param = new LayerParameter(LayerParameter.LayerType.EVENT);
            event_param.name = "event_" + strName;
            event_param.bottom.Add(strScale2);
            event_param.bottom.Add(strScale1);
            event_param.top.Add("event_" + strName);
            net_param.layer.Add(event_param);

            LayerParameter p = new LayerParameter(LayerParameter.LayerType.EUCLIDEAN_LOSS);
            p.name = "loss_" + strName;

            Blob<T> blobContent = colContentActivations[strName];
            double dfScale = get_content_scale(blobContent);
            p.loss_weight.Add(kvContent.Value * dfScale);

            p.bottom.Add("event_" + strName);
            p.bottom.Add(strScale1);
            p.top.Add("loss_" + strName);
            net_param.layer.Add(p);
        }

        foreach (KeyValuePair<string, double> kvGram in m_rgLayers["gram"].ToList())
        {
            string strGramName = kvGram.Key;

            LayerParameter event_param = new LayerParameter(LayerParameter.LayerType.EVENT);
            event_param.name = "event_" + strGramName;
            event_param.bottom.Add(strGramName);
            event_param.bottom.Add("input_" + strGramName);
            event_param.top.Add("event_" + strGramName);
            net_param.layer.Add(event_param);

            LayerParameter p = new LayerParameter(LayerParameter.LayerType.EUCLIDEAN_LOSS);
            p.name = "loss_" + strGramName;

            Blob<T> blobGram = colGramActivations[strGramName];
            double dfScale = get_style_scale(blobGram);
            p.loss_weight.Add(kvGram.Value * dfScale);

            p.bottom.Add("input_" + strGramName);
            p.bottom.Add("event_" + strGramName);
            p.top.Add("loss_" + strGramName);
            net_param.layer.Add(p);
        }

        // Add TV Loss.
        if (m_dfTVLossWeight != 0)
        {
            LayerParameter p = new LayerParameter(LayerParameter.LayerType.TV_LOSS);
            p.name = "loss_tv";
            double dfWeight = m_dfTVLossWeight;
            p.loss_weight.Add(dfWeight);
            p.bottom.Add("data");
            p.top.Add("loss_tv");
            net_param.layer.Add(p);
        }

        // Replace the InputLayer with a ParameterLayer,
        // so that we'll be able to backprop into the image.
        Blob<T> data = net.blob_by_name("data");
        for (int i = 0; i < net_param.layer.Count; i++)
        {
            LayerParameter p = net_param.layer[i];

            if (p.name == "input1")
            {
                net_param.layer[i].SetType(LayerParameter.LayerType.PARAMETER);
                net_param.layer[i].parameter_param.shape = new BlobShape(data.shape());
                break;
            }
        }

        // Disable weight learning.
        List<LayerParameter.LayerType> rgTypes = new List<LayerParameter.LayerType>();
        rgTypes.Add(LayerParameter.LayerType.CONVOLUTION);
        rgTypes.Add(LayerParameter.LayerType.DECONVOLUTION);
        rgTypes.Add(LayerParameter.LayerType.INNERPRODUCT);
        rgTypes.Add(LayerParameter.LayerType.PRELU);
        rgTypes.Add(LayerParameter.LayerType.BIAS);
        rgTypes.Add(LayerParameter.LayerType.EMBED);
        rgTypes.Add(LayerParameter.LayerType.LSTM);
        rgTypes.Add(LayerParameter.LayerType.LSTM_SIMPLE);
        rgTypes.Add(LayerParameter.LayerType.RNN);

        foreach (LayerParameter layer in net_param.layer)
        {
            if (rgTypes.Contains(layer.type))
            {
                layer.parameters = new List<ParamSpec>();
                layer.parameters.Add(new ParamSpec(0, 0));
                layer.parameters.Add(new ParamSpec(0, 0));
            }
        }

        net.Dispose();
        net = null;

        //-----------------------------------------
        //  Create solver and assign inputs.
        //-----------------------------------------

        RawProto proto1 = net_param.ToProto("root");
        string str = proto1.ToString();

        SolverParameter solver_param = new SolverParameter();
        solver_param.display = m_nDisplayEvery;
        solver_param.train_net_param = net_param;
        solver_param.test_iter.Clear();
        solver_param.test_interval = 0;
        solver_param.test_initialization = false;
        solver_param.base_lr = m_dfLearningRate;
        solver_param.type = m_solverType;

        m_log.WriteLine("Creating " + m_solverType.ToString() + " solver with learning rate = " + m_dfLearningRate.ToString() + "...");
        m_log.Enable = false;

        if (m_solverType == SolverParameter.SolverType.LBFGS)
            solver = new LBFGSSolver<T>(m_cuda, m_log, solver_param, m_evtCancel, null, null, null, m_persist);
        else
            solver = Solver<T>.Create(m_cuda, m_log, solver_param, m_evtCancel, null, null, null, m_persist);

        m_log.Enable = true;
        solver.OnSnapshot += Solver_OnSnapshot;
        solver.OnTrainingIteration += Solver_OnTrainingIteration;

        foreach (Layer<T> layer in solver.net.layers)
        {
            if (layer.type == LayerParameter.LayerType.EVENT)
            {
                EventLayer<T> eventLayer = layer as EventLayer<T>;
                eventLayer.OnBackward += EventLayer_OnBackward;
            }
        }

        prepare_input_param(solver.net, bmpContent);

        foreach (KeyValuePair<string, double> kvContent in m_rgLayers["content"])
        {
            string strName = kvContent.Key;
            Blob<T> blobDst = solver.net.blob_by_name("input_" + strName);
            Blob<T> blobSrc = colContentActivations[strName];
            blobDst.CopyFrom(blobSrc);
        }

        foreach (KeyValuePair<string, double> kvGram in m_rgLayers["gram"])
        {
            string strName = kvGram.Key;
            Blob<T> blobDst = solver.net.blob_by_name("input_" + strName);
            Blob<T> blobSrc = colGramActivations[strName];
            blobDst.CopyFrom(blobSrc);
        }

        //-----------------------------------------
        //  Optimize.
        //-----------------------------------------

        int nIterations1 = m_nIterations;
        if (strResultDir != null && nIntermediateOutput > 0)
            nIterations1 /= nIntermediateOutput;

        if (m_rgWeights != null)
        {
            Blob<T> blobInput = solver.net.learnable_parameters[0];
            solver.net.learnable_parameters.RemoveAt(0);
            solver.net.LoadWeights(m_rgWeights, m_persist);
            solver.net.learnable_parameters.Insert(0, blobInput);
        }

        if (strResultDir != null)
        {
            strResultDir = strResultDir.TrimEnd('\\');
            strResultDir += "\\";
        }

        for (int i = 0; i < nIterations1; i++)
        {
            if (m_evtCancel.WaitOne(0))
                break;

            solver.Step(nIntermediateOutput, TRAIN_STEP.NONE, true, true, true);

            if (strResultDir != null)
            {
                Bitmap bmpTemp = save(solver.net);

                string strFile = strResultDir + i.ToString() + "_temp.png";
                if (File.Exists(strFile))
                    File.Delete(strFile);

                bmpTemp.Save(strFile);
            }
        }

        Bitmap bmpOutput = save(solver.net);

        return(bmpOutput);
    }
    catch (Exception)
    {
        // Rethrow without resetting the stack trace.
        throw;
    }
    finally
    {
        if (net != null)
            net.Dispose();

        if (solver != null)
            solver.Dispose();

        colGramActivations.Dispose();
        colContentActivations.Dispose();
    }
}
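A hedged usage sketch of the Process method above, called from within the class; the file paths, iteration count, and TV-Loss weight are illustrative.

// Apply a learned style to a content image (paths and values illustrative).
Bitmap bmpStyle = new Bitmap("style.png");
Bitmap bmpContent = new Bitmap("content.png");

Bitmap bmpResult = Process(bmpStyle, bmpContent, 1000, @"C:\results", 100, 0.01);
bmpResult.Save("result.png");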
/// <summary>
/// The AdaDeltaSolver constructor.
/// </summary>
/// <param name="cuda">Specifies the instance of CudaDnn to use.</param>
/// <param name="log">Specifies the Log for output.</param>
/// <param name="p">Specifies the SolverParameter.</param>
/// <param name="evtCancel">Specifies a CancelEvent used to cancel the current operation (e.g. training, testing) that the Solver is performing.</param>
/// <param name="evtForceSnapshot">Specifies an automatic reset event that causes the Solver to perform a Snapshot when set.</param>
/// <param name="evtForceTest">Specifies an automatic reset event that causes the Solver to run a testing cycle when set.</param>
/// <param name="imgDb">Specifies the CaffeImageDatabase.</param>
/// <param name="persist">Specifies the persistence used for loading and saving weights.</param>
/// <param name="nSolverCount">Specifies the number of Solvers participating in a multi-GPU session.</param>
/// <param name="nSolverRank">Specifies the rank of this Solver in a multi-GPU session.</param>
/// <param name="shareNet">Optionally, specifies the net to share when creating the training network (default = null, meaning no share net is used).</param>
/// <param name="getws">Optionally, specifies the handler for getting the workspace.</param>
/// <param name="setws">Optionally, specifies the handler for setting the workspace.</param>
public AdaDeltaSolver(CudaDnn<T> cuda, Log log, SolverParameter p, CancelEvent evtCancel, AutoResetEvent evtForceSnapshot, AutoResetEvent evtForceTest, IXImageDatabase imgDb, IXPersist<T> persist, int nSolverCount = 1, int nSolverRank = 0, Net<T> shareNet = null, onGetWorkspace getws = null, onSetWorkspace setws = null)
    : base(cuda, log, p, evtCancel, evtForceSnapshot, evtForceTest, imgDb, persist, nSolverCount, nSolverRank, shareNet, getws, setws)
{
    AdaDeltaPreSolve();
}
/// <summary>
/// The DoWork thread is the main thread used to train or run the model, depending on the operation selected.
/// </summary>
/// <param name="sender">Specifies the sender.</param>
/// <param name="e">Specifies the arguments.</param>
private void m_bw_DoWork(object sender, DoWorkEventArgs e)
{
    BackgroundWorker bw = sender as BackgroundWorker;
    m_input = e.Argument as InputData;
    SettingsCaffe s = new SettingsCaffe();
    s.ImageDbLoadMethod = IMAGEDB_LOAD_METHOD.LOAD_ALL;

    try
    {
        m_model.Batch = m_input.Batch;
        m_mycaffe = new MyCaffeControl<float>(s, m_log, m_evtCancel);

        // Train the model.
        if (m_input.Operation == InputData.OPERATION.TRAIN)
        {
            m_model.Iterations = (int)((m_input.Epochs * 7000) / m_model.Batch);
            m_log.WriteLine("Training for " + m_input.Epochs.ToString() + " epochs (" + m_model.Iterations.ToString("N0") + " iterations).", true);
            m_log.WriteLine("INFO: " + m_model.Iterations.ToString("N0") + " iterations.", true);
            m_log.WriteLine("Using hidden = " + m_input.HiddenSize.ToString() + ", and word size = " + m_input.WordSize.ToString() + ".", true);

            // Load the Seq2Seq training model.
            NetParameter netParam = m_model.CreateModel(m_input.InputFileName, m_input.TargetFileName, m_input.HiddenSize, m_input.WordSize, m_input.UseSoftmax, m_input.UseExternalIp);
            string strModel = netParam.ToProto("root").ToString();
            SolverParameter solverParam = m_model.CreateSolver(m_input.LearningRate);
            string strSolver = solverParam.ToProto("root").ToString();
            byte[] rgWts = loadWeights("sequence");

            m_strModel = strModel;
            m_strSolver = strSolver;

            m_mycaffe.OnTrainingIteration += m_mycaffe_OnTrainingIteration;
            m_mycaffe.OnTestingIteration += m_mycaffe_OnTestingIteration;
            m_mycaffe.LoadLite(Phase.TRAIN, strSolver, strModel, rgWts, false, false);

            if (!m_input.UseSoftmax)
            {
                MemoryLossLayer<float> lossLayerTraining = m_mycaffe.GetInternalNet(Phase.TRAIN).FindLayer(LayerParameter.LayerType.MEMORY_LOSS, "loss") as MemoryLossLayer<float>;
                if (lossLayerTraining != null)
                    lossLayerTraining.OnGetLoss += LossLayer_OnGetLossTraining;

                MemoryLossLayer<float> lossLayerTesting = m_mycaffe.GetInternalNet(Phase.TEST).FindLayer(LayerParameter.LayerType.MEMORY_LOSS, "loss") as MemoryLossLayer<float>;
                if (lossLayerTesting != null)
                    lossLayerTesting.OnGetLoss += LossLayer_OnGetLossTesting;
            }

            m_blobProbs = new Blob<float>(m_mycaffe.Cuda, m_mycaffe.Log);
            m_blobScale = new Blob<float>(m_mycaffe.Cuda, m_mycaffe.Log);

            TextDataLayer<float> dataLayerTraining = m_mycaffe.GetInternalNet(Phase.TRAIN).FindLayer(LayerParameter.LayerType.TEXT_DATA, "data") as TextDataLayer<float>;
            if (dataLayerTraining != null)
                dataLayerTraining.OnGetData += DataLayerTraining_OnGetDataTraining;

            // Train the Seq2Seq model.
            m_plotsSequenceLoss = new PlotCollection("Sequence Loss");
            m_plotsSequenceAccuracyTest = new PlotCollection("Sequence Accuracy Test");
            m_plotsSequenceAccuracyTrain = new PlotCollection("Sequence Accuracy Train");
            m_mycaffe.Train(m_model.Iterations);
            saveWeights("sequence", m_mycaffe);
        }
        // Run a trained model.
        else
        {
            NetParameter netParam = m_model.CreateModel(m_input.InputFileName, m_input.TargetFileName, m_input.HiddenSize, m_input.WordSize, m_input.UseSoftmax, m_input.UseExternalIp, Phase.RUN);
            string strModel = netParam.ToProto("root").ToString();
            byte[] rgWts = loadWeights("sequence");

            strModel = m_model.PrependInput(strModel);
            m_strModelRun = strModel;

            int nN = m_model.TimeSteps;
            m_mycaffe.LoadToRun(strModel, rgWts, new BlobShape(new List<int>() { nN, 1, 1, 1 }), null, null, false, false);

            m_blobProbs = new Blob<float>(m_mycaffe.Cuda, m_mycaffe.Log);
            m_blobScale = new Blob<float>(m_mycaffe.Cuda, m_mycaffe.Log);

            runModel(m_mycaffe, bw, m_input.InputText);
        }
    }
    catch (Exception)
    {
        // Rethrow without resetting the stack trace.
        throw;
    }
    finally
    {
        // Cleanup.
        if (m_mycaffe != null)
        {
            m_mycaffe.Dispose();
            m_mycaffe = null;
        }
    }
}
/// <summary>
/// The worker thread used to either train or run the models.
/// </summary>
/// <remarks>
/// When training, the input hand-written image model is first trained
/// using the LeNet model.
///
/// This input model is then run in the onTrainingStart event to get the
/// detected hand-written character representation. The outputs of layer
/// 'ip1' from the input model are then fed as input to the sequence
/// model, which encodes the 'ip1' input data with one LSTM and decodes
/// it with another, trained to detect a section of the Sin curve data.
///
/// When running, the input model is first run to get its 'ip1' representation,
/// which is then fed into the sequence model to detect the section of the
/// Sin curve.
/// </remarks>
/// <param name="sender">Specifies the sender of the event (e.g. the BackgroundWorker).</param>
/// <param name="e">Specifies the event args.</param>
private void m_bw_DoWork(object sender, DoWorkEventArgs e)
{
    BackgroundWorker bw = sender as BackgroundWorker;
    OPERATION op = (OPERATION)e.Argument;
    SettingsCaffe s = new SettingsCaffe();
    s.ImageDbLoadMethod = IMAGEDB_LOAD_METHOD.LOAD_ALL;

    m_operation = op;
    m_mycaffe = new MyCaffeControl<float>(s, m_log, m_evtCancel);
    m_mycaffeInput = new MyCaffeControl<float>(s, m_log, m_evtCancel);
    m_imgDb = new MyCaffeImageDatabase2(m_log);

    // Load the image database.
    m_imgDb.InitializeWithDsName1(s, "MNIST");
    m_ds = m_imgDb.GetDatasetByName("MNIST");

    // Create the MNIST image detection model.
    NetParameter netParamMnist = m_model.CreateMnistModel(m_ds);
    SolverParameter solverParamMnist = m_model.CreateMnistSolver();

    byte[] rgWts = loadWeights("input");
    m_mycaffeInput.Load(Phase.TRAIN, solverParamMnist.ToProto("root").ToString(), netParamMnist.ToProto("root").ToString(), rgWts, null, null, false, m_imgDb);

    Net<float> netTrain = m_mycaffeInput.GetInternalNet(Phase.TRAIN);
    Blob<float> input_ip = netTrain.FindBlob(m_strInputOutputBlobName); // The input model's second-to-last output (includes relu).

    // Run the train or run operation.
    if (op == OPERATION.TRAIN)
    {
        // Train the MNIST model first.
        m_mycaffeInput.OnTrainingIteration += m_mycaffeInput_OnTrainingIteration;
        m_plotsInputLoss = new PlotCollection("Input Loss");
        m_mycaffeInput.Train(2000);
        saveWeights("input", m_mycaffeInput.GetWeights());

        // Load the Seq2Seq training model.
        NetParameter netParam = m_model.CreateModel(input_ip.channels, 10);
        string strModel = netParam.ToProto("root").ToString();
        SolverParameter solverParam = m_model.CreateSolver();
        rgWts = loadWeights("sequence");

        m_mycaffe.OnTrainingIteration += m_mycaffe_OnTrainingIteration;
        m_mycaffe.LoadLite(Phase.TRAIN, solverParam.ToProto("root").ToString(), netParam.ToProto("root").ToString(), rgWts, false, false);
        m_mycaffe.SetOnTrainingStartOverride(new EventHandler(onTrainingStart));

        // Train the Seq2Seq model.
        m_plotsSequenceLoss = new PlotCollection("Sequence Loss");
        m_mycaffe.Train(m_model.Iterations);
        saveWeights("sequence", m_mycaffe.GetWeights());
    }
    else
    {
        NetParameter netParam = m_model.CreateModel(input_ip.channels, 10, 1, 1);
        string strModel = netParam.ToProto("root").ToString();
        rgWts = loadWeights("sequence");

        int nN = 1;
        m_mycaffe.LoadToRun(netParam.ToProto("root").ToString(), rgWts, new BlobShape(new List<int>() { nN, 1, 1, 1 }), null, null, false, false);
        runModel(m_mycaffe, bw);
    }

    // Cleanup.
    m_mycaffe.Dispose();
    m_mycaffe = null;
    m_mycaffeInput.Dispose();
    m_mycaffeInput = null;
}
/// <summary>
/// The NesterovSolver constructor.
/// </summary>
/// <param name="cuda">Specifies the instance of CudaDnn to use.</param>
/// <param name="log">Specifies the Log for output.</param>
/// <param name="p">Specifies the SolverParameter.</param>
/// <param name="evtCancel">Specifies a CancelEvent used to cancel the current operation (e.g. training, testing) that the Solver is performing.</param>
/// <param name="evtForceSnapshot">Specifies an automatic reset event that causes the Solver to perform a Snapshot when set.</param>
/// <param name="evtForceTest">Specifies an automatic reset event that causes the Solver to run a testing cycle when set.</param>
/// <param name="imgDb">Specifies the CaffeImageDatabase.</param>
/// <param name="persist">Specifies the persistence used for loading and saving weights.</param>
/// <param name="nSolverCount">Specifies the number of Solvers participating in a multi-GPU session.</param>
/// <param name="nSolverRank">Specifies the rank of this Solver in a multi-GPU session.</param>
/// <param name="shareNet">Optionally, specifies the net to share when creating the training network (default = null, meaning no share net is used).</param>
public NesterovSolver(CudaDnn<T> cuda, Log log, SolverParameter p, CancelEvent evtCancel, AutoResetEvent evtForceSnapshot, AutoResetEvent evtForceTest, IXImageDatabase imgDb, IXPersist<T> persist, int nSolverCount = 1, int nSolverRank = 0, Net<T> shareNet = null)
    : base(cuda, log, p, evtCancel, evtForceSnapshot, evtForceTest, imgDb, persist, nSolverCount, nSolverRank, shareNet)
{
}
/// <summary>
/// The AdamSolver constructor.
/// </summary>
/// <param name="cuda">Specifies the instance of CudaDnn to use.</param>
/// <param name="log">Specifies the Log for output.</param>
/// <param name="p">Specifies the SolverParameter.</param>
/// <param name="evtCancel">Specifies a CancelEvent used to cancel the current operation (e.g. training, testing) that the Solver is performing.</param>
/// <param name="evtForceSnapshot">Specifies an automatic reset event that causes the Solver to perform a Snapshot when set.</param>
/// <param name="evtForceTest">Specifies an automatic reset event that causes the Solver to run a testing cycle when set.</param>
/// <param name="imgDb">Specifies the CaffeImageDatabase.</param>
/// <param name="persist">Specifies the persistence used for loading and saving weights.</param>
/// <param name="nSolverCount">Specifies the number of Solvers participating in a multi-GPU session.</param>
/// <param name="nSolverRank">Specifies the rank of this Solver in a multi-GPU session.</param>
public AdamSolver(CudaDnn<T> cuda, Log log, SolverParameter p, CancelEvent evtCancel, AutoResetEvent evtForceSnapshot, AutoResetEvent evtForceTest, IXImageDatabase imgDb, IXPersist<T> persist, int nSolverCount = 1, int nSolverRank = 0)
    : base(cuda, log, p, evtCancel, evtForceSnapshot, evtForceTest, imgDb, persist, nSolverCount, nSolverRank)
{
    AdamPreSolve();
}
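A hedged sketch of creating a solver through the Solver&lt;T&gt;.Create factory rather than a constructor; the factory dispatches on SolverParameter.type to the matching solver class (the pattern used by the style-transfer and multi-GPU snippets in this section), and the cuda, log, evtCancel, imgDb, and persist variables are assumed to be initialized.

SolverParameter p = new SolverParameter();
p.type = SolverParameter.SolverType.ADAM;

// The factory selects the concrete solver class (here AdamSolver<T>) from p.type.
Solver<T> solver = Solver<T>.Create(cuda, log, p, evtCancel, null, null, imgDb, persist);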
/// <summary>
/// The Worker_DoWork thread runs one worker solver in a multi-GPU training session.
/// </summary>
/// <param name="sender">Specifies the sender.</param>
/// <param name="e">Specifies the action state arguments containing the SolverInfo.</param>
private void Worker_DoWork(object sender, ActionStateArgs<T> e)
{
    SolverInfo<T> info = e.Arg as SolverInfo<T>;
    NCCL<T> nccl = null;

    m_cuda = new common.CudaDnn<T>(e.DeviceID, DEVINIT.CUBLAS | DEVINIT.CURAND, null, info.CudaPath);

    try
    {
        Solver<T> rank0 = info.Rank0;
        Log log = new Log("Worker solver for DeviceID = " + e.DeviceID.ToString());

        //-----------------------------------------
        //  Transfer the NCCL handle from the
        //  main kernel that created it to the
        //  one used by the CudaDnn on this thread.
        //
        //  After the copy, this thread will 'own'
        //  the nccl and be responsible for its
        //  destruction.
        //-----------------------------------------
        long hNccl = m_cuda.KernelCopyNccl(info.KernelHandle, info.NcclHandle);

        // Create the solver and install callbacks.
        SolverParameter param = rank0.parameter.Clone();
        param.device_id = e.DeviceID;
        param.type = rank0.parameter.type;

        Solver<T> solver = Solver<T>.Create(m_cuda, log, param, rank0.CancelEvent, null, null, rank0.Database, null, rank0.solver_count, info.SolverRank);
        info.StartedEvent.Set();
        log.CHECK_EQ((int)solver.type, (int)rank0.type, "The solver types should be the same.");

        //-----------------------------------------
        //  Turn off logging for all other
        //  operations on the worker thread.
        //-----------------------------------------
        log.Enable = false;

        nccl = new NCCL<T>(m_cuda, log, solver, e.DeviceID, hNccl, info.GradientReadyEvents);

        info.InitializedEvent.Set();
        m_cuda.SynchronizeDevice();

        List<WaitHandle> rgWait = new List<WaitHandle>();
        rgWait.AddRange(rank0.CancelEvent.Handles);
        rgWait.Add(info.AllCreatedEvent);

        int nWait = WaitHandle.WaitAny(rgWait.ToArray());
        if (nWait < rgWait.Count - 1)
            return;

        nccl.Broadcast();

        int nIterations = param.max_iter - solver.iter;
        if (info.IterationOverride > 0)
            nIterations = info.IterationOverride;

        solver.Step(nIterations);
        solver.Dispose();
    }
    catch (Exception excpt)
    {
        info.Error = excpt;
        info.ErrorEvent.Set();
    }
    finally
    {
        if (nccl != null)
            nccl.Dispose();

        m_cuda.Dispose();
        m_cuda = null;
    }
}