Ejemplo n.º 1
0
        /// <summary>
        /// Build the solver settings used for training: an SGD solver with a MULTISTEP
        /// learning rate policy, along with the testing settings.
        /// </summary>
        /// <returns>
        /// The newly created SolverParameter is returned (the same instance is kept in m_solver).
        /// </returns>
        public override SolverParameter CreateSolver()
        {
            SolverParameter solver = new SolverParameter();
            m_solver = solver;

            // Optimizer and learning rate schedule.
            solver.type = SolverParameter.SolverType.SGD;
            solver.base_lr = m_dfBaseLr;
            solver.weight_decay = 0.0005;
            solver.LearningRatePolicy = SolverParameter.LearningRatePolicyType.MULTISTEP;
            solver.stepvalue = new List<int> { 80000, 100000, 120000 };
            solver.gamma = 0.1;
            solver.momentum = 0.9;

            // Run length, snapshot and reporting settings.
            solver.iter_size = m_nIterSize;
            solver.max_iter = 120000;
            solver.snapshot = 80000;
            solver.display = 10;
            solver.average_loss = 10;
            solver.device_id = m_nGpuID;
            solver.debug_info = false;
            solver.snapshot_after_train = true;
            solver.clip_gradients = 1;

            // Test parameters.
            solver.test_iter.Add(m_nTestIter);
            solver.test_interval = 10000;
            solver.test_initialization = false;
            solver.eval_type = SolverParameter.EvaluationType.CLASSIFICATION;

            return solver;
        }
Ejemplo n.º 2
0
 /// <summary>
 /// The RmsPropSolver constructor, which also checks the RmsProp specific solver settings:
 /// momentum must be 0 and rms_decay must be greater than or equal to 0 and less than 1.
 /// </summary>
 /// <param name="cuda">Specifies the instance of CudaDnn to use.</param>
 /// <param name="log">Specifies the Log for output.</param>
 /// <param name="p">Specifies the SolverParameter.</param>
 /// <param name="evtCancel">Specifies a CancelEvent used to cancel the current operation (e.g. training, testing) that the Solver is performing.</param>
 /// <param name="evtForceSnapshot">Specifies an automatic reset event that causes the Solver to perform a Snapshot when set.</param>
 /// <param name="evtForceTest">Specifies an automatic reset event that causes the Solver to run a testing cycle when set.</param>
 /// <param name="imgDb">Specifies the CaffeImageDatabase.</param>
 /// <param name="persist">Specifies the persistence used for loading and saving weights.</param>
 /// <param name="nSolverCount">Specifies the number of Solvers participating in a multi-GPU session.</param>
 /// <param name="nSolverRank">Specifies the rank of this Solver in a multi-GPU session.</param>
 public RmsPropSolver(CudaDnn<T> cuda, Log log, SolverParameter p, CancelEvent evtCancel, AutoResetEvent evtForceSnapshot, AutoResetEvent evtForceTest, IXImageDatabase imgDb, IXPersist<T> persist, int nSolverCount = 1, int nSolverRank = 0)
     : base(cuda, log, p, evtCancel, evtForceSnapshot, evtForceTest, imgDb, persist, nSolverCount, nSolverRank)
 {
     m_log.CHECK_EQ(0, m_param.momentum, "Momentum cannot be used with RmsProp.");

     // Both range checks share the same message and the same value.
     var rmsDecay = m_param.rms_decay;
     m_log.CHECK_GE(rmsDecay, 0, "rms_decay should lie between 0 and 1.");
     m_log.CHECK_LT(rmsDecay, 1, "rms_decay should lie between 0 and 1.");
 }
Ejemplo n.º 3
0
        /// <summary>
        /// Create the RMSPROP solver settings, using the "multistep" learning rate policy and
        /// the learning rate supplied by the caller.
        /// </summary>
        /// <remarks>
        /// The supplied learning rate is also stored in m_dfLearningRate.
        /// </remarks>
        /// <param name="dfLr">Specifies the learning rate.</param>
        /// <returns>The SolverParameter is returned.</returns>
        public SolverParameter CreateSolver(double dfLr)
        {
            SolverParameter param = new SolverParameter();

            m_dfLearningRate = dfLr;

            // Run length, testing and reporting settings.
            param.random_seed = 0xCAFFE;
            param.test_interval = 100;
            // Overwrites the first test_iter entry - assumes a new SolverParameter already has one (TODO confirm).
            param.test_iter[0] = 1;
            param.max_iter = m_nIterations;
            param.snapshot = m_nIterations;
            param.test_initialization = false;
            param.display = m_nDisplay;

            // Optimizer settings.
            param.momentum = 0;
            param.rms_decay = 0.999;
            param.weight_decay = m_dfDecayRate;
            param.clip_gradients = 5;
            param.regularization_type = "L2";
            param.type = SolverParameter.SolverType.RMSPROP;

            // Learning rate schedule.
            param.lr_policy = "multistep";
            param.stepvalue = new List<int> { 100000, 200000 };
            param.gamma = 0.5;
            param.base_lr = m_dfLearningRate;

            return param;
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Verifies that the model created by the ModelBuilder compares equal to itself after a round trip
        /// through its text form, then creates the solver description and saves both descriptions.
        /// </summary>
        public void TestCreateTrainingModel()
        {
            ModelBuilder builder = create();

            // Round trip the model: parameter -> text -> parameter.
            NetParameter net_param = builder.CreateModel();
            RawProto     proto     = net_param.ToProto("root");
            string       strNet    = proto.ToString();

            RawProto     proto2     = RawProto.Parse(strNet);
            NetParameter net_param2 = NetParameter.FromProto(proto2);

            m_log.CHECK(net_param2.Compare(net_param), "The two net parameters should be the same!");

            // verify creating the model.
            SolverParameter solver      = builder.CreateSolver();
            RawProto        protoSolver = solver.ToProto("root");
            string          strSolver   = protoSolver.ToString();

            SettingsCaffe     settings  = new SettingsCaffe();
            CancelEvent       evtCancel = new CancelEvent();
            MyCaffeControl<T> mycaffe   = new MyCaffeControl<T>(settings, m_log, evtCancel);

            try
            {
                save(strNet, strSolver, false);

                //            mycaffe.LoadLite(Phase.TRAIN, strSolver, strNet, null);
            }
            finally
            {
                // Release the control even when save() throws.
                mycaffe.Dispose();
            }
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Load the solver used with the MNIST LeNet input model from the embedded
        /// 'lenet_solver' resource and parse it into a SolverParameter.
        /// </summary>
        /// <returns>The SolverParameter is returned.</returns>
        public SolverParameter CreateMnistSolver()
        {
            // NOTE(review): Encoding.Default depends on the runtime/platform - confirm the resource only holds ASCII text.
            byte[] rgSolverBytes = Properties.Resources.lenet_solver;
            string strSolverText = System.Text.Encoding.Default.GetString(rgSolverBytes);

            RawProto protoSolver = RawProto.Parse(strSolverText);
            SolverParameter solverParam = SolverParameter.FromProto(protoSolver);

            return solverParam;
        }
Ejemplo n.º 6
0
 /// <summary>
 /// The LBFGSSolver constructor, which initializes the cached 0, 1 and -1 values of type T and then calls PreSolve.
 /// </summary>
 /// <param name="cuda">Specifies the instance of CudaDnn to use.</param>
 /// <param name="log">Specifies the Log for output.</param>
 /// <param name="p">Specifies the SolverParameter.</param>
 /// <param name="evtCancel">Specifies a CancelEvent used to cancel the current operation (e.g. training, testing) that the Solver is performing.</param>
 /// <param name="evtForceSnapshot">Specifies an automatic reset event that causes the Solver to perform a Snapshot when set.</param>
 /// <param name="evtForceTest">Specifies an automatic reset event that causes the Solver to run a testing cycle when set.</param>
 /// <param name="imgDb">Specifies the MyCaffeImageDatabase.</param>
 /// <param name="persist">Specifies the persistence used for loading and saving weights.</param>
 /// <param name="nSolverCount">Specifies the number of Solvers participating in a multi-GPU session.</param>
 /// <param name="nSolverRank">Specifies the rank of this Solver in a multi-GPU session.</param>
 /// <param name="shareNet">Optionally, specifies the net to share when creating the training network (default = null, meaning no share net is used).</param>
 /// <param name="getws">Optionally, specifies the handler for getting the workspace.</param>
 /// <param name="setws">Optionally, specifies the handler for setting the workspace.</param>
 public LBFGSSolver(CudaDnn<T> cuda, Log log, SolverParameter p, CancelEvent evtCancel, AutoResetEvent evtForceSnapshot, AutoResetEvent evtForceTest, IXImageDatabaseBase imgDb, IXPersist<T> persist, int nSolverCount = 1, int nSolverRank = 0, Net<T> shareNet = null, onGetWorkspace getws = null, onSetWorkspace setws = null)
     : base(cuda, log, p, evtCancel, evtForceSnapshot, evtForceTest, imgDb, persist, nSolverCount, nSolverRank, shareNet, getws, setws)
 {
     // Convert the integer constants into the generic type T once, up front.
     Type typeT = typeof(T);

     m_tZero = (T)Convert.ChangeType(0, typeT);
     m_tOne = (T)Convert.ChangeType(1, typeT);
     m_tMinusOne = (T)Convert.ChangeType(-1, typeT);

     PreSolve();
 }
Ejemplo n.º 7
0
 /// <summary>
 /// The LBFGSSolver constructor, which initializes the cached 0, 1 and -1 values of type T and then calls PreSolve.
 /// </summary>
 /// <param name="cuda">Specifies the instance of CudaDnn to use.</param>
 /// <param name="log">Specifies the Log for output.</param>
 /// <param name="p">Specifies the SolverParameter.</param>
 /// <param name="evtCancel">Specifies a CancelEvent used to cancel the current operation (e.g. training, testing) that the Solver is performing.</param>
 /// <param name="evtForceSnapshot">Specifies an automatic reset event that causes the Solver to perform a Snapshot when set.</param>
 /// <param name="evtForceTest">Specifies an automatic reset event that causes the Solver to run a testing cycle when set.</param>
 /// <param name="imgDb">Specifies the CaffeImageDatabase.</param>
 /// <param name="persist">Specifies the persistence used for loading and saving weights.</param>
 /// <param name="nSolverCount">Specifies the number of Solvers participating in a multi-GPU session.</param>
 /// <param name="nSolverRank">Specifies the rank of this Solver in a multi-GPU session.</param>
 public LBFGSSolver(CudaDnn<T> cuda, Log log, SolverParameter p, CancelEvent evtCancel, AutoResetEvent evtForceSnapshot, AutoResetEvent evtForceTest, IXImageDatabase imgDb, IXPersist<T> persist, int nSolverCount = 1, int nSolverRank = 0)
     : base(cuda, log, p, evtCancel, evtForceSnapshot, evtForceTest, imgDb, persist, nSolverCount, nSolverRank)
 {
     // Convert the integer constants into the generic type T once, up front.
     Type typeT = typeof(T);

     m_tZero = (T)Convert.ChangeType(0, typeT);
     m_tOne = (T)Convert.ChangeType(1, typeT);
     m_tMinusOne = (T)Convert.ChangeType(-1, typeT);

     PreSolve();
 }
Ejemplo n.º 8
0
        /// <summary>
        /// Rewrite a solver description so that it uses the specified testing interval
        /// and has test_initialization set to false.
        /// </summary>
        /// <param name="strSolver">Specifies the solver description text to parse.</param>
        /// <param name="nInterval">Specifies the value assigned to the solver's test_interval.</param>
        /// <returns>The updated solver description is returned as text.</returns>
        private string fixup_solver(string strSolver, int nInterval)
        {
            SolverParameter param = SolverParameter.FromProto(RawProto.Parse(strSolver));

            // Set the testing interval during training.
            param.test_interval = nInterval;
            param.test_initialization = false;

            RawProto protoUpdated = param.ToProto("root");

            return protoUpdated.ToString();
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Verifies that the solver created by the ModelBuilder compares equal to itself
        /// after being converted to text and parsed back again.
        /// </summary>
        public void TestCreateSolver()
        {
            ModelBuilder builder = create();

            // Original solver -> text.
            SolverParameter paramOriginal = builder.CreateSolver();
            string strSolver = paramOriginal.ToProto("root").ToString();

            // Text -> re-created solver.
            SolverParameter paramParsed = SolverParameter.FromProto(RawProto.Parse(strSolver));

            bool bSame = paramParsed.Compare(paramOriginal);
            m_log.CHECK(bSame, "The two solver parameters should be the same!");
        }
Ejemplo n.º 10
0
        /// <summary>
        /// Train the model: load the solver and model into the training instance, train for
        /// m_param.Iterations, then save the LSTM state, show the rendered plots and save the weights.
        /// </summary>
        /// <remarks>
        /// Nothing is done when the training instance (m_mycaffeTrain) has not been created.
        /// </remarks>
        /// <param name="bNewWts">Specifies whether to use new weights or load existing ones (if they exist).</param>
        public void Train(bool bNewWts)
        {
            if (m_mycaffeTrain == null)
            {
                return;
            }

            // Existing weights are only looked up when new weights were not requested.
            byte[] rgWts = bNewWts ? null : loadWeights();

            if (rgWts == null)
            {
                Console.WriteLine("Starting with new weights...");
            }

            SolverParameter solver = createSolver();
            NetParameter    model  = createModel();

            string strModel = model.ToProto("root").ToString();

            Console.WriteLine("Using Train Model:");
            Console.WriteLine(strModel);
            Console.WriteLine("Starting training...");

            string strSolverDesc = solver.ToProto("root").ToString();
            string strModelDesc  = model.ToProto("root").ToString();

            m_mycaffeTrain.LoadLite(Phase.TRAIN, strSolverDesc, strModelDesc, rgWts, false, false);
            m_mycaffeTrain.SetOnTrainingStartOverride(new EventHandler(onTrainingStart));
            m_mycaffeTrain.SetOnTestingStartOverride(new EventHandler(onTestingStart));

            // Set clockwork weights.
            if (m_param.LstmEngine != EngineParameter.Engine.CUDNN)
            {
                Net<float>  netTrain  = m_mycaffeTrain.GetInternalNet(Phase.TRAIN);
                Blob<float> blobLstm1 = netTrain.parameters[2];

                blobLstm1.SetData(1, m_param.Hidden, m_param.Hidden);
            }

            m_mycaffeTrain.Train(m_param.Iterations);
            saveLstmState(m_mycaffeTrain);

            // Render the collected plots and show them.
            Image imgPlots = SimpleGraphingControl.QuickRender(m_plots, 1000, 600);
            showImage(imgPlots, "training.png");

            saveWeights(m_mycaffeTrain.GetWeights());
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Create the ADAM solver settings with a "fixed" learning rate policy. The test interval
        /// is set to one more than the number of iterations, to avoid testing.
        /// </summary>
        /// <returns>The SolverParameter is returned.</returns>
        public SolverParameter CreateSolver()
        {
            SolverParameter param = new SolverParameter();

            // Run length, testing and reporting settings.
            param.random_seed = 0xCAFFE;
            param.test_interval = m_nIterations + 1;
            // Overwrites the first test_iter entry - assumes a new SolverParameter already has one (TODO confirm).
            param.test_iter[0] = 100;
            param.max_iter = m_nIterations;
            param.snapshot = m_nIterations;
            param.test_initialization = false;
            param.display = m_nDisplay;

            // Optimizer and learning rate settings.
            param.type = SolverParameter.SolverType.ADAM;
            param.lr_policy = "fixed";
            param.base_lr = m_dfLearningRate;

            return param;
        }
Ejemplo n.º 12
0
        /// <summary>
        /// Create the LeNet solver prototxt programmatically.
        /// </summary>
        /// <param name="nIterations">Specifies the number of iterations to train (assigned to max_iter).</param>
        /// <returns>The solver descriptor is returned as text.</returns>
        private string create_solver_descriptor_programmatically(int nIterations)
        {
            SolverParameter param = new SolverParameter();

            // Run length and testing settings.
            param.max_iter = nIterations;
            param.test_iter = new List<int> { 100 };
            param.test_initialization = false;
            param.test_interval = 500;

            // Optimizer and learning rate settings.
            param.base_lr = 0.01;
            param.momentum = 0.9;
            param.weight_decay = 0.0005;
            param.LearningRatePolicy = SolverParameter.LearningRatePolicyType.INV;
            param.gamma = 0.0001;
            param.power = 0.75;

            // Reporting and snapshot settings.
            param.display = 100;
            param.snapshot = 5000;

            // Convert solver to text descriptor.
            return param.ToProto("root").ToString();
        }
Ejemplo n.º 13
0
 /// <summary>
 /// The AdaGradSolver constructor, which also checks that the solver's momentum is 0.
 /// </summary>
 /// <param name="cuda">Specifies the instance of CudaDnn to use.</param>
 /// <param name="log">Specifies the Log for output.</param>
 /// <param name="p">Specifies the SolverParameter.</param>
 /// <param name="evtCancel">Specifies a CancelEvent used to cancel the current operation (e.g. training, testing) that the Solver is performing.</param>
 /// <param name="evtForceSnapshot">Specifies an automatic reset event that causes the Solver to perform a Snapshot when set.</param>
 /// <param name="evtForceTest">Specifies an automatic reset event that causes the Solver to run a testing cycle when set.</param>
 /// <param name="imgDb">Specifies the CaffeImageDatabase.</param>
 /// <param name="persist">Specifies the persistence used for loading and saving weights.</param>
 /// <param name="nSolverCount">Specifies the number of Solvers participating in a multi-GPU session.</param>
 /// <param name="nSolverRank">Specifies the rank of this Solver in a multi-GPU session.</param>
 public AdaGradSolver(CudaDnn<T> cuda, Log log, SolverParameter p, CancelEvent evtCancel, AutoResetEvent evtForceSnapshot, AutoResetEvent evtForceTest, IXImageDatabase imgDb, IXPersist<T> persist, int nSolverCount = 1, int nSolverRank = 0)
     : base(cuda, log, p, evtCancel, evtForceSnapshot, evtForceTest, imgDb, persist, nSolverCount, nSolverRank)
 {
     var momentum = m_param.momentum;

     m_log.CHECK_EQ(0, momentum, "Momentum cannot be used with AdaGrad.");
 }
Ejemplo n.º 14
0
 /// <summary>
 /// The AdaGradSolver constructor, which also checks that the solver's momentum is 0.
 /// </summary>
 /// <param name="cuda">Specifies the instance of CudaDnn to use.</param>
 /// <param name="log">Specifies the Log for output.</param>
 /// <param name="p">Specifies the SolverParameter.</param>
 /// <param name="evtCancel">Specifies a CancelEvent used to cancel the current operation (e.g. training, testing) that the Solver is performing.</param>
 /// <param name="evtForceSnapshot">Specifies an automatic reset event that causes the Solver to perform a Snapshot when set.</param>
 /// <param name="evtForceTest">Specifies an automatic reset event that causes the Solver to run a testing cycle when set.</param>
 /// <param name="imgDb">Specifies the MyCaffeImageDatabase.</param>
 /// <param name="persist">Specifies the persistence used for loading and saving weights.</param>
 /// <param name="nSolverCount">Specifies the number of Solvers participating in a multi-GPU session.</param>
 /// <param name="nSolverRank">Specifies the rank of this Solver in a multi-GPU session.</param>
 /// <param name="shareNet">Optionally, specifies the net to share when creating the training network (default = null, meaning no share net is used).</param>
 /// <param name="getws">Optionally, specifies the handler for getting the workspace.</param>
 /// <param name="setws">Optionally, specifies the handler for setting the workspace.</param>
 public AdaGradSolver(CudaDnn<T> cuda, Log log, SolverParameter p, CancelEvent evtCancel, AutoResetEvent evtForceSnapshot, AutoResetEvent evtForceTest, IXImageDatabaseBase imgDb, IXPersist<T> persist, int nSolverCount = 1, int nSolverRank = 0, Net<T> shareNet = null, onGetWorkspace getws = null, onSetWorkspace setws = null)
     : base(cuda, log, p, evtCancel, evtForceSnapshot, evtForceTest, imgDb, persist, nSolverCount, nSolverRank, shareNet, getws, setws)
 {
     var momentum = m_param.momentum;

     m_log.CHECK_EQ(0, momentum, "Momentum cannot be used with AdaGrad.");
 }
Ejemplo n.º 15
0
        /// <summary>
        /// Process the content image by applying the style to it that was learned from the style image.
        /// </summary>
        /// <remarks>
        /// The work is done in four stages: (1) a TEST phase network is run forward on the style image and on the
        /// content image, and the 'gram' and 'content' blobs are cloned; (2) input, event and loss layers (and
        /// optionally a TV-Loss layer) are added to the network description, and the layer named 'input1' is
        /// changed into a PARAMETER layer; (3) a solver is created for the extended network and the cloned blobs
        /// are copied into its 'input_*' blobs; (4) the solver is stepped and the result is produced with save().
        /// Exceptions are not caught here; the net, solver and cloned blobs are always disposed before returning.
        /// </remarks>
        /// <param name="bmpStyle">Specifies the image used to train the style to apply to the content (resized to the content image size when the sizes differ).</param>
        /// <param name="bmpContent">Specifies the content image to which the style is to be applied.</param>
        /// <param name="nIterations">Specifies the number of training iterations.</param>
        /// <param name="strResultDir">Optionally, specifies an output directory where intermediate images are stored.</param>
        /// <param name="nIntermediateOutput">Optionally, specifies how often to output an intermediate image.</param>
        /// <param name="dfTvLoss">Optionally, specifies the TV-Loss weight for smoothing (default = 0, which disables this loss).</param>
        /// <returns>The resulting image is returned.</returns>
        public Bitmap Process(Bitmap bmpStyle, Bitmap bmpContent, int nIterations, string strResultDir = null, int nIntermediateOutput = -1, double dfTvLoss = 0)
        {
            Solver<T>         solver = null;
            Net<T>            net    = null;
            BlobCollection<T> colContentActivations = new BlobCollection<T>();
            BlobCollection<T> colGramActivations    = new BlobCollection<T>();
            double            dfLoss;

            // No catch block is used: an exception propagates with its original stack trace,
            // while the finally block below still releases the net, solver and cloned blobs.
            try
            {
                m_dfTVLossWeight = dfTvLoss;
                m_nIterations    = nIterations;

                if (bmpStyle.Width != bmpContent.Width ||
                    bmpStyle.Height != bmpContent.Height)
                {
                    bmpStyle = ImageTools.ResizeImage(bmpStyle, bmpContent.Width, bmpContent.Height);
                }

                m_log.WriteLine("Creating input network...");
                m_log.Enable = false;

                try
                {
                    net = new Net<T>(m_cuda, m_log, m_param, m_evtCancel, null, Phase.TEST);
                }
                finally
                {
                    // Re-enable the log even when creating the network fails.
                    m_log.Enable = true;
                }

                if (m_rgWeights != null)
                {
                    net.LoadWeights(m_rgWeights, m_persist);
                }

                //-----------------------------------------
                //  Get style and content activations.
                //-----------------------------------------

                prepare_data_blob(net, bmpStyle);
                net.Forward(out dfLoss);

                foreach (KeyValuePair<string, double> kvGram in m_rgLayers["gram"])
                {
                    string  strGram  = kvGram.Key;
                    Blob<T> blobGram = net.blob_by_name(strGram);
                    colGramActivations.Add(blobGram.Clone());
                }

                prepare_data_blob(net, bmpContent);
                net.Forward(out dfLoss);

                foreach (KeyValuePair<string, double> kvContent in m_rgLayers["content"])
                {
                    string  strContent  = kvContent.Key;
                    Blob<T> blobContent = net.blob_by_name(strContent);
                    colContentActivations.Add(blobContent.Clone());
                }


                //-----------------------------------------
                //  Prepare the network by adding new layers.
                //-----------------------------------------

                // NOTE(review): net_param appears to alias m_param (no copy is made here), so the layers
                // added below would also be added to m_param - confirm this is intended when Process is
                // called more than once on the same instance.
                NetParameter net_param = m_param;

                foreach (KeyValuePair<string, double> kvInput in m_rgLayers["input"])
                {
                    string         strName = kvInput.Key;
                    LayerParameter p       = new LayerParameter(LayerParameter.LayerType.INPUT);
                    p.name = "input_" + strName;
                    p.top.Add(p.name);

                    Blob<T> blob = net.blob_by_name(strName);
                    p.input_param.shape.Add(new BlobShape(blob.shape()));

                    net_param.layer.Add(p);
                }

                foreach (KeyValuePair<string, double> kvContent in m_rgLayers["content"])
                {
                    string strName   = kvContent.Key;
                    string strScale1 = "input_" + strName;
                    string strScale2 = strName;

                    // When a content data scale is used, both the input and the content blob
                    // are multiplied by it, and the scaled blobs (suffix 'b') are used below.
                    if (m_dfContentDataScale != 1.0)
                    {
                        strScale1 += "b";
                        LayerParameter ps1 = new LayerParameter(LayerParameter.LayerType.SCALAR);
                        ps1.scalar_param.value                = m_dfContentDataScale;
                        ps1.scalar_param.operation            = ScalarParameter.ScalarOp.MUL;
                        ps1.scalar_param.passthrough_gradient = true;
                        ps1.bottom.Add("input_" + strName);
                        ps1.top.Add(strScale1);

                        net_param.layer.Add(ps1);

                        strScale2 += "b";
                        LayerParameter ps2 = new LayerParameter(LayerParameter.LayerType.SCALAR);
                        ps2.scalar_param.value                = m_dfContentDataScale;
                        ps2.scalar_param.operation            = ScalarParameter.ScalarOp.MUL;
                        ps2.scalar_param.passthrough_gradient = true;
                        ps2.bottom.Add(strName);
                        ps2.top.Add(strScale2);

                        net_param.layer.Add(ps2);
                    }

                    LayerParameter event_param = new LayerParameter(LayerParameter.LayerType.EVENT);
                    event_param.name = "event_" + strName;
                    event_param.bottom.Add(strScale2);
                    event_param.bottom.Add(strScale1);
                    event_param.top.Add("event_" + strName);

                    net_param.layer.Add(event_param);

                    LayerParameter p = new LayerParameter(LayerParameter.LayerType.EUCLIDEAN_LOSS);
                    p.name = "loss_" + strName;

                    Blob<T> blobContent = colContentActivations[strName];
                    double  dfScale     = get_content_scale(blobContent);
                    p.loss_weight.Add(kvContent.Value * dfScale);

                    p.bottom.Add("event_" + strName);
                    p.bottom.Add(strScale1);
                    p.top.Add("loss_" + strName);

                    net_param.layer.Add(p);
                }

                foreach (KeyValuePair<string, double> kvGram in m_rgLayers["gram"].ToList())
                {
                    string strGramName = kvGram.Key;

                    LayerParameter event_param = new LayerParameter(LayerParameter.LayerType.EVENT);
                    event_param.name = "event_" + strGramName;
                    event_param.bottom.Add(strGramName);
                    event_param.bottom.Add("input_" + strGramName);
                    event_param.top.Add("event_" + strGramName);

                    net_param.layer.Add(event_param);

                    LayerParameter p = new LayerParameter(LayerParameter.LayerType.EUCLIDEAN_LOSS);
                    p.name = "loss_" + strGramName;

                    Blob<T> blobGram = colGramActivations[strGramName];
                    double  dfScale  = get_style_scale(blobGram);
                    p.loss_weight.Add(kvGram.Value * dfScale);

                    p.bottom.Add("input_" + strGramName);
                    p.bottom.Add("event_" + strGramName);
                    p.top.Add("loss_" + strGramName);

                    net_param.layer.Add(p);
                }

                // Add TV Loss;
                if (m_dfTVLossWeight != 0)
                {
                    LayerParameter p = new LayerParameter(LayerParameter.LayerType.TV_LOSS);
                    p.name = "loss_tv";

                    double dfWeight = m_dfTVLossWeight;
                    p.loss_weight.Add(dfWeight);

                    p.bottom.Add("data");
                    p.top.Add("loss_tv");

                    net_param.layer.Add(p);
                }

                // Replace InputLayer with ParameterLayer,
                // so that we'll be able to backprop into the image.
                Blob<T> data = net.blob_by_name("data");
                for (int i = 0; i < net_param.layer.Count; i++)
                {
                    LayerParameter p = net_param.layer[i];

                    if (p.name == "input1")
                    {
                        net_param.layer[i].SetType(LayerParameter.LayerType.PARAMETER);
                        net_param.layer[i].parameter_param.shape = new BlobShape(data.shape());
                        break;
                    }
                }

                // Disable weights learning.
                List<LayerParameter.LayerType> rgTypes = new List<LayerParameter.LayerType>();
                rgTypes.Add(LayerParameter.LayerType.CONVOLUTION);
                rgTypes.Add(LayerParameter.LayerType.DECONVOLUTION);
                rgTypes.Add(LayerParameter.LayerType.INNERPRODUCT);
                rgTypes.Add(LayerParameter.LayerType.PRELU);
                rgTypes.Add(LayerParameter.LayerType.BIAS);
                rgTypes.Add(LayerParameter.LayerType.EMBED);
                rgTypes.Add(LayerParameter.LayerType.LSTM);
                rgTypes.Add(LayerParameter.LayerType.LSTM_SIMPLE);
                rgTypes.Add(LayerParameter.LayerType.RNN);

                foreach (LayerParameter layer in net_param.layer)
                {
                    if (rgTypes.Contains(layer.type))
                    {
                        layer.parameters = new List<ParamSpec>();
                        layer.parameters.Add(new ParamSpec(0, 0));
                        layer.parameters.Add(new ParamSpec(0, 0));
                    }
                }

                // The input network is no longer needed; clearing the variable keeps the
                // finally block from disposing it a second time.
                net.Dispose();
                net = null;


                //-----------------------------------------
                //  Create solver and assign inputs.
                //-----------------------------------------

                SolverParameter solver_param = new SolverParameter();
                solver_param.display         = m_nDisplayEvery;
                solver_param.train_net_param = net_param;
                solver_param.test_iter.Clear();
                solver_param.test_interval       = 0;
                solver_param.test_initialization = false;
                solver_param.base_lr             = m_dfLearningRate;
                solver_param.type = m_solverType;

                m_log.WriteLine("Creating " + m_solverType.ToString() + " solver with learning rate = " + m_dfLearningRate.ToString() + "...");
                m_log.Enable = false;

                try
                {
                    if (m_solverType == SolverParameter.SolverType.LBFGS)
                    {
                        solver = new LBFGSSolver<T>(m_cuda, m_log, solver_param, m_evtCancel, null, null, null, m_persist);
                    }
                    else
                    {
                        solver = Solver<T>.Create(m_cuda, m_log, solver_param, m_evtCancel, null, null, null, m_persist);
                    }
                }
                finally
                {
                    // Re-enable the log even when creating the solver fails.
                    m_log.Enable = true;
                }

                solver.OnSnapshot          += Solver_OnSnapshot;
                solver.OnTrainingIteration += Solver_OnTrainingIteration;

                foreach (Layer<T> layer in solver.net.layers)
                {
                    if (layer.type == LayerParameter.LayerType.EVENT)
                    {
                        EventLayer<T> eventLayer = layer as EventLayer<T>;
                        eventLayer.OnBackward += EventLayer_OnBackward;
                    }
                }

                prepare_input_param(solver.net, bmpContent);

                foreach (KeyValuePair<string, double> kvContent in m_rgLayers["content"])
                {
                    string  strName = kvContent.Key;
                    Blob<T> blobDst = solver.net.blob_by_name("input_" + strName);
                    Blob<T> blobSrc = colContentActivations[strName];
                    blobDst.CopyFrom(blobSrc);
                }

                foreach (KeyValuePair<string, double> kvGram in m_rgLayers["gram"])
                {
                    string  strName = kvGram.Key;
                    Blob<T> blobDst = solver.net.blob_by_name("input_" + strName);
                    Blob<T> blobSrc = colGramActivations[strName];
                    blobDst.CopyFrom(blobSrc);
                }

                //-----------------------------------------
                //  Optimize.
                //-----------------------------------------

                int nIterations1 = m_nIterations;
                if (strResultDir != null && nIntermediateOutput > 0)
                {
                    nIterations1 /= nIntermediateOutput;
                }

                if (m_rgWeights != null)
                {
                    // The first learnable parameter is taken out while the weights are loaded
                    // and put back afterwards, so that it is not part of the load.
                    Blob<T> blobInput = solver.net.learnable_parameters[0];
                    solver.net.learnable_parameters.RemoveAt(0);
                    solver.net.LoadWeights(m_rgWeights, m_persist);
                    solver.net.learnable_parameters.Insert(0, blobInput);
                }

                if (strResultDir != null)
                {
                    // Make sure the directory ends with exactly one backslash.
                    strResultDir  = strResultDir.TrimEnd('\\');
                    strResultDir += "\\";
                }

                for (int i = 0; i < nIterations1; i++)
                {
                    if (m_evtCancel.WaitOne(0))
                    {
                        break;
                    }

                    // NOTE(review): nIntermediateOutput is passed to Step() unchanged, including its
                    // default of -1 and when no result directory is given - confirm that Step()
                    // handles these cases as intended.
                    solver.Step(nIntermediateOutput, TRAIN_STEP.NONE, true, true, true);

                    if (strResultDir != null)
                    {
                        Bitmap bmpTemp = save(solver.net);

                        string strFile = strResultDir + i.ToString() + "_temp.png";
                        if (File.Exists(strFile))
                        {
                            File.Delete(strFile);
                        }

                        bmpTemp.Save(strFile);
                    }
                }

                Bitmap bmpOutput = save(solver.net);

                return bmpOutput;
            }
            finally
            {
                if (net != null)
                {
                    net.Dispose();
                }

                if (solver != null)
                {
                    solver.Dispose();
                }

                colGramActivations.Dispose();
                colContentActivations.Dispose();
            }
        }
Ejemplo n.º 16
0
 /// <summary>
 /// The AdaDeltaSolver constructor.
 /// </summary>
 /// <remarks>
 /// All arguments are forwarded unchanged to the base constructor, after which
 /// AdaDeltaPreSolve() is called.
 /// </remarks>
 /// <param name="cuda">Specifies the instance of CudaDnn to use.</param>
 /// <param name="log">Specifies the Log for output.</param>
 /// <param name="p">Specifies the SolverParameter.</param>
 /// <param name="evtCancel">Specifies a CancelEvent used to cancel the current operation (e.g. training, testing) that the Solver is performing.</param>
 /// <param name="evtForceSnapshot">Specifies an automatic reset event that causes the Solver to perform a Snapshot when set.</param>
 /// <param name="evtForceTest">Specifies an automatic reset event that causes the Solver to run a testing cycle when set.</param>
 /// <param name="imgDb">Specifies the CaffeImageDatabase.</param>
 /// <param name="persist">Specifies the persistence used for loading and saving weights.</param>
 /// <param name="nSolverCount">Specifies the number of Solvers participating in a multi-GPU session.</param>
 /// <param name="nSolverRank">Specifies the rank of this Solver in a multi-GPU session.</param>
 /// <param name="shareNet">Optionally, specifies the net to share when creating the training network (default = null, meaning no share net is used).</param>
 /// <param name="getws">Optionally, specifies the handler for getting the workspace.</param>
 /// <param name="setws">Optionally, specifies the handler for setting the workspace.</param>
 public AdaDeltaSolver(CudaDnn <T> cuda, Log log, SolverParameter p, CancelEvent evtCancel, AutoResetEvent evtForceSnapshot, AutoResetEvent evtForceTest, IXImageDatabase imgDb, IXPersist <T> persist, int nSolverCount = 1, int nSolverRank = 0, Net <T> shareNet = null, onGetWorkspace getws = null, onSetWorkspace setws = null)
     : base(cuda, log, p, evtCancel, evtForceSnapshot, evtForceTest, imgDb, persist, nSolverCount, nSolverRank, shareNet, getws, setws)
 {
     // Solver-specific setup that runs once the base solver has been constructed.
     AdaDeltaPreSolve();
 }
Ejemplo n.º 17
0
        /// <summary>
        /// The DoWork thread is the main thread used to train or run the model depending on the operation selected.
        /// </summary>
        /// <remarks>
        /// When the operation is TRAIN, the Seq2Seq model and solver are created from the input settings,
        /// the previously saved "sequence" weights (if any) are loaded, the event handlers are attached,
        /// the model is trained for m_model.Iterations iterations and the weights are saved.
        ///
        /// Otherwise the model is created for Phase.RUN, loaded with the "sequence" weights and run on
        /// the input text.
        ///
        /// The MyCaffeControl created here is always disposed before the method exits.  Exceptions are
        /// not caught; they propagate to the caller with their original stack trace.
        /// </remarks>
        /// <param name="sender">Specifies the sender, which is cast to a BackgroundWorker.</param>
        /// <param name="e">Specifies the arguments; e.Argument is read as the InputData describing the work to do.</param>
        private void m_bw_DoWork(object sender, DoWorkEventArgs e)
        {
            BackgroundWorker bw = sender as BackgroundWorker;

            m_input = e.Argument as InputData;
            SettingsCaffe s = new SettingsCaffe();

            s.ImageDbLoadMethod = IMAGEDB_LOAD_METHOD.LOAD_ALL;

            // No catch block: the previous 'catch { throw excpt; }' only reset the stack trace.
            // The finally block alone guarantees cleanup while preserving the original exception.
            try
            {
                m_model.Batch = m_input.Batch;
                m_mycaffe     = new MyCaffeControl <float>(s, m_log, m_evtCancel);

                // Train the model.
                if (m_input.Operation == InputData.OPERATION.TRAIN)
                {
                    // NOTE(review): 7000 is presumably the number of training samples per epoch - confirm.
                    m_model.Iterations = (int)((m_input.Epochs * 7000) / m_model.Batch);
                    m_log.WriteLine("Training for " + m_input.Epochs.ToString() + " epochs (" + m_model.Iterations.ToString("N0") + " iterations).", true);
                    m_log.WriteLine("INFO: " + m_model.Iterations.ToString("N0") + " iterations.", true);
                    m_log.WriteLine("Using hidden = " + m_input.HiddenSize.ToString() + ", and word size = " + m_input.WordSize.ToString() + ".", true);

                    // Load the Seq2Seq training model.
                    NetParameter    netParam    = m_model.CreateModel(m_input.InputFileName, m_input.TargetFileName, m_input.HiddenSize, m_input.WordSize, m_input.UseSoftmax, m_input.UseExternalIp);
                    string          strModel    = netParam.ToProto("root").ToString();
                    SolverParameter solverParam = m_model.CreateSolver(m_input.LearningRate);
                    string          strSolver   = solverParam.ToProto("root").ToString();
                    byte[]          rgWts       = loadWeights("sequence");

                    // Keep the descriptors used for this training session.
                    m_strModel  = strModel;
                    m_strSolver = strSolver;

                    m_mycaffe.OnTrainingIteration += m_mycaffe_OnTrainingIteration;
                    m_mycaffe.OnTestingIteration  += m_mycaffe_OnTestingIteration;
                    m_mycaffe.LoadLite(Phase.TRAIN, strSolver, strModel, rgWts, false, false);

                    // Without softmax, the loss is supplied through the MEMORY_LOSS layers named "loss" (when present).
                    if (!m_input.UseSoftmax)
                    {
                        MemoryLossLayer <float> lossLayerTraining = m_mycaffe.GetInternalNet(Phase.TRAIN).FindLayer(LayerParameter.LayerType.MEMORY_LOSS, "loss") as MemoryLossLayer <float>;
                        if (lossLayerTraining != null)
                        {
                            lossLayerTraining.OnGetLoss += LossLayer_OnGetLossTraining;
                        }
                        MemoryLossLayer <float> lossLayerTesting = m_mycaffe.GetInternalNet(Phase.TEST).FindLayer(LayerParameter.LayerType.MEMORY_LOSS, "loss") as MemoryLossLayer <float>;
                        if (lossLayerTesting != null)
                        {
                            lossLayerTesting.OnGetLoss += LossLayer_OnGetLossTesting;
                        }
                    }

                    m_blobProbs = new Blob <float>(m_mycaffe.Cuda, m_mycaffe.Log);
                    m_blobScale = new Blob <float>(m_mycaffe.Cuda, m_mycaffe.Log);

                    // Hook the TEXT_DATA layer named "data" (when present) to receive its data events.
                    TextDataLayer <float> dataLayerTraining = m_mycaffe.GetInternalNet(Phase.TRAIN).FindLayer(LayerParameter.LayerType.TEXT_DATA, "data") as TextDataLayer <float>;
                    if (dataLayerTraining != null)
                    {
                        dataLayerTraining.OnGetData += DataLayerTraining_OnGetDataTraining;
                    }

                    // Train the Seq2Seq model.
                    m_plotsSequenceLoss          = new PlotCollection("Sequence Loss");
                    m_plotsSequenceAccuracyTest  = new PlotCollection("Sequence Accuracy Test");
                    m_plotsSequenceAccuracyTrain = new PlotCollection("Sequence Accuracy Train");
                    m_mycaffe.Train(m_model.Iterations);
                    saveWeights("sequence", m_mycaffe);
                }

                // Run a trained model.
                else
                {
                    NetParameter netParam = m_model.CreateModel(m_input.InputFileName, m_input.TargetFileName, m_input.HiddenSize, m_input.WordSize, m_input.UseSoftmax, m_input.UseExternalIp, Phase.RUN);
                    string       strModel = netParam.ToProto("root").ToString();
                    byte[]       rgWts    = loadWeights("sequence");

                    strModel = m_model.PrependInput(strModel);

                    m_strModelRun = strModel;

                    // The run net is loaded with an input shape of (TimeSteps, 1, 1, 1).
                    int nN = m_model.TimeSteps;
                    m_mycaffe.LoadToRun(strModel, rgWts, new BlobShape(new List <int>()
                    {
                        nN, 1, 1, 1
                    }), null, null, false, false);

                    m_blobProbs = new Blob <float>(m_mycaffe.Cuda, m_mycaffe.Log);
                    m_blobScale = new Blob <float>(m_mycaffe.Cuda, m_mycaffe.Log);

                    runModel(m_mycaffe, bw, m_input.InputText);
                }
            }
            finally
            {
                // Cleanup.
                if (m_mycaffe != null)
                {
                    m_mycaffe.Dispose();
                    m_mycaffe = null;
                }
            }
        }
Ejemplo n.º 18
0
        /// <summary>
        /// The worker thread used to either train or run the models.
        /// </summary>
        /// <remarks>
        /// When training, first the input hand-written image model is trained
        /// using the LeNet model.
        ///
        /// This input model is then run in the onTrainingStart event to get the
        /// detected hand written character representation.  The outputs of layer
        /// 'ip1' from the input model are then fed as input to the sequence
        /// model which is then trained to encode the 'ip1' input data with one
        /// lstm and then decoded with another which is then trained to detect
        /// a section of the Sin curve data.
        ///
        /// When running, the first input model is run to get its 'ip1' representation,
        /// which is then fed into the sequence model to detect the section of the
        /// Sin curve.
        ///
        /// Both MyCaffeControl instances created here are disposed before the method
        /// exits, whether it completes normally or an exception is thrown.
        /// </remarks>
        /// <param name="sender">Specifies the sender of the event (e.g. the BackgroundWorker)</param>
        /// <param name="e">Specifies the event args; e.Argument is cast to the OPERATION to perform.</param>
        private void m_bw_DoWork(object sender, DoWorkEventArgs e)
        {
            BackgroundWorker bw = sender as BackgroundWorker;
            OPERATION        op = (OPERATION)e.Argument;
            SettingsCaffe    s  = new SettingsCaffe();

            s.ImageDbLoadMethod = IMAGEDB_LOAD_METHOD.LOAD_ALL;

            m_operation = op;

            // Everything that can fail runs inside the try so that the controls
            // created below are always released by the finally block.
            try
            {
                m_mycaffe      = new MyCaffeControl <float>(s, m_log, m_evtCancel);
                m_mycaffeInput = new MyCaffeControl <float>(s, m_log, m_evtCancel);
                m_imgDb        = new MyCaffeImageDatabase2(m_log);

                // Load the image database.
                m_imgDb.InitializeWithDsName1(s, "MNIST");
                m_ds = m_imgDb.GetDatasetByName("MNIST");

                // Create the MNIST image detection model
                NetParameter    netParamMnist    = m_model.CreateMnistModel(m_ds);
                SolverParameter solverParamMnist = m_model.CreateMnistSolver();

                byte[] rgWts = loadWeights("input");
                m_mycaffeInput.Load(Phase.TRAIN, solverParamMnist.ToProto("root").ToString(), netParamMnist.ToProto("root").ToString(), rgWts, null, null, false, m_imgDb);
                Net <float>  netTrain = m_mycaffeInput.GetInternalNet(Phase.TRAIN);
                Blob <float> input_ip = netTrain.FindBlob(m_strInputOutputBlobName); // input model's second to last output (includes relu)

                // Run the train or run operation.
                if (op == OPERATION.TRAIN)
                {
                    // Train the MNIST model first.
                    m_mycaffeInput.OnTrainingIteration += m_mycaffeInput_OnTrainingIteration;
                    m_plotsInputLoss = new PlotCollection("Input Loss");
                    m_mycaffeInput.Train(2000);
                    saveWeights("input", m_mycaffeInput.GetWeights());

                    // Load the Seq2Seq training model; its input size is taken from the input model's blob.
                    NetParameter    netParam    = m_model.CreateModel(input_ip.channels, 10);
                    SolverParameter solverParam = m_model.CreateSolver();
                    rgWts = loadWeights("sequence");

                    m_mycaffe.OnTrainingIteration += m_mycaffe_OnTrainingIteration;
                    m_mycaffe.LoadLite(Phase.TRAIN, solverParam.ToProto("root").ToString(), netParam.ToProto("root").ToString(), rgWts, false, false);
                    m_mycaffe.SetOnTrainingStartOverride(new EventHandler(onTrainingStart));

                    // Train the Seq2Seq model.
                    m_plotsSequenceLoss = new PlotCollection("Sequence Loss");
                    m_mycaffe.Train(m_model.Iterations);
                    saveWeights("sequence", m_mycaffe.GetWeights());
                }
                else
                {
                    NetParameter netParam = m_model.CreateModel(input_ip.channels, 10, 1, 1);
                    rgWts = loadWeights("sequence");

                    // The run net is loaded with an input shape of (1, 1, 1, 1).
                    int nN = 1;
                    m_mycaffe.LoadToRun(netParam.ToProto("root").ToString(), rgWts, new BlobShape(new List <int>()
                    {
                        nN, 1, 1, 1
                    }), null, null, false, false);
                    runModel(m_mycaffe, bw);
                }
            }
            finally
            {
                // Cleanup.
                // NOTE(review): m_imgDb is left alive here, as in the original code - confirm it is released elsewhere.
                if (m_mycaffe != null)
                {
                    m_mycaffe.Dispose();
                    m_mycaffe = null;
                }

                if (m_mycaffeInput != null)
                {
                    m_mycaffeInput.Dispose();
                    m_mycaffeInput = null;
                }
            }
        }
Ejemplo n.º 19
0
 /// <summary>
 /// The NesterovSolver constructor.
 /// </summary>
 /// <remarks>
 /// All arguments are forwarded unchanged to the base constructor; this constructor
 /// performs no additional work of its own.
 /// </remarks>
 /// <param name="cuda">Specifies the instance of CudaDnn to use.</param>
 /// <param name="log">Specifies the Log for output.</param>
 /// <param name="p">Specifies the SolverParameter.</param>
 /// <param name="evtCancel">Specifies a CancelEvent used to cancel the current operation (e.g. training, testing) that the Solver is performing.</param>
 /// <param name="evtForceSnapshot">Specifies an automatic reset event that causes the Solver to perform a Snapshot when set.</param>
 /// <param name="evtForceTest">Specifies an automatic reset event that causes the Solver to run a testing cycle when set.</param>
 /// <param name="imgDb">Specifies the CaffeImageDatabase.</param>
 /// <param name="persist">Specifies the persistence used for loading and saving weights.</param>
 /// <param name="nSolverCount">Specifies the number of Solvers participating in a multi-GPU session.</param>
 /// <param name="nSolverRank">Specifies the rank of this Solver in a multi-GPU session.</param>
 /// <param name="shareNet">Optionally, specifies the net to share when creating the training network (default = null, meaning no share net is used).</param>
 public NesterovSolver(CudaDnn <T> cuda, Log log, SolverParameter p, CancelEvent evtCancel, AutoResetEvent evtForceSnapshot, AutoResetEvent evtForceTest, IXImageDatabase imgDb, IXPersist <T> persist, int nSolverCount = 1, int nSolverRank = 0, Net <T> shareNet = null)
     : base(cuda, log, p, evtCancel, evtForceSnapshot, evtForceTest, imgDb, persist, nSolverCount, nSolverRank, shareNet)
 {
 }
Ejemplo n.º 20
0
 /// <summary>
 /// The AdamSolver constructor.
 /// </summary>
 /// <remarks>
 /// All arguments are forwarded unchanged to the base constructor, after which
 /// AdamPreSolve() is called.
 /// </remarks>
 /// <param name="cuda">Specifies the instance of CudaDnn to use.</param>
 /// <param name="log">Specifies the Log for output.</param>
 /// <param name="p">Specifies the SolverParameter.</param>
 /// <param name="evtCancel">Specifies a CancelEvent used to cancel the current operation (e.g. training, testing) that the Solver is performing.</param>
 /// <param name="evtForceSnapshot">Specifies an automatic reset event that causes the Solver to perform a Snapshot when set.</param>
 /// <param name="evtForceTest">Specifies an automatic reset event that causes the Solver to run a testing cycle when set.</param>
 /// <param name="imgDb">Specifies the CaffeImageDatabase.</param>
 /// <param name="persist">Specifies the persistence used for loading and saving weights.</param>
 /// <param name="nSolverCount">Specifies the number of Solvers participating in a multi-GPU session.</param>
 /// <param name="nSolverRank">Specifies the rank of this Solver in a multi-GPU session.</param>
 public AdamSolver(CudaDnn <T> cuda, Log log, SolverParameter p, CancelEvent evtCancel, AutoResetEvent evtForceSnapshot, AutoResetEvent evtForceTest, IXImageDatabase imgDb, IXPersist <T> persist, int nSolverCount = 1, int nSolverRank = 0)
     : base(cuda, log, p, evtCancel, evtForceSnapshot, evtForceTest, imgDb, persist, nSolverCount, nSolverRank)
 {
     // Solver-specific setup that runs once the base solver has been constructed.
     AdamPreSolve();
 }
Ejemplo n.º 21
0
        /// <summary>
        /// Worker thread body that creates a solver on the device given by e.DeviceID, joins it to the
        /// NCCL session of the rank 0 solver and steps it until its iterations are done.
        /// </summary>
        /// <remarks>
        /// Any exception raised while setting up or stepping is not rethrown; it is stored in
        /// info.Error and signaled through info.ErrorEvent.  The solver, the NCCL instance and the
        /// CudaDnn connection created here are released on every exit path, including the early
        /// exit taken when the cancel event fires before all workers have been created.
        /// </remarks>
        /// <param name="sender">Specifies the sender of the event (not used).</param>
        /// <param name="e">Specifies the arguments; e.Arg is read as the SolverInfo for this worker and e.DeviceID selects the device.</param>
        private void Worker_DoWork(object sender, ActionStateArgs <T> e)
        {
            SolverInfo <T> info   = e.Arg as SolverInfo <T>;
            NCCL <T>       nccl   = null;
            Solver <T>     solver = null;

            m_cuda = new common.CudaDnn <T>(e.DeviceID, DEVINIT.CUBLAS | DEVINIT.CURAND, null, info.CudaPath);

            try
            {
                Solver <T> rank0 = info.Rank0;
                Log        log   = new Log("Worker solver for DeviceID = " + e.DeviceID.ToString());

                //-----------------------------------------
                //  Transfer the NCCL handle from the
                //  main kernel that created it to the
                //  one used by the CudaDnn on this thread.
                //
                //  After the copy, this thread will 'own'
                //  the nccl and be responsible for its
                //  destruction.
                //-----------------------------------------
                long hNccl = m_cuda.KernelCopyNccl(info.KernelHandle, info.NcclHandle);

                // Create solver and install callbacks
                SolverParameter param = rank0.parameter.Clone();
                param.device_id = e.DeviceID;
                param.type      = rank0.parameter.type;
                solver          = Solver <T> .Create(m_cuda, log, param, rank0.CancelEvent, null, null, rank0.Database, null, rank0.solver_count, info.SolverRank);

                info.StartedEvent.Set();
                log.CHECK_EQ((int)solver.type, (int)rank0.type, "The solver types should be the same.");

                //-----------------------------------------
                //  Turn off logging for all other
                //  operations on the worker thread.
                //-----------------------------------------
                log.Enable = false;

                nccl = new NCCL <T>(m_cuda, log, solver, e.DeviceID, hNccl, info.GradientReadyEvents);

                info.InitializedEvent.Set();
                m_cuda.SynchronizeDevice();

                // Wait for either a cancel handle or the 'all created' event, which is last in the list.
                List <WaitHandle> rgWait = new List <WaitHandle>();
                rgWait.AddRange(rank0.CancelEvent.Handles);
                rgWait.Add(info.AllCreatedEvent);

                // Any index before the last one means a cancel handle was signaled.
                int nWait = WaitHandle.WaitAny(rgWait.ToArray());
                if (nWait < rgWait.Count - 1)
                {
                    return;
                }

                nccl.Broadcast();

                // Run the remaining iterations unless an explicit override is given.
                int nIterations = param.max_iter - solver.iter;
                if (info.IterationOverride > 0)
                {
                    nIterations = info.IterationOverride;
                }

                solver.Step(nIterations);
            }
            catch (Exception excpt)
            {
                // Report the failure through the SolverInfo instead of rethrowing.
                info.Error = excpt;
                info.ErrorEvent.Set();
            }
            finally
            {
                // Release in the same order as the normal path: solver, then nccl, then cuda.
                // Disposing the solver here (rather than at the end of the try) also covers
                // the cancel and exception paths, which previously leaked it.
                if (solver != null)
                {
                    solver.Dispose();
                }

                if (nccl != null)
                {
                    nccl.Dispose();
                }

                m_cuda.Dispose();
                m_cuda = null;
            }
        }