Example #1
        private bool isAtIteration(int nN, ITERATOR_TYPE type, int nIteration, int nEpisode)
        {
            if (nN == -1)
            {
                return false;
            }

            if (type == ITERATOR_TYPE.EPISODE)
            {
                if (nEpisode < nN)
                {
                    return false;
                }

                return true;
            }
            else
            {
                if (nIteration < nN)
                {
                    return false;
                }

                return true;
            }
        }
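For illustration, here is a minimal sketch of how this helper can gate a training loop; the numeric values and calling context below are assumptions made for the example, not taken from the snippet above.

            // Hypothetical values: nN = 100 with ITERATOR_TYPE.EPISODE stops after 100 episodes,
            // regardless of how many raw iterations have elapsed; nN = -1 never stops.
            bool bStopOnEpisode   = isAtIteration(100, ITERATOR_TYPE.EPISODE, 2500, 100);  // true  - 100 episodes reached
            bool bKeepGoing       = isAtIteration(100, ITERATOR_TYPE.EPISODE, 2500, 99);   // false - one episode short
            bool bStopOnIteration = isAtIteration(100, ITERATOR_TYPE.ITERATION, 100, 7);   // true  - 100 iterations reached
            bool bIgnoreLimit     = isAtIteration(-1, ITERATOR_TYPE.ITERATION, 5000, 40);  // false - -1 means 'ignore'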
Example #2
        /// <summary>
        /// Train the network using a modified PG training algorithm optimized for GPU use.
        /// </summary>
        /// <param name="nN">Specifies the number of iterations (based on the ITERATOR_TYPE) to run, or -1 to ignore.</param>
        /// <param name="type">Specifies the iteration type (default = ITERATION).</param>
        /// <param name="step">Specifies the stepping mode to use (when debugging).</param>
        /// <returns>A value of <i>true</i> is returned when handled, <i>false</i> otherwise.</returns>
        public bool Train(int nN, ITERATOR_TYPE type, TRAIN_STEP step)
        {
            m_mycaffe.CancelEvent.Reset();
            Agent<T> agent = new Agent<T>(m_icallback, m_mycaffe, m_properties, m_random, Phase.TRAIN);
            agent.Run(Phase.TRAIN, nN, type, step);
            agent.Dispose();

            return false;
        }
Example #3
        /// <summary>
        /// Run the test cycle.
        /// </summary>
        /// <param name="nN">Specifies the number of iterations (based on the ITERATOR_TYPE) to run, or -1 to ignore.</param>
        /// <param name="type">Specifies the iteration type (default = ITERATION).</param>
        /// <returns>A value of <i>true</i> is returned when handled, <i>false</i> otherwise.</returns>
        public bool Test(int nN, ITERATOR_TYPE type)
        {
            int nDelay = 1000;

            m_mycaffe.CancelEvent.Reset();
            Agent<T> agent = new Agent<T>(m_icallback, m_mycaffe, m_properties, m_random, Phase.TEST, m_rgVocabulary, m_bUsePreloadData);

            agent.Run(Phase.TEST, nN, type, TRAIN_STEP.NONE);

            agent.Dispose();
            Shutdown(nDelay);

            return true;
        }
Example #4
        /// <summary>
        /// Create a new trainer and use it to run a training cycle.
        /// </summary>
        /// <param name="mycaffe">Specifies the MyCaffeControl to use.</param>
        /// <param name="nIterationOverride">Specifies the iterations to run if greater than zero.</param>
        /// <param name="type">Specifies the type of iterator to use.</param>
        /// <param name="step">Optionally, specifies whether or not to step the training for debugging (default = NONE).</param>
        public void Train(Component mycaffe, int nIterationOverride, ITERATOR_TYPE type = ITERATOR_TYPE.ITERATION, TRAIN_STEP step = TRAIN_STEP.NONE)
        {
            if (m_itrainer == null)
            {
                m_itrainer = createTrainer(mycaffe);
            }

            if (nIterationOverride == -1)
            {
                nIterationOverride = m_nIterations;
            }

            m_itrainer.Train(nIterationOverride, type, step);
            cleanup(0);
        }
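A hypothetical call site for the wrapper above might look like the following; 'trainerHost' and 'mycaffe' are illustrative names, not identifiers from the snippet.

            // Hypothetical usage sketch of the Train wrapper shown above.
            trainerHost.Train(mycaffe, 10000);                          // train for 10,000 iterations (default iterator type)
            trainerHost.Train(mycaffe, 10, ITERATOR_TYPE.EPISODE);      // train for 10 episodes instead
            trainerHost.Train(mycaffe, -1);                             // -1 falls back to the configured m_nIterations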
Example #5
        /// <summary>
        /// Create a new trainer and use it to run a test cycle.
        /// </summary>
        /// <param name="mycaffe">Specifies the MyCaffeControl to use.</param>
        /// <param name="nIterationOverride">Specifies the iterations to run if greater than zero.</param>
        /// <param name="type">Specifies the type of iterator to use.</param>
        public void Test(Component mycaffe, int nIterationOverride, ITERATOR_TYPE type = ITERATOR_TYPE.ITERATION)
        {
            if (m_itrainer == null)
            {
                m_itrainer = createTrainer(mycaffe);
            }

            if (nIterationOverride == -1)
            {
                nIterationOverride = m_nIterations;
            }

            m_itrainer.Test(nIterationOverride, type);
            cleanup(0);
        }
Example #6
        /// <summary>
        /// Train the network using a modified PG training algorithm optimized for GPU use.
        /// </summary>
        /// <param name="nN">Specifies the number of iterations (based on the ITERATOR_TYPE) to run, or -1 to ignore.</param>
        /// <param name="type">Specifies the iteration type (default = ITERATION).</param>
        /// <param name="step">Specifies the stepping mode to use (when debugging).</param>
        /// <returns>A value of <i>true</i> is returned when handled, <i>false</i> otherwise.</returns>
        public bool Train(int nN, ITERATOR_TYPE type, TRAIN_STEP step)
        {
            if (step != TRAIN_STEP.NONE)
            {
                throw new Exception("The simple trainer does not support stepping - use the 'PG.MT' trainer instead.");
            }

            m_mycaffe.CancelEvent.Reset();
            Agent<T> agent = new Agent<T>(m_icallback, m_mycaffe, m_properties, m_random, Phase.TRAIN);

            agent.Run(Phase.TRAIN, nN, type);
            agent.Dispose();

            return false;
        }
Example #7
        /// <summary>
        /// Create a new trainer and use it to run a training cycle using the current 'stage' = RNN or RL.
        /// </summary>
        /// <param name="mycaffe">Specifies the MyCaffeControl to use.</param>
        /// <param name="nIterationOverride">Specifies the iterations to run if greater than zero.</param>
        /// <param name="type">Specifies the type of iterator to use.</param>
        /// <param name="step">Optionally, specifies whether or not to step the training for debugging (default = NONE).</param>
        public void Train(Component mycaffe, int nIterationOverride, ITERATOR_TYPE type = ITERATOR_TYPE.ITERATION, TRAIN_STEP step = TRAIN_STEP.NONE)
        {
            if (m_itrainer == null)
            {
                m_itrainer = createTrainer(mycaffe, getStage());
            }

            if (nIterationOverride == -1)
            {
                nIterationOverride = m_nIterations;
            }

            m_itrainer.Train(nIterationOverride, type, step);
            m_itrainer.Shutdown(1000);
            m_itrainer = null;
        }
Example #8
        /// <summary>
        /// Create a new trainer and use it to run a test cycle using the current 'stage' = RNN or RL.
        /// </summary>
        /// <param name="mycaffe">Specifies the MyCaffeControl to use.</param>
        /// <param name="nIterationOverride">Specifies the iterations to run if greater than zero.</param>
        /// <param name="type">Specifies the type of iterator to use.</param>
        public void Test(Component mycaffe, int nIterationOverride, ITERATOR_TYPE type = ITERATOR_TYPE.ITERATION)
        {
            if (m_itrainer == null)
            {
                m_itrainer = createTrainer(mycaffe, getStage());
            }

            if (nIterationOverride == -1)
            {
                nIterationOverride = m_nIterations;
            }

            m_itrainer.Test(nIterationOverride, type);
            m_itrainer.Shutdown(500);
            m_itrainer = null;
        }
Example #9
        /// <summary>
        /// Run the test cycle.
        /// </summary>
        /// <param name="nN">Specifies the number of iterations (based on the ITERATOR_TYPE) to run, or -1 to ignore.</param>
        /// <param name="type">Specifies the iteration type (default = ITERATION).</param>
        /// <returns>A value of <i>true</i> is returned when handled, <i>false</i> otherwise.</returns>
        public bool Test(int nN, ITERATOR_TYPE type)
        {
            int nDelay = 1000;
            string strProp = m_properties.ToString();

            // Turn off the num-skip to run at normal speed.
            strProp += "EnableNumSkip=False;";
            PropertySet properties = new PropertySet(strProp);

            m_mycaffe.CancelEvent.Reset();
            Agent<T> agent = new Agent<T>(m_icallback, m_mycaffe, properties, m_random, Phase.TRAIN);
            agent.Run(Phase.TEST, nN, type, TRAIN_STEP.NONE);

            agent.Dispose();
            Shutdown(nDelay);

            return true;
        }
Example #10
        private bool isAtIteration(int nN, ITERATOR_TYPE type, int nIteration, int nEpisode)
        {
            if (nN == -1)
                return false;

            if (type == ITERATOR_TYPE.EPISODE)
            {
                if (nEpisode < nN)
                    return false;

                return true;
            }
            else
            {
                if (nIteration < nN)
                    return false;

                return true;
            }
        }
Example #11
        /// <summary>
        /// The Run method provides the main 'actor' loop that performs the following steps:
        /// 1.) Feed data into the network.
        /// 2.) either Test the network...
        /// 3.) ... or Train the network.
        /// </summary>
        /// <param name="phase">Specifies the phase.</param>
        /// <param name="nN">Specifies the number of iterations (based on the ITERATOR_TYPE) to run, or -1 to ignore.</param>
        /// <param name="type">Specifies the iteration type (required = ITERATION).</param>
        /// <param name="step">Specifies the training step (used only during debugging).</param>
        public void Run(Phase phase, int nN, ITERATOR_TYPE type, TRAIN_STEP step)
        {
            if (type != ITERATOR_TYPE.ITERATION)
            {
                throw new Exception("The TrainerRNN only supports the ITERATION type.");
            }

            StateBase s = getData(phase, -1);

            while (!m_brain.Cancel.WaitOne(0) && !s.Done)
            {
                if (phase == Phase.TEST)
                {
                    m_brain.Test(s, nN);
                }
                else if (phase == Phase.TRAIN)
                {
                    m_brain.Train(s, nN, step);
                }

                s = getData(phase, 1);
            }
        }
Example #12
        /// <summary>
        /// The Run method provides the main loop that performs the following steps:
        /// 1.) get state
        /// 2.) build experience
        /// 3.) create policy gradients
        /// 4.) train on experiences
        /// </summary>
        /// <param name="phase">Specifies the phase.</param>
        /// <param name="nN">Specifies the number of iterations (based on the ITERATOR_TYPE) to run, or -1 to ignore.</param>
        /// <param name="type">Specifies the iteration type (default = ITERATION).</param>
        /// <param name="step">Specifies the training step to take, if any.  This is only used when debugging.</param>
        public void Run(Phase phase, int nN, ITERATOR_TYPE type, TRAIN_STEP step)
        {
            IMemoryCollection iMemory = MemoryCollectionFactory.CreateMemory(m_memType, m_nMemorySize, m_fPriorityAlpha);
            int    nIteration         = 1;
            double dfRunningReward    = 0;
            double dfEpisodeReward    = 0;
            int    nEpisode           = 0;
            bool   bDifferent         = false;

            StateBase state = getData(phase, -1, -1);
            // Preprocess the observation.
            SimpleDatum x = m_brain.Preprocess(state, m_bUseRawInput, out bDifferent, true);

            // Set the initial target model to the current model.
            m_brain.UpdateTargetModel();

            while (!m_brain.Cancel.WaitOne(0) && !isAtIteration(nN, type, nIteration, nEpisode))
            {
                // Forward the policy network and sample an action.
                int action = m_brain.act(x, state.Clip, state.ActionCount);

                // Take the next step using the action
                StateBase state_next = getData(phase, action, nIteration);

                // Preprocess the next observation.
                SimpleDatum x_next = m_brain.Preprocess(state_next, m_bUseRawInput, out bDifferent);
                if (!bDifferent)
                {
                    m_brain.Log.WriteLine("WARNING: The current state is the same as the previous state!");
                }

                // Build up episode memory, using reward for taking the action.
                iMemory.Add(new MemoryItem(state, x, action, state_next, x_next, state_next.Reward, state_next.Done, nIteration, nEpisode));
                dfEpisodeReward += state_next.Reward;

                // Do the training
                if (iMemory.Count > m_brain.BatchSize)
                {
                    double           dfBeta    = beta_by_frame(nIteration);
                    MemoryCollection rgSamples = iMemory.GetSamples(m_random, m_brain.BatchSize, dfBeta);
                    m_brain.Train(nIteration, rgSamples, state.ActionCount);
                    iMemory.Update(rgSamples);

                    if (nIteration % m_nUpdateTargetFreq == 0)
                    {
                        m_brain.UpdateTargetModel();
                    }
                }

                if (state_next.Done)
                {
                    // Update the running reward.
                    dfRunningReward = dfRunningReward * 0.99 + dfEpisodeReward * 0.01;

                    nEpisode++;
                    updateStatus(nIteration, nEpisode, dfEpisodeReward, dfRunningReward, 0, 0, m_brain.GetModelUpdated());

                    state           = getData(phase, -1, -1);
                    x               = m_brain.Preprocess(state, m_bUseRawInput, out bDifferent, true);
                    dfEpisodeReward = 0;
                }
                else
                {
                    state = state_next;
                    x     = x_next;
                }

                nIteration++;
            }

            iMemory.CleanUp();
        }
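The beta_by_frame helper used in the training block above is not shown in the snippet; a common prioritized-replay schedule linearly anneals the importance-sampling exponent toward 1.0. The sketch below assumes that schedule, and the field names are illustrative rather than taken from the library.

        private double m_dfBetaStart = 0.4;     // illustrative: initial importance-sampling exponent (beta)
        private int    m_nBetaFrames = 1000;    // illustrative: number of frames over which beta anneals to 1.0

        private double beta_by_frame(int nFrameIdx)
        {
            // Linearly anneal beta from m_dfBetaStart toward 1.0, then clamp at 1.0.
            return Math.Min(1.0, m_dfBetaStart + nFrameIdx * (1.0 - m_dfBetaStart) / m_nBetaFrames);
        }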
Example #13
        /// <summary>
        /// The Run method provides the main 'actor' loop that performs the following steps:
        /// 1.) get state
        /// 2.) build experience
        /// 3.) create policy gradients
        /// 4.) train on experiences
        /// </summary>
        /// <param name="phase">Specifies the phase.</param>
        /// <param name="nN">Specifies the number of iterations (based on the ITERATOR_TYPE) to run, or -1 to ignore.</param>
        /// <param name="type">Specifies the iteration type (default = ITERATION).</param>
        /// <param name="step">Specifies the training step to take, if any.  This is only used when debugging.</param>
        public void Run(Phase phase, int nN, ITERATOR_TYPE type, TRAIN_STEP step)
        {
            MemoryCollection m_rgMemory      = new MemoryCollection();
            double?          dfRunningReward = null;
            double           dfEpisodeReward = 0;
            int nEpisode   = 0;
            int nIteration = 0;

            StateBase s = getData(phase, -1);

            while (!m_brain.Cancel.WaitOne(0) && !isAtIteration(nN, type, nIteration, nEpisode))
            {
                // Preprocess the observation.
                SimpleDatum x = m_brain.Preprocess(s, m_bUseRawInput);

                // Forward the policy network and sample an action.
                float[] rgfAprob;
                int     action = m_brain.act(x, s.Clip, out rgfAprob);

                if (step == TRAIN_STEP.FORWARD)
                {
                    return;
                }

                // Take the next step using the action
                StateBase s_ = getData(phase, action);
                dfEpisodeReward += s_.Reward;

                if (phase == Phase.TRAIN)
                {
                    // Build up episode memory, using reward for taking the action.
                    m_rgMemory.Add(new MemoryItem(s, x, action, rgfAprob, (float)s_.Reward));

                    // An episode has finished.
                    if (s_.Done)
                    {
                        nEpisode++;
                        nIteration++;

                        m_brain.Reshape(m_rgMemory);

                        // Compute the discounted reward (backwards through time)
                        float[] rgDiscountedR = m_rgMemory.GetDiscountedRewards(m_fGamma, m_bAllowDiscountReset);
                        // When set, the rewards are standardized to be unit normal (this helps control the gradient estimator variance).
                        m_brain.SetDiscountedR(rgDiscountedR);

                        // Get the action probabilities.
                        float[] rgfAprobSet = m_rgMemory.GetActionProbabilities();
                        // The action probabilities are used to calculate the initial gradient within the loss function.
                        m_brain.SetActionProbabilities(rgfAprobSet);

                        // Get the action one-hot vectors.  When using Softmax, each action set is a one-hot vector
                        // (e.g. 3 actions with action 0 selected gives the vector <1,0,0>).
                        // When using a binary probability (e.g. with Sigmoid), each action set contains a single
                        // element set to the action value itself (e.g. 0 for action '0' and 1 for action '1').
                        float[] rgfAonehotSet = m_rgMemory.GetActionOneHotVectors();
                        m_brain.SetActionOneHotVectors(rgfAonehotSet);

                        // Train for one iteration, which triggers the loss function.
                        List <Datum> rgData = m_rgMemory.GetData();
                        List <Datum> rgClip = m_rgMemory.GetClip();
                        m_brain.SetData(rgData, rgClip);
                        m_brain.Train(nIteration, step);

                        // Update the running reward.
                        if (!dfRunningReward.HasValue)
                        {
                            dfRunningReward = dfEpisodeReward;
                        }
                        else
                        {
                            dfRunningReward = dfRunningReward * 0.99 + dfEpisodeReward * 0.01;
                        }

                        updateStatus(nIteration, nEpisode, dfEpisodeReward, dfRunningReward.Value);
                        dfEpisodeReward = 0;

                        s = getData(phase, -1);
                        m_rgMemory.Clear();

                        if (step != TRAIN_STEP.NONE)
                        {
                            return;
                        }
                    }
                    else
                    {
                        s = s_;
                    }
                }
                else
                {
                    if (s_.Done)
                    {
                        nEpisode++;

                        // Update the running reward.
                        if (!dfRunningReward.HasValue)
                        {
                            dfRunningReward = dfEpisodeReward;
                        }
                        else
                        {
                            dfRunningReward = dfRunningReward * 0.99 + dfEpisodeReward * 0.01;
                        }

                        updateStatus(nIteration, nEpisode, dfEpisodeReward, dfRunningReward.Value);
                        dfEpisodeReward = 0;

                        s = getData(phase, -1);
                    }
                    else
                    {
                        s = s_;
                    }

                    nIteration++;
                }
            }
        }
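GetDiscountedRewards is referenced above but not shown; a typical implementation walks the reward sequence backwards through time, discounting by gamma and optionally resetting the running sum at reward boundaries. The sketch below is written under those assumptions and is not the library's own code.

        // Sketch: compute discounted rewards backwards through time.  When bAllowReset is true the
        // running sum is reset at a non-zero reward, which marks a game boundary in Pong-style tasks.
        private static float[] computeDiscountedRewards(float[] rgfRewards, float fGamma, bool bAllowReset)
        {
            float[] rgfDiscounted = new float[rgfRewards.Length];
            float fRunningAdd = 0;

            for (int i = rgfRewards.Length - 1; i >= 0; i--)
            {
                if (bAllowReset && rgfRewards[i] != 0)
                    fRunningAdd = 0;

                fRunningAdd = fRunningAdd * fGamma + rgfRewards[i];
                rgfDiscounted[i] = fRunningAdd;
            }

            return rgfDiscounted;
        }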
        /// <summary>
        /// The Run method provides the main 'actor' loop that performs the following steps:
        /// 1.) get state
        /// 2.) build experience
        /// 3.) create policy gradients
        /// 4.) train on experiences
        /// </summary>
        /// <param name="phase">Specifies the phase.</param>
        /// <param name="nN">Specifies the number of iterations (based on the ITERATOR_TYPE) to run, or -1 to ignore.</param>
        /// <param name="type">Specifies the iteration type (default = ITERATION).</param>
        public void Run(Phase phase, int nN, ITERATOR_TYPE type)
        {
            MemoryCollection m_rgMemory      = new MemoryCollection();
            double?          dfRunningReward = null;
            double           dfEpisodeReward = 0;
            int nEpisode   = 0;
            int nIteration = 0;

            StateBase s = getData(phase, -1);

            if (s.Clip != null)
            {
                throw new Exception("The PG.SIMPLE trainer does not support recurrent layers or clip data, use the 'PG.ST' or 'PG.MT' trainer instead.");
            }

            while (!m_brain.Cancel.WaitOne(0) && !isAtIteration(nN, type, nIteration, nEpisode))
            {
                // Preprocess the observation.
                SimpleDatum x = m_brain.Preprocess(s, m_bUseRawInput);

                // Forward the policy network and sample an action.
                float fAprob;
                int   action = m_brain.act(x, out fAprob);

                // Take the next step using the action
                StateBase s_ = getData(phase, action);
                dfEpisodeReward += s_.Reward;

                if (phase == Phase.TRAIN)
                {
                    // Build up episode memory, using reward for taking the action.
                    m_rgMemory.Add(new MemoryItem(s, x, action, fAprob, (float)s_.Reward));

                    // An episode has finished.
                    if (s_.Done)
                    {
                        nEpisode++;
                        nIteration++;

                        m_brain.Reshape(m_rgMemory);

                        // Compute the discounted reward (backwards through time)
                        float[] rgDiscountedR = m_rgMemory.GetDiscountedRewards(m_fGamma, m_bAllowDiscountReset);
                        // When set, the rewards are standardized to be unit normal (this helps control the gradient estimator variance).
                        m_brain.SetDiscountedR(rgDiscountedR);

                        // Modulate the gradient with the advantage (PG magic happens right here.)
                        float[] rgDlogp = m_rgMemory.GetPolicyGradients();
                        // The discounted R is applied to the policy gradient within the loss function, just before the backward pass.
                        m_brain.SetPolicyGradients(rgDlogp);

                        // Train for one iteration, which triggers the loss function.
                        List <Datum> rgData = m_rgMemory.GetData();
                        m_brain.SetData(rgData);
                        m_brain.Train(nIteration);

                        // Update the running reward.
                        if (!dfRunningReward.HasValue)
                        {
                            dfRunningReward = dfEpisodeReward;
                        }
                        else
                        {
                            dfRunningReward = dfRunningReward * 0.99 + dfEpisodeReward * 0.01;
                        }

                        updateStatus(nIteration, nEpisode, dfEpisodeReward, dfRunningReward.Value);
                        dfEpisodeReward = 0;

                        s = getData(phase, -1);
                        m_rgMemory.Clear();
                    }
                    else
                    {
                        s = s_;
                    }
                }
                else
                {
                    if (s_.Done)
                    {
                        nEpisode++;

                        // Update the running reward.
                        if (!dfRunningReward.HasValue)
                        {
                            dfRunningReward = dfEpisodeReward;
                        }
                        else
                        {
                            dfRunningReward = dfRunningReward * 0.99 + dfEpisodeReward * 0.01;
                        }

                        updateStatus(nIteration, nEpisode, dfEpisodeReward, dfRunningReward.Value);
                        dfEpisodeReward = 0;

                        s = getData(phase, -1);
                    }
                    else
                    {
                        s = s_;
                    }

                    nIteration++;
                }
            }
        }
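GetPolicyGradients is likewise referenced but not shown; for a binary (sigmoid) action head the usual initial gradient is y - aprob, which the comments above indicate is then modulated by the standardized discounted reward inside the loss. The sketch below is a hedged illustration under that assumption, not the library's own implementation.

        // Sketch: initial policy gradient for a binary (sigmoid) action head, where rgfActions[i] is the
        // sampled action label (0 or 1) and rgfAprob[i] is the probability the network assigned to action 1.
        private static float[] computePolicyGradients(float[] rgfActions, float[] rgfAprob)
        {
            float[] rgfDlogp = new float[rgfActions.Length];

            for (int i = 0; i < rgfActions.Length; i++)
                rgfDlogp[i] = rgfActions[i] - rgfAprob[i];   // gradient of log p(y|x) w.r.t. the sigmoid output

            return rgfDlogp;
        }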