/// <summary>
/// Returns whether the requested number of iterations (or episodes) has been reached.
/// </summary>
/// <param name="nN">Specifies the target count, or -1 to run without a limit.</param>
/// <param name="type">Specifies whether nN counts episodes or iterations.</param>
/// <param name="nIteration">Specifies the current iteration count.</param>
/// <param name="nEpisode">Specifies the current episode count.</param>
/// <returns>A value of <i>true</i> is returned once the selected counter reaches nN, <i>false</i> otherwise.</returns>
private bool isAtIteration(int nN, ITERATOR_TYPE type, int nIteration, int nEpisode)
{
    // A target of -1 means 'run forever'.
    if (nN == -1)
        return false;

    // Pick the counter that the iterator type tracks, then compare against the target.
    int nCurrent = (type == ITERATOR_TYPE.EPISODE) ? nEpisode : nIteration;
    return nCurrent >= nN;
}
/// <summary>
/// Train the network using a modified PG training algorithm optimized for GPU use.
/// </summary>
/// <param name="nN">Specifies the number of iterations (based on the ITERATION_TYPE) to run, or -1 to ignore.</param>
/// <param name="type">Specifies the iteration type (default = ITERATION).</param>
/// <param name="step">Specifies the stepping mode to use (when debugging).</param>
/// <returns>A value of <i>true</i> is returned when handled, <i>false</i> otherwise.</returns>
public bool Train(int nN, ITERATOR_TYPE type, TRAIN_STEP step)
{
    m_mycaffe.CancelEvent.Reset();

    // Run a full training cycle on a freshly created agent, then release it.
    Agent<T> trainingAgent = new Agent<T>(m_icallback, m_mycaffe, m_properties, m_random, Phase.TRAIN);
    trainingAgent.Run(Phase.TRAIN, nN, type, step);
    trainingAgent.Dispose();

    // NOTE(review): 'false' is returned even after a completed cycle, which conflicts with
    // the XML doc's 'true when handled' — confirm callers rely on this before changing.
    return false;
}
/// <summary>
/// Run the test cycle - currently this is not implemented.
/// </summary>
/// <param name="nN">Specifies the number of iterations (based on the ITERATION_TYPE) to run, or -1 to ignore.</param>
/// <param name="type">Specifies the iteration type (default = ITERATION).</param>
/// <returns>A value of <i>true</i> is returned when handled, <i>false</i> otherwise.</returns>
public bool Test(int nN, ITERATOR_TYPE type)
{
    int nShutdownDelay = 1000;

    m_mycaffe.CancelEvent.Reset();

    // Run the testing pass using this trainer's vocabulary and preload settings.
    Agent<T> testAgent = new Agent<T>(m_icallback, m_mycaffe, m_properties, m_random, Phase.TEST, m_rgVocabulary, m_bUsePreloadData);
    testAgent.Run(Phase.TEST, nN, type, TRAIN_STEP.NONE);
    testAgent.Dispose();

    Shutdown(nShutdownDelay);

    return true;
}
/// <summary>
/// Create a new trainer and use it to run a training cycle.
/// </summary>
/// <param name="mycaffe">Specifies the MyCaffeControl to use.</param>
/// <param name="nIterationOverride">Specifies the iterations to run if greater than zero.</param>
/// <param name="type">Specifies the type of iterator to use.</param>
/// <param name="step">Optionally, specifies whether or not to step the training for debugging (default = NONE).</param>
public void Train(Component mycaffe, int nIterationOverride, ITERATOR_TYPE type = ITERATOR_TYPE.ITERATION, TRAIN_STEP step = TRAIN_STEP.NONE)
{
    // Lazily create the trainer on first use.
    if (m_itrainer == null)
        m_itrainer = createTrainer(mycaffe);

    // Fall back to the configured iteration count when no override is given.
    int nIterations = (nIterationOverride == -1) ? m_nIterations : nIterationOverride;

    m_itrainer.Train(nIterations, type, step);
    cleanup(0);
}
/// <summary>
/// Create a new trainer and use it to run a test cycle.
/// </summary>
/// <param name="mycaffe">Specifies the MyCaffeControl to use.</param>
/// <param name="nIterationOverride">Specifies the iterations to run if greater than zero.</param>
/// <param name="type">Specifies the type of iterator to use.</param>
public void Test(Component mycaffe, int nIterationOverride, ITERATOR_TYPE type = ITERATOR_TYPE.ITERATION)
{
    // Lazily create the trainer on first use.
    if (m_itrainer == null)
        m_itrainer = createTrainer(mycaffe);

    // Fall back to the configured iteration count when no override is given.
    int nIterations = (nIterationOverride == -1) ? m_nIterations : nIterationOverride;

    m_itrainer.Test(nIterations, type);
    cleanup(0);
}
/// <summary>
/// Train the network using a modified PG training algorithm optimized for GPU use.
/// </summary>
/// <param name="nN">Specifies the number of iterations (based on the ITERATION_TYPE) to run, or -1 to ignore.</param>
/// <param name="type">Specifies the iteration type (default = ITERATION).</param>
/// <param name="step">Specifies the stepping mode to use (when debugging).</param>
/// <returns>A value of <i>true</i> is returned when handled, <i>false</i> otherwise.</returns>
/// <exception cref="Exception">Thrown when a stepping mode other than NONE is requested, which this trainer does not support.</exception>
public bool Train(int nN, ITERATOR_TYPE type, TRAIN_STEP step)
{
    // Stepping is a debugging feature only available in the multi-threaded trainer.
    // Fixed typo in the user-facing message: 'traininer' -> 'trainer'.
    if (step != TRAIN_STEP.NONE)
        throw new Exception("The simple trainer does not support stepping - use the 'PG.MT' trainer instead.");

    m_mycaffe.CancelEvent.Reset();

    Agent<T> agent = new Agent<T>(m_icallback, m_mycaffe, m_properties, m_random, Phase.TRAIN);
    agent.Run(Phase.TRAIN, nN, type);
    agent.Dispose();

    return false;
}
/// <summary>
/// Create a new trainer and use it to run a training cycle using the current 'stage' = RNN or RL.
/// </summary>
/// <param name="mycaffe">Specifies the MyCaffeControl to use.</param>
/// <param name="nIterationOverride">Specifies the iterations to run if greater than zero.</param>
/// <param name="type">Specifies the type of iterator to use.</param>
/// <param name="step">Optionally, specifies whether or not to step the training for debugging (default = NONE).</param>
public void Train(Component mycaffe, int nIterationOverride, ITERATOR_TYPE type = ITERATOR_TYPE.ITERATION, TRAIN_STEP step = TRAIN_STEP.NONE)
{
    // Lazily create the trainer for the active stage.
    if (m_itrainer == null)
        m_itrainer = createTrainer(mycaffe, getStage());

    // Fall back to the configured iteration count when no override is given.
    int nIterations = (nIterationOverride == -1) ? m_nIterations : nIterationOverride;

    m_itrainer.Train(nIterations, type, step);

    // Shut the trainer down so the next call starts from a fresh instance.
    m_itrainer.Shutdown(1000);
    m_itrainer = null;
}
/// <summary>
/// Create a new trainer and use it to run a test cycle using the current 'stage' = RNN or RL.
/// </summary>
/// <param name="mycaffe">Specifies the MyCaffeControl to use.</param>
/// <param name="nIterationOverride">Specifies the iterations to run if greater than zero.</param>
/// <param name="type">Specifies the type of iterator to use.</param>
public void Test(Component mycaffe, int nIterationOverride, ITERATOR_TYPE type = ITERATOR_TYPE.ITERATION)
{
    // Lazily create the trainer for the active stage.
    if (m_itrainer == null)
        m_itrainer = createTrainer(mycaffe, getStage());

    // Fall back to the configured iteration count when no override is given.
    int nIterations = (nIterationOverride == -1) ? m_nIterations : nIterationOverride;

    m_itrainer.Test(nIterations, type);

    // Shut the trainer down so the next call starts from a fresh instance.
    m_itrainer.Shutdown(500);
    m_itrainer = null;
}
/// <summary>
/// Run the test cycle - currently this is not implemented.
/// </summary>
/// <param name="nN">Specifies the number of iterations (based on the ITERATION_TYPE) to run, or -1 to ignore.</param>
/// <param name="type">Specifies the iteration type (default = ITERATION).</param>
/// <returns>A value of <i>true</i> is returned when handled, <i>false</i> otherwise.</returns>
public bool Test(int nN, ITERATOR_TYPE type)
{
    int nShutdownDelay = 1000;

    // Turn off the num-skip to run at normal speed.
    PropertySet testProperties = new PropertySet(m_properties.ToString() + "EnableNumSkip=False;");

    m_mycaffe.CancelEvent.Reset();

    // NOTE(review): the agent is created with Phase.TRAIN but run with Phase.TEST —
    // presumably intentional (testing against the training net); confirm before changing.
    Agent<T> testAgent = new Agent<T>(m_icallback, m_mycaffe, testProperties, m_random, Phase.TRAIN);
    testAgent.Run(Phase.TEST, nN, type, TRAIN_STEP.NONE);
    testAgent.Dispose();

    Shutdown(nShutdownDelay);

    return true;
}
/// <summary>
/// Determines whether the run has reached the requested iteration or episode count.
/// </summary>
/// <param name="nN">Specifies the target count, or -1 to run without a limit.</param>
/// <param name="type">Specifies whether nN counts episodes or iterations.</param>
/// <param name="nIteration">Specifies the current iteration count.</param>
/// <param name="nEpisode">Specifies the current episode count.</param>
/// <returns>A value of <i>true</i> is returned once the selected counter reaches nN, <i>false</i> otherwise.</returns>
private bool isAtIteration(int nN, ITERATOR_TYPE type, int nIteration, int nEpisode)
{
    // A target of -1 means no limit has been set.
    if (nN == -1)
        return false;

    // Episode iterators compare against the episode count; all others use the iteration count.
    if (type == ITERATOR_TYPE.EPISODE)
        return nEpisode >= nN;

    return nIteration >= nN;
}
/// <summary>
/// The Run method provides the main 'actor' loop that performs the following steps:
/// 1.) Feed data into the network.
/// 2.) either Test the network...
/// 3.) ... or Train the network.
/// </summary>
/// <param name="phase">Specifies the phase (TRAIN or TEST).</param>
/// <param name="nN">Specifies the number of iterations (based on the ITERATION_TYPE) to run, or -1 to ignore.</param>
/// <param name="type">Specifies the iteration type (required = ITERATION).</param>
/// <param name="step">Specifies the training step (used only during debugging).</param>
/// <returns>The vocabulary built up during training and testing is returned.</returns>
public void Run(Phase phase, int nN, ITERATOR_TYPE type, TRAIN_STEP step)
{
    // Only the ITERATION iterator is supported by this trainer.
    if (type != ITERATOR_TYPE.ITERATION)
        throw new Exception("The TrainerRNN only supports the ITERATION type.");

    // Pull the initial state (action = -1), then cycle until canceled or the state reports done.
    StateBase state = getData(phase, -1);

    while (!m_brain.Cancel.WaitOne(0) && !state.Done)
    {
        switch (phase)
        {
            case Phase.TEST:
                m_brain.Test(state, nN);
                break;

            case Phase.TRAIN:
                m_brain.Train(state, nN, step);
                break;
        }

        // Advance to the next state.
        state = getData(phase, 1);
    }
}
/// <summary>
/// The Run method provides the main loop that performs the following steps:
/// 1.) get state
/// 2.) build experience
/// 3.) create policy gradients
/// 4.) train on experiences
/// </summary>
/// <param name="phase">Specifies the phase (TRAIN or TEST).</param>
/// <param name="nN">Specifies the number of iterations (based on the ITERATION_TYPE) to run, or -1 to ignore.</param>
/// <param name="type">Specifies the iteration type (default = ITERATION).</param>
/// <param name="step">Specifies the training step to take, if any.  This is only used when debugging.</param>
public void Run(Phase phase, int nN, ITERATOR_TYPE type, TRAIN_STEP step)
{
    // Experience memory built from the configured type, capacity and priority alpha.
    IMemoryCollection iMemory = MemoryCollectionFactory.CreateMemory(m_memType, m_nMemorySize, m_fPriorityAlpha);
    int nIteration = 1;
    double dfRunningReward = 0;
    double dfEpisodeReward = 0;
    int nEpisode = 0;
    bool bDifferent = false;

    // Get the initial state (action = -1 requests a reset).
    StateBase state = getData(phase, -1, -1);
    // Preprocess the observation.
    SimpleDatum x = m_brain.Preprocess(state, m_bUseRawInput, out bDifferent, true);

    // Set the initial target model to the current model.
    m_brain.UpdateTargetModel();

    // Loop until canceled or the requested iteration/episode count is reached.
    while (!m_brain.Cancel.WaitOne(0) && !isAtIteration(nN, type, nIteration, nEpisode))
    {
        // Forward the policy network and sample an action.
        int action = m_brain.act(x, state.Clip, state.ActionCount);

        // Take the next step using the action
        StateBase state_next = getData(phase, action, nIteration);

        // Preprocess the next observation.
        SimpleDatum x_next = m_brain.Preprocess(state_next, m_bUseRawInput, out bDifferent);
        if (!bDifferent)
        {
            m_brain.Log.WriteLine("WARNING: The current state is the same as the previous state!");
        }

        // Build up episode memory, using reward for taking the action.
        iMemory.Add(new MemoryItem(state, x, action, state_next, x_next, state_next.Reward, state_next.Done, nIteration, nEpisode));
        dfEpisodeReward += state_next.Reward;

        // Do the training
        if (iMemory.Count > m_brain.BatchSize)
        {
            // Sample a batch from memory (beta is annealed by frame) and train on it,
            // then write the updated sample information back into the memory.
            double dfBeta = beta_by_frame(nIteration);
            MemoryCollection rgSamples = iMemory.GetSamples(m_random, m_brain.BatchSize, dfBeta);
            m_brain.Train(nIteration, rgSamples, state.ActionCount);
            iMemory.Update(rgSamples);

            // Periodically refresh the target model from the current model.
            if (nIteration % m_nUpdateTargetFreq == 0)
            {
                m_brain.UpdateTargetModel();
            }
        }

        if (state_next.Done)
        {
            // Update reward running
            dfRunningReward = dfRunningReward * 0.99 + dfEpisodeReward * 0.01;

            nEpisode++;
            updateStatus(nIteration, nEpisode, dfEpisodeReward, dfRunningReward, 0, 0, m_brain.GetModelUpdated());

            // Reset the environment and start a new episode.
            state = getData(phase, -1, -1);
            x = m_brain.Preprocess(state, m_bUseRawInput, out bDifferent, true);
            dfEpisodeReward = 0;
        }
        else
        {
            // Carry the current step's state/observation into the next loop pass.
            state = state_next;
            x = x_next;
        }

        nIteration++;
    }

    iMemory.CleanUp();
}
/// <summary>
/// The Run method provides the main 'actor' loop that performs the following steps:
/// 1.) get state
/// 2.) build experience
/// 3.) create policy gradients
/// 4.) train on experiences
/// </summary>
/// <param name="phase">Specifies the phase (TRAIN or TEST).</param>
/// <param name="nN">Specifies the number of iterations (based on the ITERATION_TYPE) to run, or -1 to ignore.</param>
/// <param name="type">Specifies the iteration type (default = ITERATION).</param>
/// <param name="step">Specifies the training step to take, if any.  This is only used when debugging.</param>
public void Run(Phase phase, int nN, ITERATOR_TYPE type, TRAIN_STEP step)
{
    MemoryCollection m_rgMemory = new MemoryCollection();
    double? dfRunningReward = null;
    double dfEpisodeReward = 0;
    int nEpisode = 0;
    int nIteration = 0;

    // Get the initial state (action = -1 requests a reset).
    StateBase s = getData(phase, -1);

    // Loop until canceled or the requested iteration/episode count is reached.
    while (!m_brain.Cancel.WaitOne(0) && !isAtIteration(nN, type, nIteration, nEpisode))
    {
        // Preprocess the observation.
        SimpleDatum x = m_brain.Preprocess(s, m_bUseRawInput);

        // Forward the policy network and sample an action.
        float[] rgfAprob;
        int action = m_brain.act(x, s.Clip, out rgfAprob);

        // When single-stepping just the forward pass (debugging), stop here.
        if (step == TRAIN_STEP.FORWARD)
        {
            return;
        }

        // Take the next step using the action
        StateBase s_ = getData(phase, action);
        dfEpisodeReward += s_.Reward;

        if (phase == Phase.TRAIN)
        {
            // Build up episode memory, using reward for taking the action.
            m_rgMemory.Add(new MemoryItem(s, x, action, rgfAprob, (float)s_.Reward));

            // An episode has finished.
            if (s_.Done)
            {
                nEpisode++;
                nIteration++;

                m_brain.Reshape(m_rgMemory);

                // Compute the discounted reward (backwards through time)
                float[] rgDiscountedR = m_rgMemory.GetDiscountedRewards(m_fGamma, m_bAllowDiscountReset);
                // Rewards are standardized when set to be unit normal (helps control the gradient estimator variance)
                m_brain.SetDiscountedR(rgDiscountedR);

                // Get the action probabilities.
                float[] rgfAprobSet = m_rgMemory.GetActionProbabilities();
                // The action probabilities are used to calculate the initial gradient within the loss function.
                m_brain.SetActionProbabilities(rgfAprobSet);

                // Get the action one-hot vectors.  When using Softmax, this contains the one-hot vector containing
                // each action set (e.g. 3 actions with action 0 set would return a vector <1,0,0>).
                // When using a binary probability (e.g. with Sigmoid), the each action set only contains a
                // single element which is set to the action value itself (e.g. 0 for action '0' and 1 for action '1')
                float[] rgfAonehotSet = m_rgMemory.GetActionOneHotVectors();
                m_brain.SetActionOneHotVectors(rgfAonehotSet);

                // Train for one iteration, which triggers the loss function.
                List<Datum> rgData = m_rgMemory.GetData();
                List<Datum> rgClip = m_rgMemory.GetClip();
                m_brain.SetData(rgData, rgClip);
                m_brain.Train(nIteration, step);

                // Update reward running
                if (!dfRunningReward.HasValue)
                {
                    dfRunningReward = dfEpisodeReward;
                }
                else
                {
                    dfRunningReward = dfRunningReward * 0.99 + dfEpisodeReward * 0.01;
                }

                updateStatus(nIteration, nEpisode, dfEpisodeReward, dfRunningReward.Value);
                dfEpisodeReward = 0;

                // Reset the environment and clear the episode memory.
                s = getData(phase, -1);
                m_rgMemory.Clear();

                // When stepping (debugging), run only a single training pass.
                if (step != TRAIN_STEP.NONE)
                {
                    return;
                }
            }
            else
            {
                s = s_;
            }
        }
        else
        {
            if (s_.Done)
            {
                nEpisode++;

                // Update reward running
                if (!dfRunningReward.HasValue)
                {
                    dfRunningReward = dfEpisodeReward;
                }
                else
                {
                    dfRunningReward = dfRunningReward * 0.99 + dfEpisodeReward * 0.01;
                }

                updateStatus(nIteration, nEpisode, dfEpisodeReward, dfRunningReward.Value);
                dfEpisodeReward = 0;
                s = getData(phase, -1);
            }
            else
            {
                s = s_;
            }

            // NOTE(review): the iteration counter advances every pass in the TEST phase, but only
            // once per completed episode in the TRAIN phase (above) — confirm this is intended.
            nIteration++;
        }
    }
}
/// <summary>
/// The Run method provides the main 'actor' loop that performs the following steps:
/// 1.) get state
/// 2.) build experience
/// 3.) create policy gradients
/// 4.) train on experiences
/// </summary>
/// <param name="phase">Specifies the phase (TRAIN or TEST).</param>
/// <param name="nN">Specifies the number of iterations (based on the ITERATION_TYPE) to run, or -1 to ignore.</param>
/// <param name="type">Specifies the iteration type (default = ITERATION).</param>
public void Run(Phase phase, int nN, ITERATOR_TYPE type)
{
    MemoryCollection m_rgMemory = new MemoryCollection();
    double? dfRunningReward = null;
    double dfEpisodeReward = 0;
    int nEpisode = 0;
    int nIteration = 0;

    // Get the initial state (action = -1 requests a reset).
    StateBase s = getData(phase, -1);

    // This trainer cannot handle clip data (produced by recurrent models).
    if (s.Clip != null)
    {
        throw new Exception("The PG.SIMPLE trainer does not support recurrent layers or clip data, use the 'PG.ST' or 'PG.MT' trainer instead.");
    }

    // Loop until canceled or the requested iteration/episode count is reached.
    while (!m_brain.Cancel.WaitOne(0) && !isAtIteration(nN, type, nIteration, nEpisode))
    {
        // Preprocess the observation.
        SimpleDatum x = m_brain.Preprocess(s, m_bUseRawInput);

        // Forward the policy network and sample an action.
        float fAprob;
        int action = m_brain.act(x, out fAprob);

        // Take the next step using the action
        StateBase s_ = getData(phase, action);
        dfEpisodeReward += s_.Reward;

        if (phase == Phase.TRAIN)
        {
            // Build up episode memory, using reward for taking the action.
            m_rgMemory.Add(new MemoryItem(s, x, action, fAprob, (float)s_.Reward));

            // An episode has finished.
            if (s_.Done)
            {
                nEpisode++;
                nIteration++;

                m_brain.Reshape(m_rgMemory);

                // Compute the discounted reward (backwards through time)
                float[] rgDiscountedR = m_rgMemory.GetDiscountedRewards(m_fGamma, m_bAllowDiscountReset);
                // Rewards are standardized when set to be unit normal (helps control the gradient estimator variance)
                m_brain.SetDiscountedR(rgDiscountedR);

                // Modulate the gradient with the advantage (PG magic happens right here.)
                float[] rgDlogp = m_rgMemory.GetPolicyGradients();
                // discounted R applied to policy gradient within loss function, just before the backward pass.
                m_brain.SetPolicyGradients(rgDlogp);

                // Train for one iteration, which triggers the loss function.
                List<Datum> rgData = m_rgMemory.GetData();
                m_brain.SetData(rgData);
                m_brain.Train(nIteration);

                // Update reward running
                if (!dfRunningReward.HasValue)
                {
                    dfRunningReward = dfEpisodeReward;
                }
                else
                {
                    dfRunningReward = dfRunningReward * 0.99 + dfEpisodeReward * 0.01;
                }

                updateStatus(nIteration, nEpisode, dfEpisodeReward, dfRunningReward.Value);
                dfEpisodeReward = 0;

                // Reset the environment and clear the episode memory.
                s = getData(phase, -1);
                m_rgMemory.Clear();
            }
            else
            {
                s = s_;
            }
        }
        else
        {
            if (s_.Done)
            {
                nEpisode++;

                // Update reward running
                if (!dfRunningReward.HasValue)
                {
                    dfRunningReward = dfEpisodeReward;
                }
                else
                {
                    dfRunningReward = dfRunningReward * 0.99 + dfEpisodeReward * 0.01;
                }

                updateStatus(nIteration, nEpisode, dfEpisodeReward, dfRunningReward.Value);
                dfEpisodeReward = 0;
                s = getData(phase, -1);
            }
            else
            {
                s = s_;
            }

            // NOTE(review): the iteration counter advances every pass in the TEST phase, but only
            // once per completed episode in the TRAIN phase (above) — confirm this is intended.
            nIteration++;
        }
    }
}