/// <summary>
/// Train the network using a modified PG training algorithm optimized for GPU use.
/// </summary>
/// <param name="nN">Specifies the number of iterations (based on the ITERATION_TYPE) to run, or -1 to ignore.</param>
/// <param name="type">Specifies the iteration type (default = ITERATION).</param>
/// <param name="step">Specifies the stepping mode to use (when debugging).</param>
/// <returns>A value of <i>true</i> is returned when handled, <i>false</i> otherwise.</returns>
public bool Train(int nN, ITERATOR_TYPE type, TRAIN_STEP step)
{
    m_mycaffe.CancelEvent.Reset();

    Agent<T> agent = new Agent<T>(m_icallback, m_mycaffe, m_properties, m_random, Phase.TRAIN);
    agent.Run(Phase.TRAIN, nN, type, step);
    agent.Dispose();

    return false;
}
/// <summary>
/// Train the network on the currently set data (and clip, when recurrent), accumulating gradients and applying a weight update on each mini-batch boundary.
/// </summary>
/// <param name="nIteration">Specifies the current iteration, used to detect mini-batch boundaries.</param>
/// <param name="step">Specifies the stepping mode to use (when debugging).</param>
public void Train(int nIteration, TRAIN_STEP step)
{
    m_mycaffe.Log.Enable = false;

    // Run data/clip groups > 1 in non-batch mode.
    if (m_nRecurrentSequenceLength != 1 && m_rgData != null && m_rgData.Count > 1 && m_rgClip != null)
    {
        prepareBlob(m_blobActionOneHot1, m_blobActionOneHot);
        prepareBlob(m_blobDiscountedR1, m_blobDiscountedR);
        prepareBlob(m_blobPolicyGradient1, m_blobPolicyGradient);

        for (int i = 0; i < m_rgData.Count; i++)
        {
            copyBlob(i, m_blobActionOneHot1, m_blobActionOneHot);
            copyBlob(i, m_blobDiscountedR1, m_blobDiscountedR);
            copyBlob(i, m_blobPolicyGradient1, m_blobPolicyGradient);

            List<Datum> rgData1 = new List<Datum>() { m_rgData[i] };
            List<Datum> rgClip1 = new List<Datum>() { m_rgClip[i] };

            m_memData.AddDatumVector(rgData1, rgClip1, 1, true, true);
            m_solver.Step(1, step, true, false, true, true);
        }

        m_blobActionOneHot.ReshapeLike(m_blobActionOneHot1);
        m_blobDiscountedR.ReshapeLike(m_blobDiscountedR1);
        m_blobPolicyGradient.ReshapeLike(m_blobPolicyGradient1);

        m_rgData = null;
        m_rgClip = null;
    }
    else
    {
        m_solver.Step(1, step, true, false, true, true);
    }

    // Accumulate the gradients from this step rather than applying them immediately.
    m_colAccumulatedGradients.Accumulate(m_mycaffe.Cuda, m_net.learnable_parameters, true);

    // Apply the accumulated gradients once per mini-batch (or when stepping through a backward pass).
    if (nIteration % m_nMiniBatch == 0 || step == TRAIN_STEP.BACKWARD || step == TRAIN_STEP.BOTH)
    {
        m_net.learnable_parameters.CopyFrom(m_colAccumulatedGradients, true);
        m_colAccumulatedGradients.SetDiff(0);
        m_solver.ApplyUpdate(nIteration);
        m_net.ClearParamDiffs();
    }

    m_mycaffe.Log.Enable = true;
}
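The accumulate-then-apply pattern above lets the solver step once per sample while only applying a weight update every m_nMiniBatch iterations, emulating a larger batch without the memory cost. The following is a minimal sketch of the same idea in plain C#, independent of MyCaffe; ComputeGradient and ApplyUpdate are hypothetical stand-ins for the solver's Step(...) and ApplyUpdate(...) calls shown above.

using System;

// Minimal sketch of the accumulate-then-apply mini-batch pattern.
// ComputeGradient and ApplyUpdate are hypothetical stand-ins, not MyCaffe APIs.
class GradientAccumulationSketch
{
    const int ParamCount = 4;
    const int MiniBatch = 8;

    static double[] ComputeGradient(int iter)
    {
        double[] g = new double[ParamCount];
        for (int i = 0; i < g.Length; i++)
            g[i] = 0.01 * (i + 1);               // placeholder gradient values
        return g;
    }

    static void ApplyUpdate(double[] accumulated, int iter)
    {
        Console.WriteLine($"Applying update at iteration {iter}.");
    }

    static void Main()
    {
        double[] accumulated = new double[ParamCount];

        for (int iter = 1; iter <= 32; iter++)
        {
            double[] grad = ComputeGradient(iter);  // one sample's forward/backward
            for (int i = 0; i < accumulated.Length; i++)
                accumulated[i] += grad[i];          // accumulate instead of applying

            if (iter % MiniBatch == 0)              // one weight update per mini-batch
            {
                ApplyUpdate(accumulated, iter);
                Array.Clear(accumulated, 0, accumulated.Length);
            }
        }
    }
}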
/// <summary>
/// Train the network by running the solver for the specified number of iterations.
/// </summary>
/// <param name="s">Specifies the state containing the raw data to train on.</param>
/// <param name="nIterations">Specifies the number of iterations to run, or a value &lt;= 0 to use the solver's max_iter.</param>
/// <param name="step">Specifies the stepping mode to use (when debugging).</param>
public void Train(StateBase s, int nIterations, TRAIN_STEP step)
{
    if (nIterations <= 0)
        nIterations = m_solver.parameter.max_iter;

    getRawData(s);
    m_sw.Start();
    m_solver.Solve(nIterations, null, null, step);
}
/// <summary>
/// Train the network using a modified PG training algorithm optimized for GPU use.
/// </summary>
/// <param name="nIterations">Specifies the number of iterations to run.</param>
/// <param name="step">Specifies the stepping mode to use (when debugging).</param>
/// <returns>A value of <i>true</i> is returned when handled, <i>false</i> otherwise.</returns>
public bool Train(int nIterations, TRAIN_STEP step)
{
    m_mycaffe.CancelEvent.Reset();

    Agent<T> agent = new Agent<T>(m_icallback, m_mycaffe, m_properties, m_random, Phase.TRAIN, m_rgVocabulary, m_bUsePreloadData);
    agent.Run(Phase.TRAIN, nIterations, step);
    agent.Dispose();

    return false;
}
/// <summary>
/// Train the network using a modified PG training algorithm optimized for GPU use.
/// </summary>
/// <param name="nIterations">Specifies the number of iterations to run.</param>
/// <param name="step">Specifies the stepping mode to use (when debugging).</param>
/// <returns>A value of <i>true</i> is returned when handled, <i>false</i> otherwise.</returns>
public bool Train(int nIterations, TRAIN_STEP step)
{
    if (step != TRAIN_STEP.NONE)
        throw new Exception("The simple trainer does not support stepping - use the 'PG.MT' trainer instead.");

    m_mycaffe.CancelEvent.Reset();

    Agent<T> agent = new Agent<T>(m_icallback, m_mycaffe, m_properties, m_random, Phase.TRAIN);
    agent.Run(Phase.TRAIN, nIterations);
    agent.Dispose();

    return false;
}
/// <summary>
/// Create a new trainer and use it to run a training cycle.
/// </summary>
/// <param name="mycaffe">Specifies the MyCaffeControl to use.</param>
/// <param name="nIterationOverride">Specifies the iterations to run if greater than zero.</param>
/// <param name="step">Optionally, specifies whether or not to step the training for debugging (default = NONE).</param>
public void Train(Component mycaffe, int nIterationOverride, TRAIN_STEP step = TRAIN_STEP.NONE)
{
    if (m_itrainer == null)
        m_itrainer = createTrainer(mycaffe);

    if (nIterationOverride == -1)
        nIterationOverride = m_nItertions;

    m_itrainer.Train(nIterationOverride, step);
    m_itrainer.Shutdown(1000);
    m_itrainer = null;
}
/// <summary>
/// The Run method provides the main 'actor' loop that performs the following steps:
/// 1.) Feed data into the network.
/// 2.) either Test the network...
/// 3.) ... or Train the network.
/// </summary>
/// <param name="phase">Specifies the phase.</param>
/// <param name="nIterations">Specifies the number of iterations to run.</param>
/// <param name="step">Specifies the training step (used only during debugging).</param>
public void Run(Phase phase, int nIterations, TRAIN_STEP step)
{
    StateBase s = getData(-1);

    while (!m_brain.Cancel.WaitOne(0) && !s.Done)
    {
        if (phase == Phase.TEST)
            m_brain.Test(s, nIterations);
        else if (phase == Phase.TRAIN)
            m_brain.Train(s, nIterations, step);

        s = getData(1);
    }
}
/// <summary>
/// The Run method provides the main 'actor' loop that performs the following steps:
/// 1.) Feed data into the network.
/// 2.) either Test the network...
/// 3.) ... or Train the network.
/// </summary>
/// <param name="phase">Specifies the phase.</param>
/// <param name="nN">Specifies the number of iterations (based on the ITERATION_TYPE) to run, or -1 to ignore.</param>
/// <param name="type">Specifies the iteration type (required = ITERATION).</param>
/// <param name="step">Specifies the training step (used only during debugging).</param>
public void Run(Phase phase, int nN, ITERATOR_TYPE type, TRAIN_STEP step)
{
    if (type != ITERATOR_TYPE.ITERATION)
        throw new Exception("The TrainerRNN only supports the ITERATION type.");

    StateBase s = getData(phase, -1);

    while (!m_brain.Cancel.WaitOne(0) && !s.Done)
    {
        if (phase == Phase.TEST)
            m_brain.Test(s, nN);
        else if (phase == Phase.TRAIN)
            m_brain.Train(s, nN, step);

        s = getData(phase, 1);
    }
}
/// <summary>
/// Create a new trainer and use it to run a training cycle using the current 'stage' = RNN or RL.
/// </summary>
/// <param name="mycaffe">Specifies the MyCaffeControl to use.</param>
/// <param name="nIterationOverride">Specifies the iterations to run if greater than zero.</param>
/// <param name="type">Specifies the type of iterator to use.</param>
/// <param name="step">Optionally, specifies whether or not to step the training for debugging (default = NONE).</param>
public void Train(Component mycaffe, int nIterationOverride, ITERATOR_TYPE type = ITERATOR_TYPE.ITERATION, TRAIN_STEP step = TRAIN_STEP.NONE)
{
    if (m_itrainer == null)
        m_itrainer = createTrainer(mycaffe, getStage());

    if (nIterationOverride == -1)
        nIterationOverride = m_nIterations;

    m_itrainer.Train(nIterationOverride, type, step);
    m_itrainer.Shutdown(1000);
    m_itrainer = null;
}
/// <summary>
/// The Run method provides the main loop that performs the following steps:
/// 1.) get state
/// 2.) build experience
/// 3.) create policy gradients
/// 4.) train on experiences
/// </summary>
/// <param name="phase">Specifies the phase.</param>
/// <param name="nN">Specifies the number of iterations (based on the ITERATION_TYPE) to run, or -1 to ignore.</param>
/// <param name="type">Specifies the iteration type (default = ITERATION).</param>
/// <param name="step">Specifies the training step to take, if any. This is only used when debugging.</param>
public void Run(Phase phase, int nN, ITERATOR_TYPE type, TRAIN_STEP step)
{
    IMemoryCollection iMemory = MemoryCollectionFactory.CreateMemory(m_memType, m_nMemorySize, m_fPriorityAlpha);
    int nIteration = 1;
    double dfRunningReward = 0;
    double dfEpisodeReward = 0;
    int nEpisode = 0;
    bool bDifferent = false;

    StateBase state = getData(phase, -1, -1);

    // Preprocess the observation.
    SimpleDatum x = m_brain.Preprocess(state, m_bUseRawInput, out bDifferent, true);

    // Set the initial target model to the current model.
    m_brain.UpdateTargetModel();

    while (!m_brain.Cancel.WaitOne(0) && !isAtIteration(nN, type, nIteration, nEpisode))
    {
        // Forward the policy network and sample an action.
        int action = m_brain.act(x, state.Clip, state.ActionCount);

        // Take the next step using the action.
        StateBase state_next = getData(phase, action, nIteration);

        // Preprocess the next observation.
        SimpleDatum x_next = m_brain.Preprocess(state_next, m_bUseRawInput, out bDifferent);
        if (!bDifferent)
            m_brain.Log.WriteLine("WARNING: The current state is the same as the previous state!");

        // Build up episode memory, using the reward for taking the action.
        iMemory.Add(new MemoryItem(state, x, action, state_next, x_next, state_next.Reward, state_next.Done, nIteration, nEpisode));
        dfEpisodeReward += state_next.Reward;

        // Do the training once enough experiences have been collected.
        if (iMemory.Count > m_brain.BatchSize)
        {
            double dfBeta = beta_by_frame(nIteration);
            MemoryCollection rgSamples = iMemory.GetSamples(m_random, m_brain.BatchSize, dfBeta);
            m_brain.Train(nIteration, rgSamples, state.ActionCount);
            iMemory.Update(rgSamples);

            if (nIteration % m_nUpdateTargetFreq == 0)
                m_brain.UpdateTargetModel();
        }

        if (state_next.Done)
        {
            // Update the running reward (an exponential moving average of the episode rewards).
            dfRunningReward = dfRunningReward * 0.99 + dfEpisodeReward * 0.01;

            nEpisode++;
            updateStatus(nIteration, nEpisode, dfEpisodeReward, dfRunningReward, 0, 0, m_brain.GetModelUpdated());

            state = getData(phase, -1, -1);
            x = m_brain.Preprocess(state, m_bUseRawInput, out bDifferent, true);
            dfEpisodeReward = 0;
        }
        else
        {
            state = state_next;
            x = x_next;
        }

        nIteration++;
    }

    iMemory.CleanUp();
}
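The beta_by_frame call above anneals the importance-sampling exponent used when sampling from the prioritized replay memory. Its body is not shown here; the sketch below is a plausible implementation following the standard schedule from the prioritized experience replay literature, where beta is annealed linearly from a starting value up to 1.0. The field names m_fBetaStart and m_nBetaFrames are assumptions, not confirmed by the source.

using System;

// Plausible sketch of beta_by_frame (its body is not shown above).
// m_fBetaStart and m_nBetaFrames are assumed configuration values.
class BetaScheduleSketch
{
    double m_fBetaStart = 0.4;   // assumed starting beta
    int m_nBetaFrames = 100000;  // assumed annealing horizon

    public double beta_by_frame(int nIteration)
    {
        // Anneal beta linearly from m_fBetaStart to 1.0 over m_nBetaFrames iterations.
        return Math.Min(1.0, m_fBetaStart + nIteration * (1.0 - m_fBetaStart) / m_nBetaFrames);
    }
}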
/// <summary>
/// Create a new trainer and use it to run a training cycle.
/// </summary>
/// <param name="mycaffe">Specifies the MyCaffeControl to use.</param>
/// <param name="nIterationOverride">Specifies the iterations to run if greater than zero.</param>
/// <param name="type">Specifies the type of iterator to use.</param>
/// <param name="step">Optionally, specifies whether or not to step the training for debugging (default = NONE).</param>
public void Train(Component mycaffe, int nIterationOverride, ITERATOR_TYPE type = ITERATOR_TYPE.ITERATION, TRAIN_STEP step = TRAIN_STEP.NONE)
{
    if (m_itrainer == null)
        m_itrainer = createTrainer(mycaffe);

    if (nIterationOverride == -1)
        nIterationOverride = m_nIterations;

    m_itrainer.Train(nIterationOverride, type, step);
    cleanup(0);
}
/// <summary>
/// The constructor.
/// </summary>
/// <param name="net">Specifies the training network.</param>
/// <param name="step">Specifies whether or not to step the operation.</param>
public CustomForwardBackArgs(Net<T> net, TRAIN_STEP step)
{
    m_net = net;
    m_step = step;
}
/// <summary>
/// Select an action using an epsilon-greedy policy: a random action while exploring (or, while training, with probability equal to the decaying exploration rate), otherwise the action returned by the brain.
/// </summary>
private int getAction(int nIteration, SimpleDatum sd, SimpleDatum sdClip, int nActionCount, TRAIN_STEP step)
{
    if (step == TRAIN_STEP.NONE)
    {
        switch (m_state)
        {
            case STATE.EXPLORING:
                return m_random.Next(nActionCount);

            case STATE.TRAINING:
                // Decay the exploration rate linearly down to its floor.
                if (m_dfExplorationRate > m_dfEpsEnd)
                    m_dfExplorationRate -= m_dfEpsDelta;

                if (m_random.NextDouble() < m_dfExplorationRate)
                    return m_random.Next(nActionCount);
                break;
        }
    }

    return m_brain.act(sd, sdClip, nActionCount);
}
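The decay above walks m_dfExplorationRate down to m_dfEpsEnd by m_dfEpsDelta per call. If the delta is derived from a step budget, a plausible initialization would look like the sketch below; the names m_dfEpsStart and m_nEpsSteps are assumptions, and the concrete values are placeholders.

// Plausible sketch of the epsilon-greedy schedule feeding getAction above.
// m_dfEpsStart and m_nEpsSteps are assumed names; the decrement walks the
// exploration rate linearly from the start value down to the end value.
class EpsilonScheduleSketch
{
    double m_dfEpsStart = 1.0;     // assumed starting exploration rate
    double m_dfEpsEnd = 0.1;       // floor, as tested in getAction
    int m_nEpsSteps = 10000;       // assumed number of decay steps
    double m_dfExplorationRate;
    double m_dfEpsDelta;

    public EpsilonScheduleSketch()
    {
        m_dfExplorationRate = m_dfEpsStart;
        m_dfEpsDelta = (m_dfEpsStart - m_dfEpsEnd) / m_nEpsSteps;
    }
}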
/// <summary>
/// The Run method provides the main 'actor' loop that performs the following steps:
/// 1.) get state
/// 2.) build experience
/// 3.) create policy gradients
/// 4.) train on experiences
/// </summary>
/// <param name="phase">Specifies the phase.</param>
/// <param name="nN">Specifies the number of iterations (based on the ITERATION_TYPE) to run, or -1 to ignore.</param>
/// <param name="type">Specifies the iteration type (default = ITERATION).</param>
/// <param name="step">Specifies the training step to take, if any. This is only used when debugging.</param>
public void Run(Phase phase, int nN, ITERATOR_TYPE type, TRAIN_STEP step)
{
    MemoryCollection m_rgMemory = new MemoryCollection();
    double? dfRunningReward = null;
    double dfEpisodeReward = 0;
    int nEpisode = 0;
    int nIteration = 0;

    StateBase s = getData(phase, -1);

    while (!m_brain.Cancel.WaitOne(0) && !isAtIteration(nN, type, nIteration, nEpisode))
    {
        // Preprocess the observation.
        SimpleDatum x = m_brain.Preprocess(s, m_bUseRawInput);

        // Forward the policy network and sample an action.
        float[] rgfAprob;
        int action = m_brain.act(x, s.Clip, out rgfAprob);

        if (step == TRAIN_STEP.FORWARD)
            return;

        // Take the next step using the action.
        StateBase s_ = getData(phase, action);
        dfEpisodeReward += s_.Reward;

        if (phase == Phase.TRAIN)
        {
            // Build up episode memory, using the reward for taking the action.
            m_rgMemory.Add(new MemoryItem(s, x, action, rgfAprob, (float)s_.Reward));

            // An episode has finished.
            if (s_.Done)
            {
                nEpisode++;
                nIteration++;

                m_brain.Reshape(m_rgMemory);

                // Compute the discounted reward (backwards through time).
                float[] rgDiscountedR = m_rgMemory.GetDiscountedRewards(m_fGamma, m_bAllowDiscountReset);
                // Rewards are standardized to be unit normal when set, which helps control the variance of the gradient estimator.
                m_brain.SetDiscountedR(rgDiscountedR);

                // Get the action probabilities, which are used to calculate the initial gradient within the loss function.
                float[] rgfAprobSet = m_rgMemory.GetActionProbabilities();
                m_brain.SetActionProbabilities(rgfAprobSet);

                // Get the action one-hot vectors. When using Softmax, this contains the one-hot vector for
                // each action set (e.g. 3 actions with action 0 set would return the vector <1,0,0>).
                // When using a binary probability (e.g. with Sigmoid), each action set contains only a
                // single element, which is set to the action value itself (e.g. 0 for action '0' and 1 for action '1').
                float[] rgfAonehotSet = m_rgMemory.GetActionOneHotVectors();
                m_brain.SetActionOneHotVectors(rgfAonehotSet);

                // Train for one iteration, which triggers the loss function.
                List<Datum> rgData = m_rgMemory.GetData();
                List<Datum> rgClip = m_rgMemory.GetClip();
                m_brain.SetData(rgData, rgClip);
                m_brain.Train(nIteration, step);

                // Update the running reward (an exponential moving average of the episode rewards).
                if (!dfRunningReward.HasValue)
                    dfRunningReward = dfEpisodeReward;
                else
                    dfRunningReward = dfRunningReward * 0.99 + dfEpisodeReward * 0.01;

                updateStatus(nIteration, nEpisode, dfEpisodeReward, dfRunningReward.Value);

                dfEpisodeReward = 0;
                s = getData(phase, -1);
                m_rgMemory.Clear();

                if (step != TRAIN_STEP.NONE)
                    return;
            }
            else
            {
                s = s_;
            }
        }
        else
        {
            if (s_.Done)
            {
                nEpisode++;

                // Update the running reward.
                if (!dfRunningReward.HasValue)
                    dfRunningReward = dfEpisodeReward;
                else
                    dfRunningReward = dfRunningReward * 0.99 + dfEpisodeReward * 0.01;

                updateStatus(nIteration, nEpisode, dfEpisodeReward, dfRunningReward.Value);

                dfEpisodeReward = 0;
                s = getData(phase, -1);
            }
            else
            {
                s = s_;
            }

            nIteration++;
        }
    }
}
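GetDiscountedRewards above computes returns backwards through time, and SetDiscountedR standardizes them. The sketch below shows what that pair of operations typically looks like; the reset-on-nonzero-reward behavior is an assumption based on the m_bAllowDiscountReset flag (a common convention for games like Pong, where a nonzero reward marks a game boundary), so treat it as illustrative rather than the library's exact implementation.

using System;
using System.Linq;

// Minimal sketch of discounted-reward computation plus standardization,
// mirroring what GetDiscountedRewards/SetDiscountedR appear to do above.
static class DiscountedRewardSketch
{
    public static float[] Discount(float[] rgReward, float fGamma, bool bAllowReset)
    {
        float[] rgR = new float[rgReward.Length];
        float fRunning = 0;

        // Walk backwards so each step accumulates the discounted future reward.
        for (int i = rgReward.Length - 1; i >= 0; i--)
        {
            if (bAllowReset && rgReward[i] != 0)
                fRunning = 0;               // assumed: reset at a game boundary

            fRunning = fRunning * fGamma + rgReward[i];
            rgR[i] = fRunning;
        }

        // Standardize to zero mean, unit variance to reduce gradient variance.
        float fMean = rgR.Average();
        float fStd = (float)Math.Sqrt(rgR.Select(r => (r - fMean) * (r - fMean)).Average());
        if (fStd > 0)
        {
            for (int i = 0; i < rgR.Length; i++)
                rgR[i] = (rgR[i] - fMean) / fStd;
        }

        return rgR;
    }
}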