/// <summary>
/// The constructor.
/// </summary>
/// <param name="icallback">Specifies the callback used for update notifications sent to the parent.</param>
/// <param name="mycaffe">Specifies the instance of MyCaffe with the open project.</param>
/// <param name="properties">Specifies the properties passed into the trainer.</param>
/// <param name="random">Specifies the random number generator used.</param>
/// <param name="phase">Specifies the phase of the internal network to use.</param>
public DqnAgent(IxTrainerCallback icallback, MyCaffeControl<T> mycaffe, PropertySet properties, CryptoRandom random, Phase phase)
{
    m_icallback = icallback;
    m_brain = new Brain<T>(mycaffe, properties, random, phase);
    m_properties = properties;
    m_random = random;

    m_fGamma = (float)properties.GetPropertyAsDouble("Gamma", m_fGamma);
    m_bUseRawInput = properties.GetPropertyAsBool("UseRawInput", m_bUseRawInput);
    m_nMaxMemory = properties.GetPropertyAsInt("MaxMemory", m_nMaxMemory);
    m_nTrainingUpdateFreq = properties.GetPropertyAsInt("TrainingUpdateFreq", m_nTrainingUpdateFreq);
    m_nExplorationNum = properties.GetPropertyAsInt("ExplorationNum", m_nExplorationNum);
    m_nEpsSteps = properties.GetPropertyAsInt("EpsSteps", m_nEpsSteps);
    m_dfEpsStart = properties.GetPropertyAsDouble("EpsStart", m_dfEpsStart);
    m_dfEpsEnd = properties.GetPropertyAsDouble("EpsEnd", m_dfEpsEnd);
    m_dfEpsDelta = (m_dfEpsStart - m_dfEpsEnd) / m_nEpsSteps;
    m_dfExplorationRate = m_dfEpsStart;

    if (m_dfEpsStart < 0 || m_dfEpsStart > 1)
        throw new Exception("The 'EpsStart' is out of range - please specify a real number in the range [0,1]");

    if (m_dfEpsEnd < 0 || m_dfEpsEnd > 1)
        throw new Exception("The 'EpsEnd' is out of range - please specify a real number in the range [0,1]");

    if (m_dfEpsEnd > m_dfEpsStart)
        throw new Exception("The 'EpsEnd' must be less than the 'EpsStart' value.");
}
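The exploration rate decays linearly from EpsStart to EpsEnd over EpsSteps steps via m_dfEpsDelta. A minimal usage sketch follows; the property values and the 'callback'/'mycaffe' instances are illustrative assumptions, not values taken from the source:

// Hypothetical setup - the property names match those read by the constructor above,
// but the values and the 'callback'/'mycaffe' instances are illustrative only.
PropertySet properties = new PropertySet(
    "Gamma=0.99;UseRawInput=False;MaxMemory=50000;TrainingUpdateFreq=5000;" +
    "ExplorationNum=50000;EpsSteps=750000;EpsStart=0.99;EpsEnd=0.01");
// With these values the per-step epsilon decay is (0.99 - 0.01) / 750000 ≈ 1.31e-6.
DqnAgent<float> agent = new DqnAgent<float>(callback, mycaffe, properties, new CryptoRandom(), Phase.TRAIN);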
/// <summary>
/// Initialize the gym with the specified properties.
/// </summary>
/// <param name="log">Specifies the output log to use.</param>
/// <param name="properties">Specifies the properties containing Gym specific initialization parameters.</param>
/// <remarks>
/// The AtariGym uses the following initialization properties.
///
/// Init1=value - specifies the default force to use.
/// Init2=value - specifies whether to use an additive force (1) or not (0).
/// </remarks>
public void Initialize(Log log, PropertySet properties)
{
    m_dfForce = 10;
    m_bAdditive = false;

    if (properties != null)
    {
        m_dfForce = properties.GetPropertyAsDouble("Init1", 10);
        m_bAdditive = (properties.GetPropertyAsDouble("Init2", 0) != 0);
    }

    m_log = log;
    m_nMaxSteps = 0;

    Reset(false);
}
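A minimal initialization sketch, assuming an already-constructed gym and Log instance; the semicolon-delimited name=value property string and its values are assumptions for illustration:

// Hypothetical call - Init1 sets the default force to 10, Init2=1 enables the additive force.
PropertySet props = new PropertySet("Init1=10;Init2=1");
gym.Initialize(log, props); // 'gym' and 'log' are assumed pre-existing instances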
/// <summary>
/// The constructor.
/// </summary>
/// <param name="mycaffe">Specifies the instance of MyCaffe associated with the open project - when using more than one Brain, this is the master project.</param>
/// <param name="properties">Specifies the properties passed into the trainer.</param>
/// <param name="random">Specifies the random number generator used.</param>
/// <param name="phase">Specifies the phase under which to run.</param>
public Brain(MyCaffeControl<T> mycaffe, PropertySet properties, CryptoRandom random, Phase phase)
{
    m_mycaffe = mycaffe;
    m_solver = mycaffe.GetInternalSolver();
    m_netOutput = mycaffe.GetInternalNet(phase);
    m_netTarget = new Net<T>(m_mycaffe.Cuda, m_mycaffe.Log, m_netOutput.net_param, m_mycaffe.CancelEvent, null, phase);
    m_properties = properties;
    m_random = random;

    Blob<T> data = m_netOutput.blob_by_name("data");
    if (data == null)
        m_mycaffe.Log.FAIL("Missing the expected input 'data' blob!");

    m_nBatchSize = data.num;

    Blob<T> logits = m_netOutput.blob_by_name("logits");
    if (logits == null)
        m_mycaffe.Log.FAIL("Missing the expected input 'logits' blob!");

    m_nActionCount = logits.channels;

    m_transformer = m_mycaffe.DataTransformer;

    m_blobActions = new Blob<T>(m_mycaffe.Cuda, m_mycaffe.Log, false);
    m_blobQValue = new Blob<T>(m_mycaffe.Cuda, m_mycaffe.Log);
    m_blobNextQValue = new Blob<T>(m_mycaffe.Cuda, m_mycaffe.Log);
    m_blobExpectedQValue = new Blob<T>(m_mycaffe.Cuda, m_mycaffe.Log);
    m_blobDone = new Blob<T>(m_mycaffe.Cuda, m_mycaffe.Log, false);
    m_blobLoss = new Blob<T>(m_mycaffe.Cuda, m_mycaffe.Log);
    m_blobWeights = new Blob<T>(m_mycaffe.Cuda, m_mycaffe.Log, false);

    m_fGamma = (float)properties.GetPropertyAsDouble("Gamma", m_fGamma);

    m_memLoss = m_netOutput.FindLastLayer(LayerParameter.LayerType.MEMORY_LOSS) as MemoryLossLayer<T>;
    if (m_memLoss == null)
        m_mycaffe.Log.FAIL("Missing the expected MEMORY_LOSS layer!");

    double? dfRate = mycaffe.CurrentProject.GetSolverSettingAsNumeric("base_lr");
    if (dfRate.HasValue)
        m_dfLearningRate = dfRate.Value;

    m_nMiniBatch = m_properties.GetPropertyAsInt("MiniBatch", m_nMiniBatch);
    m_bUseAcceleratedTraining = properties.GetPropertyAsBool("UseAcceleratedTraining", false);

    if (m_nMiniBatch > 1)
    {
        m_colAccumulatedGradients = m_netOutput.learnable_parameters.Clone();
        m_colAccumulatedGradients.SetDiff(0);
    }
}
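A minimal construction sketch, assuming a loaded MyCaffeControl<float> ('mycaffe') whose internal net exposes the 'data' and 'logits' blobs and ends with a MEMORY_LOSS layer; the property values are illustrative, not defaults from the source:

// Hypothetical values - a 'MiniBatch' greater than 1 enables the gradient
// accumulation path at the end of the constructor.
PropertySet props = new PropertySet("Gamma=0.99;MiniBatch=4;UseAcceleratedTraining=True");
Brain<float> brain = new Brain<float>(mycaffe, props, new CryptoRandom(), Phase.TRAIN);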
/// <summary>
/// The constructor.
/// </summary>
/// <param name="icallback">Specifies the callback used for update notifications sent to the parent.</param>
/// <param name="mycaffe">Specifies the instance of MyCaffe with the open project.</param>
/// <param name="properties">Specifies the properties passed into the trainer.</param>
/// <param name="random">Specifies the random number generator used.</param>
/// <param name="phase">Specifies the phase of the internal network to use.</param>
public DqnAgent(IxTrainerCallback icallback, MyCaffeControl<T> mycaffe, PropertySet properties, CryptoRandom random, Phase phase)
{
    m_icallback = icallback;
    m_brain = new Brain<T>(mycaffe, properties, random, phase);
    m_properties = properties;
    m_random = random;

    m_fGamma = (float)properties.GetPropertyAsDouble("Gamma", m_fGamma);
    m_bUseRawInput = properties.GetPropertyAsBool("UseRawInput", m_bUseRawInput);
}
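This trimmed variant reads only the discount factor and the raw-input flag, leaving the remaining fields at their defaults. A sketch under the same assumptions as the earlier example ('callback' and 'mycaffe' are assumed pre-existing):

// Hypothetical values; unspecified properties fall back to the field defaults.
PropertySet props = new PropertySet("Gamma=0.95;UseRawInput=True");
DqnAgent<float> agent = new DqnAgent<float>(callback, mycaffe, props, new CryptoRandom(), Phase.TRAIN);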
public Brain(MyCaffeControl<T> mycaffe, PropertySet properties, CryptoRandom random, IxTrainerCallbackRNN icallback, Phase phase, BucketCollection rgVocabulary, bool bUsePreloadData, string strRunProperties = null)
{
    string strOutputBlob = null;

    if (strRunProperties != null)
        m_runProperties = new PropertySet(strRunProperties);

    m_icallback = icallback;
    m_mycaffe = mycaffe;
    m_properties = properties;
    m_random = random;
    m_rgVocabulary = rgVocabulary;
    m_bUsePreloadData = bUsePreloadData;

    m_nSolverSequenceLength = m_properties.GetPropertyAsInt("SequenceLength", -1);
    m_bDisableVocabulary = m_properties.GetPropertyAsBool("DisableVocabulary", false);
    m_nThreads = m_properties.GetPropertyAsInt("Threads", 1);
    m_dfScale = m_properties.GetPropertyAsDouble("Scale", 1.0);

    if (m_nThreads > 1)
        m_dataPool.Initialize(m_nThreads, icallback);

    if (m_runProperties != null)
    {
        m_dfTemperature = Math.Abs(m_runProperties.GetPropertyAsDouble("Temperature", 0));
        if (m_dfTemperature > 1.0)
            m_dfTemperature = 1.0;

        string strPhaseOnRun = m_runProperties.GetProperty("PhaseOnRun", false);
        switch (strPhaseOnRun)
        {
            case "RUN":
                m_phaseOnRun = Phase.RUN;
                break;

            case "TEST":
                m_phaseOnRun = Phase.TEST;
                break;

            case "TRAIN":
                m_phaseOnRun = Phase.TRAIN;
                break;
        }

        if (phase == Phase.RUN && m_phaseOnRun != Phase.NONE)
        {
            if (m_phaseOnRun != Phase.RUN)
                m_mycaffe.Log.WriteLine("Warning: Running on the '" + m_phaseOnRun.ToString() + "' network.");

            strOutputBlob = m_runProperties.GetProperty("OutputBlob", false);
            if (strOutputBlob == null)
                throw new Exception("You must specify the 'OutputBlob' when Running with a phase other than RUN.");

            strOutputBlob = Utility.Replace(strOutputBlob, '~', ';');

            phase = m_phaseOnRun;
        }
    }

    m_net = mycaffe.GetInternalNet(phase);
    if (m_net == null)
    {
        mycaffe.Log.WriteLine("WARNING: Test net does not exist, set test_iteration > 0. Using TRAIN phase instead.");
        m_net = mycaffe.GetInternalNet(Phase.TRAIN);
    }

    // Find the first LSTM layer to determine how to load the data.
    // NOTE: Only LSTM has a special loading order; other layers use the standard N, C, H, W ordering.
    LSTMLayer<T> lstmLayer = null;
    LSTMSimpleLayer<T> lstmSimpleLayer = null;
    foreach (Layer<T> layer1 in m_net.layers)
    {
        if (layer1.layer_param.type == LayerParameter.LayerType.LSTM)
        {
            lstmLayer = layer1 as LSTMLayer<T>;
            m_lstmType = LayerParameter.LayerType.LSTM;
            break;
        }
        else if (layer1.layer_param.type == LayerParameter.LayerType.LSTM_SIMPLE)
        {
            lstmSimpleLayer = layer1 as LSTMSimpleLayer<T>;
            m_lstmType = LayerParameter.LayerType.LSTM_SIMPLE;
            break;
        }
    }

    if (lstmLayer == null && lstmSimpleLayer == null)
        throw new Exception("Could not find the required LSTM or LSTM_SIMPLE layer!");

    if (m_phaseOnRun != Phase.NONE && m_phaseOnRun != Phase.RUN && strOutputBlob != null)
    {
        if ((m_blobOutput = m_net.FindBlob(strOutputBlob)) == null)
            throw new Exception("Could not find the 'Output' layer top named '" + strOutputBlob + "'!");
    }

    if ((m_blobData = m_net.FindBlob("data")) == null)
        throw new Exception("Could not find the 'Input' layer top named 'data'!");

    if ((m_blobClip = m_net.FindBlob("clip")) == null)
        throw new Exception("Could not find the 'Input' layer top named 'clip'!");

    Layer<T> layer = m_net.FindLastLayer(LayerParameter.LayerType.INNERPRODUCT);
    m_mycaffe.Log.CHECK(layer != null, "Could not find an ending INNERPRODUCT layer!");

    if (!m_bDisableVocabulary)
    {
        m_nVocabSize = (int)layer.layer_param.inner_product_param.num_output;
        if (rgVocabulary != null)
            m_mycaffe.Log.CHECK_EQ(m_nVocabSize, rgVocabulary.Count, "The vocabulary count = '" + rgVocabulary.Count.ToString() + "' and last inner product output count = '" + m_nVocabSize.ToString() + "' - these do not match but they should!");
    }

    if (m_lstmType == LayerParameter.LayerType.LSTM)
    {
        m_nSequenceLength = m_blobData.shape(0);
        m_nBatchSize = m_blobData.shape(1);
    }
    else
    {
        m_nBatchSize = (int)lstmSimpleLayer.layer_param.lstm_simple_param.batch_size;
        m_nSequenceLength = m_blobData.shape(0) / m_nBatchSize;

        if (phase == Phase.RUN)
        {
            m_nBatchSize = 1;

            List<int> rgNewShape = new List<int>() { m_nSequenceLength, 1 };
            m_blobData.Reshape(rgNewShape);
            m_blobClip.Reshape(rgNewShape);
            m_net.Reshape();
        }
    }

    m_mycaffe.Log.CHECK_EQ(m_nSequenceLength, m_blobData.num, "The data num must equal the sequence length of " + m_nSequenceLength.ToString());

    m_rgDataInput = new T[m_nSequenceLength * m_nBatchSize];

    T[] rgClipInput = new T[m_nSequenceLength * m_nBatchSize];
    m_mycaffe.Log.CHECK_EQ(rgClipInput.Length, m_blobClip.count(), "The clip count must equal the sequence length * batch size: " + rgClipInput.Length.ToString());

    m_tZero = (T)Convert.ChangeType(0, typeof(T));
    m_tOne = (T)Convert.ChangeType(1, typeof(T));

    for (int i = 0; i < rgClipInput.Length; i++)
    {
        if (m_lstmType == LayerParameter.LayerType.LSTM)
            rgClipInput[i] = (i < m_nBatchSize) ? m_tZero : m_tOne;
        else
            rgClipInput[i] = (i % m_nSequenceLength == 0) ? m_tZero : m_tOne;
    }

    m_blobClip.mutable_cpu_data = rgClipInput;

    if (phase != Phase.RUN)
    {
        m_solver = mycaffe.GetInternalSolver();
        m_solver.OnStart += m_solver_OnStart;
        m_solver.OnTestStart += m_solver_OnTestStart;
        m_solver.OnTestingIteration += m_solver_OnTestingIteration;
        m_solver.OnTrainingIteration += m_solver_OnTrainingIteration;

        if ((m_blobLabel = m_net.FindBlob("label")) == null)
            throw new Exception("Could not find the 'Input' layer top named 'label'!");

        m_nSequenceLengthLabel = m_blobLabel.count(0, 2);
        m_rgLabelInput = new T[m_nSequenceLengthLabel];
        m_mycaffe.Log.CHECK_EQ(m_rgLabelInput.Length, m_blobLabel.count(), "The label count must equal the label sequence length * batch size: " + m_rgLabelInput.Length.ToString());
        m_mycaffe.Log.CHECK(m_nSequenceLengthLabel == m_nSequenceLength * m_nBatchSize || m_nSequenceLengthLabel == 1, "The label sequence length must be 1 or equal to the length of the sequence: " + m_nSequenceLength.ToString());
    }
}
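The clip input built above tells the recurrent layers where each sequence begins: a 0 resets the hidden state and a 1 carries it forward. An illustrative trace (not from the source) for a sequence length of 4 and a batch size of 2:

// LSTM is time-major (T x N), so the first m_nBatchSize entries cover time
// step 0 across the whole batch:                          0 0 1 1 1 1 1 1
// LSTM_SIMPLE lays sequences out contiguously, so a zero
// leads each individual sequence:                         0 1 1 1 0 1 1 1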
/// <summary>
/// The constructor.
/// </summary>
/// <param name="mycaffe">Specifies the instance of MyCaffe associated with the open project - when using more than one Brain, this is the master project.</param>
/// <param name="properties">Specifies the properties passed into the trainer.</param>
/// <param name="random">Specifies the random number generator used.</param>
/// <param name="phase">Specifies the phase under which to run.</param>
public Brain(MyCaffeControl<T> mycaffe, PropertySet properties, CryptoRandom random, Phase phase)
{
    m_mycaffe = mycaffe;
    m_solver = mycaffe.GetInternalSolver();
    m_netOutput = mycaffe.GetInternalNet(phase);
    m_netTarget = new Net<T>(m_mycaffe.Cuda, m_mycaffe.Log, m_netOutput.net_param, m_mycaffe.CancelEvent, null, phase);
    m_properties = properties;
    m_random = random;

    Blob<T> data = m_netOutput.blob_by_name("data");
    if (data == null)
        m_mycaffe.Log.FAIL("Missing the expected input 'data' blob!");

    m_nFramesPerX = data.channels;
    m_nBatchSize = data.num;

    Blob<T> logits = m_netOutput.blob_by_name("logits");
    if (logits == null)
        m_mycaffe.Log.FAIL("Missing the expected input 'logits' blob!");

    m_nActionCount = logits.channels;

    m_transformer = m_mycaffe.DataTransformer;
    if (m_transformer == null)
    {
        TransformationParameter trans_param = new TransformationParameter();
        int nC = m_mycaffe.CurrentProject.Dataset.TrainingSource.ImageChannels;
        int nH = m_mycaffe.CurrentProject.Dataset.TrainingSource.ImageHeight;
        int nW = m_mycaffe.CurrentProject.Dataset.TrainingSource.ImageWidth;
        m_transformer = new DataTransformer<T>(m_mycaffe.Cuda, m_mycaffe.Log, trans_param, phase, nC, nH, nW);
    }

    for (int i = 0; i < m_nFramesPerX; i++)
    {
        m_transformer.param.mean_value.Add(255 / 2); // center each frame (integer division: 127)
    }

    m_transformer.param.scale = 1.0 / 255; // normalize
    m_transformer.Update();

    m_blobActions = new Blob<T>(m_mycaffe.Cuda, m_mycaffe.Log, false);
    m_blobQValue = new Blob<T>(m_mycaffe.Cuda, m_mycaffe.Log);
    m_blobNextQValue = new Blob<T>(m_mycaffe.Cuda, m_mycaffe.Log);
    m_blobExpectedQValue = new Blob<T>(m_mycaffe.Cuda, m_mycaffe.Log);
    m_blobDone = new Blob<T>(m_mycaffe.Cuda, m_mycaffe.Log, false);
    m_blobLoss = new Blob<T>(m_mycaffe.Cuda, m_mycaffe.Log);
    m_blobWeights = new Blob<T>(m_mycaffe.Cuda, m_mycaffe.Log, false);

    m_fGamma = (float)properties.GetPropertyAsDouble("Gamma", m_fGamma);

    m_memLoss = m_netOutput.FindLastLayer(LayerParameter.LayerType.MEMORY_LOSS) as MemoryLossLayer<T>;
    if (m_memLoss == null)
        m_mycaffe.Log.FAIL("Missing the expected MEMORY_LOSS layer!");

    double? dfRate = mycaffe.CurrentProject.GetSolverSettingAsNumeric("base_lr");
    if (dfRate.HasValue)
        m_dfLearningRate = dfRate.Value;

    m_nMiniBatch = m_properties.GetPropertyAsInt("MiniBatch", m_nMiniBatch);
    m_bUseAcceleratedTraining = properties.GetPropertyAsBool("UseAcceleratedTraining", false);

    if (m_nMiniBatch > 1)
    {
        m_colAccumulatedGradients = m_netOutput.learnable_parameters.Clone();
        m_colAccumulatedGradients.SetDiff(0);
    }
}
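With a per-frame mean of 127 (note that 255 / 2 is integer division) and a scale of 1/255, the transformer maps an 8-bit pixel v to (v - 127) / 255. A quick check of the endpoints, as an illustrative calculation only:

// Illustrative arithmetic only - mirrors mean subtraction followed by scaling.
double dfMean = 255 / 2;                // integer division: 127
double dfLo = (0 - dfMean) / 255.0;     // ≈ -0.498
double dfHi = (255 - dfMean) / 255.0;   // ≈  0.502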
public Brain(MyCaffeControl<T> mycaffe, PropertySet properties, CryptoRandom random, IxTrainerCallbackRNN icallback, Phase phase, BucketCollection rgVocabulary, string strRunProperties = null)
{
    string strOutputBlob = null;

    if (strRunProperties != null)
        m_runProperties = new PropertySet(strRunProperties);

    m_icallback = icallback;
    m_mycaffe = mycaffe;
    m_properties = properties;
    m_random = random;
    m_rgVocabulary = rgVocabulary;

    if (m_runProperties != null)
    {
        m_dfTemperature = m_runProperties.GetPropertyAsDouble("Temperature", 0);

        string strPhaseOnRun = m_runProperties.GetProperty("PhaseOnRun", false);
        switch (strPhaseOnRun)
        {
            case "RUN":
                m_phaseOnRun = Phase.RUN;
                break;

            case "TEST":
                m_phaseOnRun = Phase.TEST;
                break;

            case "TRAIN":
                m_phaseOnRun = Phase.TRAIN;
                break;
        }

        if (phase == Phase.RUN && m_phaseOnRun != Phase.NONE)
        {
            if (m_phaseOnRun != Phase.RUN)
                m_mycaffe.Log.WriteLine("Warning: Running on the '" + m_phaseOnRun.ToString() + "' network.");

            strOutputBlob = m_runProperties.GetProperty("OutputBlob", false);
            if (strOutputBlob == null)
                throw new Exception("You must specify the 'OutputBlob' when Running with a phase other than RUN.");

            strOutputBlob = Utility.Replace(strOutputBlob, '~', ';');

            phase = m_phaseOnRun;
        }
    }

    m_net = mycaffe.GetInternalNet(phase);

    // Find the first LSTM layer to determine how to load the data.
    // NOTE: Only LSTM has a special loading order; other layers use the standard N, C, H, W ordering.
    LSTMLayer<T> lstmLayer = null;
    LSTMSimpleLayer<T> lstmSimpleLayer = null;
    foreach (Layer<T> layer1 in m_net.layers)
    {
        if (layer1.layer_param.type == LayerParameter.LayerType.LSTM)
        {
            lstmLayer = layer1 as LSTMLayer<T>;
            m_lstmType = LayerParameter.LayerType.LSTM;
            break;
        }
        else if (layer1.layer_param.type == LayerParameter.LayerType.LSTM_SIMPLE)
        {
            lstmSimpleLayer = layer1 as LSTMSimpleLayer<T>;
            m_lstmType = LayerParameter.LayerType.LSTM_SIMPLE;
            break;
        }
    }

    if (lstmLayer == null && lstmSimpleLayer == null)
        throw new Exception("Could not find the required LSTM or LSTM_SIMPLE layer!");

    if (m_phaseOnRun != Phase.NONE && m_phaseOnRun != Phase.RUN && strOutputBlob != null)
    {
        if ((m_blobOutput = m_net.FindBlob(strOutputBlob)) == null)
            throw new Exception("Could not find the 'Output' layer top named '" + strOutputBlob + "'!");
    }

    if ((m_blobData = m_net.FindBlob("data")) == null)
        throw new Exception("Could not find the 'Input' layer top named 'data'!");

    if ((m_blobClip = m_net.FindBlob("clip")) == null)
        throw new Exception("Could not find the 'Input' layer top named 'clip'!");

    Layer<T> layer = m_net.FindLastLayer(LayerParameter.LayerType.INNERPRODUCT);
    m_mycaffe.Log.CHECK(layer != null, "Could not find an ending INNERPRODUCT layer!");

    m_nVocabSize = (int)layer.layer_param.inner_product_param.num_output;
    if (rgVocabulary != null)
        m_mycaffe.Log.CHECK_EQ(m_nVocabSize, rgVocabulary.Count, "The vocabulary count and last inner product output count should match!");

    if (m_lstmType == LayerParameter.LayerType.LSTM)
    {
        m_nSequenceLength = m_blobData.shape(0);
        m_nBatchSize = m_blobData.shape(1);
    }
    else
    {
        m_nBatchSize = (int)lstmSimpleLayer.layer_param.lstm_simple_param.batch_size;
        m_nSequenceLength = m_blobData.shape(0) / m_nBatchSize;

        if (phase == Phase.RUN)
        {
            m_nBatchSize = 1;

            List<int> rgNewShape = new List<int>() { m_nSequenceLength, 1 };
            m_blobData.Reshape(rgNewShape);
            m_blobClip.Reshape(rgNewShape);
            m_net.Reshape();
        }
    }

    m_mycaffe.Log.CHECK_EQ(m_blobData.count(), m_blobClip.count(), "The data and clip blobs must have the same count!");

    m_rgDataInput = new T[m_nSequenceLength * m_nBatchSize];

    T[] rgClipInput = new T[m_nSequenceLength * m_nBatchSize];
    m_tZero = (T)Convert.ChangeType(0, typeof(T));
    m_tOne = (T)Convert.ChangeType(1, typeof(T));

    for (int i = 0; i < rgClipInput.Length; i++)
    {
        if (m_lstmType == LayerParameter.LayerType.LSTM)
            rgClipInput[i] = (i < m_nBatchSize) ? m_tZero : m_tOne;
        else
            rgClipInput[i] = (i % m_nSequenceLength == 0) ? m_tZero : m_tOne;
    }

    m_blobClip.mutable_cpu_data = rgClipInput;

    if (phase != Phase.RUN)
    {
        m_solver = mycaffe.GetInternalSolver();
        m_solver.OnStart += m_solver_OnStart;
        m_solver.OnTestStart += m_solver_OnTestStart;
        m_solver.OnTestingIteration += m_solver_OnTestingIteration;
        m_solver.OnTrainingIteration += m_solver_OnTrainingIteration;

        if ((m_blobLabel = m_net.FindBlob("label")) == null)
            throw new Exception("Could not find the 'Input' layer top named 'label'!");

        m_rgLabelInput = new T[m_nSequenceLength * m_nBatchSize];
        m_mycaffe.Log.CHECK_EQ(m_blobData.count(), m_blobLabel.count(), "The data and label blobs must have the same count!");
    }
}
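A construction sketch for this variant showing the optional run-properties string; 'ip1' is a placeholder blob name, and 'mycaffe', 'props', 'callback', and 'vocab' are assumed pre-existing instances:

// Hypothetical run properties - PhaseOnRun=TEST redirects a RUN request to the
// TEST network, which is why OutputBlob must then name the blob to read; any
// '~' in the blob name is converted to ';' by the constructor.
string strRunProps = "Temperature=0.5;PhaseOnRun=TEST;OutputBlob=ip1";
Brain<float> brain = new Brain<float>(mycaffe, props, new CryptoRandom(), callback, Phase.RUN, vocab, strRunProps);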