/// <summary>
/// Resets the trainer to a clean state: clears all per-agent episode
/// bookkeeping, the stats logger and the data buffer, resets every agent
/// that had recorded history, and reloads example episodes if assigned.
/// </summary>
public override void ResetTrainer()
{
    base.ResetTrainer();

    // BUGFIX: Dictionary<,>.Keys is a live view of the dictionary. The
    // original captured it and then called Clear(), so by the time the
    // foreach ran the view was empty and no agent was ever reset.
    // Snapshot the keys into a list before clearing.
    var agents = new List<Agent>(statesEpisodeHistory.Keys);

    stats.ClearAll();
    statesEpisodeHistory.Clear();
    rewardsEpisodeHistory.Clear();
    actionprobsEpisodeHistory.Clear();
    actionsEpisodeHistory.Clear();
    valuesEpisodeHistory.Clear();
    accumulatedRewards.Clear();
    episodeSteps.Clear();
    dataBuffer.ClearData();

    foreach (var agent in agents)
    {
        // Unity's implicit bool conversion: skips destroyed agents.
        if (agent)
        {
            agent.AgentReset();
        }
    }

    goodEpisodeHistory.Clear();
    policyTrainEpisodeHistory.Clear();

    // Re-seed the good-episode history from the serialized examples, if any.
    if (exampleEpisodes != null)
    {
        goodEpisodeHistory = LoadHistory(exampleEpisodes.bytes);
    }
}
/// <summary>
/// Serializes <paramref name="history"/> and writes it as a binary file
/// under <c>checkpointPath</c>. Logs and returns without saving when
/// <paramref name="savefilename"/> is null or empty.
/// </summary>
/// <param name="history">The episode history to persist.</param>
/// <param name="savefilename">File name (relative to <c>checkpointPath</c>).</param>
public void SaveHistory(SortedRawHistory history, string savefilename)
{
    if (string.IsNullOrEmpty(savefilename))
    {
        Debug.Log("savefilename empty. history not saved.");
        return;
    }

    // SECURITY NOTE(review): BinaryFormatter is obsolete and unsafe when
    // deserializing untrusted data. Kept for compatibility with existing
    // saved histories; consider migrating to a safer serializer.
    var binFormatter = new BinaryFormatter();
    byte[] data;
    // Dispose the stream deterministically (original leaked it).
    using (var mStream = new MemoryStream())
    {
        binFormatter.Serialize(mStream, history);
        data = mStream.ToArray();
    }

    // Normalize both separator styles to the platform separator so the
    // combined path is valid regardless of how checkpointPath was written.
    var fullPath = Path.GetFullPath(Path.Combine(checkpointPath, savefilename));
    fullPath = fullPath.Replace('/', Path.DirectorySeparatorChar);
    fullPath = fullPath.Replace('\\', Path.DirectorySeparatorChar);

    Directory.CreateDirectory(Path.GetDirectoryName(fullPath));
    File.WriteAllBytes(fullPath, data);
    Debug.Log("history saved to " + fullPath);
}
/// <summary>
/// Validates the assigned model, trainer parameters and brain; allocates
/// the per-agent episode histories and the training data buffers; loads
/// example episodes if assigned; and initializes the model (optionally
/// continuing from a saved checkpoint).
/// </summary>
public override void Initialize()
{
    iModelHPPO = modelRef as IRLModelHPPO;
    Debug.Assert(iModelHPPO != null, "Please assign a model that implement interface IRLModelHPPO to modelRef");
    parametersPPO = parameters as TrainerParamsPPO;
    Debug.Assert(parametersPPO != null, "Please Specify PPO Trainer Parameters");
    Debug.Assert(BrainToTrain != null, "brain can not be null");

    // Per-agent episode bookkeeping, keyed by the agent instance.
    statesEpisodeHistory = new Dictionary<Agent, List<float>>();
    rewardsEpisodeHistory = new Dictionary<Agent, List<float>>();
    actionsEpisodeHistory = new Dictionary<Agent, List<float>>();
    actionprobsEpisodeHistory = new Dictionary<Agent, List<float>>();
    valuesEpisodeHistory = new Dictionary<Agent, List<float>>();
    visualEpisodeHistory = new Dictionary<Agent, List<List<float[,,]>>>();
    actionMasksEpisodeHistory = new Dictionary<Agent, List<List<float>>>();
    accumulatedRewards = new Dictionary<Agent, float>();
    episodeSteps = new Dictionary<Agent, int>();

    var brainParameters = BrainToTrain.brainParameters;
    Debug.Assert(brainParameters.vectorActionSize.Length > 0, "Action size can not be zero. Please set it in the brain");

    // Continuous space: one float per action dimension; discrete space: one
    // entry per action branch. Hoisted so "Action" and "ActionProb" cannot
    // drift apart (the original duplicated this ternary).
    int actionUnitCount = brainParameters.vectorActionSpaceType == SpaceType.continuous
        ? brainParameters.vectorActionSize[0]
        : brainParameters.vectorActionSize.Length;

    List<DataBuffer.DataInfo> allBufferData = new List<DataBuffer.DataInfo>()
    {
        new DataBuffer.DataInfo("Action", typeof(float), new int[] { actionUnitCount }),
        new DataBuffer.DataInfo("ActionProb", typeof(float), new int[] { actionUnitCount }),
        new DataBuffer.DataInfo("TargetValue", typeof(float), new int[] { 1 }),
        new DataBuffer.DataInfo("OldValue", typeof(float), new int[] { 1 }),
        new DataBuffer.DataInfo("Advantage", typeof(float), new int[] { 1 })
    };

    // Stacked vector observations, if the brain has any.
    if (brainParameters.vectorObservationSize > 0)
    {
        allBufferData.Add(new DataBuffer.DataInfo("VectorObservation", typeof(float),
            new int[] { brainParameters.vectorObservationSize * brainParameters.numStackedVectorObservations }));
    }

    // One HxWxC visual-observation entry per configured camera.
    for (int i = 0; i < brainParameters.cameraResolutions.Length; ++i)
    {
        int width = brainParameters.cameraResolutions[i].width;
        int height = brainParameters.cameraResolutions[i].height;
        int channels = brainParameters.cameraResolutions[i].blackAndWhite ? 1 : 3;
        allBufferData.Add(new DataBuffer.DataInfo("VisualObservation" + i, typeof(float),
            new int[] { height, width, channels }));
    }

    // Discrete action spaces additionally carry a mask per action branch.
    if (brainParameters.vectorActionSpaceType == SpaceType.discrete)
    {
        for (int i = 0; i < brainParameters.vectorActionSize.Length; ++i)
        {
            allBufferData.Add(new DataBuffer.DataInfo("ActionMask" + i, typeof(float),
                new int[] { brainParameters.vectorActionSize[i] }));
        }
    }

    dataBuffer = new DataBuffer(allBufferData.ToArray());
    policyTrainBuffer = new DataBuffer(allBufferData.ToArray());
    tempGoodTrainBuffer = new DataBuffer(allBufferData.ToArray());

    policyTrainEpisodeHistory = new SortedRawHistory();
    goodEpisodeHistory = new SortedRawHistory(goodHistoryCount);
    if (exampleEpisodes != null)
    {
        goodEpisodeHistory = LoadHistory(exampleEpisodes.bytes);
    }

    // Initialize loggers and the neural-network model.
    stats = new StatsLogger();
    modelRef.Initialize(BrainToTrain.brainParameters, isTraining, parameters);
    if (continueFromCheckpoint)
    {
        LoadModel();
    }
}