/// <summary>
/// Sets up the neural-evolution trainer: resolves the model and trainer parameters,
/// initializes the model, measures the tensors to optimize, creates the optimizer and
/// its sample population, and applies an initial set of weights to the model.
/// </summary>
public override void Initialize()
{
    // Resolve the concrete model / parameter types this trainer depends on.
    modeNE = modelRef as INeuralEvolutionModel;
    Debug.Assert(modeNE != null, "Please assign a INeuralEvolutionModel to modelRef");
    parametersNE = parameters as TrainerParamsNeuralEvolution;
    Debug.Assert(parametersNE != null, "Please Specify TrainerNeuralEvolution Trainer Parameters");
    Debug.Assert(BrainToTrain != null, "brain can not be null");

    modelRef.Initialize(BrainToTrain.brainParameters, isTraining, parameters);

    agentsRewards = new Dictionary<Agent, List<float>>();
    rewardsOfCurrentChild = new List<float>();

    // Measure every tensor exposed by the model. tensorSizes and paramDimension are
    // reset together so that calling Initialize() again does not leave stale sizes
    // from a previous call appended in front of the new ones.
    tensorsToOptimize = modeNE.GetWeightsForNeuralEvolution();
    tensorSizes.Clear();
    paramDimension = 0;
    foreach (var t in tensorsToOptimize)
    {
        // Element count of the tensor = product of all its dimensions.
        // NOTE(review): .Value implies nullable dimensions; a null dimension or an
        // empty shape would throw here - confirm the model always reports full shapes.
        int size = t.shape.Aggregate((t1, t2) => t1 * t2).Value;
        tensorSizes.Add(size);
        paramDimension += size;
    }

    optimizer = parametersNE.optimizerType == ESOptimizer.ESOptimizerType.LMMAES
        ? (IMAES) new LMMAES()
        : (IMAES) new MAES();

    // One sample per population member, each sized to the total parameter count.
    samples = new OptimizationSample[parametersNE.populationSize];
    for (int i = 0; i < parametersNE.populationSize; ++i)
    {
        samples[i] = new OptimizationSample(paramDimension);
    }

    //initialize the optimizer
    // The third argument is a zero-filled vector of length paramDimension.
    optimizer.init(paramDimension, parametersNE.populationSize, new double[paramDimension],
        parametersNE.initialStepSize, parametersNE.mode);

    // Generate fresh samples unless a checkpoint was requested and
    // LoadNEDataFromFile() reported success. The short-circuit keeps the load
    // from being attempted when continueFromCheckpoint is false.
    if (!continueFromCheckpoint || !LoadNEDataFromFile())
    {
        optimizer.generateSamples(samples);
    }

    // While training, start from the sample at the current evaluation index;
    // otherwise use the best known sample when one is available.
    if (isTraining)
    {
        SetWeights(samples[currentEvaluationIndex]);
    }
    else if (bestSample != null)
    {
        SetWeights(bestSample);
    }

    stats = new StatsLogger();
}
/// <summary>
/// Sets up the mimic (supervised learning) trainer: resolves the model and trainer
/// parameters, initializes the model, builds the data buffer layout from the brain
/// parameters, and optionally restores the model and training data from checkpoints.
/// </summary>
public override void Initialize()
{
    modelSL = modelRef as ISupervisedLearningModel;
    Debug.Assert(modelSL != null, "Please assign a ISupervisedLearningModel to modelRef");
    Debug.Assert(BrainToTrain != null, "brain can not be null");
    parametersMimic = parameters as TrainerParamsMimic;
    // The message previously named the PPO trainer, which pointed users at the wrong parameters asset.
    Debug.Assert(parametersMimic != null, "Please Specify Mimic Trainer Parameters");

    stats = new StatsLogger();
    modelRef.Initialize(BrainToTrain.brainParameters, isTraining, parameters);

    var brainParameters = BrainToTrain.brainParameters;

    //initialize data buffer
    Debug.Assert(brainParameters.vectorActionSize.Length <= 1, "Action branching is not supported yet");

    // Continuous actions store one float per action dimension; otherwise a single float is stored.
    // NOTE(review): the assertion above allows an empty vectorActionSize, in which case
    // the continuous branch would index out of range - confirm the brain always sets it.
    int actionWidth = brainParameters.vectorActionSpaceType == SpaceType.continuous
        ? brainParameters.vectorActionSize[0]
        : 1;

    List<DataBuffer.DataInfo> allBufferData = new List<DataBuffer.DataInfo>()
    {
        new DataBuffer.DataInfo("Action", typeof(float), new int[] { actionWidth })
    };

    if (brainParameters.vectorObservationSize > 0)
    {
        // Stacked observations are stored flattened into one vector.
        allBufferData.Add(new DataBuffer.DataInfo("VectorObservation", typeof(float),
            new int[] { brainParameters.vectorObservationSize * brainParameters.numStackedVectorObservations }));
    }

    // One buffer entry per camera, shaped (height, width, channels).
    for (int i = 0; i < brainParameters.cameraResolutions.Length; ++i)
    {
        int width = brainParameters.cameraResolutions[i].width;
        int height = brainParameters.cameraResolutions[i].height;
        int channels = brainParameters.cameraResolutions[i].blackAndWhite ? 1 : 3;

        allBufferData.Add(new DataBuffer.DataInfo("VisualObservation" + i, typeof(float),
            new int[] { height, width, channels }));
    }

    allBufferData.Add(new DataBuffer.DataInfo("Reward", typeof(float), new int[] { 1 }));

    dataBuffer = new DataBuffer(parametersMimic.maxBufferSize, allBufferData.ToArray());

    if (continueFromCheckpoint)
    {
        LoadModel();
    }

    if (loadTrainingDataFromCheckpoint)
    {
        LoadTrainingData();
    }
}
/// <summary>
/// Sets up the PPO trainer: resolves the model and trainer parameters, creates the
/// per-agent episode history containers, builds the data buffer layout from the brain
/// parameters, initializes the model, and optionally restores it from a checkpoint.
/// </summary>
public override void Initialize()
{
    iModelPPO = modelRef as IRLModelPPO;
    Debug.Assert(iModelPPO != null, "Please assign a model that implement interface IRLModelPPO to modelRef");
    parametersPPO = parameters as TrainerParamsPPO;
    Debug.Assert(parametersPPO != null, "Please Specify PPO Trainer Parameters");
    Debug.Assert(BrainToTrain != null, "brain can not be null");

    // Per-agent containers, all starting empty.
    statesEpisodeHistory = new Dictionary<Agent, List<float>>();
    rewardsEpisodeHistory = new Dictionary<Agent, List<float>>();
    actionsEpisodeHistory = new Dictionary<Agent, List<float>>();
    actionprobsEpisodeHistory = new Dictionary<Agent, List<float>>();
    valuesEpisodeHistory = new Dictionary<Agent, List<float>>();
    visualEpisodeHistory = new Dictionary<Agent, List<List<float[,,]>>>();
    actionMasksEpisodeHistory = new Dictionary<Agent, List<List<float>>>();
    accumulatedRewards = new Dictionary<Agent, float>();
    episodeSteps = new Dictionary<Agent, int>();

    var brainParams = BrainToTrain.brainParameters;
    Debug.Assert(brainParams.vectorActionSize.Length > 0, "Action size can not be zero. Please set it in the brain");

    bool isDiscrete = brainParams.vectorActionSpaceType == SpaceType.discrete;

    // Width shared by the "Action" and "ActionProb" entries: the first action size for a
    // continuous space, otherwise the number of entries in vectorActionSize.
    int actionWidth;
    if (brainParams.vectorActionSpaceType == SpaceType.continuous)
    {
        actionWidth = brainParams.vectorActionSize[0];
    }
    else
    {
        actionWidth = brainParams.vectorActionSize.Length;
    }

    var bufferLayout = new List<DataBuffer.DataInfo>();
    bufferLayout.Add(new DataBuffer.DataInfo("Action", typeof(float), new int[] { actionWidth }));
    bufferLayout.Add(new DataBuffer.DataInfo("ActionProb", typeof(float), new int[] { actionWidth }));
    bufferLayout.Add(new DataBuffer.DataInfo("TargetValue", typeof(float), new int[] { 1 }));
    bufferLayout.Add(new DataBuffer.DataInfo("OldValue", typeof(float), new int[] { 1 }));
    bufferLayout.Add(new DataBuffer.DataInfo("Advantage", typeof(float), new int[] { 1 }));

    if (brainParams.vectorObservationSize > 0)
    {
        // Stacked observations are stored flattened into one vector.
        int flatObservationSize = brainParams.vectorObservationSize * brainParams.numStackedVectorObservations;
        bufferLayout.Add(new DataBuffer.DataInfo("VectorObservation", typeof(float), new int[] { flatObservationSize }));
    }

    // One entry per camera, shaped (height, width, channels).
    for (int cam = 0; cam < brainParams.cameraResolutions.Length; ++cam)
    {
        int camWidth = brainParams.cameraResolutions[cam].width;
        int camHeight = brainParams.cameraResolutions[cam].height;
        int camChannels = brainParams.cameraResolutions[cam].blackAndWhite ? 1 : 3;

        bufferLayout.Add(new DataBuffer.DataInfo("VisualObservation" + cam, typeof(float),
            new int[] { camHeight, camWidth, camChannels }));
    }

    // Discrete spaces additionally get one mask entry per vectorActionSize element.
    if (isDiscrete)
    {
        for (int branch = 0; branch < brainParams.vectorActionSize.Length; ++branch)
        {
            bufferLayout.Add(new DataBuffer.DataInfo("ActionMask" + branch, typeof(float),
                new int[] { brainParams.vectorActionSize[branch] }));
        }
    }

    dataBuffer = new DataBuffer(bufferLayout.ToArray());

    //initialize loggers and neural network model
    stats = new StatsLogger();
    modelRef.Initialize(BrainToTrain.brainParameters, isTraining, parameters);

    if (continueFromCheckpoint)
    {
        LoadModel();
    }
}