void Start()
{
    tensorsToOptimize = model.networkHierarchy.GetLowLevelWeights();
    paramDimension = 0;
    foreach (var t in tensorsToOptimize)
    {
        int size = t.shape.Aggregate((t1, t2) => t1 * t2).Value;
        tensorSizes.Add(size);
        paramDimension += size;
    }

    optimizer = optimizerType == ESOptimizer.ESOptimizerType.LMMAES ? (IMAES)new LMMAES() : (IMAES)new MAES();

    samples = new OptimizationSample[populationSize];
    for (int i = 0; i < populationSize; ++i)
    {
        samples[i] = new OptimizationSample(paramDimension);
    }

    //initialize the optimizer
    optimizer.init(paramDimension, populationSize, new double[paramDimension], initialStepSize, mode);

    if (continueFromPrev)
    {
        LoadFromFile();
    }
    else
    {
        optimizer.generateSamples(samples);
    }
    SetWeights(samples[currentEvaluationIndex]);
}
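// For reference, a minimal sketch of the IMAES interface that MAES and LMMAES are
// assumed to implement, inferred from the calls made in this code (init,
// generateSamples, update, getBest, getBestObjectiveFuncValue, and
// recommendedPopulationSize used further below). The actual interface in the
// library may declare different parameter types or additional members.
public interface IMAES
{
    void init(int dimension, int populationSize, double[] initialMean, double initialStepSize, OptimizationModes mode);
    void generateSamples(OptimizationSample[] samples);
    void update(OptimizationSample[] samples);
    double[] getBest();
    double getBestObjectiveFuncValue();
    int recommendedPopulationSize(int dimension);
}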
/// <summary>
/// Start optimizing asynchronously. It does not actually run in another thread; instead it runs in Update() in each frame of your game,
/// so the optimization will not block your game.
/// </summary>
/// <param name="optimizeTarget">Target to optimize</param>
/// <param name="onReady">Action to call when the optimization is ready. The input is the best solution found.</param>
/// <param name="initialMean">Initial mean guess.</param>
public void StartOptimizingAsync(IESOptimizable optimizeTarget, Action<double[]> onReady = null, double[] initialMean = null)
{
    optimizable = optimizeTarget;

    optimizer = optimizerType == ESOptimizerType.LMMAES ? (IMAES)new LMMAES() : (IMAES)new MAES();

    samples = new OptimizationSample[populationSize];
    for (int i = 0; i < populationSize; ++i)
    {
        samples[i] = new OptimizationSample(optimizable.GetParamDimension());
    }

    iteration = 0;

    //initial mean
    double[] actualInitMean = null;
    if (initialMean != null && initialMean.Length != optimizeTarget.GetParamDimension())
    {
        Debug.LogError("Init mean has a wrong dimension " + initialMean.Length + " rather than " + optimizeTarget.GetParamDimension() + ".");
    }
    if (initialMean == null)
    {
        actualInitMean = new double[optimizeTarget.GetParamDimension()];
    }
    else
    {
        actualInitMean = initialMean;
    }

    optimizer.init(optimizable.GetParamDimension(), populationSize, actualInitMean, initialStepSize, mode);

    IsOptimizing = true;
    this.onReady = onReady;
}
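// A minimal usage sketch for the method above. The "esOptimizer" field referencing
// an ESOptimizer in the scene and the BeginSearch wrapper are hypothetical names,
// not part of the original code.
void BeginSearch(IESOptimizable target)
{
    esOptimizer.StartOptimizingAsync(target, best =>
    {
        //called once the optimization finishes; "best" is the best parameter vector found
        Debug.Log("Optimization done, first parameter: " + best[0]);
    });
}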
public override void Initialize()
{
    modeNE = modelRef as INeuralEvolutionModel;
    Debug.Assert(modeNE != null, "Please assign an INeuralEvolutionModel to modelRef");
    parametersNE = parameters as TrainerParamsNeuralEvolution;
    Debug.Assert(parametersNE != null, "Please specify TrainerNeuralEvolution trainer parameters");
    Debug.Assert(BrainToTrain != null, "Brain cannot be null");

    modelRef.Initialize(BrainToTrain.brainParameters, isTraining, parameters);

    agentsRewards = new Dictionary<Agent, List<float>>();
    rewardsOfCurrentChild = new List<float>();

    tensorsToOptimize = modeNE.GetWeightsForNeuralEvolution();
    paramDimension = 0;
    foreach (var t in tensorsToOptimize)
    {
        int size = t.shape.Aggregate((t1, t2) => t1 * t2).Value;
        tensorSizes.Add(size);
        paramDimension += size;
    }

    optimizer = parametersNE.optimizerType == ESOptimizer.ESOptimizerType.LMMAES ? (IMAES)new LMMAES() : (IMAES)new MAES();

    samples = new OptimizationSample[parametersNE.populationSize];
    for (int i = 0; i < parametersNE.populationSize; ++i)
    {
        samples[i] = new OptimizationSample(paramDimension);
    }

    //initialize the optimizer
    optimizer.init(paramDimension, parametersNE.populationSize, new double[paramDimension], parametersNE.initialStepSize, parametersNE.mode);

    if (continueFromCheckpoint)
    {
        if (!LoadNEDataFromFile())
        {
            optimizer.generateSamples(samples);
        }
    }
    else
    {
        optimizer.generateSamples(samples);
    }

    if (isTraining)
    {
        SetWeights(samples[currentEvaluationIndex]);
    }
    else if (bestSample != null)
    {
        SetWeights(bestSample);
    }

    stats = new StatsLogger();
}
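// A sketch of what INeuralEvolutionModel is assumed to look like, inferred from its
// single use above: it only needs to expose the weight tensors that the evolution
// strategy should optimize. The Tensor type comes from the Keras backend used
// elsewhere in this code; the real interface may declare more members.
public interface INeuralEvolutionModel
{
    List<Tensor> GetWeightsForNeuralEvolution();
}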
private void FixedUpdate()
{
    if (!isOptimizing)
    {
        return;
    }

    if (trainer.GetStep() >= evaluationSteps)
    {
        //set the objective function value of the current sample
        var rewards = trainer.stats.GetStat("accumulatedRewards");
        float aveRewards = 0;
        for (int i = 0; i < evaluationLastRewardsNum; ++i)
        {
            aveRewards += rewards[rewards.Count - 1 - i].Item2;
        }
        aveRewards = aveRewards / evaluationLastRewardsNum;
        samples[currentEvaluationIndex].objectiveFuncVal = aveRewards;

        //reset stuff
        currentEvaluationIndex++;
        Current.K.try_initialize_variables(false);
        trainer.ResetTrainer();
        if (currentEvaluationIndex < populationSize)
        {
            SaveToFile();
            SetWeights(samples[currentEvaluationIndex]);
        }
    }

    if (currentEvaluationIndex >= populationSize)
    {
        optimizer.update(samples);  //update the optimizer
        currentGeneration++;
        currentEvaluationIndex = 0;
        optimizer.generateSamples(samples); //generate new samples

        if (Best == null)
        {
            Best = new OptimizationSample();
            Best.x = optimizer.getBest();
            Best.objectiveFuncVal = optimizer.getBestObjectiveFuncValue();
        }
        else if ((mode == OptimizationModes.maximize && Best.objectiveFuncVal < optimizer.getBestObjectiveFuncValue())
            || (mode == OptimizationModes.minimize && Best.objectiveFuncVal > optimizer.getBestObjectiveFuncValue()))
        {
            Best.x = optimizer.getBest();
            Best.objectiveFuncVal = optimizer.getBestObjectiveFuncValue();
        }

        SaveToFile();
        SetWeights(samples[currentEvaluationIndex]);    //set weights for the first sample of the new generation
    }
}
public override void UpdateModel()
{
    float aveRewards = 0;
    for (int i = 0; i < rewardsOfCurrentChild.Count; ++i)
    {
        aveRewards += rewardsOfCurrentChild[i];
    }
    aveRewards = aveRewards / rewardsOfCurrentChild.Count;
    rewardsOfCurrentChild.Clear();

    stats.AddData("accumulatedRewards", aveRewards);
    samples[currentEvaluationIndex].objectiveFuncVal = aveRewards;
    currentEvaluationIndex++;

    //reset stuff
    if (currentEvaluationIndex < parametersNE.populationSize)
    {
        SetWeights(samples[currentEvaluationIndex]);
    }

    if (currentEvaluationIndex >= parametersNE.populationSize)
    {
        optimizer.update(samples);  //update the optimizer
        currentGeneration++;
        currentEvaluationIndex = 0;
        optimizer.generateSamples(samples); //generate new samples

        if (bestSample == null)
        {
            bestSample = new OptimizationSample();
            bestSample.x = optimizer.getBest();
            bestSample.objectiveFuncVal = optimizer.getBestObjectiveFuncValue();
        }
        else if ((parametersNE.mode == OptimizationModes.maximize && bestSample.objectiveFuncVal < optimizer.getBestObjectiveFuncValue())
            || (parametersNE.mode == OptimizationModes.minimize && bestSample.objectiveFuncVal > optimizer.getBestObjectiveFuncValue()))
        {
            bestSample.x = optimizer.getBest();
            bestSample.objectiveFuncVal = optimizer.getBestObjectiveFuncValue();
        }

        SetWeights(samples[currentEvaluationIndex]);    //set weight for the first sample
    }

    //reset all agents
    var agentList = agentsRewards.Keys;
    foreach (var agent in agentList)
    {
        agent.AgentReset();
        agentsRewards[agent].Clear();
    }
}
/// <summary>
/// Set the weights using an optimization sample.
/// </summary>
/// <param name="sample"></param>
protected void SetWeights(OptimizationSample sample)
{
    float[] floatValues = Array.ConvertAll(sample.x, (t) => (float)t);
    int currentStartIndex = 0;
    for (int i = 0; i < tensorsToOptimize.Count; ++i)
    {
        Current.K.set_value(tensorsToOptimize[i], SubArray(floatValues, currentStartIndex, tensorSizes[i]));
        currentStartIndex += tensorSizes[i];
    }
}
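// SubArray is not shown above; a minimal sketch of what it is assumed to do, namely
// copy "length" elements starting at "startIndex" into a new array.
protected static T[] SubArray<T>(T[] data, int startIndex, int length)
{
    T[] result = new T[length];
    Array.Copy(data, startIndex, result, 0, length);
    return result;
}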
void Start()
{
    //Init optimization
    opt.init(nVariables, opt.recommendedPopulationSize(nVariables), new double[nVariables], 1, OptimizationModes.minimize);

    //allocate container for sample vectors
    samples = new OptimizationSample[opt.recommendedPopulationSize(nVariables)];
    for (int i = 0; i < samples.Length; i++)
    {
        samples[i] = new OptimizationSample(nVariables);
    }
}
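// A sketch of how the optimizer initialized above might be driven each frame:
// generate a population, score every sample with an objective function (a simple
// sphere function here, purely for illustration), then let the optimizer update its
// search distribution. This Update() method is an assumption, not part of the
// original example.
void Update()
{
    opt.generateSamples(samples);
    foreach (var sample in samples)
    {
        //sphere function: sum of squared parameters, minimized at the origin
        double sum = 0;
        foreach (double v in sample.x)
        {
            sum += v * v;
        }
        sample.objectiveFuncVal = (float)sum;
    }
    opt.update(samples);
}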
/// <summary>
/// Restore the training checkpoint from a byte array created by <see cref="SaveNECheckpoint()"/>.
/// </summary>
/// <param name="data"></param>
public virtual void RestoreNECheckpoint(byte[] data)
{
    //deserialize the data
    var mStream = new MemoryStream(data);
    var binFormatter = new BinaryFormatter();
    var restoredData = (EvolutionData)binFormatter.Deserialize(mStream);

    samples = restoredData.samples;
    currentGeneration = restoredData.currentGeneration;
    currentEvaluationIndex = restoredData.currentEvaluationIndex;
    bestSample = restoredData.bestSample;
}
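// SaveNECheckpoint() itself is not shown; a minimal sketch of the serialization
// counterpart, assuming EvolutionData is a [Serializable] container exposing the
// same fields that are restored above. The real method may differ.
public virtual byte[] SaveNECheckpoint()
{
    var data = new EvolutionData
    {
        samples = samples,
        currentGeneration = currentGeneration,
        currentEvaluationIndex = currentEvaluationIndex,
        bestSample = bestSample
    };
    using (var mStream = new MemoryStream())
    {
        new BinaryFormatter().Serialize(mStream, data);
        return mStream.ToArray();
    }
}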
void OnGUI()
{
    GUILayout.BeginVertical();
    GUILayout.BeginHorizontal();
    if (uiState == UIState.idle)
    {
        if (GUILayout.Button("Optimize", GUILayout.Width(100)))
        {
            //init optimization
            samples = new OptimizationSample[populationSize];
            for (int i = 0; i < populationSize; i++)
            {
                samples[i] = new OptimizationSample(2);
            }
            opt.init(2, populationSize, new double[2] { 0, 0 }, 1, OptimizationModes.maximize);
            iter = 0;

            //update state
            uiState = UIState.optimizing;
            Physics.autoSimulation = false;
        }
    }
    else
    {
        if (GUILayout.Button("Shoot", GUILayout.Width(100)))
        {
            endOptimization();
        }
    }

    gameSystem.rewardShaping = GUILayout.Toggle(gameSystem.rewardShaping, "Reward Shaping", GUILayout.Width(200));
    GUILayout.Label("Predicted score: " + gameSystem.predictedShotScore);
    GUILayout.EndHorizontal();

    GUILayout.BeginHorizontal();
    GUILayout.Label("Max Iter:", GUILayout.Width(100));
    maxIter = (int)GUILayout.HorizontalSlider(maxIter, 1, 50, GUILayout.Width(50));
    GUILayout.Label(maxIter.ToString(), GUILayout.Width(50));
    GUILayout.EndHorizontal();

    GUILayout.BeginHorizontal();
    GUILayout.Label("Population size:", GUILayout.Width(100));
    populationSize = (int)GUILayout.HorizontalSlider(populationSize, 1, 100, GUILayout.Width(50));
    GUILayout.Label(populationSize.ToString());
    GUILayout.EndHorizontal();

    GUILayout.EndVertical();
}
public virtual void RestoreCheckpoint(byte[] data)
{
    //deserialize the data
    var mStream = new MemoryStream(data);
    var binFormatter = new BinaryFormatter();
    object deserialized = binFormatter.Deserialize(mStream);
    var restoredData = deserialized as EvolutionData;
    if (restoredData == null)
    {
        Debug.LogError("Failed to load the checkpoint data");
    }
    else
    {
        samples = restoredData.samples;
        currentGeneration = restoredData.currentGeneration;
        currentEvaluationIndex = restoredData.currentEvaluationIndex;
        Best = restoredData.best;
    }
}
/// <summary>
/// Optimize and return the solution immediately.
/// </summary>
/// <param name="optimizeTarget">Target to optimize</param>
/// <param name="initialMean">Initial mean guess.</param>
/// <returns>The best solution found</returns>
public double[] Optimize(IESOptimizable optimizeTarget, double[] initialMean = null)
{
    var tempOptimizer = (optimizerType == ESOptimizerType.LMMAES ? (IMAES)new LMMAES() : (IMAES)new MAES());

    var tempSamples = new OptimizationSample[populationSize];
    for (int i = 0; i < populationSize; ++i)
    {
        tempSamples[i] = new OptimizationSample(optimizeTarget.GetParamDimension());
    }

    //initial mean
    double[] actualInitMean = null;
    if (initialMean != null && initialMean.Length != optimizeTarget.GetParamDimension())
    {
        Debug.LogError("Init mean has a wrong dimension " + initialMean.Length + " rather than " + optimizeTarget.GetParamDimension() + ".");
    }
    if (initialMean == null)
    {
        actualInitMean = new double[optimizeTarget.GetParamDimension()];
    }
    else
    {
        actualInitMean = initialMean;
    }

    //initialize the optimizer
    tempOptimizer.init(optimizeTarget.GetParamDimension(), populationSize, actualInitMean, initialStepSize, mode);

    //iterate until maxIteration or until the target value is reached
    double[] bestParams = null;
    iteration = 0;
    for (int it = 0; it < maxIteration; ++it)
    {
        tempOptimizer.generateSamples(tempSamples);

        //evaluate the samples in batches of evaluationBatchSize (ceiling division,
        //so the last, possibly partial, batch is included without an extra empty call)
        int batchCount = (tempSamples.Length + evaluationBatchSize - 1) / evaluationBatchSize;
        for (int s = 0; s < batchCount; ++s)
        {
            List<double[]> paramList = new List<double[]>();
            for (int b = 0; b < evaluationBatchSize; ++b)
            {
                int ind = s * evaluationBatchSize + b;
                if (ind < tempSamples.Length)
                {
                    paramList.Add(tempSamples[ind].x);
                }
            }
            var values = optimizeTarget.Evaluate(paramList);
            for (int b = 0; b < evaluationBatchSize; ++b)
            {
                int ind = s * evaluationBatchSize + b;
                if (ind < tempSamples.Length)
                {
                    tempSamples[ind].objectiveFuncVal = values[b];
                }
            }
        }

        tempOptimizer.update(tempSamples);
        BestScore = tempOptimizer.getBestObjectiveFuncValue();
        iteration++;
        bestParams = tempOptimizer.getBest();

        if ((BestScore <= targetValue && mode == OptimizationModes.minimize)
            || (BestScore >= targetValue && mode == OptimizationModes.maximize))
        {
            //optimization is done
            break;
        }
    }

    return bestParams;
}
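// A minimal usage sketch: an IESOptimizable that scores parameter vectors with a
// 2D sphere function, handed to Optimize(). The interface members used here
// (GetParamDimension and Evaluate) are inferred from the calls above; the real
// interface may declare more, and the return type of Evaluate is an assumption.
public class SphereObjective : IESOptimizable
{
    public int GetParamDimension()
    {
        return 2;
    }

    public List<float> Evaluate(List<double[]> paramList)
    {
        //score each parameter vector in the batch with the sphere function,
        //which is minimized at the origin
        var values = new List<float>();
        foreach (var x in paramList)
        {
            double sum = 0;
            foreach (double v in x)
            {
                sum += v * v;
            }
            values.Add((float)sum);
        }
        return values;
    }
}

//usage, assuming "esOptimizer" is an ESOptimizer configured with mode = minimize:
//double[] best = esOptimizer.Optimize(new SphereObjective());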