void Start()
    {
        // Collect the low-level network weights that the evolution strategy will tune.
        tensorsToOptimize = model.networkHierarchy.GetLowLevelWeights();

        // Flatten every tensor: record each tensor's element count and sum the total
        // parameter dimension of the search space.
        paramDimension = 0;
        foreach (var tensor in tensorsToOptimize)
        {
            int elementCount = tensor.shape.Aggregate((a, b) => a * b).Value;
            tensorSizes.Add(elementCount);
            paramDimension += elementCount;
        }

        // Choose the evolution-strategy implementation.
        if (optimizerType == ESOptimizer.ESOptimizerType.LMMAES)
        {
            optimizer = new LMMAES();
        }
        else
        {
            optimizer = new MAES();
        }

        // One optimization sample per population member.
        samples = new OptimizationSample[populationSize];
        for (int s = 0; s < populationSize; s++)
        {
            samples[s] = new OptimizationSample(paramDimension);
        }

        // Initialize the optimizer with a zero initial mean.
        optimizer.init(paramDimension, populationSize, new double[paramDimension], initialStepSize, mode);

        // Either resume a previous run or draw a fresh first generation.
        if (continueFromPrev)
        {
            LoadFromFile();
        }
        else
        {
            optimizer.generateSamples(samples);
        }

        // Push the current sample's parameters into the network weights.
        SetWeights(samples[currentEvaluationIndex]);
    }
// ---- Example 2 ----
    /// <summary>
    /// Start to optimize asynchronously. It is not actually running in another thread, but running in Update() in each frame of your game.
    /// This way the optimization will not block your game.
    /// </summary>
    /// <param name="optimizeTarget">Target to optimize</param>
    /// <param name="onReady">Action to call when optimization is ready. The input is the best solution found.</param>
    /// <param name="initialMean">Initial mean guess; null means start from the origin.</param>
    public void StartOptimizingAsync(IESOptimizable optimizeTarget, Action <double[]> onReady = null, double[] initialMean = null)
    {
        optimizable = optimizeTarget;

        optimizer = optimizerType == ESOptimizerType.LMMAES ? (IMAES) new LMMAES() : (IMAES) new MAES();

        int paramDim = optimizable.GetParamDimension();

        samples = new OptimizationSample[populationSize];
        for (int i = 0; i < populationSize; ++i)
        {
            samples[i] = new OptimizationSample(paramDim);
        }
        iteration = 0;

        // Resolve the initial mean. BUGFIX: a guess with the wrong dimension used to
        // be logged but still passed to optimizer.init(); now it is rejected and
        // replaced by the zero vector so the optimizer state stays valid.
        double[] actualInitMean;
        if (initialMean != null && initialMean.Length != paramDim)
        {
            Debug.LogError("Init mean has a wrong dimension " + initialMean.Length + " rather than " + paramDim + ".");
            actualInitMean = new double[paramDim];
        }
        else
        {
            actualInitMean = initialMean ?? new double[paramDim];
        }

        optimizer.init(paramDim, populationSize, actualInitMean, initialStepSize, mode);

        IsOptimizing = true;

        this.onReady = onReady;
    }
    public override void Initialize()
    {
        // Resolve and validate the collaborators this trainer depends on.
        modeNE = modelRef as INeuralEvolutionModel;
        Debug.Assert(modeNE != null, "Please assign a INeuralEvolutionModel to modelRef");
        parametersNE = parameters as TrainerParamsNeuralEvolution;
        Debug.Assert(parametersNE != null, "Please Specify TrainerNeuralEvolution Trainer Parameters");
        Debug.Assert(BrainToTrain != null, "brain can not be null");

        modelRef.Initialize(BrainToTrain.brainParameters, isTraining, parameters);

        agentsRewards         = new Dictionary <Agent, List <float> >();
        rewardsOfCurrentChild = new List <float>();

        // Flatten the evolvable tensors: remember each tensor's element count and
        // accumulate the total parameter dimension of the search space.
        tensorsToOptimize = modeNE.GetWeightsForNeuralEvolution();
        paramDimension    = 0;
        foreach (var tensor in tensorsToOptimize)
        {
            int elementCount = tensor.shape.Aggregate((a, b) => a * b).Value;
            tensorSizes.Add(elementCount);
            paramDimension += elementCount;
        }

        // Choose the evolution-strategy implementation.
        if (parametersNE.optimizerType == ESOptimizer.ESOptimizerType.LMMAES)
        {
            optimizer = new LMMAES();
        }
        else
        {
            optimizer = new MAES();
        }

        // One optimization sample per population member.
        samples = new OptimizationSample[parametersNE.populationSize];
        for (int s = 0; s < parametersNE.populationSize; s++)
        {
            samples[s] = new OptimizationSample(paramDimension);
        }

        // Initialize the optimizer with a zero initial mean.
        optimizer.init(paramDimension, parametersNE.populationSize, new double[paramDimension], parametersNE.initialStepSize, parametersNE.mode);

        // Resume from a checkpoint when requested and available; otherwise draw a
        // fresh first generation. (Short-circuit keeps the file load conditional.)
        bool restored = continueFromCheckpoint && LoadNEDataFromFile();
        if (!restored)
        {
            optimizer.generateSamples(samples);
        }

        // While training, evaluate the current child; otherwise run the best-known weights.
        if (isTraining)
        {
            SetWeights(samples[currentEvaluationIndex]);
        }
        else if (bestSample != null)
        {
            SetWeights(bestSample);
        }

        stats = new StatsLogger();
    }
    // Drives one step of the evolutionary loop each physics tick:
    // evaluate the current child, advance to the next one, and when the whole
    // population has been scored, update the optimizer and begin a new generation.
    private void FixedUpdate()
    {
        if (!isOptimizing)
        {
            return;
        }

        // The current child has been evaluated for long enough.
        if (trainer.GetStep() >= evaluationSteps)
        {
            //set the objective function value to the samples
            var rewards = trainer.stats.GetStat("accumulatedRewards");

            // Fitness = average of the last `evaluationLastRewardsNum` logged rewards.
            float aveRewards = 0;
            for (int i = 0; i < evaluationLastRewardsNum; ++i)
            {
                aveRewards += rewards[rewards.Count - 1 - i].Item2;
            }
            aveRewards = aveRewards / evaluationLastRewardsNum;
            samples[currentEvaluationIndex].objectiveFuncVal = aveRewards;

            //reset stuff
            currentEvaluationIndex++;
            // Re-initialize backend variables and the trainer before evaluating the
            // next child so state does not leak between evaluations.
            Current.K.try_initialize_variables(false);
            trainer.ResetTrainer();
            if (currentEvaluationIndex < populationSize)
            {
                SaveToFile();
                SetWeights(samples[currentEvaluationIndex]);
            }
        }

        // Whole population scored: advance a generation (may run in the same tick
        // as the block above when the last child was just evaluated).
        if (currentEvaluationIndex >= populationSize)
        {
            optimizer.update(samples);//update the optimizer

            currentGeneration++;
            currentEvaluationIndex = 0;
            optimizer.generateSamples(samples);//generate new samples

            // Track the best sample seen so far; the comparison direction depends
            // on whether we are maximizing or minimizing the objective.
            if (Best == null)
            {
                Best   = new OptimizationSample();
                Best.x = optimizer.getBest();
                Best.objectiveFuncVal = optimizer.getBestObjectiveFuncValue();
            }
            else if ((mode == OptimizationModes.maximize && Best.objectiveFuncVal < optimizer.getBestObjectiveFuncValue()) ||
                     (mode == OptimizationModes.minimize && Best.objectiveFuncVal > optimizer.getBestObjectiveFuncValue()))
            {
                Best.x = optimizer.getBest();
                Best.objectiveFuncVal = optimizer.getBestObjectiveFuncValue();
            }

            SaveToFile();

            SetWeights(samples[currentEvaluationIndex]);//set weight for the first sample
        }
    }
    /// <summary>
    /// Score the child that was just evaluated, advance to the next child, and —
    /// once the whole population is scored — update the optimizer and start a new
    /// generation. Finally resets all agents for the next evaluation.
    /// </summary>
    public override void UpdateModel()
    {
        // Fitness of the current child = average of the rewards it collected.
        // BUGFIX: guard the empty case — 0/0 in float arithmetic yields NaN, which
        // would silently poison the optimizer's objective values.
        float aveRewards = 0;
        if (rewardsOfCurrentChild.Count > 0)
        {
            for (int i = 0; i < rewardsOfCurrentChild.Count; ++i)
            {
                aveRewards += rewardsOfCurrentChild[i];
            }
            aveRewards = aveRewards / rewardsOfCurrentChild.Count;
        }
        rewardsOfCurrentChild.Clear();
        stats.AddData("accumulatedRewards", aveRewards);

        samples[currentEvaluationIndex].objectiveFuncVal = aveRewards;

        currentEvaluationIndex++;

        // More children left in this generation: just load the next child's weights.
        if (currentEvaluationIndex < parametersNE.populationSize)
        {
            SetWeights(samples[currentEvaluationIndex]);
        }

        // Whole population scored: advance a generation.
        if (currentEvaluationIndex >= parametersNE.populationSize)
        {
            optimizer.update(samples);//update the optimizer

            currentGeneration++;
            currentEvaluationIndex = 0;
            optimizer.generateSamples(samples);//generate new samples

            // Track the best sample seen so far; comparison direction depends on
            // whether the objective is maximized or minimized.
            if (bestSample == null)
            {
                bestSample   = new OptimizationSample();
                bestSample.x = optimizer.getBest();
                bestSample.objectiveFuncVal = optimizer.getBestObjectiveFuncValue();
            }
            else if ((parametersNE.mode == OptimizationModes.maximize && bestSample.objectiveFuncVal < optimizer.getBestObjectiveFuncValue()) ||
                     (parametersNE.mode == OptimizationModes.minimize && bestSample.objectiveFuncVal > optimizer.getBestObjectiveFuncValue()))
            {
                bestSample.x = optimizer.getBest();
                bestSample.objectiveFuncVal = optimizer.getBestObjectiveFuncValue();
            }
            SetWeights(samples[currentEvaluationIndex]);//set weight for the first sample
        }

        // Reset all agents and their per-agent reward buffers for the next child.
        var agentList = agentsRewards.Keys;

        foreach (var agent in agentList)
        {
            agent.AgentReset();
            agentsRewards[agent].Clear();
        }
    }
    /// <summary>
    /// Set the weights of the optimized tensors from an optimization sample.
    /// </summary>
    /// <param name="sample">Sample whose parameter vector is written into the tensors.</param>
    protected void SetWeights(OptimizationSample sample)
    {
        // The optimizer works in doubles; the backend stores weights as floats.
        float[] flatWeights = Array.ConvertAll(sample.x, v => (float)v);

        // Walk the flat vector, carving out each tensor's slice in order.
        int offset = 0;
        for (int idx = 0; idx < tensorsToOptimize.Count; ++idx)
        {
            int sliceLength = tensorSizes[idx];
            Current.K.set_value(tensorsToOptimize[idx], SubArray(flatWeights, offset, sliceLength));
            offset += sliceLength;
        }
    }
// ---- Example 7 ----
 // Initialize the optimizer and allocate the sample buffer.
 void Start()
 {
     // Both the optimizer and the sample buffer use the recommended population
     // size — compute it once instead of calling the method twice.
     int popSize = opt.recommendedPopulationSize(nVariables);

     // Init optimization: zero initial mean, unit step size, minimizing.
     opt.init(nVariables, popSize, new double[nVariables], 1, OptimizationModes.minimize);

     // Allocate container for sample vectors.
     samples = new OptimizationSample[popSize];
     for (int i = 0; i < samples.Length; i++)
     {
         samples[i] = new OptimizationSample(nVariables);
     }
 }
    /// <summary>
    /// Restore the training checkpoint from a byte array. Use <see cref="SaveNECheckpoint()"/> to create the data.
    /// </summary>
    /// <param name="data">Serialized <see cref="EvolutionData"/> produced by the matching save method.</param>
    public virtual void RestoreNECheckpoint(byte[] data)
    {
        // SECURITY NOTE: BinaryFormatter is unsafe on untrusted input — only feed
        // this method checkpoints produced by this application.
        using (var mStream = new MemoryStream(data))
        {
            var binFormatter = new BinaryFormatter();

            // Checked cast so a corrupted/mismatched file is reported instead of
            // surfacing as an unexplained InvalidCastException (consistent with
            // RestoreCheckpoint elsewhere in this file).
            var restoredData = binFormatter.Deserialize(mStream) as EvolutionData;
            if (restoredData == null)
            {
                Debug.LogError("loaded data error");
                return;
            }

            samples                = restoredData.samples;
            currentGeneration      = restoredData.currentGeneration;
            currentEvaluationIndex = restoredData.currentEvaluationIndex;
            bestSample             = restoredData.bestSample;
        }
    }
// ---- Example 9 ----
    // Immediate-mode debug UI: an Optimize/Shoot button plus sliders for the
    // maximum iteration count and the population size. Call order defines the
    // layout, so the GUILayout sequence must not be rearranged.
    void OnGUI()
    {
        GUILayout.BeginVertical();
        GUILayout.BeginHorizontal();
        if (uiState == UIState.idle)
        {
            if (GUILayout.Button("Optimize", GUILayout.Width(100)))
            {
                //init optimization
                // Search space is 2-dimensional; start from the origin,
                // unit step size, maximizing the objective.
                samples = new OptimizationSample[populationSize];
                for (int i = 0; i < populationSize; i++)
                {
                    samples[i] = new OptimizationSample(2);
                }
                opt.init(2, populationSize, new double[2] {
                    0, 0
                }, 1, OptimizationModes.maximize);
                iter = 0;
                //update state
                uiState = UIState.optimizing;
                // Physics is stepped manually while optimizing so simulated
                // shots can be evaluated deterministically.
                Physics.autoSimulation = false;
            }
        }
        else
        {
            // Already optimizing: offer to stop and take the shot.
            if (GUILayout.Button("Shoot", GUILayout.Width(100)))
            {
                endOptimization();
            }
        }
        gameSystem.rewardShaping = GUILayout.Toggle(gameSystem.rewardShaping, "Reward Shaping", GUILayout.Width(200));
        GUILayout.Label("Predicted score: " + gameSystem.predictedShotScore);
        GUILayout.EndHorizontal();
        GUILayout.BeginHorizontal();
        // Slider row: maximum optimization iterations (1..50).
        GUILayout.Label("Max Iter:", GUILayout.Width(100));
        maxIter = (int)GUILayout.HorizontalSlider(maxIter, 1, 50, GUILayout.Width(50));
        GUILayout.Label(maxIter.ToString(), GUILayout.Width(50));
        GUILayout.EndHorizontal();
        GUILayout.BeginHorizontal();
        // Slider row: population size (1..100).
        GUILayout.Label("Population size:", GUILayout.Width(100));
        populationSize = (int)GUILayout.HorizontalSlider(populationSize, 1, 100, GUILayout.Width(50));
        GUILayout.Label(populationSize.ToString());

        GUILayout.EndHorizontal();
        GUILayout.EndVertical();
    }
    /// <summary>
    /// Restore the training state from a serialized checkpoint byte array.
    /// </summary>
    /// <param name="data">Serialized <see cref="EvolutionData"/>.</param>
    public virtual void RestoreCheckpoint(byte[] data)
    {
        // SECURITY NOTE: BinaryFormatter is unsafe on untrusted input — only load
        // checkpoints produced by this application.
        // BUGFIX: the MemoryStream is now disposed deterministically.
        using (var mStream = new MemoryStream(data))
        {
            var binFormatter = new BinaryFormatter();
            var restoredData = binFormatter.Deserialize(mStream) as EvolutionData;

            if (restoredData == null)
            {
                Debug.LogError("loaded data error");
            }
            else
            {
                samples                = restoredData.samples;
                currentGeneration      = restoredData.currentGeneration;
                currentEvaluationIndex = restoredData.currentEvaluationIndex;
                Best = restoredData.best;
            }
        }
    }
// ---- Example 11 ----
    /// <summary>
    /// Optimize and return the solution immediately (blocking).
    /// </summary>
    /// <param name="optimizeTarget">Target to optimize</param>
    /// <param name="initialMean">Initial mean guess; null means start from the origin.</param>
    /// <returns>The best solution found</returns>
    public double[] Optimize(IESOptimizable optimizeTarget, double[] initialMean = null)
    {
        var tempOptimizer = (optimizerType == ESOptimizerType.LMMAES ? (IMAES) new LMMAES() : (IMAES) new MAES());

        int paramDim = optimizeTarget.GetParamDimension();

        var tempSamples = new OptimizationSample[populationSize];
        for (int i = 0; i < populationSize; ++i)
        {
            tempSamples[i] = new OptimizationSample(paramDim);
        }

        // Resolve the initial mean. BUGFIX: a guess with the wrong dimension used
        // to be logged but still passed to init(); now it is rejected and replaced
        // by the zero vector so the optimizer state stays valid.
        double[] actualInitMean;
        if (initialMean != null && initialMean.Length != paramDim)
        {
            Debug.LogError("Init mean has a wrong dimension " + initialMean.Length + " rather than " + paramDim + ".");
            actualInitMean = new double[paramDim];
        }
        else
        {
            actualInitMean = initialMean ?? new double[paramDim];
        }

        //initialize the optimizer
        tempOptimizer.init(paramDim, populationSize, actualInitMean, initialStepSize, mode);

        //iteration
        double[] bestParams = null;

        iteration = 0;
        for (int it = 0; it < maxIteration; ++it)
        {
            tempOptimizer.generateSamples(tempSamples);

            // BUGFIX: the old bound (s <= Length / batchSize) produced one extra
            // empty batch whenever the population divided evenly, calling
            // Evaluate() with an empty parameter list. Round up instead.
            int batchCount = (tempSamples.Length + evaluationBatchSize - 1) / evaluationBatchSize;
            for (int s = 0; s < batchCount; ++s)
            {
                // Gather this batch's parameter vectors (last batch may be partial).
                List <double[]> paramList = new List <double[]>();
                for (int b = 0; b < evaluationBatchSize; ++b)
                {
                    int ind = s * evaluationBatchSize + b;
                    if (ind < tempSamples.Length)
                    {
                        paramList.Add(tempSamples[ind].x);
                    }
                }

                var values = optimizeTarget.Evaluate(paramList);

                // Scatter the objective values back onto the batch's samples.
                for (int b = 0; b < paramList.Count; ++b)
                {
                    tempSamples[s * evaluationBatchSize + b].objectiveFuncVal = values[b];
                }
            }

            tempOptimizer.update(tempSamples);
            BestScore = tempOptimizer.getBestObjectiveFuncValue();

            iteration++;
            bestParams = tempOptimizer.getBest();

            // Stop early once the target objective value has been reached.
            if ((BestScore <= targetValue && mode == OptimizationModes.minimize) ||
                (BestScore >= targetValue && mode == OptimizationModes.maximize))
            {
                break;
            }
        }

        return(bestParams);
    }