Example #1
    /***** * * * * * CEM Helpers * * * * * *****/
    private Layers TrainRearAxelBicycleModel()
    {
        Int32 numElite = (Int32)(cemBatchSize * cemEliteFrac);

        // Initialize our base set of layers.
        // This set is refined on each CEM iteration and is the basis
        // for the layers sampled in that iteration's batch.
        Layers bestLayers = new Layers();

        bestLayers.Init(inputSize, hiddenSize, outputSize);
        bestLayers.SetStdDev(cemInitStddev);
        bestLayers.SetMean(0);

        // Run through our CEM iterations.
        for (Int32 i = 0; i < cemIterations; i++)
        {
            // Run batches based off of our current 'bestLayers' and store them
            // in order to find the elite results.
            List<Layers> batchLayers  = new List<Layers>(cemBatchSize);
            List<double> batchRewards = new List<double>(cemBatchSize);
            for (Int32 j = 0; j < cemBatchSize; j++)
            {
                // Init a set of layers off of our current best set.
                Layers batchLayer = new Layers();
                batchLayer.Init(bestLayers, (float)cemInitStddev);

                // Calculate the reward for this batch and store it.
                batchRewards.Add(Reward(batchLayer));
                batchLayers.Add(batchLayer);
            }

            double meanReward = batchRewards.Average();

            // All batches are evaluated; keep the indices of the numElite best results.
            Int32[] indexArray = batchRewards
                                 .Select((value, index) => new { value, index })
                                 .OrderByDescending(item => item.value)
                                 .Take(numElite)
                                 .Select(item => item.index)
                                 .ToArray();

            // Fold the mean and stdDev of the elite layers back into our best layers.
            BatchMeanAndStdDev(in batchLayers, in indexArray, ref bestLayers);
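            // Add extra exploration noise that decays as 1 / (i + 1) to avoid premature convergence.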
            bestLayers.AddNoise(cemNoiseFactor / (i + 1));

            Debug.Log("Iteration: " + i + "\tmeanReward: " + meanReward + "\t Reward(CurrMatrix): " + Reward(bestLayers));
        }
        UnityEngine.Debug.Log("done");
        return(bestLayers);
    }
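
For reference, the elite-update step performed by BatchMeanAndStdDev (not shown above) is the core of the cross-entropy method: compute the per-parameter mean and standard deviation over the elite samples and use them as the next sampling distribution. Below is only a minimal sketch of that computation over flat parameter vectors, assuming the same using System; directive as the method above; the flat-array representation and the name EliteMeanAndStdDev are placeholders, not the actual Layers internals.

    /***** * * * * * Hypothetical elite-update sketch * * * * * *****/
    // Sketch only: compute the per-parameter mean and standard deviation over the
    // elite samples, which is roughly what BatchMeanAndStdDev is expected to produce.
    // Samples are treated as flat parameter vectors; the real Layers internals are not shown above.
    private static void EliteMeanAndStdDev(double[][] samples, Int32[] eliteIndices,
                                           out double[] mean, out double[] stdDev)
    {
        Int32 paramCount = samples[0].Length;
        mean   = new double[paramCount];
        stdDev = new double[paramCount];

        // Per-parameter mean over the elite samples only.
        foreach (Int32 idx in eliteIndices)
            for (Int32 p = 0; p < paramCount; p++)
                mean[p] += samples[idx][p];
        for (Int32 p = 0; p < paramCount; p++)
            mean[p] /= eliteIndices.Length;

        // Per-parameter (population) standard deviation over the same elite samples.
        foreach (Int32 idx in eliteIndices)
            for (Int32 p = 0; p < paramCount; p++)
            {
                double diff = samples[idx][p] - mean[p];
                stdDev[p] += diff * diff;
            }
        for (Int32 p = 0; p < paramCount; p++)
            stdDev[p] = Math.Sqrt(stdDev[p] / eliteIndices.Length);
    }

In the training loop above, the mean computed this way would become the new bestLayers parameters, and the stdDev (plus the decaying AddNoise term) would drive the next iteration's sampling.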