/// <summary>
    /// Sets up the neural-evolution trainer: resolves the model and trainer
    /// parameters, initializes the model, measures the tensors to optimize,
    /// creates the optimizer and its sample population, and finally applies
    /// an initial set of weights to the model.
    /// </summary>
    public override void Initialize()
    {
        // Resolve the concretely-typed views of the model and parameters.
        modeNE = modelRef as INeuralEvolutionModel;
        Debug.Assert(modeNE != null, "Please assign a INeuralEvolutionModel to modelRef");
        parametersNE = parameters as TrainerParamsNeuralEvolution;
        Debug.Assert(parametersNE != null, "Please Specify TrainerNeuralEvolution Trainer Parameters");
        Debug.Assert(BrainToTrain != null, "brain can not be null");

        modelRef.Initialize(BrainToTrain.brainParameters, isTraining, parameters);

        // Fresh reward bookkeeping.
        agentsRewards         = new Dictionary<Agent, List<float>>();
        rewardsOfCurrentChild = new List<float>();

        // Record the element count of every tensor and sum them into the
        // total dimension of the optimization problem.
        // NOTE(review): tensorSizes is appended to without being reset in this
        // method - confirm Initialize is never invoked twice on one instance.
        tensorsToOptimize = modeNE.GetWeightsForNeuralEvolution();
        paramDimension    = 0;
        foreach (var tensor in tensorsToOptimize)
        {
            int elementCount = tensor.shape.Aggregate((product, dim) => product * dim).Value;
            tensorSizes.Add(elementCount);
            paramDimension += elementCount;
        }

        // Pick the optimizer implementation requested by the parameters.
        IMAES selectedOptimizer;
        if (parametersNE.optimizerType == ESOptimizer.ESOptimizerType.LMMAES)
        {
            selectedOptimizer = new LMMAES();
        }
        else
        {
            selectedOptimizer = new MAES();
        }
        optimizer = selectedOptimizer;

        // Allocate one sample slot per population member.
        var populationSize = parametersNE.populationSize;
        samples = new OptimizationSample[populationSize];
        for (int sampleIndex = 0; sampleIndex < populationSize; ++sampleIndex)
        {
            samples[sampleIndex] = new OptimizationSample(paramDimension);
        }

        // The optimizer starts from a zero-filled vector of length paramDimension.
        optimizer.init(paramDimension, populationSize, new double[paramDimension], parametersNE.initialStepSize, parametersNE.mode);

        // Generate new samples unless a checkpoint was requested and
        // LoadNEDataFromFile() reported success.
        if (!continueFromCheckpoint || !LoadNEDataFromFile())
        {
            optimizer.generateSamples(samples);
        }

        // When training, use the sample at the current evaluation index;
        // otherwise use the best known sample, if there is one.
        if (isTraining)
        {
            SetWeights(samples[currentEvaluationIndex]);
        }
        else if (bestSample != null)
        {
            SetWeights(bestSample);
        }

        stats = new StatsLogger();
    }
Example #2
0
    /// <summary>
    /// Sets up the mimic (supervised learning) trainer: resolves the model and
    /// trainer parameters, initializes the model, builds the data buffer layout
    /// from the brain parameters, and optionally loads a saved model and saved
    /// training data.
    /// </summary>
    public override void Initialize()
    {
        modelSL = modelRef as ISupervisedLearningModel;
        Debug.Assert(modelSL != null, "Please assign a ISupervisedLearningModel to modelRef");
        Debug.Assert(BrainToTrain != null, "brain can not be null");
        parametersMimic = parameters as TrainerParamsMimic;
        // The message names the Mimic trainer; it previously referred to PPO,
        // which pointed users at the wrong parameter asset.
        Debug.Assert(parametersMimic != null, "Please Specify Mimic Trainer Parameters");
        stats = new StatsLogger();
        modelRef.Initialize(BrainToTrain.brainParameters, isTraining, parameters);

        var brainParameters = BrainToTrain.brainParameters;

        // Initialize the data buffer layout.
        Debug.Assert(brainParameters.vectorActionSize.Length <= 1, "Action branching is not supported yet");

        // Continuous actions store one float per action dimension; otherwise a
        // single float is stored.
        List<DataBuffer.DataInfo> allBufferData = new List<DataBuffer.DataInfo>()
        {
            new DataBuffer.DataInfo("Action", typeof(float), new int[] { brainParameters.vectorActionSpaceType == SpaceType.continuous ? brainParameters.vectorActionSize[0] : 1 })
        };

        // Vector observations are stored flattened across all stacked frames.
        if (brainParameters.vectorObservationSize > 0)
        {
            allBufferData.Add(new DataBuffer.DataInfo("VectorObservation", typeof(float), new int[] { brainParameters.vectorObservationSize * brainParameters.numStackedVectorObservations }));
        }

        // One visual observation entry per camera, shaped (height, width, channels).
        for (int i = 0; i < brainParameters.cameraResolutions.Length; ++i)
        {
            int width    = brainParameters.cameraResolutions[i].width;
            int height   = brainParameters.cameraResolutions[i].height;
            int channels = brainParameters.cameraResolutions[i].blackAndWhite ? 1 : 3;

            allBufferData.Add(new DataBuffer.DataInfo("VisualObservation" + i, typeof(float), new int[] { height, width, channels }));
        }
        allBufferData.Add(new DataBuffer.DataInfo("Reward", typeof(float), new int[] { 1 }));

        dataBuffer = new DataBuffer(parametersMimic.maxBufferSize, allBufferData.ToArray());

        if (continueFromCheckpoint)
        {
            LoadModel();
        }
        if (loadTrainingDataFromCheckpoint)
        {
            LoadTrainingData();
        }
    }
    /// <summary>
    /// Sets up the PPO trainer: resolves the model and trainer parameters,
    /// creates the per-agent episode histories, builds the data buffer layout
    /// from the brain parameters, initializes the model, and optionally loads
    /// a saved model.
    /// </summary>
    public override void Initialize()
    {
        // Resolve the concretely-typed views of the model and parameters.
        iModelPPO = modelRef as IRLModelPPO;
        Debug.Assert(iModelPPO != null, "Please assign a model that implement interface IRLModelPPO to modelRef");
        parametersPPO = parameters as TrainerParamsPPO;
        Debug.Assert(parametersPPO != null, "Please Specify PPO Trainer Parameters");
        Debug.Assert(BrainToTrain != null, "brain can not be null");

        // Per-agent episode histories.
        statesEpisodeHistory      = new Dictionary<Agent, List<float>>();
        rewardsEpisodeHistory     = new Dictionary<Agent, List<float>>();
        actionsEpisodeHistory     = new Dictionary<Agent, List<float>>();
        actionprobsEpisodeHistory = new Dictionary<Agent, List<float>>();
        valuesEpisodeHistory      = new Dictionary<Agent, List<float>>();
        visualEpisodeHistory      = new Dictionary<Agent, List<List<float[,,]>>>();
        actionMasksEpisodeHistory = new Dictionary<Agent, List<List<float>>>();

        // Per-agent running totals.
        accumulatedRewards = new Dictionary<Agent, float>();
        episodeSteps       = new Dictionary<Agent, int>();

        var brainParams = BrainToTrain.brainParameters;

        Debug.Assert(brainParams.vectorActionSize.Length > 0, "Action size can not be zero. Please set it in the brain");

        // Continuous: one float per action dimension. Otherwise: one float per
        // action branch. The same width is used for actions and their probabilities.
        bool isDiscrete = brainParams.vectorActionSpaceType == SpaceType.discrete;
        int actionWidth = brainParams.vectorActionSpaceType == SpaceType.continuous
            ? brainParams.vectorActionSize[0]
            : brainParams.vectorActionSize.Length;

        var bufferLayout = new List<DataBuffer.DataInfo>();
        bufferLayout.Add(new DataBuffer.DataInfo("Action", typeof(float), new int[] { actionWidth }));
        bufferLayout.Add(new DataBuffer.DataInfo("ActionProb", typeof(float), new int[] { actionWidth }));
        bufferLayout.Add(new DataBuffer.DataInfo("TargetValue", typeof(float), new int[] { 1 }));
        bufferLayout.Add(new DataBuffer.DataInfo("OldValue", typeof(float), new int[] { 1 }));
        bufferLayout.Add(new DataBuffer.DataInfo("Advantage", typeof(float), new int[] { 1 }));

        // Vector observations are stored flattened across all stacked frames.
        if (brainParams.vectorObservationSize > 0)
        {
            int stackedSize = brainParams.vectorObservationSize * brainParams.numStackedVectorObservations;
            bufferLayout.Add(new DataBuffer.DataInfo("VectorObservation", typeof(float), new int[] { stackedSize }));
        }

        // One visual observation entry per camera, shaped (height, width, channels).
        for (int cam = 0; cam < brainParams.cameraResolutions.Length; ++cam)
        {
            int camWidth    = brainParams.cameraResolutions[cam].width;
            int camHeight   = brainParams.cameraResolutions[cam].height;
            int camChannels = brainParams.cameraResolutions[cam].blackAndWhite ? 1 : 3;

            bufferLayout.Add(new DataBuffer.DataInfo("VisualObservation" + cam, typeof(float), new int[] { camHeight, camWidth, camChannels }));
        }

        // Discrete action spaces additionally get one mask entry per branch.
        if (isDiscrete)
        {
            for (int branch = 0; branch < brainParams.vectorActionSize.Length; ++branch)
            {
                bufferLayout.Add(new DataBuffer.DataInfo("ActionMask" + branch, typeof(float), new int[] { brainParams.vectorActionSize[branch] }));
            }
        }

        dataBuffer = new DataBuffer(bufferLayout.ToArray());

        // Initialize the logger and the neural network model.
        stats = new StatsLogger();

        modelRef.Initialize(BrainToTrain.brainParameters, isTraining, parameters);
        if (continueFromCheckpoint)
        {
            LoadModel();
        }
    }