Code example #1
    public override void ResetTrainer()
    {
        base.ResetTrainer();

        // Snapshot the keys first: Dictionary.Keys is a live view of the
        // dictionary, so clearing it below would otherwise empty this
        // collection before the per-agent reset loop runs.
        var agents = new List<Agent>(statesEpisodeHistory.Keys);

        stats.ClearAll();
        statesEpisodeHistory.Clear();
        rewardsEpisodeHistory.Clear();
        actionprobsEpisodeHistory.Clear();
        actionsEpisodeHistory.Clear();
        valuesEpisodeHistory.Clear();
        accumulatedRewards.Clear();
        episodeSteps.Clear();
        // Also clear the visual and action-mask histories created in
        // Initialize() so no stale observations survive a reset.
        visualEpisodeHistory.Clear();
        actionMasksEpisodeHistory.Clear();
        dataBuffer.ClearData();
        foreach (var agent in agents)
        {
            // Unity's implicit bool check skips agents that have been destroyed.
            if (agent)
            {
                agent.AgentReset();
            }
        }

        goodEpisodeHistory.Clear();
        policyTrainEpisodeHistory.Clear();


        if (exampleEpisodes != null)
        {
            goodEpisodeHistory = LoadHistory(exampleEpisodes.bytes);
        }
    }
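`ResetTrainer()` delegates the per-agent reset to `Agent.AgentReset()`, the reset hook of the pre-1.0 Unity ML-Agents `Agent` class. As a minimal sketch of what an override might look like (the `RollerAgent` name and its fields are illustrative, not part of the source):

    // A sketch assuming the pre-1.0 ML-Agents API, in which Agent exposes a
    // virtual AgentReset(); RollerAgent and rb are hypothetical names.
    public class RollerAgent : Agent
    {
        Rigidbody rb;   // assumed to be cached elsewhere, e.g. in Start()

        public override void AgentReset()
        {
            // Return the agent to a known start state for the next episode.
            rb.velocity             = Vector3.zero;
            rb.angularVelocity      = Vector3.zero;
            transform.localPosition = Vector3.zero;
        }
    }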
Code example #2
    public void SaveHistory(SortedRawHistory history, string savefilename)
    {
        if (string.IsNullOrEmpty(savefilename))
        {
            Debug.Log("savefilename empty. history not saved.");
            return;
        }
        //serialize the data and write it under the checkpoint path
        var binFormatter = new BinaryFormatter();
        var mStream      = new MemoryStream();

        binFormatter.Serialize(mStream, history);
        byte[] data     = mStream.ToArray();
        var    fullPath = Path.GetFullPath(Path.Combine(checkpointPath, savefilename));

        // Normalize both separator styles to the current platform's separator.
        fullPath = fullPath.Replace('/', Path.DirectorySeparatorChar);
        fullPath = fullPath.Replace('\\', Path.DirectorySeparatorChar);

        Directory.CreateDirectory(Path.GetDirectoryName(fullPath));
        File.WriteAllBytes(fullPath, data);
        Debug.Log("history saved to " + fullPath);
    }
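`LoadHistory`, called from `ResetTrainer()` and `Initialize()`, is not shown in this section. A plausible counterpart to `SaveHistory` above, assuming the bytes are exactly the `BinaryFormatter` payload written there (a sketch, not necessarily the repository's actual implementation):

    public SortedRawHistory LoadHistory(byte[] data)
    {
        // Assumes the bytes were produced by SaveHistory's BinaryFormatter,
        // so deserializing them yields the original SortedRawHistory.
        var binFormatter = new BinaryFormatter();

        using (var mStream = new MemoryStream(data))
        {
            return (SortedRawHistory)binFormatter.Deserialize(mStream);
        }
    }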
Code example #3
    public override void Initialize()
    {
        iModelHPPO = modelRef as IRLModelHPPO;
        Debug.Assert(iModelHPPO != null, "Please assign a model that implements the IRLModelHPPO interface to modelRef");
        parametersPPO = parameters as TrainerParamsPPO;
        Debug.Assert(parametersPPO != null, "Please specify PPO trainer parameters");
        Debug.Assert(BrainToTrain != null, "Brain cannot be null");


        //initialize all data buffers
        statesEpisodeHistory      = new Dictionary <Agent, List <float> >();
        rewardsEpisodeHistory     = new Dictionary <Agent, List <float> >();
        actionsEpisodeHistory     = new Dictionary <Agent, List <float> >();
        actionprobsEpisodeHistory = new Dictionary <Agent, List <float> >();
        valuesEpisodeHistory      = new Dictionary <Agent, List <float> >();
        visualEpisodeHistory      = new Dictionary <Agent, List <List <float[, , ]> > >();
        actionMasksEpisodeHistory = new Dictionary <Agent, List <List <float> > >();

        accumulatedRewards = new Dictionary <Agent, float>();
        episodeSteps       = new Dictionary <Agent, int>();


        var brainParameters = BrainToTrain.brainParameters;

        Debug.Assert(brainParameters.vectorActionSize.Length > 0, "Action size cannot be zero. Please set it in the brain");
        List <DataBuffer.DataInfo> allBufferData = new List <DataBuffer.DataInfo>()
        {
            new DataBuffer.DataInfo("Action", typeof(float), new int[] { brainParameters.vectorActionSpaceType == SpaceType.continuous ? brainParameters.vectorActionSize[0] : brainParameters.vectorActionSize.Length }),
            new DataBuffer.DataInfo("ActionProb", typeof(float), new int[] { brainParameters.vectorActionSpaceType == SpaceType.continuous ? brainParameters.vectorActionSize[0] : brainParameters.vectorActionSize.Length }),
            new DataBuffer.DataInfo("TargetValue", typeof(float), new int[] { 1 }),
            new DataBuffer.DataInfo("OldValue", typeof(float), new int[] { 1 }),
            new DataBuffer.DataInfo("Advantage", typeof(float), new int[] { 1 })
        };

        if (brainParameters.vectorObservationSize > 0)
        {
            allBufferData.Add(new DataBuffer.DataInfo("VectorObservation", typeof(float), new int[] { brainParameters.vectorObservationSize *brainParameters.numStackedVectorObservations }));
        }

        for (int i = 0; i < brainParameters.cameraResolutions.Length; ++i)
        {
            int width  = brainParameters.cameraResolutions[i].width;
            int height = brainParameters.cameraResolutions[i].height;
            // Black-and-white cameras produce a single channel; color cameras produce RGB.
            int channels = brainParameters.cameraResolutions[i].blackAndWhite ? 1 : 3;

            allBufferData.Add(new DataBuffer.DataInfo("VisualObservation" + i, typeof(float), new int[] { height, width, channels }));
        }

        if (brainParameters.vectorActionSpaceType == SpaceType.discrete)
        {
            for (int i = 0; i < brainParameters.vectorActionSize.Length; ++i)
            {
                allBufferData.Add(new DataBuffer.DataInfo("ActionMask" + i, typeof(float), new int[] { brainParameters.vectorActionSize[i] }));
            }
        }

        dataBuffer                = new DataBuffer(allBufferData.ToArray());
        policyTrainBuffer         = new DataBuffer(allBufferData.ToArray());
        tempGoodTrainBuffer       = new DataBuffer(allBufferData.ToArray());
        policyTrainEpisodeHistory = new SortedRawHistory();
        goodEpisodeHistory        = new SortedRawHistory(goodHistoryCount);

        if (exampleEpisodes != null)
        {
            goodEpisodeHistory = LoadHistory(exampleEpisodes.bytes);
        }
        //initialize the stats logger and the neural network model
        stats = new StatsLogger();

        modelRef.Initialize(BrainToTrain.brainParameters, isTraining, parameters);
        if (continueFromCheckpoint)
        {
            LoadModel();
        }
    }
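The ternary used for the "Action" and "ActionProb" shapes encodes one rule: a continuous action space stores `vectorActionSize[0]` floats per step (one per action dimension), while a discrete space stores one float per action branch. A hypothetical helper, not present in the source, that makes the rule explicit:

    // Hypothetical refactoring of the repeated ternary in Initialize(), for clarity only.
    static int ActionBufferLength(BrainParameters brainParameters)
    {
        // Continuous: vectorActionSize[0] is the number of action dimensions.
        // Discrete:   vectorActionSize.Length is the number of branches, each
        //             contributing one chosen-action value per step.
        return brainParameters.vectorActionSpaceType == SpaceType.continuous
               ? brainParameters.vectorActionSize[0]
               : brainParameters.vectorActionSize.Length;
    }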