Code example #1
    void FixedUpdate()
    {
        int bestAction = expert.EvaluateAction(rMemory, threshold, samples);
        int lastAction = learner.CallAction(bestAction);

        expert.AddToMemory(CLAB);
        float predictionError = expert.PredictionError(memory, CLAB, lastAction);

        //Debug.Log("prediction Error" + predictionError);
        kga.AddToErrorMemory(predictionError);
        float meanError = kga.MeanError(eMemory, samples);
        //Debug.Log("mean Error" + meanError);
        float metaError = kga.MetaM(eMemory, samples, offset);
        //Debug.Log("meta Error" + meanError);
        float reward = kga.Reward(meanError, metaError);

        Debug.Log("reward" + reward * 10);
        expert.AddToRewardMemory(CLAB, reward * 10, lastAction);
    }
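
Both snippets lean on the `kga` helpers, which are not shown here. Below is a minimal sketch of what they might look like, assuming the KGA reward is learning progress: the mean prediction error over an older window (shifted back by `offset`) minus the mean over the most recent window. The class name `KgaSketch` and all method bodies are my own assumptions, not the actual implementation.

    using System;
    using System.Collections.Generic;
    using System.Linq;

    // Hypothetical sketch of the KGA helpers used above.
    // Assumed semantics: reward = learning progress = older-window mean error
    // minus recent-window mean error, so a shrinking error yields a positive reward.
    public class KgaSketch
    {
        // Error memory; assumed to be the same list the caller passes in as eMemory.
        public List<float> ErrorMemory { get; } = new List<float>();

        public void AddToErrorMemory(float predictionError)
        {
            ErrorMemory.Add(predictionError);
        }

        // Mean of the last 'samples' entries in the error memory.
        public float MeanError(List<float> eMemory, int samples)
        {
            int start = Math.Max(0, eMemory.Count - samples);
            var window = eMemory.Skip(start).ToList();
            return window.Count == 0 ? 0f : window.Average();
        }

        // Mean of a window of the same size, shifted back by 'offset' entries.
        public float MetaM(List<float> eMemory, int samples, int offset)
        {
            int end = Math.Max(0, eMemory.Count - offset);
            int start = Math.Max(0, end - samples);
            var window = eMemory.Skip(start).Take(end - start).ToList();
            return window.Count == 0 ? 0f : window.Average();
        }

        // Positive when the recent mean error is below the older one.
        public float Reward(float meanError, float metaError)
        {
            return metaError - meanError;
        }
    }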
Code example #2
    void FixedUpdate()
    {
        int bestAction = expert.EvaluateAction(rMemory, threshold, samples, availableAcions);

        sensorReading = ReadSensors(sensors);
        predictionModel.SensoryActionDataAtT.Add(double.Parse(sensorReading.ToString() + bestAction.ToString()));
        double prediction = expert.MakePredition(double.Parse(sensorReading.ToString() + bestAction.ToString()));

        availableAcions[bestAction].DoAction(CLAB);

        // Now at t+1: observe the sensory outcome of the chosen action.
        sensorReading = ReadSensors(sensors);
        predictionModel.SensoryDataAtTPlus1.Add(sensorReading);
        int lastAction = bestAction;

        float predictionError = expert.PredictionError(prediction, sensorReading);

        //Debug.Log("Prediction Error:" + predictionError);
        //Debug.Log("prediction Error" + predictionError);
        kga.AddToErrorMemory(predictionError);
        float meanError = kga.MeanError(eMemory, samples);
        //Debug.Log("mean Error" + meanError);
        float metaError = kga.MetaM(eMemory, samples, MetaMemOffset);
        //Debug.Log("meta Error" + meanError);
        float reward = kga.Reward(meanError, metaError);

        Debug.Log("Reward: " + reward * 10 + " To Action: " + lastAction);
        expert.AddToRewardMemory(reward * 10, lastAction);

        CullErrorMemory(eMemory, MaxMemorySamples);
        CullRewardMemory(rMemory, MaxMemorySamples);
        CullPredictionMemory(predictionModel.SensoryDataAtTPlus1, MaxMemorySamples);
        CullPredictionMemory(predictionModel.SensoryActionDataAtT, MaxMemorySamples);
        UpdateThreshold();
        // Debug.Log("New Threshold: " + threshold);
    }
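
The culling helpers and `UpdateThreshold` are also defined elsewhere. As a rough sketch, assuming culling simply trims each memory to its most recent `MaxMemorySamples` entries (the generic helper and its name below are purely illustrative):

    using System.Collections.Generic;

    // Hypothetical trimming helper: drop the oldest entries once a memory list
    // grows past maxSamples, so every window used above stays bounded.
    public static class MemoryCullingSketch
    {
        public static void Cull<T>(List<T> memory, int maxSamples)
        {
            if (memory.Count > maxSamples)
            {
                memory.RemoveRange(0, memory.Count - maxSamples);
            }
        }
    }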