/// <summary>
        /// Advances the season task by one step. Steps alternate between a
        /// reward / food step (even step numbers) and an eat step (odd step numbers).
        /// </summary>
        /// <param name="action">The agent's output; element 0 is read as the eat value on reward steps.</param>
        /// <returns>
        /// The observation for this step: the result of <c>GetOutput</c> on reward / food steps,
        /// otherwise a zero-filled array of length <c>OutputCount</c>.
        /// </returns>
        public override double[] PerformAction(double[] action)
        {
            Debug.DLogHeader("SEASON TASK START", true);
            Debug.DLog($"{"Action:",-16} {Utilities.ToString(action, "f4")}", true);
            Debug.DLog($"{"Step:",-16} {_step}", true);

            double stepScore = 0;
            double[] observation = new double[OutputCount];

            // 0 = reward / food step, 1 = eat step
            int phase = _step % 2;
            if (phase == 0)
            {
                Debug.DLog("Task: Reward / Food Step", true);

                // The action is judged against the previous food item, hence the "- 2".
                int previousFood = (_step - 2) / 2;
                double eatVal = action[0];
                double evaluation = Evaluate(eatVal, previousFood);

                // The observation always carries the raw evaluation, even when it is not scored.
                observation = GetOutput(_step, evaluation);

                // Steps that are excluded from scoring contribute nothing to the total.
                stepScore = ScoreThisStep(previousFood) ? evaluation : 0;
                _score += stepScore;

                Debug.DLog($"{"Eating:",-16} {eatVal}" +
                            $"\n{"Last Was Poisonous:",-16} {Sequence[previousFood].IsPoisonous}" +
                            $"\n{"Score:",-16} {stepScore.ToString("F4")}" +
                            $"\n{"Total Score:",-16} {_score.ToString("F4")} / {_step / 2}" +
                            $"\n{"Max Score:",-16} {Sequence.Length.ToString("F4")}", true);
            }
            else if (phase == 1)
            {
                Debug.DLog("Task: Eat Step", true);
                // Nothing to compute: the observation stays all zeros.
            }

            Debug.DLogHeader("SEASON TASK END", true);

            if (RecordTimeSteps)
            {
                _prevTimeStep = new EnvironmentTimeStep(action, observation, stepScore);
            }

            _step++;
            return observation;
        }
        /// <summary>
        /// Advances the season task by one step. Every cycle of <c>StepNum</c> steps ends with a
        /// reward step on which the action is evaluated; all other steps are normal steps.
        /// </summary>
        /// <param name="action">The agent's output; element 0 is read as the eat value on reward steps.</param>
        /// <returns>The observation produced by <c>GetOutput</c> for the current step.</returns>
        public override double[] PerformAction(double[] action)
        {
            Debug.DLogHeader("SEASON TASK START", true);
            Debug.DLog($"{"Action:",-16} {Utilities.ToString(action, "f4")}", true);
            Debug.DLog($"{"Step:",-16} {_step}", true);
            double thisEvaluation = 0;
            double[] observation;

            // Index of the cycle the current step belongs to. It is computed once so that the
            // evaluation, the scoring check and the log output all refer to the same item.
            int foodIndex = _step / StepNum;

            int task = _step % StepNum;
            if (task == StepNum - 1)
            {
                Debug.DLog("Task: Reward Step", true);
                double eatVal = action[0];
                thisEvaluation = Evaluate(eatVal, foodIndex);

                // The observation always carries the raw evaluation, even when it is not scored.
                observation = GetOutput(_step, thisEvaluation);
                if (!ScoreThisStep(foodIndex))
                {
                    thisEvaluation = 0;
                }
                _score += thisEvaluation;

                // Report the item that was just evaluated. A hard-coded divisor of 3 with a
                // "- 1" offset would index Sequence[-1] on the first reward step and would
                // disagree with Evaluate whenever StepNum is not 3.
                // foodIndex + 1 is the number of reward steps reached so far, this one included
                // (assuming _step started within the first cycle).
                Debug.DLog($"{"Eating:",-16} {eatVal}" +
                           $"\n{"Poisonous:",-16} {Sequence[foodIndex].IsPoisonous}" +
                           $"\n{"Score:",-16} {thisEvaluation.ToString("F4")}" +
                           $"\n{"Total Score:",-16} {_score.ToString("F4")} / {foodIndex + 1}" +
                           $"\n{"Max Score:",-16} {Sequence.Length.ToString("F4")}", true);
            }
            else
            {
                Debug.DLog($"Task: Normal Step {foodIndex - 1}", true);

                // -1 is passed as the evaluation on steps where nothing is evaluated.
                observation = GetOutput(_step, -1);
            }

            Debug.DLogHeader("SEASON TASK END", true);

            if (RecordTimeSteps)
            {
                _prevTimeStep = new EnvironmentTimeStep(action, observation, thisEvaluation);
            }

            _step++;
            return observation;
        }
Beispiel #3
0
 /// <summary>
 /// Wraps the given environment time step in a new <c>TimeStep</c> and appends it
 /// to the recorded time steps.
 /// </summary>
 /// <param name="environmentTimeStep">The environment data to record.</param>
 public void Record(EnvironmentTimeStep environmentTimeStep)
 {
     TimeStep entry = new TimeStep(environmentTimeStep);
     _recordedTimeSteps.Add(entry);
 }
Beispiel #4
0
 /// <summary>
 /// Combines the environment and Turing machine data of one step into a new
 /// <c>TimeStep</c> and appends it to the recorded time steps.
 /// </summary>
 /// <param name="environmentTimeStep">The environment data to record.</param>
 /// <param name="turingTimeStep">The Turing machine data to record alongside it.</param>
 public void Record(EnvironmentTimeStep environmentTimeStep, TuringMachineTimeStep turingTimeStep)
 {
     TimeStep entry = new TimeStep(environmentTimeStep, turingTimeStep);
     _recordedTimeSteps.Add(entry);
 }
Beispiel #5
0
 /// <summary>
 /// Creates a time step that carries only environment data; the Turing machine
 /// part is set to the default value of its type.
 /// </summary>
 /// <param name="environmentTimeStep">The environment data for this step.</param>
 public TimeStep(EnvironmentTimeStep environmentTimeStep)
 {
     this.TuringMachineTimeStep = default(TuringMachineTimeStep);
     this.EnvironmentTimeStep = environmentTimeStep;
 }
Beispiel #6
0
 /// <summary>
 /// Creates a time step from the environment data and the Turing machine data
 /// of the same step.
 /// </summary>
 /// <param name="environmentTimeStep">The environment data for this step.</param>
 /// <param name="turingMachineTimeStep">The Turing machine data for this step.</param>
 public TimeStep(EnvironmentTimeStep environmentTimeStep, TuringMachineTimeStep turingMachineTimeStep)
 {
     this.TuringMachineTimeStep = turingMachineTimeStep;
     this.EnvironmentTimeStep = environmentTimeStep;
 }
        /// <summary>
        /// Scores the action against the exclusive-or of the previous sequence entry and
        /// returns the observation for the current step.
        /// </summary>
        /// <param name="action">The agent's output; element 0 is compared with the target.</param>
        /// <returns>The result of <c>GetObservation</c> for the step number before it is incremented.</returns>
        public override double[] PerformAction(double[] action)
        {
            // The target is derived from the entry of the previous step.
            double[] previousInput = _sequence[_step - 1];

            // Exactly one of the two inputs being 1 yields a target of 1, anything else 0.
            bool firstIsOne = previousInput[0] == 1d;
            bool secondIsOne = previousInput[1] == 1d;
            double target = firstIsOne != secondIsOne ? 1d : 0d;

            double stepScore = Absolute(target, action[0]);
            _score += stepScore;

            // The observation is fetched for the current step, then the counter advances.
            double[] observation = GetObservation(_step);
            _step++;

            if (RecordTimeSteps)
            {
                _prevTimeStep = new EnvironmentTimeStep(action, observation, stepScore);
            }

            return observation;
        }
Beispiel #8
0
 /// <summary>
 /// Creates a time step that carries only environment data; the Turing machine
 /// part is set to the default value of its type.
 /// </summary>
 /// <param name="environmentTimeStep">The environment data for this step.</param>
 public TimeStep(EnvironmentTimeStep environmentTimeStep)
 {
     this.TuringMachineTimeStep = default(TuringMachineTimeStep);
     this.EnvironmentTimeStep = environmentTimeStep;
 }
Beispiel #9
0
 /// <summary>
 /// Creates a time step from the environment data and the Turing machine data
 /// of the same step.
 /// </summary>
 /// <param name="environmentTimeStep">The environment data for this step.</param>
 /// <param name="turingMachineTimeStep">The Turing machine data for this step.</param>
 public TimeStep(EnvironmentTimeStep environmentTimeStep, TuringMachineTimeStep turingMachineTimeStep)
 {
     this.TuringMachineTimeStep = turingMachineTimeStep;
     this.EnvironmentTimeStep = environmentTimeStep;
 }
Beispiel #10
0
 /// <summary>
 /// Wraps the given environment time step in a new <c>TimeStep</c> and appends it
 /// to the recorded time steps.
 /// </summary>
 /// <param name="environmentTimeStep">The environment data to record.</param>
 public void Record(EnvironmentTimeStep environmentTimeStep)
 {
     TimeStep entry = new TimeStep(environmentTimeStep);
     _recordedTimeSteps.Add(entry);
 }
Beispiel #11
0
 /// <summary>
 /// Combines the environment and Turing machine data of one step into a new
 /// <c>TimeStep</c> and appends it to the recorded time steps.
 /// </summary>
 /// <param name="environmentTimeStep">The environment data to record.</param>
 /// <param name="turingTimeStep">The Turing machine data to record alongside it.</param>
 public void Record(EnvironmentTimeStep environmentTimeStep, TuringMachineTimeStep turingTimeStep)
 {
     TimeStep entry = new TimeStep(environmentTimeStep, turingTimeStep);
     _recordedTimeSteps.Add(entry);
 }