/// <summary>
/// Advances the season task by one step. Steps alternate between two phases,
/// selected by <c>_step % 2</c>: phase 0 is the reward / food step, phase 1 is the eat step.
/// </summary>
/// <param name="action">Action vector; only <c>action[0]</c> (the eat value) is read, and only in phase 0.</param>
/// <returns>
/// The result of <c>GetOutput</c> in phase 0; otherwise a freshly allocated array of
/// <c>OutputCount</c> zeros.
/// </returns>
public override double[] PerformAction(double[] action)
{
    Debug.DLogHeader("SEASON TASK START", true);
    Debug.DLog($"{"Action:",-16} {Utilities.ToString(action, "f4")}", true);
    Debug.DLog($"{"Step:",-16} {_step}", true);

    double reward = 0;
    double[] output = new double[OutputCount];

    // 0 = reward / food step, 1 = eat step
    int phase = _step % 2;

    if (phase == 0)
    {
        Debug.DLog("Task: Reward / Food Step", true);

        // The eat value is compared against the previous food, therefore the "- 2".
        int previousFood = (_step - 2) / 2;
        double eatAmount = action[0];

        reward = Evaluate(eatAmount, previousFood);

        // The observation is built from the raw evaluation, before it is possibly zeroed below.
        output = GetOutput(_step, reward);

        if (!ScoreThisStep(previousFood))
        {
            // No scoring on this step.
            reward = 0;
        }

        _score += reward;

        Debug.DLog($"{"Eating:",-16} {eatAmount}" +
                   $"\n{"Last Was Poisonous:",-16} {Sequence[previousFood].IsPoisonous}" +
                   $"\n{"Score:",-16} {reward.ToString("F4")}" +
                   $"\n{"Total Score:",-16} {_score.ToString("F4")} / {(_step / 2)}" +
                   $"\n{"Max Score:",-16} {Sequence.Length.ToString("F4")}", true);
    }
    else if (phase == 1)
    {
        // Eat step: the all-zero observation allocated above is sent unchanged.
        Debug.DLog("Task: Eat Step", true);
    }

    Debug.DLogHeader("SEASON TASK END", true);

    if (RecordTimeSteps)
    {
        _prevTimeStep = new EnvironmentTimeStep(action, output, reward);
    }

    _step++;
    return output;
}
/// <summary>
/// Advances the season task by one step. Every <c>StepNum</c>-th step (the one where
/// <c>_step % StepNum == StepNum - 1</c>) is a reward step on which <c>action[0]</c> is
/// evaluated; all other steps are normal steps.
/// </summary>
/// <param name="action">Action vector; only <c>action[0]</c> (the eat value) is read, and only on reward steps.</param>
/// <returns>
/// The result of <c>GetOutput</c>: called with the raw evaluation on reward steps and with
/// <c>-1</c> on normal steps.
/// </returns>
public override double[] PerformAction(double[] action)
{
    Debug.DLogHeader("SEASON TASK START", true);
    Debug.DLog($"{"Action:",-16} {Utilities.ToString(action, "f4")}", true);
    Debug.DLog($"{"Step:",-16} {_step}", true);

    double thisEvaluation = 0;
    double[] observation;
    int task = _step % StepNum;

    if (task == StepNum - 1)
    {
        Debug.DLog("Task: Reward Step", true);

        int foodIndex = _step / StepNum;
        double eatVal = action[0];

        thisEvaluation = Evaluate(eatVal, foodIndex);

        // The observation is built from the raw evaluation, before it is possibly zeroed below.
        observation = GetOutput(_step, thisEvaluation);

        if (!ScoreThisStep(foodIndex))
        {
            // No scoring on this step.
            thisEvaluation = 0;
        }

        _score += thisEvaluation;

        // The log previously divided by a hard-coded 3; it now uses StepNum so that it stays
        // in sync with the task layout used above.
        // NOTE(review): the "- 1" offset is kept from the original, which means the logged
        // entry is not the index passed to Evaluate (and is -1 when foodIndex is 0) - confirm
        // whether this offset is intended.
        Debug.DLog($"{"Eating:",-16} {eatVal}" +
                   $"\n{"Poisonous:",-16} {Sequence[foodIndex - 1].IsPoisonous}" +
                   $"\n{"Score:",-16} {thisEvaluation.ToString("F4")}" +
                   $"\n{"Total Score:",-16} {_score.ToString("F4")} / {foodIndex - 1}" +
                   $"\n{"Max Score:",-16} {Sequence.Length.ToString("F4")}", true);
    }
    else
    {
        Debug.DLog($"Task: Normal Step {(_step / StepNum) - 1}", true);
        observation = GetOutput(_step, -1);
    }

    Debug.DLogHeader("SEASON TASK END", true);

    if (RecordTimeSteps)
    {
        _prevTimeStep = new EnvironmentTimeStep(action, observation, thisEvaluation);
    }

    _step++;
    return observation;
}
/// <summary>
/// Appends a recorded time step built from the environment time step alone.
/// </summary>
/// <param name="environmentTimeStep">Environment time step to wrap and store.</param>
public void Record(EnvironmentTimeStep environmentTimeStep)
{
    TimeStep entry = new TimeStep(environmentTimeStep);
    _recordedTimeSteps.Add(entry);
}
/// <summary>
/// Appends a recorded time step built from an environment time step and a Turing machine time step.
/// </summary>
/// <param name="environmentTimeStep">Environment time step to store.</param>
/// <param name="turingTimeStep">Turing machine time step to store alongside it.</param>
public void Record(EnvironmentTimeStep environmentTimeStep, TuringMachineTimeStep turingTimeStep)
{
    TimeStep entry = new TimeStep(environmentTimeStep, turingTimeStep);
    _recordedTimeSteps.Add(entry);
}
/// <summary>
/// Creates a time step that carries only an environment time step; the Turing machine
/// time step is set to <c>default(TuringMachineTimeStep)</c>.
/// </summary>
/// <param name="environmentTimeStep">Environment time step to store.</param>
public TimeStep(EnvironmentTimeStep environmentTimeStep)
    : this(environmentTimeStep, default(TuringMachineTimeStep))
{
}
/// <summary>
/// Creates a time step from an environment time step and a Turing machine time step.
/// </summary>
/// <param name="environmentTimeStep">Value assigned to <c>EnvironmentTimeStep</c>.</param>
/// <param name="turingMachineTimeStep">Value assigned to <c>TuringMachineTimeStep</c>.</param>
public TimeStep(EnvironmentTimeStep environmentTimeStep, TuringMachineTimeStep turingMachineTimeStep)
{
    this.EnvironmentTimeStep = environmentTimeStep;
    this.TuringMachineTimeStep = turingMachineTimeStep;
}
/// <summary>
/// Scores <c>action[0]</c> against the XOR of the previous sequence entry, then advances one step.
/// The target is 1 when exactly one of the previous entry's first two elements equals 1, else 0.
/// </summary>
/// <param name="action">Action vector; only <c>action[0]</c> is read.</param>
/// <returns>The observation returned by <c>GetObservation</c> for the current step (before the step counter is incremented).</returns>
public override double[] PerformAction(double[] action)
{
    double[] previousInput = _sequence[_step - 1];

    // An element counts as "set" only when it is exactly 1.
    bool firstSet = previousInput[0] == 1d;
    bool secondSet = previousInput[1] == 1d;

    // XOR of two booleans: true when they differ.
    double expected;
    if (firstSet != secondSet)
    {
        expected = 1d;
    }
    else
    {
        expected = 0d;
    }

    // NOTE(review): Absolute is defined elsewhere; presumably it derives a score from the
    // difference between the target and the action - confirm.
    double stepScore = Absolute(expected, action[0]);
    _score += stepScore;

    // The observation is fetched for the current step, then the counter moves on.
    double[] observation = GetObservation(_step);
    _step++;

    if (RecordTimeSteps)
    {
        _prevTimeStep = new EnvironmentTimeStep(action, observation, stepScore);
    }

    return observation;
}