/// <summary>
/// Advances the season task by one step. When <c>_step % 2</c> is 0 (reward / food step) the
/// first action value is evaluated against the previous food item and an observation is built
/// from the result; when it is 1 (eat step) an all-zero observation is returned and nothing is scored.
/// </summary>
/// <param name="action">Controller output; only <c>action[0]</c> is read, and only on reward / food steps.</param>
/// <returns>The observation produced for this step.</returns>
public override double[] PerformAction(double[] action)
{
    Debug.DLogHeader("SEASON TASK START", true);
    Debug.DLog($"{"Action:",-16} {Utilities.ToString(action, "f4")}", true);
    Debug.DLog($"{"Step:",-16} {_step}", true);

    double stepScore = 0;
    double[] output = new double[OutputCount]; // stays all zeros unless this is a reward / food step

    int phase = _step % 2; // 0 = reward / food step, 1 = eat step
    if (phase == 0)
    {
        Debug.DLog("Task: Reward / Food Step", true);

        // The action is compared against the previous food item, hence the "- 2".
        int previousFood = (_step - 2) / 2;
        double eatVal = action[0];

        // The observation is built from the raw evaluation, before the score is possibly zeroed.
        double evaluation = Evaluate(eatVal, previousFood);
        output = GetOutput(_step, evaluation);

        stepScore = ScoreThisStep(previousFood) ? evaluation : 0;
        _score += stepScore;

        string report =
            $"{"Eating:",-16} {eatVal}"
            + $"\n{"Last Was Poisonous:",-16} {Sequence[previousFood].IsPoisonous}"
            + $"\n{"Score:",-16} {stepScore:F4}"
            + $"\n{"Total Score:",-16} {_score:F4} / {(_step / 2)}"
            + $"\n{"Max Score:",-16} {Sequence.Length:F4}";
        Debug.DLog(report, true);
    }
    else if (phase == 1)
    {
        // Nothing to evaluate: the zero observation is sent as-is.
        Debug.DLog("Task: Eat Step", true);
    }

    Debug.DLogHeader("SEASON TASK END", true);

    if (RecordTimeSteps)
    {
        _prevTimeStep = new EnvironmentTimeStep(action, output, stepScore);
    }

    _step++;
    return output;
}
/// <summary>
/// Scores the XY guess derived from <paramref name="action"/> against the target centre of the
/// previous step, then returns the flattened image for the current step (or an all-zero array of
/// <c>XDim * YDim</c> values once the images are exhausted).
/// </summary>
/// <param name="action">Controller output, converted to coordinates via <c>GetXYCoordinates</c>.</param>
/// <returns>The observation for the current step.</returns>
public override double[] PerformAction(double[] action)
{
    // The guess is judged against the target shown on the previous step.
    var score = Evaluate(GetXYCoordinates(action), _targetCenters[_step - 1]);
    _score += score;

    double[] observation;
    if (_step < _images.Length)
    {
        observation = Flatten2DArray(_images[_step]);
    }
    else
    {
        // Past the last image: emit a blank frame.
        observation = new double[XDim * YDim];
    }

    _step++;

    if (RecordTimeSteps)
    {
        _prevTimeStep = new EnvironmentTimeStep(action, observation, score);
    }

    return observation;
}
/// <summary>
/// Scores <paramref name="action"/> against the previous sequence element, then returns the
/// current sequence element (or an all-zero array of <c>OutputCount</c> values once the
/// sequence is exhausted).
/// </summary>
/// <param name="action">Controller output to evaluate.</param>
/// <returns>The observation for the current step.</returns>
public override double[] PerformAction(double[] action)
{
    // The action answers the element presented on the previous step.
    var score = Evaluate(action, _sequence[_step - 1]);
    _score += score;

    double[] observation;
    if (_step < _sequence.Length)
    {
        observation = _sequence[_step];
    }
    else
    {
        // Sequence exhausted: emit zeros.
        observation = new double[OutputCount];
    }

    _step++;

    if (RecordTimeSteps)
    {
        _prevTimeStep = new EnvironmentTimeStep(action, observation, score);
    }

    return observation;
}
/// <summary>
/// Advances the copy task by one step. The observation for the current step is always returned;
/// once <c>_step</c> reaches <c>Sequence.Length + 3</c> the action is additionally treated as a
/// read-back and scored against the matching sequence element.
/// </summary>
/// <param name="action">Controller output; interpreted as the reading during the read-back phase.</param>
/// <returns>The observation for the current step.</returns>
public override double[] PerformAction(double[] action)
{
    Debug.DLogHeader("COPYTASK START", true);
    Debug.DLog($"{"Action:",-16} {Utilities.ToString(action, "f4")}", true);
    Debug.DLog($"{"Step:",-16} {_step}", true);

    double[] observation = GetObservation(_step);
    double score = 0;

    // Reading starts after |seq| + 2 + 1 steps: the +2 covers the start and delimiter bits,
    // the +1 is the idle step after the delimiter that allows for content jumping.
    int readIndex = _step - Sequence.Length - 2 - 1;
    bool isReading = readIndex >= 0;

    if (isReading)
    {
        double[] expected = Sequence[readIndex];

        score = Evaluate(expected, action);
        _score += score;

        bool recordNovelty = NoveltySearch.ScoreNovelty
                             && NoveltySearch.VectorMode == NoveltyVectorMode.EnvironmentAction;
        if (recordNovelty)
        {
            // Register the read as novelty behaviour.
            NoveltySearch.NoveltyVectors[readIndex] = action;
        }

        string report =
            $"{"Reading:",-16} {Utilities.ToString(action, "F2")}"
            + $"\n{"Actual:",-16} {Utilities.ToString(expected, "F2")}"
            + $"\n{"Score:",-16} {score:F4}"
            + $"\n{"Total Score:",-16} {_score:F4} / {_step - Sequence.Length - 1}"
            + $"\n{"Max Score:",-16} {Sequence.Length:F0}";
        Debug.DLog(report, true);
    }

    Debug.DLogHeader("COPYTASK END", true);

    if (RecordTimeSteps)
    {
        _prevTimeStep = new EnvironmentTimeStep(action, observation, score);
    }

    _step++;
    return observation;
}
/// <summary>
/// Advances the season task by one step. On the last step of each <c>StepNum</c>-step cycle
/// (the reward step) the first action value is evaluated and an observation is built from the
/// result; on every other step the observation is built with an evaluation of <c>-1</c> and
/// nothing is scored.
/// </summary>
/// <param name="action">Controller output; only <c>action[0]</c> is read, and only on reward steps.</param>
/// <returns>The observation produced for this step.</returns>
public override double[] PerformAction(double[] action)
{
    Debug.DLogHeader("SEASON TASK START", true);
    Debug.DLog($"{"Action:",-16} {Utilities.ToString(action, "f4")}", true);
    Debug.DLog($"{"Step:",-16} {_step}", true);

    double thisEvaluation = 0;
    double[] observation;

    int task = _step % StepNum;
    int cycle = _step / StepNum;

    if (task == StepNum - 1)
    {
        Debug.DLog($"Task: Reward Step", true);
        double eatVal = action[0];

        // The observation is built from the raw evaluation, before the score is possibly zeroed.
        thisEvaluation = Evaluate(eatVal, cycle);
        observation = GetOutput(_step, thisEvaluation);

        if (!ScoreThisStep(cycle))
        {
            thisEvaluation = 0;
        }

        _score += thisEvaluation;

        // The log previously divided by a hard-coded 3; it now uses StepNum so the reported
        // item and count stay in step with the cycle length used everywhere else in this method.
        Debug.DLog($"{"Eating:",-16} {eatVal}" +
                   $"\n{"Poisonous:",-16} {Sequence[cycle - 1].IsPoisonous}" +
                   $"\n{"Score:",-16} {thisEvaluation.ToString("F4")}" +
                   $"\n{"Total Score:",-16} {_score.ToString("F4")} / {cycle - 1}" +
                   $"\n{"Max Score:",-16} {Sequence.Length.ToString("F4")}", true);
    }
    else
    {
        Debug.DLog($"Task: Normal Step {cycle - 1}", true);
        observation = GetOutput(_step, -1);
    }

    Debug.DLogHeader("SEASON TASK END", true);

    if (RecordTimeSteps)
    {
        _prevTimeStep = new EnvironmentTimeStep(action, observation, thisEvaluation);
    }

    _step++;
    return observation;
}
/// <summary>
/// Scores <c>action[0]</c> against the XOR of the first two values of the previous sequence
/// element (each value counts as set when it equals exactly 1), then returns the observation
/// for the current step.
/// </summary>
/// <param name="action">Controller output; only <c>action[0]</c> is read.</param>
/// <returns>The observation for the current step.</returns>
public override double[] PerformAction(double[] action)
{
    double[] previous = _sequence[_step - 1];

    bool firstSet = previous[0] == 1d;
    bool secondSet = previous[1] == 1d;

    double expected;
    if (firstSet ^ secondSet)
    {
        expected = 1d;
    }
    else
    {
        expected = 0d;
    }

    double score = Absolute(expected, action[0]);
    _score += score;

    double[] observation = GetObservation(_step);
    _step++;

    if (RecordTimeSteps)
    {
        _prevTimeStep = new EnvironmentTimeStep(action, observation, score);
    }

    return observation;
}
/// <summary>
/// Advances the season task by one step. Every step is a reward / food step: the first action
/// value is evaluated against the previous item, an observation is built from that evaluation,
/// and the score is added to the running total unless <c>ScoreThisStep</c> rejects the step.
/// </summary>
/// <param name="action">Controller output; only <c>action[0]</c> is read.</param>
/// <returns>The observation produced for this step.</returns>
public override double[] PerformAction(double[] action)
{
    Debug.DLogHeader("SEASON TASK START", true);
    Debug.DLog($"{"Action:",-16} {Utilities.ToString(action, "f4")}", true);
    Debug.DLog($"{"Step:",-16} {_step}", true);

    Debug.DLog("Task: Reward / Food Step", true);
    double eatVal = action[0];

    // The observation is built from the raw evaluation, before the score is possibly zeroed.
    // (The former `new double[OutputCount]` initialiser was always overwritten here, so the
    // per-step allocation has been dropped.)
    double thisScore = Evaluate(eatVal, _step - 1);
    double[] observation = GetOutput(_step, thisScore);

    // NOTE(review): Evaluate is indexed with _step - 1 but ScoreThisStep with _step — confirm
    // this offset is intentional.
    if (!ScoreThisStep(_step)) // no scoring here
    {
        thisScore = 0;
    }

    _score += thisScore;

    Debug.DLog($"{"Eating:",-16} {eatVal}" +
               $"\n{"Last Was Poisonous:",-16} {Sequence[_step - 1].IsPoisonous}" +
               $"\n{"Score:",-16} {thisScore.ToString("F4")}" +
               $"\n{"Total Score:",-16} {_score.ToString("F4")} / {_step}" +
               $"\n{"Max Score:",-16} {Sequence.Length.ToString("F4")}", true);

    Debug.DLogHeader("SEASON TASK END", true);

    if (RecordTimeSteps)
    {
        _prevTimeStep = new EnvironmentTimeStep(action, observation, thisScore);
    }

    _step++;
    return observation;
}
/// <summary>
/// Moves the agent by the unit vector derived from <paramref name="action"/>, scores the move by
/// its cosine similarity to the current target vector, possibly changes direction, and returns
/// the environment output for the new agent position.
/// </summary>
/// <param name="action">Controller output, converted to a move via <c>GetUnitVector</c>.</param>
/// <returns>The environment output calculated for the updated agent position.</returns>
/// <exception cref="InvalidOperationException">The computed score is NaN.</exception>
public override double[] PerformAction(double[] action)
{
    var move = GetUnitVector(action);
    var targetVector = GetTargetVector(_currentTarget);

    // A (0, 0) move scores nothing; otherwise the similarity is shifted by +1.
    var thisScore = new Tuple<double, double>(0, 0).Equals(move)
        ? 0.0
        : GetCosineSimilarity(move, targetVector) + 1; // Score between 0 and 2

    var newPosition = AddVectors(_agentPosition, move);

    // `thisScore == Double.NaN` is always false because NaN never compares equal to anything,
    // so the original guard could not fire; double.IsNaN is the working check.
    if (double.IsNaN(thisScore))
    {
        throw new InvalidOperationException("Score evaluated to NaN for the current move and target vector.");
    }

    _currentScore += thisScore;
    _agentPosition = newPosition;
    MaybeChangeDirection();

    var result = CalculateEnvironmentOutput(_agentPosition);

    if (RecordTimeSteps)
    {
        _previousTimeStep = new EnvironmentTimeStep(action, result, thisScore);
    }

    _step++;
    return result;
}