/// <summary>
/// Builds a percept by picking one element from the environment's valid
/// observations and one from its valid rewards via <c>Utils.RandomElement</c>.
/// </summary>
/// <remarks>
/// The observation is drawn before the reward. No other state is modified
/// in this method body.
/// </remarks>
/// <returns>A tuple of (observation, reward).</returns>
public override Tuple<int, int> GeneratePerceptAndUpdate()
{
    // Constructor arguments are evaluated left to right, so the observation
    // is still drawn first and the reward second.
    return new Tuple<int, int>(
        Utils.RandomElement(this.Environment.ValidObservations),
        Utils.RandomElement(this.Environment.ValidRewards));
}
/// <summary>
/// Plays one round against the agent: records the agent's action, chooses
/// the environment's move, and scores the round from the agent's point of view.
/// </summary>
/// <param name="action">The agent's move; checked with <c>IsValidAction</c> via a debug assertion.</param>
/// <returns>A tuple of (observation, reward), where the observation is the environment's move.</returns>
public override Tuple<int, int> PerformAction(int action)
{
    // Note: the naming is slightly confusing here, because the agent's
    // actions are the environment's observations and vice versa.
    Debug.Assert(this.IsValidAction(action), "non-valid action used " + action);
    this.Action = action;

    // Bias in the environment: if it played Rock in the previous round and
    // the agent lost (i.e. the environment won), it plays Rock again.
    // Otherwise its move is picked from the valid observations.
    bool repeatRock = (this.Observation == Rock) && (this.Reward == RLose);
    this.Observation = repeatRock
        ? Rock
        : Utils.RandomElement(this.ValidObservations);

    bool isDraw = action == this.Observation;
    bool agentLost =
        (action == Rock && Observation == Paper) ||
        (action == Paper && Observation == Scissors) ||
        (action == Scissors && Observation == Rock);

    if (isDraw)
    {
        this.Reward = this.RDraw;
    }
    else
    {
        // Not a draw: either the environment won (agent lost) or the agent won.
        this.Reward = agentLost ? RLose : RWin;
    }

    return new Tuple<int, int>(this.Observation, this.Reward);
}
/// <summary>
/// Chooses the next action by picking an element of the environment's
/// valid actions via <c>Utils.RandomElement</c>; no planning is performed here.
/// </summary>
/// <returns>The chosen action.</returns>
public override int Search()
{
    int chosenAction = Utils.RandomElement(this.Environment.ValidActions);
    return chosenAction;
}
/// <summary>
/// Picks one element from the environment's valid rewards via
/// <c>Utils.RandomElement</c>.
/// </summary>
/// <returns>The chosen reward.</returns>
public int GenerateRandomReward()
{
    int chosenReward = Utils.RandomElement(this.Environment.ValidRewards);
    return chosenReward;
}
/// <summary>
/// Picks one element from the environment's valid actions via
/// <c>Utils.RandomElement</c>.
/// </summary>
/// <returns>The chosen action.</returns>
public int GenerateRandomAction()
{
    int chosenAction = Utils.RandomElement(this.Environment.ValidActions);
    return chosenAction;
}