/// <summary>
/// Runs one step of the coin-flip environment: records the agent's action,
/// draws a new coin observation and scores the action against it.
/// </summary>
/// <param name="action">Action selected by the agent; checked with IsValidAction via Debug.Assert.</param>
/// <returns>A tuple holding the new Observation (Item1) and the resulting Reward (Item2).</returns>
public override Tuple<int, int> PerformAction(int action)
{
    Debug.Assert(this.IsValidAction(action), "non-valid action used " + action);
    this.Action = action;

    // One random draw decides the coin side.
    // Presumably ProbabilisticDecision(p) is true with probability p - confirm in Utils.
    this.Observation = Utils.ProbabilisticDecision(this._probability) ? this.OHead : this.OTail;

    // The action wins only when it equals the observed side.
    this.Reward = (action == this.Observation) ? this.RWin : this.RLose;

    return new Tuple<int, int>(this.Observation, this.Reward);
}
/// <summary>
/// Runs one step of the tiger environment that tracks whether the agent is sitting.
/// Observation and Reward start at ONull / RInvalid and are only overwritten when the
/// action is one of the combinations handled below for the current posture.
/// </summary>
/// <param name="action">Action selected by the agent; checked with IsValidAction via Debug.Assert.</param>
/// <returns>A tuple holding the Observation (Item1) and the Reward (Item2) for this step.</returns>
public override Tuple<int, int> PerformAction(int action)
{
    Debug.Assert(this.IsValidAction(action), "non-valid action used " + action);
    this.Action = action;

    // Defaults that remain in place when the action does not fit the current posture.
    this.Observation = ONull;
    this.Reward = RInvalid;

    if (this.sitting)
    {
        if (action == AListen)
        {
            // Listening reports the tiger's side when the draw succeeds, the gold's side otherwise.
            this.Observation = Utils.ProbabilisticDecision(this.listen_accuracy) ? this.tiger : this.gold;
            this.Reward = RListen;
        }
        else if (action == AStand)
        {
            // Standing up: the observation stays null.
            this.Reward = this.RStand;
            this.sitting = false;
        }
    }
    else if (action == ALeft || action == ARight)
    {
        // A door is opened while standing: compare the chosen side with the tiger's side.
        bool tigerBehindDoor = (action == ALeft) ? (this.tiger == OLeft) : (this.tiger == ORight);
        this.Reward = tigerBehindDoor ? RTiger : RGold;

        // Start a fresh round (reset re-places tiger and gold and seats the agent).
        this.reset();
    }

    return new Tuple<int, int>(this.Observation, this.Reward);
}
/// <summary>
/// Randomly assigns the tiger to one side and the gold to the opposite side,
/// using a single Utils.ProbabilisticDecision(0.5) draw.
/// </summary>
public void place_tiger()
{
    // A true draw puts the tiger on the left; the gold always takes the other side.
    bool tigerOnLeft = Utils.ProbabilisticDecision(0.5);

    this.Tiger = tigerOnLeft ? OLeft : ORight;
    this.Gold = tigerOnLeft ? ORight : OLeft;
}
/// <summary>
/// Puts the tiger and the gold on random, opposite sides and seats the agent.
/// Observation and Reward are not changed here.
/// </summary>
public void reset()
{
    // A true draw puts the tiger on the right; the gold always takes the other side.
    bool tigerOnRight = Utils.ProbabilisticDecision(0.5);

    this.tiger = tigerOnRight ? ORight : OLeft;
    this.gold = tigerOnRight ? OLeft : ORight;

    this.sitting = true;
}
/// <summary>
/// Builds the coin-flip environment: fills the valid action/observation/reward sets
/// from their enums, sets the head probability to the default value, draws an
/// initial observation and starts with a reward of zero.
/// </summary>
/// <param name="options">Configuration passed through to the base constructor; not read here yet (see TODO).</param>
public CoinFlip(Dictionary<string, string> options)
    : base(options)
{
    // Enumerate the permitted values straight from the enum definitions.
    ValidActions = (int[])Enum.GetValues(typeof(ActionsEnum));
    ValidObservations = (int[])Enum.GetValues(typeof(ObservationsEnum));
    ValidRewards = (int[])Enum.GetValues(typeof(RewardEnum));

    // Called after the valid sets are assigned, as in the original ordering.
    base.fill_out_bits();

    // TODO: OPTIONS -> set probability
    this._probability = _defaultProbability;
    Debug.Assert(0 <= this._probability && this._probability <= 1, "probability is set outside [0,1]");

    // Initial percept: one coin draw, no reward yet.
    this.Observation = Utils.ProbabilisticDecision(this._probability) ? this.OHead : this.OTail;
    this.Reward = 0;
}
/// <summary>
/// Runs one step of the tiger environment. Listening yields a noisy observation of the
/// tiger's side; any other action is treated as opening a door, which is scored and
/// followed by a new random placement of the tiger.
/// </summary>
/// <param name="action">Action selected by the agent; checked with IsValidAction via Debug.Assert.</param>
/// <returns>A tuple holding the Observation (Item1) and the Reward (Item2) for this step.</returns>
public override Tuple<int, int> PerformAction(int action)
{
    // Include the offending action in the assertion output so a failure is diagnosable.
    Debug.Assert(this.IsValidAction(action), "non-valid action used " + action);
    this.Action = action;

    if (action == AListen)
    {
        this.Reward = this.RListen;

        // Report the tiger's side when the draw succeeds, the gold's side otherwise.
        if (Utils.ProbabilisticDecision(this.ListenAccuracy))
        {
            this.Observation = this.Tiger;
        }
        else
        {
            this.Observation = this.Gold;
        }
    }
    else
    {
        // Every non-listen action lands here; only a left/right choice that matches
        // the tiger's side is penalised, everything else receives the gold reward.
        bool openedTigerDoor = (action == ALeft && Tiger == OLeft)
                            || (action == ARight && Tiger == ORight);

        if (openedTigerDoor)
        {
            this.Reward = REaten;
        }
        else
        {
            this.Reward = RGold;
        }

        // Opening a door gives no observation and starts a new round.
        this.Observation = ONull;
        this.place_tiger();
    }

    return new Tuple<int, int>(this.Observation, this.Reward);
}