/// <summary>
/// Writes one text record describing <paramref name="qstate"/> and the chosen action
/// to the data file "count.dat" obtained from <c>GetDataFile</c>.
/// The record is the five state components truncated to <c>int</c>, followed by the
/// action number, separated by single spaces and terminated by a newline.
/// </summary>
/// <param name="qstate">State whose components are written.</param>
/// <param name="actionNumber">Action number appended as the last field of the record.</param>
/// <returns><c>true</c> when no exception was raised; <c>false</c> when any exception occurred.</returns>
private bool WriteToFile(QState qstate, int actionNumber)
{
    try
    {
        using (Stream dataFile = GetDataFile("count.dat"))
        using (var writer = new StreamWriter(dataFile))
        {
            // The writer's base stream is the very stream it was built on, so this copies
            // the data file into itself.
            // NOTE(review): presumably meant to skip past existing content before writing -
            // confirm the intent and what it does to the file contents.
            dataFile.CopyTo(writer.BaseStream);

            // Built only after the copy so that evaluation order matches the statement order.
            var record = string.Format(
                "{0} {1} {2} {3} {4} {5}",
                (int)qstate.DistanceToEnemy,
                (int)qstate.EnemyEnergy,
                (int)qstate.EnemyVelocity,
                (int)qstate.OurEnergy,
                (int)qstate.OurVelocity,
                actionNumber);
            writer.WriteLine(record);
        }

        return true;
    }
    catch (Exception)
    {
        // Every failure is reported to the caller as a plain "false".
        return false;
    }
}
/// <summary>
/// Applies one tabular update to <c>QFunction.Q</c> for the transition
/// <paramref name="prevState"/> -> <paramref name="currState"/>:
/// <c>Q[s, a] = Q[s, a] + alpha * (reward + gamma * max(Q[s', 0..4]) - Q[s, a])</c>.
/// State components are truncated to <c>int</c> and used directly as table indices.
/// </summary>
/// <param name="prevState">State in which the action was taken (indexes the updated cell).</param>
/// <param name="currState">Resulting state (indexes the cells whose maximum is taken).</param>
/// <param name="actionNubmer">Index of the action taken in <paramref name="prevState"/>.</param>
/// <param name="reward">Reward term used in the update.</param>
public void Learn(QState prevState, QState currState, int actionNubmer, double reward)
{
    // Only action indices 0..4 of the successor state are considered.
    const int actionCount = 5;

    var prevDistance = (int)prevState.DistanceToEnemy;
    var prevEnemyEnergy = (int)prevState.EnemyEnergy;
    var prevEnemyVelocity = (int)prevState.EnemyVelocity;
    var prevOurEnergy = (int)prevState.OurEnergy;
    var prevOurVelocity = (int)prevState.OurVelocity;

    var oldValue = QFunction.Q[prevDistance, prevEnemyEnergy, prevEnemyVelocity, prevOurEnergy, prevOurVelocity, actionNubmer];

    var currDistance = (int)currState.DistanceToEnemy;
    var currEnemyEnergy = (int)currState.EnemyEnergy;
    var currEnemyVelocity = (int)currState.EnemyVelocity;
    var currOurEnergy = (int)currState.OurEnergy;
    var currOurVelocity = (int)currState.OurVelocity;

    var successorValues = new double[actionCount];
    for (var action = 0; action < actionCount; action++)
    {
        successorValues[action] = QFunction.Q[currDistance, currEnemyEnergy, currEnemyVelocity, currOurEnergy, currOurVelocity, action];
    }

    var bestSuccessorValue = successorValues.Max();
    var updatedValue = oldValue + alpha * (reward + gamma * bestSuccessorValue - oldValue);

    QFunction.Q[prevDistance, prevEnemyEnergy, prevEnemyVelocity, prevOurEnergy, prevOurVelocity, actionNubmer] = updatedValue;
}
/// <summary>
/// Refreshes the five state components of this instance.
/// A component whose current value is exactly <c>k + 0.0001</c> for some integer
/// <c>k</c> in <c>[0, buckets.Max())</c> is overwritten with the matching component of
/// <paramref name="previousState"/>; any other value is passed through
/// <c>DiscretiseData</c> together with that component's maximum and its index (0..4).
/// </summary>
/// <param name="previousState">State supplying replacement values for matching components.</param>
/// <param name="maxDistance">Maximum passed to <c>DiscretiseData</c> for the distance component.</param>
private void UpdateValue(QState previousState, double maxDistance)
{
    // NOTE(review): presumably "k + 0.0001" is a sentinel for a value that must not be
    // discretised again - confirm against the code that produces these values.
    var sentinels = Enumerable.Range(0, buckets.Max())
                              .Select(k => k + 0.0001)
                              .ToList();

    DistanceToEnemy = sentinels.Contains(DistanceToEnemy)
        ? previousState.DistanceToEnemy
        : DiscretiseData(maxDistance, DistanceToEnemy, 0);

    EnemyEnergy = sentinels.Contains(EnemyEnergy)
        ? previousState.EnemyEnergy
        : DiscretiseData(100, EnemyEnergy, 1);

    EnemyVelocity = sentinels.Contains(EnemyVelocity)
        ? previousState.EnemyVelocity
        : DiscretiseData(Rules.MAX_VELOCITY, EnemyVelocity, 2);

    OurEnergy = sentinels.Contains(OurEnergy)
        ? previousState.OurEnergy
        : DiscretiseData(100, OurEnergy, 3);

    OurVelocity = sentinels.Contains(OurVelocity)
        ? previousState.OurVelocity
        : DiscretiseData(Rules.MAX_VELOCITY, OurVelocity, 4);
}
/// <summary>
/// Overrides <c>Run</c>: performs a fixed opening (move ahead by the larger battlefield
/// dimension, turn the gun and the body right by 90), then loops forever sampling an
/// action, discretising the current state and feeding the transition to
/// <c>QLearn.Learn</c> with the current state's <c>OurEnergy</c> as the reward.
/// </summary>
public override void Run()
{
    var fieldSpan = Math.Max(BattleFieldWidth, BattleFieldHeight);
    var actionSampler = new QAction(this);
    var learner = new QLearn();

    // Loading a stored Q-function is currently switched off:
    // ReadQFunctionFromFile();

    Ahead(fieldSpan);
    TurnGunRight(90);
    TurnRight(90);

    for (;;)
    {
        // NOTE(review): if QState is a reference type, previousState and currentState
        // refer to the same object after this assignment - confirm that is intended.
        previousState = currentState;

        var chosenAction = actionSampler.SampleAction();
        currentState.Discretise(fieldSpan, previousState);

        // The reward is read after discretisation, so it is the discretised energy value.
        var reward = currentState.OurEnergy;
        learner.Learn(previousState, currentState, chosenAction, reward);
    }
}
} // 0 - Rules.MAX_VELOCITY

/// <summary>
/// Public entry point for refreshing this state's components; forwards both arguments
/// to <c>UpdateValue</c>.
/// </summary>
/// <param name="maxDistance">Maximum used when discretising the distance component.</param>
/// <param name="previousState">State handed on to <c>UpdateValue</c> as the previous state.</param>
public void Discretise(double maxDistance, QState previousState)
{
    // Note the swapped argument order expected by UpdateValue.
    this.UpdateValue(previousState, maxDistance);
}