/// <summary>
/// Blends a new reward observation into the running weighted average kept in
/// <c>ExpectedReward</c> for the given state/action pair, and adds
/// <paramref name="freq"/> to the matching entry of <c>Frequencies</c>.
/// </summary>
/// <param name="s1">State whose hash code is used as the first table index.</param>
/// <param name="a">Action whose integer value is used as the second table index.</param>
/// <param name="reward">Reward value to blend in, weighted by <paramref name="freq"/>.</param>
/// <param name="freq">Weight given to <paramref name="reward"/> relative to the stored frequency.</param>
public void AddResult(State s1, Action a, double reward, int freq)
{
    int stateIndex = s1.GetHashCode();
    int actionIndex = (int)a;

    // A zero-weight sample carries no information. Without this guard, a pair
    // that has never been seen (stored frequency 0) would compute 0.0 / 0.0,
    // and the resulting NaN would stick in ExpectedReward for good.
    if (freq == 0)
    {
        return;
    }

    double oldCount = (double)Frequencies[stateIndex, actionIndex];
    double newCount = oldCount + (double)freq;

    // Weighted mean: the old estimate keeps oldCount/newCount of the weight,
    // the new reward receives freq/newCount.
    double oldShare = oldCount / newCount;
    double newShare = (double)freq / newCount;
    ExpectedReward[stateIndex, actionIndex] =
        ExpectedReward[stateIndex, actionIndex] * oldShare + reward * newShare;

    Frequencies[stateIndex, actionIndex] += freq;
}
/// <summary>
/// Looks up the desirability values stored for a state.
/// </summary>
/// <param name="s1">State whose hash code selects the entry in <c>Desirability</c>.</param>
/// <returns>The array stored in <c>Desirability</c> under that hash code.</returns>
public double[] GetDesirabilities(State s1)
{
    int stateIndex = s1.GetHashCode();
    return Desirability[stateIndex];
}
/// <summary>
/// Builds the relative Q-table <c>Qrelative</c> on first use. Does nothing when
/// the table already exists.
/// </summary>
/// <remarks>
/// When the file named by <c>logRelative</c> exists, every non-blank line is parsed as
/// <c>stateCode,actionCode,q</c>. The parsed value seeds the entry whose state and
/// action hash codes match; every other entry starts at 0.0.
/// As a side effect, one <c>Towards</c> and one <c>AwayFrom</c> action per
/// <c>StateParameter</c> value is appended to <c>actions</c>.
/// </remarks>
public void initRelativeQ()
{
    if (Qrelative != null)
    {
        return;
    }

    Dictionary<int, Dictionary<int, double>> knownQ = new Dictionary<int, Dictionary<int, double>>();
    if (File.Exists(logRelative))
    {
        // Read the known relative Q-values from the log. The using blocks release
        // the file handle even when a line fails to parse.
        using (FileStream fs = new FileStream(logRelative, FileMode.Open, FileAccess.Read))
        using (StreamReader sr = new StreamReader(fs))
        {
            string line;
            while ((line = sr.ReadLine()) != null)
            {
                // Tolerate blank lines (e.g. a stray trailing newline) rather than
                // failing inside int.Parse.
                if (line.Trim().Length == 0)
                {
                    continue;
                }

                string[] parts = line.Split(',');
                int stateCode = int.Parse(parts[0]);
                int actionCode = int.Parse(parts[1]);
                // NOTE(review): parsing uses the current culture, exactly as before.
                // Confirm the code that writes this log formats numbers the same way.
                double q = double.Parse(parts[2]);

                Dictionary<int, double> qByAction;
                if (!knownQ.TryGetValue(stateCode, out qByAction))
                {
                    qByAction = new Dictionary<int, double>();
                    knownQ[stateCode] = qByAction;
                }
                qByAction[actionCode] = q;
            }
        }
    }

    // Initialize the relative Q-table, taking known Q-values into account.
    Qrelative = new Dictionary<State, Dictionary<Action, double>>();

    // Determine all possible actions, that is [towards] and [away from] for every state param.
    foreach (StateParameter parameter in (StateParameter[])Enum.GetValues(typeof(StateParameter)))
    {
        actions.Add(new Action(parameter, ActionDirection.Towards));
        actions.Add(new Action(parameter, ActionDirection.AwayFrom));
    }

    // This code is not actually prepared to handle multiple StateParameters,
    // but this is not an issue because right now we only have one anyway: OwnAnt.
    Dictionary<StateParameter, int> distances = new Dictionary<StateParameter, int>();
    for (int i = 0; i <= maxDistance; i++)
    {
        distances[StateParameter.OwnAnt] = i;

        // Each state gets its own copy of the distance map, since the map is
        // overwritten on the next iteration.
        State s = new State(new Dictionary<StateParameter, int>(distances));
        Dictionary<Action, double> qRow = new Dictionary<Action, double>();
        Qrelative[s] = qRow;

        // The state's hash code does not depend on the action, so look up the
        // known values for this state once per state.
        Dictionary<int, double> knownRow;
        knownQ.TryGetValue(s.GetHashCode(), out knownRow);

        foreach (Action action in actions)
        {
            // Take known Q-value into account; default to 0.0 otherwise.
            double q = 0.0;
            double knownValue;
            if (knownRow != null && knownRow.TryGetValue(action.GetHashCode(), out knownValue))
            {
                q = knownValue;
            }
            qRow[action] = q;
        }
    }
}