Beispiel #1
0
 /// <summary>
 /// Merges a batch of observations into the running average reward stored for a
 /// state/action pair and increases the pair's observation count.
 /// </summary>
 /// <param name="s1">State whose hash code is used as the first table index.</param>
 /// <param name="a">Action whose integer value is used as the second table index.</param>
 /// <param name="reward">Reward to merge; it is weighted by <paramref name="freq"/>.</param>
 /// <param name="freq">Number of observations that <paramref name="reward"/> stands for.</param>
 public void AddResult(State s1, Action a, double reward, int freq)
 {
     // NOTE(review): assumes State.GetHashCode() yields a valid index into the tables - confirm.
     int row = s1.GetHashCode();
     int column = (int)a;

     double previousCount = (double)Frequencies[row, column];
     double totalCount = previousCount + (double)freq;

     // With a total of zero observations both weights below would be 0/0 and the
     // stored average would become NaN, so the average is left untouched in that case.
     if (totalCount != 0.0)
     {
         ExpectedReward[row, column] = ExpectedReward[row, column] * (previousCount / totalCount) + reward * ((double)freq / totalCount);
     }

     Frequencies[row, column] += freq;
 }
Beispiel #2
0
 /// <summary>
 /// Looks up the desirability values stored for the given state.
 /// </summary>
 /// <param name="s1">State whose hash code is used as the lookup key.</param>
 /// <returns>The entry of Desirability stored under that hash code.</returns>
 public double[] GetDesirabilities(State s1)
 {
     int key = s1.GetHashCode();
     return Desirability[key];
 }
Beispiel #3
0
        /// <summary>
        /// Builds the Qrelative table once, seeding it with any Q-values found in the
        /// file named by logRelative.
        /// </summary>
        /// <remarks>
        /// Returns immediately when Qrelative is already set. Each non-blank log line is
        /// parsed as "stateCode,actionCode,q". State/action pairs that do not appear in
        /// the log start with a Q-value of 0.0.
        /// </remarks>
        public void initRelativeQ()
        {
            if (Qrelative != null) return;

            Dictionary<int, Dictionary<int, double>> knownQ = new Dictionary<int, Dictionary<int, double>>();

            if (File.Exists(logRelative))
            {
                // Read the known relative Q-values from the log. The using blocks release
                // the file handle even when a line fails to parse.
                using (FileStream fs = new FileStream(logRelative, FileMode.Open, FileAccess.Read))
                using (StreamReader sr = new StreamReader(fs))
                {
                    string line;
                    while ((line = sr.ReadLine()) != null)
                    {
                        // Skip blank lines instead of failing in int.Parse.
                        if (line.Trim().Length == 0) continue;

                        string[] parts = line.Split(',');
                        int loggedState = int.Parse(parts[0]);
                        int loggedAction = int.Parse(parts[1]);
                        // NOTE(review): parsed with the current culture - confirm this matches the code that writes the log.
                        double loggedQ = double.Parse(parts[2]);

                        Dictionary<int, double> loggedForState;
                        if (!knownQ.TryGetValue(loggedState, out loggedForState))
                        {
                            loggedForState = new Dictionary<int, double>();
                            knownQ[loggedState] = loggedForState;
                        }
                        loggedForState[loggedAction] = loggedQ;
                    }
                }
            }

            // Initialize the relative Q-table, taking known Q-values into account.

            Qrelative = new Dictionary<State, Dictionary<Action, double>>();

            // Determine all possible actions, that is [towards] and [away from] for every state param.
            foreach (StateParameter parameter in (StateParameter[]) Enum.GetValues(typeof(StateParameter)))
            {
                actions.Add(new Action(parameter, ActionDirection.Towards));
                actions.Add(new Action(parameter, ActionDirection.AwayFrom));
            }

            // This code is not actually prepared to handle multiple StateParameters,
            // but this is not an issue because right now we only have one anyway: OwnAnt.
            Dictionary<StateParameter, int> distances = new Dictionary<StateParameter, int>();
            for (int i = 0; i <= maxDistance; i++)
            {
                distances[StateParameter.OwnAnt] = i;

                // Each state gets its own copy of the distances so later iterations do not alter it.
                State s = new State(new Dictionary<StateParameter, int>(distances));
                Dictionary<Action, double> qForState = new Dictionary<Action, double>();
                Qrelative[s] = qForState;

                // The state's code does not depend on the action, so resolve it once per state.
                Dictionary<int, double> knownForState;
                bool hasKnown = knownQ.TryGetValue(s.GetHashCode(), out knownForState);

                foreach (Action a in actions)
                {
                    double q = 0.0;

                    // Take known Q-value into account
                    if (hasKnown)
                    {
                        double stored;
                        if (knownForState.TryGetValue(a.GetHashCode(), out stored))
                        {
                            q = stored;
                        }
                    }
                    qForState[a] = q;
                }
            }
        }