/// <summary>
/// Overwrites the Q-values of the state indexed by (r, c, opr, opc, boi) with the
/// per-action mean of the Q-values stored for its k-cyclic neighbors.
/// </summary>
/// <remarks>
/// A neighbor contributes only when its support reaches Params.DM.MinSupport, and then
/// only for the actions whose confidence reaches Params.DM.MinConfidence. Actions that
/// receive no contribution are left untouched. Each written entry increments
/// m_dmUpdatesCount.
/// </remarks>
/// <param name="qtable">Table whose QTableArray is read for the neighbors and written for the target state.</param>
/// <param name="opr">Third Q-table index (presumably the opponent row - confirm against callers).</param>
/// <param name="opc">Fourth Q-table index (presumably the opponent column - confirm against callers).</param>
/// <param name="boi">Fifth Q-table index (ball-owner index, going by the naming used elsewhere in this file).</param>
/// <param name="r">First Q-table index of the target state; also the first argument given to GetKCyclicNeighbors.</param>
/// <param name="c">Second Q-table index of the target state; also the second argument given to GetKCyclicNeighbors.</param>
private void PerformAveraging(QTableBase qtable, int opr, int opc, int boi, int r, int c)
{
    double[] sums = new double[m_actionsCount];
    int[] samples = new int[m_actionsCount];

    // Collect Q-values from every neighbor that is trustworthy enough.
    foreach (var neighbor in GetKCyclicNeighbors(r, c, Params.DM.K))
    {
        bool supported = GetSupport(neighbor[0], neighbor[1], opr, opc, boi) >= Params.DM.MinSupport;
        if (!supported)
        {
            continue;
        }

        for (int action = 0; action < m_actionsCount; ++action)
        {
            bool confident =
                GetActionConfidence(neighbor[0], neighbor[1], opr, opc, boi, action) >= Params.DM.MinConfidence;
            if (!confident)
            {
                continue;
            }

            sums[action] += (double)qtable.QTableArray.GetValue(neighbor[0], neighbor[1], opr, opc, boi, action);
            samples[action]++;
        }
    }

    // Write back the mean for every action that gathered at least one sample.
    for (int action = 0; action < m_actionsCount; ++action)
    {
        if (samples[action] <= 0)
        {
            continue;
        }

        double mean = sums[action] / samples[action];
        qtable.QTableArray.SetValue(mean, r, c, opr, opc, boi, action);
        m_dmUpdatesCount++;
    }
}
/// <summary>
/// Overwrites the Q-values of the state indexed by (tmi, op1i, op2i, boi) with the
/// per-action mean of the Q-values of its k-cyclic neighbors.
/// </summary>
/// <remarks>
/// Neighbors come from GetKCyclicNeighborsForPartialModule. A neighbor contributes only
/// when GetSupportNeighbor reaches Params.DM.MinSupport, and then only for the actions
/// whose GetConfidenceNeighbor reaches Params.DM.MinConfidence. Actions without any
/// contribution are left untouched. Each written entry increments m_dmUpdatesCount.
/// </remarks>
/// <param name="qtable">Table read (through GetQValueNeighbor) and written (through QTableArray).</param>
/// <param name="tmi">First Q-table index of the target state (presumably the teammate location index - confirm).</param>
/// <param name="op1i">Second Q-table index of the target state (presumably the first opponent's location index - confirm).</param>
/// <param name="op2i">Third Q-table index of the target state (presumably the second opponent's location index - confirm).</param>
/// <param name="boi">Fourth Q-table index of the target state (ball-owner index, going by the file's naming).</param>
private void PerformAveraging(QTableBase qtable, int tmi, int op1i, int op2i, int boi)
{
    double[] sums = new double[m_actionsCount];
    int[] samples = new int[m_actionsCount];

    // Collect Q-values from every neighbor that is trustworthy enough.
    foreach (var neighbor in GetKCyclicNeighborsForPartialModule(Params.DM.K))
    {
        bool supported = GetSupportNeighbor(neighbor, tmi, op1i, op2i, boi) >= Params.DM.MinSupport;
        if (!supported)
        {
            continue;
        }

        for (int action = 0; action < m_actionsCount; ++action)
        {
            bool confident =
                GetConfidenceNeighbor(neighbor, tmi, op1i, op2i, boi, action) >= Params.DM.MinConfidence;
            if (!confident)
            {
                continue;
            }

            sums[action] += GetQValueNeighbor(qtable, neighbor, tmi, op1i, op2i, boi, action);
            samples[action]++;
        }
    }

    // Write back the mean for every action that gathered at least one sample.
    for (int action = 0; action < m_actionsCount; ++action)
    {
        if (samples[action] <= 0)
        {
            continue;
        }

        double mean = sums[action] / samples[action];
        qtable.QTableArray.SetValue(mean, tmi, op1i, op2i, boi, action);
        m_dmUpdatesCount++;
    }
}
/// <summary>
/// Visits every (tmi, op1i, op2i, boi) state and, for each state whose support is below
/// Params.DM.MinSupport, refreshes its Q-values from its k-cyclic neighbors using the
/// strategy selected by Params.DM.Method.
/// </summary>
/// <remarks>
/// m_dmUpdatesCount is reset to zero before the sweep starts. A method value other than
/// Averaging, TopQ or Voting leaves the state unchanged.
/// </remarks>
/// <param name="qtable">Table handed to the selected update strategy.</param>
public override void PerformKCyclicNeighborQUpdate(QTableBase qtable)
{
    m_dmUpdatesCount = 0;

    for (int tmi = 0; tmi < m_playerStates; ++tmi)
    {
        for (int op1i = 0; op1i < m_playerStates; ++op1i)
        {
            for (int op2i = 0; op2i < m_playerStates; ++op2i)
            {
                for (int boi = 0; boi < m_ballOwnerStatesCount; ++boi)
                {
                    // Only states with low support need their Q-values rebuilt.
                    double stateSupport = GetSupport(tmi, op1i, op2i, boi);
                    bool lowSupport = stateSupport < Params.DM.MinSupport;
                    if (!lowSupport)
                    {
                        continue;
                    }

                    switch (Params.DM.Method)
                    {
                        case Params.DM.MethodTypes.Averaging:
                            PerformAveraging(qtable, tmi, op1i, op2i, boi);
                            break;

                        case Params.DM.MethodTypes.TopQ:
                            PerformTopQ(qtable, tmi, op1i, op2i, boi);
                            break;

                        case Params.DM.MethodTypes.Voting:
                            PerformVoting(qtable, tmi, op1i, op2i, boi);
                            break;
                    }
                }
            }
        }
    }
}
/// <summary>
/// Visits every (boi, r, c) combination and, for each one whose support is below
/// Params.DM.MinSupport, refreshes its Q-values from its k-cyclic neighbors using the
/// strategy selected by Params.DM.Method.
/// </summary>
/// <remarks>
/// m_dmUpdatesCount is reset to zero before the sweep starts. A method value other than
/// Averaging, TopQ or Voting leaves the state unchanged.
/// </remarks>
/// <param name="qtable">Table handed to the selected update strategy.</param>
public override void PerformKCyclicNeighborQUpdate(QTableBase qtable)
{
    m_dmUpdatesCount = 0;

    // boi stands for ball-owner-index
    for (int boi = 0; boi < m_ballOwnerStatesCount; ++boi)
    {
        for (int r = 0; r < m_rows; ++r)
        {
            for (int c = 0; c < m_cols; ++c)
            {
                // Only states with low support need their Q-values rebuilt.
                double stateSupport = GetSupport(r, c, boi);
                bool lowSupport = stateSupport < Params.DM.MinSupport;
                if (!lowSupport)
                {
                    continue;
                }

                switch (Params.DM.Method)
                {
                    case Params.DM.MethodTypes.Averaging:
                        PerformAveraging(qtable, boi, r, c);
                        break;

                    case Params.DM.MethodTypes.TopQ:
                        PerformTopQ(qtable, boi, r, c);
                        break;

                    case Params.DM.MethodTypes.Voting:
                        PerformVoting(qtable, boi, r, c);
                        break;
                }
            }
        }
    }
}
/// <summary>
/// Reads the Q-value of a neighboring state: each of the three location indices is first
/// mapped through GetMovedLocationIndex with the given neighbor, and the resulting
/// indices (followed by boi and ai) address the entry in the Q-table array.
/// </summary>
/// <param name="qtable">Table whose QTableArray is read.</param>
/// <param name="neighbor">Neighbor descriptor forwarded unchanged to GetMovedLocationIndex.</param>
/// <param name="tmi">Location index mapped to the first Q-table index.</param>
/// <param name="op1i">Location index mapped to the second Q-table index.</param>
/// <param name="op2i">Location index mapped to the third Q-table index.</param>
/// <param name="boi">Fourth Q-table index, used as is.</param>
/// <param name="ai">Fifth Q-table index (action index), used as is.</param>
/// <returns>The stored entry unboxed as a double.</returns>
private double GetQValueNeighbor(QTableBase qtable, int[] neighbor, int tmi, int op1i, int op2i, int boi, int ai)
{
    var table = qtable.QTableArray;

    var movedTm = GetMovedLocationIndex(neighbor, tmi);
    var movedOp1 = GetMovedLocationIndex(neighbor, op1i);
    var movedOp2 = GetMovedLocationIndex(neighbor, op2i);

    object stored = table.GetValue(movedTm, movedOp1, movedOp2, boi, ai);
    return (double)stored;
}
/// <summary>
/// Stores a Q-value for the state described by raw (row, column) pairs. Each of the three
/// other positions is converted with GetPlayerLocationIndex, paired with the position
/// built from (r, c), to obtain the Q-table indices.
/// </summary>
/// <param name="qTable">Table whose QTableArray is written.</param>
/// <param name="value">Q-value to store.</param>
/// <param name="r">Row of the reference position (named myPos in the original code).</param>
/// <param name="c">Column of the reference position.</param>
/// <param name="tmr">Row of the position that yields the first Q-table index.</param>
/// <param name="tmc">Column of the position that yields the first Q-table index.</param>
/// <param name="op1r">Row of the position that yields the second Q-table index.</param>
/// <param name="op1c">Column of the position that yields the second Q-table index.</param>
/// <param name="op2r">Row of the position that yields the third Q-table index.</param>
/// <param name="op2c">Column of the position that yields the third Q-table index.</param>
/// <param name="boi">Fourth Q-table index, used as is.</param>
/// <param name="ai">Fifth Q-table index (action index), used as is.</param>
private void SetQValueFromIndices(QTableBase qTable, double value, int r, int c, int tmr, int tmc, int op1r, int op1c, int op2r, int op2c, int boi, int ai)
{
    var self = new Position(r, c);
    var teammate = new Position(tmr, tmc);
    var firstOpponent = new Position(op1r, op1c);
    var secondOpponent = new Position(op2r, op2c);

    var table = qTable.QTableArray;

    var teammateIndex = GetPlayerLocationIndex(self, teammate);
    var firstOpponentIndex = GetPlayerLocationIndex(self, firstOpponent);
    var secondOpponentIndex = GetPlayerLocationIndex(self, secondOpponent);

    table.SetValue(value, teammateIndex, firstOpponentIndex, secondOpponentIndex, boi, ai);
}
/// <summary>
/// Reads the Q-value for the state described by raw (row, column) pairs. Each of the
/// three other positions is converted with GetPlayerLocationIndex, paired with the
/// position built from (r, c), to obtain the Q-table indices.
/// </summary>
/// <param name="qTable">Table whose QTableArray is read.</param>
/// <param name="r">Row of the reference position (named myPos in the original code).</param>
/// <param name="c">Column of the reference position.</param>
/// <param name="tmr">Row of the position that yields the first Q-table index.</param>
/// <param name="tmc">Column of the position that yields the first Q-table index.</param>
/// <param name="op1r">Row of the position that yields the second Q-table index.</param>
/// <param name="op1c">Column of the position that yields the second Q-table index.</param>
/// <param name="op2r">Row of the position that yields the third Q-table index.</param>
/// <param name="op2c">Column of the position that yields the third Q-table index.</param>
/// <param name="boi">Fourth Q-table index, used as is.</param>
/// <param name="ai">Fifth Q-table index (action index), used as is.</param>
/// <returns>The stored entry unboxed as a double.</returns>
private double GetQValueFromIndices(QTableBase qTable, int r, int c, int tmr, int tmc, int op1r, int op1c, int op2r, int op2c, int boi, int ai)
{
    var self = new Position(r, c);
    var teammate = new Position(tmr, tmc);
    var firstOpponent = new Position(op1r, op1c);
    var secondOpponent = new Position(op2r, op2c);

    var table = qTable.QTableArray;

    var teammateIndex = GetPlayerLocationIndex(self, teammate);
    var firstOpponentIndex = GetPlayerLocationIndex(self, firstOpponent);
    var secondOpponentIndex = GetPlayerLocationIndex(self, secondOpponent);

    object stored = table.GetValue(teammateIndex, firstOpponentIndex, secondOpponentIndex, boi, ai);
    return (double)stored;
}
/// <summary>
/// Overwrites the Q-values of the state indexed by (r, c, opr, opc, boi) with, for each
/// action, the largest Q-value found for that action among its k-cyclic neighbors.
/// </summary>
/// <remarks>
/// A neighbor is consulted only when its support reaches Params.DM.MinSupport, and then
/// only for the actions whose confidence reaches Params.DM.MinConfidence. An action is
/// written back only if at least one neighbor qualified for it and its running maximum
/// moved away from the double.MinValue sentinel. Each written entry increments
/// m_dmUpdatesCount.
/// </remarks>
/// <param name="qtable">Table whose QTableArray is read for the neighbors and written for the target state.</param>
/// <param name="opr">Third Q-table index (presumably the opponent row - confirm against callers).</param>
/// <param name="opc">Fourth Q-table index (presumably the opponent column - confirm against callers).</param>
/// <param name="boi">Fifth Q-table index (ball-owner index, going by the naming used elsewhere in this file).</param>
/// <param name="r">First Q-table index of the target state; also the first argument given to GetKCyclicNeighbors.</param>
/// <param name="c">Second Q-table index of the target state; also the second argument given to GetKCyclicNeighbors.</param>
private void PerformTopQ(QTableBase qtable, int opr, int opc, int boi, int r, int c)
{
    double[] best = new double[m_actionsCount];
    int[] samples = new int[m_actionsCount];

    // Sentinel meaning "no candidate seen yet" for every action.
    for (int slot = 0; slot < best.Length; ++slot)
    {
        best[slot] = double.MinValue;
    }

    foreach (var neighbor in GetKCyclicNeighbors(r, c, Params.DM.K))
    {
        bool supported = GetSupport(neighbor[0], neighbor[1], opr, opc, boi) >= Params.DM.MinSupport;
        if (!supported)
        {
            continue;
        }

        for (int action = 0; action < m_actionsCount; ++action)
        {
            bool confident =
                GetActionConfidence(neighbor[0], neighbor[1], opr, opc, boi, action) >= Params.DM.MinConfidence;
            if (!confident)
            {
                continue;
            }

            double candidate = (double)qtable.QTableArray.GetValue(neighbor[0], neighbor[1], opr, opc, boi, action);
            if (candidate > best[action])
            {
                best[action] = candidate;
            }

            samples[action]++;
        }
    }

    // Write back every action for which a real maximum was found.
    for (int action = 0; action < m_actionsCount; ++action)
    {
        if (samples[action] <= 0 || best[action] == double.MinValue)
        {
            continue;
        }

        qtable.QTableArray.SetValue(best[action], r, c, opr, opc, boi, action);
        m_dmUpdatesCount++;
    }
}
/// <summary>
/// Appends a module to m_modules and appends a zero entry to m_moduleSelectionCounts
/// (presumably a parallel list of per-module selection counters - confirm).
/// </summary>
/// <param name="module">Module to register.</param>
private void AddModule(QTableBase module)
{
    const long initialSelectionCount = 0L;

    m_modules.Add(module);
    m_moduleSelectionCounts.Add(initialSelectionCount);
}
/// <summary>
/// Lets the k-cyclic neighbors of the state indexed by (r, c, opr, opc, boi) vote for
/// their greedy action, then writes a single Q-value for the action with the most votes.
/// </summary>
/// <remarks>
/// A neighbor votes only when its support reaches Params.DM.MinSupport. Its vote goes to
/// the action with the highest Q-value among the actions whose confidence reaches
/// Params.DM.MinConfidence; a neighbor with no such action casts no vote. The value
/// written for the winning action is the largest greedy Q-value any neighbor reported for
/// it. Ties are resolved in favor of the lowest action index. At most one entry is
/// written, and m_dmUpdatesCount is incremented when it is.
/// </remarks>
/// <param name="qtable">Table whose QTableArray is read for the neighbors and written for the target state.</param>
/// <param name="opr">Third Q-table index (presumably the opponent row - confirm against callers).</param>
/// <param name="opc">Fourth Q-table index (presumably the opponent column - confirm against callers).</param>
/// <param name="boi">Fifth Q-table index (ball-owner index, going by the naming used elsewhere in this file).</param>
/// <param name="r">First Q-table index of the target state; also the first argument given to GetKCyclicNeighbors.</param>
/// <param name="c">Second Q-table index of the target state; also the second argument given to GetKCyclicNeighbors.</param>
private void PerformVoting(QTableBase qtable, int opr, int opc, int boi, int r, int c)
{
    double[] bestVotedValue = new double[m_actionsCount];
    int[] votes = new int[m_actionsCount];

    for (int slot = 0; slot < bestVotedValue.Length; ++slot)
    {
        bestVotedValue[slot] = double.MinValue;
    }

    foreach (var neighbor in GetKCyclicNeighbors(r, c, Params.DM.K))
    {
        bool supported = GetSupport(neighbor[0], neighbor[1], opr, opc, boi) >= Params.DM.MinSupport;
        if (!supported)
        {
            continue;
        }

        // Determine this neighbor's greedy action among its confident actions.
        int greedyAction = -1;
        double greedyValue = double.MinValue;

        for (int action = 0; action < m_actionsCount; ++action)
        {
            bool confident =
                GetActionConfidence(neighbor[0], neighbor[1], opr, opc, boi, action) >= Params.DM.MinConfidence;
            if (!confident)
            {
                continue;
            }

            double candidate = (double)qtable.QTableArray.GetValue(neighbor[0], neighbor[1], opr, opc, boi, action);
            if (candidate > greedyValue)
            {
                greedyValue = candidate;
                greedyAction = action;
            }
        }

        if (greedyAction < 0)
        {
            // No confident action: this neighbor abstains.
            continue;
        }

        votes[greedyAction]++;
        if (greedyValue > bestVotedValue[greedyAction])
        {
            bestVotedValue[greedyAction] = greedyValue;
        }
    }

    // Pick the action with the most votes; the strict comparison keeps the first one on ties.
    int winner = -1;
    int winnerVotes = 0;
    for (int action = 0; action < votes.Length; action++)
    {
        if (votes[action] > winnerVotes)
        {
            winner = action;
            winnerVotes = votes[action];
        }
    }

    if (winner < 0 || winnerVotes <= 0)
    {
        return;
    }

    qtable.QTableArray.SetValue(bestVotedValue[winner], r, c, opr, opc, boi, winner);
    m_dmUpdatesCount++;
}
/// <summary>
/// Reads the Q-table entry addressed by (tmi, op1i, op2i, boi, ai).
/// </summary>
/// <param name="qtable">Table whose QTableArray is read.</param>
/// <param name="tmi">First Q-table index.</param>
/// <param name="op1i">Second Q-table index.</param>
/// <param name="op2i">Third Q-table index.</param>
/// <param name="boi">Fourth Q-table index.</param>
/// <param name="ai">Fifth Q-table index (action index).</param>
/// <returns>The stored entry unboxed as a double.</returns>
private double GetQValue(QTableBase qtable, int tmi, int op1i, int op2i, int boi, int ai)
{
    var table = qtable.QTableArray;
    object stored = table.GetValue(tmi, op1i, op2i, boi, ai);
    return (double)stored;
}