// Return the value of performing an action based on the features expected from the action.
//
// The value is the sum, over every feature of the state/action pair, of
// (weight of the feature) * (value of the feature).
//
// p: the state/action pair to evaluate.
// Returns: the weighted feature sum; 0 when the pair has no features.
//
// Side effects: the feature dictionary for p is stored in featureCache the first
// time it is computed, and every feature that has no weight yet gets a weight of 0
// stored in QWeights.
protected override decimal GetQValue(QStateActionPair p)
{
    // Single lookup instead of ContainsKey + indexer; compute and cache on a miss.
    Dictionary<QFeature, decimal> features;
    if (!featureCache.TryGetValue(p, out features))
    {
        features = p.state.GetFeatures(p.action);
        featureCache[p] = features;
    }

    decimal qv = 0;
    foreach (KeyValuePair<QFeature, decimal> feature in features)
    {
        decimal weight;
        if (!QWeights.TryGetValue(feature.Key, out weight))
        {
            // Unseen feature: register it with a zero weight so later code can index QWeights safely.
            weight = 0;
            QWeights[feature.Key] = weight;
        }

        qv += weight * feature.Value;
    }

    return qv;
}
// Recompute the stored value of (currentState, action) after observing reward and newState.
//
// The new value mixes the current value of the pair, scaled by (1 - learn), with
// (reward + discount * best value obtainable from newState), scaled by learn.
// The result is written to QValues and reported through UpdateLearningTable.
//
// n: passed through unchanged to UpdateLearningTable.
// currentState / action: the pair whose value is updated.
// newState: the state whose maximum value (GetMaxValue) feeds the update.
// reward: the reward added to the discounted maximum value.
protected virtual void QUpdate(int n, QState currentState, QAction action, QState newState, decimal reward)
{
    QStateActionPair pair = new QStateActionPair(currentState, action);
    decimal bestNext = GetMaxValue(newState);

    // Same arithmetic as a single expression, split into named steps in the same evaluation order.
    decimal retained = (1 - learn) * GetQValue(pair);
    decimal learned = learn * (reward + discount * bestNext);
    decimal updated = retained + learned;

    QValues[pair] = updated;
    UpdateLearningTable(n, currentState, action, updated);
}
// Return the value of performing an action at given state or 0 if not done before.
//
// p: the state/action pair to look up in QValues.
// Returns: the value stored for p, or 0 when QValues has no entry for p.
protected virtual decimal GetQValue(QStateActionPair p)
{
    // One lookup instead of ContainsKey followed by the indexer.
    decimal value;
    return QValues.TryGetValue(p, out value) ? value : 0;
}
// Update the weights of each feature based on their contribution to the reward.
//
// For every feature of (currentState, action) the weight is moved by
// learn * difference * featureValue, where
// difference = reward + discount * GetMaxValue(newState) - GetQValue(pair).
//
// n: not used by this override (the learning table is always updated with -1).
// currentState / action: the pair whose features drive the weight update.
// newState: the state whose maximum value feeds the difference term.
// reward: the reward observed for the transition.
//
// A new weight whose magnitude exceeds 1000000 is rejected (the old weight is kept)
// and a warning is written. Any exception raised while updating a weight (for
// example an OverflowException from decimal arithmetic) is reported, Abort() is
// called and the remaining features are skipped. The learning table is updated for
// every feature afterwards in all cases.
protected override void QUpdate(int n, QState currentState, QAction action, QState newState, decimal reward)
{
    decimal maxQ = GetMaxValue(newState);
    QStateActionPair p = new QStateActionPair(currentState, action);

    // Single lookup instead of ContainsKey + indexer; compute and cache on a miss.
    Dictionary<QFeature, decimal> features;
    if (!featureCache.TryGetValue(p, out features))
    {
        features = currentState.GetFeatures(action);
        featureCache[p] = features;
    }

    decimal currentQ = GetQValue(p);
    decimal difference = reward + discount * maxQ - currentQ;

    foreach (KeyValuePair<QFeature, decimal> feature in features)
    {
        try
        {
            decimal oldWeight;
            if (!QWeights.TryGetValue(feature.Key, out oldWeight))
            {
                // Register unseen features with a zero weight before adjusting them.
                oldWeight = 0;
                QWeights[feature.Key] = oldWeight;
            }

            decimal newWeight = oldWeight + learn * difference * feature.Value;
            if (Math.Abs(newWeight) <= 1000000)
            {
                QWeights[feature.Key] = newWeight;
            }
            else
            {
                WriteOutput("Warning: Weights diverging. Check that your features are valid and measured consistently with everything else.", true);
            }
        }
        catch (Exception e)
        {
            WriteOutput("Exception: " + e + "\n*Check that your features are valid and measured consistently with everything else.*", true);
            Abort();
            break;
        }
    }

    // Output: iterate the pairs directly so the feature value needs no second lookup.
    foreach (KeyValuePair<QFeature, decimal> feature in features)
    {
        UpdateLearningTable(-1, feature.Key.ToString(), QWeights[feature.Key].ToString(), feature.Value);
    }
}
// Return the value of performing an action based on the features expected from the action.
//
// p: the state/action pair to evaluate.
// Returns: the sum of QWeights[feature] * featureValue over all features of p
//          (0 when there are no features).
//
// Side effects: caches the features of p in featureCache on first use, and stores a
// weight of 0 in QWeights for every feature that does not have a weight yet.
protected override decimal GetQValue(QStateActionPair p)
{
    Dictionary<QFeature, decimal> features;
    if (!featureCache.TryGetValue(p, out features))
    {
        // Cache miss: ask the state for the features of this action and remember them.
        features = p.state.GetFeatures(p.action);
        featureCache[p] = features;
    }

    decimal qv = 0;
    foreach (KeyValuePair<QFeature, decimal> feature in features)
    {
        // TryGetValue avoids the ContainsKey + indexer double lookup.
        decimal weight;
        if (!QWeights.TryGetValue(feature.Key, out weight))
        {
            weight = 0;
            QWeights[feature.Key] = weight;
        }

        qv += weight * feature.Value;
    }

    return qv;
}
// Update the weights of each feature based on their contribution to the reward.
//
// Each feature weight of (currentState, action) is adjusted by
// learn * difference * featureValue, with
// difference = reward + discount * GetMaxValue(newState) - GetQValue(pair).
//
// n: not used by this override (UpdateLearningTable is always called with -1).
// currentState / action: the pair whose features are re-weighted.
// newState: the state whose maximum value enters the difference term.
// reward: the reward observed for the transition.
//
// Weights that would exceed a magnitude of 1000000 are left unchanged and a warning
// is written instead. If updating a weight throws (for example an OverflowException
// from decimal arithmetic), the exception is written out, Abort() is called and the
// remaining features are not updated. The learning table is refreshed for every
// feature at the end regardless.
protected override void QUpdate(int n, QState currentState, QAction action, QState newState, decimal reward)
{
    decimal maxQ = GetMaxValue(newState);
    QStateActionPair p = new QStateActionPair(currentState, action);

    Dictionary<QFeature, decimal> features;
    if (!featureCache.TryGetValue(p, out features))
    {
        // Not cached yet: compute the features once and store them for later calls.
        features = currentState.GetFeatures(action);
        featureCache[p] = features;
    }

    decimal currentQ = GetQValue(p);
    decimal difference = reward + discount * maxQ - currentQ;

    foreach (KeyValuePair<QFeature, decimal> feature in features)
    {
        try
        {
            // One lookup per feature; unseen features start at weight 0.
            decimal oldWeight;
            if (!QWeights.TryGetValue(feature.Key, out oldWeight))
            {
                oldWeight = 0;
                QWeights[feature.Key] = oldWeight;
            }

            decimal newWeight = oldWeight + learn * difference * feature.Value;
            if (Math.Abs(newWeight) <= 1000000)
            {
                QWeights[feature.Key] = newWeight;
            }
            else
            {
                WriteOutput("Warning: Weights diverging. Check that your features are valid and measured consistently with everything else.", true);
            }
        }
        catch (Exception e)
        {
            WriteOutput("Exception: " + e + "\n*Check that your features are valid and measured consistently with everything else.*", true);
            Abort();
            break;
        }
    }

    // Output
    foreach (KeyValuePair<QFeature, decimal> entry in features)
    {
        // The pair already carries the feature value, so features[f] is not looked up again.
        UpdateLearningTable(-1, entry.Key.ToString(), QWeights[entry.Key].ToString(), entry.Value);
    }
}
// Return the value of performing an action at given state or 0 if not done before.
//
// p: the state/action pair to look up.
// Returns: the entry of QValues for p when one exists, otherwise 0.
protected virtual decimal GetQValue(QStateActionPair p)
{
    decimal stored;
    if (QValues.TryGetValue(p, out stored))
    {
        return stored;
    }

    // No entry recorded for this pair.
    return 0;
}
// Update the stored value of (currentState, action) from an observed transition.
//
// new value = (1 - learn) * current value of the pair
//           + learn * (reward + discount * GetMaxValue(newState))
//
// The new value is saved in QValues and forwarded to UpdateLearningTable together
// with n, currentState and action.
protected virtual void QUpdate(int n, QState currentState, QAction action, QState newState, decimal reward)
{
    QStateActionPair key = new QStateActionPair(currentState, action);
    decimal futureValue = GetMaxValue(newState);

    // Old estimate first, then the observed target, matching the original evaluation order.
    decimal keptPart = (1 - learn) * GetQValue(key);
    decimal target = reward + discount * futureValue;
    decimal blended = keptPart + learn * target;

    QValues[key] = blended;
    UpdateLearningTable(n, currentState, action, blended);
}