// Update the weights of each feature based on their contribution to the reward.
//
// Computes difference = reward + discount * GetMaxValue(newState) - GetQValue(pair)
// and shifts every feature weight by learn * difference * featureValue.
// Feature values for the (state, action) pair come from featureCache when present;
// otherwise they are obtained from currentState.GetFeatures(action) and cached.
//
//   n            - not used by this implementation.
//   currentState - state the action was taken from; source of the feature values.
//   action       - action that was taken in currentState.
//   newState     - resulting state, passed to GetMaxValue.
//   reward       - reward observed for this transition.
//
// A new weight whose magnitude exceeds 1,000,000 is rejected (the old weight is kept)
// and a divergence warning is written. Any exception raised while updating a weight is
// reported, Abort() is called and the remaining features are left untouched.
protected override void QUpdate(int n, QState currentState, QAction action, QState newState, decimal reward)
{
    // Largest absolute weight accepted before the update is treated as diverging.
    const decimal maxWeightMagnitude = 1000000m;

    decimal maxQ = GetMaxValue(newState);
    QStateActionPair p = new QStateActionPair(currentState, action);

    // Reuse cached feature values for this pair; compute and cache them on a miss.
    Dictionary<QFeature, decimal> features;
    if (!featureCache.TryGetValue(p, out features))
    {
        features = currentState.GetFeatures(action);
        featureCache[p] = features;
    }

    decimal currentQ = GetQValue(p);
    decimal difference = reward + discount * maxQ - currentQ;

    foreach (KeyValuePair<QFeature, decimal> feature in features)
    {
        try
        {
            // A feature seen for the first time starts with weight 0.
            decimal oldWeight;
            if (!QWeights.TryGetValue(feature.Key, out oldWeight))
            {
                oldWeight = 0;
                QWeights[feature.Key] = oldWeight;
            }

            decimal newWeight = oldWeight + learn * difference * feature.Value;
            if (Math.Abs(newWeight) <= maxWeightMagnitude)
            {
                QWeights[feature.Key] = newWeight;
            }
            else
            {
                WriteOutput("Warning: Weights diverging. Check that your features are valid and measured consistently with everything else.", true);
            }
        }
        catch (Exception e)
        {
            WriteOutput("Exception: " + e + "\n*Check that your features are valid and measured consistently with everything else.*", true);
            Abort();
            break;
        }
    }

    // Output
    // The loop above may have stopped early, so some features may have no weight entry.
    // Look weights up defensively instead of indexing (which would throw
    // KeyNotFoundException) and show the initial weight 0 for those features.
    foreach (KeyValuePair<QFeature, decimal> feature in features)
    {
        decimal weight;
        if (!QWeights.TryGetValue(feature.Key, out weight))
        {
            weight = 0;
        }

        UpdateLearningTable(-1, feature.Key.ToString(), weight.ToString(), feature.Value);
    }
}
// Update the weights of each feature based on their contribution to the reward.
//
// The correction applied to each weight is learn * difference * featureValue, where
// difference = reward + discount * GetMaxValue(newState) - GetQValue(pair).
// Feature values for the (state, action) pair are taken from featureCache if already
// stored there; otherwise currentState.GetFeatures(action) is called and its result cached.
//
//   n            - not used by this implementation.
//   currentState - state in which the action was taken; provides the feature values.
//   action       - action taken in currentState.
//   newState     - state reached afterwards, passed to GetMaxValue.
//   reward       - reward received for the transition.
//
// Updates that would push a weight's magnitude above 1,000,000 are dropped (the previous
// weight stays) and a divergence warning is written. If updating a weight throws, the
// exception is reported, Abort() is called and no further features are processed.
protected override void QUpdate(int n, QState currentState, QAction action, QState newState, decimal reward)
{
    // Upper bound on |weight|; anything larger is considered divergence.
    const decimal weightLimit = 1000000m;

    decimal maxQ = GetMaxValue(newState);
    QStateActionPair p = new QStateActionPair(currentState, action);

    // Single lookup into the cache; populate it when the pair has not been seen before.
    Dictionary<QFeature, decimal> features;
    if (!featureCache.TryGetValue(p, out features))
    {
        features = currentState.GetFeatures(action);
        featureCache[p] = features;
    }

    decimal currentQ = GetQValue(p);
    decimal difference = reward + discount * maxQ - currentQ;

    foreach (KeyValuePair<QFeature, decimal> feature in features)
    {
        try
        {
            // Unknown features are registered with an initial weight of 0.
            decimal oldWeight;
            if (!QWeights.TryGetValue(feature.Key, out oldWeight))
            {
                oldWeight = 0;
                QWeights[feature.Key] = oldWeight;
            }

            decimal newWeight = oldWeight + learn * difference * feature.Value;
            if (Math.Abs(newWeight) <= weightLimit)
            {
                QWeights[feature.Key] = newWeight;
            }
            else
            {
                WriteOutput("Warning: Weights diverging. Check that your features are valid and measured consistently with everything else.", true);
            }
        }
        catch (Exception e)
        {
            WriteOutput("Exception: " + e + "\n*Check that your features are valid and measured consistently with everything else.*", true);
            Abort();
            break;
        }
    }

    // Output
    // If the update loop broke out early, features after the failing one may be missing
    // from QWeights. Indexing QWeights directly would throw KeyNotFoundException here,
    // so fall back to the initial weight 0 for any feature without an entry.
    foreach (KeyValuePair<QFeature, decimal> feature in features)
    {
        decimal weight;
        if (!QWeights.TryGetValue(feature.Key, out weight))
        {
            weight = 0;
        }

        UpdateLearningTable(-1, feature.Key.ToString(), weight.ToString(), feature.Value);
    }
}