C# (CSharp) QState.GetFeatures示例

        /// <summary>
        /// Updates the weight of each feature of the (state, action) pair based on
        /// its contribution to the reward.
        /// </summary>
        /// <remarks>
        /// The correction applied to every weight is
        /// <c>learn * (reward + discount * maxQ - currentQ) * featureValue</c>.
        /// A new weight whose magnitude exceeds 1000000 is not stored; a warning is
        /// written instead. If an exception is thrown while updating a weight, the
        /// exception is reported, <c>Abort()</c> is called and the remaining
        /// features are left untouched.
        /// </remarks>
        /// <param name="n">Not read by this implementation.</param>
        /// <param name="currentState">State the action was taken from.</param>
        /// <param name="action">Action that was taken.</param>
        /// <param name="newState">State reached after the action; passed to <c>GetMaxValue</c>.</param>
        /// <param name="reward">Reward observed for the transition.</param>
        protected override void QUpdate(int n, QState currentState, QAction action, QState newState, decimal reward)
        {
            decimal          maxQ = GetMaxValue(newState);
            QStateActionPair p    = new QStateActionPair(currentState, action);
            Dictionary <QFeature, decimal> features;

            // Reuse the features already extracted for this pair; otherwise extract and cache them.
            if (!featureCache.TryGetValue(p, out features))
            {
                features        = currentState.GetFeatures(action);
                featureCache[p] = features;
            }
            decimal currentQ   = GetQValue(p);
            decimal difference = reward + discount * maxQ - currentQ;

            foreach (KeyValuePair <QFeature, decimal> feature in features)
            {
                try
                {
                    // A feature seen for the first time starts with a weight of zero.
                    decimal oldWeight;
                    if (!QWeights.TryGetValue(feature.Key, out oldWeight))
                    {
                        oldWeight             = 0;
                        QWeights[feature.Key] = oldWeight;
                    }
                    decimal newWeight = oldWeight + learn * difference * feature.Value;
                    if (Math.Abs(newWeight) <= 1000000)
                    {
                        QWeights[feature.Key] = newWeight;
                    }
                    else
                    {
                        // Keep the previous weight rather than storing a runaway value.
                        WriteOutput("Warning: Weights diverging. Check that your features are valid and measured consistently with everything else.", true);
                    }
                }
                catch (Exception e)
                {
                    WriteOutput("Exception: " + e + "\n*Check that your features are valid and measured consistently with everything else.*", true);
                    Abort();
                    break;
                }
            }

            // Output
            foreach (KeyValuePair <QFeature, decimal> feature in features)
            {
                // After an aborted update some features may have no weight entry yet;
                // skip them instead of throwing KeyNotFoundException from the indexer.
                decimal weight;
                if (QWeights.TryGetValue(feature.Key, out weight))
                {
                    UpdateLearningTable(-1, feature.Key.ToString(), weight.ToString(), feature.Value);
                }
            }
        }

示例#2

显示文件

文件： QLearning_Approximate.cs 项目： vkarthi46/QLearner

 /// <summary>
 /// Adjusts the weight of every feature describing the given state/action pair
 /// according to how much that feature contributed to the reward.
 /// </summary>
 /// <remarks>
 /// Each weight is moved by <c>learn * difference * featureValue</c>, where
 /// <c>difference = reward + discount * maxQ - currentQ</c>. Weights that would
 /// exceed 1000000 in magnitude are not stored and a warning is written. An
 /// exception during the update is reported, followed by <c>Abort()</c>, and
 /// the loop stops.
 /// </remarks>
 /// <param name="n">Not read by this implementation.</param>
 /// <param name="currentState">State the action was taken from.</param>
 /// <param name="action">Action that was taken.</param>
 /// <param name="newState">State reached after the action; passed to <c>GetMaxValue</c>.</param>
 /// <param name="reward">Reward observed for the transition.</param>
 protected override void QUpdate(int n, QState currentState, QAction action, QState newState, decimal reward)
 {
     decimal maxQ = GetMaxValue(newState);
     QStateActionPair p = new QStateActionPair(currentState, action);

     // Look the features up in the cache first; extract and remember them on a miss.
     Dictionary<QFeature, decimal> features;
     if (!featureCache.TryGetValue(p, out features))
     {
         features = currentState.GetFeatures(action);
         featureCache[p] = features;
     }

     decimal currentQ = GetQValue(p);
     decimal difference = reward + discount * maxQ - currentQ;
     foreach (KeyValuePair<QFeature, decimal> feature in features)
     {
         try
         {
             // Unknown features are registered with an initial weight of zero.
             decimal oldWeight;
             if (!QWeights.TryGetValue(feature.Key, out oldWeight))
             {
                 oldWeight = 0;
                 QWeights[feature.Key] = oldWeight;
             }

             decimal newWeight = oldWeight + learn * difference * feature.Value;
             if (Math.Abs(newWeight) <= 1000000)
             {
                 QWeights[feature.Key] = newWeight;
             }
             else
             {
                 // The out-of-range value is discarded; the old weight stays in place.
                 WriteOutput("Warning: Weights diverging. Check that your features are valid and measured consistently with everything else.", true);
             }
         }
         catch (Exception e)
         {
             WriteOutput("Exception: " + e + "\n*Check that your features are valid and measured consistently with everything else.*", true);
             Abort();
             break;
         }
     }

     // Output
     foreach (KeyValuePair<QFeature, decimal> feature in features)
     {
         // If the update loop stopped early, later features may have no weight yet.
         // Indexing QWeights for them would throw KeyNotFoundException, so skip them.
         decimal weight;
         if (QWeights.TryGetValue(feature.Key, out weight))
         {
             UpdateLearningTable(-1, feature.Key.ToString(), weight.ToString(), feature.Value);
         }
     }
 }