Exemplo n.º 1
0
        // Returns the approximate Q-value of a state/action pair: the sum, over every
        // feature reported for the pair, of (feature weight * feature value).
        //
        // p: the state/action pair to evaluate.
        // Returns: the weighted feature sum, or 0 when the pair has no features.
        //
        // Side effects: the pair's feature set is stored in featureCache the first time
        // it is requested, and any feature that has no entry in QWeights is given a
        // weight of 0.
        protected override decimal GetQValue(QStateActionPair p)
        {
            // TryGetValue probes the cache once; ContainsKey followed by the indexer
            // would hash and probe the same key twice.
            Dictionary<QFeature, decimal> features;
            if (!featureCache.TryGetValue(p, out features))
            {
                features = p.state.GetFeatures(p.action);
                featureCache[p] = features;
            }

            decimal qv = 0;
            foreach (KeyValuePair<QFeature, decimal> feature in features)
            {
                decimal weight;
                if (!QWeights.TryGetValue(feature.Key, out weight))
                {
                    // Unseen feature: register it with a zero weight so the key exists
                    // for later reads; it contributes nothing to the sum.
                    weight = 0;
                    QWeights[feature.Key] = weight;
                }
                qv += weight * feature.Value;
            }

            return qv;
        }
Exemplo n.º 2
0
        // Applies one table update for (currentState, action):
        //   Q <- (1 - learn) * Q + learn * (reward + discount * maxQ)
        // where Q is GetQValue for the pair and maxQ is GetMaxValue(newState).
        // The result is stored in QValues and reported through UpdateLearningTable.
        //
        // n:            forwarded unchanged to UpdateLearningTable.
        // currentState: state half of the pair being updated.
        // action:       action half of the pair being updated.
        // newState:     state handed to GetMaxValue to obtain maxQ.
        // reward:       reward term of the update target.
        //
        // NOTE(review): learn and discount are defined outside this block; presumably
        // the learning rate and discount factor - confirm against their declarations.
        protected virtual void QUpdate(int n, QState currentState, QAction action, QState newState, decimal reward)
        {
            QStateActionPair pair = new QStateActionPair(currentState, action);
            decimal bestNext = GetMaxValue(newState);

            // Same left-to-right evaluation as the single-expression form:
            // the retained part (which calls GetQValue) first, then the learned part.
            decimal retained = (1 - learn) * GetQValue(pair);
            decimal learned = learn * (reward + discount * bestNext);
            decimal updated = retained + learned;

            QValues[pair] = updated;
            UpdateLearningTable(n, currentState, action, updated);
        }
Exemplo n.º 3
0
 // Returns the value stored in QValues for a state/action pair, or 0 when the
 // table has no entry for it. The table is not modified.
 //
 // p: the state/action pair to look up.
 protected virtual decimal GetQValue(QStateActionPair p)
 {
     // One TryGetValue probe replaces the ContainsKey + indexer double lookup.
     decimal stored;
     return QValues.TryGetValue(p, out stored) ? stored : 0;
 }
Exemplo n.º 4
0
        // Updates the weight of every feature of (currentState, action) using
        //   difference = reward + discount * GetMaxValue(newState) - GetQValue(pair)
        //   weight    <- weight + learn * difference * featureValue
        // and then reports each feature, its weight and its value through
        // UpdateLearningTable.
        //
        // n:            not used by this override; -1 is passed to UpdateLearningTable.
        // currentState: state whose features are updated (feature set cached per pair).
        // action:       action taken in currentState.
        // newState:     state handed to GetMaxValue.
        // reward:       reward term of the difference above.
        //
        // A new weight whose magnitude exceeds MaxWeightMagnitude is discarded (the old
        // weight is kept) and a divergence warning is written instead.
        protected override void QUpdate(int n, QState currentState, QAction action, QState newState, decimal reward)
        {
            // Largest absolute weight accepted before the update is treated as divergence.
            const decimal MaxWeightMagnitude = 1000000;

            decimal maxQ = GetMaxValue(newState);
            QStateActionPair p = new QStateActionPair(currentState, action);

            // Single-probe cache read instead of ContainsKey + indexer.
            Dictionary<QFeature, decimal> features;
            if (!featureCache.TryGetValue(p, out features))
            {
                features = currentState.GetFeatures(action);
                featureCache[p] = features;
            }

            // The difference is computed once, before any weight changes, so every
            // feature is updated against the same pre-update estimate.
            decimal currentQ = GetQValue(p);
            decimal difference = reward + discount * maxQ - currentQ;

            foreach (KeyValuePair<QFeature, decimal> feature in features)
            {
                try
                {
                    decimal oldWeight;
                    if (!QWeights.TryGetValue(feature.Key, out oldWeight))
                    {
                        // Register unseen features with weight 0 so the key exists even
                        // when the update below is rejected as divergent.
                        oldWeight = 0;
                        QWeights[feature.Key] = oldWeight;
                    }

                    decimal newWeight = oldWeight + learn * difference * feature.Value;
                    if (Math.Abs(newWeight) <= MaxWeightMagnitude)
                    {
                        QWeights[feature.Key] = newWeight;
                    }
                    else
                    {
                        WriteOutput("Warning: Weights diverging. Check that your features are valid and measured consistently with everything else.", true);
                    }
                }
                catch (Exception e)
                {
                    // Any failure while updating a weight (e.g. an OverflowException from
                    // decimal arithmetic) is reported and stops the remaining updates.
                    WriteOutput("Exception: " + e + "\n*Check that your features are valid and measured consistently with everything else.*", true);
                    Abort();
                    // break leaves only this loop; the output loop below still runs.
                    break;
                }
            }

            // Output: iterate the pairs directly rather than re-indexing features by key.
            foreach (KeyValuePair<QFeature, decimal> feature in features)
            {
                UpdateLearningTable(-1, feature.Key.ToString(), QWeights[feature.Key].ToString(), feature.Value);
            }
        }
Exemplo n.º 5
0
        // Computes the Q-value of a state/action pair as a weighted sum of its
        // features: for each feature of the pair, QWeights[feature] * feature value.
        //
        // p: the state/action pair to evaluate.
        // Returns: the accumulated sum; 0 if the pair reports no features.
        //
        // Side effects: caches the pair's features in featureCache on first use and
        // adds a zero-weight entry to QWeights for every feature not yet present.
        protected override decimal GetQValue(QStateActionPair p)
        {
            Dictionary<QFeature, decimal> features;
            // One TryGetValue probe instead of ContainsKey followed by the indexer.
            if (!featureCache.TryGetValue(p, out features))
            {
                features = p.state.GetFeatures(p.action);
                featureCache[p] = features;
            }

            decimal qv = 0;
            foreach (KeyValuePair<QFeature, decimal> feature in features)
            {
                decimal weight;
                if (!QWeights.TryGetValue(feature.Key, out weight))
                {
                    // First sighting of this feature: store a zero weight for it.
                    weight = 0;
                    QWeights[feature.Key] = weight;
                }
                qv += weight * feature.Value;
            }

            return qv;
        }
Exemplo n.º 6
0
 // Adjusts the weight of each feature of (currentState, action):
 //   difference = reward + discount * GetMaxValue(newState) - GetQValue(pair)
 //   weight    <- weight + learn * difference * featureValue
 // then reports every feature with its weight and value via UpdateLearningTable.
 //
 // n:            not used by this override; -1 is passed to UpdateLearningTable.
 // currentState: state whose features are updated (feature set cached per pair).
 // action:       action taken in currentState.
 // newState:     state handed to GetMaxValue.
 // reward:       reward term of the difference above.
 //
 // A new weight whose magnitude exceeds MaxWeightMagnitude is not stored; the old
 // weight stays in place and a divergence warning is written.
 protected override void QUpdate(int n, QState currentState, QAction action, QState newState, decimal reward)
 {
     // Largest absolute weight accepted before the update is treated as divergence.
     const decimal MaxWeightMagnitude = 1000000;

     decimal maxQ = GetMaxValue(newState);
     QStateActionPair p = new QStateActionPair(currentState, action);

     // Single-probe cache read instead of ContainsKey + indexer.
     Dictionary<QFeature, decimal> features;
     if (!featureCache.TryGetValue(p, out features))
     {
         features = currentState.GetFeatures(action);
         featureCache[p] = features;
     }

     // Computed once up front so all features share the same pre-update estimate.
     decimal currentQ = GetQValue(p);
     decimal difference = reward + discount * maxQ - currentQ;

     foreach (KeyValuePair<QFeature, decimal> feature in features)
     {
         try
         {
             decimal oldWeight;
             if (!QWeights.TryGetValue(feature.Key, out oldWeight))
             {
                 // Make sure the key exists even if the update below is rejected.
                 oldWeight = 0;
                 QWeights[feature.Key] = oldWeight;
             }

             decimal newWeight = oldWeight + learn * difference * feature.Value;
             if (Math.Abs(newWeight) <= MaxWeightMagnitude)
             {
                 QWeights[feature.Key] = newWeight;
             }
             else
             {
                 WriteOutput("Warning: Weights diverging. Check that your features are valid and measured consistently with everything else.", true);
             }
         }
         catch (Exception e)
         {
             // Report the failure (e.g. an OverflowException from decimal arithmetic),
             // abort, and skip the remaining weight updates.
             WriteOutput("Exception: " + e + "\n*Check that your features are valid and measured consistently with everything else.*", true);
             Abort();
             // break leaves only this loop; the output loop below still runs.
             break;
         }
     }

     // Output: walk the pairs directly rather than re-indexing features by key.
     foreach (KeyValuePair<QFeature, decimal> feature in features)
     {
         UpdateLearningTable(-1, feature.Key.ToString(), QWeights[feature.Key].ToString(), feature.Value);
     }
 }
Exemplo n.º 7
0
 // Looks up the Q-value recorded for a state/action pair.
 //
 // p: the state/action pair to look up.
 // Returns: the value held in QValues for p, or 0 when p has no entry.
 protected virtual decimal GetQValue(QStateActionPair p)
 {
     // TryGetValue performs a single lookup; ContainsKey + indexer performs two.
     decimal stored;
     if (QValues.TryGetValue(p, out stored))
     {
         return stored;
     }

     return 0;
 }
Exemplo n.º 8
0
 // Performs one table update for the pair (currentState, action):
 //   Q <- (1 - learn) * Q + learn * (reward + discount * maxQ)
 // with Q taken from GetQValue and maxQ from GetMaxValue(newState). The new value
 // is written to QValues and handed to UpdateLearningTable.
 //
 // n:            forwarded unchanged to UpdateLearningTable.
 // currentState: state half of the pair being updated.
 // action:       action half of the pair being updated.
 // newState:     state handed to GetMaxValue to obtain maxQ.
 // reward:       reward term of the update target.
 //
 // NOTE(review): learn and discount come from outside this block; presumably the
 // learning rate and discount factor - confirm against their declarations.
 protected virtual void QUpdate(int n, QState currentState, QAction action, QState newState, decimal reward)
 {
     QStateActionPair pair = new QStateActionPair(currentState, action);
     decimal bestNext = GetMaxValue(newState);

     // Evaluated in the same order as the one-line form: old-value term first
     // (this is where GetQValue is called), then the target term.
     decimal oldTerm = (1 - learn) * GetQValue(pair);
     decimal targetTerm = learn * (reward + discount * bestNext);
     decimal updated = oldTerm + targetTerm;

     QValues[pair] = updated;
     UpdateLearningTable(n, currentState, action, updated);
 }