Esempio n. 1
0
        // return the maximum reward in the Reward Table for a given input

        /* public double getMaximumQValue(int inputindex)
         * {
         *   double maxQ = QTable[inputindex][0];
         *   int maxQindex = 0;
         *
         *   int i;
         *   int possibleoutputs = (int)Math.Pow(2, stann.OutputNum);
         *
         *   for (i = 0; i < possibleoutputs; i++)
         *       if (maxQ < QTable[inputindex][i])
         *       {
         *           maxQ = QTable[inputindex][i];
         *           maxQindex = i;
         *       }
         *   return QTable[inputindex][maxQindex];
         * }*/



        // given the last input(previousInputVec) caused PreviousOutput(which led to a satisfactory result),
        //we may assign a reward to the new state that came up, thus backtracking and updating the rewards for
        // a finite number of steps (input-output) that superceeded this succesfull ending

        /*
         * public void assignReward(double[] currentinputvec, double reward)
         * {
         *  int previousinput = 0, currentinput = 0, i;
         *  for (i = 0; i < InputNum; i++) {
         *      previousinput += (int)(Math.Pow(InputRange, i) * PreviousInputVec[i]);
         *      currentinput += (int)(Math.Pow(InputRange, i) * currentinputvec[i]);
         *  }
         *
         *  RewardTable[currentinput] = reward;
         *
         *  // Updating the rewards in the Reward table using the log entry
         *  double currentStepReward = reward;
         *  int tempinput = currentinput;
         *
         *  for (i = IOLogLength - 1; i >= 0; i--)
         *  {
         *      // updating the q-value for the input-output log entry (in three lines)
         *      QTable[IOLog[i].input][IOLog[i].output] = (1 - Qalpha) * QTable[IOLog[i].input][IOLog[i].output];
         *      QTable[IOLog[i].input][IOLog[i].output] += Qalpha * RewardTable[tempinput];
         *      QTable[IOLog[i].input][IOLog[i].output] += Qalpha * Qgamma * getMaximumQValue(tempinput);
         *      // Q-value of the entry updated
         *      tempinput = IOLog[i].input;
         *  }
         *
         *  // clearing the IO Log to avoid re-assigning Q values on the same chain of actions when a new reward appears
         *  clearIOLog();
         * }
         */

        public void assignReward(double[] currentinputvec, double reward)
        {
            int i;
            // mapping previous and current input vectors to integers
            int previousinput = STANN.mapVector2Int(PreviousInputVec, InputRange, InputNum);
            int currentinput  = STANN.mapVector2Int(currentinputvec, InputRange, InputNum);

            // adding a reward entry for the current input

            RewardTableEntry.updateRewardEntry(ref RewardTable, currentinput, reward);


            // Updating the Q - values in the Q table using the existing log entries
            double currentStepReward = reward;
            int    tempinput         = currentinput;

            for (i = IOLogLength - 1; i >= 0; i--)
            {
                // retrieving the Q -table entry for the current input in the log
                QTableEntry entry = QTableEntry.findQTableEntry(QTable, IOLog[i].input, IOLog[i].output);
                if (entry == null)
                {
                    QTableEntry.assignQValue(ref QTable, IOLog[i].input, IOLog[i].output, 0);
                    entry = QTableEntry.findQTableEntry(QTable, IOLog[i].input, IOLog[i].output);
                }
                else
                {
                    entry.Frequency++;
                }
                // The Non-Deterministic MDP coefficient
                double NDPCoefficient = 1.0 / (1.0 + 1.0 * entry.Frequency);

                double qvalue = entry.QValue;
                // updating the q-value for the input-output log entry (in three lines)
                qvalue  = NDPCoefficient * (1 - Qalpha) * qvalue;
                qvalue += NDPCoefficient * Qalpha * RewardTableEntry.getReward(RewardTable, tempinput);
                qvalue += NDPCoefficient * Qalpha * Qgamma * QTableEntry.getMaxQValue(QTable, tempinput).QValue;

                entry.QValue = qvalue;
                // Q-value of the entry updated
                tempinput = IOLog[i].input;
            }
        }
Esempio n. 2
0
        // return the output of a MetaNode branch given the input vector to the leaves

        /*
         * public static double getOutput(MetaNode mnet, double[][] inputvec, int pass)
         * {
         *  int i;
         *  double[] sigmoids;
         *  double theoutput;
         *
         *  if (mnet.ChildrenNum == 0)
         *  {
         *      if (mnet.OutputPass != pass)
         *      {
         *          mnet.OutputPass = pass;
         *          // self training if the node has its ForceSelfTrain attribute set to true
         *          if (mnet.ForcedSelfTrain)
         *              // self training
         *              mnet.stann.selfTrain(inputvec[mnet.LeafIndex]);
         *
         *          // retrieving the sigmoids of the node
         *          sigmoids = mnet.stann.sigmoidLayerOutputs(inputvec[mnet.LeafIndex], mnet.stann.LayerNum - 1);
         *          // calculating the decimal equivalent to the ordered thresholded sigmoid outputs
         *          theoutput = STANN.mapVector2Int(sigmoids, 2, mnet.stann.OutputNum);
         *
         *          mnet.PreviousOutput = mnet.CurrentOutput;
         *          mnet.CurrentOutput = theoutput;
         *      }
         *      else
         *          theoutput = mnet.CurrentOutput;
         *
         *  }
         *  else
         *  {
         *      if (mnet.OutputPass != pass)
         *      {
         *          mnet.OutputPass = pass;
         *          double[] levelinput = new double[mnet.InputNum];
         *
         *          for (i = 0; i < mnet.InputNum; i++)
         *              if (mnet.Children[i].NodeLevel >= mnet.NodeLevel) // requesting output from a higher (or equal ) level node
         *                  levelinput[i] = mnet.Children[i].PreviousOutput; // avoiding circular reference in recursion
         *              else
         *                  levelinput[i] = getOutput(mnet.Children[i], inputvec, pass);
         *
         *          // self training if the aselfrain attribute is on
         *          if (mnet.ForcedSelfTrain) mnet.stann.selfTrain(levelinput);
         *          // retrieving sigmoids
         *          sigmoids = mnet.stann.sigmoidLayerOutputs(levelinput, mnet.stann.LayerNum - 1);
         *          // calculating the decimal equivalent to the thresholded outputs
         *          theoutput = 0;
         *          for (i = 0; i < mnet.stann.OutputNum; i++)
         *          {
         *              int bit = (sigmoids[i] < 0.5) ? 0 : 1;
         *              theoutput += (int)Math.Pow(2, i) * bit;
         *          }
         *          // updating previous Input Vector and Previous Output properties of the metanode
         *          int t;
         *          mnet.PreviousInputVec = new double[mnet.InputNum];
         *          for (t = 0; t < mnet.InputNum; t++)
         *              mnet.PreviousInputVec[t] = levelinput[t];
         *          mnet.PreviousOutput = mnet.CurrentOutput;
         *          mnet.CurrentOutput = theoutput;
         *          // previous input vector and output updated with the new values
         *
         *          // Must now train the network!!!! (in case the qlearning property is on)
         *          if (mnet.ForcedQLearning)
         *          {
         *              // mapping the input to the proper index in the reward table
         *              int inputindex = 0;
         *              for (t = 0; t < mnet.InputNum; t++)
         *                  inputindex += (int)(Math.Pow(mnet.InputRange, t) * levelinput[t]);
         *              // finding the output that corresponds to the maximum Qvaluefor the given input
         *              double maxQvalue = mnet.getMaximumQValue(inputindex);
         *              int maxQvalueOutputindex = 0;
         *              while (mnet.QTable[inputindex][maxQvalueOutputindex] != maxQvalue)
         *                  maxQvalueOutputindex++;
         *
         *              // converting the maximum Q value output to a vector of binary digits
         *              double[] desiredOutput = mnet.stann.int2BinaryVector(maxQvalueOutputindex);
         *              // now training...
         *              mnet.stann.backPropagate(levelinput, desiredOutput);
         *              // updating the IO log
         *              if (mnet.IOLogLength == MAX_IO_LOG_LENGTH)
         *              { // IO Log is full
         *                  // clearing the log and starting all over again
         *                  mnet.IOLogLength = 1;
         *              }
         *              else
         *                  mnet.IOLogLength++;
         *              // updating the IO log entries
         *              mnet.IOLog[mnet.IOLogLength - 1].input = inputindex;
         *              mnet.IOLog[mnet.IOLogLength - 1].output = (int)theoutput;
         *
         *          }
         *      }
         *      else
         *          theoutput = mnet.CurrentOutput;
         *
         *  }
         *
         *  return theoutput;
         * }
         */



        public static double getOutput(MetaNode mnet, double[][] inputvec, int pass)
        {
            int i;

            double[] sigmoids;
            double   theoutput;

            if (mnet.ChildrenNum == 0)
            {
                if (mnet.OutputPass != pass)
                {
                    mnet.OutputPass = pass;
                    // self training if the node has its ForceSelfTrain attribute set to true
                    if (mnet.ForcedSelfTrain)
                    {
                        // self training
                        mnet.stann.selfTrain(inputvec[mnet.LeafIndex]);
                    }

                    // retrieving the sigmoids of the node
                    sigmoids = mnet.stann.sigmoidLayerOutputs(inputvec[mnet.LeafIndex], mnet.stann.LayerNum - 1);
                    // calculating the decimal equivalent to the ordered thresholded sigmoid outputs
                    theoutput = STANN.sigmoids2Int(sigmoids, mnet.stann.OutputNum);

                    mnet.PreviousOutput = mnet.CurrentOutput;
                    mnet.CurrentOutput  = theoutput;
                }
                else
                {
                    theoutput = mnet.CurrentOutput;
                }
            }
            else
            {
                if (mnet.OutputPass != pass)
                {
                    mnet.OutputPass = pass;
                    double[] levelinput = new double[mnet.InputNum];

                    for (i = 0; i < mnet.InputNum; i++)
                    {
                        if (mnet.Children[i].NodeLevel >= mnet.NodeLevel)    // requesting output from a higher (or equal ) level node
                        {
                            levelinput[i] = mnet.Children[i].PreviousOutput; // avoiding circular reference in recursion
                        }
                        else
                        {
                            levelinput[i] = getOutput(mnet.Children[i], inputvec, pass);
                        }
                    }

                    // self training if the aselfrain attribute is on
                    if (mnet.ForcedSelfTrain)
                    {
                        mnet.stann.selfTrain(levelinput);
                    }
                    // retrieving sigmoids
                    sigmoids = mnet.stann.sigmoidLayerOutputs(levelinput, mnet.stann.LayerNum - 1);

                    // calculating the decimal equivalent to the thresholded outputs

                    theoutput = STANN.mapVector2Int(sigmoids, 2, mnet.stann.OutputNum);
                    // updating previous Input Vector and Previous Output properties of the metanode
                    int t;
                    mnet.PreviousInputVec = new double[mnet.InputNum];
                    for (t = 0; t < mnet.InputNum; t++)
                    {
                        mnet.PreviousInputVec[t] = levelinput[t];
                    }


                    mnet.PreviousOutput = mnet.CurrentOutput;
                    mnet.CurrentOutput  = theoutput;
                    // previous input vector and output updated with the new values

                    // Must now train the network!!!! (in case the qlearning property is on)
                    if (mnet.ForcedQLearning)
                    {
                        // mapping the input to the proper index in the reward table
                        int inputindex = STANN.mapVector2Int(levelinput, mnet.InputRange, mnet.InputNum);

                        // finding the output that corresponds to the maximum Qvaluefor the given input

                        QTableEntry maxQvalueEntry = QTableEntry.getMaxQValue(mnet.QTable, inputindex);


                        if (maxQvalueEntry != null)
                        {
                            // converting the maximum Q value output to a vector of binary digits
                            double[] desiredOutput = STANN.mapInt2VectorDouble(maxQvalueEntry.Output, 2, mnet.stann.OutputNum);
                            // now training...
                            mnet.stann.backPropagate(levelinput, desiredOutput);
                        }
                        // updating the IO log
                        if (mnet.IOLogLength == MAX_IO_LOG_LENGTH)
                        { // IO Log is full
                            // clearing the log and starting all over again
                            mnet.IOLogLength = 1;
                        }
                        else
                        {
                            mnet.IOLogLength++;
                        }

                        // updating the IO log entries
                        mnet.IOLog[mnet.IOLogLength - 1].input  = inputindex;
                        mnet.IOLog[mnet.IOLogLength - 1].output = (int)theoutput;
                    }
                }
                else
                {
                    theoutput = mnet.CurrentOutput;
                }
            }

            return(theoutput);
        }