/// <summary>Creates a detached reward-list node holding <paramref name="reward"/> for <paramref name="input"/>.</summary>
public RewardTableEntry(int input, double reward)
        {
            // key first, then the value stored for it
            this.Input  = input;
            this.Reward = reward;

            // a new node is not linked to any successor yet
            this.next = null;
        }
        /// <summary>
        /// Initializes a reward entry for the given input key.
        /// </summary>
        /// <param name="input">Integer key of the input this entry belongs to.</param>
        /// <param name="reward">Reward value stored for that input.</param>
        public RewardTableEntry(int input, double reward)
        {
            next = null;   // the entry starts out with no successor

            Input  = input;
            Reward = reward;
        }
 /// <summary>
 /// Stores <paramref name="reward"/> for <paramref name="input"/>: overwrites the reward of an
 /// existing entry, or adds a new entry (possibly replacing <paramref name="root"/>) when none exists.
 /// </summary>
 public static void updateRewardEntry(ref RewardTableEntry root, int input, double reward)
 {
     RewardTableEntry existing = findRewardTableEntry(root, input);

     // known input: just overwrite the stored reward
     if (existing != null)
     {
         existing.Reward = reward;
         return;
     }

     // unknown input: add it; the add routine hands back the (possibly new) root
     root = addRewardTableEntry(root, input, reward);
 }
        /// <summary>
        /// Looks up the reward stored for <paramref name="input"/> in the list starting at <paramref name="root"/>.
        /// </summary>
        /// <returns>The stored reward, or 0 when the list has no entry for that input.</returns>
        public static double getReward(RewardTableEntry root, int input)
        {
            RewardTableEntry match = findRewardTableEntry(root, input);

            return (match != null) ? match.Reward : 0.0;
        }
        /// <summary>
        /// Sets the reward recorded for an input, adding an entry to the list if the input has none yet.
        /// </summary>
        /// <param name="root">Head of the reward list; reassigned when a new entry is added.</param>
        /// <param name="input">Integer key of the input.</param>
        /// <param name="reward">Reward to record.</param>
        public static void updateRewardEntry(ref RewardTableEntry root, int input, double reward)
        {
            RewardTableEntry hit = findRewardTableEntry(root, input);
            bool             alreadyKnown = (hit != null);

            if (alreadyKnown)
            {
                hit.Reward = reward;
            }
            else
            {
                root = addRewardTableEntry(root, input, reward);
            }
        }
        /// <summary>
        /// Walks the list starting at <paramref name="root"/> and returns the first entry whose
        /// Input equals <paramref name="input"/>.
        /// </summary>
        /// <returns>The matching entry, or null when the list is empty or holds no match.</returns>
        public static RewardTableEntry findRewardTableEntry(RewardTableEntry root, int input)
        {
            for (RewardTableEntry node = root; node != null; node = node.next)
            {
                if (node.Input == input)
                {
                    return node;
                }
            }

            // ran off the end of the list without a match
            return null;
        }
        /// <summary>
        /// Linear search of the reward list for the entry keyed by <paramref name="input"/>.
        /// </summary>
        /// <param name="root">Head of the list (may be null).</param>
        /// <param name="input">Integer input key to look for.</param>
        /// <returns>The first entry with a matching Input, or null if there is none.</returns>
        public static RewardTableEntry findRewardTableEntry(RewardTableEntry root, int input)
        {
            RewardTableEntry cursor = root;

            // advance until the list ends or the current entry carries the requested key
            while (cursor != null && cursor.Input != input)
            {
                cursor = cursor.next;
            }

            return cursor;
        }
        /// <summary>
        /// Returns the reward recorded for <paramref name="input"/>.
        /// </summary>
        /// <param name="root">Head of the reward list (may be null).</param>
        /// <param name="input">Integer input key.</param>
        /// <returns>The entry's Reward, or 0 if no entry exists for the input.</returns>
        public static double getReward(RewardTableEntry root, int input)
        {
            RewardTableEntry found = findRewardTableEntry(root, input);

            // inputs that were never rewarded count as zero reward
            if (found == null)
            {
                return 0;
            }

            return found.Reward;
        }
Example #9
0
        // return the maximum Q-value in the Q Table for a given input (legacy array-based version, kept for reference)

        /* public double getMaximumQValue(int inputindex)
         * {
         *   double maxQ = QTable[inputindex][0];
         *   int maxQindex = 0;
         *
         *   int i;
         *   int possibleoutputs = (int)Math.Pow(2, stann.OutputNum);
         *
         *   for (i = 0; i < possibleoutputs; i++)
         *       if (maxQ < QTable[inputindex][i])
         *       {
         *           maxQ = QTable[inputindex][i];
         *           maxQindex = i;
         *       }
         *   return QTable[inputindex][maxQindex];
         * }*/



        // Given that the last input (PreviousInputVec) caused PreviousOutput (which led to a satisfactory result),
        // we may assign a reward to the new state that came up, thus backtracking and updating the rewards for
        // a finite number of steps (input-output) that preceded this successful ending.

        /*
         * public void assignReward(double[] currentinputvec, double reward)
         * {
         *  int previousinput = 0, currentinput = 0, i;
         *  for (i = 0; i < InputNum; i++) {
         *      previousinput += (int)(Math.Pow(InputRange, i) * PreviousInputVec[i]);
         *      currentinput += (int)(Math.Pow(InputRange, i) * currentinputvec[i]);
         *  }
         *
         *  RewardTable[currentinput] = reward;
         *
         *  // Updating the rewards in the Reward table using the log entry
         *  double currentStepReward = reward;
         *  int tempinput = currentinput;
         *
         *  for (i = IOLogLength - 1; i >= 0; i--)
         *  {
         *      // updating the q-value for the input-output log entry (in three lines)
         *      QTable[IOLog[i].input][IOLog[i].output] = (1 - Qalpha) * QTable[IOLog[i].input][IOLog[i].output];
         *      QTable[IOLog[i].input][IOLog[i].output] += Qalpha * RewardTable[tempinput];
         *      QTable[IOLog[i].input][IOLog[i].output] += Qalpha * Qgamma * getMaximumQValue(tempinput);
         *      // Q-value of the entry updated
         *      tempinput = IOLog[i].input;
         *  }
         *
         *  // clearing the IO Log to avoid re-assigning Q values on the same chain of actions when a new reward appears
         *  clearIOLog();
         * }
         */

        /// <summary>
        /// Records <paramref name="reward"/> for the state described by <paramref name="currentinputvec"/>
        /// and then walks the input/output log from its last entry back to its first, updating the
        /// Q-value of every logged (input, output) pair.
        /// </summary>
        /// <param name="currentinputvec">Input vector of the rewarded state; mapped to an integer key with STANN.mapVector2Int.</param>
        /// <param name="reward">Reward stored in the Reward Table for that state.</param>
        public void assignReward(double[] currentinputvec, double reward)
        {
            // mapping the current input vector to its integer key
            // (the previous-input key and a copy of the reward were computed here before but never used)
            int currentinput = STANN.mapVector2Int(currentinputvec, InputRange, InputNum);

            // adding (or overwriting) the reward entry for the current input
            RewardTableEntry.updateRewardEntry(ref RewardTable, currentinput, reward);

            // Updating the Q-values in the Q table using the existing log entries.
            // tempinput is the input that followed the log entry being processed.
            int tempinput = currentinput;

            for (int i = IOLogLength - 1; i >= 0; i--)
            {
                // retrieving the Q-table entry for the logged input/output pair
                QTableEntry entry = QTableEntry.findQTableEntry(QTable, IOLog[i].input, IOLog[i].output);
                if (entry == null)
                {
                    // first time this pair is seen: create it with a zero Q-value, then fetch it
                    QTableEntry.assignQValue(ref QTable, IOLog[i].input, IOLog[i].output, 0);
                    entry = QTableEntry.findQTableEntry(QTable, IOLog[i].input, IOLog[i].output);
                }
                else
                {
                    entry.Frequency++;
                }

                // The Non-Deterministic MDP coefficient: shrinks as the pair's Frequency grows
                double NDPCoefficient = 1.0 / (1.0 + 1.0 * entry.Frequency);

                // NOTE(review): the result of getMaxQValue is dereferenced without a null check;
                // confirm it never returns null for an input that has no Q-table entries yet.
                double successorReward = RewardTableEntry.getReward(RewardTable, tempinput);
                double successorMaxQ   = QTableEntry.getMaxQValue(QTable, tempinput).QValue;

                // updating the q-value for the input-output log entry (in three lines)
                double qvalue = NDPCoefficient * (1 - Qalpha) * entry.QValue;
                qvalue += NDPCoefficient * Qalpha * successorReward;
                qvalue += NDPCoefficient * Qalpha * Qgamma * successorMaxQ;

                entry.QValue = qvalue;

                // the entry just processed becomes the "next state" for the entry logged before it
                tempinput = IOLog[i].input;
            }
        }
        /// <summary>
        /// Appends a new entry for <paramref name="input"/> at the end of the list starting at <paramref name="root"/>.
        /// </summary>
        /// <returns>The head of the list: the new entry if the list was empty, otherwise <paramref name="root"/>.</returns>
        public static RewardTableEntry addRewardTableEntry(RewardTableEntry root, int input, double reward)
        {
            RewardTableEntry fresh = new RewardTableEntry(input, reward);

            // empty list: the new entry is the list
            if (root == null)
            {
                return fresh;
            }

            // otherwise find the last node and hang the new entry off it
            RewardTableEntry tail = root;
            while (tail.next != null)
            {
                tail = tail.next;
            }
            tail.next = fresh;

            return root;
        }
        /// <summary>
        /// Adds a reward entry to the tail of the list. No duplicate check is done here.
        /// </summary>
        /// <param name="root">Current head of the list, or null for an empty list.</param>
        /// <param name="input">Integer input key of the new entry.</param>
        /// <param name="reward">Reward stored in the new entry.</param>
        /// <returns>The head of the list after the insertion.</returns>
        public static RewardTableEntry addRewardTableEntry(RewardTableEntry root, int input, double reward)
        {
            if (root != null)
            {
                // locate the last node of the non-empty list
                RewardTableEntry last;
                for (last = root; last.next != null; last = last.next)
                {
                }

                last.next = new RewardTableEntry(input, reward);
                return root;
            }

            // empty list: the new entry becomes the head
            return new RewardTableEntry(input, reward);
        }
Example #12
0
        /// <summary>
        /// Builds a meta-node: stores the configuration, links the given children and parents,
        /// creates the node's STANN and resets the output / previous-input state.
        /// </summary>
        /// <param name="rawinputnum">Stored in RawInputNum.</param>
        /// <param name="children">Child nodes; the first <paramref name="childrennum"/> are copied and each gets this node added as a parent.</param>
        /// <param name="childrennum">Number of children to copy.</param>
        /// <param name="parents">Parent nodes; the first <paramref name="parentnum"/> are copied.</param>
        /// <param name="parentnum">Number of parents to copy.</param>
        /// <param name="nodeoutputnum">Output count handed to the STANN constructor.</param>
        /// <param name="rawinputrange">Initial input range; raised to 2^OutputNum of any child whose value is larger.</param>
        /// <param name="layernum">Stored in LayerNum and passed to the STANN.</param>
        /// <param name="neuronnum">Stored in NeuronNum and passed to the STANN.</param>
        /// <param name="threshold">Stored in Threshold and passed to the STANN.</param>
        /// <param name="lr">Stored in LR and passed to the STANN.</param>
        /// <param name="rand">Random generator stored in rnd and passed to the STANN.</param>
        /// <param name="forceSelfTrain">Stored in ForcedSelfTrain and passed to the STANN.</param>
        /// <param name="forcedQLearning">When true, the Reward/Q tables are reset and the IO log is allocated.</param>
        /// <param name="nodelevel">Stored in NodeLevel.</param>
        /// <param name="alpha">Stored in Qalpha.</param>
        /// <param name="gamma">Stored in Qgamma.</param>
        /// <param name="leafindex">Stored in LeafIndex.</param>
        public MetaNode(int rawinputnum, MetaNode[] children, int childrennum,
                        MetaNode[] parents, int parentnum, int nodeoutputnum,
                        int rawinputrange, int layernum, int neuronnum,
                        double threshold, double lr, Random rand,
                        Boolean forceSelfTrain, Boolean forcedQLearning,
                        int nodelevel, double alpha, double gamma, int leafindex)
        {
            int i;

            RawInputNum     = rawinputnum;
            ChildrenNum     = childrennum;
            LayerNum        = layernum;
            NeuronNum       = neuronnum;
            Threshold       = threshold;
            LR              = lr;
            rnd             = rand;
            ParentNum       = parentnum;
            ForcedSelfTrain = forceSelfTrain;
            ForcedQLearning = forcedQLearning;
            NodeLevel       = nodelevel;
            Qalpha          = alpha;
            Qgamma          = gamma;
            LeafIndex       = leafindex;
            // copying children array. also figuring out the input range for the stann
            int maxrange = rawinputrange;

            Children = new MetaNode[ChildrenNum];
            for (i = 0; i < ChildrenNum; i++)
            {
                Children[i] = children[i];
                Children[i].addParent(this);

                // the node's input range must cover 2^OutputNum of every child
                double childrange = Math.Pow(2, Children[i].stann.OutputNum);
                if (maxrange < childrange)
                {
                    maxrange = (int)childrange;
                }
            }
            InputRange = maxrange;

            // copying parent array. Unlike Children, Parents was never allocated in this constructor,
            // so make sure it can hold ParentNum entries before writing to it (an existing,
            // large-enough array is kept as is).
            if (ParentNum > 0 && (Parents == null || Parents.Length < ParentNum))
            {
                Parents = new MetaNode[ParentNum];
            }
            for (i = 0; i < ParentNum; i++)
            {
                Parents[i] = parents[i];
            }

            InputNum = getInputNum();
            // now creating the STANN or the ANN of the node
            stann = new STANN(InputNum, InputRange, LayerNum,
                              NeuronNum, nodeoutputnum, Threshold, LR, rnd, ForcedSelfTrain);
            // initializing previous input vector and previous output properties to zero
            CurrentOutput    = PreviousOutput = 0;
            OutputPass       = 0;
            // a freshly allocated double[] is already zero-filled, so no explicit clearing loop is needed
            PreviousInputVec = new double[InputNum];


            // initializing the Reward Table and table of Q-values for possible Q-learning use (or abuse :))
            // if the ForcedQLearning Property is set
            if (ForcedQLearning)
            {/*
              * Legacy dense-array initialization, kept for reference:
              *
              * int possibleinputs = (int)Math.Pow(InputRange, InputNum);
              * int possibleoutputs = (int)Math.Pow(2, stann.OutputNum);
              *
              * QTable = new double[possibleinputs][];
              * RewardTable = new double[possibleinputs];
              * for (i = 0; i < possibleinputs; i++)
              * {
              *     QTable[i] = new double[possibleoutputs];
              *     RewardTable[i] = 0;
              *     for (j = 0; j < possibleoutputs; j++)
              *         QTable[i][j] = 0;
              * } */
                // both tables start out empty (null)
                RewardTable = null;
                QTable      = null;
                // initializing the IO log
                IOLog       = new NodeIOLogEntry[MAX_IO_LOG_LENGTH];
                IOLogLength = 0;
            }
        }
Example #13
0
        /// <summary>
        /// Appends <paramref name="child"/> to this node's children. Because the node's inputs change,
        /// the STANN is recreated, the Q-learning tables are reset (when ForcedQLearning is set) and
        /// the previous-input vector is reallocated.
        /// </summary>
        /// <param name="child">Node to add as the last child.</param>
        public void addChild(MetaNode child)
        {
            // build an array one slot larger that holds the current children in the same order
            MetaNode[] grown = new MetaNode[ChildrenNum + 1];
            for (int k = 0; k < ChildrenNum; k++)
            {
                grown[k] = Children[k];
            }

            // one more child means one more input as well
            ChildrenNum++;
            InputNum++;
            Children = grown;
            Children[ChildrenNum - 1] = child;

            // the output count of the old STANN is carried over to the new one
            int curoutputnum = stann.OutputNum;

            // the input range must cover 2^OutputNum of the new child
            int childrange    = (int)Math.Pow(2, child.stann.OutputNum);
            int newinputrange = (stann.InputRange < childrange) ? childrange : stann.InputRange;
            InputRange = newinputrange;

            // recreating the STANN
            stann = new STANN(getInputNum(), newinputrange, LayerNum,
                              NeuronNum, curoutputnum, Threshold, LR, rnd, ForcedSelfTrain);

            if (ForcedQLearning)
            {
                // Re-initializing the Reward Table and table of Q-values: both start out empty (null).

                /*
                 * Legacy dense-array initialization, kept for reference:
                 *
                 * int possibleinputs = (int)Math.Pow(InputRange, InputNum);
                 * int possibleoutputs = (int)Math.Pow(2, stann.OutputNum);
                 *
                 * QTable = new double[possibleinputs][];
                 * RewardTable = new double[possibleinputs];
                 * for (i = 0; i < possibleinputs; i++)
                 * {
                 *   QTable[i] = new double[possibleoutputs];
                 *   RewardTable[i] = 0;
                 *   for (j = 0; j < possibleoutputs; j++)
                 *       QTable[i][j] = 0;
                 * }
                 */
                QTable      = null;
                RewardTable = null;
            }

            // Re-creating the previous input vector, all zeros
            PreviousInputVec = new double[InputNum];
            for (int k = 0; k < InputNum; k++)
            {
                PreviousInputVec[k] = 0;
            }
        }
        /// <summary>
        /// Constructs a meta-node from its configuration: copies the child and parent links,
        /// derives the input range, creates the STANN and zeroes the output / previous-input state.
        /// </summary>
        /// <param name="rawinputnum">Stored in RawInputNum.</param>
        /// <param name="children">Children to attach; each one also receives this node through addParent.</param>
        /// <param name="childrennum">How many entries of <paramref name="children"/> to use.</param>
        /// <param name="parents">Parents to attach.</param>
        /// <param name="parentnum">How many entries of <paramref name="parents"/> to use.</param>
        /// <param name="nodeoutputnum">Output count passed to the STANN constructor.</param>
        /// <param name="rawinputrange">Lower bound for InputRange; a child's 2^OutputNum may raise it.</param>
        /// <param name="layernum">Stored in LayerNum; passed to the STANN.</param>
        /// <param name="neuronnum">Stored in NeuronNum; passed to the STANN.</param>
        /// <param name="threshold">Stored in Threshold; passed to the STANN.</param>
        /// <param name="lr">Stored in LR; passed to the STANN.</param>
        /// <param name="rand">Stored in rnd; passed to the STANN.</param>
        /// <param name="forceSelfTrain">Stored in ForcedSelfTrain; passed to the STANN.</param>
        /// <param name="forcedQLearning">When true, the Q-learning tables are reset and the IO log is allocated.</param>
        /// <param name="nodelevel">Stored in NodeLevel.</param>
        /// <param name="alpha">Stored in Qalpha.</param>
        /// <param name="gamma">Stored in Qgamma.</param>
        /// <param name="leafindex">Stored in LeafIndex.</param>
        public MetaNode(int rawinputnum, MetaNode[] children, int childrennum,
                            MetaNode[] parents, int parentnum, int nodeoutputnum,
                            int rawinputrange, int layernum, int neuronnum,
                            double threshold, double lr, Random rand,
                            Boolean forceSelfTrain, Boolean forcedQLearning,
                            int nodelevel, double alpha, double gamma, int leafindex)
        {
            int i;
            RawInputNum = rawinputnum;
            ChildrenNum = childrennum;
            LayerNum = layernum;
            NeuronNum = neuronnum;
            Threshold = threshold;
            LR = lr;
            rnd = rand;
            ParentNum = parentnum;
            ForcedSelfTrain = forceSelfTrain;
            ForcedQLearning = forcedQLearning;
            NodeLevel = nodelevel;
            Qalpha = alpha;
            Qgamma = gamma;
            LeafIndex = leafindex;
            // copying children array. also figuring out the input range for the stann
            int maxrange = rawinputrange;
            Children = new MetaNode[ChildrenNum];
            for (i = 0; i < ChildrenNum; i++)
            {
                Children[i] = children[i];
                Children[i].addParent(this);

                // widen the range so it covers 2^OutputNum of this child (computed once)
                double childrange = Math.Pow(2, Children[i].stann.OutputNum);
                if (maxrange < childrange)
                    maxrange = (int)childrange;
            }
            InputRange = maxrange;
            // copying parent array. This constructor never allocated Parents (only Children),
            // so allocate it when it is missing or too small; a sufficiently large existing array is reused.
            if (ParentNum > 0 && (Parents == null || Parents.Length < ParentNum))
                Parents = new MetaNode[ParentNum];
            for (i = 0; i < ParentNum; i++)
                Parents[i] = parents[i];

            InputNum = getInputNum();
            // now creating the STANN or the ANN of the node
            stann = new STANN(InputNum, InputRange, LayerNum,
                              NeuronNum, nodeoutputnum, Threshold, LR, rnd, ForcedSelfTrain);
            // initializing previous input vector and previous output properties to zero
            CurrentOutput = PreviousOutput = 0;
            OutputPass = 0;
            // new double[] elements default to 0, so the vector needs no explicit clearing
            PreviousInputVec = new double[InputNum];

            // initializing the Reward Table and table of Q-values for possible Q-learning use (or abuse :))
            // if the ForcedQLearning Property is set
            if (ForcedQLearning)
            {/*
                Legacy dense-array initialization, kept for reference:

                int possibleinputs = (int)Math.Pow(InputRange, InputNum);
                int possibleoutputs = (int)Math.Pow(2, stann.OutputNum);

                QTable = new double[possibleinputs][];
                RewardTable = new double[possibleinputs];
                for (i = 0; i < possibleinputs; i++)
                {
                    QTable[i] = new double[possibleoutputs];
                    RewardTable[i] = 0;
                    for (j = 0; j < possibleoutputs; j++)
                        QTable[i][j] = 0;
                } */

                // both tables start out empty (null)
                RewardTable = null;
                QTable = null;
                // initializing the IO log
                IOLog = new NodeIOLogEntry[MAX_IO_LOG_LENGTH];
                IOLogLength = 0;
            }
        }
        /// <summary>
        /// Adds a child to the node. The STANN is recreated because the node's inputs change;
        /// with ForcedQLearning set the Reward and Q tables are reset too.
        /// </summary>
        /// <param name="child">The node that becomes the last element of Children.</param>
        public void addChild(MetaNode child)
        {
            // index the new child will occupy (equals the current child count)
            int slot = ChildrenNum;

            // copy the existing children into an array with room for one more
            MetaNode[] extended = new MetaNode[slot + 1];
            int idx = 0;
            while (idx < slot)
            {
                extended[idx] = Children[idx];
                idx++;
            }

            // increase the number of children, and the number of inputs as well
            ChildrenNum++;
            InputNum++;
            Children = extended;
            Children[slot] = child;

            // remember the current output count before the STANN is replaced
            int curoutputnum = stann.OutputNum;

            // start from the current range and widen it if the child needs 2^OutputNum values
            int newinputrange = stann.InputRange;
            int childrange = (int)Math.Pow(2, child.stann.OutputNum);
            if (newinputrange < childrange)
            {
                newinputrange = childrange;
            }
            InputRange = newinputrange;

            // recreating the STANN
            stann = new STANN(getInputNum(), newinputrange, LayerNum,
                              NeuronNum, curoutputnum, Threshold, LR, rnd, ForcedSelfTrain);

            if (ForcedQLearning)
            {
                // Re-initializing the Reward Table and table of Q-values: both become empty (null)
               /*
                Legacy dense-array initialization, kept for reference:

                int possibleinputs = (int)Math.Pow(InputRange, InputNum);
                int possibleoutputs = (int)Math.Pow(2, stann.OutputNum);

                QTable = new double[possibleinputs][];
                RewardTable = new double[possibleinputs];
                for (i = 0; i < possibleinputs; i++)
                {
                    QTable[i] = new double[possibleoutputs];
                    RewardTable[i] = 0;
                    for (j = 0; j < possibleoutputs; j++)
                        QTable[i][j] = 0;
                }
                */
                QTable = null;
                RewardTable = null;
            }

            // Re-creating the previous input vector and clearing it
            PreviousInputVec = new double[InputNum];
            int pos = 0;
            while (pos < InputNum)
            {
                PreviousInputVec[pos] = 0;
                pos++;
            }
        }