public RewardTableEntry(int input, double reward)
{
    Reward = reward;
    Input = input;
    next = null;
}
// update the Reward Table or add a new entry
public static void updateRewardEntry(ref RewardTableEntry root, int input, double reward)
{
    RewardTableEntry entry = findRewardTableEntry(root, input);
    if (entry == null)
    {
        root = addRewardTableEntry(root, input, reward);
    }
    else
    {
        entry.Reward = reward;
    }
}
public static double getReward(RewardTableEntry root, int input)
{
    RewardTableEntry entry = findRewardTableEntry(root, input);
    double reward;
    if (entry != null)
    {
        reward = entry.Reward;
    }
    else
    {
        reward = 0;
    }
    return reward;
}
public static RewardTableEntry findRewardTableEntry(RewardTableEntry root, int input)
{
    Boolean found = false;
    RewardTableEntry temp = root;
    while (!found && (temp != null))
    {
        found = (temp.Input == input);
        if (!found)
        {
            temp = temp.next;
        }
    }
    return temp;
}
// return the maximum Q-value in the Q Table for a given input
/* public double getMaximumQValue(int inputindex)
 * {
 *     double maxQ = QTable[inputindex][0];
 *     int maxQindex = 0;
 *
 *     int i;
 *     int possibleoutputs = (int)Math.Pow(2, stann.OutputNum);
 *
 *     for (i = 0; i < possibleoutputs; i++)
 *         if (maxQ < QTable[inputindex][i])
 *         {
 *             maxQ = QTable[inputindex][i];
 *             maxQindex = i;
 *         }
 *     return QTable[inputindex][maxQindex];
 * }*/

// given that the last input (PreviousInputVec) caused PreviousOutput (which led to a satisfactory result),
// we may assign a reward to the new state that came up, thus backtracking and updating the rewards for
// a finite number of steps (input-output) that preceded this successful ending
/*
 * public void assignReward(double[] currentinputvec, double reward)
 * {
 *     int previousinput = 0, currentinput = 0, i;
 *     for (i = 0; i < InputNum; i++)
 *     {
 *         previousinput += (int)(Math.Pow(InputRange, i) * PreviousInputVec[i]);
 *         currentinput += (int)(Math.Pow(InputRange, i) * currentinputvec[i]);
 *     }
 *
 *     RewardTable[currentinput] = reward;
 *
 *     // Updating the rewards in the Reward Table using the log entry
 *     double currentStepReward = reward;
 *     int tempinput = currentinput;
 *
 *     for (i = IOLogLength - 1; i >= 0; i--)
 *     {
 *         // updating the q-value for the input-output log entry (in three lines)
 *         QTable[IOLog[i].input][IOLog[i].output] = (1 - Qalpha) * QTable[IOLog[i].input][IOLog[i].output];
 *         QTable[IOLog[i].input][IOLog[i].output] += Qalpha * RewardTable[tempinput];
 *         QTable[IOLog[i].input][IOLog[i].output] += Qalpha * Qgamma * getMaximumQValue(tempinput);
 *         // Q-value of the entry updated
 *         tempinput = IOLog[i].input;
 *     }
 *
 *     // clearing the IO Log to avoid re-assigning Q-values on the same chain of actions when a new reward appears
 *     clearIOLog();
 * }
 */

public void assignReward(double[] currentinputvec, double reward)
{
    int i;

    // mapping previous and current input vectors to integers
    int previousinput = STANN.mapVector2Int(PreviousInputVec, InputRange, InputNum);
    int currentinput = STANN.mapVector2Int(currentinputvec, InputRange, InputNum);

    // adding a reward entry for the current input
    RewardTableEntry.updateRewardEntry(ref RewardTable, currentinput, reward);

    // updating the Q-values in the Q Table using the existing log entries
    double currentStepReward = reward;
    int tempinput = currentinput;

    for (i = IOLogLength - 1; i >= 0; i--)
    {
        // retrieving the Q Table entry for the current input in the log
        QTableEntry entry = QTableEntry.findQTableEntry(QTable, IOLog[i].input, IOLog[i].output);
        if (entry == null)
        {
            QTableEntry.assignQValue(ref QTable, IOLog[i].input, IOLog[i].output, 0);
            entry = QTableEntry.findQTableEntry(QTable, IOLog[i].input, IOLog[i].output);
        }
        else
        {
            entry.Frequency++;
        }

        // the non-deterministic MDP coefficient
        double NDPCoefficient = 1.0 / (1.0 + 1.0 * entry.Frequency);

        double qvalue = entry.QValue;

        // updating the q-value for the input-output log entry (in three lines)
        qvalue = NDPCoefficient * (1 - Qalpha) * qvalue;
        qvalue += NDPCoefficient * Qalpha * RewardTableEntry.getReward(RewardTable, tempinput);
        qvalue += NDPCoefficient * Qalpha * Qgamma * QTableEntry.getMaxQValue(QTable, tempinput).QValue;
        entry.QValue = qvalue;
        // Q-value of the entry updated

        tempinput = IOLog[i].input;
    }
}
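// For reference, the three-line update above collapses to the single assignment below.
// This is a reconstruction from the code itself, not a formula quoted from elsewhere:
// s and a stand for IOLog[i].input and IOLog[i].output, s' for tempinput, and N for
// entry.Frequency.
//
//     Q(s, a) <- c * [ (1 - Qalpha) * Q(s, a)
//                      + Qalpha * r(s')
//                      + Qalpha * Qgamma * max_a' Q(s', a') ],   where c = 1 / (1 + N)
//
// mapVector2Int is assumed here to implement the positional encoding shown in the
// commented-out legacy loop, i.e. input = sum over i of InputRange^i * vec[i].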
public static RewardTableEntry addRewardTableEntry(RewardTableEntry root, int input, double reward)
{
    RewardTableEntry temp = root;
    RewardTableEntry newroot;
    if (temp == null)
    {
        temp = new RewardTableEntry(input, reward);
        newroot = temp;
    }
    else
    {
        newroot = root;
        while (temp.next != null)
        {
            temp = temp.next;
        }
        temp.next = new RewardTableEntry(input, reward);
    }
    return newroot;
}
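// A minimal usage sketch of the linked-list Reward Table API above; the variable names
// below are illustrative only and appear nowhere in the node code:
//
//     RewardTableEntry rewards = null;                          // an empty table is a null root
//     RewardTableEntry.updateRewardEntry(ref rewards, 5, 1.0);  // no entry for input 5 yet: appends one
//     RewardTableEntry.updateRewardEntry(ref rewards, 5, 0.5);  // entry exists: overwritten in place
//     double r = RewardTableEntry.getReward(rewards, 7);        // unseen input: defaults to 0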
public MetaNode(int rawinputnum, MetaNode[] children, int childrennum, MetaNode[] parents, int parentnum,
                int nodeoutputnum, int rawinputrange, int layernum, int neuronnum, double threshold,
                double lr, Random rand, Boolean forceSelfTrain, Boolean forcedQLearning, int nodelevel,
                double alpha, double gamma, int leafindex)
{
    int i;

    RawInputNum = rawinputnum;
    ChildrenNum = childrennum;
    LayerNum = layernum;
    NeuronNum = neuronnum;
    Threshold = threshold;
    LR = lr;
    rnd = rand;
    ParentNum = parentnum;
    ForcedSelfTrain = forceSelfTrain;
    ForcedQLearning = forcedQLearning;
    NodeLevel = nodelevel;
    Qalpha = alpha;
    Qgamma = gamma;
    LeafIndex = leafindex;

    // copying the children array; also figuring out the input range for the STANN
    int maxrange = rawinputrange;
    Children = new MetaNode[ChildrenNum];
    for (i = 0; i < ChildrenNum; i++)
    {
        Children[i] = children[i];
        Children[i].addParent(this);
        maxrange = (maxrange < Math.Pow(2, Children[i].stann.OutputNum)) ?
                   (int)Math.Pow(2, Children[i].stann.OutputNum) : maxrange;
    }
    InputRange = maxrange;

    // copying the parent array (the array must be allocated first; the original code
    // copied into Parents without ever creating it)
    Parents = new MetaNode[ParentNum];
    for (i = 0; i < ParentNum; i++)
    {
        Parents[i] = parents[i];
    }

    InputNum = getInputNum();

    // now creating the STANN, i.e. the ANN of the node
    stann = new STANN(InputNum, InputRange, LayerNum, NeuronNum, nodeoutputnum, Threshold, LR, rnd, ForcedSelfTrain);

    // initializing the previous input vector and previous output properties to zero
    CurrentOutput = PreviousOutput = 0;
    OutputPass = 0;
    PreviousInputVec = new double[InputNum];
    for (i = 0; i < InputNum; i++)
    {
        PreviousInputVec[i] = 0;
    }

    // initializing the Reward Table and the table of Q-values for possible Q-learning use (or abuse :))
    // if the ForcedQLearning property is set
    if (ForcedQLearning)
    {
        /*
         * int possibleinputs = (int)Math.Pow(InputRange, InputNum);
         * int possibleoutputs = (int)Math.Pow(2, stann.OutputNum);
         *
         * QTable = new double[possibleinputs][];
         * RewardTable = new double[possibleinputs];
         * for (i = 0; i < possibleinputs; i++)
         * {
         *     QTable[i] = new double[possibleoutputs];
         *     RewardTable[i] = 0;
         *     for (j = 0; j < possibleoutputs; j++)
         *         QTable[i][j] = 0;
         * }
         */
        RewardTable = null;
        QTable = null;

        // initializing the IO log
        IOLog = new NodeIOLogEntry[MAX_IO_LOG_LENGTH];
        IOLogLength = 0;
    }
}
// adds a child to the node. the STANN MUST be recreated (due to the change in the inputs)
public void addChild(MetaNode child)
{
    int i;
    MetaNode[] temp;

    if (ChildrenNum == 0)
    {
        // increase the number of children
        ChildrenNum++;
        // increase the number of inputs as well!
        InputNum++;
        Children = new MetaNode[ChildrenNum];
    }
    else
    {
        temp = new MetaNode[ChildrenNum];
        for (i = 0; i < ChildrenNum; i++)
        {
            temp[i] = Children[i];
        }
        // increase the number of children
        ChildrenNum++;
        // increase the number of inputs as well!
        InputNum++;
        Children = new MetaNode[ChildrenNum];
        for (i = 0; i < ChildrenNum - 1; i++)
        {
            Children[i] = temp[i];
        }
    }
    Children[ChildrenNum - 1] = child;

    int newinputrange, curoutputnum = stann.OutputNum;
    if (stann.InputRange < (int)Math.Pow(2, child.stann.OutputNum))
    {
        newinputrange = (int)Math.Pow(2, child.stann.OutputNum);
        InputRange = newinputrange;
    }
    else
    {
        newinputrange = stann.InputRange;
        InputRange = stann.InputRange;
    }

    // recreating the STANN
    stann = new STANN(getInputNum(), newinputrange, LayerNum, NeuronNum, curoutputnum, Threshold, LR, rnd, ForcedSelfTrain);

    if (ForcedQLearning)
    {
        // re-initializing the Reward Table and the table of Q-values for possible Q-learning use (or abuse :))
        /*
         * int possibleinputs = (int)Math.Pow(InputRange, InputNum);
         * int possibleoutputs = (int)Math.Pow(2, stann.OutputNum);
         *
         * QTable = new double[possibleinputs][];
         * RewardTable = new double[possibleinputs];
         * for (i = 0; i < possibleinputs; i++)
         * {
         *     QTable[i] = new double[possibleoutputs];
         *     RewardTable[i] = 0;
         *     for (j = 0; j < possibleoutputs; j++)
         *         QTable[i][j] = 0;
         * }
         */
        RewardTable = null;
        QTable = null;
    }

    // re-creating the previous input vector
    PreviousInputVec = new double[InputNum];
    for (i = 0; i < InputNum; i++)
    {
        PreviousInputVec[i] = 0;
    }
}
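// Design note: the commented-out dense tables need Math.Pow(InputRange, InputNum) rows
// (times Math.Pow(2, stann.OutputNum) columns for the Q Table), which grows exponentially
// with the number of inputs. The linked-list RewardTable/QTable entries that replace them
// only materialize states that have actually been visited, trading O(1) array indexing
// for a linear scan per lookup.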