// Computes the output of the MetaNode branch rooted at mnet for the given
// per-leaf input vectors WITHOUT training side effects: no self-training, no
// Previous/Current output bookkeeping beyond the pass marker.
//
// mnet     - root of the branch to evaluate
// inputvec - one input vector per leaf, indexed by each leaf's LeafIndex
// pass     - evaluation-pass id used to avoid re-evaluating a node twice
//
// NOTE(review): on a repeated visit within the same pass, a leaf returns
// NoUpdatePassOutput while an inner node returns CurrentOutput, and the value
// computed on the first visit is not cached into either property here —
// confirm this asymmetry is intended.
public static double getOutputNoUpdate(MetaNode mnet, double[][] inputvec, int pass)
{
    double result;

    if (mnet.ChildrenNum == 0)
    {
        // Leaf node: feed its slice of the input straight through the STANN.
        if (mnet.OutputPass == pass)
        {
            // Already visited during this pass; reuse the cached no-update output.
            result = mnet.NoUpdatePassOutput;
        }
        else
        {
            mnet.OutputPass = pass;
            // Sigmoid activations of the last layer for this leaf's input slice.
            double[] activations = mnet.stann.sigmoidLayerOutputs(inputvec[mnet.LeafIndex], mnet.stann.LayerNum - 1);
            // Decimal equivalent of the ordered, thresholded sigmoid outputs.
            result = STANN.mapVector2Int(activations, 2, mnet.stann.OutputNum);
        }
    }
    else
    {
        // Inner node: gather child outputs, then evaluate this node's STANN.
        if (mnet.OutputPass == pass)
        {
            result = mnet.CurrentOutput;
        }
        else
        {
            mnet.OutputPass = pass;
            double[] childOutputs = new double[mnet.InputNum];

            for (int c = 0; c < mnet.InputNum; c++)
            {
                if (mnet.Children[c].NodeLevel >= mnet.NodeLevel)
                {
                    // A child at the same or a higher level would recurse back
                    // into this subtree; use its previous output to break the
                    // circular reference.
                    childOutputs[c] = mnet.Children[c].PreviousOutput;
                }
                else
                {
                    childOutputs[c] = getOutputNoUpdate(mnet.Children[c], inputvec, pass);
                }
            }

            double[] activations = mnet.stann.sigmoidLayerOutputs(childOutputs, mnet.stann.LayerNum - 1);
            // Decimal equivalent of the thresholded outputs.
            result = STANN.mapVector2Int(activations, 2, mnet.stann.OutputNum);
        }
    }

    return result;
}
/* NOTE(review): the legacy array-backed implementations of getMaximumQValue and
 * assignReward (dense QTable/RewardTable arrays indexed by Math.Pow-encoded
 * inputs) that were kept here as commented-out code have been removed during
 * review; retrieve them from version control history if needed. */

// Given that the last input (PreviousInputVec) caused PreviousOutput (which led
// to a satisfactory result), assign a reward to the new state that came up and
// backtrack through the IO log, updating the Q-values for the finite chain of
// (input, output) steps that preceded this successful ending.
//
// currentinputvec - the input vector describing the state just reached
// reward          - reward value to record for that state
public void assignReward(double[] currentinputvec, double reward)
{
    // Integer state index for the state we just arrived in.
    // (FIX(review): removed unused locals `previousinput` and
    // `currentStepReward` that were computed but never read.)
    int currentinput = STANN.mapVector2Int(currentinputvec, InputRange, InputNum);

    // Record the reward observed for the current state.
    RewardTableEntry.updateRewardEntry(ref RewardTable, currentinput, reward);

    // Walk the IO log backwards, propagating the reward along the chain of
    // logged (input, output) steps.
    int tempinput = currentinput;
    for (int i = IOLogLength - 1; i >= 0; i--)
    {
        // Fetch — or lazily create with Q-value 0 — the Q-table entry for
        // this log step.
        QTableEntry entry = QTableEntry.findQTableEntry(QTable, IOLog[i].input, IOLog[i].output);
        if (entry == null)
        {
            QTableEntry.assignQValue(ref QTable, IOLog[i].input, IOLog[i].output, 0);
            entry = QTableEntry.findQTableEntry(QTable, IOLog[i].input, IOLog[i].output);
        }
        else
        {
            entry.Frequency++;
        }

        // Visit-count based decay coefficient for non-deterministic MDPs.
        double NDPCoefficient = 1.0 / (1.0 + 1.0 * entry.Frequency);

        // FIX(review): getMaxQValue can return null when the successor state
        // has no Q entries yet (getOutput null-checks the same call); treat
        // the maximum future value as 0 in that case instead of dereferencing.
        QTableEntry maxentry = QTableEntry.getMaxQValue(QTable, tempinput);
        double maxfuture = (maxentry != null) ? maxentry.QValue : 0;

        // Q-value update for the log entry (in three lines).
        // NOTE(review): NDPCoefficient also scales the (1 - Qalpha) retention
        // term, which differs from the textbook Q-learning rule
        // Q += alpha_n * (r + gamma * maxQ - Q) — confirm this weighting is
        // intended.
        double qvalue = entry.QValue;
        qvalue = NDPCoefficient * (1 - Qalpha) * qvalue;
        qvalue += NDPCoefficient * Qalpha * RewardTableEntry.getReward(RewardTable, tempinput);
        qvalue += NDPCoefficient * Qalpha * Qgamma * maxfuture;
        entry.QValue = qvalue;
        // Q-value of the entry updated; step one link back along the chain.
        tempinput = IOLog[i].input;
    }
}
/* NOTE(review): the legacy array-backed implementation of getOutput that was
 * kept here as a large commented-out block has been removed during review;
 * retrieve it from version control history if needed. */

// Returns the output of a MetaNode branch given the input vectors to the
// leaves, WITH training side effects on each node visited in this pass:
// optional self-training, Previous/Current output updates, and (for inner
// nodes) optional Q-learning back-propagation plus IO-log bookkeeping.
//
// mnet     - root of the branch to evaluate
// inputvec - one input vector per leaf, indexed by each leaf's LeafIndex
// pass     - evaluation-pass id used to avoid re-evaluating a node twice
public static double getOutput(MetaNode mnet, double[][] inputvec, int pass)
{
    int i;
    double[] sigmoids;
    double theoutput;

    if (mnet.ChildrenNum == 0)
    {
        if (mnet.OutputPass != pass)
        {
            mnet.OutputPass = pass;
            // Self-training if the node has its ForcedSelfTrain attribute set.
            if (mnet.ForcedSelfTrain)
            {
                mnet.stann.selfTrain(inputvec[mnet.LeafIndex]);
            }
            // Retrieving the sigmoids of the node.
            sigmoids = mnet.stann.sigmoidLayerOutputs(inputvec[mnet.LeafIndex], mnet.stann.LayerNum - 1);
            // Calculating the decimal equivalent to the ordered thresholded
            // sigmoid outputs.
            // FIX(review): was STANN.sigmoids2Int(sigmoids, OutputNum); every
            // other active evaluation site in this file (both branches of
            // getOutputNoUpdate and the non-leaf branch below) computes the
            // same quantity with mapVector2Int(sigmoids, 2, OutputNum) under
            // the identical comment — made consistent.
            theoutput = STANN.mapVector2Int(sigmoids, 2, mnet.stann.OutputNum);

            mnet.PreviousOutput = mnet.CurrentOutput;
            mnet.CurrentOutput = theoutput;
        }
        else
        {
            theoutput = mnet.CurrentOutput;
        }
    }
    else
    {
        if (mnet.OutputPass != pass)
        {
            mnet.OutputPass = pass;
            double[] levelinput = new double[mnet.InputNum];

            for (i = 0; i < mnet.InputNum; i++)
            {
                if (mnet.Children[i].NodeLevel >= mnet.NodeLevel)
                {
                    // Requesting output from a higher (or equal) level node:
                    // use its previous output to avoid a circular recursion.
                    levelinput[i] = mnet.Children[i].PreviousOutput;
                }
                else
                {
                    levelinput[i] = getOutput(mnet.Children[i], inputvec, pass);
                }
            }

            // Self-training if the ForcedSelfTrain attribute is on.
            if (mnet.ForcedSelfTrain)
            {
                mnet.stann.selfTrain(levelinput);
            }
            // Retrieving sigmoids.
            sigmoids = mnet.stann.sigmoidLayerOutputs(levelinput, mnet.stann.LayerNum - 1);
            // Calculating the decimal equivalent to the thresholded outputs.
            theoutput = STANN.mapVector2Int(sigmoids, 2, mnet.stann.OutputNum);

            // Updating the previous input vector and previous output
            // properties of the metanode.
            int t;
            mnet.PreviousInputVec = new double[mnet.InputNum];
            for (t = 0; t < mnet.InputNum; t++)
            {
                mnet.PreviousInputVec[t] = levelinput[t];
            }
            mnet.PreviousOutput = mnet.CurrentOutput;
            mnet.CurrentOutput = theoutput;
            // Previous input vector and output updated with the new values.

            // Train the network towards the best known action when the
            // Q-learning property is on.
            if (mnet.ForcedQLearning)
            {
                // Mapping the input to its integer index in the reward table.
                int inputindex = STANN.mapVector2Int(levelinput, mnet.InputRange, mnet.InputNum);
                // Finding the output that corresponds to the maximum Q-value
                // for the given input (null when no Q entries exist yet).
                QTableEntry maxQvalueEntry = QTableEntry.getMaxQValue(mnet.QTable, inputindex);
                if (maxQvalueEntry != null)
                {
                    // Converting the maximum-Q-value output to a vector of
                    // binary digits, then training towards it.
                    double[] desiredOutput = STANN.mapInt2VectorDouble(maxQvalueEntry.Output, 2, mnet.stann.OutputNum);
                    mnet.stann.backPropagate(levelinput, desiredOutput);
                }

                // Updating the IO log length (ring restarts from slot 0 when
                // full — the stale entries are simply overwritten over time).
                if (mnet.IOLogLength == MAX_IO_LOG_LENGTH)
                {
                    // IO log is full: start all over again.
                    mnet.IOLogLength = 1;
                }
                else
                {
                    mnet.IOLogLength++;
                }
                // Recording this step in the IO log.
                mnet.IOLog[mnet.IOLogLength - 1].input = inputindex;
                mnet.IOLog[mnet.IOLogLength - 1].output = (int)theoutput;
            }
        }
        else
        {
            theoutput = mnet.CurrentOutput;
        }
    }

    return(theoutput);
}