/// <summary>
/// Evaluates the supplied parameters on the training events and reports
/// the fraction of events whose highest-scoring outcome equals the
/// recorded outcome.
/// </summary>
/// <param name="evalParams">Parameters handed to <c>PerceptronModel.eval</c>.</param>
/// <returns>Number of correct predictions divided by <c>numEvents</c>.</returns>
private double trainingStats(EvalParameters evalParams)
{
    int hits = 0;

    for (int eventIdx = 0; eventIdx < numUniqueEvents; eventIdx++)
    {
        // Each unique event is evaluated once per time it was seen, so
        // the hit count is expressed in terms of all events.
        for (int repeat = 0; repeat < this.numTimesEventsSeen[eventIdx]; repeat++)
        {
            double[] scores = new double[numOutcomes];

            // Without feature values the model is evaluated with a null value array.
            PerceptronModel.eval(
                contexts[eventIdx],
                values != null ? values[eventIdx] : null,
                scores,
                evalParams,
                false);

            if (maxIndex(scores) == outcomeList[eventIdx])
            {
                hits++;
            }
        }
    }

    double accuracy = (double)hits / numEvents;
    display("Stats: (" + hits + "/" + numEvents + ") " + accuracy + "\n");
    return accuracy;
}
/// <summary>
/// Runs one training pass over every sequence in <c>sequenceStream</c>.
/// For each sequence whose tagged outcomes differ from the reference
/// outcomes, the per-outcome feature counts (reference minus tagged) are
/// applied to <c>parameters</c>, and, when <c>useAverage</c> is set, the
/// bookkeeping in <c>updates</c>/<c>averageParams</c> is advanced. On the
/// last iteration the averaged parameters are finalized.
/// </summary>
/// <param name="iteration">1-based number of the iteration being run.</param>
public virtual void nextIteration(int iteration)
{
    iteration--; // move to 0-based index
    int numCorrect = 0;
    int si = 0; // index of the sequence currently being processed

    // Allocate the generic array directly: casting a non-generic
    // IDictionary[] to IDictionary<string, float?>[] fails at runtime.
    IDictionary<string, float?>[] featureCounts = new IDictionary<string, float?>[numOutcomes];
    for (int oi = 0; oi < numOutcomes; oi++)
    {
        featureCounts[oi] = new Dictionary<string, float?>();
    }

    PerceptronModel model = new PerceptronModel(parameters, predLabels, pmap, outcomeLabels);

    foreach (Sequence<Event> sequence in sequenceStream)
    {
        Event[] taggerEvents = sequenceStream.updateContext(sequence, model);
        Event[] events = sequence.Events;

        // Count matching outcomes; any mismatch triggers an update for the
        // whole sequence (no early exit, so numCorrect covers every event).
        bool update = false;
        for (int ei = 0; ei < events.Length; ei++)
        {
            if (!taggerEvents[ei].Outcome.Equals(events[ei].Outcome))
            {
                update = true;
            }
            else
            {
                numCorrect++;
            }
        }

        if (update)
        {
            for (int oi = 0; oi < numOutcomes; oi++)
            {
                featureCounts[oi].Clear();
            }

            // Training feature counts are added ...
            accumulateFeatureCounts(events, featureCounts, 1f, false);
            // ... and the tagger's feature counts are subtracted; entries that
            // cancel out exactly are dropped so they cause no parameter update.
            accumulateFeatureCounts(taggerEvents, featureCounts, -1f, true);

            for (int oi = 0; oi < numOutcomes; oi++)
            {
                foreach (KeyValuePair<string, float?> entry in featureCounts[oi])
                {
                    int pi = pmap.get(entry.Key);
                    if (pi != -1)
                    {
                        parameters[pi].updateParameter(oi, (double)entry.Value);
                        if (useAverage)
                        {
                            if (updates[pi][oi][VALUE] != 0)
                            {
                                // Credit the previous value for every sequence
                                // processed since it was last recorded.
                                averageParams[pi].updateParameter(oi, updates[pi][oi][VALUE] * (numSequences * (iteration - updates[pi][oi][ITER]) + (si - updates[pi][oi][EVENT])));
                            }
                            updates[pi][oi][VALUE] = (int)parameters[pi].Parameters[oi];
                            updates[pi][oi][ITER] = iteration;
                            updates[pi][oi][EVENT] = si;
                        }
                    }
                }
            }

            model = new PerceptronModel(parameters, predLabels, pmap, outcomeLabels);
        }
        si++;
    }

    // Finish the average computation on the last iteration.
    double totIterations = (double)iterations * si;
    if (useAverage && iteration == iterations - 1)
    {
        for (int pi = 0; pi < numPreds; pi++)
        {
            double[] predParams = averageParams[pi].Parameters;
            for (int oi = 0; oi < numOutcomes; oi++)
            {
                if (updates[pi][oi][VALUE] != 0)
                {
                    predParams[oi] += updates[pi][oi][VALUE] * (numSequences * (iterations - updates[pi][oi][ITER]) - updates[pi][oi][EVENT]);
                }
                if (predParams[oi] != 0)
                {
                    predParams[oi] /= totIterations;
                    averageParams[pi].setParameter(oi, predParams[oi]);
                }
            }
        }
    }

    display(". (" + numCorrect + "/" + numEvents + ") " + ((double)numCorrect / numEvents) + "\n");
}

/// <summary>
/// Adds <paramref name="sign"/> times each context feature's value (1 when
/// the event carries no values) to the count map of the event's outcome.
/// </summary>
/// <param name="events">Events whose context features are counted.</param>
/// <param name="featureCounts">One feature-to-count map per outcome index.</param>
/// <param name="sign">Multiplier applied to every feature value (+1 or -1).</param>
/// <param name="removeZeros">When true, a count that becomes exactly zero is removed instead of stored.</param>
private void accumulateFeatureCounts(Event[] events, IDictionary<string, float?>[] featureCounts, float sign, bool removeZeros)
{
    foreach (Event ev in events)
    {
        string[] contextStrings = ev.Context;
        float[] weights = ev.Values;
        IDictionary<string, float?> counts = featureCounts[(int)omap[ev.Outcome]];

        for (int ci = 0; ci < contextStrings.Length; ci++)
        {
            float delta = sign * (weights != null ? weights[ci] : 1f);

            // TryGetValue instead of the indexer: the indexer throws
            // KeyNotFoundException for a feature that has no count yet.
            float? stored;
            float total = counts.TryGetValue(contextStrings[ci], out stored) && stored.HasValue
                ? stored.Value + delta
                : delta;

            if (removeZeros && total == 0f)
            {
                counts.Remove(contextStrings[ci]);
            }
            else
            {
                counts[contextStrings[ci]] = total;
            }
        }
    }
}
/// <summary>
/// Trains perceptron parameters for at most <paramref name="iterations"/>
/// passes over the training events, stopping early once the training
/// accuracy differs from each of the previous three accuracies by less
/// than <c>tolerance</c>.
/// </summary>
/// <param name="iterations">Maximum number of passes over the events.</param>
/// <param name="useAverage">
/// When true, the parameters are summed after (selected) iterations and the
/// averaged parameters are returned instead of the final ones.
/// </param>
/// <returns>
/// The averaged parameters when <paramref name="useAverage"/> is true,
/// otherwise the parameters as they stand after the last iteration.
/// </returns>
private MutableContext[] findParameters(int iterations, bool useAverage)
{
    display("Performing " + iterations + " iterations.\n");

    int[] allOutcomesPattern = new int[numOutcomes];
    for (int oi = 0; oi < numOutcomes; oi++)
    {
        allOutcomesPattern[oi] = oi;
    }

    // Stores the estimated parameter value of each predicate during iteration.
    MutableContext[] parameters = new MutableContext[numPreds];
    for (int pi = 0; pi < numPreds; pi++)
    {
        parameters[pi] = new MutableContext(allOutcomesPattern, new double[numOutcomes]);
        for (int aoi = 0; aoi < numOutcomes; aoi++)
        {
            parameters[pi].setParameter(aoi, 0.0);
        }
    }

    EvalParameters evalParams = new EvalParameters(parameters, numOutcomes);

    // Stores the sum of parameter values of each predicate over many iterations.
    MutableContext[] summedParams = new MutableContext[numPreds];
    if (useAverage)
    {
        for (int pi = 0; pi < numPreds; pi++)
        {
            summedParams[pi] = new MutableContext(allOutcomesPattern, new double[numOutcomes]);
            for (int aoi = 0; aoi < numOutcomes; aoi++)
            {
                summedParams[pi].setParameter(aoi, 0.0);
            }
        }
    }

    // Keep track of the previous three accuracies. Training stops when the
    // current training set accuracy is within tolerance of each of them.
    double prevAccuracy1 = 0.0;
    double prevAccuracy2 = 0.0;
    double prevAccuracy3 = 0.0;

    // A counter for the denominator for averaging.
    int numTimesSummed = 0;

    double stepsize = 1;
    for (int i = 1; i <= iterations; i++)
    {
        // Decrease the stepsize by a small amount.
        if (stepSizeDecrease != null)
        {
            stepsize *= 1 - stepSizeDecrease.GetValueOrDefault();
        }

        displayIteration(i);

        int numCorrect = 0;

        for (int ei = 0; ei < numUniqueEvents; ei++)
        {
            int targetOutcome = outcomeList[ei];

            for (int ni = 0; ni < this.numTimesEventsSeen[ei]; ni++)
            {
                // Compute the model's prediction according to the current parameters.
                double[] modelDistribution = new double[numOutcomes];
                if (values != null)
                {
                    PerceptronModel.eval(contexts[ei], values[ei], modelDistribution, evalParams, false);
                }
                else
                {
                    PerceptronModel.eval(contexts[ei], null, modelDistribution, evalParams, false);
                }

                int maxOutcome = maxIndex(modelDistribution);

                if (maxOutcome != targetOutcome)
                {
                    // Standard perceptron update: boost the parameters
                    // associated with the target and reduce those associated
                    // with the incorrectly predicted outcome.
                    for (int ci = 0; ci < contexts[ei].Length; ci++)
                    {
                        int pi = contexts[ei][ci];
                        if (values == null)
                        {
                            parameters[pi].updateParameter(targetOutcome, stepsize);
                            parameters[pi].updateParameter(maxOutcome, -stepsize);
                        }
                        else
                        {
                            parameters[pi].updateParameter(targetOutcome, stepsize * values[ei][ci]);
                            parameters[pi].updateParameter(maxOutcome, -stepsize * values[ei][ci]);
                        }
                    }
                }
                else
                {
                    // Update the counts for accuracy.
                    numCorrect++;
                }
            }
        }

        // Calculate the training accuracy and display.
        double trainingAccuracy = (double)numCorrect / numEvents;
        if (i < 10 || (i % 10) == 0)
        {
            display(". (" + numCorrect + "/" + numEvents + ") " + trainingAccuracy + "\n");
        }

        // TODO: Make averaging configurable !!!
        // With skipped averaging enabled, only the first 19 iterations and
        // perfect-square iterations contribute to the sum. The previous
        // if/else-if chain reduced to plain `useAverage`, which made the
        // useSkippedlAveraging flag a no-op.
        bool doAveraging = useAverage && (!useSkippedlAveraging || i < 20 || isPerfectSquare(i));

        if (doAveraging)
        {
            numTimesSummed++;
            for (int pi = 0; pi < numPreds; pi++)
            {
                for (int aoi = 0; aoi < numOutcomes; aoi++)
                {
                    summedParams[pi].updateParameter(aoi, parameters[pi].Parameters[aoi]);
                }
            }
        }

        // If the tolerance is greater than the difference between the
        // current training accuracy and all of the previous three
        // training accuracies, stop training.
        if (Math.Abs(prevAccuracy1 - trainingAccuracy) < tolerance
            && Math.Abs(prevAccuracy2 - trainingAccuracy) < tolerance
            && Math.Abs(prevAccuracy3 - trainingAccuracy) < tolerance)
        {
            display("Stopping: change in training set accuracy less than " + tolerance + "\n");
            break;
        }

        // Update the previous training accuracies.
        prevAccuracy1 = prevAccuracy2;
        prevAccuracy2 = prevAccuracy3;
        prevAccuracy3 = trainingAccuracy;
    }

    // Output the final training stats.
    trainingStats(evalParams);

    if (!useAverage)
    {
        return parameters;
    }

    // Create averaged parameters. When no iteration ran nothing was summed;
    // leave the zero-initialized sums untouched instead of computing 0/0 (NaN).
    if (numTimesSummed > 0)
    {
        for (int pi = 0; pi < numPreds; pi++)
        {
            for (int aoi = 0; aoi < numOutcomes; aoi++)
            {
                summedParams[pi].setParameter(aoi, summedParams[pi].Parameters[aoi] / numTimesSummed);
            }
        }
    }
    return summedParams;
}