Example #1
        // Computes the accuracy of the current model on the training events and displays it.
        private double trainingStats(EvalParameters evalParams)
        {
            int numCorrect = 0;

            for (int ei = 0; ei < numUniqueEvents; ei++)
            {
                for (int ni = 0; ni < this.numTimesEventsSeen[ei]; ni++)
                {
                    double[] modelDistribution = new double[numOutcomes];

                    if (values != null)
                    {
                        PerceptronModel.eval(contexts[ei], values[ei], modelDistribution, evalParams, false);
                    }
                    else
                    {
                        PerceptronModel.eval(contexts[ei], null, modelDistribution, evalParams, false);
                    }

                    int max = maxIndex(modelDistribution);
                    if (max == outcomeList[ei])
                    {
                        numCorrect++;
                    }
                }
            }
            double trainingAccuracy = (double)numCorrect / numEvents;

            display("Stats: (" + numCorrect + "/" + numEvents + ") " + trainingAccuracy + "\n");
            return trainingAccuracy;
        }
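        // Note: the maxIndex helper used above is not shown in this example. A minimal
        // sketch consistent with how it is called here (argmax over the outcome
        // distribution) might look like the following; the trainer's actual helper may differ.
        private static int maxIndex(double[] values)
        {
            int max = 0;
            for (int i = 1; i < values.Length; i++)
            {
                if (values[i] > values[max])
                {
                    max = i;
                }
            }
            return max;
        }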
        // Performs a single perceptron training iteration over the sequence stream,
        // updating the parameters whenever the model's predicted outcomes for a sequence
        // disagree with the training outcomes.
        public virtual void nextIteration(int iteration)
        {
            iteration--; //move to 0-based index
            int numCorrect = 0;
            int oei        = 0;
            int si         = 0;

            IDictionary<string, float?>[] featureCounts = new IDictionary<string, float?>[numOutcomes];
            for (int oi = 0; oi < numOutcomes; oi++)
            {
                featureCounts[oi] = new Dictionary<string, float?>();
            }
            PerceptronModel model = new PerceptronModel(parameters, predLabels, pmap, outcomeLabels);

            // Tag each sequence with the current model and compare the predicted outcomes
            // against the training outcomes; only sequences containing at least one
            // disagreement trigger a parameter update below.
            foreach (Sequence<Event> sequence in sequenceStream)
            {
                Event[] taggerEvents = sequenceStream.updateContext(sequence, model);
                Event[] events       = sequence.Events;
                bool    update       = false;
                for (int ei = 0; ei < events.Length; ei++, oei++)
                {
                    if (!taggerEvents[ei].Outcome.Equals(events[ei].Outcome))
                    {
                        update = true;
                        //break;
                    }
                    else
                    {
                        numCorrect++;
                    }
                }
                if (update)
                {
                    for (int oi = 0; oi < numOutcomes; oi++)
                    {
                        featureCounts[oi].Clear();
                    }
                    //System.err.print("train:");for (int ei=0;ei<events.length;ei++) {System.err.print(" "+events[ei].getOutcome());} System.err.println();
                    //training feature count computation
                    for (int ei = 0; ei < events.Length; ei++, oei++)
                    {
                        string[] contextStrings = events[ei].Context;
                        float[]  values         = events[ei].Values;
                        int      oi             = (int)omap[events[ei].Outcome];
                        for (int ci = 0; ci < contextStrings.Length; ci++)
                        {
                            float value = 1;
                            if (values != null)
                            {
                                value = values[ci];
                            }
                            // Dictionary indexing throws on a missing key in C#, so use
                            // TryGetValue instead of checking the result for null.
                            float? c;
                            if (featureCounts[oi].TryGetValue(contextStrings[ci], out c))
                            {
                                c += value;
                            }
                            else
                            {
                                c = value;
                            }
                            featureCounts[oi][contextStrings[ci]] = c;
                        }
                    }
                    //evaluation feature count computation
                    //System.err.print("test: ");for (int ei=0;ei<taggerEvents.length;ei++) {System.err.print(" "+taggerEvents[ei].getOutcome());} System.err.println();
                    foreach (Event taggerEvent in taggerEvents)
                    {
                        string[] contextStrings = taggerEvent.Context;
                        float[]  values         = taggerEvent.Values;
                        int      oi             = (int)omap[taggerEvent.Outcome];
                        for (int ci = 0; ci < contextStrings.Length; ci++)
                        {
                            float value = 1;
                            if (values != null)
                            {
                                value = values[ci];
                            }
                            float? c;
                            if (featureCounts[oi].TryGetValue(contextStrings[ci], out c))
                            {
                                c -= value;
                            }
                            else
                            {
                                c = -1 * value;
                            }
                            if (c == 0f)
                            {
                                featureCounts[oi].Remove(contextStrings[ci]);
                            }
                            else
                            {
                                featureCounts[oi][contextStrings[ci]] = c;
                            }
                        }
                    }
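                    // featureCounts now holds (training count - model-predicted count) for each
                    // feature and outcome; apply that difference as the perceptron update.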
                    for (int oi = 0; oi < numOutcomes; oi++)
                    {
                        foreach (string feature in featureCounts[oi].Keys)
                        {
                            int pi = pmap.get(feature);
                            if (pi != -1)
                            {
                                //System.err.println(si+" "+outcomeLabels[oi]+" "+feature+" "+featureCounts[oi].get(feature));
                                parameters[pi].updateParameter(oi, (double)featureCounts[oi][feature]);
                                if (useAverage)
                                {
                                    if (updates[pi][oi][VALUE] != 0)
                                    {
                                        averageParams[pi].updateParameter(oi,
                                                                          updates[pi][oi][VALUE] *
                                                                          (numSequences * (iteration - updates[pi][oi][ITER]) +
                                                                           (si - updates[pi][oi][EVENT])));
                                        //System.err.println("p avp["+pi+"]."+oi+"="+averageParams[pi].getParameters()[oi]);
                                    }
                                    //System.err.println("p updates["+pi+"]["+oi+"]=("+updates[pi][oi][ITER]+","+updates[pi][oi][EVENT]+","+updates[pi][oi][VALUE]+") + ("+iteration+","+oei+","+params[pi].getParameters()[oi]+") -> "+averageParams[pi].getParameters()[oi]);
                                    updates[pi][oi][VALUE] = (int)parameters[pi].Parameters[oi];
                                    updates[pi][oi][ITER]  = iteration;
                                    updates[pi][oi][EVENT] = si;
                                }
                            }
                        }
                    }
                    model = new PerceptronModel(parameters, predLabels, pmap, outcomeLabels);
                }
                si++;
            }
            //finish average computation
            double totIterations = (double)iterations * si;

            if (useAverage && iteration == iterations - 1)
            {
                for (int pi = 0; pi < numPreds; pi++)
                {
                    double[] predParams = averageParams[pi].Parameters;
                    for (int oi = 0; oi < numOutcomes; oi++)
                    {
                        if (updates[pi][oi][VALUE] != 0)
                        {
                            predParams[oi] += updates[pi][oi][VALUE] *
                                              (numSequences * (iterations - updates[pi][oi][ITER]) -
                                               updates[pi][oi][EVENT]);
                        }
                        if (predParams[oi] != 0)
                        {
                            predParams[oi] /= totIterations;
                            averageParams[pi].setParameter(oi, predParams[oi]);
                            //System.err.println("updates["+pi+"]["+oi+"]=("+updates[pi][oi][ITER]+","+updates[pi][oi][EVENT]+","+updates[pi][oi][VALUE]+") + ("+iterations+","+0+","+params[pi].getParameters()[oi]+") -> "+averageParams[pi].getParameters()[oi]);
                        }
                    }
                }
            }
            display(". (" + numCorrect + "/" + numEvents + ") " + ((double)numCorrect / numEvents) + "\n");
        }
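        // The updates[pi][oi][ITER/EVENT/VALUE] bookkeeping above implements "lazy"
        // parameter averaging: instead of adding every parameter to its running sum after
        // every sequence, the sum is only credited when a parameter actually changes, and
        // the old value is weighted by the number of sequences it was in effect. A minimal
        // single-weight sketch of that idea (hypothetical names, not part of this trainer):
        private class LazyAveragedWeight
        {
            private double value;     // current weight, in effect since lastStep
            private double summed;    // running sum of the weight over all steps credited so far
            private int    lastStep;  // step index at which the weight last changed

            // Credit the sum for the span during which the old value was in effect,
            // then switch to the new value.
            public void Update(double newValue, int step)
            {
                summed  += value * (step - lastStep);
                value    = newValue;
                lastStep = step;
            }

            // Close out the final span and return the averaged weight over totalSteps steps.
            public double Average(int totalSteps)
            {
                summed += value * (totalSteps - lastStep);
                return summed / totalSteps;
            }
        }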
Example #3
        // Trains the perceptron parameters for the given number of iterations and returns
        // either the raw or the averaged parameters.
        private MutableContext[] findParameters(int iterations, bool useAverage)
        {
            display("Performing " + iterations + " iterations.\n");

            int[] allOutcomesPattern = new int[numOutcomes];
            for (int oi = 0; oi < numOutcomes; oi++)
            {
                allOutcomesPattern[oi] = oi;
            }

            // Stores the estimated parameter value of each predicate during iteration.
            MutableContext[] parameters = new MutableContext[numPreds];
            for (int pi = 0; pi < numPreds; pi++)
            {
                parameters[pi] = new MutableContext(allOutcomesPattern, new double[numOutcomes]);
                for (int aoi = 0; aoi < numOutcomes; aoi++)
                {
                    parameters[pi].setParameter(aoi, 0.0);
                }
            }

            EvalParameters evalParams = new EvalParameters(parameters, numOutcomes);

            // Stores the sum of parameter values of each predicate over many iterations.
            MutableContext[] summedParams = new MutableContext[numPreds];
            if (useAverage)
            {
                for (int pi = 0; pi < numPreds; pi++)
                {
                    summedParams[pi] = new MutableContext(allOutcomesPattern, new double[numOutcomes]);
                    for (int aoi = 0; aoi < numOutcomes; aoi++)
                    {
                        summedParams[pi].setParameter(aoi, 0.0);
                    }
                }
            }

            // Keep track of the previous three accuracies. The difference of
            // the mean of these and the current training set accuracy is used
            // with tolerance to decide whether to stop.
            double prevAccuracy1 = 0.0;
            double prevAccuracy2 = 0.0;
            double prevAccuracy3 = 0.0;

            // A counter for the denominator for averaging.
            int numTimesSummed = 0;

            double stepsize = 1;

            for (int i = 1; i <= iterations; i++)
            {
                // Decrease the stepsize by a small amount.
                if (stepSizeDecrease != null)
                {
                    stepsize *= 1 - stepSizeDecrease.GetValueOrDefault();
                }

                displayIteration(i);

                int numCorrect = 0;

                for (int ei = 0; ei < numUniqueEvents; ei++)
                {
                    int targetOutcome = outcomeList[ei];

                    for (int ni = 0; ni < this.numTimesEventsSeen[ei]; ni++)
                    {
                        // Compute the model's prediction according to the current parameters.
                        double[] modelDistribution = new double[numOutcomes];
                        if (values != null)
                        {
                            PerceptronModel.eval(contexts[ei], values[ei], modelDistribution, evalParams, false);
                        }
                        else
                        {
                            PerceptronModel.eval(contexts[ei], null, modelDistribution, evalParams, false);
                        }

                        int maxOutcome = maxIndex(modelDistribution);

                        // If the predicted outcome is different from the target
                        // outcome, do the standard update: boost the parameters
                        // associated with the target and reduce those associated
                        // with the incorrect predicted outcome.
                        if (maxOutcome != targetOutcome)
                        {
                            for (int ci = 0; ci < contexts[ei].Length; ci++)
                            {
                                int pi = contexts[ei][ci];
                                if (values == null)
                                {
                                    parameters[pi].updateParameter(targetOutcome, stepsize);
                                    parameters[pi].updateParameter(maxOutcome, -stepsize);
                                }
                                else
                                {
                                    parameters[pi].updateParameter(targetOutcome, stepsize * values[ei][ci]);
                                    parameters[pi].updateParameter(maxOutcome, -stepsize * values[ei][ci]);
                                }
                            }
                        }

                        // Update the counts for accuracy.
                        if (maxOutcome == targetOutcome)
                        {
                            numCorrect++;
                        }
                    }
                }

                // Calculate the training accuracy and display.
                double trainingAccuracy = (double)numCorrect / numEvents;
                if (i < 10 || (i % 10) == 0)
                {
                    display(". (" + numCorrect + "/" + numEvents + ") " + trainingAccuracy + "\n");
                }

                // TODO: Make averaging configurable !!!

                // With skipped averaging, parameters are only summed into the average on the
                // first 20 iterations and on perfect-square iterations thereafter.
                bool doAveraging;

                if (useAverage && useSkippedlAveraging && (i < 20 || isPerfectSquare(i)))
                {
                    doAveraging = true;
                }
                else if (useAverage && !useSkippedlAveraging)
                {
                    doAveraging = true;
                }
                else
                {
                    doAveraging = false;
                }

                if (doAveraging)
                {
                    numTimesSummed++;
                    for (int pi = 0; pi < numPreds; pi++)
                    {
                        for (int aoi = 0; aoi < numOutcomes; aoi++)
                        {
                            summedParams[pi].updateParameter(aoi, parameters[pi].Parameters[aoi]);
                        }
                    }
                }

                // If the tolerance is greater than the difference between the
                // current training accuracy and all of the previous three
                // training accuracies, stop training.
                if (Math.Abs(prevAccuracy1 - trainingAccuracy) < tolerance &&
                    Math.Abs(prevAccuracy2 - trainingAccuracy) < tolerance &&
                    Math.Abs(prevAccuracy3 - trainingAccuracy) < tolerance)
                {
                    display("Stopping: change in training set accuracy less than " + tolerance + "\n");
                    break;
                }

                // Update the previous training accuracies.
                prevAccuracy1 = prevAccuracy2;
                prevAccuracy2 = prevAccuracy3;
                prevAccuracy3 = trainingAccuracy;
            }

            // Output the final training stats.
            trainingStats(evalParams);

            // Create averaged parameters
            if (useAverage)
            {
                for (int pi = 0; pi < numPreds; pi++)
                {
                    for (int aoi = 0; aoi < numOutcomes; aoi++)
                    {
                        summedParams[pi].setParameter(aoi, summedParams[pi].Parameters[aoi] / numTimesSummed);
                    }
                }

                return summedParams;
            }
            else
            {
                return parameters;
            }
        }
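        // Note: the isPerfectSquare helper used in the skipped-averaging check is not shown
        // in this example. A minimal sketch consistent with how it is called (an integer
        // perfect-square test) might be:
        private static bool isPerfectSquare(int n)
        {
            int root = (int)Math.Sqrt(n);
            return root * root == n;
        }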