/// <summary>
/// Trains the perceptron parameters with a single additive pass over the
/// indexed events: each predicate observed with an event accumulates the
/// step size (scaled by the feature value when real values are present)
/// toward that event's target outcome.
/// </summary>
/// <returns>The trained parameter contexts, one per predicate.</returns>
private MutableContext[] FindParameters() {
    // Every predicate keeps a weight for every outcome, so all contexts
    // share the identity outcome pattern [0, 1, ..., numOutcomes - 1].
    var outcomePattern = new int[numOutcomes];
    for (var outcome = 0; outcome < numOutcomes; outcome++) {
        outcomePattern[outcome] = outcome;
    }

    // Estimated parameter value of each predicate, all initialised to zero.
    var parameters = new MutableContext[numPreds];
    for (var pred = 0; pred < numPreds; pred++) {
        parameters[pred] = new MutableContext(outcomePattern, new double[numOutcomes]);
        for (var outcome = 0; outcome < numOutcomes; outcome++) {
            parameters[pred].SetParameter(outcome, 0.0);
        }
    }

    // ReSharper disable once CoVariantArrayConversion
    var evalParams = new EvalParameters(parameters, numOutcomes);
    const double stepSize = 1;

    for (var ei = 0; ei < numUniqueEvents; ei++) {
        var targetOutcome = outcomeList[ei];
        var eventContext = contexts[ei];
        // Apply the update once per occurrence of this (compressed) event.
        for (var seen = 0; seen < numTimesEventsSeen[ei]; seen++) {
            for (var ci = 0; ci < eventContext.Length; ci++) {
                var pred = eventContext[ci];
                var delta = values == null ? stepSize : stepSize * values[ei][ci];
                parameters[pred].UpdateParameter(targetOutcome, delta);
            }
        }
    }

    // Output the final training stats.
    TrainingStats(evalParams);
    return parameters;
}
/// <summary>
/// Trains a perceptron model over the events produced by a sequence stream.
/// Indexes the events, initialises one parameter context per predicate, runs
/// <c>findParameters</c>, and returns either the raw or the averaged model.
/// </summary>
/// <param name="iterations">The maximum number of perceptron iterations to perform.</param>
/// <param name="sequenceStream">The stream of training sequences.</param>
/// <param name="cutoff">The number of times a predicate must occur to be included.</param>
/// <param name="useAverage">Whether to return the averaged parameters instead of the final ones.</param>
/// <returns>The trained perceptron model.</returns>
public virtual AbstractModel trainModel(int iterations, SequenceStream sequenceStream, int cutoff, bool useAverage) {
    this.iterations = iterations;
    this.sequenceStream = sequenceStream;
    // Compress the stream's events; cutoff filters rarely-seen predicates.
    DataIndexer di = new OnePassDataIndexer(new SequenceStreamEventStream(sequenceStream), cutoff, false);
    // Count the sequences by iterating the stream a second time.
    numSequences = 0;
    foreach (Sequence <Event> s in sequenceStream) {
        numSequences++;
    }
    outcomeList = di.OutcomeList;
    predLabels = di.PredLabels;
    pmap = new IndexHashTable <string>(predLabels, 0.7d);
    display("Incorporating indexed data for training... \n");
    this.useAverage = useAverage;
    numEvents = di.NumEvents;
    // NOTE(review): duplicate assignment — iterations was already set above.
    this.iterations = iterations;
    outcomeLabels = di.OutcomeLabels;
    // Map each outcome label to its index.
    omap = new Dictionary <string, int?>();
    for (int oli = 0; oli < outcomeLabels.Length; oli++) {
        omap[outcomeLabels[oli]] = oli;
    }
    // NOTE(review): duplicate assignment — outcomeList was already set above.
    outcomeList = di.OutcomeList;
    numPreds = predLabels.Length;
    numOutcomes = outcomeLabels.Length;
    if (useAverage) {
        // [numPreds][numOutcomes][3] bookkeeping used for averaged updates.
        updates = RectangularArrays.ReturnRectangularIntArray(numPreds, numOutcomes, 3);
    }
    display("done.\n");
    display("\tNumber of Event Tokens: " + numEvents + "\n");
    display("\t    Number of Outcomes: " + numOutcomes + "\n");
    display("\t  Number of Predicates: " + numPreds + "\n");
    // One parameter context per predicate, each covering every outcome.
    parameters = new MutableContext[numPreds];
    if (useAverage) {
        averageParams = new MutableContext[numPreds];
    }
    allOutcomesPattern = new int[numOutcomes];
    for (int oi = 0; oi < numOutcomes; oi++) {
        allOutcomesPattern[oi] = oi;
    }
    for (int pi = 0; pi < numPreds; pi++) {
        parameters[pi] = new MutableContext(allOutcomesPattern, new double[numOutcomes]);
        if (useAverage) {
            averageParams[pi] = new MutableContext(allOutcomesPattern, new double[numOutcomes]);
        }
        for (int aoi = 0; aoi < numOutcomes; aoi++) {
            parameters[pi].setParameter(aoi, 0.0);
            if (useAverage) {
                averageParams[pi].setParameter(aoi, 0.0);
            }
        }
    }
    modelDistribution = new double[numOutcomes];
    display("Computing model parameters...\n");
    findParameters(iterations);
    display("...done.\n");
    /***************** Create and return the model ******************/
    string[] updatedPredLabels = predLabels;
    if (useAverage) {
        return(new PerceptronModel(averageParams, updatedPredLabels, outcomeLabels));
    } else {
        return(new PerceptronModel(parameters, updatedPredLabels, outcomeLabels));
    }
}
/// <summary>
/// Train a model using the GIS algorithm.
/// </summary>
/// <param name="iterations">The number of GIS iterations to perform.</param>
/// <param name="di">The data indexer used to compress events in memory.</param>
/// <param name="modelPrior">The prior distribution used to train this model.</param>
/// <param name="modelCutoff">The number of times a feature must occur to be included.</param>
/// <param name="threads">The number of threads used to train this model. Must be positive.</param>
/// <returns>The newly trained model, which can be used immediately or saved to disk using an <see cref="GISModelWriter"/> object.</returns>
/// <exception cref="ArgumentOutOfRangeException">Thrown when <paramref name="threads"/> is not positive.</exception>
public GISModel TrainModel(int iterations, IDataIndexer di, IPrior modelPrior, int modelCutoff, int threads) {
    if (threads <= 0)
        throw new ArgumentOutOfRangeException("threads", threads, @"Threads must be at least one or greater.");

    // One set of model-expectation contexts per worker thread.
    modelExpects = new MutableContext[threads][];
    info.Append("Trained using GIS algorithm.\n\n");

    // Executes the data indexer
    di.Execute();

    // Incorporate all of the needed info.
    Display("Incorporating indexed data for training...");
    contexts = di.GetContexts();
    values = di.Values;
    cutoff = modelCutoff;
    predicateCounts = di.GetPredCounts();
    numTimesEventsSeen = di.GetNumTimesEventsSeen();
    numUniqueEvents = contexts.Length;
    prior = modelPrior;

    // Determine the correction constant: the maximum total feature mass of
    // any single event (context count, or sum of values when present).
    double correctionConstant = 0;
    for (int ci = 0; ci < contexts.Length; ci++) {
        if (values == null || values[ci] == null) {
            if (contexts[ci].Length > correctionConstant) {
                correctionConstant = contexts[ci].Length;
            }
        } else {
            var cl = values[ci][0];
            for (var vi = 1; vi < values[ci].Length; vi++) {
                cl += values[ci][vi];
            }
            if (cl > correctionConstant) {
                correctionConstant = cl;
            }
        }
    }
    Display("done.");

    outcomeLabels = di.GetOutcomeLabels();
    outcomeList = di.GetOutcomeList();
    numOutcomes = outcomeLabels.Length;
    predLabels = di.GetPredLabels();
    prior.SetLabels(outcomeLabels, predLabels);
    numPreds = predLabels.Length;

    info.Append("Number of Event Tokens: {0}\n", numUniqueEvents);
    info.Append("    Number of Outcomes: {0}\n", numOutcomes);
    info.Append("  Number of Predicates: {0}\n", numPreds);

    Display("\tNumber of Event Tokens: " + numUniqueEvents);
    Display("\t    Number of Outcomes: " + numOutcomes);
    Display("\t  Number of Predicates: " + numPreds);

    // set up feature arrays
    //var predCount = new float[numPreds][numOutcomes];
    // predCount[pred][outcome] = (weighted) number of times the predicate
    // was seen with the outcome; rows are allocated lazily on first use.
    var predCount = new float[numPreds][];
    for (int ti = 0; ti < numUniqueEvents; ti++) {
        for (int j = 0; j < contexts[ti].Length; j++) {
            if (predCount[contexts[ti][j]] == null) {
                predCount[contexts[ti][j]] = new float[numOutcomes];
            }
            if (values != null && values[ti] != null) {
                predCount[contexts[ti][j]][outcomeList[ti]] += numTimesEventsSeen[ti] * values[ti][j];
            } else {
                predCount[contexts[ti][j]][outcomeList[ti]] += numTimesEventsSeen[ti];
            }
        }
    }

    // ReSharper disable once RedundantAssignment
    di = null;

    // Get the observed expectations of the features. Strictly speaking,
    // we should divide the counts by the number of Tokens, but because of
    // the way the model's expectations are approximated in the
    // implementation, this is canceled out when we compute the next
    // iteration of a parameter, making the extra divisions wasteful.
    param = new MutableContext[numPreds];
    for (var i = 0; i < modelExpects.Length; i++)
        modelExpects[i] = new MutableContext[numPreds];
    observedExpects = new MutableContext[numPreds];

    // The model does need the correction constant and the correction feature. The correction constant
    // is only needed during training, and the correction feature is not necessary.
    // For compatibility reasons the model contains from now on a correction constant of 1,
    // and a correction param 0.
    // ReSharper disable once CoVariantArrayConversion
    evalParams = new EvalParameters(param, 0, 1, numOutcomes);

    var activeOutcomes = new int[numOutcomes];
    var allOutcomesPattern = new int[numOutcomes];
    for (var oi = 0; oi < numOutcomes; oi++) {
        allOutcomesPattern[oi] = oi;
    }
    for (var pi = 0; pi < numPreds; pi++) {
        var numActiveOutcomes = 0;
        int[] outcomePattern;
        if (Smoothing) {
            // With smoothing every outcome is active for every predicate.
            numActiveOutcomes = numOutcomes;
            outcomePattern = allOutcomesPattern;
        } else {
            //determine active outcomes
            for (var oi = 0; oi < numOutcomes; oi++) {
                if (predCount[pi][oi] > 0 && predicateCounts[pi] >= cutoff) {
                    activeOutcomes[numActiveOutcomes] = oi;
                    numActiveOutcomes++;
                }
            }
            if (numActiveOutcomes == numOutcomes) {
                outcomePattern = allOutcomesPattern;
            } else {
                outcomePattern = new int[numActiveOutcomes];
                for (var aoi = 0; aoi < numActiveOutcomes; aoi++) {
                    outcomePattern[aoi] = activeOutcomes[aoi];
                }
            }
        }
        param[pi] = new MutableContext(outcomePattern, new double[numActiveOutcomes]);
        foreach (MutableContext[] me in modelExpects)
            me[pi] = new MutableContext(outcomePattern, new double[numActiveOutcomes]);
        observedExpects[pi] = new MutableContext(outcomePattern, new double[numActiveOutcomes]);
        for (var aoi = 0; aoi < numActiveOutcomes; aoi++) {
            var oi = outcomePattern[aoi];
            param[pi].SetParameter(aoi, 0.0);
            foreach (var modelExpect in modelExpects) {
                modelExpect[pi].SetParameter(aoi, 0.0);
            }
            if (predCount[pi][oi] > 0) {
                observedExpects[pi].SetParameter(aoi, predCount[pi][oi]);
            } else if (Smoothing) {
                observedExpects[pi].SetParameter(aoi, SmoothingObservation);
            }
        }
    }

    Display("...done.");

    /***************** Find the parameters ************************/
    if (threads == 1)
        Display("Computing model parameters ...");
    else
        Display("Computing model parameters in " + threads + " threads...");

    FindParameters(iterations, correctionConstant);

    /*************** Create and return the model ******************/
    // To be compatible with old models the correction constant is always 1
    // ReSharper disable once CoVariantArrayConversion
    return new GISModel(param, predLabels, outcomeLabels, 1, evalParams.CorrectionParam) {
        info = TrainingInfo
    };
}
/// <summary>
/// Trains a perceptron model over the events produced by a sequence stream.
/// Indexes the events, initialises one parameter context per predicate, runs
/// <see cref="FindParameters()"/>, and returns either the raw or the averaged model.
/// </summary>
/// <param name="trainIterations">The maximum number of perceptron iterations to perform.</param>
/// <param name="trainStream">The stream of training sequences.</param>
/// <param name="cutoff">The number of times a predicate must occur to be included.</param>
/// <param name="trainUseAverage">Whether to return the averaged parameters instead of the final ones.</param>
/// <returns>The trained perceptron model.</returns>
public AbstractModel TrainModel(int trainIterations, ISequenceStream trainStream, int cutoff, bool trainUseAverage) {
    iterations = trainIterations;
    useAverage = trainUseAverage;
    sequenceStream = trainStream;

    info.Append("Trained using Perceptron Sequence algorithm.\n\n");

    // Compress the stream's events; cutoff filters rarely-seen predicates.
    var di = new OnePassDataIndexer(new SequenceStreamEventStream(trainStream), cutoff, false);

    // Count the sequences with a second pass over the stream.
    trainStream.Reset();
    numSequences = 0;
    while (trainStream.Read() != null) {
        numSequences++;
    }

    outcomeList = di.GetOutcomeList();
    predLabels = di.GetPredLabels();
    pMap = new IndexHashTable <string>(predLabels, 0.7d);

    // Incorporating indexed data for training...
    numEvents = di.GetNumEvents();
    outcomeLabels = di.GetOutcomeLabels();

    // Map each outcome label to its index.
    oMap = new Dictionary <string, int>();
    for (var i = 0; i < outcomeLabels.Length; i++) {
        oMap.Add(outcomeLabels[i], i);
    }

    numPreds = predLabels.Length;
    numOutcomes = outcomeLabels.Length;

    if (trainUseAverage) {
        // [numPreds][numOutcomes][3] bookkeeping used for averaged updates.
        updates = new int[numPreds][][];
        for (var i = 0; i < numPreds; i++) {
            updates[i] = new int[numOutcomes][];
            for (var j = 0; j < numOutcomes; j++) {
                updates[i][j] = new int[3];
            }
        }
    }

    // done.
    Display("done.\n");

    info.Append("Number of Event Tokens: {0}\n" +
                "    Number of Outcomes: {1}\n" +
                "  Number of Predicates: {2}\n", numEvents, numOutcomes, numPreds);

    Display("\tNumber of Event Tokens: " + numEvents);
    Display("\t    Number of Outcomes: " + numOutcomes);
    Display("\t  Number of Predicates: " + numPreds);

    // One parameter context per predicate, each covering every outcome.
    param = new MutableContext[numPreds];
    if (trainUseAverage) {
        averageParams = new MutableContext[numPreds];
    }

    allOutcomesPattern = new int[numOutcomes];
    for (var i = 0; i < numOutcomes; i++) {
        allOutcomesPattern[i] = i;
    }

    for (var pi = 0; pi < numPreds; pi++) {
        param[pi] = new MutableContext(allOutcomesPattern, new double[numOutcomes]);
        if (trainUseAverage) {
            averageParams[pi] = new MutableContext(allOutcomesPattern, new double[numOutcomes]);
        }
        for (var aoi = 0; aoi < numOutcomes; aoi++) {
            param[pi].SetParameter(aoi, 0.0d);
            if (trainUseAverage) {
                averageParams[pi].SetParameter(aoi, 0.0d);
            }
        }
    }

    Display("Computing model parameters...");

    FindParameters();

    Display("...done.");

    /*************** Create and return the model ******************/
    // ReSharper disable CoVariantArrayConversion
    // FIX: the semicolon terminating the averaged-model return previously sat
    // outside the if-block ("... { info = info } } ;"), leaving the return
    // statement unterminated.
    if (trainUseAverage) {
        return new PerceptronModel(averageParams, predLabels, outcomeLabels) {
            info = info
        };
    }
    return new PerceptronModel(param, predLabels, outcomeLabels) {
        info = info
    };
    // ReSharper restore CoVariantArrayConversion
}
/// <summary>
/// Train a model using the GIS algorithm.
/// </summary>
/// <param name="iterations">The number of GIS iterations to perform.</param>
/// <param name="di">The data indexer used to compress events in memory.</param>
/// <param name="modelPrior">The prior distribution used to train this model.</param>
/// <param name="modelCutoff">The number of times a feature must occur to be included.</param>
/// <param name="threads">The number of threads used to train this model. Must be positive.</param>
/// <returns>The newly trained model, which can be used immediately or saved to disk using an <see cref="GISModelWriter"/> object.</returns>
/// <exception cref="ArgumentOutOfRangeException">Thrown when <paramref name="threads"/> is not positive.</exception>
public GISModel TrainModel(int iterations, IDataIndexer di, IPrior modelPrior, int modelCutoff, int threads) {
    if (threads <= 0) {
        throw new ArgumentOutOfRangeException("threads", threads, @"Threads must be at least one or greater.");
    }

    // One set of model-expectation contexts per worker thread.
    modelExpects = new MutableContext[threads][];
    info.Append("Trained using GIS algorithm.\n\n");

    // Executes the data indexer
    di.Execute();

    // Incorporate all of the needed info.
    Display("Incorporating indexed data for training...");
    contexts = di.GetContexts();
    values = di.Values;
    cutoff = modelCutoff;
    predicateCounts = di.GetPredCounts();
    numTimesEventsSeen = di.GetNumTimesEventsSeen();
    numUniqueEvents = contexts.Length;
    prior = modelPrior;

    // Determine the correction constant: the maximum total feature mass of
    // any single event (context count, or sum of values when present).
    double correctionConstant = 0;
    for (int ci = 0; ci < contexts.Length; ci++) {
        if (values == null || values[ci] == null) {
            if (contexts[ci].Length > correctionConstant) {
                correctionConstant = contexts[ci].Length;
            }
        } else {
            var cl = values[ci][0];
            for (var vi = 1; vi < values[ci].Length; vi++) {
                cl += values[ci][vi];
            }
            if (cl > correctionConstant) {
                correctionConstant = cl;
            }
        }
    }
    Display("done.");

    outcomeLabels = di.GetOutcomeLabels();
    outcomeList = di.GetOutcomeList();
    numOutcomes = outcomeLabels.Length;
    predLabels = di.GetPredLabels();
    prior.SetLabels(outcomeLabels, predLabels);
    numPreds = predLabels.Length;

    info.Append("Number of Event Tokens: {0}\n", numUniqueEvents);
    info.Append("    Number of Outcomes: {0}\n", numOutcomes);
    info.Append("  Number of Predicates: {0}\n", numPreds);

    Display("\tNumber of Event Tokens: " + numUniqueEvents);
    Display("\t    Number of Outcomes: " + numOutcomes);
    Display("\t  Number of Predicates: " + numPreds);

    // set up feature arrays
    //var predCount = new float[numPreds][numOutcomes];
    // predCount[pred][outcome] = (weighted) number of times the predicate
    // was seen with the outcome; rows are allocated lazily on first use.
    var predCount = new float[numPreds][];
    for (int ti = 0; ti < numUniqueEvents; ti++) {
        for (int j = 0; j < contexts[ti].Length; j++) {
            if (predCount[contexts[ti][j]] == null) {
                predCount[contexts[ti][j]] = new float[numOutcomes];
            }
            if (values != null && values[ti] != null) {
                predCount[contexts[ti][j]][outcomeList[ti]] += numTimesEventsSeen[ti] * values[ti][j];
            } else {
                predCount[contexts[ti][j]][outcomeList[ti]] += numTimesEventsSeen[ti];
            }
        }
    }

    // ReSharper disable once RedundantAssignment
    di = null;

    // Get the observed expectations of the features. Strictly speaking,
    // we should divide the counts by the number of Tokens, but because of
    // the way the model's expectations are approximated in the
    // implementation, this is canceled out when we compute the next
    // iteration of a parameter, making the extra divisions wasteful.
    param = new MutableContext[numPreds];
    for (var i = 0; i < modelExpects.Length; i++) {
        modelExpects[i] = new MutableContext[numPreds];
    }
    observedExpects = new MutableContext[numPreds];

    // The model does need the correction constant and the correction feature. The correction constant
    // is only needed during training, and the correction feature is not necessary.
    // For compatibility reasons the model contains from now on a correction constant of 1,
    // and a correction param 0.
    // ReSharper disable once CoVariantArrayConversion
    evalParams = new EvalParameters(param, 0, 1, numOutcomes);

    var activeOutcomes = new int[numOutcomes];
    var allOutcomesPattern = new int[numOutcomes];
    for (var oi = 0; oi < numOutcomes; oi++) {
        allOutcomesPattern[oi] = oi;
    }
    for (var pi = 0; pi < numPreds; pi++) {
        var numActiveOutcomes = 0;
        int[] outcomePattern;
        if (Smoothing) {
            // With smoothing every outcome is active for every predicate.
            numActiveOutcomes = numOutcomes;
            outcomePattern = allOutcomesPattern;
        } else {
            //determine active outcomes
            for (var oi = 0; oi < numOutcomes; oi++) {
                if (predCount[pi][oi] > 0 && predicateCounts[pi] >= cutoff) {
                    activeOutcomes[numActiveOutcomes] = oi;
                    numActiveOutcomes++;
                }
            }
            if (numActiveOutcomes == numOutcomes) {
                outcomePattern = allOutcomesPattern;
            } else {
                outcomePattern = new int[numActiveOutcomes];
                for (var aoi = 0; aoi < numActiveOutcomes; aoi++) {
                    outcomePattern[aoi] = activeOutcomes[aoi];
                }
            }
        }
        param[pi] = new MutableContext(outcomePattern, new double[numActiveOutcomes]);
        foreach (MutableContext[] me in modelExpects) {
            me[pi] = new MutableContext(outcomePattern, new double[numActiveOutcomes]);
        }
        observedExpects[pi] = new MutableContext(outcomePattern, new double[numActiveOutcomes]);
        for (var aoi = 0; aoi < numActiveOutcomes; aoi++) {
            var oi = outcomePattern[aoi];
            param[pi].SetParameter(aoi, 0.0);
            foreach (var modelExpect in modelExpects) {
                modelExpect[pi].SetParameter(aoi, 0.0);
            }
            if (predCount[pi][oi] > 0) {
                observedExpects[pi].SetParameter(aoi, predCount[pi][oi]);
            } else if (Smoothing) {
                observedExpects[pi].SetParameter(aoi, SmoothingObservation);
            }
        }
    }

    Display("...done.");

    /***************** Find the parameters ************************/
    if (threads == 1) {
        Display("Computing model parameters ...");
    } else {
        Display("Computing model parameters in " + threads + " threads...");
    }

    FindParameters(iterations, correctionConstant);

    /*************** Create and return the model ******************/
    // To be compatible with old models the correction constant is always 1
    // ReSharper disable once CoVariantArrayConversion
    return(new GISModel(param, predLabels, outcomeLabels, 1, evalParams.CorrectionParam) {
        info = TrainingInfo
    });
}
/// <summary>
/// Iteratively trains the perceptron parameters and, when requested, their
/// per-iteration average. Stops early when the training-set accuracy has
/// changed by less than <c>tolerance</c> relative to the previous three
/// iterations.
/// </summary>
/// <param name="iterations">The maximum number of iterations to perform.</param>
/// <param name="useAverage">Whether to return averaged parameters instead of the final ones.</param>
/// <returns>The trained (or averaged) parameter contexts, one per predicate.</returns>
private MutableContext[] FindParameters(int iterations, bool useAverage) {
    info.Append("  Number of Iterations: {0}\n", iterations);

    Display("\nPerforming " + iterations + " iterations.\n");

    var allOutcomesPattern = new int[numOutcomes];
    for (var oi = 0; oi < numOutcomes; oi++) {
        allOutcomesPattern[oi] = oi;
    }

    /* Stores the estimated parameter value of each predicate during iteration. */
    var param = new MutableContext[numPreds];
    for (var pi = 0; pi < numPreds; pi++) {
        param[pi] = new MutableContext(allOutcomesPattern, new double[numOutcomes]);
        for (var aoi = 0; aoi < numOutcomes; aoi++) {
            param[pi].SetParameter(aoi, 0.0);
        }
    }

    // ReSharper disable once CoVariantArrayConversion
    var evalParams = new EvalParameters(param, numOutcomes);

    // Stores the sum of parameter values of each predicate over many iterations.
    var summedParams = new MutableContext[numPreds];
    if (useAverage) {
        for (var pi = 0; pi < numPreds; pi++) {
            summedParams[pi] = new MutableContext(allOutcomesPattern, new double[numOutcomes]);
            for (var aoi = 0; aoi < numOutcomes; aoi++) {
                summedParams[pi].SetParameter(aoi, 0.0);
            }
        }
    }

    // Keep track of the previous three accuracies. The difference of
    // the mean of these and the current training set accuracy is used
    // with tolerance to decide whether to stop.
    var prevAccuracy1 = 0.0;
    var prevAccuracy2 = 0.0;
    var prevAccuracy3 = 0.0;

    // A counter for the denominator for averaging.
    var numTimesSummed = 0;

    double stepSize = 1;
    for (var i = 1; i <= iterations; i++) {
        // Decrease the step size by a small amount.
        if (stepSizeDecrease > 0) {
            stepSize *= 1 - stepSizeDecrease;
        }

        if (Monitor != null && Monitor.Token.CanBeCanceled) {
            Monitor.Token.ThrowIfCancellationRequested();
        }

        var numCorrect = 0;
        for (var ei = 0; ei < numUniqueEvents; ei++) {
            var targetOutcome = outcomeList[ei];
            for (var ni = 0; ni < numTimesEventsSeen[ei]; ni++) {
                // Compute the model's prediction according to the current parameters.
                var modelDistribution = new double[numOutcomes];
                PerceptronModel.Eval(
                    contexts[ei],
                    values != null ? values[ei] : null,
                    modelDistribution,
                    evalParams,
                    false);

                var maxOutcome = MaxIndex(modelDistribution);

                // If the predicted outcome is different from the target
                // outcome, do the standard update: boost the parameters
                // associated with the target and reduce those associated
                // with the incorrect predicted outcome.
                if (maxOutcome != targetOutcome) {
                    for (var ci = 0; ci < contexts[ei].Length; ci++) {
                        var pi = contexts[ei][ci];
                        if (values == null) {
                            param[pi].UpdateParameter(targetOutcome, stepSize);
                            param[pi].UpdateParameter(maxOutcome, -stepSize);
                        } else {
                            param[pi].UpdateParameter(targetOutcome, stepSize * values[ei][ci]);
                            param[pi].UpdateParameter(maxOutcome, -stepSize * values[ei][ci]);
                        }
                    }
                }

                // Update the counts for accuracy.
                if (maxOutcome == targetOutcome) {
                    numCorrect++;
                }
            }
        }

        // Calculate the training accuracy and display.
        var trainingAccuracy = (double)numCorrect / numEvents;
        Display($"{i,-4} {numCorrect} of {numEvents} - {trainingAccuracy}");

        // TODO: Make averaging configurable !!!
        // FIX: the second branch previously read "else if (useAverage)", which
        // made UseSkippedAveraging a no-op (parameters were summed on every
        // iteration regardless). With skipped averaging enabled, only the
        // first 19 iterations and perfect-square iterations contribute.
        bool doAveraging;
        if (useAverage && UseSkippedAveraging && (i < 20 || IsPerfectSquare(i))) {
            doAveraging = true;
        } else if (useAverage && !UseSkippedAveraging) {
            doAveraging = true;
        } else {
            doAveraging = false;
        }

        if (doAveraging) {
            numTimesSummed++;
            for (var pi = 0; pi < numPreds; pi++) {
                for (var aoi = 0; aoi < numOutcomes; aoi++) {
                    summedParams[pi].UpdateParameter(aoi, param[pi].Parameters[aoi]);
                }
            }
        }

        // If the tolerance is greater than the difference between the
        // current training accuracy and all of the previous three
        // training accuracies, stop training.
        if (Math.Abs(prevAccuracy1 - trainingAccuracy) < tolerance &&
            Math.Abs(prevAccuracy2 - trainingAccuracy) < tolerance &&
            Math.Abs(prevAccuracy3 - trainingAccuracy) < tolerance) {
            Display("Stopping: change in training set accuracy less than " + tolerance + "\n");
            break;
        }

        // Update the previous training accuracies.
        prevAccuracy1 = prevAccuracy2;
        prevAccuracy2 = prevAccuracy3;
        prevAccuracy3 = trainingAccuracy;
    }

    // Output the final training stats.
    TrainingStats(evalParams);

    if (!useAverage) {
        return(param);
    }

    if (numTimesSummed == 0) // Improbable but possible according to the Coverity.
    {
        numTimesSummed = 1;
    }

    // Create averaged parameters
    for (var pi = 0; pi < numPreds; pi++) {
        for (var aoi = 0; aoi < numOutcomes; aoi++) {
            summedParams[pi].SetParameter(aoi, summedParams[pi].Parameters[aoi] / numTimesSummed);
        }
    }
    return(summedParams);
}
/// <summary>
/// Iteratively trains the perceptron parameters and, when requested, their
/// per-iteration average. Stops early when the training-set accuracy has
/// changed by less than <c>tolerance</c> relative to the previous three
/// iterations.
/// </summary>
/// <param name="iterations">The maximum number of iterations to perform.</param>
/// <param name="useAverage">Whether to return averaged parameters instead of the final ones.</param>
/// <returns>The trained (or averaged) parameter contexts, one per predicate.</returns>
private MutableContext[] findParameters(int iterations, bool useAverage) {
    display("Performing " + iterations + " iterations.\n");

    int[] allOutcomesPattern = new int[numOutcomes];
    for (int oi = 0; oi < numOutcomes; oi++) {
        allOutcomesPattern[oi] = oi;
    }

    // Stores the estimated parameter value of each predicate during iteration.
    MutableContext[] parameters = new MutableContext[numPreds];
    for (int pi = 0; pi < numPreds; pi++) {
        parameters[pi] = new MutableContext(allOutcomesPattern, new double[numOutcomes]);
        for (int aoi = 0; aoi < numOutcomes; aoi++) {
            parameters[pi].setParameter(aoi, 0.0);
        }
    }
    EvalParameters evalParams = new EvalParameters(parameters, numOutcomes);

    // Stores the sum of parameter values of each predicate over many iterations.
    MutableContext[] summedParams = new MutableContext[numPreds];
    if (useAverage) {
        for (int pi = 0; pi < numPreds; pi++) {
            summedParams[pi] = new MutableContext(allOutcomesPattern, new double[numOutcomes]);
            for (int aoi = 0; aoi < numOutcomes; aoi++) {
                summedParams[pi].setParameter(aoi, 0.0);
            }
        }
    }

    // Keep track of the previous three accuracies. The difference of
    // the mean of these and the current training set accuracy is used
    // with tolerance to decide whether to stop.
    double prevAccuracy1 = 0.0;
    double prevAccuracy2 = 0.0;
    double prevAccuracy3 = 0.0;

    // A counter for the denominator for averaging.
    int numTimesSummed = 0;

    double stepsize = 1;
    for (int i = 1; i <= iterations; i++) {
        // Decrease the stepsize by a small amount.
        if (stepSizeDecrease != null) {
            stepsize *= 1 - stepSizeDecrease.GetValueOrDefault();
        }

        displayIteration(i);

        int numCorrect = 0;
        for (int ei = 0; ei < numUniqueEvents; ei++) {
            int targetOutcome = outcomeList[ei];
            for (int ni = 0; ni < this.numTimesEventsSeen[ei]; ni++) {
                // Compute the model's prediction according to the current parameters.
                double[] modelDistribution = new double[numOutcomes];
                if (values != null) {
                    PerceptronModel.eval(contexts[ei], values[ei], modelDistribution, evalParams, false);
                } else {
                    PerceptronModel.eval(contexts[ei], null, modelDistribution, evalParams, false);
                }

                int maxOutcome = maxIndex(modelDistribution);

                // If the predicted outcome is different from the target
                // outcome, do the standard update: boost the parameters
                // associated with the target and reduce those associated
                // with the incorrect predicted outcome.
                if (maxOutcome != targetOutcome) {
                    for (int ci = 0; ci < contexts[ei].Length; ci++) {
                        int pi = contexts[ei][ci];
                        if (values == null) {
                            parameters[pi].updateParameter(targetOutcome, stepsize);
                            parameters[pi].updateParameter(maxOutcome, -stepsize);
                        } else {
                            parameters[pi].updateParameter(targetOutcome, stepsize * values[ei][ci]);
                            parameters[pi].updateParameter(maxOutcome, -stepsize * values[ei][ci]);
                        }
                    }
                }

                // Update the counts for accuracy.
                if (maxOutcome == targetOutcome) {
                    numCorrect++;
                }
            }
        }

        // Calculate the training accuracy and display.
        double trainingAccuracy = (double)numCorrect / numEvents;
        if (i < 10 || (i % 10) == 0) {
            display(". (" + numCorrect + "/" + numEvents + ") " + trainingAccuracy + "\n");
        }

        // TODO: Make averaging configurable !!!
        // FIX: the second branch previously read "else if (useAverage)", which
        // made useSkippedlAveraging a no-op (parameters were summed on every
        // iteration regardless). With skipped averaging enabled, only the
        // first 19 iterations and perfect-square iterations contribute.
        bool doAveraging;
        if (useAverage && useSkippedlAveraging && (i < 20 || isPerfectSquare(i))) {
            doAveraging = true;
        } else if (useAverage && !useSkippedlAveraging) {
            doAveraging = true;
        } else {
            doAveraging = false;
        }

        if (doAveraging) {
            numTimesSummed++;
            for (int pi = 0; pi < numPreds; pi++) {
                for (int aoi = 0; aoi < numOutcomes; aoi++) {
                    summedParams[pi].updateParameter(aoi, parameters[pi].Parameters[aoi]);
                }
            }
        }

        // If the tolerance is greater than the difference between the
        // current training accuracy and all of the previous three
        // training accuracies, stop training.
        if (Math.Abs(prevAccuracy1 - trainingAccuracy) < tolerance &&
            Math.Abs(prevAccuracy2 - trainingAccuracy) < tolerance &&
            Math.Abs(prevAccuracy3 - trainingAccuracy) < tolerance) {
            display("Stopping: change in training set accuracy less than " + tolerance + "\n");
            break;
        }

        // Update the previous training accuracies.
        prevAccuracy1 = prevAccuracy2;
        prevAccuracy2 = prevAccuracy3;
        prevAccuracy3 = trainingAccuracy;
    }

    // Output the final training stats.
    trainingStats(evalParams);

    // Create averaged parameters
    if (useAverage) {
        // FIX: guard against division by zero when no iteration contributed
        // to the sum (mirrors the guard in the non-converted overload).
        if (numTimesSummed == 0) {
            numTimesSummed = 1;
        }
        for (int pi = 0; pi < numPreds; pi++) {
            for (int aoi = 0; aoi < numOutcomes; aoi++) {
                summedParams[pi].setParameter(aoi, summedParams[pi].Parameters[aoi] / numTimesSummed);
            }
        }
        return(summedParams);
    } else {
        return(parameters);
    }
}
/// <summary>
/// Train a model using the GIS algorithm.
/// </summary>
/// <param name="iterations"> The number of GIS iterations to perform. </param>
/// <param name="di"> The data indexer used to compress events in memory. </param>
/// <param name="modelPrior"> The prior distribution used to train this model. </param>
/// <param name="cutoff"> The number of times a feature must occur to be included. </param>
/// <param name="threads"> The number of threads used to train this model. Must be positive. </param>
/// <returns> The newly trained model, which can be used immediately or saved
/// to disk using an opennlp.maxent.io.GISModelWriter object. </returns>
public virtual GISModel trainModel(int iterations, DataIndexer di, Prior modelPrior, int cutoff, int threads) {
    if (threads <= 0) {
        throw new System.ArgumentException("threads must be at least one or greater but is " + threads + "!");
    }

    // One set of model-expectation contexts per worker thread.
    modelExpects = new MutableContext[threads][];

    /***************** Incorporate all of the needed info *****************/
    display("Incorporating indexed data for training... \n");
    contexts = di.Contexts;
    values = di.Values;
    this.cutoff = cutoff;
    predicateCounts = di.PredCounts;
    numTimesEventsSeen = di.NumTimesEventsSeen;
    numUniqueEvents = contexts.Length;
    this.prior = modelPrior;
    //printTable(contexts);

    // Determine the correction constant: the maximum total feature mass of
    // any single event (context count, or sum of values when present).
    double correctionConstant = 0;
    for (int ci = 0; ci < contexts.Length; ci++) {
        if (values == null || values[ci] == null) {
            if (contexts[ci].Length > correctionConstant) {
                correctionConstant = contexts[ci].Length;
            }
        } else {
            float cl = values[ci][0];
            for (int vi = 1; vi < values[ci].Length; vi++) {
                cl += values[ci][vi];
            }
            if (cl > correctionConstant) {
                correctionConstant = cl;
            }
        }
    }
    display("done.\n");

    outcomeLabels = di.OutcomeLabels;
    outcomeList = di.OutcomeList;
    numOutcomes = outcomeLabels.Length;

    predLabels = di.PredLabels;
    prior.setLabels(outcomeLabels, predLabels);
    numPreds = predLabels.Length;

    display("\tNumber of Event Tokens: " + numUniqueEvents + "\n");
    display("\t    Number of Outcomes: " + numOutcomes + "\n");
    display("\t  Number of Predicates: " + numPreds + "\n");

    // set up feature arrays
    // predCount[pred][outcome] = (weighted) number of times the predicate
    // was seen with the outcome.
    float[][] predCount = RectangularArrays.ReturnRectangularFloatArray(numPreds, numOutcomes);
    for (int ti = 0; ti < numUniqueEvents; ti++) {
        for (int j = 0; j < contexts[ti].Length; j++) {
            if (values != null && values[ti] != null) {
                predCount[contexts[ti][j]][outcomeList[ti]] += numTimesEventsSeen[ti] * values[ti][j];
            } else {
                predCount[contexts[ti][j]][outcomeList[ti]] += numTimesEventsSeen[ti];
            }
        }
    }

    //printTable(predCount);
    di = null; // don't need it anymore

    // A fake "observation" to cover features which are not detected in
    // the data. The default is to assume that we observed "1/10th" of a
    // feature during training.
    double smoothingObservation = _smoothingObservation;

    // Get the observed expectations of the features. Strictly speaking,
    // we should divide the counts by the number of Tokens, but because of
    // the way the model's expectations are approximated in the
    // implementation, this is cancelled out when we compute the next
    // iteration of a parameter, making the extra divisions wasteful.
    parameters = new MutableContext[numPreds];
    for (int i = 0; i < modelExpects.Length; i++) {
        modelExpects[i] = new MutableContext[numPreds];
    }
    observedExpects = new MutableContext[numPreds];

    // The model does need the correction constant and the correction feature. The correction constant
    // is only needed during training, and the correction feature is not necessary.
    // For compatibility reasons the model contains from now on a correction constant of 1,
    // and a correction param 0.
    evalParams = new EvalParameters(parameters, 0, 1, numOutcomes);
    int[] activeOutcomes = new int[numOutcomes];
    int[] outcomePattern;
    int[] allOutcomesPattern = new int[numOutcomes];
    for (int oi = 0; oi < numOutcomes; oi++) {
        allOutcomesPattern[oi] = oi;
    }
    int numActiveOutcomes = 0;
    for (int pi = 0; pi < numPreds; pi++) {
        numActiveOutcomes = 0;
        if (useSimpleSmoothing) {
            // With smoothing every outcome is active for every predicate.
            numActiveOutcomes = numOutcomes;
            outcomePattern = allOutcomesPattern;
        } else //determine active outcomes
        {
            for (int oi = 0; oi < numOutcomes; oi++) {
                if (predCount[pi][oi] > 0 && predicateCounts[pi] >= cutoff) {
                    activeOutcomes[numActiveOutcomes] = oi;
                    numActiveOutcomes++;
                }
            }
            if (numActiveOutcomes == numOutcomes) {
                outcomePattern = allOutcomesPattern;
            } else {
                outcomePattern = new int[numActiveOutcomes];
                for (int aoi = 0; aoi < numActiveOutcomes; aoi++) {
                    outcomePattern[aoi] = activeOutcomes[aoi];
                }
            }
        }
        parameters[pi] = new MutableContext(outcomePattern, new double[numActiveOutcomes]);
        for (int i = 0; i < modelExpects.Length; i++) {
            modelExpects[i][pi] = new MutableContext(outcomePattern, new double[numActiveOutcomes]);
        }
        observedExpects[pi] = new MutableContext(outcomePattern, new double[numActiveOutcomes]);
        for (int aoi = 0; aoi < numActiveOutcomes; aoi++) {
            int oi = outcomePattern[aoi];
            parameters[pi].setParameter(aoi, 0.0);
            foreach (MutableContext[] modelExpect in modelExpects) {
                modelExpect[pi].setParameter(aoi, 0.0);
            }
            if (predCount[pi][oi] > 0) {
                observedExpects[pi].setParameter(aoi, predCount[pi][oi]);
            } else if (useSimpleSmoothing) {
                observedExpects[pi].setParameter(aoi, smoothingObservation);
            }
        }
    }

    predCount = null; // don't need it anymore

    display("...done.\n");

    /***************** Find the parameters ************************/
    if (threads == 1) {
        display("Computing model parameters ...\n");
    } else {
        display("Computing model parameters in " + threads + " threads...\n");
    }

    findParameters(iterations, correctionConstant);

    /*************** Create and return the model ******************/
    // To be compatible with old models the correction constant is always 1
    return(new GISModel(parameters, predLabels, outcomeLabels, 1, evalParams.CorrectionParam));
}