/// <summary>
/// Scores every training event with the final perceptron parameters and
/// reports the resulting training-set accuracy to the info log and display.
/// </summary>
/// <param name="evalParams">The trained parameters used to score each event.</param>
private void TrainingStats(EvalParameters evalParams) {
    var correct = 0;
    for (var eventIndex = 0; eventIndex < numUniqueEvents; eventIndex++) {
        var repeats = numTimesEventsSeen[eventIndex];
        for (var repeat = 0; repeat < repeats; repeat++) {
            var distribution = new double[numOutcomes];
            PerceptronModel.Eval(contexts[eventIndex], values?[eventIndex], distribution, evalParams, false);
            // The prediction is the outcome with the highest score.
            if (MaxIndex(distribution) == outcomeList[eventIndex]) {
                correct++;
            }
        }
    }
    var accuracy = (double)correct / numEvents;
    info.Append(" Correct Events: {0}\n" + " Total Events: {1}\n" + " Accuracy: {2}\n", correct, numEvents, accuracy);
    Display("\nPerceptron training complete:\n");
    Display("\t Correct Events : " + correct);
    Display("\t Total Events : " + numEvents);
    Display("\t Accuracy : " + accuracy);
}
/// <summary>
/// Scores all training events with the supplied parameters, prints a short
/// accuracy summary and returns the training-set accuracy.
/// </summary>
/// <param name="evalParams">The parameters used to score each event.</param>
/// <returns>The fraction of events whose predicted outcome matches the target.</returns>
private double trainingStats(EvalParameters evalParams) {
    int correct = 0;
    for (int ei = 0; ei < numUniqueEvents; ei++) {
        for (int rep = 0; rep < this.numTimesEventsSeen[ei]; rep++) {
            double[] distribution = new double[numOutcomes];
            // A null values table means every feature carries an implicit weight of 1.
            PerceptronModel.eval(contexts[ei], values != null ? values[ei] : null, distribution, evalParams, false);
            if (maxIndex(distribution) == outcomeList[ei]) {
                correct++;
            }
        }
    }
    double accuracy = (double)correct / numEvents;
    display("Stats: (" + correct + "/" + numEvents + ") " + accuracy + "\n");
    return accuracy;
}
/// <summary>
/// Use this model to evaluate a context and return an array of the likelihood of each outcome given that context.
/// </summary>
/// <param name="context">The names of the predicates which have been observed at the present decision point; negative entries are skipped.</param>
/// <param name="values">Optional per-predicate feature values; when null every feature weighs 1.</param>
/// <param name="prior">The prior distribution for the specified context; the accumulated scores are added into it.</param>
/// <param name="evalParams">The set of parameters used in this computation.</param>
/// <param name="normalize">if set to <c>true</c> the scores are soft-max normalized into probabilities.</param>
/// <returns>The scores (normalized probabilities when requested) for the outcomes given the context.
/// The indexes of the double[] are the outcome ids, and the actual string representation of
/// the outcomes can be obtained from the method getOutcome(int i).</returns>
public static double[] Eval(int[] context, float[] values, double[] prior, EvalParameters evalParams, bool normalize) {
    var weight = 1d;
    for (var ci = 0; ci < context.Length; ci++) {
        var predIndex = context[ci];
        if (predIndex < 0) {
            continue;
        }
        var predParams = evalParams.Parameters[predIndex];
        var outcomes = predParams.Outcomes;
        var weights = predParams.Parameters;
        if (values != null) {
            weight = values[ci];
        }
        for (var ai = 0; ai < outcomes.Length; ai++) {
            prior[outcomes[ai]] += weights[ai] * weight;
        }
    }

    if (!normalize) {
        return prior;
    }

    var numOutcomes = evalParams.NumOutcomes;

    // Divide by the largest magnitude before exponentiation to keep Exp in range.
    var maxPrior = 1d;
    for (var oid = 0; oid < numOutcomes; oid++) {
        maxPrior = Math.Max(maxPrior, Math.Abs(prior[oid]));
    }

    var normal = 0d;
    for (var oid = 0; oid < numOutcomes; oid++) {
        prior[oid] = Math.Exp(prior[oid] / maxPrior);
        normal += prior[oid];
    }

    for (var oid = 0; oid < numOutcomes; oid++) {
        prior[oid] /= normal;
    }

    return prior;
}
/// <summary>
/// Accumulates the parameter scores for each outcome of the given context
/// into <c>prior</c>, optionally soft-max normalizing the result.
/// </summary>
/// <param name="context">Predicate indices observed at this decision point; negative entries are skipped.</param>
/// <param name="values">Optional per-predicate feature values; null means each feature weighs 1.</param>
/// <param name="prior">Receives the accumulated outcome scores, indexed by outcome id.</param>
/// <param name="model">The set of parameters used in this computation.</param>
/// <param name="normalize">When true, the scores are converted into a probability distribution.</param>
/// <returns>The array passed as <c>prior</c>.</returns>
public static double[] eval(int[] context, float[] values, double[] prior, EvalParameters model, bool normalize) {
    Context[] parameters = model.Params;
    double weight = 1;
    for (int ci = 0; ci < context.Length; ci++) {
        if (context[ci] < 0) {
            continue;
        }
        Context predParams = parameters[context[ci]];
        int[] outcomes = predParams.Outcomes;
        double[] outcomeWeights = predParams.Parameters;
        if (values != null) {
            weight = values[ci];
        }
        for (int ai = 0; ai < outcomes.Length; ai++) {
            prior[outcomes[ai]] += outcomeWeights[ai] * weight;
        }
    }
    if (normalize) {
        int numOutcomes = model.NumOutcomes;
        // Scale by the largest magnitude before Exp to avoid overflow.
        double maxPrior = 1;
        for (int oid = 0; oid < numOutcomes; oid++) {
            double magnitude = Math.Abs(prior[oid]);
            if (magnitude > maxPrior) {
                maxPrior = magnitude;
            }
        }
        double normal = 0.0;
        for (int oid = 0; oid < numOutcomes; oid++) {
            prior[oid] = Math.Exp(prior[oid] / maxPrior);
            normal += prior[oid];
        }
        for (int oid = 0; oid < numOutcomes; oid++) {
            prior[oid] /= normal;
        }
    }
    return prior;
}
/// <summary>
/// Evaluates a context with a naive Bayes model: combines per-outcome feature
/// likelihoods with the outcome priors in log space and writes the resulting
/// distribution into <paramref name="prior"/>.
/// </summary>
/// <param name="context">Indices of the observed predicates; negative entries are skipped.</param>
/// <param name="values">Optional per-predicate feature values; when null each feature weighs 1.</param>
/// <param name="prior">Receives the computed outcome distribution, indexed by outcome id.</param>
/// <param name="parameters">The model parameters; expected to be a NaiveBayesEvalParameters to supply real outcome totals and vocabulary size.</param>
/// <returns>The array passed as <paramref name="prior"/>.</returns>
public static double[] Eval(int[] context, float[] values, double[] prior, EvalParameters parameters) {
    var probabilities = new LogProbabilities<int>();
    // Fall back to zeroed totals / vocabulary 0 when the parameters are not
    // naive-Bayes specific.
    // NOTE(review): with that fallback, outcomeTotals sums to 0 and the prior
    // mixing below divides by it — presumably callers always pass
    // NaiveBayesEvalParameters; confirm.
    var bayesEvalParameters = parameters as NaiveBayesEvalParameters;
    var outcomeTotals = bayesEvalParameters != null ? bayesEvalParameters.OutcomeTotals : new double[prior.Length];
    var vocabulary = bayesEvalParameters != null ? bayesEvalParameters.Vocabulary : 0;
    double value = 1;
    for (var ci = 0; ci < context.Length; ci++) {
        if (context[ci] < 0) {
            continue;
        }
        var predParams = parameters.Parameters[context[ci]];
        var activeOutcomes = predParams.Outcomes;
        var activeParameters = predParams.Parameters;
        if (values != null) {
            value = values[ci];
        }
        // Walk every outcome id; the numerator is non-zero only when the id
        // matches the next active outcome (this appears to assume
        // activeOutcomes is sorted ascending — TODO confirm).
        var ai = 0;
        for (var i = 0; i < outcomeTotals.Length && ai < activeOutcomes.Length; ++i) {
            var oid = activeOutcomes[ai];
            var numerator = oid == i ? activeParameters[ai++] * value : 0;
            var denominator = outcomeTotals[i];
            probabilities.AddIn(i, GetProbability(numerator, denominator, vocabulary, true), 1);
        }
    }
    // Mix in the outcome priors: P(outcome) = total(outcome) / grand total.
    var total = outcomeTotals.Sum();
    for (var i = 0; i < outcomeTotals.Length; ++i) {
        var numerator = outcomeTotals[i];
        var denominator = total;
        probabilities.AddIn(i, numerator / denominator, 1);
    }
    // Export the accumulated probabilities to the output array.
    for (var i = 0; i < outcomeTotals.Length; ++i) {
        prior[i] = probabilities.Get(i);
    }
    return prior;
}
/// <summary>
/// Use this model to evaluate a context and return an array of the likelihood
/// of each outcome given the specified context and the specified parameters.
/// </summary>
/// <param name="context">The integer values of the predicates which have been observed at the present decision point; negative entries are skipped.</param>
/// <param name="values">The values for each of the parameters; null means every feature weighs 1.</param>
/// <param name="prior">The prior distribution for the specified context; updated in place.</param>
/// <param name="model">The set of parameters used in this computation.</param>
/// <returns>The normalized probabilities for the outcomes given the context.
/// The indexes of the double[] are the outcome ids, and the actual
/// string representation of the outcomes can be obtained from the
/// method getOutcome(int i).</returns>
public static double[] eval(int[] context, float[] values, double[] prior, EvalParameters model) {
    Context[] parameters = model.Params;
    int[] numfeats = new int[model.NumOutcomes];
    double weight = 1;
    for (int ci = 0; ci < context.Length; ci++) {
        if (context[ci] < 0) {
            continue;
        }
        Context predParams = parameters[context[ci]];
        int[] outcomes = predParams.Outcomes;
        double[] outcomeWeights = predParams.Parameters;
        if (values != null) {
            weight = values[ci];
        }
        for (int ai = 0; ai < outcomes.Length; ai++) {
            int oid = outcomes[ai];
            numfeats[oid]++;
            prior[oid] += outcomeWeights[ai] * weight;
        }
    }
    double normal = 0.0;
    for (int oid = 0; oid < model.NumOutcomes; oid++) {
        // The correction feature is applied only when a correction parameter
        // was trained.
        double exponent = prior[oid] * model.ConstantInverse;
        if (model.CorrectionParam != 0) {
            exponent += (1.0 - ((double)numfeats[oid] / model.CorrectionConstant)) * model.CorrectionParam;
        }
        prior[oid] = Math.Exp(exponent);
        normal += prior[oid];
    }
    for (int oid = 0; oid < model.NumOutcomes; oid++) {
        prior[oid] /= normal;
    }
    return prior;
}
/// <summary>
/// Computes perceptron parameters with a single additive pass over the
/// training events: every feature of an event boosts its target outcome by
/// the step size (weighted by the feature value when values are present).
/// </summary>
/// <returns>The trained parameters, one <see cref="MutableContext"/> per predicate.</returns>
private MutableContext[] FindParameters() {
    var allOutcomesPattern = new int[numOutcomes];
    for (var outcome = 0; outcome < numOutcomes; outcome++) {
        allOutcomesPattern[outcome] = outcome;
    }

    // Estimated parameter value of each predicate, all starting at zero.
    var parameters = new MutableContext[numPreds];
    for (var pred = 0; pred < numPreds; pred++) {
        parameters[pred] = new MutableContext(allOutcomesPattern, new double[numOutcomes]);
        for (var outcome = 0; outcome < numOutcomes; outcome++) {
            parameters[pred].SetParameter(outcome, 0.0);
        }
    }

    // ReSharper disable once CoVariantArrayConversion
    var evalParams = new EvalParameters(parameters, numOutcomes);

    const double stepSize = 1;
    for (var ei = 0; ei < numUniqueEvents; ei++) {
        var target = outcomeList[ei];
        for (var rep = 0; rep < numTimesEventsSeen[ei]; rep++) {
            for (var ci = 0; ci < contexts[ei].Length; ci++) {
                var pred = contexts[ei][ci];
                // A null values table means every feature carries weight 1.
                var delta = values == null ? stepSize : stepSize * values[ei][ci];
                parameters[pred].UpdateParameter(target, delta);
            }
        }
    }

    // Output the final training stats.
    TrainingStats(evalParams);

    return parameters;
}
/// <summary>
/// Use this model to evaluate a context and return an array of the likelihood of each outcome given the specified context and the specified parameters.
/// </summary>
/// <param name="context">The integer values of the predicates which have been observed at the present decision point; negative entries are skipped.</param>
/// <param name="values">The values for each of the parameters; when null every feature weighs 1.</param>
/// <param name="prior">The prior distribution for the specified context; updated in place.</param>
/// <param name="evalParams">The set of parameters used in this computation.</param>
/// <returns>
/// The normalized probabilities for the outcomes given the context.
/// The indexes of the double[] are the outcome ids, and the actual
/// string representation of the outcomes can be obtained from the
/// method getOutcome(int i).
/// </returns>
public static double[] Eval(int[] context, float[] values, double[] prior, EvalParameters evalParams) {
    var numfeats = new int[evalParams.NumOutcomes];
    double value = 1;
    for (int ci = 0; ci < context.Length; ci++) {
        if (context[ci] < 0) {
            continue;
        }
        // Hoist the predicate lookup; the original re-indexed
        // evalParams.Parameters[context[ci]] on every access (three times per
        // active feature), consistent now with the other Eval overloads.
        var predParams = evalParams.Parameters[context[ci]];
        var activeOutcomes = predParams.Outcomes;
        var activeParameters = predParams.Parameters;
        if (values != null) {
            value = values[ci];
        }
        for (int ai = 0; ai < activeOutcomes.Length; ai++) {
            int oid = activeOutcomes[ai];
            numfeats[oid]++;
            prior[oid] += activeParameters[ai] * value;
        }
    }
    double normal = 0.0;
    for (int oid = 0; oid < evalParams.NumOutcomes; oid++) {
        // Apply the GIS correction feature only when a correction parameter
        // was trained (non-zero).
        if (!evalParams.CorrectionParam.Equals(0d)) {
            prior[oid] = Math.Exp(prior[oid] * evalParams.ConstantInverse +
                                  ((1.0 - (numfeats[oid] / evalParams.CorrectionConstant)) * evalParams.CorrectionParam));
        } else {
            prior[oid] = Math.Exp(prior[oid] * evalParams.ConstantInverse);
        }
        normal += prior[oid];
    }
    for (int oid = 0; oid < evalParams.NumOutcomes; oid++) {
        prior[oid] /= normal;
    }
    return prior;
}
/// <summary>
/// Evaluates all training events with the final naive Bayes parameters and
/// displays the resulting training-set accuracy.
/// </summary>
/// <param name="evalParams">The trained parameters used to score each event.</param>
private void TrainingStats(EvalParameters evalParams) {
    var correct = 0;
    for (var eventIndex = 0; eventIndex < numUniqueEvents; eventIndex++) {
        for (var repeat = 0; repeat < numTimesEventsSeen[eventIndex]; repeat++) {
            var distribution = new double[numOutcomes];
            NaiveBayesModel.Eval(contexts[eventIndex], values?[eventIndex], distribution, evalParams);
            // The prediction is the outcome with the highest probability.
            if (MaxIndex(distribution) == outcomeList[eventIndex]) {
                correct++;
            }
        }
    }
    var accuracy = (double)correct / numEvents;
    Display("Stats: (" + correct + "/" + numEvents + ") " + accuracy);
}
/// <summary>
/// Convenience overload: evaluates a context without real-valued features
/// (every feature weighs 1) by delegating to the full overload.
/// </summary>
/// <param name="context">The integer values of the predicates which have been observed at the present decision point.</param>
/// <param name="prior">The prior distribution for the specified context.</param>
/// <param name="model">The set of parameters used in this computation.</param>
/// <returns>The normalized probabilities for the outcomes given the context,
/// indexed by outcome id; outcome names are available via getOutcome(int i).</returns>
public static double[] eval(int[] context, double[] prior, EvalParameters model) {
    return eval(context, null, prior, model);
}
/// <summary>
/// Convenience overload: evaluates a context without real-valued features
/// (every feature weighs 1) by delegating to the full overload.
/// </summary>
/// <param name="context">The integer values of the predicates which have been observed at the present decision point.</param>
/// <param name="prior">The prior distribution for the specified context.</param>
/// <param name="evalParams">The set of parameters used in this computation.</param>
/// <returns>The normalized probabilities for the outcomes given the context,
/// indexed by outcome id; outcome names are available via getOutcome(int i).</returns>
public static double[] Eval(int[] context, double[] prior, EvalParameters evalParams) {
    return Eval(context, null, prior, evalParams);
}
/// <summary>
/// Train a model using the GIS algorithm.
/// </summary>
/// <param name="iterations">The number of GIS iterations to perform.</param>
/// <param name="di">The data indexer used to compress events in memory.</param>
/// <param name="modelPrior">The prior distribution used to train this model.</param>
/// <param name="modelCutoff">The number of times a feature must occur to be included.</param>
/// <param name="threads">The number of threads used to train this model.</param>
/// <returns>The newly trained model, which can be used immediately or saved to disk using an <see cref="GISModelWriter"/> object.</returns>
/// <exception cref="ArgumentOutOfRangeException">Thrown when <paramref name="threads"/> is not positive.</exception>
public GISModel TrainModel(int iterations, IDataIndexer di, IPrior modelPrior, int modelCutoff, int threads) {
    if (threads <= 0)
        throw new ArgumentOutOfRangeException("threads", threads, @"Threads must be at least one or greater.");

    // One model-expectation table per worker thread.
    modelExpects = new MutableContext[threads][];

    info.Append("Trained using GIS algorithm.\n\n");

    // Executes the data indexer
    di.Execute();

    // Incorporate all of the needed info.
    Display("Incorporating indexed data for training...");
    contexts = di.GetContexts();
    values = di.Values;
    cutoff = modelCutoff;
    predicateCounts = di.GetPredCounts();
    numTimesEventsSeen = di.GetNumTimesEventsSeen();
    numUniqueEvents = contexts.Length;
    prior = modelPrior;

    // determine the correction constant and its inverse: the largest feature
    // count (or feature-value sum, for real-valued events) of any single event.
    double correctionConstant = 0;
    for (int ci = 0; ci < contexts.Length; ci++) {
        if (values == null || values[ci] == null) {
            if (contexts[ci].Length > correctionConstant) {
                correctionConstant = contexts[ci].Length;
            }
        } else {
            var cl = values[ci][0];
            for (var vi = 1; vi < values[ci].Length; vi++) {
                cl += values[ci][vi];
            }
            if (cl > correctionConstant) {
                correctionConstant = cl;
            }
        }
    }
    Display("done.");

    outcomeLabels = di.GetOutcomeLabels();
    outcomeList = di.GetOutcomeList();
    numOutcomes = outcomeLabels.Length;
    predLabels = di.GetPredLabels();
    prior.SetLabels(outcomeLabels, predLabels);
    numPreds = predLabels.Length;

    info.Append("Number of Event Tokens: {0}\n", numUniqueEvents);
    info.Append(" Number of Outcomes: {0}\n", numOutcomes);
    info.Append(" Number of Predicates: {0}\n", numPreds);

    Display("\tNumber of Event Tokens: " + numUniqueEvents);
    Display("\t Number of Outcomes: " + numOutcomes);
    Display("\t Number of Predicates: " + numPreds);

    // set up feature arrays
    //var predCount = new float[numPreds][numOutcomes];
    var predCount = new float[numPreds][];
    for (int ti = 0; ti < numUniqueEvents; ti++) {
        for (int j = 0; j < contexts[ti].Length; j++) {
            // Rows are allocated lazily, only for predicates that actually occur.
            if (predCount[contexts[ti][j]] == null) {
                predCount[contexts[ti][j]] = new float[numOutcomes];
            }
            if (values != null && values[ti] != null) {
                predCount[contexts[ti][j]][outcomeList[ti]] += numTimesEventsSeen[ti]*values[ti][j];
            } else {
                predCount[contexts[ti][j]][outcomeList[ti]] += numTimesEventsSeen[ti];
            }
        }
    }

    // ReSharper disable once RedundantAssignment
    di = null; // the indexer is no longer referenced below

    // Get the observed expectations of the features. Strictly speaking,
    // we should divide the counts by the number of Tokens, but because of
    // the way the model's expectations are approximated in the
    // implementation, this is canceled out when we compute the next
    // iteration of a parameter, making the extra divisions wasteful.
    param = new MutableContext[numPreds];
    for (var i = 0; i < modelExpects.Length; i++)
        modelExpects[i] = new MutableContext[numPreds];
    observedExpects = new MutableContext[numPreds];

    // The model does need the correction constant and the correction feature. The correction constant
    // is only needed during training, and the correction feature is not necessary.
    // For compatibility reasons the model contains form now on a correction constant of 1,
    // and a correction param 0.
    // ReSharper disable once CoVariantArrayConversion
    evalParams = new EvalParameters(param, 0, 1, numOutcomes);

    var activeOutcomes = new int[numOutcomes];
    var allOutcomesPattern = new int[numOutcomes];
    for (var oi = 0; oi < numOutcomes; oi++) {
        allOutcomesPattern[oi] = oi;
    }

    for (var pi = 0; pi < numPreds; pi++) {
        var numActiveOutcomes = 0;
        int[] outcomePattern;
        if (Smoothing) {
            // With smoothing every outcome is active for every predicate.
            numActiveOutcomes = numOutcomes;
            outcomePattern = allOutcomesPattern;
        } else {
            //determine active outcomes
            for (var oi = 0; oi < numOutcomes; oi++) {
                if (predCount[pi][oi] > 0 && predicateCounts[pi] >= cutoff) {
                    activeOutcomes[numActiveOutcomes] = oi;
                    numActiveOutcomes++;
                }
            }
            if (numActiveOutcomes == numOutcomes) {
                outcomePattern = allOutcomesPattern;
            } else {
                outcomePattern = new int[numActiveOutcomes];
                for (var aoi = 0; aoi < numActiveOutcomes; aoi++) {
                    outcomePattern[aoi] = activeOutcomes[aoi];
                }
            }
        }
        param[pi] = new MutableContext(outcomePattern, new double[numActiveOutcomes]);
        foreach (MutableContext[] me in modelExpects)
            me[pi] = new MutableContext(outcomePattern, new double[numActiveOutcomes]);
        observedExpects[pi] = new MutableContext(outcomePattern, new double[numActiveOutcomes]);
        for (var aoi = 0; aoi < numActiveOutcomes; aoi++) {
            var oi = outcomePattern[aoi];
            param[pi].SetParameter(aoi, 0.0);
            foreach (var modelExpect in modelExpects) {
                modelExpect[pi].SetParameter(aoi, 0.0);
            }
            if (predCount[pi][oi] > 0) {
                observedExpects[pi].SetParameter(aoi, predCount[pi][oi]);
            } else if (Smoothing) {
                // Unseen (predicate, outcome) pairs get the fake smoothing count.
                observedExpects[pi].SetParameter(aoi, SmoothingObservation);
            }
        }
    }

    Display("...done.");

    /***************** Find the parameters ************************/
    if (threads == 1)
        Display("Computing model parameters ...");
    else
        Display("Computing model parameters in " + threads + " threads...");

    FindParameters(iterations, correctionConstant);

    /*************** Create and return the model ******************/
    // To be compatible with old models the correction constant is always 1
    // ReSharper disable once CoVariantArrayConversion
    return new GISModel(param, predLabels, outcomeLabels, 1, evalParams.CorrectionParam) {
        info = TrainingInfo
    };
}
/// <summary>
/// Train a model using the GIS algorithm.
/// </summary>
/// <param name="iterations">The number of GIS iterations to perform.</param>
/// <param name="di">The data indexer used to compress events in memory.</param>
/// <param name="modelPrior">The prior distribution used to train this model.</param>
/// <param name="modelCutoff">The number of times a feature must occur to be included.</param>
/// <param name="threads">The number of threads used to train this model.</param>
/// <returns>The newly trained model, which can be used immediately or saved to disk using an <see cref="GISModelWriter"/> object.</returns>
/// <exception cref="ArgumentOutOfRangeException">Thrown when <paramref name="threads"/> is not positive.</exception>
public GISModel TrainModel(int iterations, IDataIndexer di, IPrior modelPrior, int modelCutoff, int threads) {
    if (threads <= 0) {
        throw new ArgumentOutOfRangeException("threads", threads, @"Threads must be at least one or greater.");
    }

    // One model-expectation table per worker thread.
    modelExpects = new MutableContext[threads][];

    info.Append("Trained using GIS algorithm.\n\n");

    // Executes the data indexer
    di.Execute();

    // Incorporate all of the needed info.
    Display("Incorporating indexed data for training...");
    contexts = di.GetContexts();
    values = di.Values;
    cutoff = modelCutoff;
    predicateCounts = di.GetPredCounts();
    numTimesEventsSeen = di.GetNumTimesEventsSeen();
    numUniqueEvents = contexts.Length;
    prior = modelPrior;

    // determine the correction constant and its inverse: the largest feature
    // count (or feature-value sum, for real-valued events) of any single event.
    double correctionConstant = 0;
    for (int ci = 0; ci < contexts.Length; ci++) {
        if (values == null || values[ci] == null) {
            if (contexts[ci].Length > correctionConstant) {
                correctionConstant = contexts[ci].Length;
            }
        } else {
            var cl = values[ci][0];
            for (var vi = 1; vi < values[ci].Length; vi++) {
                cl += values[ci][vi];
            }
            if (cl > correctionConstant) {
                correctionConstant = cl;
            }
        }
    }
    Display("done.");

    outcomeLabels = di.GetOutcomeLabels();
    outcomeList = di.GetOutcomeList();
    numOutcomes = outcomeLabels.Length;
    predLabels = di.GetPredLabels();
    prior.SetLabels(outcomeLabels, predLabels);
    numPreds = predLabels.Length;

    info.Append("Number of Event Tokens: {0}\n", numUniqueEvents);
    info.Append(" Number of Outcomes: {0}\n", numOutcomes);
    info.Append(" Number of Predicates: {0}\n", numPreds);

    Display("\tNumber of Event Tokens: " + numUniqueEvents);
    Display("\t Number of Outcomes: " + numOutcomes);
    Display("\t Number of Predicates: " + numPreds);

    // set up feature arrays
    //var predCount = new float[numPreds][numOutcomes];
    var predCount = new float[numPreds][];
    for (int ti = 0; ti < numUniqueEvents; ti++) {
        for (int j = 0; j < contexts[ti].Length; j++) {
            // Rows are allocated lazily, only for predicates that actually occur.
            if (predCount[contexts[ti][j]] == null) {
                predCount[contexts[ti][j]] = new float[numOutcomes];
            }
            if (values != null && values[ti] != null) {
                predCount[contexts[ti][j]][outcomeList[ti]] += numTimesEventsSeen[ti] * values[ti][j];
            } else {
                predCount[contexts[ti][j]][outcomeList[ti]] += numTimesEventsSeen[ti];
            }
        }
    }

    // ReSharper disable once RedundantAssignment
    di = null; // the indexer is no longer referenced below

    // Get the observed expectations of the features. Strictly speaking,
    // we should divide the counts by the number of Tokens, but because of
    // the way the model's expectations are approximated in the
    // implementation, this is canceled out when we compute the next
    // iteration of a parameter, making the extra divisions wasteful.
    param = new MutableContext[numPreds];
    for (var i = 0; i < modelExpects.Length; i++) {
        modelExpects[i] = new MutableContext[numPreds];
    }
    observedExpects = new MutableContext[numPreds];

    // The model does need the correction constant and the correction feature. The correction constant
    // is only needed during training, and the correction feature is not necessary.
    // For compatibility reasons the model contains form now on a correction constant of 1,
    // and a correction param 0.
    // ReSharper disable once CoVariantArrayConversion
    evalParams = new EvalParameters(param, 0, 1, numOutcomes);

    var activeOutcomes = new int[numOutcomes];
    var allOutcomesPattern = new int[numOutcomes];
    for (var oi = 0; oi < numOutcomes; oi++) {
        allOutcomesPattern[oi] = oi;
    }

    for (var pi = 0; pi < numPreds; pi++) {
        var numActiveOutcomes = 0;
        int[] outcomePattern;
        if (Smoothing) {
            // With smoothing every outcome is active for every predicate.
            numActiveOutcomes = numOutcomes;
            outcomePattern = allOutcomesPattern;
        } else {
            //determine active outcomes
            for (var oi = 0; oi < numOutcomes; oi++) {
                if (predCount[pi][oi] > 0 && predicateCounts[pi] >= cutoff) {
                    activeOutcomes[numActiveOutcomes] = oi;
                    numActiveOutcomes++;
                }
            }
            if (numActiveOutcomes == numOutcomes) {
                outcomePattern = allOutcomesPattern;
            } else {
                outcomePattern = new int[numActiveOutcomes];
                for (var aoi = 0; aoi < numActiveOutcomes; aoi++) {
                    outcomePattern[aoi] = activeOutcomes[aoi];
                }
            }
        }
        param[pi] = new MutableContext(outcomePattern, new double[numActiveOutcomes]);
        foreach (MutableContext[] me in modelExpects) {
            me[pi] = new MutableContext(outcomePattern, new double[numActiveOutcomes]);
        }
        observedExpects[pi] = new MutableContext(outcomePattern, new double[numActiveOutcomes]);
        for (var aoi = 0; aoi < numActiveOutcomes; aoi++) {
            var oi = outcomePattern[aoi];
            param[pi].SetParameter(aoi, 0.0);
            foreach (var modelExpect in modelExpects) {
                modelExpect[pi].SetParameter(aoi, 0.0);
            }
            if (predCount[pi][oi] > 0) {
                observedExpects[pi].SetParameter(aoi, predCount[pi][oi]);
            } else if (Smoothing) {
                // Unseen (predicate, outcome) pairs get the fake smoothing count.
                observedExpects[pi].SetParameter(aoi, SmoothingObservation);
            }
        }
    }

    Display("...done.");

    /***************** Find the parameters ************************/
    if (threads == 1) {
        Display("Computing model parameters ...");
    } else {
        Display("Computing model parameters in " + threads + " threads...");
    }

    FindParameters(iterations, correctionConstant);

    /*************** Create and return the model ******************/
    // To be compatible with old models the correction constant is always 1
    // ReSharper disable once CoVariantArrayConversion
    return (new GISModel(param, predLabels, outcomeLabels, 1, evalParams.CorrectionParam) {
        info = TrainingInfo
    });
}
/// <summary>
/// Runs the perceptron training loop for up to <paramref name="iterations"/>
/// passes over the indexed events, optionally summing parameters across
/// iterations for averaging, and returns the resulting predicate parameters.
/// </summary>
/// <param name="iterations">Maximum number of training iterations.</param>
/// <param name="useAverage">When true, the averaged parameters are returned instead of the final ones.</param>
/// <returns>The trained (possibly averaged) parameters, one <see cref="MutableContext"/> per predicate.</returns>
private MutableContext[] FindParameters(int iterations, bool useAverage) {
    info.Append(" Number of Iterations: {0}\n", iterations);

    Display("\nPerforming " + iterations + " iterations.\n");

    var allOutcomesPattern = new int[numOutcomes];
    for (var oi = 0; oi < numOutcomes; oi++) {
        allOutcomesPattern[oi] = oi;
    }

    /* Stores the estimated parameter value of each predicate during iteration. */
    var param = new MutableContext[numPreds];
    for (var pi = 0; pi < numPreds; pi++) {
        param[pi] = new MutableContext(allOutcomesPattern, new double[numOutcomes]);
        for (var aoi = 0; aoi < numOutcomes; aoi++) {
            param[pi].SetParameter(aoi, 0.0);
        }
    }

    // ReSharper disable once CoVariantArrayConversion
    var evalParams = new EvalParameters(param, numOutcomes);

    // Stores the sum of parameter values of each predicate over many iterations.
    var summedParams = new MutableContext[numPreds];
    if (useAverage) {
        for (var pi = 0; pi < numPreds; pi++) {
            summedParams[pi] = new MutableContext(allOutcomesPattern, new double[numOutcomes]);
            for (var aoi = 0; aoi < numOutcomes; aoi++) {
                summedParams[pi].SetParameter(aoi, 0.0);
            }
        }
    }

    // Keep track of the previous three accuracies. The difference of
    // the mean of these and the current training set accuracy is used
    // with tolerance to decide whether to stop.
    var prevAccuracy1 = 0.0;
    var prevAccuracy2 = 0.0;
    var prevAccuracy3 = 0.0;

    // A counter for the denominator for averaging.
    var numTimesSummed = 0;

    double stepSize = 1;
    for (var i = 1; i <= iterations; i++) {
        // Decrease the step size by a small amount.
        if (stepSizeDecrease > 0) {
            stepSize *= 1 - stepSizeDecrease;
        }

        // Honor a cooperative cancellation request, if one was supplied.
        if (Monitor != null && Monitor.Token.CanBeCanceled) {
            Monitor.Token.ThrowIfCancellationRequested();
        }

        var numCorrect = 0;
        for (var ei = 0; ei < numUniqueEvents; ei++) {
            var targetOutcome = outcomeList[ei];
            for (var ni = 0; ni < numTimesEventsSeen[ei]; ni++) {
                // Compute the model's prediction according to the current parameters.
                var modelDistribution = new double[numOutcomes];
                PerceptronModel.Eval(contexts[ei], values != null ? values[ei] : null, modelDistribution, evalParams, false);

                var maxOutcome = MaxIndex(modelDistribution);

                // If the predicted outcome is different from the target
                // outcome, do the standard update: boost the parameters
                // associated with the target and reduce those associated
                // with the incorrect predicted outcome.
                if (maxOutcome != targetOutcome) {
                    for (var ci = 0; ci < contexts[ei].Length; ci++) {
                        var pi = contexts[ei][ci];
                        if (values == null) {
                            param[pi].UpdateParameter(targetOutcome, stepSize);
                            param[pi].UpdateParameter(maxOutcome, -stepSize);
                        } else {
                            param[pi].UpdateParameter(targetOutcome, stepSize * values[ei][ci]);
                            param[pi].UpdateParameter(maxOutcome, -stepSize * values[ei][ci]);
                        }
                    }
                }

                // Update the counts for accuracy.
                if (maxOutcome == targetOutcome) {
                    numCorrect++;
                }
            }
        }

        // Calculate the training accuracy and display.
        var trainingAccuracy = (double)numCorrect / numEvents;
        Display($"{i,-4} {numCorrect} of {numEvents} - {trainingAccuracy}");

        // TODO: Make averaging configurable !!!
        // NOTE(review): the else-if branch makes doAveraging equal to
        // useAverage regardless of UseSkippedAveraging, so the skipped
        // schedule (i < 20 or perfect-square iterations) has no effect here.
        // This mirrors the upstream OpenNLP code — confirm whether intended.
        bool doAveraging;
        if (useAverage && UseSkippedAveraging && (i < 20 || IsPerfectSquare(i))) {
            doAveraging = true;
        } else if (useAverage) {
            doAveraging = true;
        } else {
            doAveraging = false;
        }

        if (doAveraging) {
            numTimesSummed++;
            for (var pi = 0; pi < numPreds; pi++) {
                for (var aoi = 0; aoi < numOutcomes; aoi++) {
                    summedParams[pi].UpdateParameter(aoi, param[pi].Parameters[aoi]);
                }
            }
        }

        // If the tolerance is greater than the difference between the
        // current training accuracy and all of the previous three
        // training accuracies, stop training.
        if (Math.Abs(prevAccuracy1 - trainingAccuracy) < tolerance &&
            Math.Abs(prevAccuracy2 - trainingAccuracy) < tolerance &&
            Math.Abs(prevAccuracy3 - trainingAccuracy) < tolerance) {
            Display("Stopping: change in training set accuracy less than " + tolerance + "\n");
            break;
        }

        // Update the previous training accuracies.
        prevAccuracy1 = prevAccuracy2;
        prevAccuracy2 = prevAccuracy3;
        prevAccuracy3 = trainingAccuracy;
    }

    // Output the final training stats.
    TrainingStats(evalParams);

    if (!useAverage) {
        return (param);
    }

    if (numTimesSummed == 0) // Improbable but possible according to the Coverity.
    {
        numTimesSummed = 1;
    }

    // Create averaged parameters
    for (var pi = 0; pi < numPreds; pi++) {
        for (var aoi = 0; aoi < numOutcomes; aoi++) {
            summedParams[pi].SetParameter(aoi, summedParams[pi].Parameters[aoi] / numTimesSummed);
        }
    }

    return (summedParams);
}
/// <summary>
/// Runs the perceptron training loop for up to <paramref name="iterations"/>
/// passes over the indexed events, optionally summing parameters across
/// iterations for averaging, and returns the resulting predicate parameters.
/// </summary>
/// <param name="iterations">Maximum number of training iterations.</param>
/// <param name="useAverage">When true, the averaged parameters are returned instead of the final ones.</param>
/// <returns>The trained (possibly averaged) parameters, one MutableContext per predicate.</returns>
private MutableContext[] findParameters(int iterations, bool useAverage) {
    display("Performing " + iterations + " iterations.\n");

    int[] allOutcomesPattern = new int[numOutcomes];
    for (int oi = 0; oi < numOutcomes; oi++) {
        allOutcomesPattern[oi] = oi;
    }

    // Stores the estimated parameter value of each predicate during iteration.
    MutableContext[] parameters = new MutableContext[numPreds];
    for (int pi = 0; pi < numPreds; pi++) {
        parameters[pi] = new MutableContext(allOutcomesPattern, new double[numOutcomes]);
        for (int aoi = 0; aoi < numOutcomes; aoi++) {
            parameters[pi].setParameter(aoi, 0.0);
        }
    }

    EvalParameters evalParams = new EvalParameters(parameters, numOutcomes);

    // Stores the sum of parameter values of each predicate over many iterations.
    MutableContext[] summedParams = new MutableContext[numPreds];
    if (useAverage) {
        for (int pi = 0; pi < numPreds; pi++) {
            summedParams[pi] = new MutableContext(allOutcomesPattern, new double[numOutcomes]);
            for (int aoi = 0; aoi < numOutcomes; aoi++) {
                summedParams[pi].setParameter(aoi, 0.0);
            }
        }
    }

    // Keep track of the previous three accuracies. The difference of
    // the mean of these and the current training set accuracy is used
    // with tolerance to decide whether to stop.
    double prevAccuracy1 = 0.0;
    double prevAccuracy2 = 0.0;
    double prevAccuracy3 = 0.0;

    // A counter for the denominator for averaging.
    int numTimesSummed = 0;

    double stepsize = 1;
    for (int i = 1; i <= iterations; i++) {
        // Decrease the stepsize by a small amount (stepSizeDecrease is nullable).
        if (stepSizeDecrease != null) {
            stepsize *= 1 - stepSizeDecrease.GetValueOrDefault();
        }

        displayIteration(i);

        int numCorrect = 0;
        for (int ei = 0; ei < numUniqueEvents; ei++) {
            int targetOutcome = outcomeList[ei];
            for (int ni = 0; ni < this.numTimesEventsSeen[ei]; ni++) {
                // Compute the model's prediction according to the current parameters.
                double[] modelDistribution = new double[numOutcomes];
                if (values != null) {
                    PerceptronModel.eval(contexts[ei], values[ei], modelDistribution, evalParams, false);
                } else {
                    PerceptronModel.eval(contexts[ei], null, modelDistribution, evalParams, false);
                }

                int maxOutcome = maxIndex(modelDistribution);

                // If the predicted outcome is different from the target
                // outcome, do the standard update: boost the parameters
                // associated with the target and reduce those associated
                // with the incorrect predicted outcome.
                if (maxOutcome != targetOutcome) {
                    for (int ci = 0; ci < contexts[ei].Length; ci++) {
                        int pi = contexts[ei][ci];
                        if (values == null) {
                            parameters[pi].updateParameter(targetOutcome, stepsize);
                            parameters[pi].updateParameter(maxOutcome, -stepsize);
                        } else {
                            parameters[pi].updateParameter(targetOutcome, stepsize * values[ei][ci]);
                            parameters[pi].updateParameter(maxOutcome, -stepsize * values[ei][ci]);
                        }
                    }
                }

                // Update the counts for accuracy.
                if (maxOutcome == targetOutcome) {
                    numCorrect++;
                }
            }
        }

        // Calculate the training accuracy and display (only the first 10
        // iterations and every 10th after that, to limit output volume).
        double trainingAccuracy = (double)numCorrect / numEvents;
        if (i < 10 || (i % 10) == 0) {
            display(". (" + numCorrect + "/" + numEvents + ") " + trainingAccuracy + "\n");
        }

        // TODO: Make averaging configurable !!!
        // NOTE(review): the else-if branch makes doAveraging equal to
        // useAverage regardless of useSkippedlAveraging, so the skipped
        // schedule (i < 20 or perfect-square iterations) has no effect here.
        // Matches upstream OpenNLP — confirm whether intended.
        bool doAveraging;
        if (useAverage && useSkippedlAveraging && (i < 20 || isPerfectSquare(i))) {
            doAveraging = true;
        } else if (useAverage) {
            doAveraging = true;
        } else {
            doAveraging = false;
        }

        if (doAveraging) {
            numTimesSummed++;
            for (int pi = 0; pi < numPreds; pi++) {
                for (int aoi = 0; aoi < numOutcomes; aoi++) {
                    summedParams[pi].updateParameter(aoi, parameters[pi].Parameters[aoi]);
                }
            }
        }

        // If the tolerance is greater than the difference between the
        // current training accuracy and all of the previous three
        // training accuracies, stop training.
        if (Math.Abs(prevAccuracy1 - trainingAccuracy) < tolerance &&
            Math.Abs(prevAccuracy2 - trainingAccuracy) < tolerance &&
            Math.Abs(prevAccuracy3 - trainingAccuracy) < tolerance) {
            display("Stopping: change in training set accuracy less than " + tolerance + "\n");
            break;
        }

        // Update the previous training accuracies.
        prevAccuracy1 = prevAccuracy2;
        prevAccuracy2 = prevAccuracy3;
        prevAccuracy3 = trainingAccuracy;
    }

    // Output the final training stats.
    trainingStats(evalParams);

    // Create averaged parameters
    if (useAverage) {
        // NOTE(review): unlike the sibling FindParameters variant, there is no
        // guard for numTimesSummed == 0 — a break on the first checked
        // iteration before any summing would divide by zero; confirm.
        for (int pi = 0; pi < numPreds; pi++) {
            for (int aoi = 0; aoi < numOutcomes; aoi++) {
                summedParams[pi].setParameter(aoi, summedParams[pi].Parameters[aoi] / numTimesSummed);
            }
        }
        return (summedParams);
    } else {
        return (parameters);
    }
}
    /// <summary>
    /// Train a model using the GIS algorithm.
    /// </summary>
    /// <param name="iterations"> The number of GIS iterations to perform. </param>
    /// <param name="di"> The data indexer used to compress events in memory. </param>
    /// <param name="modelPrior"> The prior distribution used to train this model. </param>
    /// <param name="cutoff"> Minimum number of times a predicate must have been observed
    /// for its outcomes to be treated as active (ignored when simple smoothing is on). </param>
    /// <param name="threads"> Number of worker threads used when computing model
    /// expectations; must be at least 1. </param>
    /// <returns> The newly trained model, which can be used immediately or saved
    /// to disk using an opennlp.maxent.io.GISModelWriter object. </returns>
    /// <exception cref="System.ArgumentException"> Thrown when <paramref name="threads"/> is less than one. </exception>
    public virtual GISModel trainModel(int iterations, DataIndexer di, Prior modelPrior, int cutoff, int threads) {
        if (threads <= 0) {
            throw new System.ArgumentException("threads must be at least one or greater but is " + threads + "!");
        }

        // One expectation-accumulator array per worker thread.
        modelExpects = new MutableContext[threads][];

        // ************ Incorporate all of the needed info *****************
        display("Incorporating indexed data for training... \n");
        contexts = di.Contexts;
        values = di.Values;
        this.cutoff = cutoff;
        predicateCounts = di.PredCounts;
        numTimesEventsSeen = di.NumTimesEventsSeen;
        numUniqueEvents = contexts.Length;
        this.prior = modelPrior;
        //printTable(contexts);

        // Determine the correction constant: the largest total feature mass of
        // any single event (context length for binary features, sum of feature
        // values for real-valued features).
        double correctionConstant = 0;
        for (int ci = 0; ci < contexts.Length; ci++) {
            if (values == null || values[ci] == null) {
                if (contexts[ci].Length > correctionConstant) {
                    correctionConstant = contexts[ci].Length;
                }
            } else {
                float cl = values[ci][0];
                for (int vi = 1; vi < values[ci].Length; vi++) {
                    cl += values[ci][vi];
                }
                if (cl > correctionConstant) {
                    correctionConstant = cl;
                }
            }
        }
        display("done.\n");

        outcomeLabels = di.OutcomeLabels;
        outcomeList = di.OutcomeList;
        numOutcomes = outcomeLabels.Length;

        predLabels = di.PredLabels;
        prior.setLabels(outcomeLabels, predLabels);
        numPreds = predLabels.Length;

        display("\tNumber of Event Tokens: " + numUniqueEvents + "\n");
        display("\t Number of Outcomes: " + numOutcomes + "\n");
        display("\t Number of Predicates: " + numPreds + "\n");

        // Set up feature arrays: predCount[predicate][outcome] is the (possibly
        // weighted) count of that predicate/outcome pair in the training data.
        float[][] predCount = RectangularArrays.ReturnRectangularFloatArray(numPreds, numOutcomes);
        for (int ti = 0; ti < numUniqueEvents; ti++) {
            for (int j = 0; j < contexts[ti].Length; j++) {
                if (values != null && values[ti] != null) {
                    predCount[contexts[ti][j]][outcomeList[ti]] += numTimesEventsSeen[ti] * values[ti][j];
                } else {
                    predCount[contexts[ti][j]][outcomeList[ti]] += numTimesEventsSeen[ti];
                }
            }
        }

        //printTable(predCount);
        di = null; // don't need it anymore

        // A fake "observation" to cover features which are not detected in
        // the data. The default is to assume that we observed "1/10th" of a
        // feature during training.
        double smoothingObservation = _smoothingObservation;

        // Get the observed expectations of the features. Strictly speaking,
        // we should divide the counts by the number of Tokens, but because of
        // the way the model's expectations are approximated in the
        // implementation, this is cancelled out when we compute the next
        // iteration of a parameter, making the extra divisions wasteful.
        parameters = new MutableContext[numPreds];
        for (int i = 0; i < modelExpects.Length; i++) {
            modelExpects[i] = new MutableContext[numPreds];
        }
        observedExpects = new MutableContext[numPreds];

        // The model does need the correction constant and the correction feature. The correction constant
        // is only needed during training, and the correction feature is not necessary.
        // For compatibility reasons the model contains form now on a correction constant of 1,
        // and a correction param 0.
        evalParams = new EvalParameters(parameters, 0, 1, numOutcomes);

        int[] activeOutcomes = new int[numOutcomes];
        int[] outcomePattern;
        int[] allOutcomesPattern = new int[numOutcomes];
        for (int oi = 0; oi < numOutcomes; oi++) {
            allOutcomesPattern[oi] = oi;
        }

        // For each predicate, determine which outcomes are active (observed with
        // it at least once and above the cutoff) and allocate its parameter,
        // model-expectation, and observed-expectation contexts accordingly.
        int numActiveOutcomes = 0;
        for (int pi = 0; pi < numPreds; pi++) {
            numActiveOutcomes = 0;
            if (useSimpleSmoothing) {
                // With simple smoothing every outcome is active for every predicate.
                numActiveOutcomes = numOutcomes;
                outcomePattern = allOutcomesPattern;
            } else //determine active outcomes
            {
                for (int oi = 0; oi < numOutcomes; oi++) {
                    if (predCount[pi][oi] > 0 && predicateCounts[pi] >= cutoff) {
                        activeOutcomes[numActiveOutcomes] = oi;
                        numActiveOutcomes++;
                    }
                }
                if (numActiveOutcomes == numOutcomes) {
                    // Reuse the shared all-outcomes pattern to avoid an allocation.
                    outcomePattern = allOutcomesPattern;
                } else {
                    outcomePattern = new int[numActiveOutcomes];
                    for (int aoi = 0; aoi < numActiveOutcomes; aoi++) {
                        outcomePattern[aoi] = activeOutcomes[aoi];
                    }
                }
            }
            parameters[pi] = new MutableContext(outcomePattern, new double[numActiveOutcomes]);
            for (int i = 0; i < modelExpects.Length; i++) {
                modelExpects[i][pi] = new MutableContext(outcomePattern, new double[numActiveOutcomes]);
            }
            observedExpects[pi] = new MutableContext(outcomePattern, new double[numActiveOutcomes]);
            for (int aoi = 0; aoi < numActiveOutcomes; aoi++) {
                int oi = outcomePattern[aoi];
                parameters[pi].setParameter(aoi, 0.0);
                foreach (MutableContext[] modelExpect in modelExpects) {
                    modelExpect[pi].setParameter(aoi, 0.0);
                }
                if (predCount[pi][oi] > 0) {
                    observedExpects[pi].setParameter(aoi, predCount[pi][oi]);
                } else if (useSimpleSmoothing) {
                    // Unseen pair: pretend we saw a fraction of an observation.
                    observedExpects[pi].setParameter(aoi, smoothingObservation);
                }
            }
        }

        predCount = null; // don't need it anymore

        display("...done.\n");

        // *************** Find the parameters ***********************
        if (threads == 1) {
            display("Computing model parameters ...\n");
        } else {
            display("Computing model parameters in " + threads + " threads...\n");
        }

        findParameters(iterations, correctionConstant);

        // ************* Create and return the model *****************
        // To be compatible with old models the correction constant is always 1
        return(new GISModel(parameters, predLabels, outcomeLabels, 1, evalParams.CorrectionParam));
    }