/// <summary>
/// Runs 10-fold cross-validation over the recorded inputs: each fold trains a context model
/// from the train indices, then measures GeneCSCC prediction quality on the validation indices.
/// </summary>
/// <returns>Aggregated cross-validation prediction-quality result.</returns>
public CrossValidationResult Evaluate()
{
    var crossvalidation = new KFoldCrossValidation(_inputs.Count, 10);
    crossvalidation.Evaluation = delegate(int k, int[] indicesTrain, int[] indicesValidation)
    {
        // Build the training context model from the train-fold inputs.
        var trainingInputs = _inputs.SubArray(indicesTrain);
        var trainingModel = new Dictionary<string, List<ContextInfo>>();
        foreach (var trainingInput in trainingInputs)
        {
            trainingModel.ChainedAdd(trainingInput.Item1, trainingInput.Item2);
        }

        var validationInputs = _inputs.SubArray(indicesValidation);
        var validationOutputs = _outputs.SubArray(indicesValidation);
        var geneCscc = new GeneCSCC.GeneCSCC(new ContextModel<ContextInfo>(trainingModel));

        var top1Matches = 0.0;
        // NOTE(review): top3Matches is accumulated below but never reported in the returned
        // quality values — TODO confirm whether top-3 accuracy was meant to be returned.
        var top3Matches = 0.0;
        var predictionsMade = 0.0;
        for (var i = 0; i < validationInputs.Length; i++)
        {
            var predictions = geneCscc.GetPredictions(validationInputs[i].Item2, validationInputs[i].Item1);
            if (predictions.Count == 0)
            {
                continue;
            }
            predictionsMade++;
            // Check the top-ranked prediction first, then fall back to ranks 2 and 3.
            if (validationOutputs[i].Validate(predictions[0]) == ValidationInfo.Result.Match)
            {
                top1Matches++;
                top3Matches++;
            }
            else if (predictions.Count > 1 && validationOutputs[i].Validate(predictions[1]) == ValidationInfo.Result.Match)
            {
                top3Matches++;
            }
            else if (predictions.Count > 2 && validationOutputs[i].Validate(predictions[2]) == ValidationInfo.Result.Match)
            {
                top3Matches++;
            }
        }

        // Top-1 precision over the fold, plus the fraction of inputs that yielded any prediction.
        return new PredictionQualityValues(top1Matches / validationInputs.Length,
                                           predictionsMade / validationInputs.Length);
    };
    var predictionQualities = crossvalidation.Compute();
    return predictionQualities;
}
// Verifies the first test fold when the small sample is split into 5 folds with seed 1.
public void TestSmallSample5Fold()
{
    var validation = new KFoldCrossValidation<string>(smallSample, 5, 1);

    var firstTestFold = validation.GetTestFold(0).ToArray();

    string[] expectedFold = { "7", "9" };
    Assert.AreEqual(expectedFold, firstTestFold);
}
// Verifies the first test fold when the small sample is split into 2 folds with seed 1.
public void TestSmallSample2Fold()
{
    var validation = new KFoldCrossValidation<string>(smallSample, 2, 1);

    var firstTestFold = validation.GetTestFold(0).ToArray();

    string[] expectedFold = { "7", "9", "5", "2", "10" };
    Assert.AreEqual(expectedFold, firstTestFold);
}
/**
 * <summary> Execute Single K-fold cross-validation with the given classifier on the given data set using the given parameters.</summary>
 *
 * <param name="experiment">Experiment to be run.</param>
 * <returns>A Performance instance</returns>
 */
public Performance.Performance Execute(Experiment experiment)
{
    var instances = experiment.GetDataSet().GetInstances();
    var seed = experiment.GetParameter().GetSeed();
    var crossValidation = new KFoldCrossValidation<Instance.Instance>(instances, _k, seed);
    return runExperiment(experiment.GetClassifier(), experiment.GetParameter(), crossValidation);
}
/**
 * <summary> Execute K-fold cross-validation with the given classifier on the given data set using the given parameters.</summary>
 *
 * <param name="experiment">Experiment to be run.</param>
 * <returns>An ExperimentPerformance instance.</returns>
 */
public virtual ExperimentPerformance Execute(Experiment experiment)
{
    var performance = new ExperimentPerformance();
    var instances = experiment.GetDataSet().GetInstances();
    var seed = experiment.GetParameter().GetSeed();
    var crossValidation = new KFoldCrossValidation<Instance.Instance>(instances, K, seed);
    RunExperiment(experiment.GetClassifier(), experiment.GetParameter(), performance, crossValidation);
    return performance;
}
/**
 * Execute K-fold cross-validation with separate test set with the given classifier on the given data set using the given parameters.
 *
 * @param experiment Experiment to be run.
 * @return An ExperimentPerformance instance.
 */
public override ExperimentPerformance Execute(Experiment experiment)
{
    var performance = new ExperimentPerformance();
    var seed = experiment.GetParameter().GetSeed();
    var allInstances = experiment.GetDataSet().GetInstanceList();
    // Hold out 25% of the data (partition 0); cross-validate on the remaining 75% (partition 1).
    var partition = allInstances.Partition(0.25, new Random(seed));
    var crossValidation = new KFoldCrossValidation<Instance.Instance>(partition.Get(1).GetInstances(), K, seed);
    RunExperiment(experiment.GetClassifier(), experiment.GetParameter(), performance, crossValidation, partition.Get(0));
    return performance;
}
// Checks that a 2-fold split of 1000 items yields two disjoint 500-item folds per iteration.
public void TestLargeSample2Fold()
{
    var validation = new KFoldCrossValidation<int>(largeSample, 2, 1);
    for (var fold = 0; fold < 2; fold++)
    {
        var trainFold = validation.GetTrainFold(fold);
        var testFold = validation.GetTestFold(fold);

        // Union covers all 1000 items exactly when the folds are disjoint.
        var combined = new HashSet<int>(trainFold);
        combined.UnionWith(testFold);

        Assert.AreEqual(500, testFold.Count);
        Assert.AreEqual(500, trainFold.Count);
        Assert.AreEqual(1000, combined.Count);
    }
}
/**
 * <summary>Wrapper function to learn the parameter (delta) in additive smoothing. The function first creates K NGrams
 * with the train folds of the corpus. Then optimizes delta with respect to the test folds of the corpus</summary>
 * <param name="corpus">Train corpus used to optimize delta parameter</param>
 * <param name="n">N in N-Gram.</param>
 */
protected override void LearnParameters(List<List<TSymbol>> corpus, int n)
{
    const int foldCount = 10;
    var crossValidation = new KFoldCrossValidation<List<TSymbol>>(corpus, foldCount, 0);
    // Train one N-Gram per train fold; delta is then tuned against the matching test folds.
    var foldNGrams = new NGram<TSymbol>[foldCount];
    for (var fold = 0; fold < foldCount; fold++)
    {
        foldNGrams[fold] = new NGram<TSymbol>(crossValidation.GetTrainFold(fold), n);
    }
    _delta = LearnBestDelta(foldNGrams, crossValidation, 0.1);
}
/**
 * <summary>The algorithm tries to optimize the best lambda for a given corpus. The algorithm uses perplexity on the validation
 * set as the optimization criterion.</summary>
 *
 * <param name="nGrams">10 N-Grams learned for different folds of the corpus. nGrams[i] is the N-Gram trained with i'th train fold of the corpus.</param>
 * <param name="kFoldCrossValidation">Cross-validation data used in training and testing the N-grams.</param>
 * <param name="lowerBound">Initial lower bound for optimizing the best lambda.</param>
 * <returns> Best lambda optimized with k-fold crossvalidation.</returns>
 */
private double LearnBestLambda(NGram <TSymbol>[] nGrams, KFoldCrossValidation <List <TSymbol> > kFoldCrossValidation, double lowerBound)
{
    // Iterative grid-refinement search: each pass samples numberOfParts lambdas in
    // [lowerBound, upperBound], keeps the best by summed perplexity, then narrows the
    // bounds around it until the best perplexity changes by less than 0.1%.
    double bestPrevious = -1, upperBound = 0.999;
    var bestLambda = (lowerBound + upperBound) / 2;
    var numberOfParts = 5;
    // Cache the 10 test folds so they are not re-fetched on every refinement pass.
    var testFolds = new List <List <TSymbol> > [10];
    for (var i = 0; i < 10; i++)
    {
        testFolds[i] = kFoldCrossValidation.GetTestFold(i);
    }
    while (true)
    {
        var bestPerplexity = double.MaxValue;
        for (var value = lowerBound; value <= upperBound; value += (upperBound - lowerBound) / numberOfParts)
        {
            // Total perplexity of this candidate lambda across all 10 folds.
            double perplexity = 0;
            for (var i = 0; i < 10; i++)
            {
                nGrams[i].SetLambda(value);
                perplexity += nGrams[i].GetPerplexity(testFolds[i]);
            }
            if (perplexity < bestPerplexity)
            {
                bestPerplexity = perplexity;
                bestLambda = value;
            }
        }
        // NOTE(review): NewUpperBound receives the already-updated lowerBound, so the two
        // bound updates are order-dependent — presumably intentional, but worth confirming
        // against NewLowerBound/NewUpperBound's definitions.
        lowerBound = NewLowerBound(bestLambda, lowerBound, upperBound, numberOfParts);
        upperBound = NewUpperBound(bestLambda, lowerBound, upperBound, numberOfParts);
        if (bestPrevious != -1)
        {
            // Converged: relative improvement below 0.1%.
            if (System.Math.Abs(bestPrevious - bestPerplexity) / bestPerplexity < 0.001)
            {
                break;
            }
        }
        bestPrevious = bestPerplexity;
    }
    return(bestLambda);
}
// Runs k-fold cross-validation (k = 3) on the training dataset with the classifier chosen
// in comboBox1, then displays mean error, accuracy, and error variance in the UI labels.
private void button1_Click(object sender, EventArgs e)
{
    List<Data> trainingDataset = Utility.LoadDataset(DatasetType.Training);

    KFoldCrossValidation validation = null;
    if (comboBox1.SelectedIndex == 0)
    {
        validation = new KFoldCrossValidation(trainingDataset, 3, ClassifierType.Bayesian, ViewModel.FeatureExtraction.FeatureExtractionType.EuclideanDistance);
    }
    else if (comboBox1.SelectedIndex == 1)
    {
        // KNN additionally needs K; validate the textbox instead of letting int.Parse throw.
        if (!int.TryParse(KTextBox.Text, out var kNeighbours))
        {
            MessageBox.Show("Please enter a valid integer for K.");
            return;
        }
        validation = new KFoldCrossValidation(trainingDataset, 3, ClassifierType.KNearestNeighbour, ViewModel.FeatureExtraction.FeatureExtractionType.EuclideanDistance, kNeighbours);
    }

    // Guard: the original code dereferenced a null 'validation' when no classifier was selected.
    if (validation == null)
    {
        MessageBox.Show("Please select a classifier.");
        return;
    }

    if (!int.TryParse(NumofTimesTextBox.Text, out var numberOfRuns))
    {
        MessageBox.Show("Please enter a valid integer for the number of runs.");
        return;
    }

    validation.Validate(numberOfRuns);
    ErrorMeanLabel.Text = validation.MeanError.ToString();
    OverallAccuracyLabel.Text = validation.Accuracy.ToString();
    ErrorVarianceLabel.Text = validation.VarianceError.ToString();
}
/**
 * <summary>The algorithm tries to optimize the best delta for a given corpus. The algorithm uses perplexity on the validation
 * set as the optimization criterion.</summary>
 * <param name="nGrams">10 N-Grams learned for different folds of the corpus. nGrams[i] is the N-Gram trained with i'th train
 * fold of the corpus.</param>
 * <param name="kFoldCrossValidation">Cross-validation data used in training and testing the N-grams.</param>
 * <param name="lowerBound">Initial lower bound for optimizing the best delta.</param>
 * <returns>Best delta optimized with k-fold crossvalidation.</returns>
 */
private double LearnBestDelta(NGram <TSymbol>[] nGrams, KFoldCrossValidation <List <TSymbol> > kFoldCrossValidation, double lowerBound)
{
    // Iterative grid-refinement search: sample numberOfParts deltas in [lowerBound, upperBound],
    // keep the one with lowest summed test-fold perplexity, then narrow the bounds around it
    // until the best perplexity changes by less than 0.1%.
    double bestPrevious = -1, upperBound = 1;
    var bestDelta = (lowerBound + upperBound) / 2;
    const int numberOfParts = 5;
    while (true)
    {
        var bestPerplexity = Double.MaxValue;
        for (var value = lowerBound; value <= upperBound; value += (upperBound - lowerBound) / numberOfParts)
        {
            // Total perplexity of this candidate delta across all 10 folds.
            double perplexity = 0;
            for (var i = 0; i < 10; i++)
            {
                // Re-smooth the fold's N-Gram with pseudo-count 'value' before measuring.
                nGrams[i].SetProbabilityWithPseudoCount(value, nGrams[i].GetN());
                perplexity += nGrams[i].GetPerplexity(kFoldCrossValidation.GetTestFold(i));
            }
            if (perplexity < bestPerplexity)
            {
                bestPerplexity = perplexity;
                bestDelta = value;
            }
        }
        // NOTE(review): NewUpperBound receives the already-updated lowerBound, so the two
        // bound updates are order-dependent — presumably intentional; confirm against
        // NewLowerBound/NewUpperBound's definitions.
        lowerBound = NewLowerBound(bestDelta, lowerBound, upperBound, numberOfParts);
        upperBound = NewUpperBound(bestDelta, lowerBound, upperBound, numberOfParts);
        if (bestPrevious != -1)
        {
            // Converged: relative improvement below 0.1%.
            if (System.Math.Abs(bestPrevious - bestPerplexity) / bestPerplexity < 0.001)
            {
                break;
            }
        }
        bestPrevious = bestPerplexity;
    }
    return(bestDelta);
}
// Runs k-fold cross-validation (k = 3) on the training dataset with the classifier chosen
// in comboBox1, then displays mean error, accuracy, and error variance in the UI labels.
private void button1_Click(object sender, EventArgs e)
{
    List<Data> trainingDataset = Utility.LoadDataset(DatasetType.Training);

    KFoldCrossValidation validation = null;
    if (comboBox1.SelectedIndex == 0)
    {
        validation = new KFoldCrossValidation(trainingDataset, 3, ClassifierType.Bayesian, ViewModel.FeatureExtraction.FeatureExtractionType.EuclideanDistance);
    }
    else if (comboBox1.SelectedIndex == 1)
    {
        // KNN additionally needs K; validate the textbox instead of letting int.Parse throw.
        if (!int.TryParse(KTextBox.Text, out var kNeighbours))
        {
            MessageBox.Show("Please enter a valid integer for K.");
            return;
        }
        validation = new KFoldCrossValidation(trainingDataset, 3, ClassifierType.KNearestNeighbour, ViewModel.FeatureExtraction.FeatureExtractionType.EuclideanDistance, kNeighbours);
    }

    // Guard: the original code dereferenced a null 'validation' when no classifier was selected.
    if (validation == null)
    {
        MessageBox.Show("Please select a classifier.");
        return;
    }

    if (!int.TryParse(NumofTimesTextBox.Text, out var numberOfRuns))
    {
        MessageBox.Show("Please enter a valid integer for the number of runs.");
        return;
    }

    validation.Validate(numberOfRuns);
    ErrorMeanLabel.Text = validation.MeanError.ToString();
    OverallAccuracyLabel.Text = validation.Accuracy.ToString();
    ErrorVarianceLabel.Text = validation.VarianceError.ToString();
}
/**
 * <summary>Wrapper function to learn the parameters (lambda1 and lambda2) in interpolated smoothing. The function first creates K NGrams
 * with the train folds of the corpus. Then optimizes lambdas with respect to the test folds of the corpus depending on given N.</summary>
 * <param name="corpus">Train corpus used to optimize lambda parameters</param>
 * <param name="n">N in N-Gram.</param>
 */
protected override void LearnParameters(List<List<TSymbol>> corpus, int n)
{
    // Interpolation weights only exist for n >= 2.
    if (n <= 1)
    {
        return;
    }
    const int foldCount = 10;
    var crossValidation = new KFoldCrossValidation<List<TSymbol>>(corpus, foldCount, 0);
    var foldNGrams = new NGram<TSymbol>[foldCount];
    for (var fold = 0; fold < foldCount; fold++)
    {
        foldNGrams[fold] = new NGram<TSymbol>(crossValidation.GetTrainFold(fold), n);
        // Pre-compute smoothed probabilities for orders 2..n, then order 1, per fold.
        for (var order = 2; order <= n; order++)
        {
            foldNGrams[fold].CalculateNGramProbabilities(_simpleSmoothing, order);
        }
        foldNGrams[fold].CalculateNGramProbabilities(_simpleSmoothing, 1);
    }
    if (n == 2)
    {
        _lambda1 = LearnBestLambda(foldNGrams, crossValidation, 0.1);
    }
    else if (n == 3)
    {
        var bestLambdas = LearnBestLambdas(foldNGrams, crossValidation, 0.1, 0.1);
        _lambda1 = bestLambdas[0];
        _lambda2 = bestLambdas[1];
    }
}
/// <summary>
/// Scores a chromosome's parameters (similarity threshold, refinement threshold, candidate cap)
/// via k-fold cross-validation: for each validation input, candidate contexts from the matching
/// training fold are ranked by a cheap similarity, the top candidates are re-scored with
/// NormalizedLCS/Levenshtein metrics, and the best refined candidate is validated against the
/// expected output.
/// </summary>
/// <param name="chromosome">Parameter set under evaluation.</param>
/// <returns>Cross-validated prediction quality for the chromosome.</returns>
public CrossValidationResult Evaluate(Chromosome chromosome)
{
    var similarityThreshold = chromosome.SimilarityThreshold;
    var refinementThreshold = chromosome.RefinementThreshold;
    var maximumRefinedCandidates = chromosome.RefinedCandidates;
    var crossvalidation = new KFoldCrossValidation(0, _folds);
    crossvalidation.Evaluation = delegate(int k, int[] indicesTrain, int[] indicesValidation)
    {
        var trainingModel = _trainingFolds[k];
        var validationInputs = _validationFolds[k].Item1;
        var validationOutputs = _validationFolds[k].Item2;
        var predictionMatches = 0.0;
        for (var i = 0; i < validationInputs.Count; i++)
        {
            var baseCandidates = trainingModel.TryGet(validationInputs[i].Type);
            // First pass: cheap Hamming-distance-style closeness for every base candidate.
            var similarities = new List<PredictionInfo>();
            foreach (var baseCandidate in baseCandidates)
            {
                var extendedSimilarity = baseCandidate.ExtendedSimilarity(validationInputs[i].ContextInfo);
                // NOTE(review): this also calls ExtendedSimilarity — was LocalSimilarity
                // intended here? Preserved as-is; TODO confirm against PredictionInfo usage.
                var localSimilarity = baseCandidate.ExtendedSimilarity(validationInputs[i].ContextInfo);
                var similarity = localSimilarity / extendedSimilarity > similarityThreshold
                    ? localSimilarity
                    : extendedSimilarity;
                similarities.Add(new PredictionInfo(baseCandidate, similarity));
            }
            // Sort by Hamming distance
            similarities.Sort();
            // Take the most similar candidate contexts (capped by the chromosome).
            var refinedCandidates = similarities.Take(maximumRefinedCandidates).ToList();
            // Second pass: re-score with the expensive metrics and drop candidates below the
            // refinement threshold. Iterate backwards — the original forward loop called
            // RemoveAt(j) and then incremented j, skipping (and never re-scoring) the element
            // that shifted into the removed slot.
            for (var j = refinedCandidates.Count - 1; j >= 0; j--)
            {
                refinedCandidates[j].ExtendedSimilarity = refinedCandidates[j].Context.NormalizedLCS(validationInputs[i].ContextInfo);
                refinedCandidates[j].LocalSimilarity = refinedCandidates[j].Context.LevenshteinSimilarity(validationInputs[i].ContextInfo);
                if (refinedCandidates[j].ExtendedSimilarity < refinementThreshold && refinedCandidates[j].LocalSimilarity < refinementThreshold)
                {
                    refinedCandidates.RemoveAt(j);
                }
            }
            refinedCandidates.Sort();
            if (refinedCandidates.Count == 0)
            {
                continue;
            }
            if (validationOutputs[i].Validate(refinedCandidates[0].Context.Invocation) == ValidationInfo.Result.Match)
            {
                predictionMatches++;
            }
        }
        // TODO: Not all validationInputs may be processed
        return new PredictionQualityValues(predictionMatches / validationInputs.Count, 1.0);
    };
    var predictionQualities = crossvalidation.Compute();
    return predictionQualities;
}
/**
 * <summary>The algorithm tries to optimize the best lambdas (lambda1, lambda2) for a given corpus. The algorithm uses perplexity on the validation
 * set as the optimization criterion.</summary>
 *
 * <param name="nGrams">10 N-Grams learned for different folds of the corpus. nGrams[i] is the N-Gram trained with i'th train fold of the corpus.</param>
 * <param name="kFoldCrossValidation">Cross-validation data used in training and testing the N-grams.</param>
 * <param name="lowerBound1">Initial lower bound for optimizing the best lambda1.</param>
 * <param name="lowerBound2">Initial lower bound for optimizing the best lambda2.</param>
 * <returns>Array of two elements: best lambda1 followed by best lambda2.</returns>
 */
private double[] LearnBestLambdas(NGram <TSymbol>[] nGrams, KFoldCrossValidation <List <TSymbol> > kFoldCrossValidation, double lowerBound1, double lowerBound2)
{
    // Two-dimensional iterative grid-refinement search over (lambda1, lambda2): each pass
    // samples a numberOfParts x numberOfParts grid (constrained to lambda1 + lambda2 < 1),
    // keeps the pair with lowest summed test-fold perplexity, then narrows both bound pairs
    // around it until the best perplexity changes by less than 0.1%.
    double upperBound1 = 0.999, upperBound2 = 0.999, bestPrevious = -1;
    double bestLambda1 = (lowerBound1 + upperBound1) / 2, bestLambda2 = (lowerBound2 + upperBound2) / 2;
    // Cache the 10 test folds so they are not re-fetched on every refinement pass.
    var testFolds = new List <List <TSymbol> > [10];
    const int numberOfParts = 5;
    for (var i = 0; i < 10; i++)
    {
        testFolds[i] = kFoldCrossValidation.GetTestFold(i);
    }
    while (true)
    {
        var bestPerplexity = double.MaxValue;
        for (var value1 = lowerBound1; value1 <= upperBound1; value1 += (upperBound1 - lowerBound1) / numberOfParts)
        {
            // Interpolation weights must sum to less than 1.
            for (var value2 = lowerBound2; value2 <= upperBound2 && value1 + value2 < 1; value2 += (upperBound2 - lowerBound2) / numberOfParts)
            {
                // Total perplexity of this candidate pair across all 10 folds.
                double perplexity = 0;
                for (var i = 0; i < 10; i++)
                {
                    nGrams[i].SetLambda(value1, value2);
                    perplexity += nGrams[i].GetPerplexity(testFolds[i]);
                }
                if (perplexity < bestPerplexity)
                {
                    bestPerplexity = perplexity;
                    bestLambda1 = value1;
                    bestLambda2 = value2;
                }
            }
        }
        // NOTE(review): each NewUpperBound call receives the already-updated lower bound, so
        // these updates are order-dependent — presumably intentional; confirm against
        // NewLowerBound/NewUpperBound's definitions.
        lowerBound1 = NewLowerBound(bestLambda1, lowerBound1, upperBound1, numberOfParts);
        upperBound1 = NewUpperBound(bestLambda1, lowerBound1, upperBound1, numberOfParts);
        lowerBound2 = NewLowerBound(bestLambda2, lowerBound2, upperBound2, numberOfParts);
        upperBound2 = NewUpperBound(bestLambda2, lowerBound2, upperBound2, numberOfParts);
        if (bestPrevious != -1)
        {
            // Converged: relative improvement below 0.1%.
            if (System.Math.Abs(bestPrevious - bestPerplexity) / bestPerplexity < 0.001)
            {
                break;
            }
        }
        bestPrevious = bestPerplexity;
    }
    return(new[] { bestLambda1, bestLambda2 });
}
/**
 * <summary>Randomly shuffles the sentences list, seeding the random number generator with the given value.</summary>
 *
 * <param name="seed">Seed used to make the shuffle reproducible.</param>
 */
public void ShuffleSentences(int seed)
{
    var random = new Random(seed);
    KFoldCrossValidation<Sentence>.Shuffle(sentences, random);
}
/**
 * <summary> Shuffles the instance list.</summary>
 * <param name="seed">Seed used for reproducible random number generation.</param>
 */
public void Shuffle(int seed)
{
    var random = new Random(seed);
    KFoldCrossValidation<Instance.Instance>.Shuffle(_list, random);
}