Example #1
        public CrossValidationResult Evaluate()
        {
            var crossvalidation = new KFoldCrossValidation(_inputs.Count, 10);

            crossvalidation.Evaluation = delegate(int k, int[] indicesTrain, int[] indicesValidation)
            {
                var trainingInputs = _inputs.SubArray(indicesTrain);
                var trainingModel  = new Dictionary <string, List <ContextInfo> >();
                foreach (var trainingInput in trainingInputs)
                {
                    trainingModel.ChainedAdd(trainingInput.Item1, trainingInput.Item2);
                }

                var validationInputs  = _inputs.SubArray(indicesValidation);
                var validationOutputs = _outputs.SubArray(indicesValidation);

                var geneCscc = new GeneCSCC.GeneCSCC(new ContextModel <ContextInfo>(trainingModel));

                var top1Matches     = 0.0;
                var top3Matches     = 0.0;
                var predictionsMade = 0.0;

                /*var sw = new Stopwatch();
                 * sw.Start();*/

                for (var i = 0; i < validationInputs.Length; i++)
                {
                    var predictions = geneCscc.GetPredictions(validationInputs[i].Item2, validationInputs[i].Item1);

                    if (predictions.Count == 0)
                    {
                        continue;
                    }

                    predictionsMade++;

                    if (validationOutputs[i].Validate(predictions[0]) == ValidationInfo.Result.Match)
                    {
                        top1Matches++;
                        top3Matches++;
                    }
                    else if (predictions.Count > 1 && validationOutputs[i].Validate(predictions[1]) == ValidationInfo.Result.Match)
                    {
                        top3Matches++;
                    }
                    else if (predictions.Count > 2 && validationOutputs[i].Validate(predictions[2]) == ValidationInfo.Result.Match)
                    {
                        top3Matches++;
                    }
                }
                //sw.Stop();
                //Console.WriteLine("Elapsed time: " + sw.ElapsedMilliseconds / validationInputs.Length);

                return(new PredictionQualityValues(top1Matches / validationInputs.Length, predictionsMade / validationInputs.Length));
            };

            var predictionQualities = crossvalidation.Compute();

            return(predictionQualities);
        }
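The delegate-based KFoldCrossValidation used in Example #1 (and again in Example #14) is only exercised here, not shown. As rough orientation, a driver with that surface could look like the sketch below; the class name IndexKFoldDriver, the plain double score, and the contiguous fold slicing are assumptions for illustration, not the implementation behind CrossValidationResult and PredictionQualityValues.

using System;
using System.Linq;

public class IndexKFoldDriver
{
    private readonly int _count;
    private readonly int _folds;

    // Called once per fold with the fold index and the train/validation index arrays.
    public Func<int, int[], int[], double> Evaluation { get; set; }

    public IndexKFoldDriver(int count, int folds)
    {
        _count = count;
        _folds = folds;
    }

    public double Compute()
    {
        var indices  = Enumerable.Range(0, _count).ToArray();
        var foldSize = _count / _folds;
        var total    = 0.0;

        for (var k = 0; k < _folds; k++)
        {
            // Contiguous slice as the validation fold, everything else as the training fold.
            var validation = indices.Skip(k * foldSize).Take(foldSize).ToArray();
            var train      = indices.Except(validation).ToArray();

            total += Evaluation(k, train, validation);
        }

        // Mean of the per-fold scores.
        return total / _folds;
    }
}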
Example #2
        public void TestSmallSample5Fold()
        {
            var kFoldCrossValidation = new KFoldCrossValidation <string>(smallSample, 5, 1);

            string[] expected2 = { "7", "9" };
            Assert.AreEqual(expected2, kFoldCrossValidation.GetTestFold(0).ToArray());
        }
Example #3
        public void TestSmallSample2Fold()
        {
            var kFoldCrossValidation = new KFoldCrossValidation <string>(smallSample, 2, 1);

            string[] expected3 = { "7", "9", "5", "2", "10" };
            Assert.AreEqual(expected3, kFoldCrossValidation.GetTestFold(0).ToArray());
        }
Example #4
        /**
         * <summary> Execute Single K-fold cross-validation with the given classifier on the given data set using the given parameters.</summary>
         *
         * <param name="experiment">Experiment to be run.</param>
         * <returns>A Performance instance</returns>
         */
        public Performance.Performance Execute(Experiment experiment)
        {
            var crossValidation =
                new KFoldCrossValidation <Instance.Instance>(experiment.GetDataSet().GetInstances(), _k,
                                                             experiment.GetParameter().GetSeed());

            return(runExperiment(experiment.GetClassifier(), experiment.GetParameter(), crossValidation));
        }
Example #5
        /**
         * <summary> Execute K-fold cross-validation with the given classifier on the given data set using the given parameters.</summary>
         *
         * <param name="experiment">Experiment to be run.</param>
         * <returns>An ExperimentPerformance instance.</returns>
         */
        public virtual ExperimentPerformance Execute(Experiment experiment)
        {
            var result = new ExperimentPerformance();

            var crossValidation = new KFoldCrossValidation <Instance.Instance>(experiment.GetDataSet().GetInstances(),
                                                                               K, experiment.GetParameter().GetSeed());

            RunExperiment(experiment.GetClassifier(), experiment.GetParameter(), result, crossValidation);
            return(result);
        }
Example #6
        /**
         * <summary> Execute K-fold cross-validation with a separate test set, using the given classifier on the given data set with the given parameters.</summary>
         *
         * <param name="experiment">Experiment to be run.</param>
         * <returns>An ExperimentPerformance instance.</returns>
         */
        public override ExperimentPerformance Execute(Experiment experiment)
        {
            var result          = new ExperimentPerformance();
            var instanceList    = experiment.GetDataSet().GetInstanceList();
            var partition       = instanceList.Partition(0.25, new Random(experiment.GetParameter().GetSeed()));
            var crossValidation = new KFoldCrossValidation <Instance.Instance>(
                partition.Get(1).GetInstances(),
                K, experiment.GetParameter().GetSeed());

            RunExperiment(experiment.GetClassifier(), experiment.GetParameter(), result, crossValidation,
                          partition.Get(0));
            return(result);
        }
Example #7
        public void TestLargeSample2Fold()
        {
            var kFoldCrossValidation = new KFoldCrossValidation <int>(largeSample, 2, 1);

            for (var i = 0; i < 2; i++)
            {
                var items = new HashSet <int>();
                items.UnionWith(kFoldCrossValidation.GetTrainFold(i));
                items.UnionWith(kFoldCrossValidation.GetTestFold(i));
                Assert.AreEqual(500, kFoldCrossValidation.GetTestFold(i).Count);
                Assert.AreEqual(500, kFoldCrossValidation.GetTrainFold(i).Count);
                Assert.AreEqual(1000, items.Count);
            }
        }
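The generic KFoldCrossValidation<T>(data, k, seed) used in Examples #2, #3 and #7 is consistent with a simple slicing scheme: shuffle once with the seed, then hand out the i-th block as the test fold and the remainder as the train fold. The sketch below only illustrates that contract; the class name SimpleKFold and the omission of the seeded shuffle are assumptions, not the library's code.

using System.Collections.Generic;
using System.Linq;

public class SimpleKFold<T>
{
    private readonly List<T> _items;
    private readonly int _k;

    public SimpleKFold(IEnumerable<T> items, int k, int seed)
    {
        // The real class shuffles the items with the given seed before slicing; omitted here.
        _items = items.ToList();
        _k = k;
    }

    public List<T> GetTestFold(int i)
    {
        var foldSize = _items.Count / _k;
        return _items.Skip(i * foldSize).Take(foldSize).ToList();
    }

    public List<T> GetTrainFold(int i)
    {
        // Everything before and after the i-th test block.
        var foldSize = _items.Count / _k;
        return _items.Take(i * foldSize)
                     .Concat(_items.Skip((i + 1) * foldSize))
                     .ToList();
    }
}

With 1000 items and k = 2, this kind of split yields the 500/500 partition asserted in Example #7.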
Example #8
        /**
         * <summary>Wrapper function to learn the parameter (delta) in additive smoothing. The function first creates K NGrams
         * with the train folds of the corpus. Then optimizes delta with respect to the test folds of the corpus</summary>
         * <param name="corpus">Train corpus used to optimize delta parameter</param>
         * <param name="n">N in N-Gram.</param>
         */
        protected override void LearnParameters(List <List <TSymbol> > corpus, int n)
        {
            const int k      = 10;
            var       nGrams = new NGram <TSymbol> [k];
            var       kFoldCrossValidation =
                new KFoldCrossValidation <List <TSymbol> >(corpus, k, 0);

            for (var i = 0; i < k; i++)
            {
                nGrams[i] = new NGram <TSymbol>(kFoldCrossValidation.GetTrainFold(i), n);
            }

            _delta = LearnBestDelta(nGrams, kFoldCrossValidation, 0.1);
        }
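For context on what the learned delta does: additive (Lidstone) smoothing adds the same pseudo-count to every event, giving the standard estimate below. This helper is illustrative only and is not taken from the NGram library.

public static class AdditiveSmoothingSketch
{
    // Standard additive (Lidstone) smoothing with pseudo-count delta; vocabularySize is V.
    public static double Probability(int count, int contextCount, double delta, int vocabularySize)
    {
        return (count + delta) / (contextCount + delta * vocabularySize);
    }
}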
Example #9
        /**
         * <summary>The algorithm tries to optimize the best lambda for a given corpus. The algorithm uses perplexity on the validation
         * set as the optimization criterion.</summary>
         *
         * <param name="nGrams">10 N-Grams learned for different folds of the corpus. nGrams[i] is the N-Gram trained with i'th train fold of the corpus.</param>
         * <param name="kFoldCrossValidation">Cross-validation data used in training and testing the N-grams.</param>
         * <param name="lowerBound">Initial lower bound for optimizing the best lambda.</param>
         * <returns>Best lambda optimized with k-fold cross-validation.</returns>
         */
        private double LearnBestLambda(NGram <TSymbol>[] nGrams, KFoldCrossValidation <List <TSymbol> > kFoldCrossValidation,
                                       double lowerBound)
        {
            double bestPrevious = -1,
                   upperBound   = 0.999;
            var bestLambda      = (lowerBound + upperBound) / 2;
            var numberOfParts   = 5;
            var testFolds       = new List <List <TSymbol> > [10];

            for (var i = 0; i < 10; i++)
            {
                testFolds[i] = kFoldCrossValidation.GetTestFold(i);
            }

            while (true)
            {
                var bestPerplexity = double.MaxValue;
                for (var value = lowerBound; value <= upperBound; value += (upperBound - lowerBound) / numberOfParts)
                {
                    double perplexity = 0;
                    for (var i = 0; i < 10; i++)
                    {
                        nGrams[i].SetLambda(value);
                        perplexity += nGrams[i].GetPerplexity(testFolds[i]);
                    }

                    if (perplexity < bestPerplexity)
                    {
                        bestPerplexity = perplexity;
                        bestLambda     = value;
                    }
                }

                lowerBound = NewLowerBound(bestLambda, lowerBound, upperBound, numberOfParts);
                upperBound = NewUpperBound(bestLambda, lowerBound, upperBound, numberOfParts);
                if (bestPrevious != -1)
                {
                    if (System.Math.Abs(bestPrevious - bestPerplexity) / bestPerplexity < 0.001)
                    {
                        break;
                    }
                }

                bestPrevious = bestPerplexity;
            }

            return(bestLambda);
        }
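LearnBestLambda and LearnBestDelta share the same coarse-to-fine search: evaluate perplexity on a small grid, shrink the interval around the best point with NewLowerBound/NewUpperBound, and stop once the best perplexity changes by less than 0.1% between rounds. The standalone sketch below shows that pattern on an arbitrary objective; NewLowerBound and NewUpperBound are not shown in these examples, so the one-grid-step shrinking used here is an assumption.

using System;

public static class GridSearchSketch
{
    // Assumed behaviour: shrink the interval to one grid step around the best value.
    private static double NewLowerBound(double best, double lower, double upper, int parts)
        => Math.Max(best - (upper - lower) / parts, lower);

    private static double NewUpperBound(double best, double lower, double upper, int parts)
        => Math.Min(best + (upper - lower) / parts, upper);

    public static double Minimize(Func<double, double> objective, double lowerBound, double upperBound)
    {
        const int numberOfParts = 5;
        double bestPrevious = -1, bestValue = (lowerBound + upperBound) / 2;

        while (true)
        {
            var bestScore = double.MaxValue;
            for (var value = lowerBound; value <= upperBound; value += (upperBound - lowerBound) / numberOfParts)
            {
                var score = objective(value);
                if (score < bestScore)
                {
                    bestScore = score;
                    bestValue = value;
                }
            }

            var newLower = NewLowerBound(bestValue, lowerBound, upperBound, numberOfParts);
            var newUpper = NewUpperBound(bestValue, lowerBound, upperBound, numberOfParts);
            lowerBound = newLower;
            upperBound = newUpper;

            // Same stopping rule as the examples: relative improvement below 0.1%.
            if (bestPrevious != -1 && Math.Abs(bestPrevious - bestScore) / bestScore < 0.001)
            {
                break;
            }

            bestPrevious = bestScore;
        }

        return bestValue;
    }
}

For instance, Minimize(x => (x - 0.3) * (x - 0.3), 0.1, 0.999) narrows down to a value close to 0.3 after a few rounds.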
Example #10
 private void button1_Click(object sender, EventArgs e)
 {
     KFoldCrossValidation validation = null;
        // Stopwatch a = new Stopwatch();
        // a.Start();
     List<Data> trainingDataset = Utility.LoadDataset(DatasetType.Training);
        // a.Stop();
        // MessageBox.Show("Time : " + a.Elapsed.TotalSeconds);
     if (comboBox1.SelectedIndex == 0)
         validation = new KFoldCrossValidation(trainingDataset, 3, ClassifierType.Bayesian, ViewModel.FeatureExtraction.FeatureExtractionType.EuclideanDistance);
     else if (comboBox1.SelectedIndex == 1)
         validation = new KFoldCrossValidation(trainingDataset, 3, ClassifierType.KNearestNeighbour, ViewModel.FeatureExtraction.FeatureExtractionType.EuclideanDistance, int.Parse(KTextBox.Text));
     validation.Validate(int.Parse(NumofTimesTextBox.Text));
     ErrorMeanLabel.Text = validation.MeanError.ToString();
     OverallAccuracyLabel.Text = validation.Accuracy.ToString();
     ErrorVarianceLabel.Text = validation.VarianceError.ToString();
 }
Example #11
        /**
         * <summary>The algorithm tries to optimize the best delta for a given corpus. The algorithm uses perplexity on the validation
         * set as the optimization criterion.</summary>
         * <param name="nGrams">10 N-Grams learned for different folds of the corpus. nGrams[i] is the N-Gram trained with i'th train</param>
         *               fold of the corpus.
         * <param name="kFoldCrossValidation">Cross-validation data used in training and testing the N-grams.</param>
         * <param name="lowerBound">Initial lower bound for optimizing the best delta.</param>
         * <returns>Best delta optimized with k-fold cross-validation.</returns>
         */
        private double LearnBestDelta(NGram <TSymbol>[] nGrams, KFoldCrossValidation <List <TSymbol> > kFoldCrossValidation,
                                      double lowerBound)
        {
            double    bestPrevious = -1, upperBound = 1;
            var       bestDelta     = (lowerBound + upperBound) / 2;
            const int numberOfParts = 5;

            while (true)
            {
                var bestPerplexity = Double.MaxValue;
                for (var value = lowerBound; value <= upperBound; value += (upperBound - lowerBound) / numberOfParts)
                {
                    double perplexity = 0;
                    for (var i = 0; i < 10; i++)
                    {
                        nGrams[i].SetProbabilityWithPseudoCount(value, nGrams[i].GetN());
                        perplexity += nGrams[i].GetPerplexity(kFoldCrossValidation.GetTestFold(i));
                    }

                    if (perplexity < bestPerplexity)
                    {
                        bestPerplexity = perplexity;
                        bestDelta      = value;
                    }
                }

                lowerBound = NewLowerBound(bestDelta, lowerBound, upperBound, numberOfParts);
                upperBound = NewUpperBound(bestDelta, lowerBound, upperBound, numberOfParts);
                if (bestPrevious != -1)
                {
                    if (System.Math.Abs(bestPrevious - bestPerplexity) / bestPerplexity < 0.001)
                    {
                        break;
                    }
                }

                bestPrevious = bestPerplexity;
            }

            return(bestDelta);
        }
Example #12
        private void button1_Click(object sender, EventArgs e)
        {
            KFoldCrossValidation validation = null;
            // Stopwatch a = new Stopwatch();
            // a.Start();
            List <Data> trainingDataset = Utility.LoadDataset(DatasetType.Training);

            // a.Stop();
            // MessageBox.Show("Time : " + a.Elapsed.TotalSeconds);
            if (comboBox1.SelectedIndex == 0)
            {
                validation = new KFoldCrossValidation(trainingDataset, 3, ClassifierType.Bayesian, ViewModel.FeatureExtraction.FeatureExtractionType.EuclideanDistance);
            }
            else if (comboBox1.SelectedIndex == 1)
            {
                validation = new KFoldCrossValidation(trainingDataset, 3, ClassifierType.KNearestNeighbour, ViewModel.FeatureExtraction.FeatureExtractionType.EuclideanDistance, int.Parse(KTextBox.Text));
            }
            validation.Validate(int.Parse(NumofTimesTextBox.Text));
            ErrorMeanLabel.Text       = validation.MeanError.ToString();
            OverallAccuracyLabel.Text = validation.Accuracy.ToString();
            ErrorVarianceLabel.Text   = validation.VarianceError.ToString();
        }
Example #13
        /**
         * <summary>Wrapper function to learn the parameters (lambda1 and lambda2) in interpolated smoothing. The function first creates K NGrams
         * with the train folds of the corpus. Then optimizes lambdas with respect to the test folds of the corpus depending on given N.</summary>
         * <param name="corpus">Train corpus used to optimize lambda parameters</param>
         * <param name="n">N in N-Gram.</param>
         */
        protected override void LearnParameters(List <List <TSymbol> > corpus, int n)
        {
            if (n <= 1)
            {
                return;
            }

            var K      = 10;
            var nGrams = new NGram <TSymbol> [K];
            var kFoldCrossValidation = new KFoldCrossValidation <List <TSymbol> >(corpus, K, 0);

            for (var i = 0; i < K; i++)
            {
                nGrams[i] = new NGram <TSymbol>(kFoldCrossValidation.GetTrainFold(i), n);
                for (var j = 2; j <= n; j++)
                {
                    nGrams[i].CalculateNGramProbabilities(_simpleSmoothing, j);
                }

                nGrams[i].CalculateNGramProbabilities(_simpleSmoothing, 1);
            }

            if (n == 2)
            {
                _lambda1 = LearnBestLambda(nGrams, kFoldCrossValidation, 0.1);
            }
            else
            {
                if (n == 3)
                {
                    var bestLambdas = LearnBestLambdas(nGrams, kFoldCrossValidation, 0.1, 0.1);
                    _lambda1 = bestLambdas[0];
                    _lambda2 = bestLambdas[1];
                }
            }
        }
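As background on the lambdas being learned here (standard interpolated smoothing, not code from the library): the interpolated estimate mixes the trigram, bigram and unigram probabilities, which is also why LearnBestLambdas below keeps lambda1 + lambda2 under 1.

public static class InterpolationSketch
{
    // Illustrative only: interpolated trigram estimate with weights lambda1 and lambda2;
    // the remaining mass (1 - lambda1 - lambda2) goes to the unigram probability.
    public static double InterpolatedTrigram(double pTrigram, double pBigram, double pUnigram,
                                             double lambda1, double lambda2)
    {
        return lambda1 * pTrigram + lambda2 * pBigram + (1 - lambda1 - lambda2) * pUnigram;
    }
}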
Example #14
        public CrossValidationResult Evaluate(Chromosome chromosome)
        {
            var similarityThreshold      = chromosome.SimilarityThreshold;
            var refinementThreshold      = chromosome.RefinementThreshold;
            var maximumRefinedCandidates = chromosome.RefinedCandidates;

            var crossvalidation = new KFoldCrossValidation(0, _folds);

            crossvalidation.Evaluation = delegate(int k, int[] indicesTrain, int[] indicesValidation)
            {
                var trainingModel = _trainingFolds[k];

                var validationInputs  = _validationFolds[k].Item1;
                var validationOutputs = _validationFolds[k].Item2;

                var predictionMatches   = 0.0;
                var predictionsReturned = 0;

                for (var i = 0; i < validationInputs.Count; i++)
                {
                    var baseCandidates = trainingModel.TryGet(validationInputs[i].Type);

                    // Hamming distance closeness
                    var similarities = new List <PredictionInfo>();
                    foreach (var baseCandidate in baseCandidates)
                    {
                        var extendedSimilarity = baseCandidate.ExtendedSimilarity(validationInputs[i].ContextInfo);
                        var localSimilarity    = baseCandidate.ExtendedSimilarity(validationInputs[i].ContextInfo);
                        var similarity         = localSimilarity / extendedSimilarity > similarityThreshold ? localSimilarity : extendedSimilarity;

                        similarities.Add(new PredictionInfo(baseCandidate, similarity));
                    }

                    // Sort by Hamming distance
                    similarities.Sort();

                    // Keep the maximumRefinedCandidates most similar candidate contexts
                    var refinedCandidates = similarities.Take(maximumRefinedCandidates).ToList();

                    for (var j = 0; j < refinedCandidates.Count; j++)
                    {
                        refinedCandidates[j].ExtendedSimilarity = refinedCandidates[j].Context.NormalizedLCS(validationInputs[i].ContextInfo);
                        refinedCandidates[j].LocalSimilarity    = refinedCandidates[j].Context.LevenshteinSimilarity(validationInputs[i].ContextInfo);

                        if (refinedCandidates[j].ExtendedSimilarity < refinementThreshold &&
                            refinedCandidates[j].LocalSimilarity < refinementThreshold)
                        {
                            // Removing shifts the later candidates left, so step the index back to avoid skipping one.
                            refinedCandidates.RemoveAt(j--);
                        }
                    }

                    refinedCandidates.Sort();

                    if (refinedCandidates.Count == 0)
                    {
                        continue;
                    }

                    if (validationOutputs[i].Validate(refinedCandidates[0].Context.Invocation) == ValidationInfo.Result.Match)
                    {
                        predictionMatches++;
                    }
                }

                // TODO: Not all validationInputs may be processed
                return(new PredictionQualityValues(predictionMatches / validationInputs.Count, 1.0));
            };

            var predictionQualities = crossvalidation.Compute();

            return(predictionQualities);
        }
Example #15
        /**
         * <summary>The algorithm tries to optimize the best lambdas (lambda1, lambda2) for a given corpus. The algorithm uses perplexity on the validation
         * set as the optimization criterion.</summary>
         *
         * <param name="nGrams">10 N-Grams learned for different folds of the corpus. nGrams[i] is the N-Gram trained with i'th train fold of the corpus.</param>
         * <param name="kFoldCrossValidation">Cross-validation data used in training and testing the N-grams.</param>
         * <param name="lowerBound1">Initial lower bound for optimizing the best lambda1.</param>
         * <param name="lowerBound2">Initial lower bound for optimizing the best lambda2.</param>
         */
        private double[] LearnBestLambdas(NGram <TSymbol>[] nGrams,
                                          KFoldCrossValidation <List <TSymbol> > kFoldCrossValidation,
                                          double lowerBound1, double lowerBound2)
        {
            double upperBound1      = 0.999,
                   upperBound2      = 0.999,
                   bestPrevious     = -1;
            double bestLambda1      = (lowerBound1 + upperBound1) / 2,
                   bestLambda2      = (lowerBound2 + upperBound2) / 2;
            var       testFolds     = new List <List <TSymbol> > [10];
            const int numberOfParts = 5;

            for (var i = 0; i < 10; i++)
            {
                testFolds[i] = kFoldCrossValidation.GetTestFold(i);
            }

            while (true)
            {
                var bestPerplexity = double.MaxValue;
                for (var value1 = lowerBound1;
                     value1 <= upperBound1;
                     value1 += (upperBound1 - lowerBound1) / numberOfParts)
                {
                    for (var value2 = lowerBound2;
                         value2 <= upperBound2 && value1 + value2 < 1;
                         value2 += (upperBound2 - lowerBound2) / numberOfParts)
                    {
                        double perplexity = 0;
                        for (var i = 0; i < 10; i++)
                        {
                            nGrams[i].SetLambda(value1, value2);
                            perplexity += nGrams[i].GetPerplexity(testFolds[i]);
                        }

                        if (perplexity < bestPerplexity)
                        {
                            bestPerplexity = perplexity;
                            bestLambda1    = value1;
                            bestLambda2    = value2;
                        }
                    }
                }

                lowerBound1 = NewLowerBound(bestLambda1, lowerBound1, upperBound1, numberOfParts);
                upperBound1 = NewUpperBound(bestLambda1, lowerBound1, upperBound1, numberOfParts);
                lowerBound2 = NewLowerBound(bestLambda2, lowerBound2, upperBound2, numberOfParts);
                upperBound2 = NewUpperBound(bestLambda2, lowerBound2, upperBound2, numberOfParts);
                if (bestPrevious != -1)
                {
                    if (System.Math.Abs(bestPrevious - bestPerplexity) / bestPerplexity < 0.001)
                    {
                        break;
                    }
                }

                bestPrevious = bestPerplexity;
            }

            return(new[] { bestLambda1, bestLambda2 });
        }
Example #16
 /**
  * <summary>The ShuffleSentences method randomly shuffles the sentences list with the given seed value.</summary>
  *
  * <param name="seed">Seed value used to randomize shuffling.</param>
  */
 public void ShuffleSentences(int seed)
 {
     KFoldCrossValidation <Sentence> .Shuffle(sentences, new Random(seed));
 }
Example #17
 /**
  * <summary> Shuffles the instance list.</summary>
  * <param name="seed">Seed is used for random number generation.</param>
  */
 public void Shuffle(int seed)
 {
     KFoldCrossValidation <Instance.Instance> .Shuffle(_list, new Random(seed));
 }
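The static Shuffle called in Examples #16 and #17 is not shown. A seeded in-place shuffle of this kind is usually a Fisher–Yates pass like the sketch below; this illustrates the expected behaviour and is not the library's implementation.

using System;
using System.Collections.Generic;

public static class ShuffleSketch
{
    public static void Shuffle<T>(List<T> list, Random random)
    {
        // Walk backwards, swapping each element with a random position at or before it.
        for (var i = list.Count - 1; i > 0; i--)
        {
            var j = random.Next(i + 1);
            (list[i], list[j]) = (list[j], list[i]);
        }
    }
}

Using the same seed reproduces the same ordering, which is what makes the expected fold contents in Examples #2 and #3 deterministic.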