/// <summary>
/// Builds a single classifier through <c>classifierBuilder</c> and wraps it in a
/// <see cref="WeightedClassifier"/> with a weight of 1.0.
/// </summary>
/// <param name="trainingSet">Documents available for training.</param>
/// <param name="baggingParams">
/// When <c>NeedUnderSampling</c> is set, only a shuffled 85% of <paramref name="trainingSet"/>
/// (integer arithmetic, rounded down) is passed to the builder; otherwise the whole set is used as-is.
/// </param>
/// <param name="classifierParams">Forwarded unchanged to <c>classifierBuilder.Build</c>.</param>
/// <returns>The built classifier paired with a weight of 1.0.</returns>
private WeightedClassifier Build(TextDocument[] trainingSet, BaggingParams baggingParams, ClassifierParams classifierParams)
        {
            TextDocument[] sample;
            if (baggingParams.NeedUnderSampling)
            {
                // Shuffle first, then keep the leading 85% of the shuffled sequence.
                var shuffled = trainingSet.RandomShuffle();
                var keepCount = trainingSet.Length*85/100;
                sample = shuffled.Take(keepCount).ToArray();
            }
            else
            {
                sample = trainingSet;
            }

            var buildResult = classifierBuilder.Build(sample, classifierParams);

            return new WeightedClassifier
            {
                Classifier = buildResult.Classifier,
                Weight = 1.0
            };
        }
 /// <summary>
 /// Shuffles <paramref name="problems"/> and splits the shuffled sequence into a training part
 /// and an evaluation part.
 /// </summary>
 /// <param name="problems">Documents to distribute between the two sets.</param>
 /// <param name="trainingSet">Receives the first output of <c>Split</c>.</param>
 /// <param name="evaluationSet">Receives the second output of <c>Split</c>.</param>
 /// <param name="trainingSetPercent">
 /// Percentage used to compute the split position as <c>problems.Length * trainingSetPercent / 100</c>
 /// (integer arithmetic). Presumably expected to be in the range 0..100 - not validated here.
 /// </param>
 private void PrepareDataForBoosting(TextDocument[] problems, out TextDocument[] trainingSet, out TextDocument[] evaluationSet, int trainingSetPercent)
 {
     var shuffled = problems.RandomShuffle();
     var splitPosition = problems.Length * trainingSetPercent / 100;

     shuffled.Split(splitPosition, out trainingSet, out evaluationSet);
 }
 //        [Test]
 //        [TestCase("graphs")]
 //        public void LearnSvmWithBoosting(string targetTag)
 //        {
 //            var problems = problemService.LoadAllDocumentsFromStorage();
 //            TextDocument[] trainingSet;
 //            TextDocument[] evaluationSet;
 //            PrepareDataForBoosting(problems, out trainingSet, out evaluationSet);
 //            PrintStats("Training set: ", trainingSet, targetTag);
 //            PrintStats("Evaluation set: ", evaluationSet, targetTag);
 //
 //            var algorithms = Enumerable.Repeat(classificationAlgorithmBuilder.BuildSupportVectorMachine(), 3).ToArray();
 //            var featureSelector = featureSelectorBuilder.BuildChiSquared(0, 4, targetTag);
 //            var result = binaryClassifierBuilder.BuildBoosted(algorithms, featureSelector, trainingSet, targetTag);
 //
 //            var evaluationResult = classifierEvaluator.Evaluate(result.Classifier, evaluationSet, targetTag);
 //            
 //            var falseNegativeIds = evaluationResult.FalseNegatives.OrderByDescending(x => x.Result.ConfidenceMeasure).Select(x => x.DocumentId).ToArray();
 //            var falsePositivesIds = evaluationResult.FalsePositives.OrderByDescending(x => x.Result.ConfidenceMeasure).Select(x => x.DocumentId).ToArray();
 //
 //            var falseNegatives = falseNegativeIds.Join(problems, x => x, x => x.Id, (x, y) => y).ToArray();
 //            var falsePositives = falsePositivesIds.Join(problems, x => x, x => x.Id, (x, y) => y).ToArray();
 //
 //            Console.WriteLine("Evaluation FScore = {0}", evaluationResult.FScore);
 //            Console.WriteLine("Evaluation Precision = {0}", evaluationResult.Precision);
 //            Console.WriteLine("Evaluation Recall = {0}", evaluationResult.Recall);
 //            Console.WriteLine("Evaluation Accuracy = {0}", evaluationResult.Accuracy);
 //
 //            localStorageHandler.Write("Experiments\\SVM_Boosting\\", string.Format("{0}_false_negatives", targetTag), falseNegatives);
 //            localStorageHandler.Write("Experiments\\SVM_Boosting\\", string.Format("{0}_false_positives", targetTag), falsePositives);
 //        }
 //        [Test]
 //        [TestCase("math")]
 //        public void LearnWithBagging(string targetTag)
 //        {
 //            var problems = problemService.LoadAllDocumentsFromStorage();
 //            TextDocument[] trainingSet;
 //            TextDocument[] evaluationSet;
 //            PrepareDataForBoosting(problems, out trainingSet, out evaluationSet);
 //            PrintStats("Training set: ", trainingSet, targetTag);
 //            PrintStats("Evaluation set: ", evaluationSet, targetTag);
 //
 //            var algorithms = new []
 //            {
 //                classificationAlgorithmBuilder.BuildDecisionTree(),
 //                classificationAlgorithmBuilder.BuildSupportVectorMachine(),
 //                classificationAlgorithmBuilder.BuildNaiveBayes()
 //            };
 //            var featureSelector = featureSelectorBuilder.BuildChiSquared(0, 6, targetTag);
 //            var result = binaryClassifierBuilder.BuildBagged(algorithms, featureSelector, trainingSet, targetTag, false);
 //
 //            var evaluationResult = classifierEvaluator.Evaluate(result.Classifier, evaluationSet, targetTag);
 //
 //            var falseNegativeIds = evaluationResult.FalseNegatives.OrderByDescending(x => x.Result.ConfidenceMeasure).Select(x => x.DocumentId).ToArray();
 //            var falsePositivesIds = evaluationResult.FalsePositives.OrderByDescending(x => x.Result.ConfidenceMeasure).Select(x => x.DocumentId).ToArray();
 //
 //            var falseNegatives = falseNegativeIds.Join(problems, x => x, x => x.Id, (x, y) => y).ToArray();
 //            var falsePositives = falsePositivesIds.Join(problems, x => x, x => x.Id, (x, y) => y).ToArray();
 //
 //            Console.WriteLine("Evaluation FScore = {0}", evaluationResult.FScore);
 //            Console.WriteLine("Evaluation Precision = {0}", evaluationResult.Precision);
 //            Console.WriteLine("Evaluation Recall = {0}", evaluationResult.Recall);
 //            Console.WriteLine("Evaluation Accuracy = {0}", evaluationResult.Accuracy);
 //
 //            localStorageHandler.Write("Experiments\\Bagging\\", string.Format("{0}_false_negatives", targetTag), falseNegatives);
 //            localStorageHandler.Write("Experiments\\Bagging\\", string.Format("{0}_false_positives", targetTag), falsePositives);
 //        }
 //        [Test]
 //        [TestCase("math")]
 //        [TestCase("graphs")]
 //        [TestCase("strings")]
 //        [TestCase("geometry")]
 //        [TestCase("games")]
 //        public void LearnWithSvmBagging(string targetTag)
 //        {
 //            var problems = problemService.LoadAllDocumentsFromStorage();
 //            TextDocument[] trainingSet;
 //            TextDocument[] evaluationSet;
 //            PrepareDataForBoosting(problems, out trainingSet, out evaluationSet);
 //            PrintStats("Training set: ", trainingSet, targetTag);
 //            PrintStats("Evaluation set: ", evaluationSet, targetTag);
 //
 //            var algorithms = Enumerable.Range(0, 11).Select(x => classificationAlgorithmBuilder.BuildSupportVectorMachine()).ToArray();
 //            var featureSelector = featureSelectorBuilder.BuildChiSquared(0, 4, targetTag);
 //            var result = binaryClassifierBuilder.BuildBagged(algorithms, featureSelector, trainingSet, targetTag, true);
 //
 //            var evaluationResult = classifierEvaluator.Evaluate(result.Classifier, evaluationSet, targetTag);
 //
 //            var falseNegativeIds = evaluationResult.FalseNegatives.OrderByDescending(x => x.Result.ConfidenceMeasure).Select(x => x.DocumentId).ToArray();
 //            var falsePositivesIds = evaluationResult.FalsePositives.OrderByDescending(x => x.Result.ConfidenceMeasure).Select(x => x.DocumentId).ToArray();
 //
 //            var falseNegatives = falseNegativeIds.Join(problems, x => x, x => x.Id, (x, y) => y).ToArray();
 //            var falsePositives = falsePositivesIds.Join(problems, x => x, x => x.Id, (x, y) => y).ToArray();
 //
 //            Console.WriteLine("Evaluation FScore = {0}", evaluationResult.FScore);
 //            Console.WriteLine("Evaluation Precision = {0}", evaluationResult.Precision);
 //            Console.WriteLine("Evaluation Recall = {0}", evaluationResult.Recall);
 //            Console.WriteLine("Evaluation Accuracy = {0}", evaluationResult.Accuracy);
 //
 //            localStorageHandler.Write("Experiments\\SVM_Bagging\\", string.Format("{0}_false_negatives", targetTag), falseNegatives);
 //            localStorageHandler.Write("Experiments\\SVM_Bagging\\", string.Format("{0}_false_positives", targetTag), falsePositives);
 //        }
 //        
 //        [Test]
 //        [TestCase("math")]
 //        [TestCase("graphs")]
 //        [TestCase("strings")]
 //        [TestCase("geometry")]
 //        [TestCase("games")]
 //        public void LearnWithBayesBagging(string targetTag)
 //        {
 //            var problems = problemService.LoadAllDocumentsFromStorage();
 //            TextDocument[] trainingSet;
 //            TextDocument[] evaluationSet;
 //            PrepareDataForBoosting(problems, out trainingSet, out evaluationSet);
 //            PrintStats("Training set: ", trainingSet, targetTag);
 //            PrintStats("Evaluation set: ", evaluationSet, targetTag);
 //
 //            var algorithms = Enumerable.Range(0, 11).Select(x => classificationAlgorithmBuilder.BuildNaiveBayes()).ToArray();
 //            var featureSelector = featureSelectorBuilder.BuildChiSquared(0, 4, targetTag);
 //            var result = binaryClassifierBuilder.BuildBagged(algorithms, featureSelector, trainingSet, targetTag, true);
 //
 //            var evaluationResult = classifierEvaluator.Evaluate(result.Classifier, evaluationSet, targetTag);
 //
 //            var falseNegativeIds = evaluationResult.FalseNegatives.OrderByDescending(x => x.Result.ConfidenceMeasure).Select(x => x.DocumentId).ToArray();
 //            var falsePositivesIds = evaluationResult.FalsePositives.OrderByDescending(x => x.Result.ConfidenceMeasure).Select(x => x.DocumentId).ToArray();
 //
 //            var falseNegatives = falseNegativeIds.Join(problems, x => x, x => x.Id, (x, y) => y).ToArray();
 //            var falsePositives = falsePositivesIds.Join(problems, x => x, x => x.Id, (x, y) => y).ToArray();
 //
 //            Console.WriteLine("Evaluation FScore = {0}", evaluationResult.FScore);
 //            Console.WriteLine("Evaluation Precision = {0}", evaluationResult.Precision);
 //            Console.WriteLine("Evaluation Recall = {0}", evaluationResult.Recall);
 //            Console.WriteLine("Evaluation Accuracy = {0}", evaluationResult.Accuracy);
 //
 //            localStorageHandler.Write("Experiments\\Bayes_Bagging\\", string.Format("{0}_false_negatives", targetTag), falseNegatives);
 //            localStorageHandler.Write("Experiments\\Bayes_Bagging\\", string.Format("{0}_false_positives", targetTag), falsePositives);
 //        }
 //        
 //        [Test]
 //        [TestCase("math")]
 //        [TestCase("graphs")]
 //        [TestCase("strings")]
 //        [TestCase("geometry")]
 //        [TestCase("games")]
 //        public void LearnWithTreesBagging(string targetTag)
 //        {
 //            var problems = problemService.LoadAllDocumentsFromStorage();
 //            TextDocument[] trainingSet;
 //            TextDocument[] evaluationSet;
 //            PrepareDataForBoosting(problems, out trainingSet, out evaluationSet);
 //            PrintStats("Training set: ", trainingSet, targetTag);
 //            PrintStats("Evaluation set: ", evaluationSet, targetTag);
 //
 //            var algorithms = Enumerable.Range(0, 11).Select(x => classificationAlgorithmBuilder.BuildDecisionTree()).ToArray();
 //            var featureSelector = featureSelectorBuilder.BuildChiSquared(0, 4, targetTag);
 //            var result = binaryClassifierBuilder.BuildBagged(algorithms, featureSelector, trainingSet, targetTag, true);
 //
 //            var evaluationResult = classifierEvaluator.Evaluate(result.Classifier, evaluationSet, targetTag);
 //
 //            var falseNegativeIds = evaluationResult.FalseNegatives.OrderByDescending(x => x.Result.ConfidenceMeasure).Select(x => x.DocumentId).ToArray();
 //            var falsePositivesIds = evaluationResult.FalsePositives.OrderByDescending(x => x.Result.ConfidenceMeasure).Select(x => x.DocumentId).ToArray();
 //
 //            var falseNegatives = falseNegativeIds.Join(problems, x => x, x => x.Id, (x, y) => y).ToArray();
 //            var falsePositives = falsePositivesIds.Join(problems, x => x, x => x.Id, (x, y) => y).ToArray();
 //
 //            Console.WriteLine("Evaluation FScore = {0}", evaluationResult.FScore);
 //            Console.WriteLine("Evaluation Precision = {0}", evaluationResult.Precision);
 //            Console.WriteLine("Evaluation Recall = {0}", evaluationResult.Recall);
 //            Console.WriteLine("Evaluation Accuracy = {0}", evaluationResult.Accuracy);
 //
 //            localStorageHandler.Write("Experiments\\Trees_Bagging\\", string.Format("{0}_false_negatives", targetTag), falseNegatives);
 //            localStorageHandler.Write("Experiments\\Trees_Bagging\\", string.Format("{0}_false_positives", targetTag), falsePositives);
 //        }
 /// <summary>
 /// Distributes <paramref name="problems"/> across three sets using two shuffle-and-split passes.
 /// </summary>
 /// <remarks>
 /// Pass one shuffles the input and splits at <c>problems.Length * 60 / 100</c> (integer arithmetic),
 /// yielding <paramref name="trainingSet"/> and a remainder. Pass two shuffles that remainder again and
 /// splits it at half its length, yielding <paramref name="crossValidationSet"/> and
 /// <paramref name="evaluationSet"/>.
 /// </remarks>
 /// <param name="problems">Documents to distribute.</param>
 /// <param name="trainingSet">Receives the first output of the first split.</param>
 /// <param name="crossValidationSet">Receives the first output of the second split.</param>
 /// <param name="evaluationSet">Receives the second output of the second split.</param>
 private void PrepareData(TextDocument[] problems, out TextDocument[] trainingSet, out TextDocument[] crossValidationSet, out TextDocument[] evaluationSet)
 {
     TextDocument[] remainder;

     // First pass: carve the training portion off the shuffled input.
     var firstShuffle = problems.RandomShuffle();
     var trainingSplit = problems.Length * 60 / 100;
     firstShuffle.Split(trainingSplit, out trainingSet, out remainder);

     // Second pass: reshuffle what is left and halve it.
     var secondShuffle = remainder.RandomShuffle();
     var halfSplit = remainder.Length / 2;
     secondShuffle.Split(halfSplit, out crossValidationSet, out evaluationSet);
 }