/// <summary>
/// Initializes a new instance of the <c>CrossValidationService</c> class.
/// Creates a fresh <see cref="BackgroundWorker"/> and copies K, the training model and
/// the testing model out of the supplied cross-validation model.
/// </summary>
/// <param name="crossValidationModel">Source of the K, TrainingModel and TestingModel values.</param>
/// <param name="crossValidationProgressChanged">
/// Progress handler that is stored for later use; it is not attached to the worker here.
/// </param>
public CrossValidationService(
    CrossValidationModel crossValidationModel,
    ProgressChangedEventHandler crossValidationProgressChanged)
{
    _worker = new BackgroundWorker();
    _progressChangedEventHandler = crossValidationProgressChanged;

    // Snapshot of the settings carried by the cross-validation model.
    _k = crossValidationModel.K;
    _trainingModel = crossValidationModel.TrainingModel;
    _testingModel = crossValidationModel.TestingModel;
}
        ///// <summary>
        ///// Prepares the notes.
        ///// </summary>
        ///// <param name="_rootFolder">The root folder.</param>
        ///// <param name="_experimentDescription">The experiment description.</param>
        ///// <param name="logger">The logger.</param>
        //public void PrepareNotes()
        //{

        //}


        /// <summary>
        /// Prepares the dataset: wraps the input categories into the experiment-wide fold,
        /// creates the cross-validation folds from it and deploys the truth table.
        /// </summary>
        /// <param name="__dataset">The input dataset (categories); it is materialized with <c>ToList()</c>.</param>
        /// <param name="validationModel">
        /// The validation model. When <c>null</c>, a new model with <c>SingleFold = true</c>
        /// is used for fold creation instead.
        /// </param>
        public void PrepareDataset(IEnumerable <WebSiteDocumentsSet> __dataset, CrossValidationModel validationModel)
        {
            // NOTE(review): the field keeps the caller's value, so it stays null when the
            // single-fold fallback below is used — confirm that consumers expect this.
            crossValidation = validationModel;

            if (validationModel == null)
            {
                // No model supplied: fall back to a single-fold model.
                validationModel = new CrossValidationModel { SingleFold = true };
            }
            else
            {
                validationModel.Describe(notes);
            }

            truthTable = new ExperimentTruthTable();
            dataset    = new ExperimentDataSetFold(__dataset.ToList(), runName);

            // ------------------ creation of Experiment Folds ------------------ //
            // validationModel is never null at this point (see the fallback above),
            // so the folds are always created.
            notes?.logStartPhase("[-] Creating k-fold crossvalidation datasets", "The input dataset with [" + dataset.Count + "] categories, is sliced into k=[" + validationModel.K + "] mutually exclusive folds, of ~equal size");

            folds = new ExperimentDataSetFolds();
            folds.Deploy(validationModel, dataset, notes);

            truthTable.Deploy(dataset, notes);

            // Closes the phase opened by logStartPhase above.
            notes?.logEndPhase();
        }
Exemple #3
0
        /// <summary>
        /// Adds the dataset name, the cross-validation signature and the per-fold test and
        /// training instance counts to the data fields of the report.
        /// </summary>
        /// <param name="report">The report whose <c>data</c> collection receives the fields.</param>
        /// <param name="crossValidationModel">The model providing the short signature.</param>
        /// <param name="mainContext">The context providing the dataset name and the test summaries.</param>
        public static void SetReportDataFields(this classificationReport report, CrossValidationModel crossValidationModel, ExperimentModelExecutionContext mainContext)
        {
            report.data.Add(nameof(ReportDataFieldEnum.DataSetName), mainContext.dataset.name, "Name of dataset used in the experiment");
            report.data.Add(nameof(ReportDataFieldEnum.ValidationK), crossValidationModel.GetShortSignature(), "Cross validation model signature");

            // Sum the targets over all test summaries and count the summaries.
            double totalTargets = 0;
            int summaryCount = 0;

            foreach (var summary in mainContext.testSummaries)
            {
                totalTargets += summary.Targets;
                summaryCount += 1;
            }

            // GetRatio is defined elsewhere; presumably total / count — confirm.
            double testAverage = totalTargets.GetRatio(summaryCount);

            // Training share is the overall total minus the per-fold test share.
            double trainingAverage = totalTargets - testAverage;

            report.data.Add(nameof(ReportDataFieldEnum.TestSetCount), testAverage.ToString("F2"), "Average number of test instances per fold");
            report.data.Add(nameof(ReportDataFieldEnum.TrainingSetCount), trainingAverage.ToString("F2"), "Average number of training instances per fold");
        }
        /// <summary>
        /// Deploys the specified settings: slices every category of the dataset into K parts
        /// and assembles the experiment folds from those parts according to the distribution
        /// matrix of the settings. With <c>SingleFold</c> set, one fold holding the whole
        /// dataset is added instead.
        /// </summary>
        /// <param name="_settings">The settings.</param>
        /// <param name="_dataset">Un-folded dataset, without having the unknown class defined</param>
        /// <param name="logger">The logger. Not used by this method.</param>
        public void Deploy(CrossValidationModel _settings, IEnumerable <WebSiteDocumentsSet> _dataset, ILogBuilder logger)
        {
            settings = _settings;

            if (settings.SingleFold)
            {
                // Single fold override: the whole dataset becomes the only fold.
                name = "1-fold -- single fold override";
                ExperimentDataSetFold fold = new ExperimentDataSetFold();
                fold.name = "SingleFold";
                fold.AddRange(_dataset);
                Add(fold);
                return;
            }

            if (_dataset is ExperimentDataSetFold foldInstance)
            {
                dataset = foldInstance;
            }

            name = settings.K + "-fold Tr[" + _settings.TrainingFolds + "] Ts[" + _settings.TestFolds + "]";

            // Each category is sliced into K parts; the slices are looked up per category below.
            Dictionary <WebSiteDocumentsSet, CategorySlicedFolds> slicedFolds = new Dictionary <WebSiteDocumentsSet, CategorySlicedFolds>();

            foreach (WebSiteDocumentsSet cat in _dataset)
            {
                CategorySlicedFolds slices = new CategorySlicedFolds();
                slices.Deploy(cat, settings.K, settings.randomFolds);
                slicedFolds.Add(cat, slices);
            }

            // --------------------------------------------------------- //

            var distributionMatrix = settings.GetDistributionMatrix();

            Int32 foldsToCreate = settings.K;

            if (settings.LimitFoldsExecution > 0)
            {
                // Never create more than K folds: the loop below indexes distributionMatrix[i],
                // which is assumed to have one row per fold (K rows) — TODO confirm.
                foldsToCreate = Math.Min(settings.K, settings.LimitFoldsExecution);
            }

            for (int i = 0; i < foldsToCreate; i++)
            {
                ExperimentDataSetFold setFold = new ExperimentDataSetFold();
                setFold.name = settings.K + "-fold[" + i + "]";

                setFold.CopyLabelNames(_dataset);

                // Slices not selected for training are collected under the UNKNOWN label.
                WebSiteDocumentsSet unknownCat = new WebSiteDocumentsSet(SpaceLabel.UNKNOWN, "Test category - " + setFold.name);

                setFold.Add(unknownCat);

                foreach (KeyValuePair <WebSiteDocumentsSet, CategorySlicedFolds> catPair in slicedFolds)
                {
                    // Category of this fold that carries the same name as the source category.
                    WebSiteDocumentsSet cat = setFold.First(x => x.name == catPair.Key.name);

                    for (int s = 0; s < settings.K; s++)
                    {
                        bool toTraining = distributionMatrix[i][s];

                        if (toTraining)
                        {
                            cat.AddRange(catPair.Value[s].WeakClone());
                        }
                        else
                        {
                            unknownCat.AddRange(catPair.Value[s].WeakClone());
                        }
                    }
                }

                Add(setFold);
            }
        }