/// <summary>
/// Initializes a new instance of the <see cref="CrossValidationService"/> class.
/// </summary>
/// <param name="crossValidationModel">Model providing the fold count (K) and the training and testing models.</param>
/// <param name="crossValidationProgressChanged">Progress-changed handler kept by the service.</param>
public CrossValidationService(
    CrossValidationModel crossValidationModel,
    ProgressChangedEventHandler crossValidationProgressChanged)
{
    // Infrastructure owned by the service itself.
    _worker = new BackgroundWorker();

    // Configuration copied from the supplied cross-validation model.
    _k = crossValidationModel.K;
    _trainingModel = crossValidationModel.TrainingModel;
    _testingModel = crossValidationModel.TestingModel;

    // Callback supplied by the caller.
    _progressChangedEventHandler = crossValidationProgressChanged;
}
/// <summary>
/// Prepares the dataset: wraps the input categories into the experiment dataset,
/// creates the cross-validation folds and deploys the truth table.
/// </summary>
/// <param name="__dataset">The categories that make up the input dataset.</param>
/// <param name="validationModel">
/// The validation model. When <c>null</c>, a new model with <c>SingleFold = true</c>
/// is used for fold creation instead.
/// </param>
public void PrepareDataset(IEnumerable<WebSiteDocumentsSet> __dataset, CrossValidationModel validationModel)
{
    // The field receives the model exactly as supplied by the caller (possibly null);
    // the single-fold fallback created below is only used locally.
    // NOTE(review): confirm whether the field should hold the fallback model as well.
    crossValidation = validationModel;

    if (validationModel != null)
    {
        validationModel.Describe(notes);
    }
    else
    {
        validationModel = new CrossValidationModel();
        validationModel.SingleFold = true;
    }

    truthTable = new ExperimentTruthTable();
    dataset = new ExperimentDataSetFold(__dataset.ToList(), runName);

    // ------------------ creation of Experiment Folds ------------------ //
    // validationModel is guaranteed to be non-null here, so the folds are always created.
    if (notes != null)
    {
        notes.logStartPhase("[-] Creating k-fold crossvalidation datasets", "The input dataset with [" + dataset.Count + "] categories, is sliced into k=[" + validationModel.K + "] mutually exclusive folds, of ~equal size");
    }

    folds = new ExperimentDataSetFolds();
    folds.Deploy(validationModel, dataset, notes);

    truthTable.Deploy(dataset, notes);

    if (notes != null)
    {
        notes.logEndPhase();
    }
}
/// <summary>
/// Adds the dataset name, the cross-validation signature and the average
/// per-fold test and training instance counts to the report data.
/// </summary>
/// <param name="report">The report whose data collection receives the fields.</param>
/// <param name="crossValidationModel">The cross-validation model providing the short signature.</param>
/// <param name="mainContext">The execution context providing the dataset and the test summaries.</param>
public static void SetReportDataFields(this classificationReport report, CrossValidationModel crossValidationModel, ExperimentModelExecutionContext mainContext)
{
    report.data.Add(
        nameof(ReportDataFieldEnum.DataSetName),
        mainContext.dataset.name,
        "Name of dataset used in the experiment");

    report.data.Add(
        nameof(ReportDataFieldEnum.ValidationK),
        crossValidationModel.GetShortSignature(),
        "Cross validation model signature");

    // Sum the targets over all test summaries and count the summaries.
    double totalTargets = 0;
    int summaryCount = 0;
    foreach (var summary in mainContext.testSummaries)
    {
        totalTargets += summary.Targets;
        summaryCount++;
    }

    // Test count is the total divided by the number of summaries (via GetRatio);
    // training count is the total minus that per-summary test count.
    double testAverage = totalTargets.GetRatio(summaryCount);
    double trainingAverage = totalTargets - testAverage;

    report.data.Add(
        nameof(ReportDataFieldEnum.TestSetCount),
        testAverage.ToString("F2"),
        "Average number of test instances per fold");

    report.data.Add(
        nameof(ReportDataFieldEnum.TrainingSetCount),
        trainingAverage.ToString("F2"),
        "Average number of training instances per fold");
}
/// <summary>
/// Deploys the specified settings: creates the experiment folds from the given dataset
/// and adds them to this collection.
/// </summary>
/// <param name="_settings">The settings.</param>
/// <param name="_dataset">Un-folded dataset, without having the unknown class defined</param>
/// <param name="logger">The logger. Currently not used by this method.</param>
public void Deploy(CrossValidationModel _settings, IEnumerable<WebSiteDocumentsSet> _dataset, ILogBuilder logger)
{
    settings = _settings;

    // Single-fold override: the whole dataset becomes one fold, no slicing is done.
    if (settings.SingleFold)
    {
        name = "1-fold -- single fold override";

        ExperimentDataSetFold singleFold = new ExperimentDataSetFold();
        singleFold.name = "SingleFold";
        singleFold.AddRange(_dataset);
        Add(singleFold);
        return;
    }

    if (_dataset is ExperimentDataSetFold foldInstance)
    {
        dataset = foldInstance;
    }

    name = settings.K + "-fold Tr[" + _settings.TrainingFolds + "] Ts[" + _settings.TestFolds + "]";

    // Slice every category into K parts.
    Dictionary<WebSiteDocumentsSet, CategorySlicedFolds> slicedFolds = new Dictionary<WebSiteDocumentsSet, CategorySlicedFolds>();

    foreach (WebSiteDocumentsSet cat in _dataset)
    {
        CategorySlicedFolds slices = new CategorySlicedFolds();
        slices.Deploy(cat, settings.K, settings.randomFolds);
        slicedFolds.Add(cat, slices);
    }

    // --------------------------------------------------------- //

    var distributionMatrix = settings.GetDistributionMatrix();

    // The limit may only reduce the number of folds: a value above K would index
    // distributionMatrix[i] beyond the K folds.
    // NOTE(review): assumes the distribution matrix has one row per fold (K rows) - confirm.
    Int32 foldsToCreate = settings.K;
    if (settings.LimitFoldsExecution > 0 && settings.LimitFoldsExecution < settings.K)
    {
        foldsToCreate = settings.LimitFoldsExecution;
    }

    for (int i = 0; i < foldsToCreate; i++)
    {
        ExperimentDataSetFold setFold = new ExperimentDataSetFold();
        setFold.name = settings.K + "-fold[" + i + "]";

        setFold.CopyLabelNames(_dataset);

        // Slices that are not used for training in this fold are collected in the UNKNOWN category.
        WebSiteDocumentsSet unknownCat = new WebSiteDocumentsSet(SpaceLabel.UNKNOWN, "Test category - " + setFold.name);
        setFold.Add(unknownCat);

        foreach (KeyValuePair<WebSiteDocumentsSet, CategorySlicedFolds> catPair in slicedFolds)
        {
            // Category of this fold with the same name as the source category.
            WebSiteDocumentsSet cat = setFold.First(x => x.name == catPair.Key.name);

            for (int s = 0; s < settings.K; s++)
            {
                // distributionMatrix[i][s] is true when slice s goes to training in fold i.
                bool toTraining = distributionMatrix[i][s];

                if (toTraining)
                {
                    cat.AddRange(catPair.Value[s].WeakClone());
                }
                else
                {
                    unknownCat.AddRange(catPair.Value[s].WeakClone());
                }
            }
        }

        Add(setFold);
    }
}