/// <summary>
/// Registers the results of one finished feature-vector extractor (FVE) run:
/// the best-performing classifier's report, the mean performance across its
/// classifiers, and a timestamped completion record line.
/// </summary>
/// <param name="bestPerformingClassifier">Report of the classifier that scored best for this FVE.</param>
/// <param name="meanPerformance">Cross-classifier mean performance report for this FVE.</param>
/// <param name="fveModel">The feature-vector extractor that just completed.</param>
public void AddBestPerformer(DocumentSetCaseCollectionReport bestPerformingClassifier, DocumentSetCaseCollectionReport meanPerformance, IWebFVExtractor fveModel)
{
    bestPerformingClassifiers.Add(bestPerformingClassifier);

    // Human-readable completion record; interpolation used for readability
    // (file already uses $"…" elsewhere).
    // NOTE(review): DateTime.Now is local wall-clock time — fine for a log line,
    // consider UtcNow if records are compared across machines.
    String line = $"[{fveModel.name}] completed {DateTime.Now.ToLongTimeString()}";
    fveFinishedRecods.Add(line);

    meanPerformanceForExtractors.Add(meanPerformance);
}
/// <summary>
/// Returns the report of the top-scoring classifier, lazily creating a
/// placeholder report (named "null") when none has been assigned yet.
/// The report's display name is refreshed from the current extractor on every call.
/// </summary>
/// <returns>The (possibly newly created) top-classifier report.</returns>
public DocumentSetCaseCollectionReport GetTopClassifierReport()
{
    var report = topClassifierReport ?? (topClassifierReport = new DocumentSetCaseCollectionReport("null"));
    report.Name = extractor.name + " - top F1";
    return report;
}
/// <summary>
/// Builds a human-readable, multi-line summary of the experiment run: setup
/// parameters, data set statistics, per-FVE feature-vector layout, the best
/// classifier per FVE model, the overall best performer, the FVE with the
/// highest S1 measure, mean classifier performance per FVE, and the k-fold
/// sample-distribution hashes.
/// </summary>
/// <param name="output">Optional list to append to; a new list is created when null.</param>
/// <returns>The same list instance with the description lines appended.</returns>
public List <String> DescribeSelf(List <String> output = null)
{
    if (output == null) { output = new List <string>(); }

    output.Add("Experiment [" + experiment.name + "] done in: " + Duration.ToString("F2") + " minutes");
    output.Add(context.setup.description); // context.validationCollections.Count
    // FIX: typo "Classiffiers" -> "Classifiers" in the rendered line.
    output.Add("k-Fold cross validation k[" + experiment.validationSetup.k + "] - RND(T/E)SMP[" + experiment.validationSetup.randomize.ToString() + "] - FVE models [" + experiment.models.Count + "] - Classifiers [" + experiment.classifiers.Count + "]");

    // Data set statistics: category count, total cases, cases-per-category ratio.
    Int32 nCats = 0;
    Int32 nCases = 0;
    Double nCasePerCat = 0;
    foreach (var c in context.classes.GetClasses())
    {
        nCats++;
        nCases += c.WebSiteSample.Count();
    }
    nCasePerCat = nCases.GetRatio(nCats);
    output.Add("Categories [" + nCats + "] with [" + nCases + "] -- cases per category [" + nCasePerCat.ToString("F2") + "]");

    // FIX: guard the 'as' cast — a model of a different type previously caused a
    // NullReferenceException on the dereference below.
    var model = context.tools.model as pipelineMCRepo.model.mcRepoProcessModel;
    if (model != null)
    {
        output.Add("Pages per web site (limit) [" + model.setup.target_languagePagePerSite + "]");
    }

    // One line per FVE model showing which feature-vector slots are active
    // (inactive slots rendered as a 10-dash placeholder).
    // FIX: the composed line was built but never emitted before (dead computation).
    foreach (var m in context.setup.models)
    {
        String ln = m.name.TrimToMaxLength(15);
        foreach (var fv in m.settings.featureVectors.serialization)
        {
            if (fv.isActive)
            {
                ln = ln.add("[" + fv.name.TrimToMaxLength(10, " ") + "]", " ");
            }
            else
            {
                ln = ln.add("[" + ("-".Repeat(10)) + "]", " ");
            }
        }
        output.Add(ln);
    }

    output.Add("----");
    output.Add("The best classifier per FVE models, by cross k-fold mean of F1 (macro-average): ");
    output.Add(String.Format("[{0,-30}] [{1,10}] [{2,10:F5}]", "Feature Vector Model", "Top class.", "Macro F1"));
    foreach (var cl in bestPerformingClassifiers)
    {
        if (cl == theBestPerformer)
        {
            output.Add(String.Format("[{0,-30}] [{1,10}] [{2,10:F5}] <-- the best ", cl.Name, cl.Classifier, cl.F1measure));
        }
        else
        {
            output.Add(String.Format("[{0,-30}] [{1,10}] [{2,10:F5}]", cl.Name, cl.Classifier, cl.F1measure));
        }
    }

    output.Add("----");
    output.Add("The best performer: ");
    output.Add("Name: " + theBestPerformer.Name);
    output.Add("Classifier: " + theBestPerformer.Classifier);
    output.Add("F1 measure: " + theBestPerformer.F1measure.ToString("F5"));

    output.Add("----");
    output.Add("The FVE with highest S1 measure: ");
    output.Add("Name: " + bestModel.modelName);
    output.Add("Range width: " + bestModel.RangeWidthAvg.ToString("F5"));
    output.Add("Range position: " + bestModel.RangePositionAvg.ToString("F5"));
    output.Add("S1 measure: " + bestModel.S1Measure.ToString("F5"));

    output.Add("----");
    output.Add("Mean classifier performances by FVE models: ");

    // Locate the min and max mean-F1 reports so they can be labeled in the listing.
    DocumentSetCaseCollectionReport minMean = new DocumentSetCaseCollectionReport();
    minMean.F1measure = 1;
    DocumentSetCaseCollectionReport maxMean = new DocumentSetCaseCollectionReport();
    maxMean.F1measure = 0;
    foreach (var cl in meanPerformanceForExtractors)
    {
        if (cl.F1measure <= minMean.F1measure) { minMean = cl; }
        if (cl.F1measure > maxMean.F1measure) { maxMean = cl; }
    }
    foreach (var cl in meanPerformanceForExtractors)
    {
        String lb = " --- ";
        if (cl == minMean) { lb = " min "; }
        if (cl == maxMean) { lb = " max "; }
        output.Add(String.Format("[{0,-30}] P[{1,10:F5}] R[{2,10:F5}] F1[{3,10:F5}] [{4,5}]", cl.Name, cl.Precision, cl.Recall, cl.F1measure, lb));
    }

    // FIX: typo "infication" -> "indication" in the rendered footnote.
    output.Add(" --- FVE cross-classifier means are computed as quality indication for FVE's configuration");
    output.Add(" --- FVE models and k-fold sample distribution MD5 hash");
    foreach (var c in valColVsModelVsSampleHash)
    {
        output.Add(c);
    }

    return(output);
}
/// <summary>
/// Aggregates the k-fold validation results held by this collection (keyed by
/// fold) into report tables for this extractor: a cumulative all-folds table,
/// a macro-average summary and (when DOMAKE_MICROaverage) a micro-average
/// summary, each rendered and saved into <paramref name="folder"/>. Also
/// updates meanClassifierReport and tracks the best classifier by mean F1.
/// </summary>
/// <param name="context">Experiment execution context (setup, classes, logger, tools).</param>
/// <param name="folder">Destination folder for the generated report files.</param>
public void MakeReports(experimentExecutionContext context, folderNode folder) {
    meanClassifierReport = new DocumentSetCaseCollectionReport(extractor.name);
    // NOTE(review): tempStructure is populated below but never read again in this
    // method, and firstCase is assigned but never used afterward — both look like
    // leftovers; confirm before removing.
    aceDictionary2D <IWebPostClassifier, kFoldValidationCase, DocumentSetCaseCollectionReport> tempStructure = new aceDictionary2D <IWebPostClassifier, kFoldValidationCase, DocumentSetCaseCollectionReport>();
    DSCCReports firstCase = null;
    List <IWebPostClassifier> classifiers = new List <IWebPostClassifier>();

    // Collect the distinct classifiers seen across all folds (insertion order kept).
    foreach (var kFoldCasePair in this) {
        if (firstCase == null) { firstCase = kFoldCasePair.Value; }
        foreach (var pair in kFoldCasePair.Value.avgReports) {
            tempStructure[pair.Key, kFoldCasePair.Key] = pair.Value;
            if (!classifiers.Contains(pair.Key)) { classifiers.Add(pair.Key); }
        }
    }

    // DataSet dataSet = new DataSet(context.setup.name);

    // <---------- CREATING AVERAGE TABLE -----------------------------------------------------
    var tpAvgMacro = new DataTableTypeExtended <DocumentSetCaseCollectionReport>(context.setup.name + " summary", "Cross k-fold averages measures, fold-level measures are computed by macro-average method");
    var tpAvgMicro = new DataTableTypeExtended <DocumentSetCaseCollectionReport>(context.setup.name + " summary", "Cross k-fold averages measures, fold-level measures are computed by micro-average method");
    List <DocumentSetCaseCollectionReport> macroaverages = new List <DocumentSetCaseCollectionReport>();
    DataTableTypeExtended <DocumentSetCaseCollectionReport> EMperKFolds = new DataTableTypeExtended <DocumentSetCaseCollectionReport>(extractor.name + "_allReports");

    foreach (IWebPostClassifier classifier in classifiers) {
        // < ---- report on each classifier
        context.logger.log("-- producing report about [" + classifier.name + "]");

        //objectTable<DocumentSetCaseCollectionReport> tp = new objectTable<DocumentSetCaseCollectionReport>(nameof(DocumentSetCaseCollectionReport.Name), classifier + "_sum");

        // avg accumulates fold-level (macro-averaged) reports; rep_eval is the
        // micro-averaged counterpart built from the pooled metric set.
        DocumentSetCaseCollectionReport avg = new DocumentSetCaseCollectionReport(classifier.name + " macro-averaging, k-fold avg. ");
        DocumentSetCaseCollectionReport rep_eval = new DocumentSetCaseCollectionReport(classifier.name + " micro-averaging, k-fold avg.");
        rep_eval.Classifier = classifier.name;
        classificationEvalMetricSet metrics = new classificationEvalMetricSet();
        classificationEval eval = new classificationEval();
        //eval = metrics[classifier.name];

        // c counts the folds actually visited, used as divisor for the mean below.
        Int32 c = 0;
        foreach (KeyValuePair <kFoldValidationCase, DSCCReports> kFoldCasePair in this) {
            DocumentSetCaseCollectionReport rep = kFoldCasePair.Value.avgReports[classifier];
            kFoldValidationCase vCase = kFoldCasePair.Key;
            classificationEvalMetricSet met = rep.GetSetMetrics();
            if (met != null) {
                // Pool per-class evaluation counts across all classes of this fold.
                foreach (IDocumentSetClass cl in context.classes.GetClasses()) { eval = eval + met[cl.name]; }
            }
            rep.Name = classifier.name + "_" + vCase.name;
            avg.AddValues(rep);
            EMperKFolds.AddRow(rep);
            c++;
        }
        rep_eval.AddValues(metrics, classificationMetricComputation.microAveraging);
        avg.Classifier = classifier.name;
        avg.DivideValues(c);

        // <<< detecting the best performed classifier in all evaluation folds
        if (avg.F1measure > highestF1Value) { highestF1Value = avg.F1measure; topClassifierReport = avg; }

        meanClassifierReport.AddValues(avg);
        // -----------------
        EMperKFolds.AddRow(avg);
        tpAvgMacro.AddRow(avg);
        macroaverages.Add(avg);
        if (DOMAKE_MICROaverage) { tpAvgMicro.AddRow(rep_eval); }
        // tp.Add(rep_eval);

        if (context.tools.operation.DoMakeReportForEachClassifier) {
            // NOTE(review): cTable is a reference to EMperKFolds, not a copy — each
            // per-classifier "report" saved below is the cumulative all-folds table,
            // re-titled for the current classifier; confirm whether a per-classifier
            // snapshot/copy was intended here.
            DataTable cTable = EMperKFolds;
            cTable.SetTitle($"{classifier.name} report");
            cTable.SetDescription("Summary " + context.setup.validationSetup.k + "-fold validation report for [" + classifier.name + "]");
            cTable.SetAdditionalInfoEntry("FV Extractor", extractor.name);
            cTable.SetAdditionalInfoEntry("Classifier", classifier.name);
            cTable.SetAdditionalInfoEntry("Class name", classifier.GetType().Name);
            cTable.SetAdditionalInfoEntry("Correct", rep_eval.Correct);
            cTable.SetAdditionalInfoEntry("Wrong", rep_eval.Wrong);
            //cTable.SetAdditionalInfoEntry("Precision", rep_eval.Precision);
            //cTable.SetAdditionalInfoEntry("Recall", rep_eval.Recall);
            //cTable.SetAdditionalInfoEntry("F1", rep_eval.F1measure);
            cTable.SetAdditionalInfoEntry("True Positives", metrics[classifier.name].truePositives);
            cTable.SetAdditionalInfoEntry("False Negatives", metrics[classifier.name].falseNegatives);
            cTable.SetAdditionalInfoEntry("False Positives", metrics[classifier.name].falsePositives);
            cTable.AddExtra("Classifier: " + classifier.name + " [" + classifier.GetType().Name + "]");
            var info = classifier.DescribeSelf();
            info.ForEach(x => cTable.AddExtra(x));
            cTable.AddExtra("-----------------------------------------------------------------------");
            cTable.AddExtra("Precision, Recall and F1 measures expressed in this table are computed by macroaveraging shema");
            // output.CopyRowsFrom(cTable);
            cTable.GetReportAndSave(folder, appManager.AppInfo, extractor.name + "_classifier_" + classifier.name);
            // dataSet.AddTable(cTable);
        }
    }

    rangeFinderForDataTable rangerMacro = new rangeFinderForDataTable(tpAvgMacro, "Name");
    // NOTE(review): divides by classifiers.Count without an empty-check — confirm
    // this method is never called with zero classifiers collected.
    meanClassifierReport.DivideValues(classifiers.Count);

    if (macroaverages.Count > 0) {
        // Highlight the rows holding the max (and collect the min) F1 macro-averages.
        Double maxF1 = macroaverages.Max(x => x.F1measure);
        Double minF1 = macroaverages.Min(x => x.F1measure);
        List <String> minCaseNames = macroaverages.Where(x => x.F1measure == minF1).Select(x => x.Name).ToList();
        List <String> maxCaseNames = macroaverages.Where(x => x.F1measure == maxF1).Select(x => x.Name).ToList();
        var style = EMperKFolds.GetRowMetaSet().SetStyleForRowsWithValue <String>(DataRowInReportTypeEnum.dataHighlightA, nameof(DocumentSetCaseCollectionReport.Name), maxCaseNames);
        EMperKFolds.GetRowMetaSet().AddUnit(style);
        // style = tpAvgMacro.GetRowMetaSet().SetStyleForRowsWithValue<String>(DataRowInReportTypeEnum.dataHighlightC, nameof(DocumentSetCaseCollectionReport.Name), minCaseNames);
        tpAvgMacro.SetAdditionalInfoEntry("FV Extractor", extractor.name);
        if (DOMAKE_MICROaverage) { tpAvgMicro.SetAdditionalInfoEntry("FV Extractor", extractor.name); }
        List <String> averageNames = macroaverages.Select(x => x.Name).ToList();
        var avg_style = EMperKFolds.GetRowMetaSet().SetStyleForRowsWithValue <String>(DataRowInReportTypeEnum.dataHighlightC, nameof(DocumentSetCaseCollectionReport.Name), averageNames);
        foreach (var x in averageNames) { avg_style.AddMatch(x); }
    }

    // ::: ------------------------------------------------------------------------------------------------- ::: --------------------------------------------------------------------- ::: //

    tpAvgMacro.SetTitle($"{extractor.name} - macroaverage report");
    if (DOMAKE_MICROaverage) { tpAvgMicro.SetTitle($"{extractor.name} - microaverage report"); }
    tpAvgMacro.AddExtra("Complete report on " + context.setup.validationSetup.k + "-fold validation FVE [" + extractor.name + "]");
    tpAvgMacro.AddExtra("Fold-level P, R and F1 measures are computed by macroaveraging method, values here are cross k-fold means.");
    if (DOMAKE_MICROaverage) { tpAvgMicro.AddExtra("Complete " + context.setup.validationSetup.k + "-fold validation report for FVE [" + extractor.name + "]"); }
    if (DOMAKE_MICROaverage) { tpAvgMicro.AddExtra("Fold-level P, R and F1 measures are computed by microaveraging method, values here are cross k-fold means."); }
    context.AddExperimentInfo(tpAvgMacro);
    if (DOMAKE_MICROaverage) { context.AddExperimentInfo(tpAvgMicro); }
    tpAvgMacro.AddExtra(extractor.description);

    if (extractor is semanticFVExtractor) {
        semanticFVExtractor semExtractor = (semanticFVExtractor)extractor;
        semExtractor.termTableConstructor.DescribeSelf().ForEach(x => tpAvgMacro.AddExtra(x));
        semExtractor.CloudConstructor.DescribeSelf().ForEach(x => tpAvgMacro.AddExtra(x));
        // NOTE(review): the two tpAvgMicro.AddExtra calls below run even when
        // DOMAKE_MICROaverage is false, unlike every other tpAvgMicro access in
        // this method — harmless if the table is simply discarded, but inconsistent.
        semExtractor.termTableConstructor.DescribeSelf().ForEach(x => tpAvgMicro.AddExtra(x));
        semExtractor.CloudConstructor.DescribeSelf().ForEach(x => tpAvgMicro.AddExtra(x));
    }

    context.logger.log("-- producing summary reports on [" + extractor.name + "]");

    // Append min/max/avg/stdev aggregate rows to the macro-average table, then save.
    rangerMacro.AddRangeRows("Macroaverage ", tpAvgMacro, true, imbSCI.Core.math.aggregation.dataPointAggregationType.min | imbSCI.Core.math.aggregation.dataPointAggregationType.max | imbSCI.Core.math.aggregation.dataPointAggregationType.avg | imbSCI.Core.math.aggregation.dataPointAggregationType.stdev);
    tpAvgMacro.GetReportAndSave(folder, appManager.AppInfo, extractor.name + "_macroaverage_report", true, true);
    EMperKFolds.AddExtra("The table shows average measures for each fold --- rows marked with colored background show averages for all folds, per classifier.");
    EMperKFolds.GetReportAndSave(folder, appManager.AppInfo, extractor.name + "_allFolds", true, true);
    if (DOMAKE_MICROaverage) { tpAvgMicro.GetReportAndSave(folder, appManager.AppInfo, extractor.name + "_microaverage_report", true, true); }

    //dataSet.GetReportVersion().serializeDataSet(extractor.name + "_classifiers_MultiSheetSummary", folder, imbSCI.Data.enums.reporting.dataTableExportEnum.excel, appManager.AppInfo);
}