/// <summary>
/// Builds the report table for this collection: creates the schema, sets the additional info,
/// writes one row per case and finally asks a range finder to append avg/stdev range rows.
/// </summary>
/// <param name="isTrainingCollection">Forwarded to schema building, additional-info setup and row building.</param>
/// <param name="isSingleCategoryReport">Forwarded to schema building and additional-info setup.</param>
/// <returns>The populated report table.</returns>
public DataTable GetReportTable(Boolean isTrainingCollection = false, Boolean isSingleCategoryReport = true)
{
    // Aggregations requested for the range rows appended after the case rows.
    var rangeAggregation = imbSCI.Core.math.aggregation.dataPointAggregationType.avg
                           | imbSCI.Core.math.aggregation.dataPointAggregationType.stdev;

    DataTable reportTable = this.BuildShema(isSingleCategoryReport, isTrainingCollection);
    this.SetAdditionalInfo(reportTable, isSingleCategoryReport, isTrainingCollection);

    foreach (var caseEntry in this)
    {
        this.BuildRow(caseEntry, reportTable, isTrainingCollection);
    }

    // `ranger` is not declared in this method; it is assigned here and keeps the finder used for the range rows.
    ranger = new rangeFinderForDataTable(reportTable, "name");
    ranger.AddRangeRows(setClass.name, reportTable, true, rangeAggregation);

    return reportTable;
}
/// <summary>
/// Runs all k-fold validation cases of the validation collection registered under <c>model.name</c>,
/// retrying the whole run when an exception escapes, and then produces the per-category and
/// per-model metric reports.
/// </summary>
/// <param name="context">Experiment context supplying tools, loggers, notes, validation collections and the main report.</param>
/// <param name="model">Feature vector extractor that is trained and evaluated on every fold.</param>
protected void runModel(experimentExecutionContext context, IWebFVExtractor model)
{
    imbSCI.Core.screenOutputControl.logToConsoleControl.setAsOutput(context.logger, model.name);

    Int32 crashRetries = context.tools.operation.doRebootFVEOnCrashRetryLimit;

    aceDictionarySet<IDocumentSetClass, DocumentSetCaseCollection> casesByClasses = new aceDictionarySet<IDocumentSetClass, DocumentSetCaseCollection>();
    DSCCReportSet kFoldReport = new DSCCReportSet(model);

    var valCol = context.validationCollections[model.name];

    List<DocumentSetCaseCollectionSet> modelCaseResults = new List<DocumentSetCaseCollectionSet>();

    while (crashRetries > 0)
    {
        try
        {
            experimentNotes modelNotes = new experimentNotes(valCol.folder, "Fold-level experiment settings description notes");
            modelNotes.AppendLine("# Notes on Feature Vector Extractor: " + model.name);

            var nts = model.DescribeSelf();
            nts.ForEach(x => modelNotes.AppendLine(x));

            context.logger.log("Executing k-fold cases with model [" + model.name + "]");

            valCol.DescribeSampleDistribution(modelNotes);

            // Parenthesised so the width formatting applies to the whole "[name]" label;
            // without the parentheses member access binds tighter than '+' and only "]" was formatted.
            context.mainReport.valColVsModelVsSampleHash.Add(("[" + model.name + "]").toWidthExact(20) + " [sample distribution hash: " + valCol.SampleDistributionHash + "]");

            modelNotes.SaveNote();

            ParallelOptions ops = new ParallelOptions();
            ops.MaxDegreeOfParallelism = context.tools.operation.ParallelThreads;

            Parallel.ForEach<kFoldValidationCase>(valCol.GetCases(), ops, valCase =>
            {
                // Building the FVE and training on this fold
                model.DoFVEAndTraining(valCase, context.tools, context.logger);

                DocumentSetCaseCollectionSet results = model.DoClassification(valCase, context.tools, context.logger);

                if (!results.Any())
                {
                    throw new aceScienceException("DoClassification for [" + model.name + "] returned no results!", null, model, "DoClassification " + model.name + " failed!", context);
                }

                foreach (var pair in results)
                {
                    DocumentSetCaseCollection cls = pair.Value;
                    casesByClasses.Add(cls.setClass, cls);
                }

                valCase.evaluationResults = results;

                if (context.tools.DoResultReporting)
                {
                    context.logger.log("producing reports on k-Fold case [" + valCase.name + "]");

                    DSCCReports r = results.GetReports();

                    var sumMeans = r.GetAverageTable(context);
                    sumMeans.SetDescription("FVE report, aggregated for all categories - for fold [" + valCase.name + "]");
                    sumMeans.GetReportAndSave(valCase.folder, appManager.AppInfo, "CrossValidation_" + valCase.name, true, context.tools.operation.doReportsInParalell);

                    var fveAndCase = r.GetFullValidationTable(context);
                    fveAndCase.SetDescription("Per-category aggregate statistics, for each classifier, within fold [" + valCase.name + "], used for macro-averaging");
                    fveAndCase.GetReportAndSave(valCase.folder, appManager.AppInfo, "CrossValidation_extrainfo_" + valCase.name, true, context.tools.operation.doReportsInParalell);

                    var fullCaseReport = results.GetReportOnAllCases();
                    fullCaseReport.GetReportAndSave(valCase.folder, appManager.AppInfo, "FullReport_" + valCase.name, true, context.tools.operation.doReportsInParalell);

                    kFoldReport.Add(valCase, r);
                }

                context.logger.log("k-Fold case [" + valCase.name + "] completed");
                context.notes.log("- - Experiment sequence for [" + valCase.name + "] fold completed");

                if (context.tools.operation.doSaveKnowledgeForClasses)
                {
                    valCase.knowledgeLibrary.SaveKnowledgeInstancesForClasses(valCase, context.logger);
                }
            });

            // Fold results are collected only after every fold finished without throwing.
            foreach (var fold in valCol.GetCases())
            {
                modelCaseResults.Add(fold.evaluationResults);
            }

            // Success: leave the retry loop.
            crashRetries = 0;
        }
        catch (Exception ex)
        {
            crashRetries--;

            context.errorNotes.LogException("FVE Model crashed -- retries left [" + crashRetries + "] --- ", ex, model.name);
            context.logger.log(":::: REPEATING the model [" + model.name + "] ::: CRASHED [" + ex.Message + "] ::: RETRIES [" + crashRetries + "]");

            imbACE.Services.terminal.aceTerminalInput.doBeepViaConsole(1200, 1000, 1);
            imbACE.Services.terminal.aceTerminalInput.doBeepViaConsole(2400, 1000, 1);

            imbSCI.Core.screenOutputControl.logToConsoleControl.setAsOutput(context.logger, "RETRIES[" + crashRetries + "]");

            if (crashRetries > 0)
            {
                // Another attempt follows: drop whatever the failed attempt already stored, otherwise
                // the folds that completed before the crash would be added a second time by the retry.
                casesByClasses = new aceDictionarySet<IDocumentSetClass, DocumentSetCaseCollection>();
                kFoldReport = new DSCCReportSet(model);
            }
        }
    }

    imbSCI.Core.screenOutputControl.logToConsoleControl.setAsOutput(context.logger, "Reporting");

    valCol.knowledgeLibrary.SaveCaseKnowledgeInstances(context.logger);

    if (modelCaseResults.Any())
    {
        var rangeAggregation = imbSCI.Core.math.aggregation.dataPointAggregationType.avg
                               | imbSCI.Core.math.aggregation.dataPointAggregationType.stdev;

        featureExtractionMetrics modelMetrics = new featureExtractionMetrics(model.name, "All");
        DataTableTypeExtended<featureExtractionMetrics> modelVsCategoryMetrics = new DataTableTypeExtended<featureExtractionMetrics>(model.name, "Model metrics per category");

        // ---- Categories report: the first result's table is used only as a schema template.
        DataTable allTable = modelCaseResults.First()[0].GetReportTable(false, false).GetClonedShema<DataTable>();

        rangeFinderForDataTable ranger = new rangeFinderForDataTable(allTable, "name");
        ranger.columnsToSignIn.Add("Case");

        foreach (KeyValuePair<IDocumentSetClass, aceConcurrentBag<DocumentSetCaseCollection>> pair in casesByClasses)
        {
            ranger.prepareForNextAggregationBlock(allTable, "name");

            foreach (DocumentSetCaseCollection cn in pair.Value)
            {
                foreach (var cni in cn)
                {
                    if (cni != null)
                    {
                        cn.BuildRow(cni, allTable, false);
                    }
                }
            }

            ranger.AddRangeRows(pair.Key.name, allTable, true, rangeAggregation);

            var categoryMetrics = new featureExtractionMetrics(model.name, pair.Key.name);
            categoryMetrics.SetValues(ranger);

            modelVsCategoryMetrics.AddRow(categoryMetrics);
            modelMetrics.AddValues(categoryMetrics);

            categoryMetrics.saveObjectToXML(valCol.folder.pathFor(model.name + "_" + categoryMetrics.Name + ".xml", imbSCI.Data.enums.getWritableFileMode.overwrite, "FV and Category sample metrics, serialized object"));
        }

        // Category metrics were summed above; dividing by the category count gives the cross-category mean.
        modelMetrics.DivideValues(casesByClasses.Count);

        modelMetrics.saveObjectToXML(valCol.folder.pathFor(model.name + "_metrics.xml", imbSCI.Data.enums.getWritableFileMode.overwrite, "Cross-categories macroaveraged metrics of the FVE model [" + model.name + "]"));

        modelVsCategoryMetrics.AddRow(modelMetrics);
        modelVsCategoryMetrics.GetRowMetaSet().SetStyleForRowsWithValue<String>(DataRowInReportTypeEnum.dataHighlightA, "Name", modelMetrics.Name);
        modelVsCategoryMetrics.GetReportAndSave(valCol.folder, appManager.AppInfo, model.name + "_metrics", true, true);

        context.mainReport.AddModelMetrics(modelMetrics);

        context.notes.log("- Creating report for all categories [" + model.name + "] ");
        allTable.GetReportAndSave(valCol.folder, appManager.AppInfo, model.name + "_categories", true, context.tools.operation.doReportsInParalell);
    }

    // ---- Creation of the complete report
    kFoldReport.MakeReports(context, valCol.folder);

    context.mainReport.AddBestPerformer(kFoldReport.GetTopClassifierReport(), kFoldReport.meanClassifierReport, model);

    context.notes.log("- Experiment sequence with Feature Vector Extractor [" + model.name + "] completed");
    context.notes.SaveNote();
}
/// <summary>
/// Produces the cross k-fold summary reports for the extractor of this report set: a per-classifier
/// macro-average table, an "all folds" table, optional per-classifier reports and, when
/// <c>DOMAKE_MICROaverage</c> is set, a micro-average table. Also updates
/// <c>meanClassifierReport</c>, <c>topClassifierReport</c> and <c>highestF1Value</c>.
/// </summary>
/// <param name="context">Experiment context supplying setup, classes, tools and logger.</param>
/// <param name="folder">Folder the generated reports are saved into.</param>
public void MakeReports(experimentExecutionContext context, folderNode folder)
{
    meanClassifierReport = new DocumentSetCaseCollectionReport(extractor.name);

    // Collect every classifier that has an average report in at least one fold, in first-seen order.
    List<IWebPostClassifier> classifiers = new List<IWebPostClassifier>();

    foreach (var kFoldCasePair in this)
    {
        foreach (var pair in kFoldCasePair.Value.avgReports)
        {
            if (!classifiers.Contains(pair.Key))
            {
                classifiers.Add(pair.Key);
            }
        }
    }

    // ---- Creating average tables
    var tpAvgMacro = new DataTableTypeExtended<DocumentSetCaseCollectionReport>(context.setup.name + " summary", "Cross k-fold averages measures, fold-level measures are computed by macro-average method");
    var tpAvgMicro = new DataTableTypeExtended<DocumentSetCaseCollectionReport>(context.setup.name + " summary", "Cross k-fold averages measures, fold-level measures are computed by micro-average method");

    List<DocumentSetCaseCollectionReport> macroaverages = new List<DocumentSetCaseCollectionReport>();
    DataTableTypeExtended<DocumentSetCaseCollectionReport> EMperKFolds = new DataTableTypeExtended<DocumentSetCaseCollectionReport>(extractor.name + "_allReports");

    foreach (IWebPostClassifier classifier in classifiers)
    {
        // ---- Report on each classifier
        context.logger.log("-- producing report about [" + classifier.name + "]");

        DocumentSetCaseCollectionReport avg = new DocumentSetCaseCollectionReport(classifier.name + " macro-averaging, k-fold avg. ");

        DocumentSetCaseCollectionReport rep_eval = new DocumentSetCaseCollectionReport(classifier.name + " micro-averaging, k-fold avg.");
        rep_eval.Classifier = classifier.name;

        // NOTE(review): within this method `metrics` is never populated and `eval` is never read,
        // so the micro-average values below are derived from a freshly constructed metric set.
        // Looks like `eval` was meant to feed `metrics` - confirm against classificationEvalMetricSet.
        classificationEvalMetricSet metrics = new classificationEvalMetricSet();
        classificationEval eval = new classificationEval();

        Int32 c = 0;

        foreach (KeyValuePair<kFoldValidationCase, DSCCReports> kFoldCasePair in this)
        {
            DocumentSetCaseCollectionReport rep = kFoldCasePair.Value.avgReports[classifier];
            kFoldValidationCase vCase = kFoldCasePair.Key;

            classificationEvalMetricSet met = rep.GetSetMetrics();

            if (met != null)
            {
                foreach (IDocumentSetClass cl in context.classes.GetClasses())
                {
                    eval = eval + met[cl.name];
                }
            }

            rep.Name = classifier.name + "_" + vCase.name;

            avg.AddValues(rep);
            EMperKFolds.AddRow(rep);

            c++;
        }

        rep_eval.AddValues(metrics, classificationMetricComputation.microAveraging);

        avg.Classifier = classifier.name;
        avg.DivideValues(c);

        // Keep the classifier report with the highest averaged F1 seen so far.
        if (avg.F1measure > highestF1Value)
        {
            highestF1Value = avg.F1measure;
            topClassifierReport = avg;
        }

        meanClassifierReport.AddValues(avg);

        EMperKFolds.AddRow(avg);
        tpAvgMacro.AddRow(avg);
        macroaverages.Add(avg);

        if (DOMAKE_MICROaverage)
        {
            tpAvgMicro.AddRow(rep_eval);
        }

        if (context.tools.operation.DoMakeReportForEachClassifier)
        {
            // NOTE(review): EMperKFolds is shared by all classifiers, so at this point it also holds
            // the rows added for classifiers processed earlier - confirm this is intended.
            DataTable cTable = EMperKFolds;

            cTable.SetTitle($"{classifier.name} report");
            cTable.SetDescription("Summary " + context.setup.validationSetup.k + "-fold validation report for [" + classifier.name + "]");

            cTable.SetAdditionalInfoEntry("FV Extractor", extractor.name);
            cTable.SetAdditionalInfoEntry("Classifier", classifier.name);
            cTable.SetAdditionalInfoEntry("Class name", classifier.GetType().Name);

            cTable.SetAdditionalInfoEntry("Correct", rep_eval.Correct);
            cTable.SetAdditionalInfoEntry("Wrong", rep_eval.Wrong);

            cTable.SetAdditionalInfoEntry("True Positives", metrics[classifier.name].truePositives);
            cTable.SetAdditionalInfoEntry("False Negatives", metrics[classifier.name].falseNegatives);
            cTable.SetAdditionalInfoEntry("False Positives", metrics[classifier.name].falsePositives);

            cTable.AddExtra("Classifier: " + classifier.name + " [" + classifier.GetType().Name + "]");

            var info = classifier.DescribeSelf();
            info.ForEach(x => cTable.AddExtra(x));

            cTable.AddExtra("-----------------------------------------------------------------------");
            cTable.AddExtra("Precision, Recall and F1 measures expressed in this table are computed by macroaveraging shema");

            cTable.GetReportAndSave(folder, appManager.AppInfo, extractor.name + "_classifier_" + classifier.name);
        }
    }

    rangeFinderForDataTable rangerMacro = new rangeFinderForDataTable(tpAvgMacro, "Name");

    // With no classifiers nothing was accumulated; skip the division instead of dividing by zero.
    if (classifiers.Count > 0)
    {
        meanClassifierReport.DivideValues(classifiers.Count);
    }

    if (macroaverages.Count > 0)
    {
        Double maxF1 = macroaverages.Max(x => x.F1measure);

        List<String> maxCaseNames = macroaverages.Where(x => x.F1measure == maxF1).Select(x => x.Name).ToList();

        var style = EMperKFolds.GetRowMetaSet().SetStyleForRowsWithValue<String>(DataRowInReportTypeEnum.dataHighlightA, nameof(DocumentSetCaseCollectionReport.Name), maxCaseNames);
        EMperKFolds.GetRowMetaSet().AddUnit(style);

        tpAvgMacro.SetAdditionalInfoEntry("FV Extractor", extractor.name);

        if (DOMAKE_MICROaverage)
        {
            tpAvgMicro.SetAdditionalInfoEntry("FV Extractor", extractor.name);
        }

        List<String> averageNames = macroaverages.Select(x => x.Name).ToList();

        var avg_style = EMperKFolds.GetRowMetaSet().SetStyleForRowsWithValue<String>(DataRowInReportTypeEnum.dataHighlightC, nameof(DocumentSetCaseCollectionReport.Name), averageNames);

        foreach (var x in averageNames)
        {
            avg_style.AddMatch(x);
        }
    }

    // ---- Titles and descriptive extras
    tpAvgMacro.SetTitle($"{extractor.name} - macroaverage report");

    if (DOMAKE_MICROaverage)
    {
        tpAvgMicro.SetTitle($"{extractor.name} - microaverage report");
    }

    tpAvgMacro.AddExtra("Complete report on " + context.setup.validationSetup.k + "-fold validation FVE [" + extractor.name + "]");
    tpAvgMacro.AddExtra("Fold-level P, R and F1 measures are computed by macroaveraging method, values here are cross k-fold means.");

    if (DOMAKE_MICROaverage)
    {
        tpAvgMicro.AddExtra("Complete " + context.setup.validationSetup.k + "-fold validation report for FVE [" + extractor.name + "]");
        tpAvgMicro.AddExtra("Fold-level P, R and F1 measures are computed by microaveraging method, values here are cross k-fold means.");
    }

    context.AddExperimentInfo(tpAvgMacro);

    if (DOMAKE_MICROaverage)
    {
        context.AddExperimentInfo(tpAvgMicro);
    }

    tpAvgMacro.AddExtra(extractor.description);

    semanticFVExtractor semExtractor = extractor as semanticFVExtractor;

    if (semExtractor != null)
    {
        semExtractor.termTableConstructor.DescribeSelf().ForEach(x => tpAvgMacro.AddExtra(x));
        semExtractor.CloudConstructor.DescribeSelf().ForEach(x => tpAvgMacro.AddExtra(x));

        // The micro-average table is only titled, filled and saved when the flag is set,
        // so it is only annotated under the same condition.
        if (DOMAKE_MICROaverage)
        {
            semExtractor.termTableConstructor.DescribeSelf().ForEach(x => tpAvgMicro.AddExtra(x));
            semExtractor.CloudConstructor.DescribeSelf().ForEach(x => tpAvgMicro.AddExtra(x));
        }
    }

    context.logger.log("-- producing summary reports on [" + extractor.name + "]");

    rangerMacro.AddRangeRows("Macroaverage ", tpAvgMacro, true,
        imbSCI.Core.math.aggregation.dataPointAggregationType.min
        | imbSCI.Core.math.aggregation.dataPointAggregationType.max
        | imbSCI.Core.math.aggregation.dataPointAggregationType.avg
        | imbSCI.Core.math.aggregation.dataPointAggregationType.stdev);

    tpAvgMacro.GetReportAndSave(folder, appManager.AppInfo, extractor.name + "_macroaverage_report", true, true);

    EMperKFolds.AddExtra("The table shows average measures for each fold --- rows marked with colored background show averages for all folds, per classifier.");
    EMperKFolds.GetReportAndSave(folder, appManager.AppInfo, extractor.name + "_allFolds", true, true);

    if (DOMAKE_MICROaverage)
    {
        tpAvgMicro.GetReportAndSave(folder, appManager.AppInfo, extractor.name + "_microaverage_report", true, true);
    }
}