/// <summary>
/// Builds a confusion-based evaluation metric set for the given <paramref name="classifier"/>,
/// accumulated over every <see cref="DocumentSetCaseCollection"/> in this set.
/// </summary>
/// <param name="classifier">Classifier whose selections are scored.</param>
/// <returns>Metric set with correct/wrong counts and TP/FN/FP per class name.</returns>
protected classificationEvalMetricSet GetMetrics(IWebPostClassifier classifier)
{
    var result = new classificationEvalMetricSet();

    foreach (var entry in this)
    {
        DocumentSetCaseCollection collection = entry.Value;
        String trueClassName = collection.validation.className;

        foreach (var docCase in collection)
        {
            var predicted = docCase.data[classifier].selected;

            // cases where the classifier made no selection are not counted at all
            if (predicted == null) continue;

            if (collection.rightClassID == predicted.classID)
            {
                result[trueClassName].correct++;
                result[trueClassName].truePositives++;
            }
            else
            {
                // miss: FN for the true class, FP for the class that was (wrongly) predicted
                result[trueClassName].wrong++;
                result[trueClassName].falseNegatives++;
                result[predicted.name].falsePositives++;
            }
        }
    }

    return result;
}
/// <summary>
/// Gets the report table on one collection: for each configured classifier it produces one
/// row per document-set class plus a mean row, and registers the mean row in <c>avgReports</c>.
/// </summary>
/// <returns>The populated <see cref="DSCCReports"/> table (also appended to <c>ds.Tables</c>).</returns>
public DSCCReports GetReports()
{
    DSCCReports reports = new DSCCReports();
    reports.parent = this;

    foreach (IWebPostClassifier classifier in validationCase.context.setup.classifiers)
    {
        classificationEvalMetricSet metrics = GetMetrics(classifier);

        var meanReport = new DocumentSetCaseCollectionReport(validationCase.name + "_" + classifier.name + "_" + "mean");
        meanReport.Classifier = classifier.name;

        Int32 rowCount = 0;
        foreach (KeyValuePair<int, DocumentSetCaseCollection> entry in this)
        {
            String setClassName = entry.Value.setClass.name;
            var classMetric = metrics[setClassName];

            var row = new DocumentSetCaseCollectionReport(validationCase.name + " " + setClassName + " " + classifier.name);
            row.Classifier = classifier.name;
            row.Targets = classMetric.correct + classMetric.wrong;
            row.Wrong = classMetric.wrong;
            row.Correct = classMetric.correct;
            row.Precision = classMetric.GetPrecision();
            row.Recall = classMetric.GetRecall();
            row.F1measure = classMetric.GetF1();

            reports[classifier].Add(row);
            meanReport.AddValues(row);
            rowCount++;
        }

        // turn the accumulated sums into means over the processed class rows
        meanReport.DivideValues(rowCount);
        reports[classifier].Add(meanReport);
        meanReport.GetSetMetrics(metrics);
        reports.avgReports.Add(classifier, meanReport);
    }

    ds.Tables.Add(reports.GetDataTable());
    return reports;
}
/// <summary>
/// Evaluates the test results.
/// </summary>
/// <param name="testResults">Set of test results.</param>
/// <param name="_testName">Descriptive name to be attached at results report.</param>
/// <param name="logger">The logger - to log any problems, if occurred.</param>
/// <param name="averagingMethod">The averaging method.</param>
/// <returns>Classification report built from the computed metric set.</returns>
public classificationReport EvaluateTestResults(List <FeatureVectorWithLabelID> testResults, String _testName, ILogBuilder logger, classificationMetricComputation averagingMethod = classificationMetricComputation.macroAveraging)
{
    // compute the per-class metric set first, then fold it into a single report
    classificationEvalMetricSet metricSet = EvaluateTestResultsToMetricSet(testResults, _testName, logger);

    var summary = new classificationReport(_testName);
    summary.GetSetMetrics(metricSet);
    summary.AddValues(metricSet, averagingMethod);
    return summary;
}
/// <summary>
/// Evaluates the test results and returns the metric set.
/// </summary>
/// <param name="testResults">Set of test results.</param>
/// <param name="_testName">Descriptive name to be attached at results report.</param>
/// <param name="logger">The logger - to log any problems, if occurred.</param>
/// <returns>Metric set with one record per scorable test result; unscorable items are logged and skipped.</returns>
public classificationEvalMetricSet EvaluateTestResultsToMetricSet(List <FeatureVectorWithLabelID> testResults, String _testName, ILogBuilder logger)
{
    classificationEvalMetricSet metric = new classificationEvalMetricSet(_testName, labels_without_unknown);

    foreach (FeatureVectorWithLabelID test_item in testResults)
    {
        // guard: a labelID outside the known label list (e.g. the "unknown" label) cannot be scored;
        // previously this threw an unlogged exception and aborted the whole evaluation
        if ((test_item.labelID < 0) || (test_item.labelID >= labels_without_unknown.Count))
        {
            if (logger != null) logger.log("EvaluateTestResultsToMetricSet: labelID [" + test_item.labelID + "] out of range for [" + test_item.vector.name + "] -- record skipped");
            continue;
        }

        String test_response = labels_without_unknown[test_item.labelID];

        // guard: vectors with no ground-truth mapping would otherwise throw KeyNotFoundException
        if (!siteToLabel.ContainsKey(test_item.vector.name))
        {
            if (logger != null) logger.log("EvaluateTestResultsToMetricSet: no truth label for [" + test_item.vector.name + "] -- record skipped");
            continue;
        }

        String test_truth = siteToLabel[test_item.vector.name];
        metric.AddRecord(test_response, test_truth);
    }

    return metric;
}
/// <summary>
/// Builds and saves the cross k-fold summary reports for this extractor: one report table per
/// classifier (optional), one all-folds table, and macro-/micro-average summary tables.
/// Also tracks the best-performing classifier by F1 into <c>topClassifierReport</c>.
/// </summary>
/// <param name="context">Experiment execution context - supplies setup, classes and logger.</param>
/// <param name="folder">Folder node where the report files are saved.</param>
public void MakeReports(experimentExecutionContext context, folderNode folder)
{
    meanClassifierReport = new DocumentSetCaseCollectionReport(extractor.name);

    // classifier x fold -> fold-average report
    // NOTE(review): tempStructure and firstCase are populated below but never read again in this
    // method — presumably leftovers from an earlier version; confirm before removing.
    aceDictionary2D <IWebPostClassifier, kFoldValidationCase, DocumentSetCaseCollectionReport> tempStructure = new aceDictionary2D <IWebPostClassifier, kFoldValidationCase, DocumentSetCaseCollectionReport>();
    DSCCReports firstCase = null;
    List <IWebPostClassifier> classifiers = new List <IWebPostClassifier>();

    // collect the distinct classifier list (and the fold-average reports) across all folds
    foreach (var kFoldCasePair in this)
    {
        if (firstCase == null) { firstCase = kFoldCasePair.Value; }
        foreach (var pair in kFoldCasePair.Value.avgReports)
        {
            tempStructure[pair.Key, kFoldCasePair.Key] = pair.Value;
            if (!classifiers.Contains(pair.Key)) { classifiers.Add(pair.Key); }
        }
    }

    // DataSet dataSet = new DataSet(context.setup.name);
    // <---------- CREATING AVERAGE TABLE -----------------------------------------------------
    var tpAvgMacro = new DataTableTypeExtended <DocumentSetCaseCollectionReport>(context.setup.name + " summary", "Cross k-fold averages measures, fold-level measures are computed by macro-average method");
    var tpAvgMicro = new DataTableTypeExtended <DocumentSetCaseCollectionReport>(context.setup.name + " summary", "Cross k-fold averages measures, fold-level measures are computed by micro-average method");

    List <DocumentSetCaseCollectionReport> macroaverages = new List <DocumentSetCaseCollectionReport>();

    // shared table: one row per (classifier, fold) plus one highlighted average row per classifier
    DataTableTypeExtended <DocumentSetCaseCollectionReport> EMperKFolds = new DataTableTypeExtended <DocumentSetCaseCollectionReport>(extractor.name + "_allReports");

    foreach (IWebPostClassifier classifier in classifiers)
    {
        // < ---- report on each classifier
        context.logger.log("-- producing report about [" + classifier.name + "]");

        //objectTable<DocumentSetCaseCollectionReport> tp = new objectTable<DocumentSetCaseCollectionReport>(nameof(DocumentSetCaseCollectionReport.Name), classifier + "_sum");

        DocumentSetCaseCollectionReport avg = new DocumentSetCaseCollectionReport(classifier.name + " macro-averaging, k-fold avg. ");
        DocumentSetCaseCollectionReport rep_eval = new DocumentSetCaseCollectionReport(classifier.name + " micro-averaging, k-fold avg.");
        rep_eval.Classifier = classifier.name;

        // NOTE(review): 'metrics' is created empty and — as far as this method shows — never
        // populated: only 'eval' accumulates per-class sums below, and 'eval' is then discarded.
        // The micro-average row and the TP/FN/FP info entries further down read from 'metrics';
        // confirm whether micro-averaged values are effectively zero by design or by mistake.
        classificationEvalMetricSet metrics = new classificationEvalMetricSet();
        classificationEval eval = new classificationEval();
        //eval = metrics[classifier.name];

        Int32 c = 0; // number of folds accumulated into 'avg'
        foreach (KeyValuePair <kFoldValidationCase, DSCCReports> kFoldCasePair in this)
        {
            DocumentSetCaseCollectionReport rep = kFoldCasePair.Value.avgReports[classifier];
            kFoldValidationCase vCase = kFoldCasePair.Key;

            classificationEvalMetricSet met = rep.GetSetMetrics();
            if (met != null)
            {
                // sum per-class evaluations for this fold into 'eval'
                foreach (IDocumentSetClass cl in context.classes.GetClasses())
                {
                    eval = eval + met[cl.name];
                }
            }

            rep.Name = classifier.name + "_" + vCase.name;
            avg.AddValues(rep);
            EMperKFolds.AddRow(rep);
            c++;
        }

        rep_eval.AddValues(metrics, classificationMetricComputation.microAveraging);
        avg.Classifier = classifier.name;
        avg.DivideValues(c); // turn fold sums into cross-fold means

        // <<< detecting the best performed classifier in all evaluation folds
        if (avg.F1measure > highestF1Value)
        {
            highestF1Value = avg.F1measure;
            topClassifierReport = avg;
        }
        meanClassifierReport.AddValues(avg);
        // -----------------

        EMperKFolds.AddRow(avg);
        tpAvgMacro.AddRow(avg);
        macroaverages.Add(avg);
        if (DOMAKE_MICROaverage) { tpAvgMicro.AddRow(rep_eval); }
        // tp.Add(rep_eval);

        if (context.tools.operation.DoMakeReportForEachClassifier)
        {
            // NOTE(review): cTable is an alias of the shared EMperKFolds table, so each
            // per-classifier report also carries the rows of previously processed classifiers,
            // and its title/info entries are overwritten on every iteration — confirm intent.
            DataTable cTable = EMperKFolds;
            cTable.SetTitle($"{classifier.name} report");
            cTable.SetDescription("Summary " + context.setup.validationSetup.k + "-fold validation report for [" + classifier.name + "]");

            cTable.SetAdditionalInfoEntry("FV Extractor", extractor.name);
            cTable.SetAdditionalInfoEntry("Classifier", classifier.name);
            cTable.SetAdditionalInfoEntry("Class name", classifier.GetType().Name);
            cTable.SetAdditionalInfoEntry("Correct", rep_eval.Correct);
            cTable.SetAdditionalInfoEntry("Wrong", rep_eval.Wrong);
            //cTable.SetAdditionalInfoEntry("Precision", rep_eval.Precision);
            //cTable.SetAdditionalInfoEntry("Recall", rep_eval.Recall);
            //cTable.SetAdditionalInfoEntry("F1", rep_eval.F1measure);
            cTable.SetAdditionalInfoEntry("True Positives", metrics[classifier.name].truePositives);
            cTable.SetAdditionalInfoEntry("False Negatives", metrics[classifier.name].falseNegatives);
            cTable.SetAdditionalInfoEntry("False Positives", metrics[classifier.name].falsePositives);

            cTable.AddExtra("Classifier: " + classifier.name + " [" + classifier.GetType().Name + "]");
            var info = classifier.DescribeSelf();
            info.ForEach(x => cTable.AddExtra(x));

            cTable.AddExtra("-----------------------------------------------------------------------");
            cTable.AddExtra("Precision, Recall and F1 measures expressed in this table are computed by macroaveraging shema");
            // output.CopyRowsFrom(cTable);
            cTable.GetReportAndSave(folder, appManager.AppInfo, extractor.name + "_classifier_" + classifier.name);
            // dataSet.AddTable(cTable);
        }
    }

    rangeFinderForDataTable rangerMacro = new rangeFinderForDataTable(tpAvgMacro, "Name");

    meanClassifierReport.DivideValues(classifiers.Count);

    if (macroaverages.Count > 0)
    {
        // highlight rows with the best / worst F1 across all classifier averages
        Double maxF1 = macroaverages.Max(x => x.F1measure);
        Double minF1 = macroaverages.Min(x => x.F1measure);

        List <String> minCaseNames = macroaverages.Where(x => x.F1measure == minF1).Select(x => x.Name).ToList();
        List <String> maxCaseNames = macroaverages.Where(x => x.F1measure == maxF1).Select(x => x.Name).ToList();

        var style = EMperKFolds.GetRowMetaSet().SetStyleForRowsWithValue <String>(DataRowInReportTypeEnum.dataHighlightA, nameof(DocumentSetCaseCollectionReport.Name), maxCaseNames);
        EMperKFolds.GetRowMetaSet().AddUnit(style);
        // style = tpAvgMacro.GetRowMetaSet().SetStyleForRowsWithValue<String>(DataRowInReportTypeEnum.dataHighlightC, nameof(DocumentSetCaseCollectionReport.Name), minCaseNames);

        tpAvgMacro.SetAdditionalInfoEntry("FV Extractor", extractor.name);
        if (DOMAKE_MICROaverage) { tpAvgMicro.SetAdditionalInfoEntry("FV Extractor", extractor.name); }

        List <String> averageNames = macroaverages.Select(x => x.Name).ToList();
        // NOTE(review): the style is created from averageNames and then every name is AddMatch-ed
        // again — possibly redundant double registration; confirm against SetStyleForRowsWithValue.
        var avg_style = EMperKFolds.GetRowMetaSet().SetStyleForRowsWithValue <String>(DataRowInReportTypeEnum.dataHighlightC, nameof(DocumentSetCaseCollectionReport.Name), averageNames);
        foreach (var x in averageNames) { avg_style.AddMatch(x); }
    }

    // ::: ------------------------------------------------------------------- final summaries ::: //

    tpAvgMacro.SetTitle($"{extractor.name} - macroaverage report");
    if (DOMAKE_MICROaverage) { tpAvgMicro.SetTitle($"{extractor.name} - microaverage report"); }

    tpAvgMacro.AddExtra("Complete report on " + context.setup.validationSetup.k + "-fold validation FVE [" + extractor.name + "]");
    tpAvgMacro.AddExtra("Fold-level P, R and F1 measures are computed by macroaveraging method, values here are cross k-fold means.");

    if (DOMAKE_MICROaverage) { tpAvgMicro.AddExtra("Complete " + context.setup.validationSetup.k + "-fold validation report for FVE [" + extractor.name + "]"); }
    if (DOMAKE_MICROaverage) { tpAvgMicro.AddExtra("Fold-level P, R and F1 measures are computed by microaveraging method, values here are cross k-fold means."); }

    context.AddExperimentInfo(tpAvgMacro);
    if (DOMAKE_MICROaverage) { context.AddExperimentInfo(tpAvgMicro); }

    tpAvgMacro.AddExtra(extractor.description);

    // semantic extractors contribute their constructor descriptions to both summary tables
    if (extractor is semanticFVExtractor)
    {
        semanticFVExtractor semExtractor = (semanticFVExtractor)extractor;
        semExtractor.termTableConstructor.DescribeSelf().ForEach(x => tpAvgMacro.AddExtra(x));
        semExtractor.CloudConstructor.DescribeSelf().ForEach(x => tpAvgMacro.AddExtra(x));
        semExtractor.termTableConstructor.DescribeSelf().ForEach(x => tpAvgMicro.AddExtra(x));
        semExtractor.CloudConstructor.DescribeSelf().ForEach(x => tpAvgMicro.AddExtra(x));
    }

    context.logger.log("-- producing summary reports on [" + extractor.name + "]");

    // append min/max/avg/stdev aggregate rows to the macroaverage table
    rangerMacro.AddRangeRows("Macroaverage ", tpAvgMacro, true, imbSCI.Core.math.aggregation.dataPointAggregationType.min | imbSCI.Core.math.aggregation.dataPointAggregationType.max | imbSCI.Core.math.aggregation.dataPointAggregationType.avg | imbSCI.Core.math.aggregation.dataPointAggregationType.stdev);

    tpAvgMacro.GetReportAndSave(folder, appManager.AppInfo, extractor.name + "_macroaverage_report", true, true);

    EMperKFolds.AddExtra("The table shows average measures for each fold --- rows marked with colored background show averages for all folds, per classifier.");
    EMperKFolds.GetReportAndSave(folder, appManager.AppInfo, extractor.name + "_allFolds", true, true);

    if (DOMAKE_MICROaverage) { tpAvgMicro.GetReportAndSave(folder, appManager.AppInfo, extractor.name + "_microaverage_report", true, true); }

    //dataSet.GetReportVersion().serializeDataSet(extractor.name + "_classifiers_MultiSheetSummary", folder, imbSCI.Data.enums.reporting.dataTableExportEnum.excel, appManager.AppInfo);
}
/// <summary>
/// Executes this fold of the experiment: optional document pre-selection, dataset deployment,
/// corpus/feature pipeline, classification, evaluation and report generation.
/// </summary>
/// <param name="logger">Logger for progress and problem messages.</param>
/// <param name="executionContextMain">Operation context carried across folds (deployed dataset, test results).</param>
/// <param name="executionContextExtra">Experiment-level context (resource provider, truth table, summaries).</param>
/// <returns>The fold/context pair holding the execution results.</returns>
/// <exception cref="ArgumentException">Thrown when the precompiled document-selection scores file fails to load.</exception>
public override ExperimentDataSetFoldContextPair <OperationContext> Execute(ILogBuilder logger, OperationContext executionContextMain = null, ExperimentModelExecutionContext executionContextExtra = null)
{
    ExperimentDataSetFoldContextPair <OperationContext> output = new ExperimentDataSetFoldContextPair <OperationContext>(fold, executionContextMain);

    Open();

    // --- optional: restrict the fold's dataset to documents selected by precompiled scores ---
    if (!setup.documentSelectQuery.PrecompiledScoresFilename.Trim().isNullOrEmpty())
    {
        String precompFile = DocumentSelectResult.CheckAndMakeFilename(setup.documentSelectQuery.PrecompiledScoresFilename);

        var p = executionContextExtra.resourceProvider.GetResourceFile(precompFile, fold);

        DocumentSelectResult scores = DocumentSelectResult.LoadFromFile(p, logger);
        if (scores != null)
        {
            scores.SaveReport(fold_notes.folder.pathFor("DSScores_loaded.txt", imbSCI.Data.enums.getWritableFileMode.overwrite));

            scores = setup.documentSelectQuery.ExecuteLimit(scores, logger);

            IEnumerable <string> assignedIDs = scores.items.Select(x => x.AssignedID);

            scores.SaveReport(fold_notes.folder.pathFor("DSScores_applied.txt", imbSCI.Data.enums.getWritableFileMode.overwrite));

            fold.DataSetSubSet(assignedIDs.ToList(), true, true);
        }
        else
        {
            // BUGFIX: the log call was previously placed after the throw and was unreachable
            logger.log(" _ DocumentSelect failed for [" + name + "]");
            throw new ArgumentException("DSelection file failed: " + setup.documentSelectQuery.PrecompiledScoresFilename);
        }
    }

    // --- dataset structure report, captured into a temporary report merged into the final one ---
    classificationReport tmpReport = new classificationReport();

    String dsReportName = fold.name + setup.documentSelectQuery.PrecompiledScoresFilename + setup.documentSelectQuery.SizeLimit;

    DatasetStructureReport dsReport = DatasetStructureReport.MakeStructureReport(fold, dsReportName);
    dsReport.Publish(fold_notes.folder, true, true);

    tmpReport.SetReportDataFields(dsReport);

    // --- deploy dataset and build the text/space model once per context ---
    if (!output.context.IsDatasetDeployed)
    {
        output.context.DeployDataSet(fold, logger);

        entityOperation.TextRendering(output.context, notes, requirements.MayUseTextRender);

        corpusOperation.SpaceModelPopulation(output.context, notes);

        if (requirements.MayUseSpaceModelCategories)
        {
            corpusOperation.SpaceModelCategories(output.context, notes);
        }
    }

    tmpReport.SetReportDataFields(output.context, false);

    // --- feature pipeline: selection -> vector space -> feature vectors ---
    corpusOperation.FeatureSelection(output.context, notes);

    corpusOperation.VectorSpaceConstruction(output.context, notes, requirements.MayUseVectorSpaceCategories);

    corpusOperation.FeatureVectorConstruction(output.context, notes);

    // --- optional diagnostic sample reports for weighting and feature-selection filters ---
    if (setup.reportOptions.HasFlag(OperationReportEnum.randomSampledDemo))
    {
        logger.log("-- generating random sample report");

        var data_wm = imbNLP.Toolkit.Reporting.ReportGenerators.MakeWeightModelDemoTable(output.context.spaceModel, corpusOperation.weightModel, output.context.SelectedFeatures, 5, "DemoForWeightModel", "Diagnostic report for picked sample");
        data_wm.GetReportAndSave(fold_notes.folder);

        var data_fs = imbNLP.Toolkit.Reporting.ReportGenerators.MakeWeightModelDemoTable(output.context.spaceModel, corpusOperation.filter.WeightModel, output.context.SelectedFeatures, 5, "DemoForFeatureSelection", "Diagnostic report for feature selection filter sample");
        data_fs.GetReportAndSave(fold_notes.folder);
    }

    // --- classification and evaluation ---
    classificationOperation.PerformClassification(output.context, executionContextExtra.truthTable, setup.dataSetMode, notes);

    corpusOperation.weightModel.DiagnosticDump(fold_notes.folder, logger);

    classificationEvalMetricSet evaluationMetrics = executionContextExtra.truthTable.EvaluateTestResultsToMetricSet(output.context.testResults, setup.OutputFilename + "-" + notes.folder.name, logger);

    // optional: export the evaluation as a document-selection dictionary (to both folders)
    if (setup.ExportEvaluationAsDocumentSelectionResult)
    {
        Toolkit.Feature.FeatureVectorDictionaryWithDimensions dict = executionContextExtra.truthTable.GetEvaluationAsFeatureVectorDictionary(output.context.testResults, setup.OutputFilename, logger, setup.ExportEvaluationCorrectScore, setup.ExportEvaluationIncorrectScore);

        String out_ds = setup.ExportEvaluationToFilename.Replace("*", "");

        dict.Save(fold_notes.folder, out_ds.or(setup.OutputFilename), logger);
        dict.Save(notes.folder, out_ds.or(setup.OutputFilename), logger);
    }

    // per-class evaluation table plus a summary row
    DataTableTypeExtended <classificationEval> inclassEvalTable = new DataTableTypeExtended <classificationEval>("inclass_evaluation", "Test results, per class");
    evaluationMetrics.GetAllEntries().ForEach(x => inclassEvalTable.AddRow(x));
    inclassEvalTable.AddRow(evaluationMetrics.GetSummary("Sum"));
    notes.SaveDataTable(inclassEvalTable, notes.folder_classification);

    // --- averaged report: serialize, log and register into the experiment summaries ---
    classificationReport averagedReport = new classificationReport(evaluationMetrics, setup.averagingMethod);
    averagedReport.Classifier = classificationOperation.classifier.GetSignature();
    averagedReport.saveObjectToXML(notes.folder_classification.pathFor(averagedReport.Name + ".xml", imbSCI.Data.enums.getWritableFileMode.overwrite, "Serialized classification evaluation results summary"));
    averagedReport.ReportToLog(notes);
    averagedReport.SetReportDataFields(output.context, true);
    averagedReport.data.Merge(tmpReport.data);
    averagedReport.SetReportDataFields(classificationOperation.classifier, corpusOperation.filter, corpusOperation.weightModel);

    executionContextExtra.testSummaries.Add(averagedReport);

    OperationContextReport reportOperation = new OperationContextReport();
    reportOperation.DeploySettingsBase(notes);
    reportOperation.GenerateReports(output.context, setup.reportOptions, notes);

    Close();

    return output;
}