Example #1
        /// <summary>
        /// Collects per-class evaluation counters (correct/wrong, TP/FP/FN) for the given classifier
        /// </summary>
        /// <param name="classifier">Classifier whose selected classes are evaluated.</param>
        /// <returns>Metric set with one evaluation entry per class.</returns>
        protected classificationEvalMetricSet GetMetrics(IWebPostClassifier classifier)
        {
            classificationEvalMetricSet metrics = new classificationEvalMetricSet();


            foreach (KeyValuePair <int, DocumentSetCaseCollection> pair in this)
            {
                String className = pair.Value.validation.className;
                foreach (var setCase in pair.Value)
                {
                    var assocClass = setCase.data[classifier].selected;

                    if (assocClass != null)
                    {
                        if (pair.Value.rightClassID == assocClass.classID)
                        {
                            metrics[className].correct++;
                            metrics[className].truePositives++;
                        }
                        else
                        {
                            metrics[className].wrong++;
                            metrics[className].falseNegatives++;
                            metrics[assocClass.name].falsePositives++;
                        }
                    }
                }
            }
            return(metrics);
        }
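
        // A minimal, self-contained sketch of how the counters collected above turn
        // into the per-class measures used by the reports below. ClassEvalSketch is
        // an illustrative stand-in, not the actual classificationEval API; the
        // formulas assume the conventional precision/recall/F1 definitions.
        public class ClassEvalSketch
        {
            public int truePositives;
            public int falsePositives;
            public int falseNegatives;

            // Precision = TP / (TP + FP); defined as 0 on an empty denominator.
            public double GetPrecision()
            {
                int denom = truePositives + falsePositives;
                return denom == 0 ? 0.0 : (double)truePositives / denom;
            }

            // Recall = TP / (TP + FN); defined as 0 on an empty denominator.
            public double GetRecall()
            {
                int denom = truePositives + falseNegatives;
                return denom == 0 ? 0.0 : (double)truePositives / denom;
            }

            // F1 = harmonic mean of precision and recall.
            public double GetF1()
            {
                double p = GetPrecision();
                double r = GetRecall();
                return (p + r) == 0.0 ? 0.0 : 2.0 * p * r / (p + r);
            }
        }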
Example #2
        /// <summary>
        /// Builds the report table for this collection, one report per classifier
        /// </summary>
        /// <returns>Report table with a row per class and a mean row for each classifier.</returns>
        public DSCCReports GetReports()
        {

            DSCCReports repTable = new DSCCReports();

            repTable.parent = this;
            foreach (IWebPostClassifier classifier in validationCase.context.setup.classifiers)
            {
                classificationEvalMetricSet metrics = GetMetrics(classifier);

                Int32 i = 0;

                DocumentSetCaseCollectionReport avgReport = new DocumentSetCaseCollectionReport(validationCase.name + "_" + classifier.name + "_" + "mean");

                avgReport.Classifier = classifier.name;

                foreach (KeyValuePair <int, DocumentSetCaseCollection> pair in this)
                {
                    var rep = new DocumentSetCaseCollectionReport(validationCase.name + " " + pair.Value.setClass.name + " " + classifier.name);
                    rep.Classifier = classifier.name;

                    var classMetrics = metrics[pair.Value.setClass.name];

                    rep.Targets   = classMetrics.correct + classMetrics.wrong;
                    rep.Wrong     = classMetrics.wrong;
                    rep.Correct   = classMetrics.correct;
                    rep.Precision = classMetrics.GetPrecision();
                    rep.Recall    = classMetrics.GetRecall();
                    rep.F1measure = classMetrics.GetF1();
                    repTable[classifier].Add(rep);
                    avgReport.AddValues(rep);
                    i++;
                }

                avgReport.DivideValues(i);
                repTable[classifier].Add(avgReport);
                avgReport.GetSetMetrics(metrics);
                repTable.avgReports.Add(classifier, avgReport);
            }


            return(repTable);
        }
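
        // Sketch of the accumulate-then-divide pattern behind the "mean" row built
        // above (AddValues once per class report, then DivideValues by the count).
        // ReportSketch is an illustrative stand-in for DocumentSetCaseCollectionReport.
        public class ReportSketch
        {
            public string Name;      // row label, e.g. "<case> <class> <classifier>"
            public double Precision;
            public double Recall;
            public double F1measure;

            // Accumulates another report's measures into this one.
            public void AddValues(ReportSketch other)
            {
                Precision += other.Precision;
                Recall    += other.Recall;
                F1measure += other.F1measure;
            }

            // Turns the accumulated sums into arithmetic means.
            public void DivideValues(int count)
            {
                if (count == 0) return; // guard: nothing was accumulated
                Precision /= count;
                Recall    /= count;
                F1measure /= count;
            }
        }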
Example #3
        /// <summary>
        /// Evaluates the test results.
        /// </summary>
        /// <param name="testResults">Set of test results.</param>
        /// <param name="_testName">Descriptive name to be attached to the results report.</param>
        /// <param name="logger">The logger - to log any problems, if they occur.</param>
        /// <param name="averagingMethod">The averaging method.</param>
        /// <returns>Classification report with measures averaged by the selected method.</returns>
        public classificationReport EvaluateTestResults(List <FeatureVectorWithLabelID> testResults, String _testName, ILogBuilder logger, classificationMetricComputation averagingMethod = classificationMetricComputation.macroAveraging)
        {
            classificationReport report = new classificationReport(_testName);

            classificationEvalMetricSet metric = EvaluateTestResultsToMetricSet(testResults, _testName, logger);

            report.GetSetMetrics(metric);
            report.AddValues(metric, averagingMethod);

            return(report);
        }
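
        // Hedged sketch of the two averaging modes selectable above, reusing
        // ClassEvalSketch from the sketch after Example #1 and assuming the standard
        // definitions: macro-averaging computes per-class measures first and then
        // averages them (every class weighs equally), while micro-averaging pools
        // the raw counts first (frequent classes dominate). Recall and F1 follow
        // the same two patterns; both methods assume at least one class entry.
        public static double MacroPrecisionSketch(System.Collections.Generic.IReadOnlyList<ClassEvalSketch> perClass)
        {
            // mean of the per-class precision values
            return System.Linq.Enumerable.Average(perClass, c => c.GetPrecision());
        }

        public static double MicroPrecisionSketch(System.Collections.Generic.IReadOnlyList<ClassEvalSketch> perClass)
        {
            // precision over counts pooled across all classes
            int tp = System.Linq.Enumerable.Sum(perClass, c => c.truePositives);
            int fp = System.Linq.Enumerable.Sum(perClass, c => c.falsePositives);
            return tp + fp == 0 ? 0.0 : (double)tp / (tp + fp);
        }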
Example #4
        /// <summary>
        /// Evaluates the test results and returns the metric set
        /// </summary>
        /// <param name="testResults">Set of test results.</param>
        /// <param name="_testName">Descriptive name to be attached to the results report.</param>
        /// <param name="logger">The logger - to log any problems, if they occur.</param>
        /// <returns>Per-class metric set built from the test results.</returns>
        public classificationEvalMetricSet EvaluateTestResultsToMetricSet(List <FeatureVectorWithLabelID> testResults, String _testName, ILogBuilder logger)
        {
            classificationEvalMetricSet metric = new classificationEvalMetricSet(_testName, labels_without_unknown);

            foreach (FeatureVectorWithLabelID test_item in testResults)
            {
                String test_response = labels_without_unknown[test_item.labelID];

                String test_truth = siteToLabel[test_item.vector.name];

                metric.AddRecord(test_response, test_truth);
            }
            return(metric);
        }
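
        // Sketch of the confusion accounting AddRecord is expected to perform for
        // single-label results, reusing ClassEvalSketch from the sketch after
        // Example #1. The real classificationEvalMetricSet may treat unknown labels
        // differently, so take this as an assumption, not the actual implementation.
        public static void AddRecordSketch(System.Collections.Generic.IDictionary<string, ClassEvalSketch> perClass, string response, string truth)
        {
            if (response == truth)
            {
                perClass[truth].truePositives++;     // correct hit for the true class
            }
            else
            {
                perClass[truth].falseNegatives++;    // the true class was missed
                perClass[response].falsePositives++; // spurious hit for the predicted class
            }
        }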
Example #5
        /// <summary>
        /// Builds summary report tables (macro- and micro-averaged) across all k-fold validation cases and saves them to the given folder
        /// </summary>
        public void MakeReports(experimentExecutionContext context, folderNode folder)
        {
            meanClassifierReport = new DocumentSetCaseCollectionReport(extractor.name);

            aceDictionary2D <IWebPostClassifier, kFoldValidationCase, DocumentSetCaseCollectionReport> tempStructure = new aceDictionary2D <IWebPostClassifier, kFoldValidationCase, DocumentSetCaseCollectionReport>();

            DSCCReports firstCase = null;
            List <IWebPostClassifier> classifiers = new List <IWebPostClassifier>();

            foreach (var kFoldCasePair in this)
            {
                if (firstCase == null)
                {
                    firstCase = kFoldCasePair.Value;
                }
                foreach (var pair in kFoldCasePair.Value.avgReports)
                {
                    tempStructure[pair.Key, kFoldCasePair.Key] = pair.Value;
                    if (!classifiers.Contains(pair.Key))
                    {
                        classifiers.Add(pair.Key);
                    }
                }
            }






            // <---------- CREATING AVERAGE TABLE -----------------------------------------------------
            var tpAvgMacro = new DataTableTypeExtended <DocumentSetCaseCollectionReport>(context.setup.name + " summary", "Cross k-fold average measures; fold-level measures are computed by the macro-averaging method");
            var tpAvgMicro = new DataTableTypeExtended <DocumentSetCaseCollectionReport>(context.setup.name + " summary", "Cross k-fold average measures; fold-level measures are computed by the micro-averaging method");

            List <DocumentSetCaseCollectionReport> macroaverages = new List <DocumentSetCaseCollectionReport>();
            DataTableTypeExtended <DocumentSetCaseCollectionReport> EMperKFolds = new DataTableTypeExtended <DocumentSetCaseCollectionReport>(extractor.name + "_allReports");


            foreach (IWebPostClassifier classifier in classifiers)
            {
                // <---- report for each classifier

                context.logger.log("-- producing report about [" + classifier.name + "]");

                DocumentSetCaseCollectionReport avg = new DocumentSetCaseCollectionReport(classifier.name + " macro-averaging, k-fold avg.");

                DocumentSetCaseCollectionReport rep_eval = new DocumentSetCaseCollectionReport(classifier.name + " micro-averaging, k-fold avg.");

                rep_eval.Classifier = classifier.name;

                classificationEvalMetricSet metrics = new classificationEvalMetricSet();
                classificationEval          eval    = new classificationEval();

                Int32 c = 0;
                foreach (KeyValuePair <kFoldValidationCase, DSCCReports> kFoldCasePair in this)
                {
                    DocumentSetCaseCollectionReport rep   = kFoldCasePair.Value.avgReports[classifier];
                    kFoldValidationCase             vCase = kFoldCasePair.Key;


                    classificationEvalMetricSet met = rep.GetSetMetrics();

                    if (met != null)
                    {
                        foreach (IDocumentSetClass cl in context.classes.GetClasses())
                        {
                            eval = eval + met[cl.name];
                        }
                    }

                    rep.Name = classifier.name + "_" + vCase.name;
                    avg.AddValues(rep);
                    EMperKFolds.AddRow(rep);

                    c++;
                }

                rep_eval.AddValues(metrics, classificationMetricComputation.microAveraging);



                avg.Classifier = classifier.name;
                avg.DivideValues(c);

                // <<< detecting the best performing classifier across all evaluation folds
                if (avg.F1measure > highestF1Value)
                {
                    highestF1Value      = avg.F1measure;
                    topClassifierReport = avg;
                }

                meanClassifierReport.AddValues(avg);


                // -----------------

                EMperKFolds.AddRow(avg);

                tpAvgMacro.AddRow(avg);

                macroaverages.Add(avg);

                if (DOMAKE_MICROaverage)
                {
                    tpAvgMicro.AddRow(rep_eval);
                }

                if (context.tools.operation.DoMakeReportForEachClassifier)
                {
                    DataTable cTable = EMperKFolds;
                    cTable.SetTitle($"{classifier.name} report");
                    cTable.SetDescription("Summary " + context.setup.validationSetup.k + "-fold validation report for [" + classifier.name + "]");


                    cTable.SetAdditionalInfoEntry("FV Extractor", extractor.name);
                    cTable.SetAdditionalInfoEntry("Classifier", classifier.name);
                    cTable.SetAdditionalInfoEntry("Class name", classifier.GetType().Name);

                    cTable.SetAdditionalInfoEntry("Correct", rep_eval.Correct);
                    cTable.SetAdditionalInfoEntry("Wrong", rep_eval.Wrong);


                    cTable.SetAdditionalInfoEntry("True Positives", metrics[classifier.name].truePositives);
                    cTable.SetAdditionalInfoEntry("False Negatives", metrics[classifier.name].falseNegatives);
                    cTable.SetAdditionalInfoEntry("False Positives", metrics[classifier.name].falsePositives);


                    cTable.AddExtra("Classifier: " + classifier.name + " [" + classifier.GetType().Name + "]");
                    var info = classifier.DescribeSelf();
                    info.ForEach(x => cTable.AddExtra(x));

                    cTable.AddExtra("-----------------------------------------------------------------------");

                    cTable.AddExtra("Precision, Recall and F1 measures in this table are computed by the macro-averaging scheme");

                    cTable.GetReportAndSave(folder, appManager.AppInfo, extractor.name + "_classifier_" + classifier.name);
                }
            }



            rangeFinderForDataTable rangerMacro = new rangeFinderForDataTable(tpAvgMacro, "Name");



            meanClassifierReport.DivideValues(classifiers.Count);
            if (macroaverages.Count > 0)
            {
                Double maxF1 = macroaverages.Max(x => x.F1measure);
                Double minF1 = macroaverages.Min(x => x.F1measure);

                List <String> minCaseNames = macroaverages.Where(x => x.F1measure == minF1).Select(x => x.Name).ToList();
                List <String> maxCaseNames = macroaverages.Where(x => x.F1measure == maxF1).Select(x => x.Name).ToList();


                var style = EMperKFolds.GetRowMetaSet().SetStyleForRowsWithValue <String>(DataRowInReportTypeEnum.dataHighlightA, nameof(DocumentSetCaseCollectionReport.Name), maxCaseNames);

                EMperKFolds.GetRowMetaSet().AddUnit(style);


                //  style = tpAvgMacro.GetRowMetaSet().SetStyleForRowsWithValue<String>(DataRowInReportTypeEnum.dataHighlightC, nameof(DocumentSetCaseCollectionReport.Name), minCaseNames);



                tpAvgMacro.SetAdditionalInfoEntry("FV Extractor", extractor.name);
                if (DOMAKE_MICROaverage)
                {
                    tpAvgMicro.SetAdditionalInfoEntry("FV Extractor", extractor.name);
                }


                List <String> averageNames = macroaverages.Select(x => x.Name).ToList();
                var           avg_style    = EMperKFolds.GetRowMetaSet().SetStyleForRowsWithValue <String>(DataRowInReportTypeEnum.dataHighlightC, nameof(DocumentSetCaseCollectionReport.Name), averageNames);
                foreach (var x in averageNames)
                {
                    avg_style.AddMatch(x);
                }
            }

            // ::: ------------------------------------------------------------------------------------------------- ::: --------------------------------------------------------------------- ::: //

            tpAvgMacro.SetTitle($"{extractor.name} - macroaverage report");
            if (DOMAKE_MICROaverage)
            {
                tpAvgMicro.SetTitle($"{extractor.name} - microaverage report");
            }

            tpAvgMacro.AddExtra("Complete report on " + context.setup.validationSetup.k + "-fold validation FVE [" + extractor.name + "]");
            tpAvgMacro.AddExtra("Fold-level P, R and F1 measures are computed by macroaveraging method, values here are cross k-fold means.");

            if (DOMAKE_MICROaverage)
            {
                tpAvgMicro.AddExtra("Complete " + context.setup.validationSetup.k + "-fold validation report for FVE [" + extractor.name + "]");
            }
            if (DOMAKE_MICROaverage)
            {
                tpAvgMicro.AddExtra("Fold-level P, R and F1 measures are computed by microaveraging method, values here are cross k-fold means.");
            }

            context.AddExperimentInfo(tpAvgMacro);
            if (DOMAKE_MICROaverage)
            {
                context.AddExperimentInfo(tpAvgMicro);
            }

            tpAvgMacro.AddExtra(extractor.description);


            if (extractor is semanticFVExtractor)
            {
                semanticFVExtractor semExtractor = (semanticFVExtractor)extractor;

                semExtractor.termTableConstructor.DescribeSelf().ForEach(x => tpAvgMacro.AddExtra(x));
                semExtractor.CloudConstructor.DescribeSelf().ForEach(x => tpAvgMacro.AddExtra(x));
                semExtractor.termTableConstructor.DescribeSelf().ForEach(x => tpAvgMicro.AddExtra(x));
                semExtractor.CloudConstructor.DescribeSelf().ForEach(x => tpAvgMicro.AddExtra(x));
            }

            context.logger.log("-- producing summary reports on [" + extractor.name + "]");

            rangerMacro.AddRangeRows("Macroaverage ", tpAvgMacro, true,
                                     imbSCI.Core.math.aggregation.dataPointAggregationType.min | imbSCI.Core.math.aggregation.dataPointAggregationType.max
                                     | imbSCI.Core.math.aggregation.dataPointAggregationType.avg
                                     | imbSCI.Core.math.aggregation.dataPointAggregationType.stdev);
            tpAvgMacro.GetReportAndSave(folder, appManager.AppInfo, extractor.name + "_macroaverage_report", true, true);


            EMperKFolds.AddExtra("The table shows average measures for each fold --- rows marked with colored background show averages for all folds, per classifier.");

            EMperKFolds.GetReportAndSave(folder, appManager.AppInfo, extractor.name + "_allFolds", true, true);

            if (DOMAKE_MICROaverage)
            {
                tpAvgMicro.GetReportAndSave(folder, appManager.AppInfo, extractor.name + "_microaverage_report", true, true);
            }
        }
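
        // Sketch of the highlight selection performed in MakeReports: the extreme F1
        // value is found first, then every row matching it is kept, so ties are
        // preserved (mirroring the Where(x => x.F1measure == maxF1) query above).
        // Reuses ReportSketch from the sketch after Example #2; illustrative only.
        public static System.Collections.Generic.List<string> NamesAtMaxF1Sketch(System.Collections.Generic.List<ReportSketch> reports)
        {
            double maxF1 = System.Linq.Enumerable.Max(reports, r => r.F1measure);

            var names = new System.Collections.Generic.List<string>();
            foreach (ReportSketch r in reports)
            {
                if (r.F1measure == maxF1) names.Add(r.Name); // all tied rows are kept
            }
            return names;
        }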
        /// <summary>
        /// Executes the operation on the associated dataset fold: deploys the dataset, builds the vector space and feature vectors, runs classification and produces evaluation reports
        /// </summary>
        public override ExperimentDataSetFoldContextPair <OperationContext> Execute(ILogBuilder logger, OperationContext executionContextMain = null, ExperimentModelExecutionContext executionContextExtra = null)
        {
            ExperimentDataSetFoldContextPair <OperationContext> output = new ExperimentDataSetFoldContextPair <OperationContext>(fold, executionContextMain);

            Open();


            if (!setup.documentSelectQuery.PrecompiledScoresFilename.Trim().isNullOrEmpty())
            {
                String precompFile = DocumentSelectResult.CheckAndMakeFilename(setup.documentSelectQuery.PrecompiledScoresFilename);

                var p = executionContextExtra.resourceProvider.GetResourceFile(precompFile, fold);

                DocumentSelectResult scores = DocumentSelectResult.LoadFromFile(p, logger);

                if (scores != null)
                {
                    scores.SaveReport(fold_notes.folder.pathFor("DSScores_loaded.txt", imbSCI.Data.enums.getWritableFileMode.overwrite));

                    scores = setup.documentSelectQuery.ExecuteLimit(scores, logger);

                    IEnumerable <string> assignedIDs = scores.items.Select(x => x.AssignedID);

                    scores.SaveReport(fold_notes.folder.pathFor("DSScores_applied.txt", imbSCI.Data.enums.getWritableFileMode.overwrite));

                    fold.DataSetSubSet(assignedIDs.ToList(), true, true);
                }
                else
                {
                    logger.log(" _ DocumentSelect failed for [" + name + "]");

                    throw new ArgumentException("DSelection file failed: " + setup.documentSelectQuery.PrecompiledScoresFilename);
                }
            }

            classificationReport tmpReport = new classificationReport();

            String dsReportName = fold.name + setup.documentSelectQuery.PrecompiledScoresFilename + setup.documentSelectQuery.SizeLimit;


            DatasetStructureReport dsReport = DatasetStructureReport.MakeStructureReport(fold, dsReportName);

            dsReport.Publish(fold_notes.folder, true, true);

            tmpReport.SetReportDataFields(dsReport);

            if (!output.context.IsDatasetDeployed)
            {
                output.context.DeployDataSet(fold, logger);

                entityOperation.TextRendering(output.context, notes, requirements.MayUseTextRender);


                corpusOperation.SpaceModelPopulation(output.context, notes);

                if (requirements.MayUseSpaceModelCategories)
                {
                    corpusOperation.SpaceModelCategories(output.context, notes);
                }
            }

            tmpReport.SetReportDataFields(output.context, false);

            corpusOperation.FeatureSelection(output.context, notes);


            corpusOperation.VectorSpaceConstruction(output.context, notes, requirements.MayUseVectorSpaceCategories);

            corpusOperation.FeatureVectorConstruction(output.context, notes);


            if (setup.reportOptions.HasFlag(OperationReportEnum.randomSampledDemo))
            {
                logger.log("-- generating random sample report");
                var data_wm = imbNLP.Toolkit.Reporting.ReportGenerators.MakeWeightModelDemoTable(output.context.spaceModel, corpusOperation.weightModel, output.context.SelectedFeatures, 5, "DemoForWeightModel", "Diagnostic report for picked sample");
                data_wm.GetReportAndSave(fold_notes.folder);
                var data_fs = imbNLP.Toolkit.Reporting.ReportGenerators.MakeWeightModelDemoTable(output.context.spaceModel, corpusOperation.filter.WeightModel, output.context.SelectedFeatures, 5, "DemoForFeatureSelection", "Diagnostic report for feature selection filter sample");
                data_fs.GetReportAndSave(fold_notes.folder);
            }

            classificationOperation.PerformClassification(output.context, executionContextExtra.truthTable, setup.dataSetMode, notes);


            corpusOperation.weightModel.DiagnosticDump(fold_notes.folder, logger);


            classificationEvalMetricSet evaluationMetrics = executionContextExtra.truthTable.EvaluateTestResultsToMetricSet(output.context.testResults, setup.OutputFilename + "-" + notes.folder.name, logger);

            if (setup.ExportEvaluationAsDocumentSelectionResult)
            {
                Toolkit.Feature.FeatureVectorDictionaryWithDimensions dict = executionContextExtra.truthTable.GetEvaluationAsFeatureVectorDictionary(output.context.testResults, setup.OutputFilename, logger, setup.ExportEvaluationCorrectScore, setup.ExportEvaluationIncorrectScore);
                String out_ds = setup.ExportEvaluationToFilename.Replace("*", "");
                dict.Save(fold_notes.folder, out_ds.or(setup.OutputFilename), logger);
                dict.Save(notes.folder, out_ds.or(setup.OutputFilename), logger);
            }


            DataTableTypeExtended <classificationEval> inclassEvalTable = new DataTableTypeExtended <classificationEval>("inclass_evaluation", "Test results, per class");

            evaluationMetrics.GetAllEntries().ForEach(x => inclassEvalTable.AddRow(x));
            inclassEvalTable.AddRow(evaluationMetrics.GetSummary("Sum"));
            notes.SaveDataTable(inclassEvalTable, notes.folder_classification);

            classificationReport averagedReport = new classificationReport(evaluationMetrics, setup.averagingMethod);

            averagedReport.Classifier = classificationOperation.classifier.GetSignature();
            averagedReport.saveObjectToXML(notes.folder_classification.pathFor(averagedReport.Name + ".xml", imbSCI.Data.enums.getWritableFileMode.overwrite, "Serialized classification evaluation results summary"));
            averagedReport.ReportToLog(notes);

            averagedReport.SetReportDataFields(output.context, true);
            averagedReport.data.Merge(tmpReport.data);

            averagedReport.SetReportDataFields(classificationOperation.classifier, corpusOperation.filter, corpusOperation.weightModel);



            executionContextExtra.testSummaries.Add(averagedReport);



            OperationContextReport reportOperation = new OperationContextReport();

            reportOperation.DeploySettingsBase(notes);

            reportOperation.GenerateReports(output.context, setup.reportOptions, notes);

            /*
             * if (setup.reportOptions.HasFlag(OperationReportEnum.reportClassification))
             * {
             *
             *  Dictionary<string, List<FeatureVectorWithLabelID>> byCategory = executionContextExtra.truthTable.GroupByTrueCategory(executionContextMain.testResults);
             *
             *  objectTable<classificationReport> tbl = new objectTable<classificationReport>(nameof(classificationReport.Name), "inclass_" + executionContextExtra.runName);
             *  classificationReport macroAverage = new classificationReport("AVG-" + executionContextExtra.runName);
             *  foreach (KeyValuePair<string, List<FeatureVectorWithLabelID>> pair in byCategory)
             *  {
             *      var cReport = executionContextExtra.EvaluateTestResults(pair.Value, pair.Key + "-" + executionContextExtra.runName, logger);
             *
             *      cReport.Classifier = classificationOperation.classifier.GetSignature(); // classifier.name;
             *      cReport.Comment = "Tr/Ts [" + executionContextMain.trainingSet.Count + "]:[" + executionContextMain.testSet.Count + "]";
             *      String path = notes.folder_classification.pathFor(pair.Key + "_result.xml", imbSCI.Data.enums.getWritableFileMode.overwrite, "Serialized evaluation result within category [" + pair.Key + "]", true);
             *
             *      macroAverage.AddValues(cReport);
             *
             *      tbl.Add(cReport);
             *  }
             *  //  macroAverage.DivideValues(byCategory.Keys.Count);
             *
             *  tbl.Add(macroAverage);
             *
             *  notes.SaveDataTable(tbl.GetDataTable(), notes.folder_classification);
             *
             * }*/

            Close();

            return(output);
        }
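
        // End-to-end usage of the sketches above, under the same assumptions: feed
        // (response, truth) label pairs into the confusion counters, then derive
        // macro- and micro-averaged precision. Labels and values are hypothetical;
        // purely illustrative, not part of the library API.
        public static void EvaluationSketchDemo()
        {
            var perClass = new System.Collections.Generic.Dictionary<string, ClassEvalSketch>
            {
                { "spam", new ClassEvalSketch() },
                { "ham",  new ClassEvalSketch() }
            };

            // (predicted, actual) pairs for four hypothetical test documents
            var results = new[] { ("spam", "spam"), ("spam", "ham"), ("ham", "ham"), ("ham", "ham") };

            foreach (var (response, truth) in results)
            {
                AddRecordSketch(perClass, response, truth);
            }

            var evals = new System.Collections.Generic.List<ClassEvalSketch>(perClass.Values);
            System.Console.WriteLine("macro P = " + MacroPrecisionSketch(evals));
            System.Console.WriteLine("micro P = " + MicroPrecisionSketch(evals));
        }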