Beispiel #1
0
        /// <summary>
        /// Registers the winning classifier report and the mean-performance report for one
        /// feature-vector-extractor model, and logs a timestamped completion record for it.
        /// </summary>
        /// <param name="bestPerformingClassifier">Report of the best classifier found for the FVE model</param>
        /// <param name="meanPerformance">Cross-classifier mean performance report for the same FVE model</param>
        /// <param name="fveModel">The feature vector extractor model that has just finished</param>
        public void AddBestPerformer(DocumentSetCaseCollectionReport bestPerformingClassifier, DocumentSetCaseCollectionReport meanPerformance, IWebFVExtractor fveModel)
        {
            bestPerformingClassifiers.Add(bestPerformingClassifier);

            // completion record for this extractor, stamped with the local wall-clock time
            String completionRecord = $"[{fveModel.name}] completed {DateTime.Now.ToLongTimeString()}";
            fveFinishedRecods.Add(completionRecord);

            meanPerformanceForExtractors.Add(meanPerformance);
        }
Beispiel #2
0
 /// <summary>
 /// Returns the report of the top-ranked classifier, lazily creating a placeholder
 /// instance on first access, and refreshes its display name from the extractor.
 /// </summary>
 /// <returns>The (never null) top classifier report</returns>
 public DocumentSetCaseCollectionReport GetTopClassifierReport()
 {
     // lazy initialization: create a placeholder report the first time this is asked for
     topClassifierReport = topClassifierReport ?? new DocumentSetCaseCollectionReport("null");

     topClassifierReport.Name = extractor.name + " - top F1";

     return topClassifierReport;
 }
Beispiel #3
0
        /// <summary>
        /// Appends a multi-line, human-readable summary of the experiment run to <c>output</c>:
        /// validation setup, category/case counts, active feature vectors per model, the best
        /// classifier per FVE model, the overall best performer, the best FVE by S1 measure,
        /// mean classifier performances, and sample-distribution hashes.
        /// </summary>
        /// <param name="output">Optional list to append to; a new list is created when null</param>
        /// <returns>The same list instance with the report lines appended</returns>
        public List <String> DescribeSelf(List <String> output = null)
        {
            if (output == null)
            {
                output = new List <string>();
            }

            output.Add("Experiment [" + experiment.name + "] done in: " + Duration.ToString("F2") + " minutes");
            output.Add(context.setup.description);

            //    context.validationCollections.Count

            // FIX: corrected output typo "Classiffiers" -> "Classifiers"
            output.Add("k-Fold cross validation k[" + experiment.validationSetup.k + "] - RND(T/E)SMP[" + experiment.validationSetup.randomize.ToString() + "] - FVE models [" + experiment.models.Count + "] - Classifiers [" + experiment.classifiers.Count + "]");
            Int32  nCats       = 0;
            Int32  nCases      = 0;
            Double nCasePerCat = 0;

            // count categories and total sample cases across all document-set classes
            foreach (var c in context.classes.GetClasses())
            {
                nCats++;
                nCases += c.WebSiteSample.Count();
            }

            nCasePerCat = nCases.GetRatio(nCats);

            output.Add("Categories [" + nCats + "] with [" + nCases + "] -- cases per category [" + nCasePerCat.ToString("F2") + "]");

            var model = context.tools.model as pipelineMCRepo.model.mcRepoProcessModel;

            // FIX: guard against a failed 'as' cast — the original dereferenced 'model'
            // unconditionally, which throws NullReferenceException when the tools model
            // is not an mcRepoProcessModel
            if (model != null)
            {
                output.Add("Pages per web site (limit) [" + model.setup.target_languagePagePerSite + "]");
            }

            // one line per FVE model: model name followed by a slot per feature vector,
            // showing the vector name when active or a dashed placeholder when inactive
            foreach (var m in context.setup.models)
            {
                String ln = m.name.TrimToMaxLength(15);

                foreach (var fv in m.settings.featureVectors.serialization)
                {
                    if (fv.isActive)
                    {
                        ln = ln.add("[" + fv.name.TrimToMaxLength(10, " ") + "]", " ");
                    }
                    else
                    {
                        ln = ln.add("[" + ("-".Repeat(10)) + "]", " ");
                    }
                }

                // FIX: the composed line was built but never emitted in the original
                // (dead computation) — the loop clearly exists to produce these lines
                output.Add(ln);
            }

            output.Add("----");

            output.Add("The best classifier per FVE models, by cross k-fold mean of F1 (macro-average): ");

            output.Add(String.Format("[{0,-30}] [{1,10}] [{2,10:F5}]", "Feature Vector Model", "Top class.", "Macro F1"));

            foreach (var cl in bestPerformingClassifiers)
            {
                if (cl == theBestPerformer)
                {
                    output.Add(String.Format("[{0,-30}] [{1,10}] [{2,10:F5}] <-- the best ", cl.Name, cl.Classifier, cl.F1measure));
                }
                else
                {
                    output.Add(String.Format("[{0,-30}] [{1,10}] [{2,10:F5}]", cl.Name, cl.Classifier, cl.F1measure));
                }
            }

            output.Add("----");

            output.Add("The best performer: ");

            // NOTE(review): theBestPerformer is assumed non-null once any performer was
            // registered — confirm callers never reach this before AddBestPerformer ran
            output.Add("Name: " + theBestPerformer.Name);
            output.Add("Classifier: " + theBestPerformer.Classifier);
            output.Add("F1 measure: " + theBestPerformer.F1measure.ToString("F5"));

            output.Add("----");

            output.Add("The FVE with highest S1 measure: ");
            output.Add("Name: " + bestModel.modelName);
            output.Add("Range width:    " + bestModel.RangeWidthAvg.ToString("F5"));
            output.Add("Range position: " + bestModel.RangePositionAvg.ToString("F5"));
            output.Add("S1 measure:     " + bestModel.S1Measure.ToString("F5"));

            output.Add("----");

            output.Add("Mean classifier performances by FVE models: ");

            // scan for the extractors with the lowest and highest mean F1, so the listing
            // below can label them "min" / "max"
            DocumentSetCaseCollectionReport minMean = new DocumentSetCaseCollectionReport();

            minMean.F1measure = 1;
            DocumentSetCaseCollectionReport maxMean = new DocumentSetCaseCollectionReport();

            maxMean.F1measure = 0;

            foreach (var cl in meanPerformanceForExtractors)
            {
                if (cl.F1measure <= minMean.F1measure)
                {
                    minMean = cl;
                }
                if (cl.F1measure > maxMean.F1measure)
                {
                    maxMean = cl;
                }
            }

            foreach (var cl in meanPerformanceForExtractors)
            {
                String lb = " --- ";
                if (cl == minMean)
                {
                    lb = " min ";
                }
                if (cl == maxMean)
                {
                    lb = " max ";
                }

                output.Add(String.Format("[{0,-30}] P[{1,10:F5}] R[{2,10:F5}] F1[{3,10:F5}] [{4,5}]", cl.Name, cl.Precision, cl.Recall, cl.F1measure, lb));
            }

            // FIX: corrected output typo "infication" -> "indication"
            output.Add(" --- FVE cross-classifier means are computed as quality indication for FVE's configuration");

            output.Add(" --- FVE models and k-fold sample distribution MD5 hash");

            foreach (var c in valColVsModelVsSampleHash)
            {
                output.Add(c);
            }

            return(output);
        }
Beispiel #4
0
        /// <summary>
        /// Aggregates the per-fold, per-classifier evaluation reports held by this collection
        /// into macro- and micro-averaged summary tables, detects the best-performing
        /// classifier across folds, and saves the resulting report tables into <c>folder</c>.
        /// </summary>
        /// <param name="context">Experiment execution context (setup, classes, logger, report hooks)</param>
        /// <param name="folder">Destination folder node for the generated report files</param>
        public void MakeReports(experimentExecutionContext context, folderNode folder)
        {
            meanClassifierReport = new DocumentSetCaseCollectionReport(extractor.name);

            // pivot structure: [classifier, fold] -> average report for that pair
            aceDictionary2D <IWebPostClassifier, kFoldValidationCase, DocumentSetCaseCollectionReport> tempStructure = new aceDictionary2D <IWebPostClassifier, kFoldValidationCase, DocumentSetCaseCollectionReport>();

            DSCCReports firstCase = null;
            List <IWebPostClassifier> classifiers = new List <IWebPostClassifier>();

            // collect the distinct classifier set and fill the pivot from every fold's avgReports
            foreach (var kFoldCasePair in this)
            {
                if (firstCase == null)
                {
                    firstCase = kFoldCasePair.Value;
                }
                foreach (var pair in kFoldCasePair.Value.avgReports)
                {
                    tempStructure[pair.Key, kFoldCasePair.Key] = pair.Value;
                    if (!classifiers.Contains(pair.Key))
                    {
                        classifiers.Add(pair.Key);
                    }
                }
            }



            // DataSet dataSet = new DataSet(context.setup.name);



            // <---------- CREATING AVERAGE TABLE -----------------------------------------------------
            var tpAvgMacro = new DataTableTypeExtended <DocumentSetCaseCollectionReport>(context.setup.name + " summary", "Cross k-fold averages measures, fold-level measures are computed by macro-average method");
            var tpAvgMicro = new DataTableTypeExtended <DocumentSetCaseCollectionReport>(context.setup.name + " summary", "Cross k-fold averages measures, fold-level measures are computed by micro-average method");

            List <DocumentSetCaseCollectionReport> macroaverages = new List <DocumentSetCaseCollectionReport>();
            // cumulative table: one row per (classifier, fold) plus one average row per classifier
            DataTableTypeExtended <DocumentSetCaseCollectionReport> EMperKFolds = new DataTableTypeExtended <DocumentSetCaseCollectionReport>(extractor.name + "_allReports");


            foreach (IWebPostClassifier classifier in classifiers)
            {
                // < ---- report on each classifier

                context.logger.log("-- producing report about [" + classifier.name + "]");
                //objectTable<DocumentSetCaseCollectionReport> tp = new objectTable<DocumentSetCaseCollectionReport>(nameof(DocumentSetCaseCollectionReport.Name), classifier + "_sum");



                // macro-averaged cross-fold mean for this classifier
                DocumentSetCaseCollectionReport avg = new DocumentSetCaseCollectionReport(classifier.name + " macro-averaging, k-fold avg. ");

                // micro-averaged counterpart, filled from the metric set below
                DocumentSetCaseCollectionReport rep_eval = new DocumentSetCaseCollectionReport(classifier.name + " micro-averaging, k-fold avg.");

                rep_eval.Classifier = classifier.name;

                // NOTE(review): 'metrics' is created here but never visibly populated before
                // being passed to rep_eval.AddValues below; 'eval' accumulates per-class
                // metrics yet appears unused afterwards. Confirm whether the metric-set
                // indexer has populate-on-access side effects, or whether the commented
                // line below was meant to wire them together.
                classificationEvalMetricSet metrics = new classificationEvalMetricSet();
                classificationEval          eval    = new classificationEval();
                //eval = metrics[classifier.name];

                Int32 c = 0;  // number of folds accumulated into 'avg'
                foreach (KeyValuePair <kFoldValidationCase, DSCCReports> kFoldCasePair in this)
                {
                    DocumentSetCaseCollectionReport rep   = kFoldCasePair.Value.avgReports[classifier];
                    kFoldValidationCase             vCase = kFoldCasePair.Key;


                    classificationEvalMetricSet met = rep.GetSetMetrics();

                    if (met != null)
                    {
                        // sum the per-class evaluation counts of this fold into 'eval'
                        foreach (IDocumentSetClass cl in context.classes.GetClasses())
                        {
                            eval = eval + met[cl.name];
                        }
                    }

                    // mutates the fold report's name to "<classifier>_<fold>" for table rows
                    rep.Name = classifier.name + "_" + vCase.name;
                    avg.AddValues(rep);
                    EMperKFolds.AddRow(rep);

                    c++;
                }

                rep_eval.AddValues(metrics, classificationMetricComputation.microAveraging);



                avg.Classifier = classifier.name;
                avg.DivideValues(c);

                // <<< detecting the best performed classifier in all evaluation folds
                if (avg.F1measure > highestF1Value)
                {
                    highestF1Value      = avg.F1measure;
                    topClassifierReport = avg;
                }

                meanClassifierReport.AddValues(avg);


                // -----------------

                EMperKFolds.AddRow(avg);

                tpAvgMacro.AddRow(avg);

                macroaverages.Add(avg);

                if (DOMAKE_MICROaverage)
                {
                    tpAvgMicro.AddRow(rep_eval);
                }
                // tp.Add(rep_eval);

                if (context.tools.operation.DoMakeReportForEachClassifier)
                {
                    // NOTE(review): this is a reference assignment — cTable aliases the
                    // cumulative EMperKFolds table. SetTitle/SetDescription/SetAdditionalInfoEntry
                    // therefore mutate the shared all-folds table, and each "per classifier"
                    // report also contains the rows of previously processed classifiers.
                    // Confirm whether a per-classifier copy was intended.
                    DataTable cTable = EMperKFolds;
                    cTable.SetTitle($"{classifier.name} report");
                    cTable.SetDescription("Summary " + context.setup.validationSetup.k + "-fold validation report for [" + classifier.name + "]");


                    cTable.SetAdditionalInfoEntry("FV Extractor", extractor.name);
                    cTable.SetAdditionalInfoEntry("Classifier", classifier.name);
                    cTable.SetAdditionalInfoEntry("Class name", classifier.GetType().Name);

                    cTable.SetAdditionalInfoEntry("Correct", rep_eval.Correct);
                    cTable.SetAdditionalInfoEntry("Wrong", rep_eval.Wrong);

                    //cTable.SetAdditionalInfoEntry("Precision", rep_eval.Precision);
                    //cTable.SetAdditionalInfoEntry("Recall", rep_eval.Recall);
                    //cTable.SetAdditionalInfoEntry("F1", rep_eval.F1measure);

                    cTable.SetAdditionalInfoEntry("True Positives", metrics[classifier.name].truePositives);
                    cTable.SetAdditionalInfoEntry("False Negatives", metrics[classifier.name].falseNegatives);
                    cTable.SetAdditionalInfoEntry("False Positives", metrics[classifier.name].falsePositives);


                    cTable.AddExtra("Classifier: " + classifier.name + " [" + classifier.GetType().Name + "]");
                    var info = classifier.DescribeSelf();
                    info.ForEach(x => cTable.AddExtra(x));

                    cTable.AddExtra("-----------------------------------------------------------------------");

                    cTable.AddExtra("Precision, Recall and F1 measures expressed in this table are computed by macroaveraging shema");
                    //  output.CopyRowsFrom(cTable);


                    cTable.GetReportAndSave(folder, appManager.AppInfo, extractor.name + "_classifier_" + classifier.name);

                    // dataSet.AddTable(cTable);
                }
            }



            rangeFinderForDataTable rangerMacro = new rangeFinderForDataTable(tpAvgMacro, "Name");



            meanClassifierReport.DivideValues(classifiers.Count);
            if (macroaverages.Count > 0)
            {
                // highlight the rows of the best / worst macro-averaged classifiers
                Double maxF1 = macroaverages.Max(x => x.F1measure);
                Double minF1 = macroaverages.Min(x => x.F1measure);

                List <String> minCaseNames = macroaverages.Where(x => x.F1measure == minF1).Select(x => x.Name).ToList();
                List <String> maxCaseNames = macroaverages.Where(x => x.F1measure == maxF1).Select(x => x.Name).ToList();


                var style = EMperKFolds.GetRowMetaSet().SetStyleForRowsWithValue <String>(DataRowInReportTypeEnum.dataHighlightA, nameof(DocumentSetCaseCollectionReport.Name), maxCaseNames);

                EMperKFolds.GetRowMetaSet().AddUnit(style);


                //  style = tpAvgMacro.GetRowMetaSet().SetStyleForRowsWithValue<String>(DataRowInReportTypeEnum.dataHighlightC, nameof(DocumentSetCaseCollectionReport.Name), minCaseNames);



                tpAvgMacro.SetAdditionalInfoEntry("FV Extractor", extractor.name);
                if (DOMAKE_MICROaverage)
                {
                    tpAvgMicro.SetAdditionalInfoEntry("FV Extractor", extractor.name);
                }


                // NOTE(review): averageNames is already given to SetStyleForRowsWithValue,
                // then each name is AddMatch-ed again — possibly redundant; confirm the
                // row-meta API semantics before simplifying.
                List <String> averageNames = macroaverages.Select(x => x.Name).ToList();
                var           avg_style    = EMperKFolds.GetRowMetaSet().SetStyleForRowsWithValue <String>(DataRowInReportTypeEnum.dataHighlightC, nameof(DocumentSetCaseCollectionReport.Name), averageNames);
                foreach (var x in averageNames)
                {
                    avg_style.AddMatch(x);
                }
            }

            // ::: ------------------------------------------------------------------------------------------------- ::: --------------------------------------------------------------------- ::: //

            tpAvgMacro.SetTitle($"{extractor.name} - macroaverage report");
            if (DOMAKE_MICROaverage)
            {
                tpAvgMicro.SetTitle($"{extractor.name} - microaverage report");
            }

            tpAvgMacro.AddExtra("Complete report on " + context.setup.validationSetup.k + "-fold validation FVE [" + extractor.name + "]");
            tpAvgMacro.AddExtra("Fold-level P, R and F1 measures are computed by macroaveraging method, values here are cross k-fold means.");

            if (DOMAKE_MICROaverage)
            {
                tpAvgMicro.AddExtra("Complete " + context.setup.validationSetup.k + "-fold validation report for FVE [" + extractor.name + "]");
            }
            if (DOMAKE_MICROaverage)
            {
                tpAvgMicro.AddExtra("Fold-level P, R and F1 measures are computed by microaveraging method, values here are cross k-fold means.");
            }

            context.AddExperimentInfo(tpAvgMacro);
            if (DOMAKE_MICROaverage)
            {
                context.AddExperimentInfo(tpAvgMicro);
            }

            tpAvgMacro.AddExtra(extractor.description);


            // semantic extractors contribute extra self-description lines to both tables
            if (extractor is semanticFVExtractor)
            {
                semanticFVExtractor semExtractor = (semanticFVExtractor)extractor;

                semExtractor.termTableConstructor.DescribeSelf().ForEach(x => tpAvgMacro.AddExtra(x));
                semExtractor.CloudConstructor.DescribeSelf().ForEach(x => tpAvgMacro.AddExtra(x));
                semExtractor.termTableConstructor.DescribeSelf().ForEach(x => tpAvgMicro.AddExtra(x));
                semExtractor.CloudConstructor.DescribeSelf().ForEach(x => tpAvgMicro.AddExtra(x));
            }

            context.logger.log("-- producing summary reports on [" + extractor.name + "]");

            // append min/max/avg/stdev aggregate rows to the macroaverage table, then save
            rangerMacro.AddRangeRows("Macroaverage ", tpAvgMacro, true,
                                     imbSCI.Core.math.aggregation.dataPointAggregationType.min | imbSCI.Core.math.aggregation.dataPointAggregationType.max
                                     | imbSCI.Core.math.aggregation.dataPointAggregationType.avg
                                     | imbSCI.Core.math.aggregation.dataPointAggregationType.stdev);
            tpAvgMacro.GetReportAndSave(folder, appManager.AppInfo, extractor.name + "_macroaverage_report", true, true);


            EMperKFolds.AddExtra("The table shows average measures for each fold --- rows marked with colored background show averages for all folds, per classifier.");

            EMperKFolds.GetReportAndSave(folder, appManager.AppInfo, extractor.name + "_allFolds", true, true);

            if (DOMAKE_MICROaverage)
            {
                tpAvgMicro.GetReportAndSave(folder, appManager.AppInfo, extractor.name + "_microaverage_report", true, true);
            }
            //dataSet.GetReportVersion().serializeDataSet(extractor.name + "_classifiers_MultiSheetSummary", folder, imbSCI.Data.enums.reporting.dataTableExportEnum.excel, appManager.AppInfo);
        }