Example #1
        public static DataTableTypeExtended<classificationReportExpanded> MakeOverviewTable(this classificationReportCollection reportCollection, classificationReportDataComplexContext context, String name = "", String description = "")
        {
            DataTableTypeExtended<classificationReportExpanded> table = new DataTableTypeExtended<classificationReportExpanded>("overview_" + reportCollection.name, "Aggregate report for experiments run [" + DateTime.Now.ToShortDateString() + "]");

            if (name != "")
            {
                table.SetAdditionalInfoEntry("Group name", name);
            }
            if (description != "")
            {
                table.SetAdditionalInfoEntry("Group desc", description);
            }

            table.SetDescription(reportCollection.description);
            table.SetAdditionalInfoEntry("Path", reportCollection.rootPath, "Path of the collection");

            table.SetAdditionalInfoEntry("Subcollections", reportCollection.Children.Count, "Number of sub collections");
            foreach (var rep in reportCollection)
            {
                table.AddRow(rep);
            }

            context.overview_tables.Add(table);

            return table;
            //table.GetReportAndSave(folder, imbACE.Core.appManager.AppInfo);
        }
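
A minimal usage sketch (not from the original source) for the MakeOverviewTable extension above; it assumes a classificationReportCollection and a classificationReportDataComplexContext instance already exist, and only the members shown in the example are used.

        // Hypothetical helper, for illustration only
        public static void BuildOverviewExample(classificationReportCollection reportCollection, classificationReportDataComplexContext context)
        {
            // Builds the aggregate overview table and registers it in context.overview_tables
            var overview = reportCollection.MakeOverviewTable(context, "Batch A", "Overview of all runs in batch A");
        }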
Example #2
        /// <summary>
        /// Gets the data table with tag counting results.
        /// </summary>
        /// <param name="_name">The table name.</param>
        /// <param name="_description">The table description.</param>
        /// <returns>Data table with one row per counted tag.</returns>
        public DataTable GetDataTable(String _name, String _description)
        {
            DataTableTypeExtended<HtmlTagCounterResult> output = new DataTableTypeExtended<HtmlTagCounterResult>(_name, _description);
            var results = GetResultList();

            results.ForEach(x => output.AddRow(x));

            output.SetAdditionalInfoEntry("Unknown dist. tags", DistinctUnknownTags.Count, "Number of distinct unknown tags");
            output.SetAdditionalInfoEntry("Distinct tags", DistinctTags.Count, "Number of known distinct tags");
            output.SetAdditionalInfoEntry("Unknown tags", UnknownTags.Count(), "Number of unknown distinct tags");

            output.AddExtra("Distinct tags found: [" + DistinctTags.toCsvInLine() + "]");
            output.AddExtra("Unknown tags found: [" + UnknownTags.toCsvInLine() + "]");

            return output;
        }
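
A hedged usage sketch (not from the original source) for GetDataTable above; HtmlTagCounter is an assumed name for the host type that exposes GetResultList, DistinctTags, UnknownTags and the method itself.

        // Hypothetical helper, for illustration only
        public static DataTable MakeTagReport(HtmlTagCounter counter)
        {
            // The arguments become the table's name and description
            return counter.GetDataTable("tag_counts", "Distinct and unknown HTML tags found during parsing");
        }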
Example #3
        /// <summary>
        /// Gets the data table with all reduction actions.
        /// </summary>
        /// <returns>Data table with one row per reduction action.</returns>
        public DataTable GetDataTable()
        {
            DataTableTypeExtended<cloudMatrixReductionReportEntry> output = new DataTableTypeExtended<cloudMatrixReductionReportEntry>(name, "Actions done by cloud matrix [" + name + "]");

            rangeFinderForDataTable ranger = new rangeFinderForDataTable(output, nameof(cloudMatrixReductionReportEntry.Hash));

            foreach (var entry in entries)
            {
                output.AddRow(entry);
            }

            output.SetAdditionalInfoEntry(nameof(TotalReduction).imbTitleCamelOperation(true), TotalReduction);
            output.SetAdditionalInfoEntry(nameof(CFFunctionReduction).imbTitleCamelOperation(true), CFFunctionReduction);
            output.SetAdditionalInfoEntry(nameof(LPFilterReduction).imbTitleCamelOperation(true), LPFilterReduction);
            output.SetAdditionalInfoEntry(nameof(LPFRemovals).imbTitleCamelOperation(true), LPFRemovals);
            output.SetAdditionalInfoEntry(nameof(MWRemovals).imbTitleCamelOperation(true), MWRemovals);

            return output;
        }
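
A hedged usage sketch (not from the original source) for the reduction report table above; cloudMatrixReductionReport is an assumed name for the type that holds the entries, and GetReportAndSave is used the same way as in the other examples in this file.

        // Hypothetical helper, for illustration only
        public static void SaveReductionReport(cloudMatrixReductionReport reductionReport, folderNode folder)
        {
            DataTable table = reductionReport.GetDataTable();
            table.GetReportAndSave(folder, appManager.AppInfo, "cloudMatrix_reduction");
        }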
Example #4
        public DataTable GetDataTable(String name)
        {
            DataTableTypeExtended<ContentMetrics> output = new DataTableTypeExtended<ContentMetrics>(name);

            foreach (var c in ClassMetrics)
            {
                output.AddRow(c);
            }
            output.AddRow(sumMetrics);
            output.AddRow(avgMetrics);

            output.SetAdditionalInfoEntry("Terms", terms.Count, "Number of distinct terms in the dataset");
            output.SetAdditionalInfoEntry("MaxTF", terms.GetMaxFrequency(), "Max. frequency in the corpus");
            output.SetAdditionalInfoEntry("SumTF", terms.GetSumFrequency(), "Sum of frequencies in the corpus");

            output.SetAdditionalInfoEntry("Stems", DictinctStems.Count(), "Number of distinct stems");
            output.SetAdditionalInfoEntry("Tokens", DictinctTokens.Count(), "Number of distinct tokens");

            return output;
        }
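
A hedged usage sketch (not from the original source) for the content metrics table above; DatasetContentAnalysis is an assumed name for the object that carries ClassMetrics, sumMetrics, avgMetrics and the term statistics.

        // Hypothetical helper, for illustration only
        public static void SaveDatasetMetrics(DatasetContentAnalysis analysis, folderNode folder)
        {
            DataTable metricsTable = analysis.GetDataTable("dataset_metrics");
            metricsTable.GetReportAndSave(folder, appManager.AppInfo, "dataset_metrics");
        }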
Example #5
        public void MakeReports(experimentExecutionContext context, folderNode folder)
        {
            meanClassifierReport = new DocumentSetCaseCollectionReport(extractor.name);

            aceDictionary2D<IWebPostClassifier, kFoldValidationCase, DocumentSetCaseCollectionReport> tempStructure = new aceDictionary2D<IWebPostClassifier, kFoldValidationCase, DocumentSetCaseCollectionReport>();

            DSCCReports firstCase = null;
            List<IWebPostClassifier> classifiers = new List<IWebPostClassifier>();

            foreach (var kFoldCasePair in this)
            {
                if (firstCase == null)
                {
                    firstCase = kFoldCasePair.Value;
                }
                foreach (var pair in kFoldCasePair.Value.avgReports)
                {
                    tempStructure[pair.Key, kFoldCasePair.Key] = pair.Value;
                    if (!classifiers.Contains(pair.Key))
                    {
                        classifiers.Add(pair.Key);
                    }
                }
            }

            // DataSet dataSet = new DataSet(context.setup.name);

            // <---------- CREATING AVERAGE TABLE -----------------------------------------------------
            var tpAvgMacro = new DataTableTypeExtended<DocumentSetCaseCollectionReport>(context.setup.name + " summary", "Cross k-fold average measures; fold-level measures are computed by the macro-averaging method");
            var tpAvgMicro = new DataTableTypeExtended<DocumentSetCaseCollectionReport>(context.setup.name + " summary", "Cross k-fold average measures; fold-level measures are computed by the micro-averaging method");

            List<DocumentSetCaseCollectionReport> macroaverages = new List<DocumentSetCaseCollectionReport>();
            DataTableTypeExtended<DocumentSetCaseCollectionReport> EMperKFolds = new DataTableTypeExtended<DocumentSetCaseCollectionReport>(extractor.name + "_allReports");

            foreach (IWebPostClassifier classifier in classifiers)
            {
                // < ---- report on each classifier

                context.logger.log("-- producing report about [" + classifier.name + "]");
                //objectTable<DocumentSetCaseCollectionReport> tp = new objectTable<DocumentSetCaseCollectionReport>(nameof(DocumentSetCaseCollectionReport.Name), classifier + "_sum");

                DocumentSetCaseCollectionReport avg = new DocumentSetCaseCollectionReport(classifier.name + " macro-averaging, k-fold avg. ");

                DocumentSetCaseCollectionReport rep_eval = new DocumentSetCaseCollectionReport(classifier.name + " micro-averaging, k-fold avg.");

                rep_eval.Classifier = classifier.name;

                classificationEvalMetricSet metrics = new classificationEvalMetricSet();
                classificationEval          eval    = new classificationEval();
                //eval = metrics[classifier.name];

                Int32 c = 0;
                foreach (KeyValuePair<kFoldValidationCase, DSCCReports> kFoldCasePair in this)
                {
                    DocumentSetCaseCollectionReport rep   = kFoldCasePair.Value.avgReports[classifier];
                    kFoldValidationCase             vCase = kFoldCasePair.Key;


                    classificationEvalMetricSet met = rep.GetSetMetrics();

                    if (met != null)
                    {
                        foreach (IDocumentSetClass cl in context.classes.GetClasses())
                        {
                            eval = eval + met[cl.name];
                        }
                    }

                    rep.Name = classifier.name + "_" + vCase.name;
                    avg.AddValues(rep);
                    EMperKFolds.AddRow(rep);

                    c++;
                }

                rep_eval.AddValues(metrics, classificationMetricComputation.microAveraging);

                avg.Classifier = classifier.name;
                avg.DivideValues(c);

                // <<< detecting the best-performing classifier across all evaluation folds
                if (avg.F1measure > highestF1Value)
                {
                    highestF1Value      = avg.F1measure;
                    topClassifierReport = avg;
                }

                meanClassifierReport.AddValues(avg);


                // -----------------

                EMperKFolds.AddRow(avg);

                tpAvgMacro.AddRow(avg);

                macroaverages.Add(avg);

                if (DOMAKE_MICROaverage)
                {
                    tpAvgMicro.AddRow(rep_eval);
                }
                // tp.Add(rep_eval);

                if (context.tools.operation.DoMakeReportForEachClassifier)
                {
                    DataTable cTable = EMperKFolds;
                    cTable.SetTitle($"{classifier.name} report");
                    cTable.SetDescription("Summary " + context.setup.validationSetup.k + "-fold validation report for [" + classifier.name + "]");


                    cTable.SetAdditionalInfoEntry("FV Extractor", extractor.name);
                    cTable.SetAdditionalInfoEntry("Classifier", classifier.name);
                    cTable.SetAdditionalInfoEntry("Class name", classifier.GetType().Name);

                    cTable.SetAdditionalInfoEntry("Correct", rep_eval.Correct);
                    cTable.SetAdditionalInfoEntry("Wrong", rep_eval.Wrong);

                    //cTable.SetAdditionalInfoEntry("Precision", rep_eval.Precision);
                    //cTable.SetAdditionalInfoEntry("Recall", rep_eval.Recall);
                    //cTable.SetAdditionalInfoEntry("F1", rep_eval.F1measure);

                    cTable.SetAdditionalInfoEntry("True Positives", metrics[classifier.name].truePositives);
                    cTable.SetAdditionalInfoEntry("False Negatives", metrics[classifier.name].falseNegatives);
                    cTable.SetAdditionalInfoEntry("False Positives", metrics[classifier.name].falsePositives);


                    cTable.AddExtra("Classifier: " + classifier.name + " [" + classifier.GetType().Name + "]");
                    var info = classifier.DescribeSelf();
                    info.ForEach(x => cTable.AddExtra(x));

                    cTable.AddExtra("-----------------------------------------------------------------------");

                    cTable.AddExtra("Precision, Recall and F1 measures expressed in this table are computed by macroaveraging shema");
                    //  output.CopyRowsFrom(cTable);


                    cTable.GetReportAndSave(folder, appManager.AppInfo, extractor.name + "_classifier_" + classifier.name);

                    // dataSet.AddTable(cTable);
                }
            }

            rangeFinderForDataTable rangerMacro = new rangeFinderForDataTable(tpAvgMacro, "Name");

            meanClassifierReport.DivideValues(classifiers.Count);
            if (macroaverages.Count > 0)
            {
                Double maxF1 = macroaverages.Max(x => x.F1measure);
                Double minF1 = macroaverages.Min(x => x.F1measure);

                List<String> minCaseNames = macroaverages.Where(x => x.F1measure == minF1).Select(x => x.Name).ToList();
                List<String> maxCaseNames = macroaverages.Where(x => x.F1measure == maxF1).Select(x => x.Name).ToList();


                var style = EMperKFolds.GetRowMetaSet().SetStyleForRowsWithValue<String>(DataRowInReportTypeEnum.dataHighlightA, nameof(DocumentSetCaseCollectionReport.Name), maxCaseNames);

                EMperKFolds.GetRowMetaSet().AddUnit(style);


                //  style = tpAvgMacro.GetRowMetaSet().SetStyleForRowsWithValue<String>(DataRowInReportTypeEnum.dataHighlightC, nameof(DocumentSetCaseCollectionReport.Name), minCaseNames);



                tpAvgMacro.SetAdditionalInfoEntry("FV Extractor", extractor.name);
                if (DOMAKE_MICROaverage)
                {
                    tpAvgMicro.SetAdditionalInfoEntry("FV Extractor", extractor.name);
                }


                List<String> averageNames = macroaverages.Select(x => x.Name).ToList();
                var avg_style = EMperKFolds.GetRowMetaSet().SetStyleForRowsWithValue<String>(DataRowInReportTypeEnum.dataHighlightC, nameof(DocumentSetCaseCollectionReport.Name), averageNames);
                foreach (var x in averageNames)
                {
                    avg_style.AddMatch(x);
                }
            }

            // ::: ------------------------------------------------------------------------------------------------- ::: --------------------------------------------------------------------- ::: //

            tpAvgMacro.SetTitle($"{extractor.name} - macroaverage report");
            if (DOMAKE_MICROaverage)
            {
                tpAvgMicro.SetTitle($"{extractor.name} - microaverage report");
            }

            tpAvgMacro.AddExtra("Complete report on " + context.setup.validationSetup.k + "-fold validation FVE [" + extractor.name + "]");
            tpAvgMacro.AddExtra("Fold-level P, R and F1 measures are computed by macroaveraging method, values here are cross k-fold means.");

            if (DOMAKE_MICROaverage)
            {
                tpAvgMicro.AddExtra("Complete " + context.setup.validationSetup.k + "-fold validation report for FVE [" + extractor.name + "]");
                tpAvgMicro.AddExtra("Fold-level P, R and F1 measures are computed by the micro-averaging method; values here are cross k-fold means.");
            }

            context.AddExperimentInfo(tpAvgMacro);
            if (DOMAKE_MICROaverage)
            {
                context.AddExperimentInfo(tpAvgMicro);
            }

            tpAvgMacro.AddExtra(extractor.description);


            if (extractor is semanticFVExtractor)
            {
                semanticFVExtractor semExtractor = (semanticFVExtractor)extractor;

                semExtractor.termTableConstructor.DescribeSelf().ForEach(x => tpAvgMacro.AddExtra(x));
                semExtractor.CloudConstructor.DescribeSelf().ForEach(x => tpAvgMacro.AddExtra(x));
                semExtractor.termTableConstructor.DescribeSelf().ForEach(x => tpAvgMicro.AddExtra(x));
                semExtractor.CloudConstructor.DescribeSelf().ForEach(x => tpAvgMicro.AddExtra(x));
            }

            context.logger.log("-- producing summary reports on [" + extractor.name + "]");

            rangerMacro.AddRangeRows("Macroaverage ", tpAvgMacro, true,
                                     imbSCI.Core.math.aggregation.dataPointAggregationType.min | imbSCI.Core.math.aggregation.dataPointAggregationType.max
                                     | imbSCI.Core.math.aggregation.dataPointAggregationType.avg
                                     | imbSCI.Core.math.aggregation.dataPointAggregationType.stdev);
            tpAvgMacro.GetReportAndSave(folder, appManager.AppInfo, extractor.name + "_macroaverage_report", true, true);


            EMperKFolds.AddExtra("The table shows average measures for each fold --- rows marked with colored background show averages for all folds, per classifier.");

            EMperKFolds.GetReportAndSave(folder, appManager.AppInfo, extractor.name + "_allFolds", true, true);

            if (DOMAKE_MICROaverage)
            {
                tpAvgMicro.GetReportAndSave(folder, appManager.AppInfo, extractor.name + "_microaverage_report", true, true);
            }
            //dataSet.GetReportVersion().serializeDataSet(extractor.name + "_classifiers_MultiSheetSummary", folder, imbSCI.Data.enums.reporting.dataTableExportEnum.excel, appManager.AppInfo);
        }
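
A hedged usage sketch (not from the original source) for MakeReports above; kFoldCrossValidationCollection is an assumed name for the dictionary-like type (kFoldValidationCase to DSCCReports) on which the method is defined.

        // Hypothetical helper, for illustration only
        public static void ReportAllCollections(IEnumerable<kFoldCrossValidationCollection> collections, experimentExecutionContext context, folderNode folder)
        {
            foreach (var collection in collections)
            {
                // Writes macro/micro average tables and optional per-classifier reports into the given folder
                collection.MakeReports(context, folder);
            }
        }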
        /// <summary>
        /// Generates the final reports and readme files
        /// </summary>
        public void CloseExperiment(ILogBuilder logger, long startOfLog)
        {
            if (!testSummaries.Any())
            {
                logger.log("No experiment procedures were performed");

                return;
            }
            DataTableTypeExtended<classificationReport> summaryTable = new DataTableTypeExtended<classificationReport>("Test results", "k-fold cross-validation results");

            classificationReport sumRow = new classificationReport(runName);

            sumRow.Comment = runName + ", " + description;


            //    classificationEvalMetricSet metric = new classificationEvalMetricSet("Total", truthTable.labels_without_unknown);

            foreach (classificationReport s in testSummaries)
            {
                summaryTable.AddRow(s);
                //metric = metric + s;

                if (sumRow.Classifier.isNullOrEmpty())
                {
                    sumRow.Classifier = s.Classifier;
                }

                sumRow.AddValues(s);
            }

            sumRow.DivideValues(testSummaries.Count);

            sumRow.SetReportDataFields(crossValidation, this);

            summaryTable.SetDescription(description);

            summaryTable.SetAdditionalInfoEntry("RunName", runName);
            summaryTable.SetAdditionalInfoEntry("Description", description);
            summaryTable.SetAdditionalInfoEntry("Averaging", averagingMethod.ToString());

            summaryTable.AddRow(sumRow);

            summaryTable.GetReportAndSave(notes.folder, signature);

            finalReport = sumRow;

            //sumRow.ReportToLog(logger);
            sumRow.ReportToLog(notes);

            objectSerialization.saveObjectToXML(sumRow, notes.folder.pathFor("results.xml", imbSCI.Data.enums.getWritableFileMode.overwrite, "Experiment results", true));


            logger.log("Experiment completed");

            notes.SaveNote("note");

            String logPrintout = logger.GetContent(startOfLog);
            String p           = notes.folder.pathFor("log.txt", imbSCI.Data.enums.getWritableFileMode.overwrite, "Log printout during experiment execution");

            File.WriteAllText(p, logPrintout);

            experimentRootFolder.generateReadmeFiles(signature);
        }
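
A hedged usage sketch (not from the original source) for CloseExperiment above; experimentHost is an assumed name for the type that owns testSummaries, notes and the experiment folder.

        // Hypothetical helper, for illustration only
        public static void FinishRun(experimentHost experiment, ILogBuilder logger, long startOfLog)
        {
            // Writes the summary table, results.xml, log.txt and the readme files, then logs completion
            experiment.CloseExperiment(logger, startOfLog);
        }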