Example #1
        public static DataTableTypeExtended <classificationReportExpanded> MakeOverviewTable(this classificationReportCollection reportCollection, classificationReportDataComplexContext context, String name = "", String description = "")
        {
            DataTableTypeExtended <classificationReportExpanded> table = new DataTableTypeExtended <classificationReportExpanded>("overview_" + reportCollection.name, "Aggregate report for experiments run [" + DateTime.Now.ToShortDateString() + "]");

            if (name != "")
            {
                table.SetAdditionalInfoEntry("Group name", name);
            }
            if (description != "")
            {
                table.SetAdditionalInfoEntry("Group desc", description);
            }



            table.SetDescription(reportCollection.description);
            table.SetAdditionalInfoEntry("Path", reportCollection.rootPath, "Path of the collection");

            table.SetAdditionalInfoEntry("Subcollections", reportCollection.Children.Count, "Number of sub collections");
            foreach (var rep in reportCollection)
            {
                table.AddRow(rep);
            }

            context.overview_tables.Add(table);

            return(table);
            //table.GetReportAndSave(folder, imbACE.Core.appManager.AppInfo);
        }
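
A minimal calling sketch for the extension method above; the reportCollection and context instances are assumed to already exist, and the group name and description strings are illustrative:

            // hypothetical usage: build the overview table and save it as an Excel report
            DataTableTypeExtended <classificationReportExpanded> overview = reportCollection.MakeOverviewTable(context, "ExperimentGroupA", "Overview report for group A");
            overview.GetReportAndSave(reportCollection.rootFolder, imbACE.Core.appManager.AppInfo);
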
Example #2
        public void UnitTestMethod()
        {
            // Example 1: Loading a BibTex file directly through the constructor
            BibTexDataFile bib_1 = new BibTexDataFile("Resources\\test\\S0306457309000259.bib");



            // Example 2: Loading a BibTex file from a resolved resource path
            String path = folderResources.findFile("S0306457309000259.bib", SearchOption.AllDirectories);

            // initializes an empty BibTexDataFile object
            BibTexDataFile bib = new BibTexDataFile();

            // loads a .bib or .bibtex file from the specified path
            bib.Load(path, log);


            // converts the loaded BibTex entries into a DataTable, with all columns discovered in the entries
            DataTable dt = bib.ConvertToDataTable();

            // saves the DataTable to an Excel file, without adding a Legend spreadsheet
            var finalPath = dt.serializeDataTable(Data.enums.reporting.dataTableExportEnum.excel, bib.name, folderResults, notation);

            // creates an extended version of the Excel file, with an additional spreadsheet for the Legend and other meta information
            var reportDataTable_ref = dt.GetReportAndSave(folderResults, notation);


            // Example 3: The short way

            // High-level method that creates an extended version of the Excel file, with an additional spreadsheet for the Legend and other meta information
            var reportDataTable = BibTexTools.ExportToExcel(path, notation, log);


            // Example 4: Working with BibTexEntryModel

            // Creation of BibTex entry from code
            BibTexEntryModel entry = new BibTexEntryModel()
            {
                EntryKey  = "SOKOLOVA2009427",
                EntryType = "article",
                journal   = "Information Processing & Management",
                title     = "A systematic analysis of performance measures for classification tasks",
                keywords  = "Performance evaluation, Machine Learning, Text classification",
                year      = 2005,
                number    = 2,
                issn      = "0000-0000",
                @abstract = "Abs",
                doi       = "https://doi.org/10.1016/j.ipm.2009.03.002",
                url       = "http://www.sciencedirect.com/science/article/pii/S0306457309000259"
            };

            // Creation of a typed data table collection
            DataTableTypeExtended <BibTexEntryModel> bibTable = new DataTableTypeExtended <BibTexEntryModel>("RuntimeCreatedBibTex", "BibTex table, created in Run Time");

            // adds the entry created above (without this, the exported table would be empty)
            bibTable.AddRow(entry);

            // creates an extended version of the Excel file, with an additional spreadsheet for the Legend and other meta information
            var codeDataTable_ref = bibTable.GetReportAndSave(folderResults, notation);
        }
Example #3
        public DataTableExtended GetDataTable()
        {
            DataTableTypeExtended <webLemmaTerm> dte = new DataTableTypeExtended <webLemmaTerm>(name, description);

            foreach (var pair in index)
            {
                dte.AddRow(pair.Value);
            }
            return(dte);
        }
Example #4
        public DataTableTypeExtended <FeatureCWPFrequencies> GetDataTable()
        {
            DataTableTypeExtended <FeatureCWPFrequencies> output = new DataTableTypeExtended <FeatureCWPFrequencies>(name);

            foreach (var p in this)
            {
                output.AddRow(p.Value);
            }

            return(output);
        }
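
Both GetDataTable variants above follow the same recurring pattern: construct a typed table, call AddRow once per item, then optionally export. A condensed, hedged restatement of that pattern (the webLemmaTerm row type is reused from the first variant; folderResults and notation are assumed, as in Example #2):

            // recurring pattern: typed table -> AddRow per item -> extended Excel report
            DataTableTypeExtended <webLemmaTerm> table = new DataTableTypeExtended <webLemmaTerm>(name, description);
            foreach (var pair in index)
            {
                table.AddRow(pair.Value);
            }
            table.GetReportAndSave(folderResults, notation);
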
Example #5
 /// <summary>
 /// Gets the table.
 /// </summary>
 /// <param name="folds">The folds.</param>
 /// <returns></returns>
 public DataTableTypeExtended <secondaryReportOnFVE> GetTable(Int32 folds)
 {
     if (!tablesByFolds.ContainsKey(folds))
     {
         DataTableTypeExtended <secondaryReportOnFVE> dataTableExtended = new DataTableTypeExtended <secondaryReportOnFVE>("SecondaryReport", "Experiments with k=[" + folds + "] cross-validation");
         tablesByFolds.Add(folds, dataTableExtended);
         top_inFolds_instance.Add(folds, new secondaryReportOnFVE());
         top_inFolds.Add(folds, "");
         top_F1inFolds.Add(folds, Double.MinValue);
     }
     return(tablesByFolds[folds]);
 }
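
The method above lazily creates one table (and its companion trackers) per fold count. The same guard, sketched with TryGetValue — a behaviorally equivalent variant, not the library's own code:

     // equivalent lazy-initialization guard, written with TryGetValue
     // (initialization of the companion top_* trackers from the original is omitted here)
     if (!tablesByFolds.TryGetValue(folds, out DataTableTypeExtended <secondaryReportOnFVE> table))
     {
         table = new DataTableTypeExtended <secondaryReportOnFVE>("SecondaryReport", "Experiments with k=[" + folds + "] cross-validation");
         tablesByFolds.Add(folds, table);
     }
     return table;
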
Example #6
        public static void MakeReport(classificationReportCollection reportCollection, classificationReportCollectionSettings settings, aceAuthorNotation appInfo, ILogBuilder log, classificationReportStyleDefinition style, classificationReportDataComplexContext context)
        {
            DataTableTypeExtended <classificationReportExpanded> table = reportCollection.MakeOverviewTable(context, reportCollection.name, reportCollection.description);

            table.SetTitle(reportCollection.name);

            var statDataTable = table.GetReportAndSave(reportCollection.rootFolder, appInfo);

            log.log("Report [" + table.TableName + "] created at " + statDataTable.lastFilePath);

            //  context.cumulative_tables.Add(table);

            var layers = reportCollection.GetSpaceLayers(style);

            foreach (var pair in layers)
            {
                var reportSpace = classificationReportSpace.BuildReportSpace(pair.Value, reportCollection.datasetName, settings.SELECT_REPORT_NAME_PARTS, style, pair.Key);

                if (!context.report_spaces.ContainsKey(reportSpace.name))
                {
                    context.report_spaces.Add(reportSpace.name, new List <classificationReportSpace>());
                    context.comparative_tables.Add(reportSpace.name, new List <DataTable>());
                    context.comparative_narrow_tables.Add(reportSpace.name, new List <DataTable>());
                }
                context.report_spaces[reportSpace.name].Add(reportSpace);


                System.Data.DataTable comparative_table = reportSpace.ConstructTable("comparative_" + reportCollection.name + "_" + reportSpace.name, reportCollection.description);

                context.comparative_tables[reportSpace.name].Add(comparative_table);

                comparative_table.AddExtra("Group path: " + reportCollection.rootFolder.path);

                comparative_table.GetReportAndSave(reportCollection.rootFolder, appInfo);


                System.Data.DataTable comparative_table_small = reportSpace.ConstructTable("comparative_" + reportCollection.name + "_" + reportSpace.name + "_small", reportCollection.description, classificationReportTableMode.onlyBasic);

                context.comparative_narrow_tables[reportSpace.name].Add(comparative_table_small);


                var styleFS = style.CloneViaXML();
                styleFS.valueToUse = classificationReportStyleDefinition.GetFS(); //new reportExpandedDataPair(classificationReportStyleDefinition.VALUE_FS, "Selected Features", "Number of features actually selected");

                reportSpace = classificationReportSpace.BuildReportSpace(pair.Value, reportCollection.datasetName, settings.SELECT_REPORT_NAME_PARTS, styleFS, pair.Key);
                reportSpace.ConstructTable("featureSelected_" + reportCollection.name + "_" + reportSpace.name, reportCollection.description).GetReportAndSave(reportCollection.rootFolder, appInfo);
            }



            //    return comparative_table;
        }
Example #7
        /// <summary>
        /// Creates generic DataTable collection, adds 5 rows and generates Excel and CSV file
        /// </summary>
        public void ExampleOne_DataTableDataAnnotation()
        {
            // creating typed DataTable collection, holding DataEntryTest class
            DataTableTypeExtended<DataEntryTest> dataTableTypeExtended = new DataTableTypeExtended<DataEntryTest>(nameof(DataEntryTest), nameof(ExampleOne_DataTableDataAnnotation));

            // adding five rows
            dataTableTypeExtended.AddRow(new DataEntryTest());
            dataTableTypeExtended.AddRow(new DataEntryTest());
            dataTableTypeExtended.AddRow(new DataEntryTest());
            dataTableTypeExtended.AddRow(new DataEntryTest());
            dataTableTypeExtended.AddRow(new DataEntryTest());

            // Generating and exporting report into Excel file
            DataTableForStatistics report = dataTableTypeExtended.GetReportAndSave(folderResults);
        }
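
The DataEntryTest entry class itself is not shown on this page; a hedged sketch of what such an annotated class might look like, using standard System.ComponentModel attributes (whether the library reads exactly these attributes is an assumption):

            // illustrative entry class: property annotations would drive column captions and descriptions
            public class DataEntryTest
            {
                [DisplayName("ID"), Description("Row identifier")]
                public Int32 ID { get; set; } = 0;

                [DisplayName("Name"), Description("Entry display name")]
                public String Name { get; set; } = "Test entry";
            }
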
Example #8
        /// <summary>
        /// Gets the data table.
        /// </summary>
        /// <param name="_name">The name.</param>
        /// <param name="_description">The description.</param>
        /// <returns></returns>
        public DataTable GetDataTable(String _name, String _description)
        {
            DataTableTypeExtended <HtmlTagCounterResult> output = new DataTableTypeExtended <HtmlTagCounterResult>(_name, _description);
            var results = GetResultList();

            results.ForEach(x => output.AddRow(x));

            output.SetAdditionalInfoEntry("Unknown dist. tags", DistinctUnknownTags.Count, "Number of distinct unknown tags");
            output.SetAdditionalInfoEntry("Distinct tags", DistinctTags.Count, "Number of known distinct tags");
            output.SetAdditionalInfoEntry("Unknown tags", UnknownTags.Count(), "Number of unknown distinct tags");

            output.AddExtra("Distinct tags found: [" + DistinctTags.toCsvInLine() + "]");
            output.AddExtra("Unknown tags found: [" + UnknownTags.toCsvInLine() + "]");

            return(output);
        }
Example #9
        /// <summary>
        /// Gets the data table, with all actions
        /// </summary>
        /// <returns></returns>
        public DataTable GetDataTable()
        {
            DataTableTypeExtended <cloudMatrixReductionReportEntry> output = new DataTableTypeExtended <cloudMatrixReductionReportEntry>(name, "Actions done by cloud matrix [" + name + "]");

            rangeFinderForDataTable ranger = new rangeFinderForDataTable(output, nameof(cloudMatrixReductionReportEntry.Hash));

            foreach (var entry in entries)
            {
                output.AddRow(entry);
            }

            output.SetAdditionalInfoEntry(nameof(TotalReduction).imbTitleCamelOperation(true), TotalReduction);
            output.SetAdditionalInfoEntry(nameof(CFFunctionReduction).imbTitleCamelOperation(true), CFFunctionReduction);
            output.SetAdditionalInfoEntry(nameof(LPFilterReduction).imbTitleCamelOperation(true), LPFilterReduction);
            output.SetAdditionalInfoEntry(nameof(LPFRemovals).imbTitleCamelOperation(true), LPFRemovals);
            output.SetAdditionalInfoEntry(nameof(MWRemovals).imbTitleCamelOperation(true), MWRemovals);

            return(output);
        }
Example #10
        /// <summary>Creation of BibTex entry from code, and generation of Excel table</summary>

        /** <example><para>Creation of BibTex entry from code, and generation of Excel table</para>
         *  <code>
         *  // --- We create the entry and write its source code to the disk
         *
         *  // Creation of BibTex entry from code
         *  BibTexEntryModel entry = new BibTexEntryModel()
         *  {
         *      EntryKey = "SOKOLOVA2009427",
         *      EntryType = "article",
         *      journal = "Information Processing & Management",
         *      title = "A systematic analysis of performance measures for classification tasks",
         *      keywords = "Performance evaluation, Machine Learning, Text classification",
         *      year = 2005,
         *      number = 2,
         *      issn = "0000-0000",
         *      @abstract = "Abs",
         *      doi = "https://doi.org/10.1016/j.ipm.2009.03.002",
         *      url = "http://www.sciencedirect.com/science/article/pii/S0306457309000259",
         *      author = "Marina Sokolova and Guy Lapalme"
         *  };
         *
         *  // New instance of the TextProcessor object; normally you would share this instance with other parts of your code.
         *  BibTexSourceProcessor processor = new BibTexSourceProcessor();
         *
         *  // Generating BibTex code
         *  String code = entry.GetSource(processor.latex, log);
         *
         *  // Making path
         *  String path = folderResults.pathFor(nameof(Example4_UsingObjectModel) + ".txt");
         *
         *  File.WriteAllText(path, code);
         *
         *  // --- Now we export it to the Excel file
         *
         *  // Creation of data table collection
         *  DataTableTypeExtended<BibTexEntryModel> bibTable = new DataTableTypeExtended<BibTexEntryModel>("RuntimeCreatedBibTex", "BibTex table, created in Run Time");
         *  bibTable.AddRow(entry);
         *
         *  // creates an extended version of the Excel file, with an additional spreadsheet for the Legend and other meta information
         *  var codeDataTable_ref = bibTable.GetReportAndSave(folderResults, notation, nameof(Example4_UsingObjectModel));
         *  </code>
         *  </example>
         */
        public void Example4_UsingObjectModel()
        {
            // --- We create the entry and write its source code to the disk

            // Creation of BibTex entry from code
            BibTexEntryModel entry = new BibTexEntryModel()
            {
                EntryKey  = "SOKOLOVA2009427",
                EntryType = "article",
                journal   = "Information Processing & Management",
                title     = "A systematic analysis of performance measures for classification tasks",
                keywords  = "Performance evaluation, Machine Learning, Text classification",
                year      = 2005,
                number    = 2,
                issn      = "0000-0000",
                @abstract = "Abs",
                doi       = "https://doi.org/10.1016/j.ipm.2009.03.002",
                url       = "http://www.sciencedirect.com/science/article/pii/S0306457309000259",
                author    = "Marina Sokolova and Guy Lapalme"
            };

            // New instance of the TextProcessor object; normally you would share this instance with other parts of your code.
            BibTexSourceProcessor processor = new BibTexSourceProcessor();

            // Generating BibTex code
            String code = entry.GetSource(processor.latex, log);

            // Making path
            String path = folderResults.pathFor(nameof(Example4_UsingObjectModel) + ".txt");

            File.WriteAllText(path, code);

            // --- Now we export it to the Excel file

            // Creation of data table collection
            DataTableTypeExtended <BibTexEntryModel> bibTable = new DataTableTypeExtended <BibTexEntryModel>("RuntimeCreatedBibTex", "BibTex table, created in Run Time");

            bibTable.AddRow(entry);

            // creates an extended version of the Excel file, with an additional spreadsheet for the Legend and other meta information
            var codeDataTable_ref = bibTable.GetReportAndSave(folderResults, notation, nameof(Example4_UsingObjectModel));
        }
Example #11
        public DataTable GetDataTable(String name)
        {
            DataTableTypeExtended <ContentMetrics> output = new DataTableTypeExtended <ContentMetrics>(name);

            foreach (var c in ClassMetrics)
            {
                output.AddRow(c);
            }
            output.AddRow(sumMetrics);
            output.AddRow(avgMetrics);

            output.SetAdditionalInfoEntry("Terms", terms.Count, "Number of distinct terms in the dataset");
            output.SetAdditionalInfoEntry("MaxTF", terms.GetMaxFrequency(), "Max. frequency in the corpus");
            output.SetAdditionalInfoEntry("SumTF", terms.GetSumFrequency(), "Sum of frequencies in the corpus");

            output.SetAdditionalInfoEntry("Stems", DictinctStems.Count(), "Number of distinct stems");
            output.SetAdditionalInfoEntry("Tokens", DictinctTokens.Count(), "Number of distinct tokens");

            return(output);
        }
Example #12
        public IPlaneContext ExecutePlaneMethod(IPlaneContext inputContext, ExperimentModelExecutionContext generalContext, ILogBuilder logger)
        {
            //if (generalContext == null)
            //{
            //    generalContext = new PlanesMethodContext();
            //}
            IEntityPlaneContext entityInputContext = inputContext as IEntityPlaneContext;


            ICorpusPlaneContext entityContext = EntityMethod.ExecutePlaneMethod(inputContext, generalContext, logger) as ICorpusPlaneContext;

            IVectorPlaneContext corpusContext = CorpusMethod.ExecutePlaneMethod(entityContext, generalContext, logger) as IVectorPlaneContext;

            IFeaturePlaneContext vectorContext = VectorMethod.ExecutePlaneMethod(corpusContext, generalContext, logger) as IFeaturePlaneContext;

            IFeaturePlaneContext featureContext = FeatureMethod.ExecutePlaneMethod(vectorContext, generalContext, logger) as IFeaturePlaneContext;

            // --- the results reporting

            var evaluationMetrics = generalContext.truthTable.EvaluateTestResultsToMetricSet(featureContext.testResults, generalContext.runName + "-" + notes.folder.name, logger);

            DataTableTypeExtended <classificationEval> inclassEvalTable = new DataTableTypeExtended <classificationEval>("inclass_evaluation", "Test results, per class");

            evaluationMetrics.GetAllEntries().ForEach(x => inclassEvalTable.AddRow(x));
            inclassEvalTable.AddRow(evaluationMetrics.GetSummary("Sum"));
            notes.SaveDataTable(inclassEvalTable, notes.folder_classification);

            classificationReport averagedReport = new classificationReport(evaluationMetrics, generalContext.averagingMethod);

            averagedReport.Classifier = FeatureMethod.classifier.name;
            averagedReport.saveObjectToXML(notes.folder_classification.pathFor(averagedReport.Name + ".xml", imbSCI.Data.enums.getWritableFileMode.overwrite, "Serialized classification evaluation results summary"));

            generalContext.testSummaries.Add(averagedReport);

            averagedReport.ReportToLog(notes);

            featureContext.provider.Dispose();
            EntityMethod.CacheProvider.Dispose();

            return(generalContext);
        }
Example #13
        public void Publish(folderNode folder, Boolean exportXML = true, Boolean exportDescribe = true, Boolean exportDatatable = true)
        {
            if (exportXML)
            {
                String xml = objectSerialization.ObjectToXML(this);

                String x_path = folder.pathFor(name + "_report.xml", imbSCI.Data.enums.getWritableFileMode.overwrite, "Serialized structural report of a dataset");

                File.WriteAllText(x_path, xml);
            }


            if (exportDescribe)
            {
                builderForText builderForText = new builderForText();

                String t_path = folder.pathFor(name + "_report.txt", imbSCI.Data.enums.getWritableFileMode.overwrite, "Summary of structural report of a dataset");

                Describe(builderForText);

                File.WriteAllText(t_path, builderForText.GetContent());
            }

            if (exportDatatable)
            {
                DataTableTypeExtended <DatasetStructureReport> dt_reports = new DataTableTypeExtended <DatasetStructureReport>(name, "Dataset structure stats");

                dt_reports.AddRow(this);
                foreach (var ch in Children)
                {
                    dt_reports.AddRow(ch);
                }

                dt_reports.GetReportAndSave(folder, null, name + "_report");
            }
        }
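
A minimal calling sketch for Publish, combined with the report factory that also appears in Example #15 below; the fold and folder instances are assumed:

            // hypothetical usage: export XML, text summary and Excel report into one folder
            DatasetStructureReport report = DatasetStructureReport.MakeStructureReport(fold, "dataset_structure");
            report.Publish(folder, exportXML: true, exportDescribe: true, exportDatatable: true);
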
Example #14
        /// <summary>
        /// Publishes the table blocks.
        /// </summary>
        /// <param name="folder">The folder.</param>
        /// <param name="blockSize">Size of the block.</param>
        /// <param name="blockCount">The block count.</param>
        /// <returns></returns>
        public DataTableTypeExtended <FeatureCWPFrequencies> PublishTableBlocks(folderNode folder, Int32 blockSize = 500, Int32 blockCount = 10)
        {
            if (!this.Any())
            {
                return(null);
            }

            foreach (var pair in this)
            {
                pair.Value.Compute();
            }

            Int32 b = 0;

            for (int i = 0; i < blockCount; i++)
            {
                var p = this.First();

                String dt_n = name + "_" + i.ToString();

                DataTableTypeExtended <FeatureCWPFrequencies> cwpMetrics = new DataTableTypeExtended <FeatureCWPFrequencies>(dt_n, "Collected metrics");
                // DataTableTypeExtended<FeatureCWPFrequencies> cwpFrequencies = new DataTableTypeExtended<FeatureCWPFrequencies>(dt_n + "_freq", "frequency metrics");

                // p.Value.SetDataTable(datatable);
                //     DataColumn nm = datatable.Columns.Add("Name");

                Int32 c = 0;

                foreach (var pair in this)
                {
                    // include rows in the half-open index range [i*blockSize, (i+1)*blockSize)
                    if (c >= (i * blockSize) && c < ((i + 1) * blockSize))
                    {
                        cwpMetrics.AddRow(pair.Value);

                        // cwpFrequencies.AddRow(term_finders.)
                    }

                    c++;
                }

                if (cwpMetrics.Rows.Count > 0)
                {
                    DataTableConverterASCII dataTableConverterASCII = new DataTableConverterASCII();

                    dataTableConverterASCII.ConvertToFile(cwpMetrics, folder, dt_n);

                    DataTableForStatistics report = cwpMetrics.GetReportAndSave(folder, null, dt_n);
                }
            }

            DataTableTypeExtended <FeatureCWPFrequencies> output = new DataTableTypeExtended <FeatureCWPFrequencies>();

            foreach (var p in this)
            {
                output.AddRow(p.Value);
            }

            output.Save(folder, null, "stats_frequencies");

            return(output);
        }
Example #15
        public override ExperimentDataSetFoldContextPair <OperationContext> Execute(ILogBuilder logger, OperationContext executionContextMain = null, ExperimentModelExecutionContext executionContextExtra = null)
        {
            ExperimentDataSetFoldContextPair <OperationContext> output = new ExperimentDataSetFoldContextPair <OperationContext>(fold, executionContextMain);

            Open();


            if (!setup.documentSelectQuery.PrecompiledScoresFilename.Trim().isNullOrEmpty())
            {
                String precompFile = DocumentSelectResult.CheckAndMakeFilename(setup.documentSelectQuery.PrecompiledScoresFilename);

                var p = executionContextExtra.resourceProvider.GetResourceFile(precompFile, fold);

                //var p = executionContextExtra.resourceProvider.folder.findFile(precompFile, SearchOption.AllDirectories);

                DocumentSelectResult scores = DocumentSelectResult.LoadFromFile(p, logger);  // objectSerialization.loadObjectFromXML<DocumentSelectResult>(path, logger);

                if (scores != null)
                {
                    scores.SaveReport(fold_notes.folder.pathFor("DSScores_loaded.txt", imbSCI.Data.enums.getWritableFileMode.overwrite));

                    scores = setup.documentSelectQuery.ExecuteLimit(scores, logger);

                    IEnumerable <string> assignedIDs = scores.items.Select(x => x.AssignedID);

                    scores.SaveReport(fold_notes.folder.pathFor("DSScores_applied.txt", imbSCI.Data.enums.getWritableFileMode.overwrite));

                    fold.DataSetSubSet(assignedIDs.ToList(), true, true);
                }
                else
                {
                    logger.log(" _ DocumentSelect failed for [" + name + "]");

                    throw new ArgumentException("DSelection file failed: " + setup.documentSelectQuery.PrecompiledScoresFilename);
                }
            }

            classificationReport tmpReport = new classificationReport();

            String dsReportName = fold.name + setup.documentSelectQuery.PrecompiledScoresFilename + setup.documentSelectQuery.SizeLimit;


            DatasetStructureReport dsReport = DatasetStructureReport.MakeStructureReport(fold, dsReportName);

            dsReport.Publish(fold_notes.folder, true, true);

            tmpReport.SetReportDataFields(dsReport);

            if (!output.context.IsDatasetDeployed)
            {
                output.context.DeployDataSet(fold, logger);

                entityOperation.TextRendering(output.context, notes, requirements.MayUseTextRender);


                corpusOperation.SpaceModelPopulation(output.context, notes);

                if (requirements.MayUseSpaceModelCategories)
                {
                    corpusOperation.SpaceModelCategories(output.context, notes);
                }
            }

            tmpReport.SetReportDataFields(output.context, false);

            corpusOperation.FeatureSelection(output.context, notes);


            corpusOperation.VectorSpaceConstruction(output.context, notes, requirements.MayUseVectorSpaceCategories);

            corpusOperation.FeatureVectorConstruction(output.context, notes);


            if (setup.reportOptions.HasFlag(OperationReportEnum.randomSampledDemo))
            {
                logger.log("-- generating random sample report");
                var data_wm = imbNLP.Toolkit.Reporting.ReportGenerators.MakeWeightModelDemoTable(output.context.spaceModel, corpusOperation.weightModel, output.context.SelectedFeatures, 5, "DemoForWeightModel", "Diagnostic report for picked sample");
                data_wm.GetReportAndSave(fold_notes.folder);
                var data_fs = imbNLP.Toolkit.Reporting.ReportGenerators.MakeWeightModelDemoTable(output.context.spaceModel, corpusOperation.filter.WeightModel, output.context.SelectedFeatures, 5, "DemoForFeatureSelection", "Diagnostic report for feature selection filter sample");
                data_fs.GetReportAndSave(fold_notes.folder);
            }

            classificationOperation.PerformClassification(output.context, executionContextExtra.truthTable, setup.dataSetMode, notes);


            corpusOperation.weightModel.DiagnosticDump(fold_notes.folder, logger);

            //classificationOperation.classifier.

            classificationEvalMetricSet evaluationMetrics = executionContextExtra.truthTable.EvaluateTestResultsToMetricSet(output.context.testResults, setup.OutputFilename + "-" + notes.folder.name, logger);

            if (setup.ExportEvaluationAsDocumentSelectionResult)
            {
                Toolkit.Feature.FeatureVectorDictionaryWithDimensions dict = executionContextExtra.truthTable.GetEvaluationAsFeatureVectorDictionary(output.context.testResults, setup.OutputFilename, logger, setup.ExportEvaluationCorrectScore, setup.ExportEvaluationIncorrectScore);
                String out_ds = setup.ExportEvaluationToFilename.Replace("*", "");
                dict.Save(fold_notes.folder, out_ds.or(setup.OutputFilename), logger);
                //executionContextExtra.resourceProvider.folder
                dict.Save(notes.folder, out_ds.or(setup.OutputFilename), logger);
            }


            DataTableTypeExtended <classificationEval> inclassEvalTable = new DataTableTypeExtended <classificationEval>("inclass_evaluation", "Test results, per class");

            evaluationMetrics.GetAllEntries().ForEach(x => inclassEvalTable.AddRow(x));
            inclassEvalTable.AddRow(evaluationMetrics.GetSummary("Sum"));
            notes.SaveDataTable(inclassEvalTable, notes.folder_classification);

            classificationReport averagedReport = new classificationReport(evaluationMetrics, setup.averagingMethod);

            averagedReport.Classifier = classificationOperation.classifier.GetSignature(); // featureMethod.classifierSettings.name; // FeatureMethod.classifier.name;
            averagedReport.saveObjectToXML(notes.folder_classification.pathFor(averagedReport.Name + ".xml", imbSCI.Data.enums.getWritableFileMode.overwrite, "Serialized classification evaluation results summary"));
            averagedReport.ReportToLog(notes);

            averagedReport.SetReportDataFields(output.context, true);
            averagedReport.data.Merge(tmpReport.data);

            averagedReport.SetReportDataFields(classificationOperation.classifier, corpusOperation.filter, corpusOperation.weightModel);



            executionContextExtra.testSummaries.Add(averagedReport);



            OperationContextReport reportOperation = new OperationContextReport();

            reportOperation.DeploySettingsBase(notes);

            reportOperation.GenerateReports(output.context, setup.reportOptions, notes);

            /*
             * if (setup.reportOptions.HasFlag(OperationReportEnum.reportClassification))
             * {
             *
             *  Dictionary<string, List<FeatureVectorWithLabelID>> byCategory = executionContextExtra.truthTable.GroupByTrueCategory(executionContextMain.testResults);
             *
             *  objectTable<classificationReport> tbl = new objectTable<classificationReport>(nameof(classificationReport.Name), "inclass_" + executionContextExtra.runName);
             *  classificationReport macroAverage = new classificationReport("AVG-" + executionContextExtra.runName);
             *  foreach (KeyValuePair<string, List<FeatureVectorWithLabelID>> pair in byCategory)
             *  {
             *      var cReport = executionContextExtra.EvaluateTestResults(pair.Value, pair.Key + "-" + executionContextExtra.runName, logger);
             *
             *      cReport.Classifier = classificationOperation.classifier.GetSignature(); // classifier.name;
             *      cReport.Comment = "Tr/Ts [" + executionContextMain.trainingSet.Count + "]:[" + executionContextMain.testSet.Count + "]";
             *      String path = notes.folder_classification.pathFor(pair.Key + "_result.xml", imbSCI.Data.enums.getWritableFileMode.overwrite, "Serialized evaluation result within category [" + pair.Key + "]", true);
             *
             *      macroAverage.AddValues(cReport);
             *
             *      tbl.Add(cReport);
             *  }
             *  //  macroAverage.DivideValues(byCategory.Keys.Count);
             *
             *  tbl.Add(macroAverage);
             *
             *  notes.SaveDataTable(tbl.GetDataTable(), notes.folder_classification);
             *
             * }*/

            Close();

            return(output);
        }
Example #16
        protected void runModel(experimentExecutionContext context, IWebFVExtractor model)
        {
            imbSCI.Core.screenOutputControl.logToConsoleControl.setAsOutput(context.logger, model.name);
            Int32 crashRetries = context.tools.operation.doRebootFVEOnCrashRetryLimit;
            aceDictionarySet <IDocumentSetClass, DocumentSetCaseCollection> casesByClasses = new aceDictionarySet <IDocumentSetClass, DocumentSetCaseCollection>();
            DSCCReportSet kFoldReport = new DSCCReportSet(model);
            var           valCol      = context.validationCollections[model.name];

            List <DocumentSetCaseCollectionSet> modelCaseResults = new List <DocumentSetCaseCollectionSet>();

            crashRetries = context.tools.operation.doRebootFVEOnCrashRetryLimit;
            while (crashRetries > 0)
            {
                try
                {
                    experimentNotes modelNotes = new experimentNotes(valCol.folder, "Fold-level experiment settings description notes");
                    modelNotes.AppendLine("# Notes on Feature Vector Extractor: " + model.name);

                    var nts = model.DescribeSelf();
                    nts.ForEach(x => modelNotes.AppendLine(x));



                    context.logger.log("Executing k-fold cases with model [" + model.name + "]");



                    valCol.DescribeSampleDistribution(modelNotes);

                    context.mainReport.valColVsModelVsSampleHash.Add("[" + model.name + "]".toWidthExact(20) + " [sample distribution hash: " + valCol.SampleDistributionHash + "]");

                    modelNotes.SaveNote();

                    ParallelOptions ops = new ParallelOptions();
                    ops.MaxDegreeOfParallelism = context.tools.operation.ParallelThreads;

                    Parallel.ForEach <kFoldValidationCase>(valCol.GetCases(), ops, valCase =>
                    {
                        model.DoFVEAndTraining(valCase, context.tools, context.logger); // <---------------------------------------------------------------------------------------   BUILDING FVE

                        DocumentSetCaseCollectionSet results = model.DoClassification(valCase, context.tools, context.logger);

                        if (!results.Any())
                        {
                            throw new aceScienceException("DoClassification for [" + model.name + "] returned no results!", null, model, "DoClassification " + model.name + " failed!", context);
                        }

                        foreach (var pair in results)
                        {
                            DocumentSetCaseCollection cls = pair.Value;
                            casesByClasses.Add(cls.setClass, cls);
                        }

                        valCase.evaluationResults = results;

                        if (context.tools.DoResultReporting)
                        {
                            context.logger.log("producing reports on k-Fold case [" + valCase.name + "]");
                            DSCCReports r = results.GetReports();

                            var sumMeans = r.GetAverageTable(context); //.GetReportAndSave(valCase.folder, appManager.AppInfo, "CrossValidation_" + valCase.name);
                            sumMeans.SetDescription("FVE report, aggregated for all categories - for fold [" + valCase.name + "]");


                            sumMeans.GetReportAndSave(valCase.folder, appManager.AppInfo, "CrossValidation_" + valCase.name, true, context.tools.operation.doReportsInParalell);

                            var fveAndCase = r.GetFullValidationTable(context);
                            fveAndCase.SetDescription("Per-category aggregate statistics, for each classifier, within fold [" + valCase.name + "], used for macro-averaging");
                            fveAndCase.GetReportAndSave(valCase.folder, appManager.AppInfo, "CrossValidation_extrainfo_" + valCase.name, true, context.tools.operation.doReportsInParalell);

                            var fullCaseReport = results.GetReportOnAllCases();


                            fullCaseReport.GetReportAndSave(valCase.folder, appManager.AppInfo, "FullReport_" + valCase.name, true, context.tools.operation.doReportsInParalell);

                            kFoldReport.Add(valCase, r);
                        }

                        context.logger.log("k-Fold case [" + valCase.name + "] completed");

                        context.notes.log("- - Experiment sequence for [" + valCase.name + "] fold completed");
                        if (context.tools.operation.doSaveKnowledgeForClasses)
                        {
                            valCase.knowledgeLibrary.SaveKnowledgeInstancesForClasses(valCase, context.logger);
                        }
                    });

                    foreach (var fold in valCol.GetCases()) //  Parallel.ForEach<kFoldValidationCase>(valCol.GetCases(), ops, valCase =>
                    {
                        modelCaseResults.Add(fold.evaluationResults);
                    }

                    crashRetries = 0;
                }
                catch (Exception ex)
                {
                    crashRetries--;
                    context.errorNotes.LogException("FVE Model crashed -- retries left [" + crashRetries + "] --- ", ex, model.name);
                    context.logger.log(":::: REPEATING the model [" + model.name + "] ::: CRASHED [" + ex.Message + "] ::: RETRIES [" + crashRetries + "]");
                    imbACE.Services.terminal.aceTerminalInput.doBeepViaConsole(1200, 1000, 1);
                    imbACE.Services.terminal.aceTerminalInput.doBeepViaConsole(2400, 1000, 1);
                    imbSCI.Core.screenOutputControl.logToConsoleControl.setAsOutput(context.logger, "RETRIES[" + crashRetries + "]");
                }
            }


            imbSCI.Core.screenOutputControl.logToConsoleControl.setAsOutput(context.logger, "Reporting");


            valCol.knowledgeLibrary.SaveCaseKnowledgeInstances(context.logger);

            // DocumentSetCaseCollection second = null;
            if (modelCaseResults.Any())
            {
                featureExtractionMetrics modelMetrics = new featureExtractionMetrics(model.name, "All");
                DataTableTypeExtended <featureExtractionMetrics> modelVsCategoryMetrics = new DataTableTypeExtended <featureExtractionMetrics>(model.name, "Model metrics per category");


                // <-------------------------------------- CATEGORIES REPORT ----------------------------------------------

                DataTable allTable = modelCaseResults.First()[0].GetReportTable(false, false).GetClonedShema <DataTable>(); //valCol.GetCases().First().evaluationResults[0].GetReportTable(false, false);


                rangeFinderForDataTable ranger = new rangeFinderForDataTable(allTable, "name");
                ranger.columnsToSignIn.Add("Case");

                foreach (KeyValuePair <IDocumentSetClass, aceConcurrentBag <DocumentSetCaseCollection> > pair in casesByClasses)
                {
                    DocumentSetCaseCollection first = null;
                    DataTable repTable = null;

                    ranger.prepareForNextAggregationBlock(allTable, "name");

                    foreach (DocumentSetCaseCollection cn in pair.Value)
                    {
                        foreach (var cni in cn)
                        {
                            if (cni != null)
                            {
                                cn.BuildRow(cni, allTable, false);
                            }
                        }
                    }

                    ranger.AddRangeRows(pair.Key.name, allTable, true, imbSCI.Core.math.aggregation.dataPointAggregationType.avg | imbSCI.Core.math.aggregation.dataPointAggregationType.stdev);

                    var categoryMetrics = new featureExtractionMetrics(model.name, pair.Key.name);
                    categoryMetrics.SetValues(ranger);

                    modelVsCategoryMetrics.AddRow(categoryMetrics);
                    modelMetrics.AddValues(categoryMetrics);

                    categoryMetrics.saveObjectToXML(valCol.folder.pathFor(model.name + "_" + categoryMetrics.Name + ".xml", imbSCI.Data.enums.getWritableFileMode.overwrite, "FV and Category sample metrics, serialized object"));
                    //context.notes.log("- - Creating report for category [" + pair.Key.name + "] completed");
                    //repTable.GetReportAndSave(valCol.folder, appManager.AppInfo, model.name + "_category_" + pair.Key.name);
                }

                modelMetrics.DivideValues(casesByClasses.Count);
                modelMetrics.saveObjectToXML(valCol.folder.pathFor(model.name + "_metrics.xml", imbSCI.Data.enums.getWritableFileMode.overwrite, "Cross-categories macroaveraged metrics of the FVE model [" + model.name + "]"));

                modelVsCategoryMetrics.AddRow(modelMetrics);
                modelVsCategoryMetrics.GetRowMetaSet().SetStyleForRowsWithValue <String>(DataRowInReportTypeEnum.dataHighlightA, "Name", modelMetrics.Name);
                modelVsCategoryMetrics.GetReportAndSave(valCol.folder, appManager.AppInfo, model.name + "_metrics", true, true);

                context.mainReport.AddModelMetrics(modelMetrics);


                context.notes.log("- Creating report for all categories [" + model.name + "] ");
                allTable.GetReportAndSave(valCol.folder, appManager.AppInfo, model.name + "_categories", true, context.tools.operation.doReportsInParalell);
            }



            kFoldReport.MakeReports(context, valCol.folder);
            context.mainReport.AddBestPerformer(kFoldReport.GetTopClassifierReport(), kFoldReport.meanClassifierReport, model);

            // <---------------- creation of complete report

            context.notes.log("- Experiment sequence with Feature Vector Extractor [" + model.name + "] completed");
            context.notes.SaveNote();

            // <------------- END OF THE MODEL -------------------------------------------------------------------------------------------------
        }
Example #17
        /// <summary>
        /// Prepares for parallel execution.
        /// </summary>
        /// <param name="tools">The tools.</param>
        /// <param name="_context">The context.</param>
        public webProjectKnowledgeSet PrepareForParallelExecution(classifierTools tools, experimentExecutionContext _context)
        {
            if (caseKnowledgeSet == null)
            {
                caseKnowledgeSet = new webProjectKnowledgeSet();
            }

            if (items.Any())
            {
                experimentContext.notes.log("Mining Context was ready already.");
                return(caseKnowledgeSet);
            }
            DateTime startTime = DateTime.Now;

            experimentContext = _context;



            List <webCaseKnowledge> cases = new List <webCaseKnowledge>();

            folderNode classReportFolder = experimentContext.folder.Add("General", "General and diagnostic reports", "The folder contains general (outside k-folds) reports on analysed industries (categories), web sites and other diagnostic data");

            // <----------------------------------------------------------------------------------------------------------------        [ performing pipeline ]
            experimentContext.notes.log("Executing the Mining Context decomposition with the pipeline model");
            foreach (IDocumentSetClass classSet in experimentContext.classes.GetClasses())
            {
                var pipelineContext = GetContextForPipeline(tools, classSet);
                sitesByCategory.Add(classSet, new List <pipelineTaskMCSiteSubject>());

                if (!pipelineContext.exitByType.ContainsKey(typeof(pipelineTaskMCSiteSubject)))
                {
                    throw new aceGeneralException("Pipeline context output contains no web site subjects! Check the pipeline Site Task constructor.", null, pipelineContext, "Pipeline broken");
                }

                var sitesForContext = pipelineContext.exitByType[typeof(pipelineTaskMCSiteSubject)]; // <----- preparing
                foreach (var site in sitesForContext)
                {
                    tokenBySite.Add(site as pipelineTaskMCSiteSubject, new ConcurrentBag <pipelineTaskSubjectContentToken>());
                    sitesByCategory[classSet].Add(site as pipelineTaskMCSiteSubject);

                    webCaseKnowledge webCase = new webCaseKnowledge(site as pipelineTaskMCSiteSubject, classSet);

                    caseKnowledgeSet.Add(webCase);
                    cases.Add(webCase);
                }

                semanticFVExtractorKnowledge kn = new semanticFVExtractorKnowledge();
                kn.name = classSet.name + "_general";
                kn.relatedItemPureName = classSet.name;
                kn.type = WebFVExtractorKnowledgeType.aboutCompleteCategory;
                kn.Deploy(classReportFolder, experimentContext.logger);
                knowledgeByClass.TryAdd(classSet, kn);
            }

            experimentContext.notes.log("Sorting tokens for all sites [in parallel]");
            Parallel.ForEach(tokenBySite.Keys, site =>
            {
                var leafs = site.getAllLeafs();
                foreach (var leaf in leafs)
                {
                    pipelineTaskSubjectContentToken token = leaf as pipelineTaskSubjectContentToken;
                    if (token != null)
                    {
                        tokenBySite[site].Add(token);
                    }
                }
            });

            foreach (var c in cases)
            {
                c.tokens = tokenBySite[c.MCSiteSubject];
            }


            experimentContext.notes.log("Building diagnostic TF-IDF master tables for all classes [in parallel]");


            Boolean useIntegratedApproach = false;



            if (useIntegratedApproach)
            {
                var valCase = experimentContext.validationCollections[experimentContext.masterExtractor.name].GetDiagnosticCase(experimentContext.classes);
                Parallel.ForEach(sitesByCategory, pair =>
                {
                    knowledgeByClass.TryAdd(pair.Key, experimentContext.masterExtractor.DoFVExtractionForClassViaCases(valCase.trainingCases[pair.Key.classID], pair.Key, valCase, experimentContext.tools, experimentContext.logger));
                });
            }
            else
            {
                Parallel.ForEach(sitesByCategory, pair =>
                {
                    IDocumentSetClass category             = pair.Key;
                    List <pipelineTaskMCSiteSubject> sites = pair.Value;

                    var lt = BuildLemmaTableForClass(tools, category, sites);
                    lt.Save();
                    // lt.SaveAs(classReportFolder.pathFor(lt.info.Name), imbSCI.Data.enums.getWritableFileMode.overwrite);
                });
            }

            experimentContext.notes.log("Saving lexic resource cache subset - for later reuse in case of repeated experiment run");
            tools.SaveCache();


            if (!useIntegratedApproach)
            {
                experimentContext.notes.log("Performing chunk construction for all web sites in all categories [in serial]");



                foreach (IDocumentSetClass classSet in experimentContext.classes.GetClasses())
                {
                    BuildChunksForClass(tools, classSet);
                }



                foreach (IDocumentSetClass classSet in experimentContext.classes.GetClasses())
                {
                    experimentContext.masterExtractor.chunkTableConstructor.process(chunksByCategory[classSet], cnt_level.mcPage, knowledgeByClass[classSet].WLChunkTableOfIndustryClass, null, experimentContext.logger, false);
                }
            }

            if (tools.operation.doCreateDiagnosticMatrixAtStart)
            {
                experimentContext.notes.log("Performing diagnostic analysis on all categories...[doCreateDiagnosticMatrixAtStart=true]");



                folderNode matrixReport = classReportFolder.Add("clouds", "More reports on semantic cloud", "Directory contains exported DirectedGraphs, various matrix derivatives, combined cloud and other diagnostic artifacts");

                List <lemmaSemanticCloud> clouds         = new List <lemmaSemanticCloud>();
                List <lemmaSemanticCloud> filteredClouds = new List <lemmaSemanticCloud>();

                var converter = lemmaSemanticCloud.GetDGMLConverter();

                foreach (IDocumentSetClass classSet in experimentContext.classes.GetClasses())
                {
                    // experimentContext.masterExtractor.chunkTableConstructor.process(chunksByCategory[classSet], cnt_level.mcPage, knowledgeByClass[classSet].WLChunkTableOfIndustryClass, null, experimentContext.logger, false);


                    var cloud = experimentContext.masterExtractor.CloudConstructor.process(knowledgeByClass[classSet].WLChunkTableOfIndustryClass, knowledgeByClass[classSet].WLTableOfIndustryClass, knowledgeByClass[classSet].semanticCloud, experimentContext.logger, tokenBySite.Keys.ToList(), tools.GetLemmaResource());
                    knowledgeByClass[classSet].semanticCloud.className = classSet.name;
                    clouds.Add(cloud);

                    if (experimentContext.tools.operation.doUseSimpleGraphs)
                    {
                        cloud.GetSimpleGraph(true).Save(matrixReport.pathFor("cloud_initial_" + classSet.name, imbSCI.Data.enums.getWritableFileMode.none, "Initial version of full-sample set, diagnostic Semantic Cloud for category [" + classSet.name + "]"));
                    }
                    else
                    {
                        converter.ConvertToDMGL(cloud).Save(matrixReport.pathFor("cloud_initial_" + classSet.name, imbSCI.Data.enums.getWritableFileMode.none, "Initial version of full-sample set, diagnostic Semantic Cloud for category [" + classSet.name + "]"));
                    }



                    knowledgeByClass[classSet].semanticCloudFiltered           = knowledgeByClass[classSet].semanticCloud.CloneIntoType <lemmaSemanticCloud>(true);
                    knowledgeByClass[classSet].semanticCloudFiltered.className = classSet.name;
                    filteredClouds.Add(knowledgeByClass[classSet].semanticCloudFiltered);
                }

                cloudMatrix matrix = new cloudMatrix("CloudMatrix", "Diagnostic cloud matrix created from the complete sample set of [" + clouds.Count() + "] classes");
                matrix.build(filteredClouds, experimentContext.logger);

                lemmaSemanticCloud mergedCloudInitial = matrix.GetUnifiedCloud();
                mergedCloudInitial.Save(matrixReport.pathFor("unified_initial_cloud.xml", imbSCI.Data.enums.getWritableFileMode.overwrite, "Serialized object - Initial version of Semantic Cloud built as union of full-sample set Semantic Clouds of all categories"));


                var reductions = matrix.TransformClouds(experimentContext.masterExtractor.settings.semanticCloudFilter, experimentContext.logger);

                var p = matrixReport.pathFor("reductions_nodes.txt", imbSCI.Data.enums.getWritableFileMode.overwrite, "Report on Cloud Matrix transformation process");
                File.WriteAllLines(p, reductions);



                matrix.BuildTable(experimentContext.masterExtractor.settings.semanticCloudFilter, cloudMatrixDataTableType.initialState | cloudMatrixDataTableType.maxCloudFrequency | cloudMatrixDataTableType.absoluteValues).GetReportAndSave(matrixReport, appManager.AppInfo, "matrix_max_cf_initial", true, experimentContext.tools.operation.doReportsInParalell);

                matrix.BuildTable(experimentContext.masterExtractor.settings.semanticCloudFilter, cloudMatrixDataTableType.initialState | cloudMatrixDataTableType.overlapSize | cloudMatrixDataTableType.absoluteValues).GetReportAndSave(matrixReport, appManager.AppInfo, "matrix_overlap_size_initial", true, experimentContext.tools.operation.doReportsInParalell);

                matrix.BuildTable(experimentContext.masterExtractor.settings.semanticCloudFilter, cloudMatrixDataTableType.initialState | cloudMatrixDataTableType.overlapValue | cloudMatrixDataTableType.absoluteValues).GetReportAndSave(matrixReport, appManager.AppInfo, "matrix_overlap_value_initial", true, experimentContext.tools.operation.doReportsInParalell);


                matrix.ExportTextReports(matrixReport, true, "matrix_cf");
                matrix.ExportTextReports(matrixReport, false, "matrix_cf");

                lemmaSemanticCloud mergedCloudAfterReduction = matrix.GetUnifiedCloud();
                mergedCloudAfterReduction.Save(matrixReport.pathFor("unified_reduced_cloud.xml", imbSCI.Data.enums.getWritableFileMode.overwrite, "Serialized object -Version of all-categories diagnostic Semantic Cloud, after Cloud Matrix filter was applied"));

                if (experimentContext.tools.operation.doUseSimpleGraphs)
                {
                    mergedCloudInitial.GetSimpleGraph(true).Save(matrixReport.pathFor("unified_initial_cloud", imbSCI.Data.enums.getWritableFileMode.overwrite, "DirectedGraphML file - unified Semantic Cloud, before Cloud Matrix filter was applied - Open this in Visual Studio"));
                }
                else
                {
                    converter = lemmaSemanticCloud.GetDGMLConverter();

                    converter.ConvertToDMGL(mergedCloudInitial).Save(matrixReport.pathFor("unified_initial_cloud", imbSCI.Data.enums.getWritableFileMode.overwrite, "DirectedGraphML file - unified Semantic Cloud, before Cloud Matrix filter was applied - Open this in Visual Studio"));
                }


                // <-------- analysis -----------------------------------------------------------------------------------
                DataTableTypeExtended <freeGraphReport> cloudReports = new DataTableTypeExtended <freeGraphReport>();
                foreach (var cl in filteredClouds)
                {
                    freeGraphReport fgReport = new freeGraphReport(cl);
                    fgReport.Save(matrixReport);
                    cloudReports.AddRow(fgReport);
                }
                freeGraphReport unifiedReport = new freeGraphReport(mergedCloudAfterReduction);
                unifiedReport.Save(matrixReport);
                cloudReports.AddRow(unifiedReport);


                cloudReports.GetReportAndSave(matrixReport, appManager.AppInfo, "analysis_SemanticClouds");
                // <-------- analysis -----------------------------------------------------------------------------------



                foreach (IDocumentSetClass classSet in experimentContext.classes.GetClasses())
                {
                    var cloud = knowledgeByClass[classSet].semanticCloudFiltered; // .WLChunkTableOfIndustryClass, knowledgeByClass[classSet].WLTableOfIndustryClass, knowledgeByClass[classSet].semanticCloud, experimentContext.logger, tokenBySite.Keys.ToList());


                    if (experimentContext.tools.operation.doUseSimpleGraphs)
                    {
                        cloud.GetSimpleGraph(true).Save(matrixReport.pathFor("cloud_reduced_" + classSet.name, imbSCI.Data.enums.getWritableFileMode.overwrite, "DirectedGraphML file - reduced Semantic Cloud for category [" + classSet.name + "], after Cloud Matrix filter was applied - Open this in Visual Studio"));
                    }
                    else
                    {
                        converter = lemmaSemanticCloud.GetDGMLConverter();

                        converter.ConvertToDMGL(cloud).Save(matrixReport.pathFor("cloud_reduced_" + classSet.name, imbSCI.Data.enums.getWritableFileMode.overwrite, "DirectedGraphML file - reduced Semantic Cloud for category [" + classSet.name + "], after Cloud Matrix filter was applied - Open this in Visual Studio"));
                    }



                    //converter.ConvertToDMGL(cloud).Save(matrixReport.pathFor("cloud_reduced_" + classSet.name, imbSCI.Data.enums.getWritableFileMode.none, "DirectedGraphML file - Initial version of Semantic Cloud built as union of full-sample set Semantic Clouds of all categories (Open this with VS)"), imbSCI.Data.enums.getWritableFileMode.overwrite);
                }

                instanceCountCollection <String> tfcounter = new instanceCountCollection <string>();
                foreach (IDocumentSetClass classSet in experimentContext.classes.GetClasses())
                {
                    var wlt = knowledgeByClass[classSet].WLTableOfIndustryClass.GetDataTable();
                    wlt.DefaultView.Sort = "termFrequency desc";
                    var sorted = wlt.DefaultView.ToTable();
                    var tbl    = wlt.GetClonedShema <DataTable>(true);

                    tbl.CopyRowsFrom(sorted, 0, 100);
                    tbl.GetReportAndSave(classReportFolder, appManager.AppInfo, classSet.name + "_WebLemma", true, experimentContext.tools.operation.doReportsInParalell);

                    var cht = knowledgeByClass[classSet].WLChunkTableOfIndustryClass.GetDataTable();
                    cht.DefaultView.Sort = "termFrequency desc";
                    var csorted = cht.DefaultView.ToTable();

                    tbl = cht.GetClonedShema <DataTable>(true);
                    tbl.CopyRowsFrom(csorted, 0, 100);
                    tbl.GetReportAndSave(classReportFolder, appManager.AppInfo, classSet.name + "_Chunks", true, experimentContext.tools.operation.doReportsInParalell);

                    tfcounter.AddInstanceRange(knowledgeByClass[classSet].WLTableOfIndustryClass.unresolved);


                    knowledgeByClass[classSet].OnBeforeSave();
                }

                List <String> countSorted = tfcounter.getSorted();
                StringBuilder sb          = new StringBuilder();
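                // each output line has the form "<count>  :  <token>"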
                foreach (String s in countSorted)
                {
                    sb.AppendLine(String.Format("{1}  :  {0}", s, tfcounter[s]));
                }
                String pt = classReportFolder.pathFor("unresolved_tokens.txt", imbSCI.Data.enums.getWritableFileMode.none, "Frequency list of all unresolved letter-only tokens");
                File.WriteAllText(pt, sb.ToString());
            }


            if (tools.operation.doFullDiagnosticReport)
            {
                experimentContext.notes.log("Generating full diagnostic report on classes...");
                DataTable rep = null;
                foreach (IDocumentSetClass classSet in experimentContext.classes.GetClasses())
                {
                    rep = this.GetClassKnowledgeReport(classSet, rep);
                }
                rep.SetAdditionalInfoEntry("Experiment", experimentContext.setup.name);

                rep.AddExtra("Experiment: " + experimentContext.setup.name);

                rep.AddExtra("Info: " + experimentContext.setup.description);

                rep.SetDescription("Structural report for all classes in the experiment");
                rep.GetReportAndSave(classReportFolder, appManager.AppInfo, "structural_class_report", true, experimentContext.tools.operation.doReportsInParalell);
            }

            classReportFolder.generateReadmeFiles(appManager.AppInfo);


            experimentContext.notes.log("Mining Context preprocessing done in [" + DateTime.Now.Subtract(startTime).TotalMinutes.ToString("F2") + "] minutes");
            return(caseKnowledgeSet);
        }
Exemple #18
0
        public void MakeReports(experimentExecutionContext context, folderNode folder)
        {
            meanClassifierReport = new DocumentSetCaseCollectionReport(extractor.name);

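            // [classifier, k-fold case] -> averaged report for that fold; filled below from the avgReports of every validation case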
            aceDictionary2D <IWebPostClassifier, kFoldValidationCase, DocumentSetCaseCollectionReport> tempStructure = new aceDictionary2D <IWebPostClassifier, kFoldValidationCase, DocumentSetCaseCollectionReport>();

            DSCCReports firstCase = null;
            List <IWebPostClassifier> classifiers = new List <IWebPostClassifier>();

            foreach (var kFoldCasePair in this)
            {
                if (firstCase == null)
                {
                    firstCase = kFoldCasePair.Value;
                }
                foreach (var pair in kFoldCasePair.Value.avgReports)
                {
                    tempStructure[pair.Key, kFoldCasePair.Key] = pair.Value;
                    if (!classifiers.Contains(pair.Key))
                    {
                        classifiers.Add(pair.Key);
                    }
                }
            }



            // DataSet dataSet = new DataSet(context.setup.name);



            // <---------- CREATING AVERAGE TABLE -----------------------------------------------------
            var tpAvgMacro = new DataTableTypeExtended <DocumentSetCaseCollectionReport>(context.setup.name + " summary", "Cross k-fold average measures; fold-level measures are computed by the macro-average method");
            var tpAvgMicro = new DataTableTypeExtended <DocumentSetCaseCollectionReport>(context.setup.name + " summary", "Cross k-fold average measures; fold-level measures are computed by the micro-average method");
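            // macro-average: compute P/R/F1 per class, then average the per-class values; micro-average: pool TP/FP/FN counts across classes first, then compute P/R/F1 once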

            List <DocumentSetCaseCollectionReport> macroaverages = new List <DocumentSetCaseCollectionReport>();
            DataTableTypeExtended <DocumentSetCaseCollectionReport> EMperKFolds = new DataTableTypeExtended <DocumentSetCaseCollectionReport>(extractor.name + "_allReports");


            foreach (IWebPostClassifier classifier in classifiers)
            {
                // < ---- report on each classifier

                context.logger.log("-- producing report about [" + classifier.name + "]");
                //objectTable<DocumentSetCaseCollectionReport> tp = new objectTable<DocumentSetCaseCollectionReport>(nameof(DocumentSetCaseCollectionReport.Name), classifier + "_sum");



                DocumentSetCaseCollectionReport avg = new DocumentSetCaseCollectionReport(classifier.name + " macro-averaging, k-fold avg.");

                DocumentSetCaseCollectionReport rep_eval = new DocumentSetCaseCollectionReport(classifier.name + " micro-averaging, k-fold avg.");

                rep_eval.Classifier = classifier.name;

                classificationEvalMetricSet metrics = new classificationEvalMetricSet();
                classificationEval          eval    = new classificationEval();
                //eval = metrics[classifier.name];

                Int32 c = 0;
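                // walk every k-fold validation case, accumulating this classifier's per-fold report into the running average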
                foreach (KeyValuePair <kFoldValidationCase, DSCCReports> kFoldCasePair in this)
                {
                    DocumentSetCaseCollectionReport rep   = kFoldCasePair.Value.avgReports[classifier];
                    kFoldValidationCase             vCase = kFoldCasePair.Key;


                    classificationEvalMetricSet met = rep.GetSetMetrics();

                    if (met != null)
                    {
                        foreach (IDocumentSetClass cl in context.classes.GetClasses())
                        {
                            eval = eval + met[cl.name];
                        }
                    }

                    rep.Name = classifier.name + "_" + vCase.name;
                    avg.AddValues(rep);
                    EMperKFolds.AddRow(rep);

                    c++;
                }

                rep_eval.AddValues(metrics, classificationMetricComputation.microAveraging);
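                // note: eval accumulates per-class metrics in the loop above but is never written back into metrics; whether AddValues sees the pooled counts depends on classificationEvalMetricSet's indexer semantics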



                avg.Classifier = classifier.name;
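                // c counted the folds visited above, so dividing turns the accumulated sums into cross-fold means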
                avg.DivideValues(c);

                // <<< detecting the best-performing classifier across all evaluation folds
                if (avg.F1measure > highestF1Value)
                {
                    highestF1Value      = avg.F1measure;
                    topClassifierReport = avg;
                }

                meanClassifierReport.AddValues(avg);


                // -----------------

                EMperKFolds.AddRow(avg);

                tpAvgMacro.AddRow(avg);

                macroaverages.Add(avg);

                if (DOMAKE_MICROaverage)
                {
                    tpAvgMicro.AddRow(rep_eval);
                }
                // tp.Add(rep_eval);

                if (context.tools.operation.DoMakeReportForEachClassifier)
                {
                    DataTable cTable = EMperKFolds;
                    cTable.SetTitle($"{classifier.name} report");
                    cTable.SetDescription("Summary " + context.setup.validationSetup.k + "-fold validation report for [" + classifier.name + "]");


                    cTable.SetAdditionalInfoEntry("FV Extractor", extractor.name);
                    cTable.SetAdditionalInfoEntry("Classifier", classifier.name);
                    cTable.SetAdditionalInfoEntry("Class name", classifier.GetType().Name);

                    cTable.SetAdditionalInfoEntry("Correct", rep_eval.Correct);
                    cTable.SetAdditionalInfoEntry("Wrong", rep_eval.Wrong);

                    //cTable.SetAdditionalInfoEntry("Precision", rep_eval.Precision);
                    //cTable.SetAdditionalInfoEntry("Recall", rep_eval.Recall);
                    //cTable.SetAdditionalInfoEntry("F1", rep_eval.F1measure);

                    cTable.SetAdditionalInfoEntry("True Positives", metrics[classifier.name].truePositives);
                    cTable.SetAdditionalInfoEntry("False Negatives", metrics[classifier.name].falseNegatives);
                    cTable.SetAdditionalInfoEntry("False Positives", metrics[classifier.name].falsePositives);


                    cTable.AddExtra("Classifier: " + classifier.name + " [" + classifier.GetType().Name + "]");
                    var info = classifier.DescribeSelf();
                    info.ForEach(x => cTable.AddExtra(x));

                    cTable.AddExtra("-----------------------------------------------------------------------");

                    cTable.AddExtra("Precision, Recall and F1 measures expressed in this table are computed by macroaveraging shema");
                    //  output.CopyRowsFrom(cTable);


                    cTable.GetReportAndSave(folder, appManager.AppInfo, extractor.name + "_classifier_" + classifier.name);

                    // dataSet.AddTable(cTable);
                }
            }



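            // rangeFinder scans the macro-average table so that min/max/avg/stdev aggregate rows can be appended to it later (see AddRangeRows below)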
            rangeFinderForDataTable rangerMacro = new rangeFinderForDataTable(tpAvgMacro, "Name");



            meanClassifierReport.DivideValues(classifiers.Count);
            if (macroaverages.Count > 0)
            {
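                // locate the best and worst rows by F1 so they can be highlighted in the all-folds table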
                Double maxF1 = macroaverages.Max(x => x.F1measure);
                Double minF1 = macroaverages.Min(x => x.F1measure);

                List <String> minCaseNames = macroaverages.Where(x => x.F1measure == minF1).Select(x => x.Name).ToList();
                List <String> maxCaseNames = macroaverages.Where(x => x.F1measure == maxF1).Select(x => x.Name).ToList();


                var style = EMperKFolds.GetRowMetaSet().SetStyleForRowsWithValue <String>(DataRowInReportTypeEnum.dataHighlightA, nameof(DocumentSetCaseCollectionReport.Name), maxCaseNames);

                EMperKFolds.GetRowMetaSet().AddUnit(style);


                //  style = tpAvgMacro.GetRowMetaSet().SetStyleForRowsWithValue<String>(DataRowInReportTypeEnum.dataHighlightC, nameof(DocumentSetCaseCollectionReport.Name), minCaseNames);



                tpAvgMacro.SetAdditionalInfoEntry("FV Extractor", extractor.name);
                if (DOMAKE_MICROaverage)
                {
                    tpAvgMicro.SetAdditionalInfoEntry("FV Extractor", extractor.name);
                }


                List <String> averageNames = macroaverages.Select(x => x.Name).ToList();
                var           avg_style    = EMperKFolds.GetRowMetaSet().SetStyleForRowsWithValue <String>(DataRowInReportTypeEnum.dataHighlightC, nameof(DocumentSetCaseCollectionReport.Name), averageNames);
                foreach (var x in averageNames)
                {
                    avg_style.AddMatch(x);
                }
            }

            // ::: ------------------------------------------------------------------------------------------------- ::: --------------------------------------------------------------------- ::: //

            tpAvgMacro.SetTitle($"{extractor.name} - macroaverage report");
            if (DOMAKE_MICROaverage)
            {
                tpAvgMicro.SetTitle($"{extractor.name} - microaverage report");
            }

            tpAvgMacro.AddExtra("Complete report on " + context.setup.validationSetup.k + "-fold validation FVE [" + extractor.name + "]");
            tpAvgMacro.AddExtra("Fold-level P, R and F1 measures are computed by macroaveraging method, values here are cross k-fold means.");

            if (DOMAKE_MICROaverage)
            {
                tpAvgMicro.AddExtra("Complete " + context.setup.validationSetup.k + "-fold validation report for FVE [" + extractor.name + "]");
                tpAvgMicro.AddExtra("Fold-level P, R and F1 measures are computed by the micro-averaging method; values here are cross k-fold means.");
            }

            context.AddExperimentInfo(tpAvgMacro);
            if (DOMAKE_MICROaverage)
            {
                context.AddExperimentInfo(tpAvgMicro);
            }

            tpAvgMacro.AddExtra(extractor.description);


            if (extractor is semanticFVExtractor)
            {
                semanticFVExtractor semExtractor = (semanticFVExtractor)extractor;

                semExtractor.termTableConstructor.DescribeSelf().ForEach(x => tpAvgMacro.AddExtra(x));
                semExtractor.CloudConstructor.DescribeSelf().ForEach(x => tpAvgMacro.AddExtra(x));
                semExtractor.termTableConstructor.DescribeSelf().ForEach(x => tpAvgMicro.AddExtra(x));
                semExtractor.CloudConstructor.DescribeSelf().ForEach(x => tpAvgMicro.AddExtra(x));
            }

            context.logger.log("-- producing summary reports on [" + extractor.name + "]");

            rangerMacro.AddRangeRows("Macroaverage ", tpAvgMacro, true,
                                     imbSCI.Core.math.aggregation.dataPointAggregationType.min | imbSCI.Core.math.aggregation.dataPointAggregationType.max
                                     | imbSCI.Core.math.aggregation.dataPointAggregationType.avg
                                     | imbSCI.Core.math.aggregation.dataPointAggregationType.stdev);
            tpAvgMacro.GetReportAndSave(folder, appManager.AppInfo, extractor.name + "_macroaverage_report", true, true);


            EMperKFolds.AddExtra("The table shows average measures for each fold --- rows marked with colored background show averages for all folds, per classifier.");

            EMperKFolds.GetReportAndSave(folder, appManager.AppInfo, extractor.name + "_allFolds", true, true);

            if (DOMAKE_MICROaverage)
            {
                tpAvgMicro.GetReportAndSave(folder, appManager.AppInfo, extractor.name + "_microaverage_report", true, true);
            }
            //dataSet.GetReportVersion().serializeDataSet(extractor.name + "_classifiers_MultiSheetSummary", folder, imbSCI.Data.enums.reporting.dataTableExportEnum.excel, appManager.AppInfo);
        }
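
        // --- Illustrative sketch (not part of the original listing) --------------------------
        // MakeReports above relies on two averaging schemes: macro-averaging (compute P/R/F1
        // per class, then average the per-class F1 values) and micro-averaging (pool TP/FP/FN
        // counts across classes, then compute P/R/F1 once from the pooled counts). A minimal,
        // self-contained version of both, assuming raw (tp, fp, fn) counts per class; the real
        // classificationEvalMetricSet API differs from this.
        private static void SketchAverageF1(List <Tuple <Int32, Int32, Int32> > perClassCounts)
        {
            Double macroF1Sum = 0;
            Int32  tpSum = 0, fpSum = 0, fnSum = 0;
            foreach (var counts in perClassCounts)
            {
                Int32  tp = counts.Item1, fp = counts.Item2, fn = counts.Item3;
                Double p = (tp + fp) > 0 ? (Double)tp / (tp + fp) : 0;  // per-class precision
                Double r = (tp + fn) > 0 ? (Double)tp / (tp + fn) : 0;  // per-class recall
                macroF1Sum += (p + r) > 0 ? (2 * p * r) / (p + r) : 0;  // per-class F1
                tpSum += tp; fpSum += fp; fnSum += fn;                  // pooled counts for micro
            }
            Double macroF1 = macroF1Sum / perClassCounts.Count;         // mean of per-class F1
            Double pMicro  = (tpSum + fpSum) > 0 ? (Double)tpSum / (tpSum + fpSum) : 0;
            Double rMicro  = (tpSum + fnSum) > 0 ? (Double)tpSum / (tpSum + fnSum) : 0;
            Double microF1 = (pMicro + rMicro) > 0 ? (2 * pMicro * rMicro) / (pMicro + rMicro) : 0;
            Console.WriteLine("macro F1 = {0:F3}, micro F1 = {1:F3}", macroF1, microF1);
        }
        // --------------------------------------------------------------------------------------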
        public void Save(ILogBuilder log, Boolean SubLevelCall = false)
        {
            //if (folder == null) folder = notes.folder_corpus;

            if (folder == null)
            {
                return;
            }
            if (purpose != FeatureCWPAnalysisSettings.AnalysisPurpose.application)
            {
                SaveFeatures(folder.Add("Features", "Features", "Features"), featuresByClass);

                EntryDictionary.Save(folder, log, name);

                if (!SubLevelCall)
                {
                    var flds = fields();

                    DataSet dataSet = new DataSet("rep_" + name);

                    // List<DataTableForStatistics> rdt_list = new List<DataTableForStatistics>();
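                    // builds one DataTable per field; judging by the arguments, 500 is a row limit and flds.IndexOf(n) selects the sort/key column (assumption - MakeTable's signature is not shown here)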
                    foreach (String n in flds)
                    {
                        DataTable dt = EntryDictionary.MakeTable(n + "_" + name, EntryDictionary.description, flds, 500, flds.IndexOf(n));

                        dt.AddStringLine("Report for " + name);

                        dataSet.Tables.Add(dt);
                        //DataTableForStatistics rdt = dt.GetReportTableVersion();
                        //rdt_list.Add(rdt);
                    }

                    DataSetForStatistics report = dataSet.GetReportAndSave(folder, null, "cwp_" + name);

                    var keys = Keys.ToList();
                    keys.Sort();

                    DataTableTypeExtended <FeatureCWPAnalysisSiteMetrics> metrics = new DataTableTypeExtended <FeatureCWPAnalysisSiteMetrics>();
                    Int32 c = 0;
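                    // stop after roughly the first 2000 keys (sorted alphabetically) to keep the metrics spreadsheet manageable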
                    foreach (var key in keys)
                    {
                        metrics.AddRow(this[key]);
                        c++;
                        if (c > 2000)
                        {
                            break;
                        }
                    }

                    metrics.GetReportAndSave(folder, null, "cwp_" + name + "_metrics");

                    //foreach (var pair in entryReport)
                    //{
                    //    folderNode fn = folder.Add(pair.Key, pair.Key, "Sub entry report");
                    //    pair.Value.Save(log, true);
                    //}
                }
            }
        }
        /// <summary>
        /// Generates the final reports and read me files
        /// </summary>
        public void CloseExperiment(ILogBuilder logger, long startOfLog)
        {
            if (!testSummaries.Any())
            {
                logger.log("No experiment procedures performes");

                return;
            }
            DataTableTypeExtended <classificationReport> summaryTable = new DataTableTypeExtended <classificationReport>("Test results", "k-fold cross-validation results");

            classificationReport sumRow = new classificationReport(runName);

            sumRow.Comment = runName + ", " + description;
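            // sumRow accumulates every test summary below and is then divided by their count, yielding run-level average measures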


            //    classificationEvalMetricSet metric = new classificationEvalMetricSet("Total", truthTable.labels_without_unknown);

            foreach (classificationReport s in testSummaries)
            {
                summaryTable.AddRow(s);
                //metric = metric + s;

                if (sumRow.Classifier.isNullOrEmpty())
                {
                    sumRow.Classifier = s.Classifier;
                }

                sumRow.AddValues(s);
            }



            sumRow.DivideValues(testSummaries.Count);

            sumRow.SetReportDataFields(crossValidation, this);

            summaryTable.SetDescription(description);

            summaryTable.SetAdditionalInfoEntry("RunName", runName);
            summaryTable.SetAdditionalInfoEntry("Description", description);
            summaryTable.SetAdditionalInfoEntry("Averaging", averagingMethod.ToString());

            summaryTable.AddRow(sumRow);



            summaryTable.GetReportAndSave(notes.folder, signature);

            finalReport = sumRow;

            //sumRow.ReportToLog(logger);
            sumRow.ReportToLog(notes);

            objectSerialization.saveObjectToXML(sumRow, notes.folder.pathFor("results.xml", imbSCI.Data.enums.getWritableFileMode.overwrite, "Experiment results", true));


            logger.log("Experiment completed");

            notes.SaveNote("note");

            String logPrintout = logger.GetContent(startOfLog);
            String p           = notes.folder.pathFor("log.txt", imbSCI.Data.enums.getWritableFileMode.overwrite, "Log printout during experiment execution");

            File.WriteAllText(p, logPrintout);

            experimentRootFolder.generateReadmeFiles(signature);
        }