Example #1
        public void UnitTestMethod()
        {
            // Example 1: Loading a BibTex file directly, via the constructor
            BibTexDataFile bib_1 = new BibTexDataFile("Resources\\test\\S0306457309000259.bib");



            // Example 2: Locating the BibTex file with folderResources, then loading it
            String path = folderResources.findFile("S0306457309000259.bib", SearchOption.AllDirectories);

            // initializes the BibTex data file object
            BibTexDataFile bib = new BibTexDataFile();

            // loads a .bib or .bibtex file from the specified path
            bib.Load(path, log);


            // converts the loaded BibTex entries into a DataTable, with all columns discovered in the entries
            DataTable dt = bib.ConvertToDataTable();

            // saves the DataTable to an Excel file, without adding a Legend spreadsheet
            var finalPath = dt.serializeDataTable(Data.enums.reporting.dataTableExportEnum.excel, bib.name, folderResults, notation);

            // creates an extended version of the Excel file, with an additional spreadsheet for the Legend and other meta information
            var reportDataTable_ref = dt.GetReportAndSave(folderResults, notation);


            // Example 3: Short way

            // High-level method: creates an extended version of the Excel file, with an additional spreadsheet for the Legend and other meta information
            var reportDataTable = BibTexTools.ExportToExcel(path, notation, log);


            // Example 4: Working with BibTexEntryModel

            // Creation of BibTex entry from code
            BibTexEntryModel entry = new BibTexEntryModel()
            {
                EntryKey  = "SOKOLOVA2009427",
                EntryType = "article",
                journal   = "Information Processing & Management",
                title     = "A systematic analysis of performance measures for classification tasks",
                keywords  = "Performance evaluation, Machine Learning, Text classification",
                year      = 2005,
                number    = 2,
                issn      = "0000-0000",
                @abstract = "Abs",
                doi       = "https://doi.org/10.1016/j.ipm.2009.03.002",
                url       = "http://www.sciencedirect.com/science/article/pii/S0306457309000259"
            };

            // Creation of data table collection
            DataTableTypeExtended <BibTexEntryModel> bibTable = new DataTableTypeExtended <BibTexEntryModel>("RuntimeCreatedBibTex", "BibTex table, created in Run Time");

            // adds the entry created above as a row (without this, the exported table would be empty)
            bibTable.AddRow(entry);

            // creates an extended version of the Excel file, with an additional spreadsheet for the Legend and other meta information
            var codeDataTable_ref = bibTable.GetReportAndSave(folderResults, notation);
        }
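A condensed sketch of the minimal load-to-Excel path, assuming the same test-class members (folderResources, folderResults, notation, log) as in the method above; it only combines calls that already appear there.

        public void Example_MinimalBibTexToExcel()
        {
            // locate the .bib file in the resources folder (same helper as above)
            String path = folderResources.findFile("S0306457309000259.bib", SearchOption.AllDirectories);

            // load the BibTex file and convert its entries into a DataTable
            BibTexDataFile bib = new BibTexDataFile();
            bib.Load(path, log);
            DataTable dt = bib.ConvertToDataTable();

            // export the DataTable as an Excel report with a Legend spreadsheet
            var report = dt.GetReportAndSave(folderResults, notation);

            // or, equivalently, the single high-level call
            var reportShort = BibTexTools.ExportToExcel(path, notation, log);
        }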
Example #2
        public static void MakeReport(classificationReportCollection reportCollection, classificationReportCollectionSettings settings, aceAuthorNotation appInfo, ILogBuilder log, classificationReportStyleDefinition style, classificationReportDataComplexContext context)
        {
            DataTableTypeExtended <classificationReportExpanded> table = reportCollection.MakeOverviewTable(context, reportCollection.name, reportCollection.description);

            table.SetTitle(reportCollection.name);

            var statDataTable = table.GetReportAndSave(reportCollection.rootFolder, appInfo);

            log.log("Report [" + table.TableName + "] created at " + statDataTable.lastFilePath);

            //  context.cumulative_tables.Add(table);

            var layers = reportCollection.GetSpaceLayers(style);

            foreach (var pair in layers)
            {
                var reportSpace = classificationReportSpace.BuildReportSpace(pair.Value, reportCollection.datasetName, settings.SELECT_REPORT_NAME_PARTS, style, pair.Key);

                if (!context.report_spaces.ContainsKey(reportSpace.name))
                {
                    context.report_spaces.Add(reportSpace.name, new List <classificationReportSpace>());
                    context.comparative_tables.Add(reportSpace.name, new List <DataTable>());
                    context.comparative_narrow_tables.Add(reportSpace.name, new List <DataTable>());
                }
                context.report_spaces[reportSpace.name].Add(reportSpace);


                System.Data.DataTable comparative_table = reportSpace.ConstructTable("comparative_" + reportCollection.name + "_" + reportSpace.name, reportCollection.description);

                context.comparative_tables[reportSpace.name].Add(comparative_table);

                comparative_table.AddExtra("Group path: " + reportCollection.rootFolder.path);

                comparative_table.GetReportAndSave(reportCollection.rootFolder, appInfo);


                System.Data.DataTable comparative_table_small = reportSpace.ConstructTable("comparative_" + reportCollection.name + "_" + reportSpace.name + "_small", reportCollection.description, classificationReportTableMode.onlyBasic);

                context.comparative_narrow_tables[reportSpace.name].Add(comparative_table_small);


                var styleFS = style.CloneViaXML();
                styleFS.valueToUse = classificationReportStyleDefinition.GetFS(); //new reportExpandedDataPair(classificationReportStyleDefinition.VALUE_FS, "Selected Features", "Number of features actually selected");

                reportSpace = classificationReportSpace.BuildReportSpace(pair.Value, reportCollection.datasetName, settings.SELECT_REPORT_NAME_PARTS, styleFS, pair.Key);
                reportSpace.ConstructTable("featureSelected_" + reportCollection.name + "_" + reportSpace.name, reportCollection.description).GetReportAndSave(reportCollection.rootFolder, appInfo);
            }



            //    return comparative_table;
        }
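The method above follows the reporting pattern used throughout these examples: build a typed table, set its title, description and extra lines, then call GetReportAndSave. Below is a minimal sketch of that pattern, assuming a folderNode and an aceAuthorNotation are supplied by the caller and using only calls that appear elsewhere in this listing.

        public static void SketchReportingPattern(folderNode folder, aceAuthorNotation appInfo)
        {
            DataTableTypeExtended <classificationReport> table = new DataTableTypeExtended <classificationReport>("ExampleTable", "Sketch of the reporting pattern");

            // rows are typed objects; classificationReport takes a name in its constructor (as in the examples below)
            table.AddRow(new classificationReport("example row"));

            table.SetTitle("Example report");
            table.SetDescription("Demonstrates title, description, extra lines and export");
            table.SetAdditionalInfoEntry("RunName", "sketch");
            table.AddExtra("Extra lines end up on the Legend spreadsheet of the Excel report");

            // writes the Excel file (with Legend spreadsheet) into the given folder
            var report = table.GetReportAndSave(folder, appInfo);
        }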
Example #3
        /// <summary>
        /// Creates a generic DataTable collection, adds 5 rows and generates Excel and CSV files
        /// </summary>
        public void ExampleOne_DataTableDataAnnotation()
        {
            // creating typed DataTable collection, holding DataEntryTest class
            DataTableTypeExtended<DataEntryTest> dataTableTypeExtended = new DataTableTypeExtended<DataEntryTest>(nameof(DataEntryTest), nameof(ExampleOne_DataTableDataAnnotation));

            // adding five rows
            dataTableTypeExtended.AddRow(new DataEntryTest());
            dataTableTypeExtended.AddRow(new DataEntryTest());
            dataTableTypeExtended.AddRow(new DataEntryTest());
            dataTableTypeExtended.AddRow(new DataEntryTest());
            dataTableTypeExtended.AddRow(new DataEntryTest());

            // Generating and exporting report into Excel file
            DataTableForStatistics report = dataTableTypeExtended.GetReportAndSave(folderResults);
        }
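The DataEntryTest class itself is not shown in this listing; the placeholder below is hypothetical, assuming each public property becomes a column (the real class may additionally carry data-annotation attributes for captions, formatting and descriptions).

        // Hypothetical stand-in for the DataEntryTest row class used above
        public class DataEntryTest
        {
            public String Name { get; set; } = "Test entry";
            public Int32 Count { get; set; } = 5;
            public Double Score { get; set; } = 0.75;
        }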
Example #4
        /// <summary>Creation of BibTex entry from code, and generation of Excel table</summary>

        /** <example><para>Creation of BibTex entry from code, and generation of Excel table</para>
         *  <code>
         *  // --- We create the entry and write its source code to the disk
         *
         *  // Creation of BibTex entry from code
         *  BibTexEntryModel entry = new BibTexEntryModel()
         *  {
         *      EntryKey = "SOKOLOVA2009427",
         *      EntryType = "article",
         *      journal = "Information Processing & Management",
         *      title = "A systematic analysis of performance measures for classification tasks",
         *      keywords = "Performance evaluation, Machine Learning, Text classification",
         *      year = 2005,
         *      number = 2,
         *      issn = "0000-0000",
         *      @abstract = "Abs",
         *      doi = "https://doi.org/10.1016/j.ipm.2009.03.002",
         *      url = "http://www.sciencedirect.com/science/article/pii/S0306457309000259",
         *      author = "Marina Sokolova and Guy Lapalme"
         *  };
         *
         *  // New instance of the TextProcessor object; you would typically share this one with other parts of your code.
         *  BibTexSourceProcessor processor = new BibTexSourceProcessor();
         *
         *  // Generating BibTex code
         *  String code = entry.GetSource(processor.latex, log);
         *
         *  // Making path
         *  String path = folderResults.pathFor(nameof(Example4_UsingObjectModel) + ".txt");
         *
         *  File.WriteAllText(path, code);
         *
         *  // --- Now we export it to the Excel file
         *
         *  // Creation of data table collection
         *  DataTableTypeExtended<BibTexEntryModel> bibTable = new DataTableTypeExtended<BibTexEntryModel>("RuntimeCreatedBibTex", "BibTex table, created in Run Time");
         *  bibTable.AddRow(entry);
         *
         *  // creates an extended version of the Excel file, with an additional spreadsheet for the Legend and other meta information
         *  var codeDataTable_ref = bibTable.GetReportAndSave(folderResults, notation, nameof(Example4_UsingObjectModel));
         *  </code>
         *  </example>
         */
        public void Example4_UsingObjectModel()
        {
            // --- We create the entry and write its source code to the disk

            // Creation of BibTex entry from code
            BibTexEntryModel entry = new BibTexEntryModel()
            {
                EntryKey  = "SOKOLOVA2009427",
                EntryType = "article",
                journal   = "Information Processing & Management",
                title     = "A systematic analysis of performance measures for classification tasks",
                keywords  = "Performance evaluation, Machine Learning, Text classification",
                year      = 2005,
                number    = 2,
                issn      = "0000-0000",
                @abstract = "Abs",
                doi       = "https://doi.org/10.1016/j.ipm.2009.03.002",
                url       = "http://www.sciencedirect.com/science/article/pii/S0306457309000259",
                author    = "Marina Sokolova and Guy Lapalme"
            };

            // New instance of the TextProcessor object; you would typically share this one with other parts of your code.
            BibTexSourceProcessor processor = new BibTexSourceProcessor();

            // Generating BibTex code
            String code = entry.GetSource(processor.latex, log);

            // Making path
            String path = folderResults.pathFor(nameof(Example4_UsingObjectModel) + ".txt");

            File.WriteAllText(path, code);

            // --- Now we export it to the Excel file

            // Creation of data table collection
            DataTableTypeExtended <BibTexEntryModel> bibTable = new DataTableTypeExtended <BibTexEntryModel>("RuntimeCreatedBibTex", "BibTex table, created in Run Time");

            bibTable.AddRow(entry);

            // creates an extended version of the Excel file, with an additional spreadsheet for the Legend and other meta information
            var codeDataTable_ref = bibTable.GetReportAndSave(folderResults, notation, nameof(Example4_UsingObjectModel));
        }
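A short extension of the example above: several BibTexEntryModel rows exported in one table. It assumes the same test-class members (folderResults, notation); the second entry is made up purely for illustration.

        public void Example4b_MultipleEntries()
        {
            BibTexEntryModel first = new BibTexEntryModel()
            {
                EntryKey  = "SOKOLOVA2009427",
                EntryType = "article",
                title     = "A systematic analysis of performance measures for classification tasks",
                year      = 2005
            };

            // hypothetical entry, used only to show a table with more than one row
            BibTexEntryModel second = new BibTexEntryModel()
            {
                EntryKey  = "EXAMPLE2025",
                EntryType = "article",
                title     = "A second, made-up entry added for illustration",
                year      = 2025
            };

            DataTableTypeExtended <BibTexEntryModel> multiTable = new DataTableTypeExtended <BibTexEntryModel>("RuntimeCreatedBibTex_multi", "BibTex table with several entries");

            multiTable.AddRow(first);
            multiTable.AddRow(second);

            var multiTable_ref = multiTable.GetReportAndSave(folderResults, notation, nameof(Example4b_MultipleEntries));
        }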
Example #5
        public void Publish(folderNode folder, Boolean exportXML = true, Boolean exportDescribe = true, Boolean exportDatatable = true)
        {
            if (exportXML)
            {
                String xml = objectSerialization.ObjectToXML(this);

                String x_path = folder.pathFor(name + "_report.xml", imbSCI.Data.enums.getWritableFileMode.overwrite, "Serialized structural report of a dataset");

                File.WriteAllText(x_path, xml);
            }


            if (exportDescribe)
            {
                builderForText builderForText = new builderForText();

                String t_path = folder.pathFor(name + "_report.txt", imbSCI.Data.enums.getWritableFileMode.overwrite, "Summary of structural report of a dataset");

                Describe(builderForText);

                File.WriteAllText(t_path, builderForText.GetContent());
            }

            if (exportDatatable)
            {
                DataTableTypeExtended <DatasetStructureReport> dt_reports = new DataTableTypeExtended <DatasetStructureReport>(name, "Dataset structure stats");

                dt_reports.AddRow(this);
                foreach (var ch in Children)
                {
                    dt_reports.AddRow(ch);
                }

                dt_reports.GetReportAndSave(folder, null, name + "_report");
            }
        }
        /// <summary>
        /// Publishes the table blocks.
        /// </summary>
        /// <param name="folder">The folder.</param>
        /// <param name="blockSize">Size of the block.</param>
        /// <param name="blockCount">The block count.</param>
        /// <returns></returns>
        public DataTableTypeExtended <FeatureCWPFrequencies> PublishTableBlocks(folderNode folder, Int32 blockSize = 500, Int32 blockCount = 10)
        {
            if (!this.Any())
            {
                return(null);
            }

            foreach (var pair in this)
            {
                pair.Value.Compute();
            }

            Int32 b = 0;

            for (int i = 0; i < blockCount; i++)
            {
                var p = this.First();

                String dt_n = name + "_" + i.ToString();

                DataTableTypeExtended <FeatureCWPFrequencies> cwpMetrics = new DataTableTypeExtended <FeatureCWPFrequencies>(dt_n, "Collected metrics");
                // DataTableTypeExtended<FeatureCWPFrequencies> cwpFrequencies = new DataTableTypeExtended<FeatureCWPFrequencies>(dt_n + "_freq", "frequency metrics");

                // p.Value.SetDataTable(datatable);
                //     DataColumn nm = datatable.Columns.Add("Name");

                Int32 c = 0;

                foreach (var pair in this)
                {
                    // rows with index in [i * blockSize, (i + 1) * blockSize) belong to block i
                    if (c >= (i * blockSize) && c < ((i + 1) * blockSize))
                    {
                        cwpMetrics.AddRow(pair.Value);

                        // cwpFrequencies.AddRow(term_finders.)
                    }

                    c++;
                }

                if (cwpMetrics.Rows.Count > 0)
                {
                    DataTableConverterASCII dataTableConverterASCII = new DataTableConverterASCII();

                    dataTableConverterASCII.ConvertToFile(cwpMetrics, folder, dt_n);

                    DataTableForStatistics report = cwpMetrics.GetReportAndSave(folder, null, dt_n);
                }
            }

            DataTableTypeExtended <FeatureCWPFrequencies> output = new DataTableTypeExtended <FeatureCWPFrequencies>();

            foreach (var p in this)
            {
                output.AddRow(p.Value);
            }

            output.Save(folder, null, "stats_frequencies");

            return(output);
        }
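A hedged usage sketch for the Publish method defined at the top of this example; the DatasetStructureReport instance and the target folderNode are assumed to be built elsewhere, since their construction is not part of this listing.

        public void PublishSketch(DatasetStructureReport report, folderNode folderResults)
        {
            // writes the XML serialization, the text summary and the Excel/CSV data table into the folder
            report.Publish(folderResults, exportXML: true, exportDescribe: true, exportDatatable: true);
        }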
Example #7
        protected void runModel(experimentExecutionContext context, IWebFVExtractor model)
        {
            imbSCI.Core.screenOutputControl.logToConsoleControl.setAsOutput(context.logger, model.name);
            Int32 crashRetries = context.tools.operation.doRebootFVEOnCrashRetryLimit;
            aceDictionarySet <IDocumentSetClass, DocumentSetCaseCollection> casesByClasses = new aceDictionarySet <IDocumentSetClass, DocumentSetCaseCollection>();
            DSCCReportSet kFoldReport = new DSCCReportSet(model);
            var           valCol      = context.validationCollections[model.name];

            List <DocumentSetCaseCollectionSet> modelCaseResults = new List <DocumentSetCaseCollectionSet>();

            crashRetries = context.tools.operation.doRebootFVEOnCrashRetryLimit;
            while (crashRetries > 0)
            {
                try
                {
                    experimentNotes modelNotes = new experimentNotes(valCol.folder, "Fold-level experiment settings description notes");
                    modelNotes.AppendLine("# Notes on Feature Vector Extractor: " + model.name);

                    var nts = model.DescribeSelf();
                    nts.ForEach(x => modelNotes.AppendLine(x));



                    context.logger.log("Executing k-fold cases with model [" + model.name + "]");



                    valCol.DescribeSampleDistribution(modelNotes);

                    context.mainReport.valColVsModelVsSampleHash.Add("[" + model.name + "]".toWidthExact(20) + " [sample distribution hash: " + valCol.SampleDistributionHash + "]");

                    modelNotes.SaveNote();

                    ParallelOptions ops = new ParallelOptions();
                    ops.MaxDegreeOfParallelism = context.tools.operation.ParallelThreads;

                    Parallel.ForEach <kFoldValidationCase>(valCol.GetCases(), ops, valCase =>
                    {
                        model.DoFVEAndTraining(valCase, context.tools, context.logger); // <---------------------------------------------------------------------------------------   BUILDING FVE

                        DocumentSetCaseCollectionSet results = model.DoClassification(valCase, context.tools, context.logger);

                        if (!results.Any())
                        {
                            throw new aceScienceException("DoClassification for [" + model.name + "] returned no results!", null, model, "DoClassification " + model.name + " failed!", context);
                        }

                        foreach (var pair in results)
                        {
                            DocumentSetCaseCollection cls = pair.Value;
                            casesByClasses.Add(cls.setClass, cls);
                        }

                        valCase.evaluationResults = results;

                        if (context.tools.DoResultReporting)
                        {
                            context.logger.log("producing reports on k-Fold case [" + valCase.name + "]");
                            DSCCReports r = results.GetReports();

                            var sumMeans = r.GetAverageTable(context); //.GetReportAndSave(valCase.folder, appManager.AppInfo, "CrossValidation_" + valCase.name);
                            sumMeans.SetDescription("FVE report, aggregated for all categories - for fold [" + valCase.name + "]");


                            sumMeans.GetReportAndSave(valCase.folder, appManager.AppInfo, "CrossValidation_" + valCase.name, true, context.tools.operation.doReportsInParalell);

                            var fveAndCase = r.GetFullValidationTable(context);
                            fveAndCase.SetDescription("Per-category aggregate statistics, for each classifier, within fold [" + valCase.name + "], used for macro-averaging");
                            fveAndCase.GetReportAndSave(valCase.folder, appManager.AppInfo, "CrossValidation_extrainfo_" + valCase.name, true, context.tools.operation.doReportsInParalell);

                            var fullCaseReport = results.GetReportOnAllCases();


                            fullCaseReport.GetReportAndSave(valCase.folder, appManager.AppInfo, "FullReport_" + valCase.name, true, context.tools.operation.doReportsInParalell);

                            kFoldReport.Add(valCase, r);
                        }

                        context.logger.log("k-Fold case [" + valCase.name + "] completed");

                        context.notes.log("- - Experiment sequence for [" + valCase.name + "] fold completed");
                        if (context.tools.operation.doSaveKnowledgeForClasses)
                        {
                            valCase.knowledgeLibrary.SaveKnowledgeInstancesForClasses(valCase, context.logger);
                        }
                    });

                    foreach (var fold in valCol.GetCases()) //  Parallel.ForEach<kFoldValidationCase>(valCol.GetCases(), ops, valCase =>
                    {
                        modelCaseResults.Add(fold.evaluationResults);
                    }

                    crashRetries = 0;
                }
                catch (Exception ex)
                {
                    crashRetries--;
                    context.errorNotes.LogException("FVE Model crashed -- retries left [" + crashRetries + "] --- ", ex, model.name);
                    context.logger.log(":::: REPEATING the model [" + model.name + "] ::: CRASHED [" + ex.Message + "] ::: RETRIES [" + crashRetries + "]");
                    imbACE.Services.terminal.aceTerminalInput.doBeepViaConsole(1200, 1000, 1);
                    imbACE.Services.terminal.aceTerminalInput.doBeepViaConsole(2400, 1000, 1);
                    imbSCI.Core.screenOutputControl.logToConsoleControl.setAsOutput(context.logger, "RETRIES[" + crashRetries + "]");
                }
            }


            imbSCI.Core.screenOutputControl.logToConsoleControl.setAsOutput(context.logger, "Reporting");


            valCol.knowledgeLibrary.SaveCaseKnowledgeInstances(context.logger);

            // DocumentSetCaseCollection second = null;
            if (modelCaseResults.Any())
            {
                featureExtractionMetrics modelMetrics = new featureExtractionMetrics(model.name, "All");
                DataTableTypeExtended <featureExtractionMetrics> modelVsCategoryMetrics = new DataTableTypeExtended <featureExtractionMetrics>(model.name, "Model metrics per category");


                // <-------------------------------------- CATEGORIES REPORT ----------------------------------------------

                DataTable allTable = modelCaseResults.First()[0].GetReportTable(false, false).GetClonedShema <DataTable>(); //valCol.GetCases().First().evaluationResults[0].GetReportTable(false, false);


                rangeFinderForDataTable ranger = new rangeFinderForDataTable(allTable, "name");
                ranger.columnsToSignIn.Add("Case");

                foreach (KeyValuePair <IDocumentSetClass, aceConcurrentBag <DocumentSetCaseCollection> > pair in casesByClasses)
                {
                    DocumentSetCaseCollection first = null;
                    DataTable repTable = null;

                    ranger.prepareForNextAggregationBlock(allTable, "name");

                    foreach (DocumentSetCaseCollection cn in pair.Value)
                    {
                        foreach (var cni in cn)
                        {
                            if (cni != null)
                            {
                                cn.BuildRow(cni, allTable, false);
                            }
                        }
                    }

                    ranger.AddRangeRows(pair.Key.name, allTable, true, imbSCI.Core.math.aggregation.dataPointAggregationType.avg | imbSCI.Core.math.aggregation.dataPointAggregationType.stdev);

                    var categoryMetrics = new featureExtractionMetrics(model.name, pair.Key.name);
                    categoryMetrics.SetValues(ranger);

                    modelVsCategoryMetrics.AddRow(categoryMetrics);
                    modelMetrics.AddValues(categoryMetrics);

                    categoryMetrics.saveObjectToXML(valCol.folder.pathFor(model.name + "_" + categoryMetrics.Name + ".xml", imbSCI.Data.enums.getWritableFileMode.overwrite, "FV and Category sample metrics, serialized object"));
                    //context.notes.log("- - Creating report for category [" + pair.Key.name + "] completed");
                    //repTable.GetReportAndSave(valCol.folder, appManager.AppInfo, model.name + "_category_" + pair.Key.name);
                }

                modelMetrics.DivideValues(casesByClasses.Count);
                modelMetrics.saveObjectToXML(valCol.folder.pathFor(model.name + "_metrics.xml", imbSCI.Data.enums.getWritableFileMode.overwrite, "Cross-categories macroaveraged metrics of the FVE model [" + model.name + "]"));

                modelVsCategoryMetrics.AddRow(modelMetrics);
                modelVsCategoryMetrics.GetRowMetaSet().SetStyleForRowsWithValue <String>(DataRowInReportTypeEnum.dataHighlightA, "Name", modelMetrics.Name);
                modelVsCategoryMetrics.GetReportAndSave(valCol.folder, appManager.AppInfo, model.name + "_metrics", true, true);

                context.mainReport.AddModelMetrics(modelMetrics);


                context.notes.log("- Creating report for all categories [" + model.name + "] ");
                allTable.GetReportAndSave(valCol.folder, appManager.AppInfo, model.name + "_categories", true, context.tools.operation.doReportsInParalell);
            }



            kFoldReport.MakeReports(context, valCol.folder);
            context.mainReport.AddBestPerformer(kFoldReport.GetTopClassifierReport(), kFoldReport.meanClassifierReport, model);

            // <---------------- creation of complete report

            context.notes.log("- Experiment sequence with Feature Vector Extractor [" + model.name + "] completed");
            context.notes.SaveNote();

            // <------------- END OF THE MODEL -------------------------------------------------------------------------------------------------
        }
Example #8
        /// <summary>
        /// Prepares for parallel execution.
        /// </summary>
        /// <param name="tools">The tools.</param>
        /// <param name="_context">The context.</param>
        public webProjectKnowledgeSet PrepareForParallelExecution(classifierTools tools, experimentExecutionContext _context)
        {
            if (caseKnowledgeSet == null)
            {
                caseKnowledgeSet = new webProjectKnowledgeSet();
            }

            if (items.Any())
            {
                experimentContext.notes.log("Mining Context was ready already.");
                return(caseKnowledgeSet);
            }
            DateTime startTime = DateTime.Now;

            experimentContext = _context;



            List <webCaseKnowledge> cases = new List <webCaseKnowledge>();

            folderNode classReportFolder = experimentContext.folder.Add("General", "General and diagnostic reports", "The folder contains general (outside k-folds) reports on analysed industries (categories), web sites and other diagnostic data");

            // <----------------------------------------------------------------------------------------------------------------        [ performing pipeline ]
            experimentContext.notes.log("Executing the Mining Context decomposition with the pipeline model");
            foreach (IDocumentSetClass classSet in experimentContext.classes.GetClasses())
            {
                var pipelineContext = GetContextForPipeline(tools, classSet);
                sitesByCategory.Add(classSet, new List <pipelineTaskMCSiteSubject>());

                if (!pipelineContext.exitByType.ContainsKey(typeof(pipelineTaskMCSiteSubject)))
                {
                    throw new aceGeneralException("Pipeline context output contains no web site subjects! Check the pipeline Site Task constructor.", null, pipelineContext, "Pipeline broken");
                }

                var sitesForContext = pipelineContext.exitByType[typeof(pipelineTaskMCSiteSubject)]; // <----- preparing
                foreach (var site in sitesForContext)
                {
                    tokenBySite.Add(site as pipelineTaskMCSiteSubject, new ConcurrentBag <pipelineTaskSubjectContentToken>());
                    sitesByCategory[classSet].Add(site as pipelineTaskMCSiteSubject);

                    webCaseKnowledge webCase = new webCaseKnowledge(site as pipelineTaskMCSiteSubject, classSet);

                    caseKnowledgeSet.Add(webCase);
                    cases.Add(webCase);
                }

                semanticFVExtractorKnowledge kn = new semanticFVExtractorKnowledge();
                kn.name = classSet.name + "_general";
                kn.relatedItemPureName = classSet.name;
                kn.type = WebFVExtractorKnowledgeType.aboutCompleteCategory;
                kn.Deploy(classReportFolder, experimentContext.logger);
                knowledgeByClass.TryAdd(classSet, kn);
            }

            experimentContext.notes.log("Sorting tokens for all sites [in parallel]");
            Parallel.ForEach(tokenBySite.Keys, site =>
            {
                var leafs = site.getAllLeafs();
                foreach (var leaf in leafs)
                {
                    pipelineTaskSubjectContentToken token = leaf as pipelineTaskSubjectContentToken;
                    if (token != null)
                    {
                        tokenBySite[site].Add(token);
                    }
                }
            });

            foreach (var c in cases)
            {
                c.tokens = tokenBySite[c.MCSiteSubject];
            }


            experimentContext.notes.log("Building diagnostic TF-IDF master tables for all classes [in parallel]");


            Boolean useIntegratedApproach = false;



            if (useIntegratedApproach)
            {
                var valCase = experimentContext.validationCollections[experimentContext.masterExtractor.name].GetDiagnosticCase(experimentContext.classes);
                Parallel.ForEach(sitesByCategory, pair =>
                {
                    knowledgeByClass.TryAdd(pair.Key, experimentContext.masterExtractor.DoFVExtractionForClassViaCases(valCase.trainingCases[pair.Key.classID], pair.Key, valCase, experimentContext.tools, experimentContext.logger));
                });
            }
            else
            {
                Parallel.ForEach(sitesByCategory, pair =>
                {
                    IDocumentSetClass category             = pair.Key;
                    List <pipelineTaskMCSiteSubject> sites = pair.Value;

                    var lt = BuildLemmaTableForClass(tools, category, sites);
                    lt.Save();
                    // lt.SaveAs(classReportFolder.pathFor(lt.info.Name), imbSCI.Data.enums.getWritableFileMode.overwrite);
                });
            }

            experimentContext.notes.log("Saving lexic resource cache subset - for later reuse in case of repeated experiment run");
            tools.SaveCache();


            if (!useIntegratedApproach)
            {
                experimentContext.notes.log("Performing chunk construction for all web sites in all categories [in serial]");



                foreach (IDocumentSetClass classSet in experimentContext.classes.GetClasses())
                {
                    BuildChunksForClass(tools, classSet);
                }



                foreach (IDocumentSetClass classSet in experimentContext.classes.GetClasses())
                {
                    experimentContext.masterExtractor.chunkTableConstructor.process(chunksByCategory[classSet], cnt_level.mcPage, knowledgeByClass[classSet].WLChunkTableOfIndustryClass, null, experimentContext.logger, false);
                }
            }

            if (tools.operation.doCreateDiagnosticMatrixAtStart)
            {
                experimentContext.notes.log("Performing diagnostic analysis on all categories...[doCreateDiagnosticMatrixAtStart=true]");



                folderNode matrixReport = classReportFolder.Add("clouds", "More reports on semantic cloud", "Directory contains exported DirectedGraphs, various matrix derivatives, the combined cloud and other diagnostic output");

                List <lemmaSemanticCloud> clouds         = new List <lemmaSemanticCloud>();
                List <lemmaSemanticCloud> filteredClouds = new List <lemmaSemanticCloud>();

                var converter = lemmaSemanticCloud.GetDGMLConverter();

                foreach (IDocumentSetClass classSet in experimentContext.classes.GetClasses())
                {
                    // experimentContext.masterExtractor.chunkTableConstructor.process(chunksByCategory[classSet], cnt_level.mcPage, knowledgeByClass[classSet].WLChunkTableOfIndustryClass, null, experimentContext.logger, false);


                    var cloud = experimentContext.masterExtractor.CloudConstructor.process(knowledgeByClass[classSet].WLChunkTableOfIndustryClass, knowledgeByClass[classSet].WLTableOfIndustryClass, knowledgeByClass[classSet].semanticCloud, experimentContext.logger, tokenBySite.Keys.ToList(), tools.GetLemmaResource());
                    knowledgeByClass[classSet].semanticCloud.className = classSet.name;
                    clouds.Add(cloud);

                    if (experimentContext.tools.operation.doUseSimpleGraphs)
                    {
                        cloud.GetSimpleGraph(true).Save(matrixReport.pathFor("cloud_initial_" + classSet.name, imbSCI.Data.enums.getWritableFileMode.none, "Initial version of full-sample set, diagnostic Semantic Cloud for category [" + classSet.name + "]"));
                    }
                    else
                    {
                        converter.ConvertToDMGL(cloud).Save(matrixReport.pathFor("cloud_initial_" + classSet.name, imbSCI.Data.enums.getWritableFileMode.none, "Initial version of full-sample set, diagnostic Semantic Cloud for category [" + classSet.name + "]"));
                    }



                    knowledgeByClass[classSet].semanticCloudFiltered           = knowledgeByClass[classSet].semanticCloud.CloneIntoType <lemmaSemanticCloud>(true);
                    knowledgeByClass[classSet].semanticCloudFiltered.className = classSet.name;
                    filteredClouds.Add(knowledgeByClass[classSet].semanticCloudFiltered);
                }

                cloudMatrix matrix = new cloudMatrix("CloudMatrix", "Diagnostic cloud matrix created from the complete sample set of [" + clouds.Count() + "] classes");
                matrix.build(filteredClouds, experimentContext.logger);

                lemmaSemanticCloud mergedCloudInitial = matrix.GetUnifiedCloud();
                mergedCloudInitial.Save(matrixReport.pathFor("unified_initial_cloud.xml", imbSCI.Data.enums.getWritableFileMode.overwrite, "Serialized object - Initial version of Semantic Cloud built as union of full-sample set Semantic Clouds of all categories"));


                var reductions = matrix.TransformClouds(experimentContext.masterExtractor.settings.semanticCloudFilter, experimentContext.logger);

                var p = matrixReport.pathFor("reductions_nodes.txt", imbSCI.Data.enums.getWritableFileMode.overwrite, "Report on Cloud Matrix transformation process");
                File.WriteAllLines(p, reductions);



                matrix.BuildTable(experimentContext.masterExtractor.settings.semanticCloudFilter, cloudMatrixDataTableType.initialState | cloudMatrixDataTableType.maxCloudFrequency | cloudMatrixDataTableType.absoluteValues).GetReportAndSave(matrixReport, appManager.AppInfo, "matrix_max_cf_initial", true, experimentContext.tools.operation.doReportsInParalell);

                matrix.BuildTable(experimentContext.masterExtractor.settings.semanticCloudFilter, cloudMatrixDataTableType.initialState | cloudMatrixDataTableType.overlapSize | cloudMatrixDataTableType.absoluteValues).GetReportAndSave(matrixReport, appManager.AppInfo, "matrix_overlap_size_initial", true, experimentContext.tools.operation.doReportsInParalell);

                matrix.BuildTable(experimentContext.masterExtractor.settings.semanticCloudFilter, cloudMatrixDataTableType.initialState | cloudMatrixDataTableType.overlapValue | cloudMatrixDataTableType.absoluteValues).GetReportAndSave(matrixReport, appManager.AppInfo, "matrix_overlap_value_initial", true, experimentContext.tools.operation.doReportsInParalell);


                matrix.ExportTextReports(matrixReport, true, "matrix_cf");
                matrix.ExportTextReports(matrixReport, false, "matrix_cf");

                lemmaSemanticCloud mergedCloudAfterReduction = matrix.GetUnifiedCloud();
                mergedCloudAfterReduction.Save(matrixReport.pathFor("unified_reduced_cloud.xml", imbSCI.Data.enums.getWritableFileMode.overwrite, "Serialized object - Version of the all-categories diagnostic Semantic Cloud, after the Cloud Matrix filter was applied"));

                if (experimentContext.tools.operation.doUseSimpleGraphs)
                {
                    mergedCloudInitial.GetSimpleGraph(true).Save(matrixReport.pathFor("unified_initial_cloud", imbSCI.Data.enums.getWritableFileMode.overwrite, "DirectedGraphML file - unified Semantic Cloud, before Cloud Matrix filter was applied - Open this in Visual Studio"));
                }
                else
                {
                    converter = lemmaSemanticCloud.GetDGMLConverter();

                    converter.ConvertToDMGL(mergedCloudInitial).Save(matrixReport.pathFor("unified_initial_cloud", imbSCI.Data.enums.getWritableFileMode.overwrite, "DirectedGraphML file - unified Semantic Cloud, before Cloud Matrix filter was applied - Open this in Visual Studio"));
                }


                // <-------- analysis -----------------------------------------------------------------------------------
                DataTableTypeExtended <freeGraphReport> cloudReports = new DataTableTypeExtended <freeGraphReport>();
                foreach (var cl in filteredClouds)
                {
                    freeGraphReport fgReport = new freeGraphReport(cl);
                    fgReport.Save(matrixReport);
                    cloudReports.AddRow(fgReport);
                }
                freeGraphReport unifiedReport = new freeGraphReport(mergedCloudAfterReduction);
                unifiedReport.Save(matrixReport);
                cloudReports.AddRow(unifiedReport);


                cloudReports.GetReportAndSave(matrixReport, appManager.AppInfo, "analysis_SemanticClouds");
                // <-------- analysis -----------------------------------------------------------------------------------



                foreach (IDocumentSetClass classSet in experimentContext.classes.GetClasses())
                {
                    var cloud = knowledgeByClass[classSet].semanticCloudFiltered; // .WLChunkTableOfIndustryClass, knowledgeByClass[classSet].WLTableOfIndustryClass, knowledgeByClass[classSet].semanticCloud, experimentContext.logger, tokenBySite.Keys.ToList());


                    if (experimentContext.tools.operation.doUseSimpleGraphs)
                    {
                        cloud.GetSimpleGraph(true).Save(matrixReport.pathFor("unified_initial_cloud", imbSCI.Data.enums.getWritableFileMode.overwrite, "DirectedGraphML file - unified Semantic Cloud, before Cloud Matrix filter was applied - Open this in Visual Studio"));
                    }
                    else
                    {
                        converter = lemmaSemanticCloud.GetDGMLConverter();

                        converter.ConvertToDMGL(cloud).Save(matrixReport.pathFor("unified_initial_cloud", imbSCI.Data.enums.getWritableFileMode.overwrite, "DirectedGraphML file - unified Semantic Cloud, before Cloud Matrix filter was applied - Open this in Visual Studio"));
                    }



                    //converter.ConvertToDMGL(cloud).Save(matrixReport.pathFor("cloud_reduced_" + classSet.name, imbSCI.Data.enums.getWritableFileMode.none, "DirectedGraphML file - Initial version of Semantic Cloud built as union of full-sample set Semantic Clouds of all categories (Open this with VS)"), imbSCI.Data.enums.getWritableFileMode.overwrite);
                }

                instanceCountCollection <String> tfcounter = new instanceCountCollection <string>();
                foreach (IDocumentSetClass classSet in experimentContext.classes.GetClasses())
                {
                    var wlt = knowledgeByClass[classSet].WLTableOfIndustryClass.GetDataTable();
                    wlt.DefaultView.Sort = "termFrequency desc";
                    var sorted = wlt.DefaultView.ToTable();
                    var tbl    = wlt.GetClonedShema <DataTable>(true);

                    tbl.CopyRowsFrom(sorted, 0, 100);
                    tbl.GetReportAndSave(classReportFolder, appManager.AppInfo, classSet.name + "_WebLemma", true, experimentContext.tools.operation.doReportsInParalell);

                    var cht = knowledgeByClass[classSet].WLChunkTableOfIndustryClass.GetDataTable();
                    cht.DefaultView.Sort = "termFrequency desc";
                    var csorted = cht.DefaultView.ToTable();

                    tbl = cht.GetClonedShema <DataTable>(true);
                    tbl.CopyRowsFrom(csorted, 0, 100);
                    tbl.GetReportAndSave(classReportFolder, appManager.AppInfo, classSet.name + "_Chunks", true, experimentContext.tools.operation.doReportsInParalell);

                    tfcounter.AddInstanceRange(knowledgeByClass[classSet].WLTableOfIndustryClass.unresolved);


                    knowledgeByClass[classSet].OnBeforeSave();
                }

                List <String> countSorted = tfcounter.getSorted();
                StringBuilder sb          = new StringBuilder();
                foreach (String s in countSorted)
                {
                    sb.AppendLine(String.Format("{1}  :  {0}", s, tfcounter[s]));
                }
                String pt = classReportFolder.pathFor("unresolved_tokens.txt", imbSCI.Data.enums.getWritableFileMode.none, "Cloud Frequency list of all unresolved letter-only tokens");
                File.WriteAllText(pt, sb.ToString());
            }


            if (tools.operation.doFullDiagnosticReport)
            {
                experimentContext.notes.log("Generating full diagnostic report on classes...");
                DataTable rep = null;
                foreach (IDocumentSetClass classSet in experimentContext.classes.GetClasses())
                {
                    rep = this.GetClassKnowledgeReport(classSet, rep);
                }
                rep.SetAdditionalInfoEntry("Experiment", experimentContext.setup.name);

                rep.AddExtra("Experiment: " + experimentContext.setup.name);

                rep.AddExtra("Info: " + experimentContext.setup.description);

                rep.SetDescription("Structural report for all classes in the experiment");
                rep.GetReportAndSave(classReportFolder, appManager.AppInfo, "structural_class_report", true, experimentContext.tools.operation.doReportsInParalell);
            }

            classReportFolder.generateReadmeFiles(appManager.AppInfo);


            experimentContext.notes.log("Mining Context preprocessing done in [" + DateTime.Now.Subtract(startTime).TotalMinutes.ToString("F2") + "] minutes");
            return(caseKnowledgeSet);
        }
Example #9
        public void MakeReports(experimentExecutionContext context, folderNode folder)
        {
            meanClassifierReport = new DocumentSetCaseCollectionReport(extractor.name);

            aceDictionary2D <IWebPostClassifier, kFoldValidationCase, DocumentSetCaseCollectionReport> tempStructure = new aceDictionary2D <IWebPostClassifier, kFoldValidationCase, DocumentSetCaseCollectionReport>();

            DSCCReports firstCase = null;
            List <IWebPostClassifier> classifiers = new List <IWebPostClassifier>();

            foreach (var kFoldCasePair in this)
            {
                if (firstCase == null)
                {
                    firstCase = kFoldCasePair.Value;
                }
                foreach (var pair in kFoldCasePair.Value.avgReports)
                {
                    tempStructure[pair.Key, kFoldCasePair.Key] = pair.Value;
                    if (!classifiers.Contains(pair.Key))
                    {
                        classifiers.Add(pair.Key);
                    }
                }
            }



            // DataSet dataSet = new DataSet(context.setup.name);



            // <---------- CREATING AVERAGE TABLE -----------------------------------------------------
            var tpAvgMacro = new DataTableTypeExtended <DocumentSetCaseCollectionReport>(context.setup.name + " summary", "Cross k-fold average measures; fold-level measures are computed by the macro-averaging method");
            var tpAvgMicro = new DataTableTypeExtended <DocumentSetCaseCollectionReport>(context.setup.name + " summary", "Cross k-fold average measures; fold-level measures are computed by the micro-averaging method");

            List <DocumentSetCaseCollectionReport> macroaverages = new List <DocumentSetCaseCollectionReport>();
            DataTableTypeExtended <DocumentSetCaseCollectionReport> EMperKFolds = new DataTableTypeExtended <DocumentSetCaseCollectionReport>(extractor.name + "_allReports");


            foreach (IWebPostClassifier classifier in classifiers)
            {
                // < ---- report on each classifier

                context.logger.log("-- producing report about [" + classifier.name + "]");
                //objectTable<DocumentSetCaseCollectionReport> tp = new objectTable<DocumentSetCaseCollectionReport>(nameof(DocumentSetCaseCollectionReport.Name), classifier + "_sum");



                DocumentSetCaseCollectionReport avg = new DocumentSetCaseCollectionReport(classifier.name + " macro-averaging, k-fold avg. ");

                DocumentSetCaseCollectionReport rep_eval = new DocumentSetCaseCollectionReport(classifier.name + " micro-averaging, k-fold avg.");

                rep_eval.Classifier = classifier.name;

                classificationEvalMetricSet metrics = new classificationEvalMetricSet();
                classificationEval          eval    = new classificationEval();
                //eval = metrics[classifier.name];

                Int32 c = 0;
                foreach (KeyValuePair <kFoldValidationCase, DSCCReports> kFoldCasePair in this)
                {
                    DocumentSetCaseCollectionReport rep   = kFoldCasePair.Value.avgReports[classifier];
                    kFoldValidationCase             vCase = kFoldCasePair.Key;


                    classificationEvalMetricSet met = rep.GetSetMetrics();

                    if (met != null)
                    {
                        foreach (IDocumentSetClass cl in context.classes.GetClasses())
                        {
                            eval = eval + met[cl.name];
                        }
                    }

                    rep.Name = classifier.name + "_" + vCase.name;
                    avg.AddValues(rep);
                    EMperKFolds.AddRow(rep);

                    c++;
                }

                rep_eval.AddValues(metrics, classificationMetricComputation.microAveraging);



                avg.Classifier = classifier.name;
                avg.DivideValues(c);

                // <<< detecting the best performing classifier across all evaluation folds
                if (avg.F1measure > highestF1Value)
                {
                    highestF1Value      = avg.F1measure;
                    topClassifierReport = avg;
                }

                meanClassifierReport.AddValues(avg);


                // -----------------

                EMperKFolds.AddRow(avg);

                tpAvgMacro.AddRow(avg);

                macroaverages.Add(avg);

                if (DOMAKE_MICROaverage)
                {
                    tpAvgMicro.AddRow(rep_eval);
                }
                // tp.Add(rep_eval);

                if (context.tools.operation.DoMakeReportForEachClassifier)
                {
                    DataTable cTable = EMperKFolds;
                    cTable.SetTitle($"{classifier.name} report");
                    cTable.SetDescription("Summary " + context.setup.validationSetup.k + "-fold validation report for [" + classifier.name + "]");


                    cTable.SetAdditionalInfoEntry("FV Extractor", extractor.name);
                    cTable.SetAdditionalInfoEntry("Classifier", classifier.name);
                    cTable.SetAdditionalInfoEntry("Class name", classifier.GetType().Name);

                    cTable.SetAdditionalInfoEntry("Correct", rep_eval.Correct);
                    cTable.SetAdditionalInfoEntry("Wrong", rep_eval.Wrong);

                    //cTable.SetAdditionalInfoEntry("Precision", rep_eval.Precision);
                    //cTable.SetAdditionalInfoEntry("Recall", rep_eval.Recall);
                    //cTable.SetAdditionalInfoEntry("F1", rep_eval.F1measure);

                    cTable.SetAdditionalInfoEntry("True Positives", metrics[classifier.name].truePositives);
                    cTable.SetAdditionalInfoEntry("False Negatives", metrics[classifier.name].falseNegatives);
                    cTable.SetAdditionalInfoEntry("False Positives", metrics[classifier.name].falsePositives);


                    cTable.AddExtra("Classifier: " + classifier.name + " [" + classifier.GetType().Name + "]");
                    var info = classifier.DescribeSelf();
                    info.ForEach(x => cTable.AddExtra(x));

                    cTable.AddExtra("-----------------------------------------------------------------------");

                    cTable.AddExtra("Precision, Recall and F1 measures expressed in this table are computed by macroaveraging shema");
                    //  output.CopyRowsFrom(cTable);


                    cTable.GetReportAndSave(folder, appManager.AppInfo, extractor.name + "_classifier_" + classifier.name);

                    // dataSet.AddTable(cTable);
                }
            }



            rangeFinderForDataTable rangerMacro = new rangeFinderForDataTable(tpAvgMacro, "Name");



            meanClassifierReport.DivideValues(classifiers.Count);
            if (macroaverages.Count > 0)
            {
                Double maxF1 = macroaverages.Max(x => x.F1measure);
                Double minF1 = macroaverages.Min(x => x.F1measure);

                List <String> minCaseNames = macroaverages.Where(x => x.F1measure == minF1).Select(x => x.Name).ToList();
                List <String> maxCaseNames = macroaverages.Where(x => x.F1measure == maxF1).Select(x => x.Name).ToList();


                var style = EMperKFolds.GetRowMetaSet().SetStyleForRowsWithValue <String>(DataRowInReportTypeEnum.dataHighlightA, nameof(DocumentSetCaseCollectionReport.Name), maxCaseNames);

                EMperKFolds.GetRowMetaSet().AddUnit(style);


                //  style = tpAvgMacro.GetRowMetaSet().SetStyleForRowsWithValue<String>(DataRowInReportTypeEnum.dataHighlightC, nameof(DocumentSetCaseCollectionReport.Name), minCaseNames);



                tpAvgMacro.SetAdditionalInfoEntry("FV Extractor", extractor.name);
                if (DOMAKE_MICROaverage)
                {
                    tpAvgMicro.SetAdditionalInfoEntry("FV Extractor", extractor.name);
                }


                List <String> averageNames = macroaverages.Select(x => x.Name).ToList();
                var           avg_style    = EMperKFolds.GetRowMetaSet().SetStyleForRowsWithValue <String>(DataRowInReportTypeEnum.dataHighlightC, nameof(DocumentSetCaseCollectionReport.Name), averageNames);
                foreach (var x in averageNames)
                {
                    avg_style.AddMatch(x);
                }
            }

            // ::: ------------------------------------------------------------------------------------------------- ::: --------------------------------------------------------------------- ::: //

            tpAvgMacro.SetTitle($"{extractor.name} - macroaverage report");
            if (DOMAKE_MICROaverage)
            {
                tpAvgMicro.SetTitle($"{extractor.name} - microaverage report");
            }

            tpAvgMacro.AddExtra("Complete report on " + context.setup.validationSetup.k + "-fold validation FVE [" + extractor.name + "]");
            tpAvgMacro.AddExtra("Fold-level P, R and F1 measures are computed by macroaveraging method, values here are cross k-fold means.");

            if (DOMAKE_MICROaverage)
            {
                tpAvgMicro.AddExtra("Complete " + context.setup.validationSetup.k + "-fold validation report for FVE [" + extractor.name + "]");
            }
            if (DOMAKE_MICROaverage)
            {
                tpAvgMicro.AddExtra("Fold-level P, R and F1 measures are computed by microaveraging method, values here are cross k-fold means.");
            }

            context.AddExperimentInfo(tpAvgMacro);
            if (DOMAKE_MICROaverage)
            {
                context.AddExperimentInfo(tpAvgMicro);
            }

            tpAvgMacro.AddExtra(extractor.description);


            if (extractor is semanticFVExtractor)
            {
                semanticFVExtractor semExtractor = (semanticFVExtractor)extractor;

                semExtractor.termTableConstructor.DescribeSelf().ForEach(x => tpAvgMacro.AddExtra(x));
                semExtractor.CloudConstructor.DescribeSelf().ForEach(x => tpAvgMacro.AddExtra(x));
                semExtractor.termTableConstructor.DescribeSelf().ForEach(x => tpAvgMicro.AddExtra(x));
                semExtractor.CloudConstructor.DescribeSelf().ForEach(x => tpAvgMicro.AddExtra(x));
            }

            context.logger.log("-- producing summary reports on [" + extractor.name + "]");

            rangerMacro.AddRangeRows("Macroaverage ", tpAvgMacro, true,
                                     imbSCI.Core.math.aggregation.dataPointAggregationType.min | imbSCI.Core.math.aggregation.dataPointAggregationType.max
                                     | imbSCI.Core.math.aggregation.dataPointAggregationType.avg
                                     | imbSCI.Core.math.aggregation.dataPointAggregationType.stdev);
            tpAvgMacro.GetReportAndSave(folder, appManager.AppInfo, extractor.name + "_macroaverage_report", true, true);


            EMperKFolds.AddExtra("The table shows average measures for each fold --- rows marked with colored background show averages for all folds, per classifier.");

            EMperKFolds.GetReportAndSave(folder, appManager.AppInfo, extractor.name + "_allFolds", true, true);

            if (DOMAKE_MICROaverage)
            {
                tpAvgMicro.GetReportAndSave(folder, appManager.AppInfo, extractor.name + "_microaverage_report", true, true);
            }
            //dataSet.GetReportVersion().serializeDataSet(extractor.name + "_classifiers_MultiSheetSummary", folder, imbSCI.Data.enums.reporting.dataTableExportEnum.excel, appManager.AppInfo);
        }
        /// <summary>
        /// Generates the final reports and read me files
        /// </summary>
        public void CloseExperiment(ILogBuilder logger, long startOfLog)
        {
            if (!testSummaries.Any())
            {
                logger.log("No experiment procedures performes");

                return;
            }
            DataTableTypeExtended <classificationReport> summaryTable = new DataTableTypeExtended <classificationReport>("Test results", "k-fold cross validation results");

            classificationReport sumRow = new classificationReport(runName);

            sumRow.Comment = runName + ", " + description;


            //    classificationEvalMetricSet metric = new classificationEvalMetricSet("Total", truthTable.labels_without_unknown);

            foreach (classificationReport s in testSummaries)
            {
                summaryTable.AddRow(s);
                //metric = metric + s;

                if (sumRow.Classifier.isNullOrEmpty())
                {
                    sumRow.Classifier = s.Classifier;
                }

                sumRow.AddValues(s);
            }



            sumRow.DivideValues(testSummaries.Count);

            sumRow.SetReportDataFields(crossValidation, this);

            summaryTable.SetDescription(description);

            summaryTable.SetAdditionalInfoEntry("RunName", runName);
            summaryTable.SetAdditionalInfoEntry("Description", description);
            summaryTable.SetAdditionalInfoEntry("Averaging", averagingMethod.ToString());

            summaryTable.AddRow(sumRow);



            summaryTable.GetReportAndSave(notes.folder, signature);

            finalReport = sumRow;

            //sumRow.ReportToLog(logger);
            sumRow.ReportToLog(notes);

            objectSerialization.saveObjectToXML(sumRow, notes.folder.pathFor("results.xml", imbSCI.Data.enums.getWritableFileMode.overwrite, "Experiment results", true));


            logger.log("Experiment completed");

            notes.SaveNote("note");

            String logPrintout = logger.GetContent(startOfLog);
            String p           = notes.folder.pathFor("log.txt", imbSCI.Data.enums.getWritableFileMode.overwrite, "Log printout during experiment execution");

            File.WriteAllText(p, logPrintout);

            experimentRootFolder.generateReadmeFiles(signature);
        }
        public void Save(ILogBuilder log, Boolean SubLevelCall = false)
        {
            //if (folder == null) folder = notes.folder_corpus;

            if (folder == null)
            {
                return;
            }
            if (purpose != FeatureCWPAnalysisSettings.AnalysisPurpose.application)
            {
                SaveFeatures(folder.Add("Features", "Features", "Features"), featuresByClass);

                EntryDictionary.Save(folder, log, name);

                if (!SubLevelCall)
                {
                    var flds = fields();

                    DataSet dataSet = new DataSet("rep_" + name);

                    // List<DataTableForStatistics> rdt_list = new List<DataTableForStatistics>();
                    foreach (String n in flds)
                    {
                        DataTable dt = EntryDictionary.MakeTable(n + "_" + name, EntryDictionary.description,
                                                                 flds
                                                                 , 500, flds.IndexOf(n));

                        dt.AddStringLine("Report for " + name);

                        dataSet.Tables.Add(dt);
                        //DataTableForStatistics rdt = dt.GetReportTableVersion();
                        //rdt_list.Add(rdt);
                    }

                    DataSetForStatistics report = dataSet.GetReportAndSave(folder, null, "cwp_" + name);

                    var keys = Keys.ToList();
                    keys.Sort();

                    DataTableTypeExtended <FeatureCWPAnalysisSiteMetrics> metrics = new DataTableTypeExtended <FeatureCWPAnalysisSiteMetrics>();
                    Int32 c = 0;
                    foreach (var key in keys)
                    {
                        metrics.AddRow(this[key]);
                        c++;
                        if (c > 2000)
                        {
                            break;
                        }
                    }

                    metrics.GetReportAndSave(folder, null, "cwp_" + name + "metrics");

                    //foreach (var pair in entryReport)
                    //{
                    //    folderNode fn = folder.Add(pair.Key, pair.Key, "Sub entry report");
                    //    pair.Value.Save(log, true);
                    //}
                }
            }
        }