public void UnitTestMethod()
{
    // Example 1: Loading a BibTex file directly by path
    BibTexDataFile bib_1 = new BibTexDataFile("Resources\\test\\S0306457309000259.bib");

    // Example 2: Loading a BibTex file found in the resources folder
    String path = folderResources.findFile("S0306457309000259.bib", SearchOption.AllDirectories);

    // initializes the BibTex data file object
    BibTexDataFile bib = new BibTexDataFile();

    // loads a .bib or .bibtex file from the specified path
    bib.Load(path, log);

    // converts the loaded BibTex entries into a DataTable, with all columns discovered in the entries
    DataTable dt = bib.ConvertToDataTable();

    // saves the DataTable to an Excel file, without adding a Legend spreadsheet
    var finalPath = dt.serializeDataTable(Data.enums.reporting.dataTableExportEnum.excel, bib.name, folderResults, notation);

    // creates an extended version of the Excel file, with an additional spreadsheet for Legend and other meta information
    var reportDataTable_ref = dt.GetReportAndSave(folderResults, notation);

    // Example 3: Short way
    // High-level method, creates an extended version of the Excel file, with an additional spreadsheet for Legend and other meta information
    var reportDataTable = BibTexTools.ExportToExcel(path, notation, log);

    // Example 4: Working with BibTexEntryModel
    // Creation of a BibTex entry from code
    BibTexEntryModel entry = new BibTexEntryModel()
    {
        EntryKey = "SOKOLOVA2009427",
        EntryType = "article",
        journal = "Information Processing & Management",
        title = "A systematic analysis of performance measures for classification tasks",
        keywords = "Performance evaluation, Machine Learning, Text classification",
        year = 2005,
        number = 2,
        issn = "0000-0000",
        @abstract = "Abs",
        doi = "https://doi.org/10.1016/j.ipm.2009.03.002",
        url = "http://www.sciencedirect.com/science/article/pii/S0306457309000259"
    };

    // Creation of a typed data table collection
    DataTableTypeExtended<BibTexEntryModel> bibTable = new DataTableTypeExtended<BibTexEntryModel>("RuntimeCreatedBibTex", "BibTex table, created in Run Time");

    // adds the created entry into the table
    bibTable.AddRow(entry);

    // creates an extended version of the Excel file, with an additional spreadsheet for Legend and other meta information
    var codeDataTable_ref = bibTable.GetReportAndSave(folderResults, notation);
}
public static void MakeReport(classificationReportCollection reportCollection, classificationReportCollectionSettings settings, aceAuthorNotation appInfo, ILogBuilder log, classificationReportStyleDefinition style, classificationReportDataComplexContext context)
{
    DataTableTypeExtended<classificationReportExpanded> table = reportCollection.MakeOverviewTable(context, reportCollection.name, reportCollection.description);
    table.SetTitle(reportCollection.name);

    var statDataTable = table.GetReportAndSave(reportCollection.rootFolder, appInfo);
    log.log("Report [" + table.TableName + "] created at " + statDataTable.lastFilePath);

    // context.cumulative_tables.Add(table);

    var layers = reportCollection.GetSpaceLayers(style);

    foreach (var pair in layers)
    {
        var reportSpace = classificationReportSpace.BuildReportSpace(pair.Value, reportCollection.datasetName, settings.SELECT_REPORT_NAME_PARTS, style, pair.Key);

        if (!context.report_spaces.ContainsKey(reportSpace.name))
        {
            context.report_spaces.Add(reportSpace.name, new List<classificationReportSpace>());
            context.comparative_tables.Add(reportSpace.name, new List<DataTable>());
            context.comparative_narrow_tables.Add(reportSpace.name, new List<DataTable>());
        }

        context.report_spaces[reportSpace.name].Add(reportSpace);

        System.Data.DataTable comparative_table = reportSpace.ConstructTable("comparative_" + reportCollection.name + "_" + reportSpace.name, reportCollection.description);
        context.comparative_tables[reportSpace.name].Add(comparative_table);

        comparative_table.AddExtra("Group path: " + reportCollection.rootFolder.path);
        comparative_table.GetReportAndSave(reportCollection.rootFolder, appInfo);

        System.Data.DataTable comparative_table_small = reportSpace.ConstructTable("comparative_" + reportCollection.name + "_" + reportSpace.name + "_small", reportCollection.description, classificationReportTableMode.onlyBasic);
        context.comparative_narrow_tables[reportSpace.name].Add(comparative_table_small);

        var styleFS = style.CloneViaXML();
        styleFS.valueToUse = classificationReportStyleDefinition.GetFS(); // new reportExpandedDataPair(classificationReportStyleDefinition.VALUE_FS, "Selected Features", "Number of features actually selected");

        reportSpace = classificationReportSpace.BuildReportSpace(pair.Value, reportCollection.datasetName, settings.SELECT_REPORT_NAME_PARTS, styleFS, pair.Key);
        reportSpace.ConstructTable("featureSelected_" + reportCollection.name + "_" + reportSpace.name, reportCollection.description).GetReportAndSave(reportCollection.rootFolder, appInfo);
    }

    // return comparative_table;
}
/// <summary>
/// Creates a generic DataTable collection, adds 5 rows and generates Excel and CSV files
/// </summary>
public void ExampleOne_DataTableDataAnnotation()
{
    // creating a typed DataTable collection, holding the DataEntryTest class
    DataTableTypeExtended<DataEntryTest> dataTableTypeExtended = new DataTableTypeExtended<DataEntryTest>(nameof(DataEntryTest), nameof(ExampleOne_DataTableDataAnnotation));

    // adding five rows
    dataTableTypeExtended.AddRow(new DataEntryTest());
    dataTableTypeExtended.AddRow(new DataEntryTest());
    dataTableTypeExtended.AddRow(new DataEntryTest());
    dataTableTypeExtended.AddRow(new DataEntryTest());
    dataTableTypeExtended.AddRow(new DataEntryTest());

    // generating the report and exporting it to an Excel file
    DataTableForStatistics report = dataTableTypeExtended.GetReportAndSave(folderResults);
}
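// The example above relies on a DataEntryTest class that is not shown in this listing. Below is a minimal
// sketch of what such an annotated class might look like; the property names, default values and the use of
// standard System.ComponentModel attributes are assumptions made for illustration only - the exact set of
// data-annotation attributes recognized by DataTableTypeExtended may differ.
using System;
using System.ComponentModel;

public class DataEntryTest
{
    [DisplayName("Entry name")]
    [Description("Human-readable name of the test entry")]
    public string Name { get; set; } = "Test entry";

    [DisplayName("Score")]
    [Description("Arbitrary numeric value used to populate the example table")]
    public double Score { get; set; } = 1.0;

    [DisplayName("Created")]
    [Description("Time stamp of row creation")]
    public DateTime Created { get; set; } = DateTime.Now;
}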
/// <summary>Creation of BibTex entry from code, and generation of Excel table</summary>
/** <example><para>Creation of BibTex entry from code, and generation of Excel table</para>
 * <code>
 * // --- We create the entry and write its source code to the disk
 *
 * // Creation of BibTex entry from code
 * BibTexEntryModel entry = new BibTexEntryModel()
 * {
 *     EntryKey = "SOKOLOVA2009427",
 *     EntryType = "article",
 *     journal = "Information Processing &amp; Management",
 *     title = "A systematic analysis of performance measures for classification tasks",
 *     keywords = "Performance evaluation, Machine Learning, Text classification",
 *     year = 2005,
 *     number = 2,
 *     issn = "0000-0000",
 *     @abstract = "Abs",
 *     doi = "https://doi.org/10.1016/j.ipm.2009.03.002",
 *     url = "http://www.sciencedirect.com/science/article/pii/S0306457309000259",
 *     author = "Marina Sokolova and Guy Lapalme"
 * };
 *
 * // New instance of the source processor object; you would share this one with other parts of your code.
 * BibTexSourceProcessor processor = new BibTexSourceProcessor();
 *
 * // Generating BibTex code
 * String code = entry.GetSource(processor.latex, log);
 *
 * // Making the output path
 * String path = folderResults.pathFor(nameof(Example4_UsingObjectModel) + ".txt");
 *
 * File.WriteAllText(path, code);
 *
 * // --- Now we export it to the Excel file
 *
 * // Creation of a typed data table collection
 * DataTableTypeExtended&lt;BibTexEntryModel&gt; bibTable = new DataTableTypeExtended&lt;BibTexEntryModel&gt;("RuntimeCreatedBibTex", "BibTex table, created in Run Time");
 * bibTable.AddRow(entry);
 *
 * // creates an extended version of the Excel file, with an additional spreadsheet for Legend and other meta information
 * var codeDataTable_ref = bibTable.GetReportAndSave(folderResults, notation, nameof(Example4_UsingObjectModel));
 * </code>
 * </example>
 */
public void Example4_UsingObjectModel()
{
    // --- We create the entry and write its source code to the disk

    // Creation of a BibTex entry from code
    BibTexEntryModel entry = new BibTexEntryModel()
    {
        EntryKey = "SOKOLOVA2009427",
        EntryType = "article",
        journal = "Information Processing & Management",
        title = "A systematic analysis of performance measures for classification tasks",
        keywords = "Performance evaluation, Machine Learning, Text classification",
        year = 2005,
        number = 2,
        issn = "0000-0000",
        @abstract = "Abs",
        doi = "https://doi.org/10.1016/j.ipm.2009.03.002",
        url = "http://www.sciencedirect.com/science/article/pii/S0306457309000259",
        author = "Marina Sokolova and Guy Lapalme"
    };

    // New instance of the source processor object; you would share this one with other parts of your code.
    BibTexSourceProcessor processor = new BibTexSourceProcessor();

    // Generating BibTex code
    String code = entry.GetSource(processor.latex, log);

    // Making the output path
    String path = folderResults.pathFor(nameof(Example4_UsingObjectModel) + ".txt");

    File.WriteAllText(path, code);

    // --- Now we export it to the Excel file

    // Creation of a typed data table collection
    DataTableTypeExtended<BibTexEntryModel> bibTable = new DataTableTypeExtended<BibTexEntryModel>("RuntimeCreatedBibTex", "BibTex table, created in Run Time");
    bibTable.AddRow(entry);

    // creates an extended version of the Excel file, with an additional spreadsheet for Legend and other meta information
    var codeDataTable_ref = bibTable.GetReportAndSave(folderResults, notation, nameof(Example4_UsingObjectModel));
}
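// A possible follow-up to the example above - a sketch only, reusing the BibTexDataFile.Load and
// ConvertToDataTable calls shown in the loading examples earlier in this listing, and assuming that Load
// accepts the .txt path written above: read the generated BibTex source back from disk and inspect it as a
// DataTable, to verify the round trip.
BibTexDataFile roundTrip = new BibTexDataFile();
roundTrip.Load(path, log);

// every field discovered in the entries becomes a column
DataTable roundTripTable = roundTrip.ConvertToDataTable();
log.log("Round-trip loaded [" + roundTripTable.Rows.Count + "] BibTex entries from: " + path);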
public void Publish(folderNode folder, Boolean exportXML = true, Boolean exportDescribe = true, Boolean exportDatatable = true)
{
    //if (folder == null) folder = notes.folder_corpus;
    if (folder == null)
    {
        return;
    }

    if (exportXML)
    {
        String xml = objectSerialization.ObjectToXML(this);
        String x_path = folder.pathFor(name + "_report.xml", imbSCI.Data.enums.getWritableFileMode.overwrite, "Serialized structural report of a dataset");
        File.WriteAllText(x_path, xml);
    }

    if (exportDescribe)
    {
        builderForText builderForText = new builderForText();
        String t_path = folder.pathFor(name + "_report.txt", imbSCI.Data.enums.getWritableFileMode.overwrite, "Summary of the structural report of a dataset");
        Describe(builderForText);
        File.WriteAllText(t_path, builderForText.GetContent());
    }

    if (exportDatatable)
    {
        DataTableTypeExtended<DatasetStructureReport> dt_reports = new DataTableTypeExtended<DatasetStructureReport>(name, "Dataset structure stats");
        dt_reports.AddRow(this);

        foreach (var ch in Children)
        {
            dt_reports.AddRow(ch);
        }

        dt_reports.GetReportAndSave(folder, null, name + "_report");
    }
}
/// <summary>
/// Publishes the table blocks.
/// </summary>
/// <param name="folder">The folder.</param>
/// <param name="blockSize">Size of the block.</param>
/// <param name="blockCount">The block count.</param>
/// <returns>Table with one row per entry, or null if the collection is empty.</returns>
public DataTableTypeExtended<FeatureCWPFrequencies> PublishTableBlocks(folderNode folder, Int32 blockSize = 500, Int32 blockCount = 10)
{
    if (!this.Any())
    {
        return null;
    }

    foreach (var pair in this)
    {
        pair.Value.Compute();
    }

    for (int i = 0; i < blockCount; i++)
    {
        var p = this.First();

        String dt_n = name + "_" + i.ToString();
        DataTableTypeExtended<FeatureCWPFrequencies> cwpMetrics = new DataTableTypeExtended<FeatureCWPFrequencies>(dt_n, "Collected metrics");

        // DataTableTypeExtended<FeatureCWPFrequencies> cwpFrequencies = new DataTableTypeExtended<FeatureCWPFrequencies>(dt_n + "_freq", "frequency metrics");
        // p.Value.SetDataTable(datatable);
        // DataColumn nm = datatable.Columns.Add("Name");

        Int32 c = 0;
        foreach (var pair in this)
        {
            // take entries belonging to the i-th block of [blockSize] items
            if (c >= (i * blockSize) && c < ((i + 1) * blockSize))
            {
                cwpMetrics.AddRow(pair.Value);
                // cwpFrequencies.AddRow(term_finders.)
            }
            c++;
        }

        if (cwpMetrics.Rows.Count > 0)
        {
            DataTableConverterASCII dataTableConverterASCII = new DataTableConverterASCII();
            dataTableConverterASCII.ConvertToFile(cwpMetrics, folder, dt_n);

            DataTableForStatistics report = cwpMetrics.GetReportAndSave(folder, null, dt_n);
        }
    }

    DataTableTypeExtended<FeatureCWPFrequencies> output = new DataTableTypeExtended<FeatureCWPFrequencies>();
    foreach (var p in this)
    {
        output.AddRow(p.Value);
    }

    output.Save(folder, null, "stats_frequencies");
    return output;
}
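// A usage sketch for PublishTableBlocks above. The cwpFrequencyCollection instance and the target folder
// name are hypothetical stand-ins for whatever collection and folderNode exist in the caller; the block
// sizes shown are just the defaults made explicit.
folderNode frequenciesFolder = folderResults.Add("frequencies", "CWP frequencies", "Per-feature CWP frequency tables, published in blocks");

DataTableTypeExtended<FeatureCWPFrequencies> allFrequencies = cwpFrequencyCollection.PublishTableBlocks(frequenciesFolder, blockSize: 500, blockCount: 10);

if (allFrequencies != null)
{
    log.log("Published [" + allFrequencies.Rows.Count + "] frequency rows in blocks of 500.");
}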
protected void runModel(experimentExecutionContext context, IWebFVExtractor model)
{
    imbSCI.Core.screenOutputControl.logToConsoleControl.setAsOutput(context.logger, model.name);

    aceDictionarySet<IDocumentSetClass, DocumentSetCaseCollection> casesByClasses = new aceDictionarySet<IDocumentSetClass, DocumentSetCaseCollection>();

    DSCCReportSet kFoldReport = new DSCCReportSet(model);
    var valCol = context.validationCollections[model.name];

    List<DocumentSetCaseCollectionSet> modelCaseResults = new List<DocumentSetCaseCollectionSet>();

    Int32 crashRetries = context.tools.operation.doRebootFVEOnCrashRetryLimit;

    while (crashRetries > 0)
    {
        try
        {
            experimentNotes modelNotes = new experimentNotes(valCol.folder, "Fold-level experiment settings description notes");
            modelNotes.AppendLine("# Notes on Feature Vector Extractor: " + model.name);

            var nts = model.DescribeSelf();
            nts.ForEach(x => modelNotes.AppendLine(x));

            context.logger.log("Executing k-fold cases with model [" + model.name + "]");

            valCol.DescribeSampleDistribution(modelNotes);
            context.mainReport.valColVsModelVsSampleHash.Add("[" + model.name + "]".toWidthExact(20) + " [sample distribution hash: " + valCol.SampleDistributionHash + "]");

            modelNotes.SaveNote();

            ParallelOptions ops = new ParallelOptions();
            ops.MaxDegreeOfParallelism = context.tools.operation.ParallelThreads;

            Parallel.ForEach<kFoldValidationCase>(valCol.GetCases(), ops, valCase =>
            {
                model.DoFVEAndTraining(valCase, context.tools, context.logger); // <--------------------------- BUILDING FVE

                DocumentSetCaseCollectionSet results = model.DoClassification(valCase, context.tools, context.logger);

                if (!results.Any())
                {
                    throw new aceScienceException("DoClassification for [" + model.name + "] returned no results!", null, model, "DoClassification " + model.name + " failed!", context);
                }

                foreach (var pair in results)
                {
                    DocumentSetCaseCollection cls = pair.Value;
                    casesByClasses.Add(cls.setClass, cls);
                }

                valCase.evaluationResults = results;

                if (context.tools.DoResultReporting)
                {
                    context.logger.log("producing reports on k-Fold case [" + valCase.name + "]");

                    DSCCReports r = results.GetReports();

                    var sumMeans = r.GetAverageTable(context); //.GetReportAndSave(valCase.folder, appManager.AppInfo, "CrossValidation_" + valCase.name);
                    sumMeans.SetDescription("FVE report, aggregated for all categories - for fold [" + valCase.name + "]");
                    sumMeans.GetReportAndSave(valCase.folder, appManager.AppInfo, "CrossValidation_" + valCase.name, true, context.tools.operation.doReportsInParalell);

                    var fveAndCase = r.GetFullValidationTable(context);
                    fveAndCase.SetDescription("Per-category aggregate statistics, for each classifier, within fold [" + valCase.name + "], used for macro-averaging");
                    fveAndCase.GetReportAndSave(valCase.folder, appManager.AppInfo, "CrossValidation_extrainfo_" + valCase.name, true, context.tools.operation.doReportsInParalell);

                    var fullCaseReport = results.GetReportOnAllCases();
                    fullCaseReport.GetReportAndSave(valCase.folder, appManager.AppInfo, "FullReport_" + valCase.name, true, context.tools.operation.doReportsInParalell);

                    kFoldReport.Add(valCase, r);
                }

                context.logger.log("k-Fold case [" + valCase.name + "] completed");
                context.notes.log("- - Experiment sequence for [" + valCase.name + "] fold completed");

                if (context.tools.operation.doSaveKnowledgeForClasses)
                {
                    valCase.knowledgeLibrary.SaveKnowledgeInstancesForClasses(valCase, context.logger);
                }
            });

            foreach (var fold in valCol.GetCases()) // Parallel.ForEach<kFoldValidationCase>(valCol.GetCases(), ops, valCase =>
            {
                modelCaseResults.Add(fold.evaluationResults);
            }

            crashRetries = 0;
        }
        catch (Exception ex)
        {
            crashRetries--;
            context.errorNotes.LogException("FVE Model crashed -- retries left [" + crashRetries + "] --- ", ex, model.name);
            context.logger.log(":::: REPEATING the model [" + model.name + "] ::: CRASHED [" + ex.Message + "] ::: RETRIES [" + crashRetries + "]");

            imbACE.Services.terminal.aceTerminalInput.doBeepViaConsole(1200, 1000, 1);
            imbACE.Services.terminal.aceTerminalInput.doBeepViaConsole(2400, 1000, 1);

            imbSCI.Core.screenOutputControl.logToConsoleControl.setAsOutput(context.logger, "RETRIES[" + crashRetries + "]");
        }
    }

    imbSCI.Core.screenOutputControl.logToConsoleControl.setAsOutput(context.logger, "Reporting");

    valCol.knowledgeLibrary.SaveCaseKnowledgeInstances(context.logger);

    // DocumentSetCaseCollection second = null;

    if (modelCaseResults.Any())
    {
        featureExtractionMetrics modelMetrics = new featureExtractionMetrics(model.name, "All");
        DataTableTypeExtended<featureExtractionMetrics> modelVsCategoryMetrics = new DataTableTypeExtended<featureExtractionMetrics>(model.name, "Model metrics per category");

        // <-------------------------------------- CATEGORIES REPORT ----------------------------------------------

        DataTable allTable = modelCaseResults.First()[0].GetReportTable(false, false).GetClonedShema<DataTable>(); //valCol.GetCases().First().evaluationResults[0].GetReportTable(false, false);

        rangeFinderForDataTable ranger = new rangeFinderForDataTable(allTable, "name");
        ranger.columnsToSignIn.Add("Case");

        foreach (KeyValuePair<IDocumentSetClass, aceConcurrentBag<DocumentSetCaseCollection>> pair in casesByClasses)
        {
            DocumentSetCaseCollection first = null;
            DataTable repTable = null;

            ranger.prepareForNextAggregationBlock(allTable, "name");

            foreach (DocumentSetCaseCollection cn in pair.Value)
            {
                foreach (var cni in cn)
                {
                    if (cni != null)
                    {
                        cn.BuildRow(cni, allTable, false);
                    }
                }
            }

            ranger.AddRangeRows(pair.Key.name, allTable, true, imbSCI.Core.math.aggregation.dataPointAggregationType.avg | imbSCI.Core.math.aggregation.dataPointAggregationType.stdev);

            var categoryMetrics = new featureExtractionMetrics(model.name, pair.Key.name);
            categoryMetrics.SetValues(ranger);

            modelVsCategoryMetrics.AddRow(categoryMetrics);
            modelMetrics.AddValues(categoryMetrics);

            categoryMetrics.saveObjectToXML(valCol.folder.pathFor(model.name + "_" + categoryMetrics.Name + ".xml", imbSCI.Data.enums.getWritableFileMode.overwrite, "FV and Category sample metrics, serialized object"));

            //context.notes.log("- - Creating report for category [" + pair.Key.name + "] completed");
            //repTable.GetReportAndSave(valCol.folder, appManager.AppInfo, model.name + "_category_" + pair.Key.name);
        }

        modelMetrics.DivideValues(casesByClasses.Count);
        modelMetrics.saveObjectToXML(valCol.folder.pathFor(model.name + "_metrics.xml", imbSCI.Data.enums.getWritableFileMode.overwrite, "Cross-categories macroaveraged metrics of the FVE model [" + model.name + "]"));

        modelVsCategoryMetrics.AddRow(modelMetrics);
        modelVsCategoryMetrics.GetRowMetaSet().SetStyleForRowsWithValue<String>(DataRowInReportTypeEnum.dataHighlightA, "Name", modelMetrics.Name);
        modelVsCategoryMetrics.GetReportAndSave(valCol.folder, appManager.AppInfo, model.name + "_metrics", true, true);

        context.mainReport.AddModelMetrics(modelMetrics);

        context.notes.log("- Creating report for all categories [" + model.name + "] ");

        allTable.GetReportAndSave(valCol.folder, appManager.AppInfo, model.name + "_categories", true, context.tools.operation.doReportsInParalell);
    }

    kFoldReport.MakeReports(context, valCol.folder);

    context.mainReport.AddBestPerformer(kFoldReport.GetTopClassifierReport(), kFoldReport.meanClassifierReport, model);

    // <---------------- creation of complete report

    context.notes.log("- Experiment sequence with Feature Vector Extractor [" + model.name + "] completed");
    context.notes.SaveNote();

    // <------------- END OF THE MODEL -------------------------------------------------------------------------
}
/// <summary>
/// Prepares for parallel execution.
/// </summary>
/// <param name="tools">The tools.</param>
/// <param name="_context">The context.</param>
public webProjectKnowledgeSet PrepareForParallelExecution(classifierTools tools, experimentExecutionContext _context)
{
    if (caseKnowledgeSet == null)
    {
        caseKnowledgeSet = new webProjectKnowledgeSet();
    }

    if (items.Any())
    {
        experimentContext.notes.log("Mining Context was ready already.");
        return caseKnowledgeSet;
    }

    DateTime startTime = DateTime.Now;

    experimentContext = _context;

    List<webCaseKnowledge> cases = new List<webCaseKnowledge>();

    folderNode classReportFolder = experimentContext.folder.Add("General", "General and diagnostic reports", "The folder contains general (outside k-folds) reports on analysed industries (categories), web sites and other diagnostic data");

    // <---------------------------------------------------------------------------------------------------------------- [ performing pipeline ]

    experimentContext.notes.log("Executing the Mining Context decomposition with the pipeline model");

    foreach (IDocumentSetClass classSet in experimentContext.classes.GetClasses())
    {
        var pipelineContext = GetContextForPipeline(tools, classSet);
        sitesByCategory.Add(classSet, new List<pipelineTaskMCSiteSubject>());

        if (!pipelineContext.exitByType.ContainsKey(typeof(pipelineTaskMCSiteSubject)))
        {
            throw new aceGeneralException("Pipeline context output contains no web site subjects! Check the pipeline Site Task constructor.", null, pipelineContext, "Pipeline broken");
        }

        var sitesForContext = pipelineContext.exitByType[typeof(pipelineTaskMCSiteSubject)]; // <----- preparing

        foreach (var site in sitesForContext)
        {
            tokenBySite.Add(site as pipelineTaskMCSiteSubject, new ConcurrentBag<pipelineTaskSubjectContentToken>());
            sitesByCategory[classSet].Add(site as pipelineTaskMCSiteSubject);

            webCaseKnowledge webCase = new webCaseKnowledge(site as pipelineTaskMCSiteSubject, classSet);

            caseKnowledgeSet.Add(webCase);
            cases.Add(webCase);
        }

        semanticFVExtractorKnowledge kn = new semanticFVExtractorKnowledge();
        kn.name = classSet.name + "_general";
        kn.relatedItemPureName = classSet.name;
        kn.type = WebFVExtractorKnowledgeType.aboutCompleteCategory;
        kn.Deploy(classReportFolder, experimentContext.logger);

        knowledgeByClass.TryAdd(classSet, kn);
    }

    experimentContext.notes.log("Sorting tokens for all sites [in parallel]");

    Parallel.ForEach(tokenBySite.Keys, site =>
    {
        var leafs = site.getAllLeafs();
        foreach (var leaf in leafs)
        {
            pipelineTaskSubjectContentToken token = leaf as pipelineTaskSubjectContentToken;
            if (token != null)
            {
                tokenBySite[site].Add(token);
            }
        }
    });

    foreach (var c in cases)
    {
        c.tokens = tokenBySite[c.MCSiteSubject];
    }

    experimentContext.notes.log("Building diagnostic TF-IDF master tables for all classes [in parallel]");

    Boolean useIntegratedApproach = false;

    if (useIntegratedApproach)
    {
        var valCase = experimentContext.validationCollections[experimentContext.masterExtractor.name].GetDiagnosticCase(experimentContext.classes);

        Parallel.ForEach(sitesByCategory, pair =>
        {
            knowledgeByClass.TryAdd(pair.Key, experimentContext.masterExtractor.DoFVExtractionForClassViaCases(valCase.trainingCases[pair.Key.classID], pair.Key, valCase, experimentContext.tools, experimentContext.logger));
        });
    }
    else
    {
        Parallel.ForEach(sitesByCategory, pair =>
        {
            IDocumentSetClass category = pair.Key;
            List<pipelineTaskMCSiteSubject> sites = pair.Value;

            var lt = BuildLemmaTableForClass(tools, category, sites);
            lt.Save();
            // lt.SaveAs(classReportFolder.pathFor(lt.info.Name), imbSCI.Data.enums.getWritableFileMode.overwrite);
        });
    }

    experimentContext.notes.log("Saving lexical resource cache subset - for later reuse in case of a repeated experiment run");
    tools.SaveCache();

    if (!useIntegratedApproach)
    {
        experimentContext.notes.log("Performing chunk construction for all web sites in all categories [in serial]");

        foreach (IDocumentSetClass classSet in experimentContext.classes.GetClasses())
        {
            BuildChunksForClass(tools, classSet);
        }

        foreach (IDocumentSetClass classSet in experimentContext.classes.GetClasses())
        {
            experimentContext.masterExtractor.chunkTableConstructor.process(chunksByCategory[classSet], cnt_level.mcPage, knowledgeByClass[classSet].WLChunkTableOfIndustryClass, null, experimentContext.logger, false);
        }
    }

    if (tools.operation.doCreateDiagnosticMatrixAtStart)
    {
        experimentContext.notes.log("Performing diagnostic analysis on all categories... [doCreateDiagnosticMatrixAtStart=true]");

        folderNode matrixReport = classReportFolder.Add("clouds", "More reports on semantic cloud", "Directory contains exported DirectedGraphs, various matrix derivatives, combined cloud and other diagnostic things");

        List<lemmaSemanticCloud> clouds = new List<lemmaSemanticCloud>();
        List<lemmaSemanticCloud> filteredClouds = new List<lemmaSemanticCloud>();

        var converter = lemmaSemanticCloud.GetDGMLConverter();

        foreach (IDocumentSetClass classSet in experimentContext.classes.GetClasses())
        {
            // experimentContext.masterExtractor.chunkTableConstructor.process(chunksByCategory[classSet], cnt_level.mcPage, knowledgeByClass[classSet].WLChunkTableOfIndustryClass, null, experimentContext.logger, false);

            var cloud = experimentContext.masterExtractor.CloudConstructor.process(knowledgeByClass[classSet].WLChunkTableOfIndustryClass, knowledgeByClass[classSet].WLTableOfIndustryClass, knowledgeByClass[classSet].semanticCloud, experimentContext.logger, tokenBySite.Keys.ToList(), tools.GetLemmaResource());

            knowledgeByClass[classSet].semanticCloud.className = classSet.name;
            clouds.Add(cloud);

            if (experimentContext.tools.operation.doUseSimpleGraphs)
            {
                cloud.GetSimpleGraph(true).Save(matrixReport.pathFor("cloud_initial_" + classSet.name, imbSCI.Data.enums.getWritableFileMode.none, "Initial version of full-sample set, diagnostic Semantic Cloud for category [" + classSet.name + "]"));
            }
            else
            {
                converter.ConvertToDMGL(cloud).Save(matrixReport.pathFor("cloud_initial_" + classSet.name, imbSCI.Data.enums.getWritableFileMode.none, "Initial version of full-sample set, diagnostic Semantic Cloud for category [" + classSet.name + "]"));
            }

            knowledgeByClass[classSet].semanticCloudFiltered = knowledgeByClass[classSet].semanticCloud.CloneIntoType<lemmaSemanticCloud>(true);
            knowledgeByClass[classSet].semanticCloudFiltered.className = classSet.name;

            filteredClouds.Add(knowledgeByClass[classSet].semanticCloudFiltered);
        }

        cloudMatrix matrix = new cloudMatrix("CloudMatrix", "Diagnostic cloud matrix created from the complete sample set of [" + clouds.Count() + "] classes");
        matrix.build(filteredClouds, experimentContext.logger);

        lemmaSemanticCloud mergedCloudInitial = matrix.GetUnifiedCloud();
        mergedCloudInitial.Save(matrixReport.pathFor("unified_initial_cloud.xml", imbSCI.Data.enums.getWritableFileMode.overwrite, "Serialized object - Initial version of Semantic Cloud built as union of full-sample set Semantic Clouds of all categories"));

        var reductions = matrix.TransformClouds(experimentContext.masterExtractor.settings.semanticCloudFilter, experimentContext.logger);

        var p = matrixReport.pathFor("reductions_nodes.txt", imbSCI.Data.enums.getWritableFileMode.overwrite, "Report on Cloud Matrix transformation process");
        File.WriteAllLines(p, reductions);

        matrix.BuildTable(experimentContext.masterExtractor.settings.semanticCloudFilter, cloudMatrixDataTableType.initialState | cloudMatrixDataTableType.maxCloudFrequency | cloudMatrixDataTableType.absoluteValues).GetReportAndSave(matrixReport, appManager.AppInfo, "matrix_max_cf_initial", true, experimentContext.tools.operation.doReportsInParalell);

        matrix.BuildTable(experimentContext.masterExtractor.settings.semanticCloudFilter, cloudMatrixDataTableType.initialState | cloudMatrixDataTableType.overlapSize | cloudMatrixDataTableType.absoluteValues).GetReportAndSave(matrixReport, appManager.AppInfo, "matrix_overlap_size_initial", true, experimentContext.tools.operation.doReportsInParalell);

        matrix.BuildTable(experimentContext.masterExtractor.settings.semanticCloudFilter, cloudMatrixDataTableType.initialState | cloudMatrixDataTableType.overlapValue | cloudMatrixDataTableType.absoluteValues).GetReportAndSave(matrixReport, appManager.AppInfo, "matrix_overlap_value_initial", true, experimentContext.tools.operation.doReportsInParalell);

        matrix.ExportTextReports(matrixReport, true, "matrix_cf");
        matrix.ExportTextReports(matrixReport, false, "matrix_cf");

        lemmaSemanticCloud mergedCloudAfterReduction = matrix.GetUnifiedCloud();
        mergedCloudAfterReduction.Save(matrixReport.pathFor("unified_reduced_cloud.xml", imbSCI.Data.enums.getWritableFileMode.overwrite, "Serialized object - Version of the all-categories diagnostic Semantic Cloud, after the Cloud Matrix filter was applied"));

        if (experimentContext.tools.operation.doUseSimpleGraphs)
        {
            mergedCloudInitial.GetSimpleGraph(true).Save(matrixReport.pathFor("unified_initial_cloud", imbSCI.Data.enums.getWritableFileMode.overwrite, "DirectedGraphML file - unified Semantic Cloud, before the Cloud Matrix filter was applied - open this in Visual Studio"));
        }
        else
        {
            converter = lemmaSemanticCloud.GetDGMLConverter();
            converter.ConvertToDMGL(mergedCloudInitial).Save(matrixReport.pathFor("unified_initial_cloud", imbSCI.Data.enums.getWritableFileMode.overwrite, "DirectedGraphML file - unified Semantic Cloud, before the Cloud Matrix filter was applied - open this in Visual Studio"));
        }

        // <-------- analysis -----------------------------------------------------------------------------------

        DataTableTypeExtended<freeGraphReport> cloudReports = new DataTableTypeExtended<freeGraphReport>();
        foreach (var cl in filteredClouds)
        {
            freeGraphReport fgReport = new freeGraphReport(cl);
            fgReport.Save(matrixReport);
            cloudReports.AddRow(fgReport);
        }

        freeGraphReport unifiedReport = new freeGraphReport(mergedCloudAfterReduction);
        unifiedReport.Save(matrixReport);
        cloudReports.AddRow(unifiedReport);

        cloudReports.GetReportAndSave(matrixReport, appManager.AppInfo, "analysis_SemanticClouds");

        // <-------- analysis -----------------------------------------------------------------------------------

        foreach (IDocumentSetClass classSet in experimentContext.classes.GetClasses())
        {
            var cloud = knowledgeByClass[classSet].semanticCloudFiltered; // .WLChunkTableOfIndustryClass, knowledgeByClass[classSet].WLTableOfIndustryClass, knowledgeByClass[classSet].semanticCloud, experimentContext.logger, tokenBySite.Keys.ToList());

            // exports the per-category cloud after the Cloud Matrix reduction
            if (experimentContext.tools.operation.doUseSimpleGraphs)
            {
                cloud.GetSimpleGraph(true).Save(matrixReport.pathFor("cloud_reduced_" + classSet.name, imbSCI.Data.enums.getWritableFileMode.overwrite, "DirectedGraphML file - Semantic Cloud for category [" + classSet.name + "], after the Cloud Matrix filter was applied - open this in Visual Studio"));
            }
            else
            {
                converter = lemmaSemanticCloud.GetDGMLConverter();
                converter.ConvertToDMGL(cloud).Save(matrixReport.pathFor("cloud_reduced_" + classSet.name, imbSCI.Data.enums.getWritableFileMode.overwrite, "DirectedGraphML file - Semantic Cloud for category [" + classSet.name + "], after the Cloud Matrix filter was applied - open this in Visual Studio"));
            }

            //converter.ConvertToDMGL(cloud).Save(matrixReport.pathFor("cloud_reduced_" + classSet.name, imbSCI.Data.enums.getWritableFileMode.none, "DirectedGraphML file - Initial version of Semantic Cloud built as union of full-sample set Semantic Clouds of all categories (Open this with VS)"), imbSCI.Data.enums.getWritableFileMode.overwrite);
        }

        instanceCountCollection<String> tfcounter = new instanceCountCollection<string>();

        foreach (IDocumentSetClass classSet in experimentContext.classes.GetClasses())
        {
            var wlt = knowledgeByClass[classSet].WLTableOfIndustryClass.GetDataTable();
            wlt.DefaultView.Sort = "termFrequency desc";
            var sorted = wlt.DefaultView.ToTable();

            var tbl = wlt.GetClonedShema<DataTable>(true);
            tbl.CopyRowsFrom(sorted, 0, 100);
            tbl.GetReportAndSave(classReportFolder, appManager.AppInfo, classSet.name + "_WebLemma", true, experimentContext.tools.operation.doReportsInParalell);

            var cht = knowledgeByClass[classSet].WLChunkTableOfIndustryClass.GetDataTable();
            cht.DefaultView.Sort = "termFrequency desc";
            var csorted = cht.DefaultView.ToTable();

            tbl = cht.GetClonedShema<DataTable>(true);
            tbl.CopyRowsFrom(csorted, 0, 100);
            tbl.GetReportAndSave(classReportFolder, appManager.AppInfo, classSet.name + "_Chunks", true, experimentContext.tools.operation.doReportsInParalell);

            tfcounter.AddInstanceRange(knowledgeByClass[classSet].WLTableOfIndustryClass.unresolved);

            knowledgeByClass[classSet].OnBeforeSave();
        }

        List<String> countSorted = tfcounter.getSorted();

        StringBuilder sb = new StringBuilder();
        foreach (String s in countSorted)
        {
            sb.AppendLine(String.Format("{1} : {0}", s, tfcounter[s]));
        }

        String pt = classReportFolder.pathFor("unresolved_tokens.txt", imbSCI.Data.enums.getWritableFileMode.none, "Cloud Frequency list of all unresolved letter-only tokens");
        File.WriteAllText(pt, sb.ToString());
    }

    if (tools.operation.doFullDiagnosticReport)
    {
        experimentContext.notes.log("Generating full diagnostic report on classes...");

        DataTable rep = null;
        foreach (IDocumentSetClass classSet in experimentContext.classes.GetClasses())
        {
            rep = this.GetClassKnowledgeReport(classSet, rep);
        }

        rep.SetAdditionalInfoEntry("Experiment", experimentContext.setup.name);
        rep.AddExtra("Experiment: " + experimentContext.setup.name);
        rep.AddExtra("Info: " + experimentContext.setup.description);

        rep.SetDescription("Structural report for all classes in the experiment");
        rep.GetReportAndSave(classReportFolder, appManager.AppInfo, "structural_class_report", true, experimentContext.tools.operation.doReportsInParalell);
    }

    classReportFolder.generateReadmeFiles(appManager.AppInfo);

    experimentContext.notes.log("Mining Context preprocessing done in [" + DateTime.Now.Subtract(startTime).TotalMinutes.ToString("F2") + "] minutes");

    return caseKnowledgeSet;
}
public void MakeReports(experimentExecutionContext context, folderNode folder)
{
    meanClassifierReport = new DocumentSetCaseCollectionReport(extractor.name);

    aceDictionary2D<IWebPostClassifier, kFoldValidationCase, DocumentSetCaseCollectionReport> tempStructure = new aceDictionary2D<IWebPostClassifier, kFoldValidationCase, DocumentSetCaseCollectionReport>();

    DSCCReports firstCase = null;
    List<IWebPostClassifier> classifiers = new List<IWebPostClassifier>();

    foreach (var kFoldCasePair in this)
    {
        if (firstCase == null)
        {
            firstCase = kFoldCasePair.Value;
        }

        foreach (var pair in kFoldCasePair.Value.avgReports)
        {
            tempStructure[pair.Key, kFoldCasePair.Key] = pair.Value;
            if (!classifiers.Contains(pair.Key))
            {
                classifiers.Add(pair.Key);
            }
        }
    }

    // DataSet dataSet = new DataSet(context.setup.name);

    // <---------- CREATING AVERAGE TABLE -----------------------------------------------------

    var tpAvgMacro = new DataTableTypeExtended<DocumentSetCaseCollectionReport>(context.setup.name + " summary", "Cross k-fold average measures, fold-level measures are computed by the macro-average method");
    var tpAvgMicro = new DataTableTypeExtended<DocumentSetCaseCollectionReport>(context.setup.name + " summary", "Cross k-fold average measures, fold-level measures are computed by the micro-average method");

    List<DocumentSetCaseCollectionReport> macroaverages = new List<DocumentSetCaseCollectionReport>();

    DataTableTypeExtended<DocumentSetCaseCollectionReport> EMperKFolds = new DataTableTypeExtended<DocumentSetCaseCollectionReport>(extractor.name + "_allReports");

    foreach (IWebPostClassifier classifier in classifiers)
    {
        // < ---- report on each classifier

        context.logger.log("-- producing report about [" + classifier.name + "]");

        //objectTable<DocumentSetCaseCollectionReport> tp = new objectTable<DocumentSetCaseCollectionReport>(nameof(DocumentSetCaseCollectionReport.Name), classifier + "_sum");

        DocumentSetCaseCollectionReport avg = new DocumentSetCaseCollectionReport(classifier.name + " macro-averaging, k-fold avg.");
        DocumentSetCaseCollectionReport rep_eval = new DocumentSetCaseCollectionReport(classifier.name + " micro-averaging, k-fold avg.");
        rep_eval.Classifier = classifier.name;

        classificationEvalMetricSet metrics = new classificationEvalMetricSet();
        classificationEval eval = new classificationEval();
        //eval = metrics[classifier.name];

        Int32 c = 0;
        foreach (KeyValuePair<kFoldValidationCase, DSCCReports> kFoldCasePair in this)
        {
            DocumentSetCaseCollectionReport rep = kFoldCasePair.Value.avgReports[classifier];
            kFoldValidationCase vCase = kFoldCasePair.Key;

            classificationEvalMetricSet met = rep.GetSetMetrics();

            if (met != null)
            {
                foreach (IDocumentSetClass cl in context.classes.GetClasses())
                {
                    eval = eval + met[cl.name];
                }
            }

            rep.Name = classifier.name + "_" + vCase.name;

            avg.AddValues(rep);

            EMperKFolds.AddRow(rep);
            c++;
        }

        rep_eval.AddValues(metrics, classificationMetricComputation.microAveraging);

        avg.Classifier = classifier.name;
        avg.DivideValues(c);

        // <<< detecting the best-performing classifier across all evaluation folds
        if (avg.F1measure > highestF1Value)
        {
            highestF1Value = avg.F1measure;
            topClassifierReport = avg;
        }

        meanClassifierReport.AddValues(avg);

        // -----------------

        EMperKFolds.AddRow(avg);
        tpAvgMacro.AddRow(avg);
        macroaverages.Add(avg);

        if (DOMAKE_MICROaverage)
        {
            tpAvgMicro.AddRow(rep_eval);
        }

        // tp.Add(rep_eval);

        if (context.tools.operation.DoMakeReportForEachClassifier)
        {
            DataTable cTable = EMperKFolds;
            cTable.SetTitle($"{classifier.name} report");
            cTable.SetDescription("Summary " + context.setup.validationSetup.k + "-fold validation report for [" + classifier.name + "]");

            cTable.SetAdditionalInfoEntry("FV Extractor", extractor.name);
            cTable.SetAdditionalInfoEntry("Classifier", classifier.name);
            cTable.SetAdditionalInfoEntry("Class name", classifier.GetType().Name);

            cTable.SetAdditionalInfoEntry("Correct", rep_eval.Correct);
            cTable.SetAdditionalInfoEntry("Wrong", rep_eval.Wrong);

            //cTable.SetAdditionalInfoEntry("Precision", rep_eval.Precision);
            //cTable.SetAdditionalInfoEntry("Recall", rep_eval.Recall);
            //cTable.SetAdditionalInfoEntry("F1", rep_eval.F1measure);

            cTable.SetAdditionalInfoEntry("True Positives", metrics[classifier.name].truePositives);
            cTable.SetAdditionalInfoEntry("False Negatives", metrics[classifier.name].falseNegatives);
            cTable.SetAdditionalInfoEntry("False Positives", metrics[classifier.name].falsePositives);

            cTable.AddExtra("Classifier: " + classifier.name + " [" + classifier.GetType().Name + "]");

            var info = classifier.DescribeSelf();
            info.ForEach(x => cTable.AddExtra(x));

            cTable.AddExtra("-----------------------------------------------------------------------");
            cTable.AddExtra("Precision, Recall and F1 measures expressed in this table are computed by the macro-averaging schema");

            // output.CopyRowsFrom(cTable);
            cTable.GetReportAndSave(folder, appManager.AppInfo, extractor.name + "_classifier_" + classifier.name);

            // dataSet.AddTable(cTable);
        }
    }

    rangeFinderForDataTable rangerMacro = new rangeFinderForDataTable(tpAvgMacro, "Name");

    meanClassifierReport.DivideValues(classifiers.Count);

    if (macroaverages.Count > 0)
    {
        Double maxF1 = macroaverages.Max(x => x.F1measure);
        Double minF1 = macroaverages.Min(x => x.F1measure);

        List<String> minCaseNames = macroaverages.Where(x => x.F1measure == minF1).Select(x => x.Name).ToList();
        List<String> maxCaseNames = macroaverages.Where(x => x.F1measure == maxF1).Select(x => x.Name).ToList();

        var style = EMperKFolds.GetRowMetaSet().SetStyleForRowsWithValue<String>(DataRowInReportTypeEnum.dataHighlightA, nameof(DocumentSetCaseCollectionReport.Name), maxCaseNames);

        EMperKFolds.GetRowMetaSet().AddUnit(style);

        // style = tpAvgMacro.GetRowMetaSet().SetStyleForRowsWithValue<String>(DataRowInReportTypeEnum.dataHighlightC, nameof(DocumentSetCaseCollectionReport.Name), minCaseNames);

        tpAvgMacro.SetAdditionalInfoEntry("FV Extractor", extractor.name);

        if (DOMAKE_MICROaverage)
        {
            tpAvgMicro.SetAdditionalInfoEntry("FV Extractor", extractor.name);
        }

        List<String> averageNames = macroaverages.Select(x => x.Name).ToList();
        var avg_style = EMperKFolds.GetRowMetaSet().SetStyleForRowsWithValue<String>(DataRowInReportTypeEnum.dataHighlightC, nameof(DocumentSetCaseCollectionReport.Name), averageNames);

        foreach (var x in averageNames)
        {
            avg_style.AddMatch(x);
        }
    }

    // ::: ------------------------------------------------------------------------------------------------- :::

    tpAvgMacro.SetTitle($"{extractor.name} - macroaverage report");

    if (DOMAKE_MICROaverage)
    {
        tpAvgMicro.SetTitle($"{extractor.name} - microaverage report");
    }

    tpAvgMacro.AddExtra("Complete report on " + context.setup.validationSetup.k + "-fold validation FVE [" + extractor.name + "]");
    tpAvgMacro.AddExtra("Fold-level P, R and F1 measures are computed by the macro-averaging method; values here are cross k-fold means.");

    if (DOMAKE_MICROaverage)
    {
        tpAvgMicro.AddExtra("Complete " + context.setup.validationSetup.k + "-fold validation report for FVE [" + extractor.name + "]");
        tpAvgMicro.AddExtra("Fold-level P, R and F1 measures are computed by the micro-averaging method; values here are cross k-fold means.");
    }

    context.AddExperimentInfo(tpAvgMacro);

    if (DOMAKE_MICROaverage)
    {
        context.AddExperimentInfo(tpAvgMicro);
    }

    tpAvgMacro.AddExtra(extractor.description);

    if (extractor is semanticFVExtractor)
    {
        semanticFVExtractor semExtractor = (semanticFVExtractor)extractor;

        semExtractor.termTableConstructor.DescribeSelf().ForEach(x => tpAvgMacro.AddExtra(x));
        semExtractor.CloudConstructor.DescribeSelf().ForEach(x => tpAvgMacro.AddExtra(x));
        semExtractor.termTableConstructor.DescribeSelf().ForEach(x => tpAvgMicro.AddExtra(x));
        semExtractor.CloudConstructor.DescribeSelf().ForEach(x => tpAvgMicro.AddExtra(x));
    }

    context.logger.log("-- producing summary reports on [" + extractor.name + "]");

    rangerMacro.AddRangeRows("Macroaverage ", tpAvgMacro, true, imbSCI.Core.math.aggregation.dataPointAggregationType.min | imbSCI.Core.math.aggregation.dataPointAggregationType.max | imbSCI.Core.math.aggregation.dataPointAggregationType.avg | imbSCI.Core.math.aggregation.dataPointAggregationType.stdev);

    tpAvgMacro.GetReportAndSave(folder, appManager.AppInfo, extractor.name + "_macroaverage_report", true, true);

    EMperKFolds.AddExtra("The table shows average measures for each fold --- rows marked with a colored background show averages for all folds, per classifier.");
    EMperKFolds.GetReportAndSave(folder, appManager.AppInfo, extractor.name + "_allFolds", true, true);

    if (DOMAKE_MICROaverage)
    {
        tpAvgMicro.GetReportAndSave(folder, appManager.AppInfo, extractor.name + "_microaverage_report", true, true);
    }

    //dataSet.GetReportVersion().serializeDataSet(extractor.name + "_classifiers_MultiSheetSummary", folder, imbSCI.Data.enums.reporting.dataTableExportEnum.excel, appManager.AppInfo);
}
/// <summary>
/// Generates the final reports and readme files
/// </summary>
public void CloseExperiment(ILogBuilder logger, long startOfLog)
{
    if (!testSummaries.Any())
    {
        logger.log("No experiment procedures were performed");
        return;
    }

    DataTableTypeExtended<classificationReport> summaryTable = new DataTableTypeExtended<classificationReport>("Test results", "k-fold cross validation results");

    classificationReport sumRow = new classificationReport(runName);
    sumRow.Comment = runName + ", " + description;

    // classificationEvalMetricSet metric = new classificationEvalMetricSet("Total", truthTable.labels_without_unknown);

    foreach (classificationReport s in testSummaries)
    {
        summaryTable.AddRow(s);
        //metric = metric + s;

        if (sumRow.Classifier.isNullOrEmpty())
        {
            sumRow.Classifier = s.Classifier;
        }

        sumRow.AddValues(s);
    }

    sumRow.DivideValues(testSummaries.Count);
    sumRow.SetReportDataFields(crossValidation, this);

    summaryTable.SetDescription(description);
    summaryTable.SetAdditionalInfoEntry("RunName", runName);
    summaryTable.SetAdditionalInfoEntry("Description", description);
    summaryTable.SetAdditionalInfoEntry("Averaging", averagingMethod.ToString());

    summaryTable.AddRow(sumRow);
    summaryTable.GetReportAndSave(notes.folder, signature);

    finalReport = sumRow;

    //sumRow.ReportToLog(logger);
    sumRow.ReportToLog(notes);

    objectSerialization.saveObjectToXML(sumRow, notes.folder.pathFor("results.xml", imbSCI.Data.enums.getWritableFileMode.overwrite, "Experiment results", true));

    logger.log("Experiment completed");
    notes.SaveNote("note");

    String logPrintout = logger.GetContent(startOfLog);
    String p = notes.folder.pathFor("log.txt", imbSCI.Data.enums.getWritableFileMode.overwrite, "Log printout during experiment execution");
    File.WriteAllText(p, logPrintout);

    experimentRootFolder.generateReadmeFiles(signature);
}
public void Save(ILogBuilder log, Boolean SubLevelCall = false)
{
    //if (folder == null) folder = notes.folder_corpus;
    if (folder == null)
    {
        return;
    }

    if (purpose != FeatureCWPAnalysisSettings.AnalysisPurpose.application)
    {
        SaveFeatures(folder.Add("Features", "Features", "Features"), featuresByClass);

        EntryDictionary.Save(folder, log, name);

        if (!SubLevelCall)
        {
            var flds = fields();

            DataSet dataSet = new DataSet("rep_" + name);
            // List<DataTableForStatistics> rdt_list = new List<DataTableForStatistics>();

            foreach (String n in flds)
            {
                DataTable dt = EntryDictionary.MakeTable(n + "_" + name, EntryDictionary.description, flds, 500, flds.IndexOf(n));
                dt.AddStringLine("Report for " + name);
                dataSet.Tables.Add(dt);

                //DataTableForStatistics rdt = dt.GetReportTableVersion();
                //rdt_list.Add(rdt);
            }

            DataSetForStatistics report = dataSet.GetReportAndSave(folder, null, "cwp_" + name);

            var keys = Keys.ToList();
            keys.Sort();

            DataTableTypeExtended<FeatureCWPAnalysisSiteMetrics> metrics = new DataTableTypeExtended<FeatureCWPAnalysisSiteMetrics>();

            Int32 c = 0;
            foreach (var key in keys)
            {
                metrics.AddRow(this[key]);
                c++;
                if (c > 2000)
                {
                    break;
                }
            }

            metrics.GetReportAndSave(folder, null, "cwp_" + name + "_metrics");

            //foreach (var pair in entryReport)
            //{
            //    folderNode fn = folder.Add(pair.Key, pair.Key, "Sub entry report");
            //    pair.Value.Save(log, true);
            //}
        }
    }
}