/// <summary>
/// Fills a report table with one row per site of the given class, followed by a
/// single row holding the totals accumulated for the whole class.
/// </summary>
/// <param name="pipelineCollection">Source of the per-category site and chunk collections and of the per-site tokens.</param>
/// <param name="caseSet">Class (category) the report is produced for.</param>
/// <param name="output">
/// Optional table to append to. When <c>null</c>, a new table titled with the class name is created
/// and its columns are declared. When supplied, it is retitled "Class Set Report", renamed to
/// "multi_class_report" and only rows are added (presumably it already carries the same columns —
/// confirm against callers).
/// </param>
/// <returns>The table that received the rows: either <paramref name="output"/> or the newly created one.</returns>
public static DataTable GetClassKnowledgeReport(this DocumentSetPipelineCollection pipelineCollection, IDocumentSetClass caseSet, DataTable output = null)
{
    if (output != null)
    {
        // Shared table: switch it to the multi-class identity.
        output.SetTitle("Class Set Report");
        output.TableName = "multi_class_report";
    }
    else
    {
        // Fresh table: title it after the class, then declare the schema.
        output = new DataTable();
        output.SetTitle(caseSet.name);
        AddClassKnowledgeColumns(output);
    }

    String infoPrefix = "Class " + caseSet.treeLetterAcronim;
    output.SetAdditionalInfoEntry(infoPrefix + " name", caseSet.name);
    output.SetAdditionalInfoEntry(infoPrefix + " repo", caseSet.MCRepositoryName);

    var classSites = pipelineCollection.sitesByCategory[caseSet].ToList();

    PipelineReportForClass classTotals = new PipelineReportForClass();
    classTotals.deployClass(caseSet);
    // Chunks are taken from the category-level collection; the per-site reports below
    // never receive a chunk count in this method.
    classTotals.Chunks = pipelineCollection.chunksByCategory[caseSet].Count();

    foreach (var site in classSites)
    {
        var siteRow = output.NewRow();

        PipelineReportForClass siteReport = new PipelineReportForClass();
        siteReport.deploySite(site);
        // NOTE(review): if `site` is not a pipelineTaskMCSiteSubject the `as` cast yields null
        // and the lookup receives a null key — confirm tokenBySite tolerates that.
        siteReport.deployTokens(pipelineCollection.tokenBySite[site as pipelineTaskMCSiteSubject]);
        siteReport.SetDataRow(siteRow);
        output.Rows.Add(siteRow);

        // Fold this site's counters into the class-level totals.
        classTotals.sum(siteReport);
    }

    var totalsRow = output.NewRow();
    classTotals.SetDataRow(totalsRow);
    output.Rows.Add(totalsRow);

    // Register a dynamic style (dataHighlightA) for rows whose "Name" value equals the class report's name.
    var totalsRowStyle = new dataValueMatchCriterionDynamicStyle<String, DataRowInReportTypeEnum>(
        new String[] { classTotals.name },
        DataRowInReportTypeEnum.dataHighlightA,
        "Name");
    output.GetRowMetaSet().AddUnit(totalsRowStyle);

    return output;
}

/// <summary>
/// Declares the columns of the class knowledge report on <paramref name="table"/>.
/// </summary>
/// <param name="table">Table that receives the column definitions.</param>
private static void AddClassKnowledgeColumns(DataTable table)
{
    var importance = imbSCI.Core.enums.dataPointImportance.normal;

    // Repository
    table.Add("Name", "Name of class or web site", "", typeof(String), importance, "", "Name")
        .SetGroup("Repository").SetWidth(25).SetUnit("");
    table.Add("Sites", "Number of sites in cateogry - or 1 for single site entry", "|C_ds|", typeof(Int32), importance, "", "Sites")
        .SetGroup("Repository").SetWidth(10).SetUnit("n");
    table.Add("Pages", "Total number of pages detected in the repository", "|C_d|", typeof(Int32), importance, "", "Pages Crawled")
        .SetGroup("Repository").SetWidth(15).SetUnit("n");

    // Pipeline
    table.Add("PagesValid", "Number of pages used for the category or site", "|C_dv|", typeof(Int32), importance, "", "Pages Used")
        .SetGroup("Pipeline").SetWidth(15).SetUnit("n");
    table.Add("Blocks", "Number of blocks for category or site", "|C_b|", typeof(Int32), importance, "")
        .SetGroup("Pipeline").SetWidth(10).SetUnit("n");
    table.Add("Streams", "Number of streams for category or site", "|C_ts|", typeof(Int32), importance, "")
        .SetGroup("Pipeline").SetWidth(10).SetUnit("n");
    table.Add("Tokens", "Number of tokens for category or site", "|C_t|", typeof(Int32), importance, "")
        .SetGroup("Pipeline").SetWidth(10).SetUnit("n");

    // NLP
    table.Add("Chunks", "Number of chunks for category - disabled for sites", "|C_c|", typeof(Int32), importance, "")
        .SetGroup("NLP").SetWidth(10).SetUnit("n");

    // Only Letters
    table.Add("OnlyLetters", "Number of tokens for category or site with onlyLetter tag", "|C_ttl|", typeof(Int32), importance, "", "Only Letters")
        .SetGroup("Only Letters").SetWidth(10).SetUnit("n");
    table.Add("OnlyLettersResolved", "Number of tokens resolved by morphosyntactic resource", "|C_ttlr|", typeof(Int32), importance, "", "Accepted Tokens")
        .SetGroup("Only Letters").SetWidth(10).SetUnit("n");
    table.Add("OnlyLettersUnresolved", "Number of tokens unresolved by morphosyntactic resource", "|C_ttlu|", typeof(Int32), importance, "", "Dismissed")
        .SetGroup("Only Letters").SetWidth(10).SetUnit("n");

    // Other
    table.Add("Numbers", "Number of tokens for category or site with a numeric content tag", "|C_ttn|", typeof(Int32), importance, "", "Number")
        .SetGroup("Other").SetWidth(10).SetUnit("n");
    table.Add("Symbols", "Number of tokens for category or site with a symbolic content tag", "|C_tts|", typeof(Int32), importance, "", "Symbols")
        .SetGroup("Other").SetWidth(10).SetUnit("n");

    // Special
    table.Add("Business", "Number of tokens for category or site with any dat_business tag", "|C_ttb|", typeof(Int32), importance, "", "Business tags")
        .SetGroup("Special").SetWidth(10).SetUnit("n");
    table.Add("Potential", "Number of tokens for category or site with any tkn_potential data point tag", "|C_ttp|", typeof(Int32), importance, "", "Potential tags")
        .SetGroup("Special").SetWidth(10).SetUnit("n");
}
/// <summary>
/// Accumulates the counters of another report into this one: each counter of
/// <paramref name="B"/> is added to the counter of the same name on this instance.
/// </summary>
/// <param name="B">Report whose counter values are added; its members are only read here.</param>
public void sum(PipelineReportForClass B)
{
    // Site and page counters
    this.Sites += B.Sites;
    this.Pages += B.Pages;
    this.PagesValid += B.PagesValid;

    // Structural counters
    this.Blocks += B.Blocks;
    this.Streams += B.Streams;
    this.Chunks += B.Chunks;
    this.Tokens += B.Tokens;

    // Letter-only token counters
    this.OnlyLetters += B.OnlyLetters;
    this.OnlyLettersResolved += B.OnlyLettersResolved;
    this.OnlyLettersUnresolved += B.OnlyLettersUnresolved;

    // Remaining token-tag counters
    this.Numbers += B.Numbers;
    this.Symbols += B.Symbols;
    this.Business += B.Business;
    this.Potential += B.Potential;
}