예제 #1
0
        /// <summary>
        /// Fills a report table with one row per site registered for <paramref name="caseSet"/>,
        /// followed by a single row holding the class-level totals.
        /// </summary>
        /// <param name="pipelineCollection">Source of the per-category sites and chunks and of the per-site tokens.</param>
        /// <param name="caseSet">Class being reported; also used as the lookup key into the collection.</param>
        /// <param name="output">
        /// Table to append to. When <c>null</c>, a new table titled after the class is created and its
        /// columns are declared here. When supplied, its title and <c>TableName</c> are overwritten.
        /// </param>
        /// <returns>The table that received the rows (the new one, or <paramref name="output"/>).</returns>
        public static DataTable GetClassKnowledgeReport(this DocumentSetPipelineCollection pipelineCollection, IDocumentSetClass caseSet, DataTable output = null)
        {
            if (output != null)
            {
                // A caller-supplied table is treated as a shared, multi-class report.
                // NOTE(review): presumably it already carries the columns declared in the other branch - confirm with callers.
                output.SetTitle("Class Set Report");
                output.TableName = "multi_class_report";
            }
            else
            {
                // Every column is declared with the same importance level.
                var normal = imbSCI.Core.enums.dataPointImportance.normal;

                output = new DataTable();
                output.SetTitle(caseSet.name);

                // --- Repository ---
                output.Add("Name", "Name of class or web site", "", typeof(String), normal, "", "Name")
                      .SetGroup("Repository")
                      .SetWidth(25)
                      .SetUnit("");
                output.Add("Sites", "Number of sites in cateogry - or 1 for single site entry", "|C_ds|", typeof(Int32), normal, "", "Sites")
                      .SetGroup("Repository")
                      .SetWidth(10)
                      .SetUnit("n");
                output.Add("Pages", "Total number of pages detected in the repository", "|C_d|", typeof(Int32), normal, "", "Pages Crawled")
                      .SetGroup("Repository")
                      .SetWidth(15)
                      .SetUnit("n");

                // --- Pipeline ---
                output.Add("PagesValid", "Number of pages used for the category or site", "|C_dv|", typeof(Int32), normal, "", "Pages Used")
                      .SetGroup("Pipeline")
                      .SetWidth(15)
                      .SetUnit("n");
                output.Add("Blocks", "Number of blocks for category or site", "|C_b|", typeof(Int32), normal, "")
                      .SetGroup("Pipeline")
                      .SetWidth(10)
                      .SetUnit("n");
                output.Add("Streams", "Number of streams for category or site", "|C_ts|", typeof(Int32), normal, "")
                      .SetGroup("Pipeline")
                      .SetWidth(10)
                      .SetUnit("n");
                output.Add("Tokens", "Number of tokens for category or site", "|C_t|", typeof(Int32), normal, "")
                      .SetGroup("Pipeline")
                      .SetWidth(10)
                      .SetUnit("n");

                // --- NLP ---
                output.Add("Chunks", "Number of chunks for category - disabled for sites", "|C_c|", typeof(Int32), normal, "")
                      .SetGroup("NLP")
                      .SetWidth(10)
                      .SetUnit("n");

                // --- Only Letters ---
                output.Add("OnlyLetters", "Number of tokens for category or site with onlyLetter tag", "|C_ttl|", typeof(Int32), normal, "", "Only Letters")
                      .SetGroup("Only Letters")
                      .SetWidth(10)
                      .SetUnit("n");
                output.Add("OnlyLettersResolved", "Number of tokens resolved by morphosyntactic resource", "|C_ttlr|", typeof(Int32), normal, "", "Accepted Tokens")
                      .SetGroup("Only Letters")
                      .SetWidth(10)
                      .SetUnit("n");
                output.Add("OnlyLettersUnresolved", "Number of tokens unresolved by morphosyntactic resource", "|C_ttlu|", typeof(Int32), normal, "", "Dismissed")
                      .SetGroup("Only Letters")
                      .SetWidth(10)
                      .SetUnit("n");

                // --- Other ---
                output.Add("Numbers", "Number of tokens for category or site with a numeric content tag", "|C_ttn|", typeof(Int32), normal, "", "Number")
                      .SetGroup("Other")
                      .SetWidth(10)
                      .SetUnit("n");
                output.Add("Symbols", "Number of tokens for category or site with a symbolic content tag", "|C_tts|", typeof(Int32), normal, "", "Symbols")
                      .SetGroup("Other")
                      .SetWidth(10)
                      .SetUnit("n");

                // --- Special ---
                output.Add("Business", "Number of tokens for category or site with any dat_business tag", "|C_ttb|", typeof(Int32), normal, "", "Business tags")
                      .SetGroup("Special")
                      .SetWidth(10)
                      .SetUnit("n");
                output.Add("Potential", "Number of tokens for category or site with any tkn_potential data point tag", "|C_ttp|", typeof(Int32), normal, "", "Potential tags")
                      .SetGroup("Special")
                      .SetWidth(10)
                      .SetUnit("n");
            }

            // Record which class and repository this report section belongs to.
            output.SetAdditionalInfoEntry(
                "Class " + caseSet.treeLetterAcronim + " name",
                caseSet.name);
            output.SetAdditionalInfoEntry(
                "Class " + caseSet.treeLetterAcronim + " repo",
                caseSet.MCRepositoryName);

            // Snapshot the sites of this class before iterating.
            var classSites = pipelineCollection.sitesByCategory[caseSet].ToList();

            // Class-level accumulator: the chunk count is taken from the collection up front,
            // every other member is accumulated from the per-site reports via sum().
            PipelineReportForClass classReport = new PipelineReportForClass();
            classReport.deployClass(caseSet);
            classReport.Chunks = pipelineCollection.chunksByCategory[caseSet].Count();

            foreach (var site in classSites)
            {
                var siteRow = output.NewRow();

                // NOTE(review): the "as" cast yields null when the site is not a pipelineTaskMCSiteSubject;
                // the lookup below would then receive a null key - confirm this cannot happen.
                var siteSubject = site as pipelineTaskMCSiteSubject;

                PipelineReportForClass siteReport = new PipelineReportForClass();
                siteReport.deploySite(site);
                siteReport.deployTokens(pipelineCollection.tokenBySite[siteSubject]);
                siteReport.SetDataRow(siteRow);

                output.Rows.Add(siteRow);
                classReport.sum(siteReport);
            }

            // The class totals go last, after all site rows.
            var classRow = output.NewRow();
            classReport.SetDataRow(classRow);
            output.Rows.Add(classRow);

            // Register a row style rule keyed on the "Name" column value of the class row.
            // NOTE(review): presumably this highlights the totals row in the rendered report - confirm.
            var rowMeta = output.GetRowMetaSet();
            var classRowStyle = new dataValueMatchCriterionDynamicStyle<String, DataRowInReportTypeEnum>(
                new String[] { classReport.name },
                DataRowInReportTypeEnum.dataHighlightA,
                "Name");
            rowMeta.AddUnit(classRowStyle);

            return output;
        }
예제 #2
0
        /// <summary>
        /// Accumulates the members of <paramref name="B"/> into the matching members of this report.
        /// </summary>
        /// <param name="B">Report whose values are added onto this instance.</param>
        public void sum(PipelineReportForClass B)
        {
            var addend = B;

            // Repository and pipeline structure
            this.Sites += addend.Sites;
            this.Pages += addend.Pages;
            this.PagesValid += addend.PagesValid;
            this.Blocks += addend.Blocks;
            this.Streams += addend.Streams;
            this.Chunks += addend.Chunks;

            // Token total
            this.Tokens += addend.Tokens;

            // Letter-only tokens and their resolution outcome
            this.OnlyLetters += addend.OnlyLetters;
            this.OnlyLettersResolved += addend.OnlyLettersResolved;
            this.OnlyLettersUnresolved += addend.OnlyLettersUnresolved;

            // Remaining token tags
            this.Numbers += addend.Numbers;
            this.Symbols += addend.Symbols;
            this.Business += addend.Business;
            this.Potential += addend.Potential;
        }