/// <summary>
/// Builds the default report style definition: registers the "Trashold" layer needle,
/// attaches the run name groups and registers the standard Render / Scope / Function data flags.
/// </summary>
/// <param name="_groups">
/// Run name groups to attach to the definition. When <c>null</c>, a new
/// <see cref="ExperimentRunNameGroups"/> is created and <c>CheckForDefault()</c> is called on it.
/// </param>
/// <returns>The populated style definition.</returns>
public static classificationReportStyleDefinition GetDefault(ExperimentRunNameGroups _groups = null)
{
    classificationReportStyleDefinition definition = new classificationReportStyleDefinition();

    // Fall back to a freshly created group set when the caller supplied none.
    ExperimentRunNameGroups groupsToUse = _groups;
    if (groupsToUse == null)
    {
        groupsToUse = new ExperimentRunNameGroups();
        groupsToUse.CheckForDefault();
    }

    definition.layerNeedleByName.Add(
        "Trashold",
        @"([\d]+)#([\d]*)",
        "Experiments with document selection controled by trashold");

    definition.groups = groupsToUse;

    // Each entry is passed, in this order, as the four arguments of dataFlags.Add(...).
    // Registration order is kept exactly as listed.
    // NOTE(review): ENT, LNG and DST carry the same description text as "Inverse" - confirm this is intended.
    String[][] defaultFlags =
    {
        new[] { "Render", "LinkText,LinkContent,LINCAPT", "A", "Content from link anchor text" },
        new[] { "Render", "Tokens,TKN", "TKN", "Tokens extracted from URL" },
        new[] { "Render", "PageText,PageContent", "B", "Page body text, page description and title tag" },

        new[] { "Scope", "category,InCategory,Category", "C", "Items in the category" },
        new[] { "Scope", "page", "P", "Pages" },
        new[] { "Scope", "Dataset,InDataset", "D", "Items in the complete dataset" },
        new[] { "Scope", "Link", "L", "Links" },

        new[] { "Function", "selfCentric", "SC", "Compares items with web site / document set" },
        new[] { "Function", "Inverse", "I", "Score value is inversed at the end of computation" },
        new[] { "Function", "ENT", "*", "Score value is inversed at the end of computation" },
        new[] { "Function", "LNG", "*", "Score value is inversed at the end of computation" },
        new[] { "Function", "DST", "*", "Score value is inversed at the end of computation" },
        new[] { "Function", "Divergence", "DIV", "Promotes diversity, items are at greater distance in vector space" },
        new[] { "Function", "Convergence", "CON", "Promotes convergence of the items, ones at smaller distance are promoted" },
        new[] { "Function", "Variance", "VAR", "Promotes variance of the items" },
        new[] { "Function", "Offset", "OFF", "Measures difference between similarity with true label and average similariy with other labels" }
    };

    foreach (String[] flag in defaultFlags)
    {
        definition.dataFlags.Add(flag[0], flag[1], flag[2], flag[3]);
    }

    // No entries are registered in dataColumns by default.
    return definition;
}
/// <summary>
/// Constructs a report table with one row per run name and one score column per entry in <c>sizes</c>.
/// Rows are emitted group by group, following the groups returned by
/// <c>style.groups.DeployForRunNames(...)</c> for the current <c>runNames</c>.
/// </summary>
/// <param name="_name">Name of the created table; also written to the "Group" extra line.</param>
/// <param name="description">Description set on the table; also written to the "Description" extra line.</param>
/// <param name="mode">
/// When <c>fullTable</c>, the table additionally gets the Group, Mean, Distance, Max, "DATA_*",
/// TimeMin, TimeMax and Comment columns. In any other mode only RunName and the per-size columns are created.
/// </param>
/// <returns>The populated table.</returns>
public DataTable ConstructTable(String _name, String description, classificationReportTableMode mode = classificationReportTableMode.fullTable)
{
    var style = local_style;
    var valueKey = style.valueToUse.key;
    String valueGroup = valueKey + " values";
    Boolean isFullTable = (mode == classificationReportTableMode.fullTable);

    DataTable table = new DataTable(_name);
    table.SetDescription(description);

    // ---- identification columns
    DataColumn dc_runGroup = null;
    if (isFullTable)
    {
        dc_runGroup = table.Columns.Add("Group");
        dc_runGroup.SetDefaultBackground(Color.LightBlue);
        dc_runGroup.SetDesc("Experiment subgroup");
        dc_runGroup.SetGroup("ID");
        dc_runGroup.SetWidth(10);
    }

    DataColumn dc_runName = table.Columns.Add("RunName");
    dc_runName.SetDefaultBackground(Color.LightSkyBlue);
    dc_runName.SetGroup("ID");
    dc_runName.SetDesc("Custom identification of experiment setup");
    dc_runName.SetWidth(20);

    // ---- one score column per size
    foreach (Int32 size in sizes)
    {
        DataColumn dc = table.Columns.Add("F" + size, typeof(Double));
        dc.SetHeading(valueKey + " for " + size);

        if (size == maxSize)
        {
            dc.SetDesc(valueKey + " at max size");
            dc.SetImportance(dataPointImportance.important);
        }
        else
        {
            dc.SetDesc(valueKey + " at " + size + " selected");
        }

        dc.SetFormat(style.ScoreFormat);
        dc.SetUnit(size.ToString());
        dc.SetLetter("FS_l=" + size.ToString());
        dc.SetGroup(valueGroup);
        dc.SetDefaultBackground(Color.LightGray);
        dc.SetWidth(10);
    }

    // ---- aggregate and expanded data columns (full table only)
    DataColumn dc_meanDistance = null;
    DataColumn dc_meanScore = null;
    DataColumn dc_maxValue = null;

    if (isFullTable)
    {
        // Column order is Mean, Distance, Max.
        dc_meanScore = table.Columns.Add("Mean", typeof(Double));
        dc_meanDistance = table.Columns.Add("Distance", typeof(Double));

        dc_meanDistance.SetDefaultBackground(Color.LightBlue);
        dc_meanDistance.SetFormat(style.ScoreFormat);
        dc_meanDistance.SetGroup(valueGroup);
        dc_meanDistance.SetWidth(10).SetLetter("∆F_SL").SetDesc("Effectiveness of the selection method (∆FSL), at given selection limit (SL), is expressed as difference between the F1-score (FSL) and the reference score produced without page selection (FREF) method. Mean distance ∆FSL for SL=[1, 10] is adopted as the evaluation criterion.");
        dc_meanDistance.SetImportance(dataPointImportance.important);

        dc_meanScore.SetDefaultBackground(Color.LightBlue);
        dc_meanScore.SetFormat(style.ScoreFormat);
        dc_meanScore.SetGroup(valueGroup).SetWidth(10);
        dc_meanScore.SetImportance(dataPointImportance.important);

        dc_maxValue = table.Columns.Add("Max", typeof(Double));
        dc_maxValue.SetDefaultBackground(Color.LightBlue);
        dc_maxValue.SetFormat(style.ScoreFormat);
        dc_maxValue.SetGroup(valueGroup).SetWidth(10);
        dc_maxValue.SetLetter("max");

        foreach (var dataColumn in style.dataColumns)
        {
            String ck = "DATA_" + dataColumn.key;
            if (!table.Columns.Contains(ck))
            {
                var cln = table.Columns.Add(ck, typeof(String));
                cln.SetHeading(dataColumn.key);
                cln.SetGroup("Expanded data");
                cln.SetDefaultBackground(Color.Orange);
                cln.SetDesc(dataColumn.description);
                table.SetAdditionalInfoEntry(dataColumn.key, dataColumn.value, dataColumn.description);
            }
        }
    }

    // Flag legend is attached to the table in every mode.
    foreach (reportDataFlag flag in style.dataFlags.items)
    {
        table.SetAdditionalInfoEntry(flag.name, flag.replacement, flag.description);
    }

    // ---- timestamp and comment columns (full table only)
    DataColumn dc_timestamp_min = null;
    DataColumn dc_timestamp_max = null;
    DataColumn dc_comment = null;

    if (isFullTable)
    {
        dc_timestamp_min = table.Columns.Add("TimeMin", typeof(String));
        dc_timestamp_min.SetWidth(20);
        dc_timestamp_min.SetDesc("Creation time of the oldest report for this runName");

        dc_timestamp_max = table.Columns.Add("TimeMax", typeof(String));
        dc_timestamp_max.SetWidth(20);
        dc_timestamp_max.SetDesc("Creation time of the newest report for this runName");

        dc_comment = table.Columns.Add("Comment");
        dc_comment.SetWidth(150);
        dc_comment.SetDesc("Report comment, extracted from one of the reports with the same run name");
    }

    // ---- rows
    ExperimentRunNameGroups deployedGroups = style.groups.DeployForRunNames(runNames, "FH", "PageContent", "LinkContent");

    foreach (var runGroup in deployedGroups.groups)
    {
        table.SetAdditionalInfoEntry(runGroup.name, runGroup.runNames.toCsvInLine(), runGroup.description);

        foreach (var runName in runGroup.runNames)
        {
            DataRow rw = table.NewRow();

            if (dc_runGroup != null)
            {
                rw[dc_runGroup] = runGroup.name;
            }
            rw[dc_runName] = runName;

            DateTime filecreation_min = DateTime.MaxValue;
            DateTime filecreation_max = DateTime.MinValue;

            // Last report found while walking the sizes; stays null when the run has no report at any size.
            classificationReportExpanded rep = null;

            foreach (Int32 size in sizes)
            {
                Double val = 0; // sizes without a report are written as 0

                var reportsBySize = F1RunNameVsSize[runName];
                if (reportsBySize.ContainsKey(size))
                {
                    rep = reportsBySize[size];
                    val = rep.GetReportValue(valueKey);

                    if (rep.filecreation < filecreation_min)
                    {
                        filecreation_min = rep.filecreation;
                    }
                    if (rep.filecreation > filecreation_max)
                    {
                        filecreation_max = rep.filecreation;
                    }
                }

                String dc_id = "F" + size;
                if (table.Columns.Contains(dc_id))
                {
                    rw[dc_id] = val;
                }
            }

            if (dc_maxValue != null)
            {
                rw[dc_maxValue] = F1RunNameVsMax[runName];
            }
            if (dc_meanDistance != null)
            {
                rw[dc_meanDistance] = F1RunNameVsAverageDistanceFromMax[runName];
            }
            if (dc_meanScore != null)
            {
                rw[dc_meanScore] = F1RunNameVsAverage[runName];
            }

            // Report-derived cells are only filled when at least one report was found.
            // Without this guard a run name lacking reports for every size caused a
            // NullReferenceException on rep.Comment / rep.data; such rows now keep these cells empty.
            if (rep != null)
            {
                if (dc_timestamp_min != null)
                {
                    rw[dc_timestamp_min] = filecreation_min.ToString();
                }
                if (dc_timestamp_max != null)
                {
                    rw[dc_timestamp_max] = filecreation_max.ToString();
                }
                if (dc_comment != null)
                {
                    rw[dc_comment] = rep.Comment;
                }

                var dataDict = rep.data.GetDictionary();
                foreach (var dataColumn in style.dataColumns)
                {
                    String ck = "DATA_" + dataColumn.key;
                    if (dataDict.ContainsKey(dataColumn.key) && table.Columns.Contains(ck))
                    {
                        rw[ck] = dataDict[dataColumn.key].value;
                    }
                }
            }

            table.Rows.Add(rw);
        }
    }

    // Highlight the rows whose Max value equals the overall maximum.
    if (dc_maxValue != null)
    {
        table.GetRowMetaSet().SetStyleForRowsWithValue<Double>(DataRowInReportTypeEnum.dataHighlightA, dc_maxValue.ColumnName, max_total);
    }

    table.AddExtra("Report space: " + name);
    table.AddExtra("Overall maximum: " + max_total.ToString("F5"));
    table.AddExtra("Total reports loaded: " + runNames.Count);
    table.AddExtra("Group: " + _name);
    table.AddExtra("Description: " + description);
    table.AddExtra("Dataset: " + dataset);

    return table;
}
/// <summary>
/// Returns a new <see cref="ExperimentRunNameGroups"/> containing, for each group of this instance
/// that matches at least one of the given run names, the result of <c>group.DeployForRunNames(...)</c>
/// for the matched names. Run names that match no group are collected in an extra group named "Other".
/// The resulting groups are ordered by descending <c>Priority</c>.
/// </summary>
/// <param name="runNames">The run names to distribute into groups.</param>
/// <param name="runNameSuffixes">
/// Optional suffixes: a run name that is not listed in a group directly is still matched when it ends
/// with one of these suffixes and the name without the suffix is listed in the group.
/// Null or empty suffixes are ignored.
/// </param>
/// <returns>The filtered group set.</returns>
public ExperimentRunNameGroups DeployForRunNames(IEnumerable<String> runNames, params String[] runNameSuffixes)
{
    ExperimentRunNameGroups output = new ExperimentRunNameGroups();

    List<String> names = runNames.ToList();

    // Null/empty suffixes cannot produce a match (and a null one would make EndsWith throw).
    List<String> suffixes = new List<String>();
    if (runNameSuffixes != null)
    {
        foreach (String suffix in runNameSuffixes)
        {
            if (!String.IsNullOrEmpty(suffix))
            {
                suffixes.Add(suffix);
            }
        }
    }

    HashSet<String> assigned = new HashSet<String>();

    foreach (ExperimentRunNameGroup group in groups)
    {
        List<String> in_group = new List<String>();

        foreach (String in_name in names)
        {
            Boolean matched = group.runNames.Contains(in_name);

            if (!matched)
            {
                foreach (String suffix in suffixes)
                {
                    // Run names are identifiers, so compare ordinally.
                    if (!in_name.EndsWith(suffix, StringComparison.Ordinal))
                    {
                        continue;
                    }

                    String in_name_nosuffix = in_name.Substring(0, in_name.Length - suffix.Length);
                    if (group.runNames.Contains(in_name_nosuffix))
                    {
                        // Stop at the first matching suffix so the name is added to this group only once.
                        matched = true;
                        break;
                    }
                }
            }

            if (matched)
            {
                in_group.Add(in_name);
            }
        }

        if (in_group.Any())
        {
            assigned.UnionWith(in_group);
            output.groups.Add(group.DeployForRunNames(in_group));
        }
    }

    // Whatever was not picked up by any group goes to the low-priority "Other" group.
    names.RemoveAll(assigned.Contains);

    if (names.Any())
    {
        ExperimentRunNameGroup otherGroup = new ExperimentRunNameGroup();
        otherGroup.name = "Other";
        otherGroup.description = "Ungrouped experiment runs";
        otherGroup.runNames.AddRange(names);
        otherGroup.Priority = -100;
        output.groups.Add(otherGroup);
    }

    // OrderByDescending returns a new sequence and leaves the source untouched, so the
    // ordered result has to be written back (the original call discarded it).
    var ordered = output.groups.OrderByDescending(x => x.Priority).ToList();
    output.groups.Clear();
    foreach (var orderedGroup in ordered)
    {
        output.groups.Add(orderedGroup);
    }

    return output;
}