/// <summary>
/// Makes a table of term frequencies: one row per entry returned by
/// <c>terms.GetRankedTokenFrequency(limit)</c>, numbered from 1 in the order returned.
/// </summary>
/// <param name="terms">The token dictionary that supplies the entries, token IDs and summary statistics.</param>
/// <param name="name">Title assigned to the table.</param>
/// <param name="description">Description assigned to the table.</param>
/// <param name="limit">
/// Number of entries requested from the dictionary. A non-positive value is not reported
/// as a truncation, matching how <c>MakeRankedList</c> treats such a limit.
/// </param>
/// <returns>The populated table, with rank, token ID, token and frequency columns.</returns>
public static DataTable MakeTable(this TokenDictionary terms, string name, string description, Int32 limit = 1000)
{
    DataTable table = new DataTable();
    table.SetTitle(name);
    table.SetDescription(description);

    // Summary statistics attached to the table as additional info entries.
    table.SetAdditionalInfoEntry("Distinct terms", terms.Count, "Total distinct terms in the dictionary");
    table.SetAdditionalInfoEntry("Max frequency", terms.GetMaxFrequency(), "Highest frequency");
    table.SetAdditionalInfoEntry("Total tokens", terms.GetSumFrequency(), "Total number of tokens extracted from the corpus/document, i.e. sum of all frequencies");

    DataColumn column_rank = table.Add("Rank", "Rank by frequency", "R", typeof(Int32), imbSCI.Core.enums.dataPointImportance.normal).SetWidth(20);
    DataColumn column_id = table.Add("ID", "Token ID", "id", typeof(Int32), imbSCI.Core.enums.dataPointImportance.normal).SetWidth(20);
    DataColumn column_token = table.Add("Token", "Token", "t", typeof(String), imbSCI.Core.enums.dataPointImportance.normal).SetWidth(50);
    DataColumn column_freq = table.Add("Frequency", "Absolute number of token occurrences in the corpus/document", "TF", typeof(Int32), imbSCI.Core.enums.dataPointImportance.normal).SetWidth(30);

    var list = terms.GetRankedTokenFrequency(limit);

    // Rank is the 1-based position of the entry in the returned list.
    Int32 c = 1;
    foreach (var pair in list)
    {
        var dr = table.NewRow();

        dr[column_rank] = c;
        dr[column_id] = terms.GetTokenID(pair.Key);
        dr[column_token] = pair.Key;
        dr[column_freq] = pair.Value;

        c++;
        table.Rows.Add(dr);
    }

    // Only announce truncation for a real (positive) limit; otherwise the note
    // would claim the table holds "top [0]" or a negative number of entries.
    if (limit > 0 && terms.Count > limit)
    {
        table.AddExtra("Table contains only top [" + limit + "] entries, out of [" + terms.Count + "] enumerated in the dictionary");
    }

    return table;
}
/// <summary>
/// Makes a plain-text, tab-separated listing of the entries returned by
/// <c>terms.GetRankedTokenFrequency(limit)</c> and optionally writes it to a file.
/// </summary>
/// <param name="terms">The token dictionary that supplies the entries and token IDs.</param>
/// <param name="name">Name printed in the listing header.</param>
/// <param name="description">Description printed in the listing header.</param>
/// <param name="limit">
/// Number of entries requested from the dictionary. When positive, it is printed as
/// "Showing top" and also caps the number of rows written; otherwise every returned entry is written.
/// </param>
/// <param name="filepath">When not null or empty, the listing is also written to this path.</param>
/// <returns>The complete listing text.</returns>
public static String MakeRankedList(this TokenDictionary terms, string name, string description, Int32 limit = 1000, String filepath = "")
{
    StringBuilder sb = new StringBuilder();

    var list = terms.GetRankedTokenFrequency(limit);

    sb.AppendLine("Name: " + name);
    sb.AppendLine("Description: " + description);
    sb.AppendLine("Distinct terms: " + terms.Count);
    if (limit > 0)
    {
        sb.AppendLine("Showing top: " + limit);
    }

    // Header matches the four values written per row below: rank, token ID, token, frequency.
    sb.AppendLine("# \t\t ID \t\t TKN \t\t\t FREQ");

    // Rank is the 1-based position of the entry in the returned list.
    Int32 c = 1;
    foreach (var pair in list)
    {
        sb.AppendLine(c.ToString() + "\t\t" + terms.GetTokenID(pair.Key) + "\t\t" + pair.Key + "\t\t\t" + pair.Value);
        c++;

        // Stop once a positive limit has been reached, even if the list is longer.
        if (limit > 0 && c > limit)
        {
            break;
        }
    }

    // Build the text once and reuse it for both the file and the return value.
    String output = sb.ToString();

    if (!filepath.isNullOrEmpty())
    {
        File.WriteAllText(filepath, output);
    }

    return output;
}