/// <summary>
/// Runs the category-assignment test for the document selected in <c>ddlDocs</c>
/// and renders the outcome as HTML into <c>ltrSimResults</c>: the newly assigned
/// categories, the categories already recorded for the document, and the
/// precision / recall / F-measure reported by <c>d.PerformanceCalculator</c>.
/// </summary>
/// <remarks>
/// The document id and the <c>k</c> / <c>c</c> arguments are parsed with
/// <c>Convert.ToInt32</c>, so non-numeric input raises <c>FormatException</c>,
/// exactly as before.
/// </remarks>
private void execCats()
{
    int docId = Convert.ToInt32(ddlDocs.SelectedValue);
    int k = Convert.ToInt32(tbxK.Text);
    int c = Convert.ToInt32(tbxC.Text);

    d.CatsLoader cLoader = new d.CatsLoader();
    d.DocCatsLoader dcLoader = new d.DocCatsLoader(cLoader);
    d.Tester tester = new d.Tester(new d.DocsLoader(), cLoader, dcLoader);
    Hashtable result = tester.GetNewCategories(docId, k, c);

    // Turn the document's known category ids into a Hashtable keyed by id,
    // which is the shape PerformanceCalculator is given alongside "result".
    ArrayList arrRelevant = dcLoader.GetDocCategories(docId);
    Hashtable relevant = new Hashtable();
    if (arrRelevant != null)
    {
        foreach (int catId in arrRelevant)
        {
            relevant.Add(catId, true);
        }
    }

    d.PerformanceCalculator pc = new d.PerformanceCalculator(result, relevant);

    StringBuilder sb = new StringBuilder();

    sb.Append("<p><b>Newly assigned categories:</b>");
    appendCategoryList(sb, result, cLoader, "<p>none assigned");

    sb.Append("<p><b>Relevant categories:</b>");
    appendCategoryList(sb, relevant, cLoader, "<p>none relevant");

    sb.Append("<p><b>Evaluation:</b>");
    sb.Append("<ul>");
    sb.AppendFormat("<li>Precision: {0}", pc.Precision);
    sb.AppendFormat("<li>Recall: {0}", pc.Recall);
    sb.AppendFormat("<li>F-Measure: {0}", pc.FMeasure);
    sb.Append("</ul>");

    ltrSimResults.Text = sb.ToString();
}

/// <summary>
/// Appends a bulleted list of Wikipedia category links for every key of
/// <paramref name="categories"/>, or <paramref name="emptyMessage"/> when the
/// table is empty.
/// </summary>
/// <param name="sb">Builder receiving the HTML.</param>
/// <param name="categories">Table whose keys are category ids.</param>
/// <param name="cLoader">Loader used to resolve a category id to its name.</param>
/// <param name="emptyMessage">Raw HTML emitted instead of the list when there are no entries.</param>
private static void appendCategoryList(StringBuilder sb, Hashtable categories, d.CatsLoader cLoader, string emptyMessage)
{
    if (categories.Count == 0)
    {
        sb.Append(emptyMessage);
        return;
    }

    sb.Append("<ul>");
    foreach (DictionaryEntry entry in categories)
    {
        int catId = Convert.ToInt32(entry.Key);
        string name = cLoader.GetCategory(catId);

        // Category names are data, not markup: a name containing an apostrophe,
        // '&' or '<' used to break the link. The href is double-quoted so that
        // attribute encoding (which always escapes '"') is sufficient.
        sb.AppendFormat(
            "<li><a href=\"http://en.wikipedia.org/wiki/Category:{0}\">{1}</a>",
            System.Web.HttpUtility.HtmlAttributeEncode(name),
            System.Web.HttpUtility.HtmlEncode(name));
    }
    sb.Append("</ul>");
}
/// <summary>
/// Console entry point that exercises the project's components one after
/// another: an <c>AggregateTester</c> run, a hand-built
/// <c>PerformanceCalculator</c> check, a category lookup for document 1,
/// index access, a <c>SearchVS</c> run, an index build, a Porter-stemmer call
/// and the terms / term-docs file generators.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    AggregateTester at = new AggregateTester(0, 10, 1, 10, 100, 10, 5, 10, 1);
    at.Run();

    // Small fixed example: ids {1,2,3} as the result set against
    // ids {1,3,5,7,8} as the relevant set.
    Hashtable result = new Hashtable();
    result.Add(1, true);
    result.Add(2, true);
    result.Add(3, true);

    Hashtable relevant = new Hashtable();
    relevant.Add(1, true);
    relevant.Add(3, true);
    relevant.Add(5, true);
    relevant.Add(7, true);
    relevant.Add(8, true);

    d.PerformanceCalculator pc = new d.PerformanceCalculator(result, relevant);
    Console.WriteLine("Precision = " + pc.Precision);
    Console.WriteLine("Recall = " + pc.Recall);
    Console.WriteLine("FMeasure = " + pc.FMeasure);

    d.DocsLoader dl = new d.DocsLoader();
    d.CatsLoader cl = new d.CatsLoader();
    d.DocCatsLoader dc = new d.DocCatsLoader(cl);

    int docId = 1;
    ArrayList al = dc.GetDocCategories(docId);

    // Other callers of GetDocCategories guard against a null result, so treat
    // null as "no categories" here instead of dereferencing it.
    int categoryCount = (al == null) ? 0 : al.Count;
    Console.WriteLine(dl.GetDocTitle(docId) + " has " + categoryCount + " categories: ");
    if (al != null)
    {
        foreach (int catId in al)
        {
            Console.WriteLine(" " + cl.GetCategory(catId));
        }
    }

    d.Index index = new d.Index(Helper.INDEX_PATH);
    // The returned term array is not used below; the call itself is kept.
    index.DocTerms(0);

    SearchVS s = new SearchVS(Helper.INDEX_PATH);
    s.run();

    i.DataLoader dal = new i.DataLoader(Helper.SOURCE_PATH);
    i.IndexBuilder ib = new i.IndexBuilder(dal, Helper.INDEX_PATH);
    ib.BuildIndex();

    PorterStemmerAlgorithm.PorterStemmer ps = new PorterStemmerAlgorithm.PorterStemmer();
    Console.WriteLine(ps.stemTerm("beautify"));

    TermFilter f = new TermFilter();
    f.CreateNewTermsFile();

    TermProcessor p = new TermProcessor();
    p.CreateTermsFile();

    TermDocsProcessor tdp = new TermDocsProcessor();
    tdp.CreateTermDocsFile();
    // NOTE(review): the second call below is preserved from the original;
    // confirm whether running CreateTermDocsFile twice is intentional.
    tdp.CreateTermDocsFile();
}
/// <summary>
/// Tells whether the category with the given id looks like a Wikipedia
/// maintenance category, i.e. its name contains "articles" or
/// "semi-protected" (case-insensitively).
/// </summary>
/// <param name="catId">Id of the category to inspect; resolved through <c>cLoader.GetCategory</c>.</param>
/// <returns><c>true</c> when the name contains one of the marker words; otherwise <c>false</c>.</returns>
private bool isWikiCrap(int catId)
{
    string cat = cLoader.GetCategory(catId);
    if (cat == null)
    {
        // NOTE(review): assumes an unresolved id may yield null; such a
        // category cannot match either marker, so report false.
        return false;
    }

    // Ordinal, case-insensitive search: the previous ToLower() + culture-aware
    // IndexOf could miss "ARTICLES"/"SEMI-PROTECTED" under cultures with
    // special casing rules for 'I' (e.g. tr-TR).
    return cat.IndexOf("articles", System.StringComparison.OrdinalIgnoreCase) >= 0
        || cat.IndexOf("semi-protected", System.StringComparison.OrdinalIgnoreCase) >= 0;
}