/// <summary>
/// Checks that the <c>Category</c> property hands back the category that
/// was passed to the constructor.
/// </summary>
public void CategoryTestProperty()
{
    var expected = new Category(2, "foo");
    var counted = new CountedCategory(expected, 0, null);

    Assert.AreEqual(expected, counted.Category);
}
/// <summary>
/// Checks that the <c>==</c> operator reports two separately constructed
/// instances with the same category, count and (empty) terms as equal.
/// </summary>
public void EqualsTestEqualsOperator()
{
    var left = new CountedCategory(
        new Category(2, "foo"), 42, new Dictionary<string, int>());
    var right = new CountedCategory(
        new Category(2, "foo"), 42, new Dictionary<string, int>());

    bool operatorResult = left == right;

    Assert.IsTrue(operatorResult);
}
/// <summary>
/// Checks that <c>Equals</c> returns false for two instances that differ
/// in category name and document count.
/// </summary>
public void EqualsTestInequality()
{
    var fooCategory = new CountedCategory(
        new Category(2, "foo"), 42, new Dictionary<string, int>());
    var barCategory = new CountedCategory(
        new Category(2, "bar"), 33, new Dictionary<string, int>());

    bool equal = fooCategory.Equals(barCategory);

    Assert.IsFalse(equal);
}
/// <summary>
/// Checks that two separately constructed instances with the same
/// category, count and (empty) terms produce the same hash code.
/// </summary>
public void EqualsTestGetHasCodesEquals()
{
    var first = new CountedCategory(
        new Category(2, "foo"), 42, new Dictionary<string, int>());
    var second = new CountedCategory(
        new Category(2, "foo"), 42, new Dictionary<string, int>());

    int firstHash = first.GetHashCode();
    int secondHash = second.GetHashCode();

    Assert.AreEqual(firstHash, secondHash);
}
/// <summary>
/// Checks that the <c>DocumentCount</c> property hands back the count that
/// was passed to the constructor.
/// </summary>
public void DocumentCountTestProperty()
{
    const int expectedCount = 42;

    var counted = new CountedCategory(null, expectedCount, null);

    Assert.AreEqual(expectedCount, counted.DocumentCount);
}
/// <summary>
/// Checks that the <c>Terms</c> property hands back the term dictionary
/// that was passed to the constructor.
/// </summary>
public void TermsTestProperty()
{
    var expectedTerms = new Dictionary<string, int>
    {
        { "foo", 42 }
    };

    var counted = new CountedCategory(null, 0, expectedTerms);

    Assert.AreEqual(expectedTerms, counted.Terms);
}
/// <summary>
/// Checks that <c>Equals</c> returns true when an instance is compared
/// with a second reference to the very same object.
/// </summary>
public void EqualsTestReferenceEquals()
{
    var original = new CountedCategory(
        new Category(2, "foo"), 42, new Dictionary<string, int>());
    var sameReference = original;

    bool equal = original.Equals(sameReference);

    Assert.IsTrue(equal);
}
/// <summary>
/// Checks that the <c>==</c> operator reports an instance and a null
/// reference of the same type as not equal.
/// </summary>
public void EqualsTestObjectToNullReferenceEqualsOperator()
{
    var instance = new CountedCategory(
        new Category(2, "foo"), 42, new Dictionary<string, int>());
    // Typed explicitly so the comparison is between two CountedCategory operands.
    CountedCategory nothing = null;

    bool operatorResult = instance == nothing;

    Assert.IsFalse(operatorResult);
}
/// <summary>
/// Checks that <c>Equals</c> returns true when the argument is an
/// equal-valued instance held in a variable of type <c>object</c>.
/// </summary>
public void EqualsTestObjectReferenceEquality()
{
    var typed = new CountedCategory(
        new Category(2, "foo"), 42, new Dictionary<string, int>());
    // Deliberately declared as object so Equals receives an object-typed argument.
    object untyped = new CountedCategory(
        new Category(2, "foo"), 42, new Dictionary<string, int>());

    bool equal = typed.Equals(untyped);

    Assert.IsTrue(equal);
}
/// <summary>
/// The main test method. Call this to run the test.
/// </summary>
/// <remarks>
/// Runs a fixed number of rounds. In each round another
/// <c>NEWS_COUNT_INTERVAL</c> news items per category are added to the
/// accumulated training data, every test item is classified with
/// <c>TermUtils.DetermineCategory</c>, and the number of correct
/// classifications per category is recorded. The percentages are written
/// as a semicolon-separated table to "output.csv".
/// A category without any test items yields 0/0 (NaN) in its column.
/// </remarks>
public static void Test()
{
    // Number of rounds. Also determines where the test data starts, so that
    // the test data never overlaps the training data of any round.
    const int RoundCount = 10;

    SqlCeArchivist a = new SqlCeArchivist(@"C:\Users\mads\Documents\GitHub\NyhedsfilterP2\System\NewsFilter\Experiment\Database.sdf");

    List<Category> categories;

    // Store results here: category id -> one TestResult per round.
    Dictionary<int, TestResult[]> results =
        new Dictionary<int, TestResult[]>();

    a.Open();
    try
    {
        // Uncomment to seed database. This should be done on the first test run.
        // Replace with dir to data.
        // string dataDir = @"C:\Users\mads\Documents\GitHub\NyhedsfilterP2\System\NewsTrainer\data\";
        // DBSeeder.SeedDatabaseWithNews(a, dataDir, 3000);

        categories = a.GetCategories();

        // Training data listed by category.
        Dictionary<int, List<NewsItem>> trainingData =
            new Dictionary<int, List<NewsItem>>();
        List<NewsItem> testData = new List<NewsItem>();

        NewsQuery query = new NewsQuery();

        foreach (Category category in categories)
        {
            // Now find testData by setting an offset and a new limit.
            query.Offset = RoundCount * NEWS_COUNT_INTERVAL;
            query.Limit = 1000;
            query.CategoryId = category.Id;

            testData.AddRange(a.GetNews(query));

            // Create every slot up front so that a category without test
            // items does not leave null entries behind for the CSV writer.
            TestResult[] perRound = new TestResult[RoundCount];
            for (int i = 0; i < RoundCount; i++)
            {
                perRound[i] = new TestResult(0, 0);
            }
            results.Add(category.Id, perRound);
        }

        for (int i = 0; i < RoundCount; i++)
        {
            List<CountedCategory> categoryDescriptions =
                new List<CountedCategory>();

            foreach (Category category in categories)
            {
                // First find training data.
                query.Offset = i * NEWS_COUNT_INTERVAL;
                query.Limit = NEWS_COUNT_INTERVAL;
                query.CategoryId = category.Id;

                List<NewsItem> categoryNews;
                if (!trainingData.TryGetValue(category.Id, out categoryNews))
                {
                    categoryNews = new List<NewsItem>();
                    trainingData.Add(category.Id, categoryNews);
                }

                // Add to list (training data accumulates across rounds).
                categoryNews.AddRange(a.GetNews(query));

                // Save as CountedCategory.
                CountedCategory cat = new CountedCategory();
                cat.DocumentCount = categoryNews.Count;
                cat.Category = category;

                // Add all term frequencies.
                foreach (NewsItem news in categoryNews)
                {
                    foreach (Term t in news.Terms)
                    {
                        if (!cat.Terms.ContainsKey(t.TermName))
                        {
                            cat.Terms[t.TermName] = 0;
                        }
                        cat.Terms[t.TermName] += t.Frequency;
                    }
                }

                categoryDescriptions.Add(cat);
            }

            // Now test classifier for each test data item.
            foreach (NewsItem item in testData)
            {
                List<string> terms = new List<string>();

                // Add all terms to list (all occurrences).
                foreach (Term t in item.Terms)
                {
                    for (int j = 0; j < t.Frequency; j++)
                    {
                        terms.Add(t.TermName);
                    }
                }

                TestResult[] categoryResults = results[item.Category.Id];

                // Classify and save result.
                if (TermUtils.DetermineCategory(
                    terms, categoryDescriptions).Id == item.Category.Id)
                {
                    categoryResults[i].CorrectCount += 1;
                }
                categoryResults[i].Count += 1;
            }
        }
    }
    finally
    {
        // Close the archivist even when a query or the classifier throws.
        a.Close();
    }

    // Write to CSV.
    StringBuilder csv = new StringBuilder();

    // Write headers.
    csv.Append("data-count;");
    foreach (Category c in categories)
    {
        csv.Append(c.Name);
        csv.Append(";");
    }
    csv.AppendLine("total");

    // Iterate through each row.
    for (int i = 0; i < RoundCount; i++)
    {
        // Write data-count column.
        csv.Append(((i + 1) * NEWS_COUNT_INTERVAL).ToString());
        csv.Append(";");

        int correctSum = 0;
        int countSum = 0;

        // Print each category. Walk the categories in header order so each
        // value lands under its own header regardless of dictionary ordering.
        foreach (Category c in categories)
        {
            TestResult[] categoryResults = results[c.Id];

            // Calculate average + add to string.
            csv.Append(
                (categoryResults[i].CorrectCount /
                 (double)categoryResults[i].Count * 100.0).ToString());
            csv.Append(";");

            correctSum += categoryResults[i].CorrectCount;
            countSum += categoryResults[i].Count;
        }

        // Write total average
        csv.AppendLine(
            (correctSum / (double)countSum * 100.0).ToString());
    }

    // Write to CSV.
    File.WriteAllText("output.csv", csv.ToString());
}
/// <summary>
/// Checks that <c>TermUtils.DetermineCategory</c> picks the first of three
/// hand-built categories for the term list "a" through "m".
/// </summary>
public void DetermineCategoryTestCorrectClassification()
{
    // The terms to classify: the single letters "a" .. "m", one of each.
    var terms = new List<string>();
    for (char letter = 'a'; letter <= 'm'; letter++)
    {
        terms.Add(letter.ToString());
    }

    var first = new CountedCategory(
        new Category(0, "cat1"),
        100,
        new Dictionary<string, int>
        {
            { "a", 80 },
            { "d", 28 },
            { "f", 0 },
            { "k", 10 }
        });

    var second = new CountedCategory(
        new Category(1, "cat2"),
        80,
        new Dictionary<string, int>
        {
            { "c", 10 },
            { "d", 12 },
            { "h", 9 },
            { "m", 11 }
        });

    var third = new CountedCategory(
        new Category(2, "cat3"),
        200,
        new Dictionary<string, int>
        {
            { "b", 60 },
            { "i", 2 },
            { "l", 4 },
            { "m", 10 }
        });

    var categories = new List<CountedCategory> { first, second, third };

    Category result = TermUtils.DetermineCategory(terms, categories);

    // Category 1 should be the result (calculated manually).
    Assert.AreEqual(first.Category, result);
}