Ejemplo n.º 1
0
        /// <summary>
        /// Checks that the <c>Category</c> property hands back the category
        /// instance that was supplied to the constructor.
        /// </summary>
        public void CategoryTestProperty()
        {
            // Arrange: a counted category built around a known category.
            var expected = new Category(2, "foo");
            var counted = new CountedCategory(expected, 0, null);

            // Act
            var actual = counted.Category;

            // Assert
            Assert.AreEqual(expected, actual);
        }
Ejemplo n.º 2
0
 /// <summary>
 /// Two separately constructed instances with the same category, document
 /// count and (empty) term table must compare equal with the == operator.
 /// </summary>
 public void EqualsTestEqualsOperator()
 {
     var left = new CountedCategory(new Category(2, "foo"), 42, new Dictionary<string, int>());
     var right = new CountedCategory(new Category(2, "foo"), 42, new Dictionary<string, int>());

     bool areEqual = left == right;

     Assert.IsTrue(areEqual);
 }
Ejemplo n.º 3
0
 /// <summary>
 /// Instances that differ in category name and document count must not be
 /// reported as equal by <c>Equals</c>.
 /// </summary>
 public void EqualsTestInequality()
 {
     var fooCategory = new CountedCategory(new Category(2, "foo"), 42, new Dictionary<string, int>());
     var barCategory = new CountedCategory(new Category(2, "bar"), 33, new Dictionary<string, int>());

     bool areEqual = fooCategory.Equals(barCategory);

     Assert.IsFalse(areEqual);
 }
Ejemplo n.º 4
0
 /// <summary>
 /// Two instances constructed from identical arguments must produce the
 /// same hash code.
 /// </summary>
 public void EqualsTestGetHasCodesEquals()
 {
     var first = new CountedCategory(
         new Category(2, "foo"), 42, new Dictionary<string, int>());
     var second = new CountedCategory(
         new Category(2, "foo"), 42, new Dictionary<string, int>());

     int firstHash = first.GetHashCode();
     int secondHash = second.GetHashCode();

     Assert.AreEqual(firstHash, secondHash);
 }
Ejemplo n.º 5
0
        /// <summary>
        /// Checks that the <c>DocumentCount</c> property returns the count
        /// that was supplied to the constructor.
        /// </summary>
        public void DocumentCountTestProperty()
        {
            // Category and terms are irrelevant here, so both are null.
            var counted = new CountedCategory(null, 42, null);

            var actual = counted.DocumentCount;

            Assert.AreEqual(42, actual);
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Checks that the <c>Terms</c> property returns the term table that
        /// was supplied to the constructor.
        /// </summary>
        public void TermsTestProperty()
        {
            // Arrange: a term table with a single entry.
            var expected = new Dictionary<string, int> { { "foo", 42 } };
            var counted = new CountedCategory(null, 0, expected);

            // Act
            var actual = counted.Terms;

            // Assert
            Assert.AreEqual(expected, actual);
        }
Ejemplo n.º 7
0
 /// <summary>
 /// An instance must be equal to a second reference pointing at the very
 /// same object.
 /// </summary>
 public void EqualsTestReferenceEquals()
 {
     var original = new CountedCategory(new Category(2, "foo"), 42, new Dictionary<string, int>());
     var alias = original;

     bool areEqual = original.Equals(alias);

     Assert.IsTrue(areEqual);
 }
Ejemplo n.º 8
0
 /// <summary>
 /// Comparing a real instance against a null reference with the ==
 /// operator must yield false.
 /// </summary>
 public void EqualsTestObjectToNullReferenceEqualsOperator()
 {
     var instance = new CountedCategory(
         new Category(2, "foo"), 42, new Dictionary<string, int>());

     // Explicitly typed so that the comparison binds to the same
     // == operator as a CountedCategory-to-CountedCategory comparison.
     CountedCategory nothing = null;

     bool areEqual = instance == nothing;

     Assert.IsFalse(areEqual);
 }
Ejemplo n.º 9
0
 /// <summary>
 /// <c>Equals</c> must report equality when the other operand is an
 /// equivalent instance held through an <c>Object</c>-typed reference.
 /// </summary>
 public void EqualsTestObjectReferenceEquality()
 {
     var typed = new CountedCategory(new Category(2, "foo"), 42, new Dictionary<string, int>());

     // Deliberately declared as Object so the call below is made with an
     // Object-typed argument rather than a CountedCategory-typed one.
     Object boxed = new CountedCategory(new Category(2, "foo"), 42, new Dictionary<string, int>());

     bool areEqual = typed.Equals(boxed);

     Assert.IsTrue(areEqual);
 }
Ejemplo n.º 10
0
        /// <summary>
        /// The main test method. Call this to run the test.
        /// </summary>
        /// <remarks>
        /// Runs a fixed number of rounds. In every round another
        /// <c>NEWS_COUNT_INTERVAL</c>-sized query window of news is added to
        /// each category's training data, a <see cref="CountedCategory"/>
        /// description is rebuilt per category from all training data
        /// gathered so far, and every test item is classified with
        /// <c>TermUtils.DetermineCategory</c>. The percentage of correct
        /// classifications per category and in total is written, one row per
        /// round, to <c>output.csv</c> using ';' as the column separator.
        /// A category without any test items yields 0 / 0, which is written
        /// as NaN.
        /// </remarks>
        public static void Test()
        {
            // Number of training rounds, and therefore of rows in the CSV.
            const int roundCount = 10;
            // Upper limit passed to each test-data query.
            const int testDataLimit = 1000;

            List<Category> categories;

            // Store results here: category id -> one TestResult per round.
            Dictionary<int, TestResult[]> results =
                new Dictionary<int, TestResult[]>();

            SqlCeArchivist a = new SqlCeArchivist(@"C:\Users\mads\Documents\GitHub\NyhedsfilterP2\System\NewsFilter\Experiment\Database.sdf");
            a.Open();
            try
            {
                // Uncomment to seed database. This should be done on the first test run.
                // Replace with dir to data.
                // string dataDir = @"C:\Users\mads\Documents\GitHub\NyhedsfilterP2\System\NewsTrainer\data\";
                // DBSeeder.SeedDatabaseWithNews(a, dataDir, 3000);

                categories = a.GetCategories();

                // Training data listed by category.
                Dictionary<int, List<NewsItem>> trainingData =
                    new Dictionary<int, List<NewsItem>>();
                List<NewsItem> testData = new List<NewsItem>();

                NewsQuery query = new NewsQuery();
                foreach (Category category in categories)
                {
                    // Find test data by setting an offset just past the last
                    // training window, and a new limit.
                    query.Offset = roundCount * NEWS_COUNT_INTERVAL;
                    query.Limit = testDataLimit;
                    query.CategoryId = category.Id;
                    testData.AddRange(a.GetNews(query));

                    // Create every result slot up front so that a category
                    // without test items cannot leave null entries behind.
                    TestResult[] categoryResults = new TestResult[roundCount];
                    for (int i = 0; i < roundCount; i++)
                    {
                        categoryResults[i] = new TestResult(0, 0);
                    }
                    results.Add(category.Id, categoryResults);
                }

                for (int i = 0; i < roundCount; i++)
                {
                    List<CountedCategory> categoryDescriptions = new List<CountedCategory>();

                    foreach (Category category in categories)
                    {
                        // First find training data.
                        query.Offset = i * NEWS_COUNT_INTERVAL;
                        query.Limit = NEWS_COUNT_INTERVAL;
                        query.CategoryId = category.Id;

                        List<NewsItem> categoryTraining;
                        if (!trainingData.TryGetValue(category.Id, out categoryTraining))
                        {
                            categoryTraining = new List<NewsItem>();
                            trainingData.Add(category.Id, categoryTraining);
                        }
                        // Add to list. Data from earlier rounds is kept, so the
                        // training set grows with every round.
                        categoryTraining.AddRange(a.GetNews(query));

                        // Save as CountedCategory.
                        CountedCategory cat = new CountedCategory();
                        cat.DocumentCount = categoryTraining.Count;
                        cat.Category = category;

                        // Add all term frequencies.
                        foreach (NewsItem news in categoryTraining)
                        {
                            foreach (Term t in news.Terms)
                            {
                                // A missing term leaves 'current' at 0.
                                int current;
                                cat.Terms.TryGetValue(t.TermName, out current);
                                cat.Terms[t.TermName] = current + t.Frequency;
                            }
                        }
                        categoryDescriptions.Add(cat);
                    }

                    // Now test classifier for each test data item.
                    foreach (NewsItem item in testData)
                    {
                        List<string> terms = new List<string>();
                        // Add all terms to list (all occurrences).
                        foreach (Term t in item.Terms)
                        {
                            for (int j = 0; j < t.Frequency; j++)
                            {
                                terms.Add(t.TermName);
                            }
                        }

                        TestResult roundResult = results[item.Category.Id][i];

                        // Classify and save result.
                        if (TermUtils.DetermineCategory(
                            terms, categoryDescriptions).Id == item.Category.Id)
                        {
                            roundResult.CorrectCount += 1;
                        }

                        roundResult.Count += 1;
                    }
                }
            }
            finally
            {
                // Close the archivist even when a query or the classifier throws.
                a.Close();
            }

            // Build the CSV.
            StringBuilder csv = new StringBuilder();
            // Write headers.
            csv.Append("data-count;");
            foreach (Category c in categories)
            {
                csv.Append(c.Name);
                csv.Append(";");
            }
            csv.AppendLine("total");

            // Iterate through each row.
            for (int i = 0; i < roundCount; i++)
            {
                // Write data-count column.
                csv.Append(((i + 1) * NEWS_COUNT_INTERVAL).ToString());
                csv.Append(";");

                int correctSum = 0;
                int countSum = 0;
                // Print each category. Iterate the category list (not the
                // results dictionary) so that the values line up with the
                // header columns written above.
                foreach (Category c in categories)
                {
                    TestResult cell = results[c.Id][i];

                    // Calculate percentage + add to string.
                    csv.Append(
                        (cell.CorrectCount /
                        (double)cell.Count * 100.0).ToString());
                    csv.Append(";");
                    correctSum += cell.CorrectCount;
                    countSum += cell.Count;
                }
                // Write total percentage.
                csv.AppendLine(
                        (correctSum /
                        (double)countSum * 100.0).ToString());
            }

            // Write to CSV.
            File.WriteAllText("output.csv", csv.ToString());
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Classifies the terms "a" through "m" against three hand-built
        /// category descriptions and expects the first category to be chosen.
        /// </summary>
        public void DetermineCategoryTestCorrectClassification()
        {
            // The document to classify: each letter a..m occurs exactly once.
            var documentTerms = new List<string>
            {
                "a", "b", "c", "d", "e", "f", "g",
                "h", "i", "j", "k", "l", "m"
            };

            // Category 0: 100 documents.
            var first = new CountedCategory(
                new Category(0, "cat1"),
                100,
                new Dictionary<string, int>
                {
                    { "a", 80 },
                    { "d", 28 },
                    { "f", 0 },
                    { "k", 10 }
                });

            // Category 1: 80 documents.
            var second = new CountedCategory(
                new Category(1, "cat2"),
                80,
                new Dictionary<string, int>
                {
                    { "c", 10 },
                    { "d", 12 },
                    { "h", 9 },
                    { "m", 11 }
                });

            // Category 2: 200 documents.
            var third = new CountedCategory(
                new Category(2, "cat3"),
                200,
                new Dictionary<string, int>
                {
                    { "b", 60 },
                    { "i", 2 },
                    { "l", 4 },
                    { "m", 10 }
                });

            var candidates = new List<CountedCategory> { first, second, third };

            Category chosen = TermUtils.DetermineCategory(documentTerms, candidates);

            // Category 1 should be the result (calculated manually).
            Assert.AreEqual(first.Category, chosen);
        }