Пример #1
0
        /// <summary>
        /// Runs the cosine similarity test: opens (and, on first run, seeds)
        /// the test database, fetches the news in the "redundant" category
        /// and hands them to <c>WriteCosineSimilarity</c>.
        /// </summary>
        public static void Test()
        {
            string db = "cosine_similarity_database.sdf";
            // Check for the file before the archivist is set up; the result
            // decides whether the database has to be seeded below.
            bool dbExists = File.Exists(db);

            // Set up database.
            SqlCeArchivist archivist = new SqlCeArchivist(db);
            archivist.Open();
            try
            {
                // Seed only if the db did not exist.
                if (!dbExists)
                {
                    // Seed database. Change the dataDir to the correct one.
                    string dataDir = @"C:\Users\Andreas Petersen\Documents\GitHub\NyhedsfilterP2\System\NewsTrainer\data";
                    DBSeeder.SeedDatabaseWithNews(archivist, dataDir, 100);

                    // Seed the database with redundant news. Change the dataDir to the correct one.
                    dataDir = @"C:\Users\Andreas Petersen\Documents\GitHub\NyhedsfilterP2\System\NewsFilter\TestProject\cosine_similarity_test";
                    DBSeeder.SeedDatabaseWithNews(archivist, dataDir, 100);

                    Console.WriteLine();
                }

                // Get the redundant news.
                // NOTE(review): Find returns null when no category is named
                // "redundant", which makes the .Id access throw - confirm the
                // seed data always contains that category.
                List<NewsItem> redundantNews = archivist.GetNews(new NewsQuery()
                    {
                        CategoryId = archivist.GetCategories().
                            Find(n => n.Name.Equals("redundant")).Id
                    });

                WriteCosineSimilarity(archivist, redundantNews);
            }
            finally
            {
                // Close the archivist even when seeding or querying throws.
                archivist.Close();
            }
        }
Пример #2
0
 /// <summary>
 /// Prepares the shared <c>Archivist</c> used by the curator tests: creates
 /// a <c>SqlCeArchivist</c> for "testDb", opens it and truncates its data.
 /// </summary>
 /// <param name="testContext">Test context passed in by the caller; not used by this method.</param>
 public static void CuratorTestClassInitialize(TestContext testContext)
 {
     // Initialize the archivist.
     Archivist = new SqlCeArchivist("testDb");
     Archivist.Open();
     // Presumably clears any data left over from earlier runs so the tests
     // start from an empty store - confirm against TruncateData.
     Archivist.TruncateData();
 }
Пример #3
0
        /// <summary>
        /// Prepares the shared fixtures for the test class: creates a
        /// <c>SqlCeArchivist</c> for "testDb", opens it, truncates its data
        /// and creates the filter under test.
        /// </summary>
        /// <param name="testContext">Test context passed in by the caller; not used by this method.</param>
        public static void InterestFilterTestClassInitialize(TestContext testContext)
        {
            // Initialize the archivist.
            Archivist = new SqlCeArchivist("testDb");
            Archivist.Open();
            // Presumably clears any data left over from earlier runs -
            // confirm against TruncateData.
            Archivist.TruncateData();

            // NOTE(review): the method name refers to the interest filter but
            // a RedundancyFilter is created here - confirm this is intended.
            Filter = new RedundancyFilter();
        }
Пример #4
0
        /// <summary>
        /// Creates the shared database connection and archivist for the
        /// quantity tests, pointing at <c>database.sdf</c> in the test run
        /// directory, and opens the archivist.
        /// </summary>
        /// <param name="testContext">Test context supplying the test run directory.</param>
        public static void QuantityTestClassInitialize(TestContext testContext)
        {
            // Connection string for the database file in the run directory.
            string connectionString = string.Format(
                @"Data Source={0}\database.sdf",
                testContext.TestRunDirectory);
            DatabaseConnection = new SqlCeConnection(connectionString);

            // The archivist works on top of the shared connection.
            Archivist = new SqlCeArchivist(DatabaseConnection);
            Archivist.Open();
        }
Пример #5
0
        /// <summary>
        /// Static constructor for <c>Client</c>. Creates and opens the shared
        /// archivist from the configured connection string setting and
        /// registers <c>CleanUp</c> to run when the process exits.
        /// </summary>
        static Client()
        {
            // NOTE(review): the previous version also built a
            // "<current directory>\database.sdf" path that was never used;
            // confirm the settings value is the intended storage location.
            Archivist = new SqlCeArchivist(
                Properties.Settings.Default.databaseConnectionString);
            Archivist.Open();

            // Adds CleanUp to be called on process exit.
            AppDomain.CurrentDomain.ProcessExit += CleanUp;
        }
Пример #6
0
 /// <summary>
 /// Entry point: opens <c>smallDatabase.sdf</c>, truncates its data and
 /// re-seeds it with news from the hard-coded data directory.
 /// </summary>
 /// <param name="args">Command line arguments; not used.</param>
 static void Main(string[] args)
 {
     SqlCeArchivist a = new SqlCeArchivist(@"smallDatabase.sdf");
     a.Open();
     try
     {
         //NewsQuery q = new NewsQuery();
         //q.Limit = 1;
         //a.GetTfIdfVector(a.GetNews(q)[0]);
         a.TruncateData();

         // Change the dataDir to the correct one for the local machine.
         string dataDir = @"C:\Users\Andreas Petersen\Documents\GitHub\NyhedsfilterP2\System\NewsTrainer\data";
         DBSeeder.SeedDatabaseWithNews(a, dataDir, 1);
         //string newsSourcesDir = @"C:\Users\mkaloer\Documents\GitHub\NyhedsfilterP2\System\NewsTrainer\sources.json";
         //DBSeeder.SeedNewsSources(a, newsSourcesDir);
     }
     finally
     {
         // Close the archivist even when truncating or seeding throws.
         a.Close();
     }
 }
        /// <summary>
        /// The main test method. Seeds a fresh database, marks the "sports"
        /// category as interesting and "business" as uninteresting, adds 500
        /// generated news items of each kind, runs the unread news through an
        /// <c>InterestFilter</c> and prints how many generated interesting and
        /// uninteresting items passed the filter.
        /// </summary>
        public static void Test()
        {
            int passedCount;
            int interestingCount;
            int correctCount = 0;
            int falseCount = 0;

            SqlCeArchivist archivist = new SqlCeArchivist("db.sdf");
            archivist.Open();
            try
            {
                archivist.TruncateData();
                string dataDir = @"C:\Users\mads\Desktop\data";
                DBSeeder.SeedDatabaseWithNews(archivist, dataDir, 500);

                // Mark sports category as interesting, business as uninteresting.
                List<Category> categories = archivist.GetCategories();
                archivist.MarkCategoryInteresting(categories.First(p => p.Name.Equals("sports")), true);
                archivist.MarkCategoryInteresting(categories.First(p => p.Name.Equals("business")), false);

                // Fixed seed so the random source starts from the same state
                // on every run.
                _random = new Random(42);

                // Generate lists of news items.
                List<NewsMaterial> interestingNews = new List<NewsMaterial>();
                for (int i = 0; i < 500; i++)
                {
                    interestingNews.Add(GenerateNewsMaterial(true));
                }
                List<NewsMaterial> uninterestingNews = new List<NewsMaterial>();
                for (int i = 0; i < 500; i++)
                {
                    uninterestingNews.Add(GenerateNewsMaterial(false));
                }
                interestingCount = interestingNews.Count;

                // Add news to db and keep the ids in sets, so the membership
                // checks below are constant time instead of list scans.
                HashSet<int> interestingNewsIds =
                    new HashSet<int>(archivist.AddNews(interestingNews));
                HashSet<int> uninterestingNewsIds =
                    new HashSet<int>(archivist.AddNews(uninterestingNews));

                // Get news to filter.
                NewsQuery query = new NewsQuery();
                query.Read = ReadStatus.Unread;
                List<NewsItem> news = archivist.GetNews(query);
                // Filter news.
                InterestFilter filter = new InterestFilter();
                news = filter.Filter(archivist, news);
                passedCount = news.Count;

                // Compare ids and count number of correct values. Items that
                // belong to neither generated set (the seeded news) are not
                // counted either way.
                foreach (NewsItem item in news)
                {
                    if (interestingNewsIds.Contains(item.Id))
                    {
                        correctCount++;
                    }
                    else if (uninterestingNewsIds.Contains(item.Id))
                    {
                        falseCount++;
                    }
                }
            }
            finally
            {
                // Release the database whether or not the run succeeded.
                archivist.Close();
            }

            Console.WriteLine("Through filter: {0}", passedCount);
            Console.WriteLine("False positive count: {0}", falseCount);
            // Print result in console.
            Console.WriteLine("{0}/{1}={2}%", correctCount,
                interestingCount,
                (double) correctCount / interestingCount * 100.0);
            // Wait for input before returning.
            Console.ReadLine();
        }
Пример #8
0
        /// <summary>
        /// Verifies that a <c>SqlCeArchivist</c> created from a
        /// <c>SqlCeConnection</c> reports the connection's data source as its
        /// <c>StoragePath</c>.
        /// </summary>
        public void StoragePathConstructor2Test()
        {
            string storagePath = TestContext.TestRunDirectory + "\\d.sdf";
            string connectionString = "Data Source=" + storagePath;

            try
            {
                using (SqlCeConnection c = new SqlCeConnection(connectionString))
                {
                    SqlCeArchivist a = new SqlCeArchivist(c);

                    // Expected value first, actual second, so a failure
                    // message labels the two values correctly.
                    Assert.AreEqual(storagePath, a.StoragePath);
                }
            }
            finally
            {
                // Delete the database, also when the assertion fails.
                if (File.Exists(storagePath))
                {
                    File.Delete(storagePath);
                }
            }
        }
Пример #9
0
        /// <summary>
        /// Verifies that a <c>SqlCeArchivist</c> created from a storage path
        /// reports that same path as its <c>StoragePath</c>.
        /// </summary>
        public void StoragePathConstructor1Test()
        {
            string storagePath = TestContext.TestRunDirectory + "\\d.sdf";

            try
            {
                SqlCeArchivist a = new SqlCeArchivist(storagePath);

                // Expected value first, actual second, so a failure message
                // labels the two values correctly.
                Assert.AreEqual(storagePath, a.StoragePath);
            }
            finally
            {
                // Delete the database, also when the assertion fails.
                if (File.Exists(storagePath))
                {
                    File.Delete(storagePath);
                }
            }
        }
Пример #10
0
        /// <summary>
        /// Verifies that <c>StorageDeviceExists</c> returns false once the
        /// database file the archivist points at is not on disk.
        /// </summary>
        public void StorageDeviceExistsTestNoStorageDevice()
        {
            string databasePath = TestContext.TestRunDirectory + "\\d.sdf";
            SqlCeArchivist archivist = new SqlCeArchivist(databasePath);

            // Make sure there is no database file at the path.
            bool fileOnDisk = File.Exists(databasePath);
            if (fileOnDisk)
            {
                File.Delete(databasePath);
            }

            bool reportedExisting = archivist.StorageDeviceExists();
            Assert.IsFalse(reportedExisting);
        }
Пример #11
0
        /// <summary>
        /// Verifies that constructing a <c>SqlCeArchivist</c> from a null
        /// <c>SqlCeConnection</c> throws an <c>ArgumentNullException</c>.
        /// </summary>
        public void SqlCeArcivistConstructorTestThrowsExpection()
        {
            SqlCeConnection nullConnection = null;
            bool sawArgumentNull;

            try
            {
                // Only the constructor call matters; the instance is discarded.
                new SqlCeArchivist(nullConnection);
                sawArgumentNull = false;
            }
            catch (ArgumentNullException)
            {
                sawArgumentNull = true;
            }

            Assert.IsTrue(sawArgumentNull,
                "An ArgumentNullException was not thrown.");
        }
        /// <summary>
        /// The main test method. Opens (and, on first run, seeds) the test
        /// database, builds a set of up to 100 news items consisting of the
        /// "redundant" category plus other news, runs it through a
        /// <c>RedundancyFilter</c> and prints how many redundant items were
        /// removed and how many redundant sets were handled incorrectly.
        /// </summary>
        public static void Test()
        {
            // Total number of news items handed to the filter.
            const int totalNewsCount = 100;

            string db = "redundancy_perf.sdf";
            // Check for the file before the archivist is set up; the result
            // decides whether the database has to be seeded below.
            bool dbExists = File.Exists(db);

            // Set up database.
            SqlCeArchivist archivist = new SqlCeArchivist(db);
            archivist.Open();
            try
            {
                // Seed only if the db did not exist.
                if (!dbExists)
                {
                    // Seed database. Change the dataDir to the correct one.
                    string dataDir = @"C:\Users\Andreas Petersen\Documents\GitHub\NyhedsfilterP2\System\NewsTrainer\data";
                    DBSeeder.SeedDatabaseWithNews(archivist, dataDir, 100);

                    // Seed the database with redundant news. Change the dataDir to the correct one.
                    dataDir = @"C:\Users\Andreas Petersen\Documents\GitHub\NyhedsfilterP2\System\NewsFilter\TestProject\redundancy_perf_test";
                    DBSeeder.SeedDatabaseWithNews(archivist, dataDir, 100);

                    Console.WriteLine();
                }

                // Get the redundant news set.
                List<NewsItem> redundantNews = archivist.GetNews(new NewsQuery()
                    {
                        CategoryId = archivist.GetCategories().
                            Find(n => n.Name.Equals("redundant")).Id
                    });

                // Get some news not in the redundant set. They themselves might
                // include some redundant news, but shouldn't be so in relation
                // to the redundant news set.
                List<NewsItem> nonRedundantNews =
                    archivist.GetNews(new NewsQuery() { Limit = 200 });

                // Exclude the redundant set by id, so the result does not
                // depend on how NewsItem implements equality.
                HashSet<int> redundantIds = new HashSet<int>();
                foreach (NewsItem item in redundantNews)
                {
                    redundantIds.Add(item.Id);
                }
                nonRedundantNews = nonRedundantNews.FindAll(
                    n => !redundantIds.Contains(n.Id));

                // Keep only the first items needed to reach the total. Guard
                // against a negative target and against having fewer items
                // than requested, both of which would make RemoveRange throw.
                int nonRedundantNewsCount =
                    Math.Max(0, totalNewsCount - redundantNews.Count);
                if (nonRedundantNews.Count > nonRedundantNewsCount)
                {
                    nonRedundantNews.RemoveRange(
                        nonRedundantNewsCount,
                        nonRedundantNews.Count - nonRedundantNewsCount);
                }

                // Assemble all the news.
                List<NewsItem> allNews = new List<NewsItem>(redundantNews);
                allNews.AddRange(nonRedundantNews);

                // Set all the news as unread.
                foreach (NewsItem item in allNews)
                {
                    archivist.SetNewsReadStatus(item, false);
                }

                // Titles of the news items that make up each redundant set.
                string[][] redundantTitleSets =
                {
                    new[] { "1", "2" },
                    new[] { "3", "4" },
                    new[] { "5", "6" },
                    new[] { "7", "8", "9" },
                    new[] { "10", "11" },
                    new[] { "12", "13" }
                };

                // Each list item contains the ids of one set of redundant
                // news items, looked up by title.
                List<List<int>> redundantNewsIds = new List<List<int>>();
                foreach (string[] titles in redundantTitleSets)
                {
                    List<int> ids = new List<int>();
                    foreach (string title in titles)
                    {
                        ids.Add(redundantNews.Find(n => n.Title.Equals(title)).Id);
                    }
                    redundantNewsIds.Add(ids);
                }

                // Filter the news.
                RedundancyFilter filter = new RedundancyFilter();
                List<NewsItem> result = filter.Filter(archivist, allNews);

                // Check that the result filters redundant news.
                int correctCount = 0;
                int falsePositiveCount = 0;
                int expectedCorrectCount = 0;

                foreach (List<int> set in redundantNewsIds)
                {
                    // Count number of news that went through the filter.
                    int newsCount = 0;
                    foreach (int id in set)
                    {
                        if (result.Exists(p => p.Id == id))
                        {
                            newsCount++;
                        }
                    }

                    // Every item of the set that was filtered away counts as
                    // a removal; a set is counted as a false positive unless
                    // exactly one of its items got through.
                    correctCount += set.Count - newsCount;
                    falsePositiveCount += newsCount != 1 ? 1 : 0;

                    // Ideally all but one item of each set is removed.
                    expectedCorrectCount += set.Count - 1;
                }

                // Print the results.
                Console.WriteLine("Redundant news removed: {0}/{1}, false positives: {2}",
                    correctCount,
                    expectedCorrectCount,
                    falsePositiveCount);
            }
            finally
            {
                // Close the archivist even when a step above throws.
                archivist.Close();
            }
        }
Пример #13
0
        /// <summary>
        /// Creates the shared database connection, archivist and
        /// <c>PrivateType</c> wrapper for <c>Aggregator</c> used by the
        /// aggregator tests. The database is <c>database.sdf</c> in the test
        /// run directory.
        /// </summary>
        /// <param name="testContext">Test context supplying the test run directory.</param>
        public static void AggregatorTestInitialize(TestContext testContext)
        {
            // Connection string for the database file in the run directory.
            string connectionString = string.Format(
                @"Data Source={0}\database.sdf",
                testContext.TestRunDirectory);
            DatabaseConnection = new SqlCeConnection(connectionString);

            // The archivist works on top of the shared connection.
            Archivist = new SqlCeArchivist(DatabaseConnection);
            Archivist.Open();

            // Wrapper created from the Aggregator type, stored for the tests.
            AggregatorClass = new PrivateType(typeof(Aggregator));
        }
Пример #14
0
        /// <summary>
        /// The main test method. Call this to run the test.
        /// Runs ten rounds; in each round the per-category training data
        /// grows by <c>NEWS_COUNT_INTERVAL</c> items, every test item is
        /// classified with <c>TermUtils.DetermineCategory</c>, and the
        /// percentage of correct classifications per category and in total
        /// is written to <c>output.csv</c>.
        /// </summary>
        public static void Test()
        {
            // Number of training rounds (rows in the resulting CSV).
            const int rounds = 10;

            List<Category> categories;
            // Results per category id, one entry per round.
            Dictionary<int, TestResult[]> results =
                new Dictionary<int, TestResult[]>();

            SqlCeArchivist a = new SqlCeArchivist(@"C:\Users\mads\Documents\GitHub\NyhedsfilterP2\System\NewsFilter\Experiment\Database.sdf");
            a.Open();
            try
            {
                // Uncomment to seed database. This should be done on the first test run.
                // Replace with dir to data.
                // string dataDir = @"C:\Users\mads\Documents\GitHub\NyhedsfilterP2\System\NewsTrainer\data\";
                // DBSeeder.SeedDatabaseWithNews(a, dataDir, 3000);

                categories = a.GetCategories();

                // Training data listed by category.
                Dictionary<int, List<NewsItem>> trainingData =
                    new Dictionary<int, List<NewsItem>>();
                List<NewsItem> testData = new List<NewsItem>();

                NewsQuery query = new NewsQuery();
                foreach (Category category in categories)
                {
                    // Test data starts after the offsets used for training
                    // data in the rounds below.
                    query.Offset = rounds * NEWS_COUNT_INTERVAL;
                    query.Limit = 1000;
                    query.CategoryId = category.Id;
                    testData.AddRange(a.GetNews(query));

                    // Create every result slot up front so categories without
                    // any test data do not leave null entries behind.
                    TestResult[] perRound = new TestResult[rounds];
                    for (int i = 0; i < rounds; i++)
                    {
                        perRound[i] = new TestResult(0, 0);
                    }
                    results.Add(category.Id, perRound);
                }

                for (int i = 0; i < rounds; i++)
                {
                    List<CountedCategory> categoryDescriptions = new List<CountedCategory>();

                    foreach (Category category in categories)
                    {
                        // Fetch this round's additional training data.
                        query.Offset = i * NEWS_COUNT_INTERVAL;
                        query.Limit = NEWS_COUNT_INTERVAL;
                        query.CategoryId = category.Id;

                        List<NewsItem> categoryTraining;
                        if (!trainingData.TryGetValue(category.Id, out categoryTraining))
                        {
                            categoryTraining = new List<NewsItem>();
                            trainingData.Add(category.Id, categoryTraining);
                        }
                        // Training data accumulates across rounds.
                        categoryTraining.AddRange(a.GetNews(query));

                        // Save as CountedCategory.
                        CountedCategory cat = new CountedCategory();
                        cat.DocumentCount = categoryTraining.Count;
                        cat.Category = category;

                        // Add all term frequencies.
                        foreach (NewsItem news in categoryTraining)
                        {
                            foreach (Term t in news.Terms)
                            {
                                if (!cat.Terms.ContainsKey(t.TermName))
                                {
                                    cat.Terms[t.TermName] = 0;
                                }
                                cat.Terms[t.TermName] += t.Frequency;
                            }
                        }
                        categoryDescriptions.Add(cat);
                    }

                    // Now test classifier for each test data item.
                    foreach (NewsItem item in testData)
                    {
                        List<string> terms = new List<string>();
                        // Add all terms to list (all occurrences).
                        foreach (Term t in item.Terms)
                        {
                            for (int j = 0; j < t.Frequency; j++)
                            {
                                terms.Add(t.TermName);
                            }
                        }

                        TestResult[] categoryResults = results[item.Category.Id];

                        // Classify and save result.
                        if (TermUtils.DetermineCategory(
                            terms, categoryDescriptions).Id == item.Category.Id)
                        {
                            categoryResults[i].CorrectCount += 1;
                        }

                        categoryResults[i].Count += 1;
                    }
                }
            }
            finally
            {
                // Close the archivist even when a query or the classifier throws.
                a.Close();
            }

            // Write to CSV.
            StringBuilder csv = new StringBuilder();
            // Write headers.
            csv.Append("data-count;");
            foreach (Category c in categories)
            {
                csv.Append(c.Name);
                csv.Append(";");
            }
            csv.AppendLine("total");

            // Iterate through each row.
            for (int i = 0; i < rounds; i++)
            {
                // Write data-count column.
                csv.Append(((i + 1) * NEWS_COUNT_INTERVAL).ToString());
                csv.Append(";");

                int correctSum = 0;
                int countSum = 0;
                // Print each category in header order, so every value ends up
                // under its own column regardless of dictionary ordering.
                foreach (Category c in categories)
                {
                    TestResult roundResult = results[c.Id][i];
                    // Calculate the percentage and add it to the row.
                    csv.Append(
                        (roundResult.CorrectCount /
                        (double)roundResult.Count * 100.0).ToString());
                    csv.Append(";");
                    correctSum += roundResult.CorrectCount;
                    countSum += roundResult.Count;
                }
                // Write the total percentage.
                csv.AppendLine(
                        (correctSum /
                        (double)countSum * 100.0).ToString());
            }

            // Write to CSV.
            File.WriteAllText("output.csv", csv.ToString());
        }