/// <summary>
/// Runs the test: ensures the database is seeded, then computes and
/// writes cosine similarity for the news in the "redundant" category.
/// </summary>
public static void Test()
{
    string db = "cosine_similarity_database.sdf";

    // Seeding is only required when the database file is not there yet.
    bool seedRequired = !File.Exists(db);

    // Set up database.
    SqlCeArchivist archivist = new SqlCeArchivist(db);
    archivist.Open();

    if (seedRequired)
    {
        // Seed database. Change the dataDir to the correct one.
        string dataDir = @"C:\Users\Andreas Petersen\Documents\GitHub\NyhedsfilterP2\System\NewsTrainer\data";
        DBSeeder.SeedDatabaseWithNews(archivist, dataDir, 100);

        // Seed the database with redundant news. Change the dataDir to the correct one.
        dataDir = @"C:\Users\Andreas Petersen\Documents\GitHub\NyhedsfilterP2\System\NewsFilter\TestProject\cosine_similarity_test";
        DBSeeder.SeedDatabaseWithNews(archivist, dataDir, 100);
        Console.WriteLine();
    }

    // Look up the "redundant" category and fetch every news item in it.
    var redundantCategory = archivist.GetCategories()
        .Find(n => n.Name.Equals("redundant"));
    var redundantQuery = new NewsQuery() { CategoryId = redundantCategory.Id };
    List<NewsItem> redundantNews = archivist.GetNews(redundantQuery);

    WriteCosineSimilarity(archivist, redundantNews);
    archivist.Close();
}
/// <summary>
/// Class-level setup for the curator tests: opens the shared archivist
/// against the test database and wipes any data left by earlier runs.
/// </summary>
public static void CuratorTestClassInitialize(TestContext testContext)
{
    // Open the archivist and start every test class run from an empty
    // database so tests do not depend on leftover state.
    Archivist = new SqlCeArchivist("testDb");
    Archivist.Open();
    Archivist.TruncateData();
}
/// <summary>
/// Class-level setup for the interest-filter tests: opens a clean test
/// database and creates the filter under test.
/// </summary>
public static void InterestFilterTestClassInitialize(TestContext testContext)
{
    // Open the archivist and start from an empty database.
    Archivist = new SqlCeArchivist("testDb");
    Archivist.Open();
    Archivist.TruncateData();

    // Filter instance shared by the tests in this class.
    Filter = new RedundancyFilter();
}
/// <summary>
/// Class-level setup for the quantity tests: opens an archivist backed by
/// a database file inside the test run directory.
/// </summary>
public static void QuantityTestClassInitialize(TestContext testContext)
{
    // Build the connection string for a database inside the test run
    // directory, then set the shared database connection.
    string connectionString =
        "Data Source=" + testContext.TestRunDirectory + "\\database.sdf";
    DatabaseConnection = new SqlCeConnection(connectionString);

    // Initialize the archivist.
    Archivist = new SqlCeArchivist(DatabaseConnection);
    Archivist.Open();
}
/// <summary>
/// Static constructor for <c>Client</c>. Opens the archivist using the
/// connection string from application settings and registers clean-up
/// on process exit.
/// </summary>
static Client()
{
    // FIX: the previous version also built a local "path" string from
    // Directory.GetCurrentDirectory() that was never used anywhere; the
    // connection string below is what actually configures the archivist,
    // so the dead local has been removed.
    Archivist = new SqlCeArchivist(
        Properties.Settings.Default.databaseConnectionString);
    Archivist.Open();

    // Adds CleanUp to be called on process exit.
    AppDomain.CurrentDomain.ProcessExit += CleanUp;
}
/// <summary>
/// Entry point: wipes the small test database and reseeds it with a tiny
/// data set.
/// </summary>
static void Main(string[] args)
{
    // Open the small test database next to the executable.
    SqlCeArchivist a = new SqlCeArchivist(@"smallDatabase.sdf");
    a.Open();

    // Start from a clean slate before reseeding.
    a.TruncateData();

    // Reseed from the training data directory. The third argument is the
    // per-call count passed to the seeder — presumably items per category;
    // TODO confirm against DBSeeder.SeedDatabaseWithNews.
    string dataDir = @"C:\Users\Andreas Petersen\Documents\GitHub\NyhedsfilterP2\System\NewsTrainer\data";
    DBSeeder.SeedDatabaseWithNews(a, dataDir, 1);

    // FIX: removed the dead commented-out experiments and the redundant
    // trailing "return;" (Main is void; control falls off the end anyway).
    a.Close();
}
/// <summary>
/// The main test method. Seeds a database, marks one category interesting
/// and one uninteresting, generates 500 interesting and 500 uninteresting
/// news materials, runs everything through <c>InterestFilter</c> and
/// prints how many interesting items survived and how many uninteresting
/// items slipped through.
/// </summary>
public static void Test()
{
    SqlCeArchivist archivist = new SqlCeArchivist("db.sdf");
    archivist.Open();
    archivist.TruncateData();

    // Seed database. Change the dataDir to the correct one.
    string dataDir = @"C:\Users\mads\Desktop\data";
    DBSeeder.SeedDatabaseWithNews(archivist, dataDir, 500);

    // Mark sports category as interesting, business as uninteresting.
    List<Category> categories = archivist.GetCategories();
    archivist.MarkCategoryInteresting(
        categories.First(p => p.Name.Equals("sports")), true);
    archivist.MarkCategoryInteresting(
        categories.First(p => p.Name.Equals("business")), false);

    // Fixed seed so the generated material is reproducible across runs.
    _random = new Random(42);

    // Generate lists of news items.
    List<NewsMaterial> interestingNews = new List<NewsMaterial>();
    for (int i = 0; i < 500; i++)
    {
        interestingNews.Add(GenerateNewsMaterial(true));
    }
    List<NewsMaterial> uninterestingNews = new List<NewsMaterial>();
    for (int i = 0; i < 500; i++)
    {
        uninterestingNews.Add(GenerateNewsMaterial(false));
    }

    // Add news to db and save ids.
    // FIX: the previous version also merged both id lists into an
    // "allNewsIds" list that was never read anywhere; it has been removed.
    List<int> interestingNewsIds = archivist.AddNews(interestingNews);
    List<int> uninterestingNewsIds = archivist.AddNews(uninterestingNews);

    // Get news to filter (everything still unread).
    NewsQuery query = new NewsQuery();
    query.Read = ReadStatus.Unread;
    List<NewsItem> news = archivist.GetNews(query);

    // Filter news.
    InterestFilter filter = new InterestFilter();
    news = filter.Filter(archivist, news);

    int correctCount = 0;
    int falseCount = 0;

    // Compare ids and count number of correct values.
    foreach (NewsItem item in news)
    {
        if (interestingNewsIds.Contains(item.Id))
        {
            correctCount++;
        }
        else if (uninterestingNewsIds.Contains(item.Id))
        {
            falseCount++;
        }
    }

    Console.WriteLine("Through filter: {0}", news.Count);
    Console.WriteLine("False positive count: {0}", falseCount);

    // Print result in console.
    // NOTE(review): the archivist is intentionally left open here, as in
    // the original; the process ends after the ReadLine below.
    Console.WriteLine("{0}/{1}={2}%", correctCount, interestingNews.Count,
        (double) correctCount / (interestingNews.Count) * 100.0);
    Console.ReadLine();
}
/// <summary>
/// Verifies that the connection-based constructor exposes the connection's
/// data source through <c>StoragePath</c>.
/// </summary>
public void StoragePathConstructor2Test()
{
    string storagePath = TestContext.TestRunDirectory + "\\d.sdf";
    string connectionString = "Data Source=" + storagePath;
    SqlCeConnection c = new SqlCeConnection(connectionString);
    SqlCeArchivist a = new SqlCeArchivist(c);

    // FIX: MSTest's Assert.AreEqual takes (expected, actual); the old call
    // had the arguments swapped, producing a misleading failure message.
    Assert.AreEqual(storagePath, a.StoragePath);

    // Delete the database.
    // FIX: reuse the already-computed path instead of rebuilding the same
    // string twice.
    if (File.Exists(storagePath))
    {
        File.Delete(storagePath);
    }
}
/// <summary>
/// Verifies that the path-based constructor exposes the given path
/// through <c>StoragePath</c>.
/// </summary>
public void StoragePathConstructor1Test()
{
    string storagePath = TestContext.TestRunDirectory + "\\d.sdf";
    SqlCeArchivist a = new SqlCeArchivist(storagePath);

    // FIX: MSTest's Assert.AreEqual takes (expected, actual); the old call
    // had the arguments swapped, producing a misleading failure message.
    Assert.AreEqual(storagePath, a.StoragePath);

    // Delete the database.
    // FIX: reuse the already-computed path instead of rebuilding the same
    // string twice.
    if (File.Exists(storagePath))
    {
        File.Delete(storagePath);
    }
}
/// <summary>
/// Verifies that <c>StorageDeviceExists</c> reports false when the backing
/// database file has been deleted.
/// </summary>
public void StorageDeviceExistsTestNoStorageDevice()
{
    // FIX: compute the path once and reuse it, instead of rebuilding the
    // same concatenation three times.
    string storagePath = TestContext.TestRunDirectory + "\\d.sdf";
    SqlCeArchivist a = new SqlCeArchivist(storagePath);

    // Delete the database so the storage device is guaranteed absent.
    if (File.Exists(storagePath))
    {
        File.Delete(storagePath);
    }

    Assert.IsFalse(a.StorageDeviceExists());
}
/// <summary>
/// Verifies that constructing an archivist from a null connection throws
/// <see cref="ArgumentNullException"/>.
/// </summary>
public void SqlCeArcivistConstructorTestThrowsExpection()
{
    SqlCeConnection nullConnection = null;
    bool caughtArgumentNull = false;

    try
    {
        SqlCeArchivist ignored = new SqlCeArchivist(nullConnection);
    }
    catch (ArgumentNullException)
    {
        caughtArgumentNull = true;
    }

    Assert.IsTrue(caughtArgumentNull,
        "An ArgumentNullException was not thrown.");
}
/// <summary>
/// The main test method. Builds a mixed list of redundant and
/// non-redundant news, runs it through <c>RedundancyFilter</c> and prints
/// how many redundant items were removed and how many sets did not end up
/// with exactly one surviving item.
/// </summary>
public static void Test()
{
    string db = "redundancy_perf.sdf";

    // Check if the file exists.
    bool dbExists = File.Exists(db);

    // Set up database.
    SqlCeArchivist archivist = new SqlCeArchivist(db);
    archivist.Open();

    // Seed only if the db did not exist.
    if (!dbExists)
    {
        // Seed database. Change the dataDir to the correct one.
        string dataDir = @"C:\Users\Andreas Petersen\Documents\GitHub\NyhedsfilterP2\System\NewsTrainer\data";
        DBSeeder.SeedDatabaseWithNews(archivist, dataDir, 100);

        // Seed the database with redundant news. Change the dataDir to the correct one.
        dataDir = @"C:\Users\Andreas Petersen\Documents\GitHub\NyhedsfilterP2\System\NewsFilter\TestProject\redundancy_perf_test";
        DBSeeder.SeedDatabaseWithNews(archivist, dataDir, 100);
        Console.WriteLine();
    }

    // Get the redundant news set.
    List<NewsItem> redundantNews = archivist.GetNews(new NewsQuery()
    {
        CategoryId = archivist.GetCategories().Find(n => n.Name.Equals("redundant")).Id
    });

    // Get some news not in the redundant set. They themselves might
    // include some redundant news, but shouldn't be so in relation
    // to the redundant news set.
    List<NewsItem> nonRedundantNews = archivist.GetNews(new NewsQuery() { Limit = 200 });
    int nonRedundantNewsCount = 100 - redundantNews.Count;

    // Find news not in the redundant news.
    nonRedundantNews = nonRedundantNews.FindAll(n => !redundantNews.Contains(n));

    // Keep exactly the first nonRedundantNewsCount items.
    // FIX: the removal previously started at index nonRedundantNewsCount - 1,
    // which dropped the last wanted item and kept the list's final element
    // instead. Starting at nonRedundantNewsCount removes exactly the tail.
    // The guard avoids RemoveRange throwing on a negative count when the
    // list is already short enough.
    if (nonRedundantNews.Count > nonRedundantNewsCount)
    {
        nonRedundantNews.RemoveRange(
            nonRedundantNewsCount,
            nonRedundantNews.Count - nonRedundantNewsCount);
    }

    // Assemble all the news.
    List<NewsItem> allNews = new List<NewsItem>();
    allNews.AddRange(redundantNews);
    allNews.AddRange(nonRedundantNews);

    // Set all the news as unread.
    foreach (NewsItem n in allNews)
    {
        archivist.SetNewsReadStatus(n, false);
    }

    // Each entry lists the seeded titles of one set of mutually redundant
    // news items.
    string[][] redundantTitleSets = new string[][]
    {
        new string[] { "1", "2" },
        new string[] { "3", "4" },
        new string[] { "5", "6" },
        new string[] { "7", "8", "9" },
        new string[] { "10", "11" },
        new string[] { "12", "13" }
    };

    // Each list item contains a set of redundant news item ids.
    List<List<int>> redundantNewsIds = new List<List<int>>();
    foreach (string[] titles in redundantTitleSets)
    {
        List<int> ids = new List<int>();
        foreach (string title in titles)
        {
            ids.Add(redundantNews.Find(n => n.Title.Equals(title)).Id);
        }
        redundantNewsIds.Add(ids);
    }

    // Filter the news.
    RedundancyFilter filter = new RedundancyFilter();
    List<NewsItem> result = filter.Filter(archivist, allNews);

    // Check that the result filters redundant news.
    int correctCount = 0;
    int falsePositiveCount = 0;
    foreach (List<int> set in redundantNewsIds)
    {
        // Count number of news from this set that went through the filter.
        int newsCount = 0;
        foreach (int id in set)
        {
            if (result.Exists(p => p.Id == id))
            {
                newsCount++;
            }
        }

        // Every removed item counts as correct; a set whose survivor count
        // differs from one (ideal: exactly one representative kept) is
        // flagged.
        correctCount += set.Count - newsCount;
        falsePositiveCount += newsCount != 1 ? 1 : 0;
    }

    // Calculate the expected correct count (all but one item removed per set).
    int expectedCorrectCount = 0;
    foreach (List<int> s in redundantNewsIds)
    {
        expectedCorrectCount += s.Count - 1;
    }

    // Print the results.
    Console.WriteLine("Redundant news removed: {0}/{1}, false positives: {2}",
        correctCount, expectedCorrectCount, falsePositiveCount);

    archivist.Close();
}
/// <summary>
/// Class-level setup for the aggregator tests: opens an archivist backed
/// by a database file in the test run directory and creates a
/// <c>PrivateType</c> accessor for <c>Aggregator</c>.
/// </summary>
public static void AggregatorTestInitialize(TestContext testContext)
{
    // Build the connection string for a database inside the test run
    // directory, then set the shared database connection.
    string connectionString =
        "Data Source=" + testContext.TestRunDirectory + "\\database.sdf";
    DatabaseConnection = new SqlCeConnection(connectionString);

    // Initialize the archivist.
    Archivist = new SqlCeArchivist(DatabaseConnection);
    Archivist.Open();

    // Accessor for the Aggregator's non-public static members.
    AggregatorClass = new PrivateType(typeof(Aggregator));
}
/// <summary>
/// The main test method. Call this to run the test.
/// Runs ten training rounds: each round grows the per-category training
/// data by NEWS_COUNT_INTERVAL items, classifies a fixed held-out test
/// set, and finally writes per-round accuracy percentages to output.csv.
/// </summary>
public static void Test()
{
    SqlCeArchivist a = new SqlCeArchivist(@"C:\Users\mads\Documents\GitHub\NyhedsfilterP2\System\NewsFilter\Experiment\Database.sdf");
    a.Open();

    // Uncomment to seed database. This should be done on the first test run.
    // Replace with dir to data.
    // string dataDir = @"C:\Users\mads\Documents\GitHub\NyhedsfilterP2\System\NewsTrainer\data\";
    // DBSeeder.SeedDatabaseWithNews(a, dataDir, 3000);

    List<Category> categories = a.GetCategories();

    // Training data listed by category (category id -> items seen so far).
    Dictionary<int, List<NewsItem>> trainingData = new Dictionary<int, List<NewsItem>>();
    List<NewsItem> testData = new List<NewsItem>();

    // Store results here: one TestResult per category per round (10 rounds).
    Dictionary<int, TestResult[]> results = new Dictionary<int, TestResult[]>();

    // The same query object is mutated and reused for every fetch below.
    NewsQuery query = new NewsQuery();
    foreach (Category category in categories)
    {
        // Now find testData by setting an offset and a new limit. The test
        // set starts after the first 10 * NEWS_COUNT_INTERVAL items of each
        // category, which are reserved for training in the rounds below.
        query.Offset = 10 * NEWS_COUNT_INTERVAL;
        query.Limit = 1000;
        query.CategoryId = category.Id;
        testData.AddRange(a.GetNews(query));
        results.Add(category.Id, new TestResult[10]);
    }

    // Ten rounds; training data accumulates across rounds because the
    // per-category lists in trainingData persist and are appended to.
    for (int i = 0; i < 10; i++)
    {
        List<CountedCategory> categoryDescriptions = new List<CountedCategory>();
        foreach (Category category in categories)
        {
            // First find training data: the i-th slice of
            // NEWS_COUNT_INTERVAL items for this category.
            query.Offset = i * NEWS_COUNT_INTERVAL;
            query.Limit = NEWS_COUNT_INTERVAL;
            query.CategoryId = category.Id;
            if (!trainingData.ContainsKey(category.Id))
            {
                trainingData.Add(category.Id, new List<NewsItem>());
            }

            // Add to list (appends to the slices from earlier rounds).
            trainingData[category.Id].AddRange(a.GetNews(query));

            // Save as CountedCategory describing all training data so far.
            CountedCategory cat = new CountedCategory();
            cat.DocumentCount = trainingData[category.Id].Count;
            cat.Category = category;

            // Add all term frequencies, summed over every training item.
            foreach (NewsItem news in trainingData[category.Id])
            {
                foreach (Term t in news.Terms)
                {
                    if (!cat.Terms.ContainsKey(t.TermName))
                    {
                        cat.Terms[t.TermName] = 0;
                    }
                    cat.Terms[t.TermName] += t.Frequency;
                }
            }
            categoryDescriptions.Add(cat);
        }

        // Now test classifier for each test data item.
        foreach (NewsItem item in testData)
        {
            List<string> terms = new List<string>();

            // Add all terms to list (all occurrences, repeated by frequency).
            foreach (Term t in item.Terms)
            {
                for (int j = 0; j < t.Frequency; j++)
                {
                    terms.Add(t.TermName);
                }
            }

            // Lazily create the result slot for this category/round.
            if (results[item.Category.Id][i] == null)
            {
                results[item.Category.Id][i] = new TestResult(0, 0);
            }

            // Classify and save result: a hit when the predicted category
            // id matches the item's actual category id.
            if (TermUtils.DetermineCategory(
                terms, categoryDescriptions).Id == item.Category.Id)
            {
                results[item.Category.Id][i].CorrectCount += 1;
            }
            results[item.Category.Id][i].Count += 1;
        }
    }
    a.Close();

    // Build the CSV report.
    StringBuilder csv = new StringBuilder();

    // Write headers: data-count, one column per category, then total.
    csv.Append("data-count;");
    foreach(Category c in categories)
    {
        csv.Append(c.Name);
        csv.Append(";");
    }
    csv.AppendLine("total");

    // Iterate through each row (one row per training round).
    for(int i = 0; i < 10; i++)
    {
        // Write data-count column: cumulative training items per category.
        csv.Append(((i + 1) * NEWS_COUNT_INTERVAL).ToString());
        csv.Append(";");

        int correctSum = 0;
        int countSum = 0;

        // Print each category's accuracy percentage for this round.
        foreach (KeyValuePair<int, TestResult[]> result in results)
        {
            // Calculate average + add to string.
            csv.Append(
                (result.Value[i].CorrectCount /
                (double)result.Value[i].Count * 100.0).ToString());
            csv.Append(";");
            correctSum += result.Value[i].CorrectCount;
            countSum += result.Value[i].Count;
        }

        // Write total average across all categories.
        csv.AppendLine(
            (correctSum / (double)countSum * 100.0).ToString());
    }

    // Write the report to disk.
    File.WriteAllText("output.csv", csv.ToString());
}