/// <summary>
/// Returns a curated selection of unread news obtained through the given
/// <c>Archivist</c>, bounded by an item limit, a total reading time and a
/// per-item reading time.
/// </summary>
/// <remarks>
/// The interest- and redundancy-filtered list is stored in
/// <c>CachedNews</c> and reused while that value is non-null; only the
/// quantity filter runs on every call. When the archivist returns no
/// unread news, the empty list is returned immediately without being
/// cached or quantity-filtered.
/// </remarks>
/// <param name="archivist">
/// The <c>Archivist</c> instance used to query news and passed on to
/// each filter.
/// </param>
/// <param name="limit">
/// The maximum number of news items to return.
/// </param>
/// <param name="maxTime">
/// The maximum total reading time for the curated news. Defaults to -1
/// (presumably "no limit" - confirm against <c>QuantityFilter</c>).
/// </param>
/// <param name="maxItemTime">
/// The maximum reading time for a single news item. Defaults to -1
/// (presumably "no limit" - confirm against <c>QuantityFilter</c>).
/// </param>
/// <returns>
/// The list of unread, curated <c>NewsItem</c>s produced by the
/// quantity filter, or the empty query result if there was no unread
/// news.
/// </returns>
public static List<NewsItem> GetCuratedNews(Archivist archivist,
    int limit, int maxTime = -1, int maxItemTime = -1)
{
    // Start from the cached list; a null cache means it must be rebuilt.
    List<NewsItem> candidates = CachedNews;

    if (candidates == null)
    {
        // Ask for at most 100 unread items with date-descending ordering.
        NewsQuery unreadQuery = new NewsQuery
        {
            Read = ReadStatus.Unread,
            OrderDateDesc = true,
            Limit = 100
        };
        candidates = archivist.GetNews(unreadQuery);

        // Nothing unread: skip all filtering and leave the cache untouched.
        if (candidates.Count == 0)
        {
            return candidates;
        }

        // Keep interesting items first, then drop redundant ones.
        candidates = new InterestFilter().Filter(archivist, candidates);
        candidates = new RedundancyFilter().Filter(archivist, candidates);

        // Remember the filtered list for subsequent calls.
        CachedNews = candidates;
    }

    // The quantity constraints depend on the arguments, so they are
    // applied on every call rather than cached.
    return new QuantityFilter(limit, maxTime, maxItemTime)
        .Filter(archivist, candidates);
}
/// <summary>
/// The main test method. Seeds (on first run) and opens the
/// "redundancy_perf.sdf" database, builds a mixed set of redundant and
/// non-redundant news, runs it through a <c>RedundancyFilter</c> and
/// prints how many redundant items were removed and how many redundant
/// sets were filtered incorrectly.
/// </summary>
/// <remarks>
/// The seed directories are hard-coded absolute paths and must be
/// adjusted to the local machine before the first run. The archivist is
/// always closed, even when the test fails part-way through.
/// </remarks>
public static void Test()
{
    // Total number of news items the assembled test set aims for.
    const int totalNewsCount = 100;

    string db = "redundancy_perf.sdf";

    // Check if the file exists before the archivist gets a chance to
    // create it, so we know whether seeding is required.
    bool dbExists = File.Exists(db);

    // Set up database.
    SqlCeArchivist archivist = new SqlCeArchivist(db);
    archivist.Open();

    try
    {
        // Seed only if the db did not exist.
        if (!dbExists)
        {
            // Seed database. Change the dataDir to the correct one.
            string dataDir = @"C:\Users\Andreas Petersen\Documents\GitHub\NyhedsfilterP2\System\NewsTrainer\data";
            DBSeeder.SeedDatabaseWithNews(archivist, dataDir, 100);

            // Seed the database with redundant news. Change the dataDir
            // to the correct one.
            dataDir = @"C:\Users\Andreas Petersen\Documents\GitHub\NyhedsfilterP2\System\NewsFilter\TestProject\redundancy_perf_test";
            DBSeeder.SeedDatabaseWithNews(archivist, dataDir, 100);

            Console.WriteLine();
        }

        // Get the redundant news set (everything in the "redundant"
        // category).
        List<NewsItem> redundantNews = archivist.GetNews(new NewsQuery()
        {
            CategoryId = archivist.GetCategories().
                Find(n => n.Name.Equals("redundant")).Id
        });

        // Get some news not in the redundant set. They themselves might
        // include some redundant news, but shouldn't be so in relation
        // to the redundant news set.
        List<NewsItem> nonRedundantNews = archivist.GetNews(new NewsQuery()
        {
            Limit = 200
        });

        // Number of non-redundant items needed to fill up the test set;
        // never negative, even if the redundant set alone exceeds it.
        int nonRedundantNewsCount =
            Math.Max(0, totalNewsCount - redundantNews.Count);

        // Find news not in the redundant news.
        nonRedundantNews =
            nonRedundantNews.FindAll(n => !redundantNews.Contains(n));

        // Keep only the first nonRedundantNewsCount items. Trimming
        // starts AT that index (not one before it), and is skipped when
        // there are not enough items to trim.
        if (nonRedundantNews.Count > nonRedundantNewsCount)
        {
            nonRedundantNews.RemoveRange(
                nonRedundantNewsCount,
                nonRedundantNews.Count - nonRedundantNewsCount);
        }

        // Assemble all the news: redundant items first, then the rest.
        List<NewsItem> allNews = new List<NewsItem>(
            redundantNews.Count + nonRedundantNews.Count);
        allNews.AddRange(redundantNews);
        allNews.AddRange(nonRedundantNews);

        // Set all the news as unread.
        foreach (NewsItem n in allNews)
        {
            archivist.SetNewsReadStatus(n, false);
        }

        // Titles of the seeded redundant news, grouped so that each
        // inner array is one set of mutually redundant items.
        string[][] redundantTitleSets =
        {
            new[] { "1", "2" },
            new[] { "3", "4" },
            new[] { "5", "6" },
            new[] { "7", "8", "9" },
            new[] { "10", "11" },
            new[] { "12", "13" }
        };

        // Each list item contains the ids of one set of redundant news
        // items, resolved by title.
        List<List<int>> redundantNewsIds = new List<List<int>>();
        foreach (string[] titles in redundantTitleSets)
        {
            List<int> ids = new List<int>(titles.Length);
            foreach (string title in titles)
            {
                ids.Add(redundantNews.Find(n => n.Title.Equals(title)).Id);
            }
            redundantNewsIds.Add(ids);
        }

        // Filter the news.
        RedundancyFilter filter = new RedundancyFilter();
        List<NewsItem> result = filter.Filter(archivist, allNews);

        // Check that the result filters redundant news.
        int correctCount = 0;
        int falsePositiveCount = 0;
        int expectedCorrectCount = 0;
        foreach (List<int> set in redundantNewsIds)
        {
            // Count number of news from this set that went through the
            // filter.
            int newsCount = 0;
            foreach (int id in set)
            {
                if (result.Exists(p => p.Id == id))
                {
                    newsCount++;
                }
            }

            // Items of the set that were removed by the filter.
            correctCount += set.Count - newsCount;

            // NOTE(review): a set is counted here whenever the number of
            // survivors is not exactly one, i.e. both when everything
            // was removed and when more than one item survived.
            falsePositiveCount += newsCount != 1 ? 1 : 0;

            // Ideally exactly one item per set survives.
            expectedCorrectCount += set.Count - 1;
        }

        // Print the results.
        Console.WriteLine("Redundant news removed: {0}/{1}, false positives: {2}",
            correctCount, expectedCorrectCount, falsePositiveCount);
    }
    finally
    {
        // Release the database even if seeding, querying or filtering
        // threw.
        archivist.Close();
    }
}