/// <summary>
/// Seeds the data from JSON files located in the given directory.
/// Every immediate subdirectory of <paramref name="dataDir"/> is treated
/// as one category, named after the subdirectory, and at most
/// <paramref name="perCategory"/> files are read from each of them.
/// </summary>
/// <param name="archivist">
/// The <c>Archivist</c> the categories and news are added to.
/// </param>
/// <param name="dataDir">
/// The directory containing one subdirectory per category.
/// </param>
/// <param name="perCategory">
/// Maximum number of files to read per category. The per-category limit
/// keeps any single category from dominating the seeded data.
/// </param>
/// <returns>
/// The number of files that were parsed successfully and handed to the
/// <c>Archivist</c>. Files for which <c>ParseJson</c> returned
/// <c>null</c> are not counted.
/// </returns>
public static int SeedDatabaseWithNews(Archivist archivist, string dataDir, int perCategory = 3000)
{
    int count = 0;
    int errorCount = 0;

    // Each category should be placed in its own directory.
    string[] categoryDirs = Directory.GetDirectories(dataDir);

    // List the files of every category up front so the progress total
    // reflects what will actually be read: no more than perCategory per
    // category, and no more than the directory really contains.
    string[][] categoryFiles = new string[categoryDirs.Length][];
    int totalFilesCount = 0;
    for (int c = 0; c < categoryDirs.Length; c++)
    {
        categoryFiles[c] = Directory.GetFiles(categoryDirs[c]);
        totalFilesCount += Math.Max(0, Math.Min(perCategory, categoryFiles[c].Length));
    }

    for (int c = 0; c < categoryDirs.Length; c++)
    {
        // The directory name doubles as the category name.
        int categoryId = archivist.AddCategory(Path.GetFileName(categoryDirs[c]));
        List<NewsMaterial> newsBuffer = new List<NewsMaterial>();

        // Read up to perCategory files from this category.
        string[] files = categoryFiles[c];
        for (int i = 0; i < perCategory && i < files.Length; i++)
        {
            string json = File.ReadAllText(files[i], Encoding.UTF8);
            NewsMaterial news = ParseJson(json);
            if (news != null)
            {
                newsBuffer.Add(news);
                count++;
            }
            else
            {
                // A file that could not be parsed is skipped, not fatal.
                Console.WriteLine("Error seeding item.");
                errorCount++;
            }

            Console.WriteLine("Loaded {0}/{1}", count + errorCount, totalFilesCount);
        }

        // Write the whole category to the archivist in one call and
        // report how long that took.
        Stopwatch watch = Stopwatch.StartNew();
        archivist.AddNews(newsBuffer, categoryId, true);
        watch.Stop();

        Console.WriteLine("Time: {0}", watch.ElapsedMilliseconds);
        Console.WriteLine("Seeded {0}.", newsBuffer.Count);
    }

    return count;
}
/// <summary>
/// Aggregates news from the given list of <c>NewsSource</c>s.
/// </summary>
/// <param name="archivist">
/// The <c>Archivist</c> in which the aggregated news is saved.
/// </param>
/// <param name="sources">
/// The <c>NewsSource</c>s from which to load news. <c>null</c> entries
/// are skipped while parsing.
/// </param>
private static void AggregateNewsFromSources(Archivist archivist, List<NewsSource> sources)
{
    List<NewsMaterial> newNewsMaterial = new List<NewsMaterial>();

    // List<T> is not safe for concurrent writers, so every append to the
    // shared result list is serialized through this lock.
    object gate = new object();

    // Run ParseNewsItems on each news source, in parallel.
    Parallel.ForEach(sources, source =>
    {
        if (source == null)
        {
            return;
        }

        // Materialize the parse result outside the lock so the parsing
        // itself still runs in parallel; only the append is serialized.
        List<NewsMaterial> parsed = new List<NewsMaterial>(ParseNewsItems(source));

        lock (gate)
        {
            newNewsMaterial.AddRange(parsed);
        }
    });

    // Update every source after parsing has finished.
    // NOTE(review): null entries are passed to UpdateNewsSource unchanged,
    // as before - confirm that UpdateNewsSource tolerates null.
    foreach (NewsSource source in sources)
    {
        archivist.UpdateNewsSource(source);
    }

    archivist.AddNews(newNewsMaterial);
}