Example #1
0
        /// <summary>
        /// Seeds the data from JSON-files located in the given directory.
        /// The directory must contain one sub-directory per category; the
        /// sub-directory name is used as the category name and every file in
        /// it is read as one JSON-encoded news item.
        /// </summary>
        /// <param name="archivist">
        /// The <c>Archivist</c> with database access. It receives one
        /// <c>AddCategory</c> call and one <c>AddNews</c> call per category
        /// directory.
        /// </param>
        /// <param name="dataDir">
        /// The directory the data is located in.
        /// </param>
        /// <param name="perCategory">
        /// Maximum number of files to read per category. The same cap is
        /// applied to every category so that no category is more likely than
        /// others.
        /// </param>
        /// <returns>
        /// The number of items that were parsed successfully and handed to
        /// the archivist. Files that failed to parse are not counted.
        /// </returns>
        public static int SeedDatabaseWithNews(Archivist archivist, string dataDir,
            int perCategory = 3000)
        {
            int count = 0;
            int errorCount = 0;

            // Each category should be placed in its own directory.
            string[] categoryDirs = Directory.GetDirectories(dataDir);

            // Upper bound on the number of files that will be read; used only
            // for progress output. It follows perCategory rather than a fixed
            // 3000 so the reported total matches the requested cap.
            int totalFilesCount = categoryDirs.Length * perCategory;

            foreach (string categoryDir in categoryDirs)
            {
                // The directory name doubles as the category name.
                int categoryId = archivist.AddCategory(Path.GetFileName(categoryDir));

                List<NewsMaterial> newsBuffer = new List<NewsMaterial>();

                // Read at most perCategory files from this category.
                string[] files = Directory.GetFiles(categoryDir);
                for (int i = 0; i < perCategory && i < files.Length; i++)
                {
                    string json = File.ReadAllText(files[i], UTF8Encoding.UTF8);
                    NewsMaterial news = ParseJson(json);
                    if (news != null)
                    {
                        newsBuffer.Add(news);
                        count++;
                    }
                    else
                    {
                        // A null parse result is counted as an error and the
                        // file is skipped; seeding continues.
                        Console.WriteLine("Error seeding item.");
                        errorCount++;
                    }
                    Console.WriteLine("Loaded {0}/{1}", count + errorCount,
                        totalFilesCount);
                }

                // Write the whole category to the archivist in one call and
                // report how long that took.
                Stopwatch watch = new Stopwatch();
                watch.Start();
                archivist.AddNews(newsBuffer, categoryId, true);
                watch.Stop();
                Console.WriteLine("Time: {0}", watch.ElapsedMilliseconds);
                Console.WriteLine("Seeded {0}.", newsBuffer.Count);
            }

            return count;
        }
Example #2
0
        /// <summary>
        /// Aggregates news from the given list of <c>NewsSource</c>s.
        /// Every non-null source is parsed in parallel, then each source is
        /// passed to <c>UpdateNewsSource</c> and the collected news is saved
        /// with a single <c>AddNews</c> call.
        /// </summary>
        /// <param name="archivist">
        /// The <c>Archivist</c> in which to save the aggregated news.
        /// </param>
        /// <param name="sources">
        /// The <c>NewsSource</c>s from which to load news. Null entries are
        /// skipped while parsing.
        /// </param>
        private static void AggregateNewsFromSources(Archivist archivist,
            List<NewsSource> sources)
        {
            List<NewsMaterial> newNewsMaterial = new List<NewsMaterial>();

            // List<T> is not safe for concurrent writers, so every append
            // from the parallel loop below goes through this lock.
            object bufferLock = new object();

            // Run ParseNewsItems on each news source, in parallel.
            Parallel.ForEach(sources, source =>
            {
                if (source == null)
                {
                    return;
                }

                // Parse outside the lock so sources are still processed
                // concurrently; only the short append is serialized.
                var parsedItems = ParseNewsItems(source);
                lock (bufferLock)
                {
                    newNewsMaterial.AddRange(parsedItems);
                }
            });

            // NOTE(review): null entries are not filtered here, unlike in the
            // parsing loop above - confirm UpdateNewsSource tolerates null.
            foreach (NewsSource source in sources)
            {
                archivist.UpdateNewsSource(source);
            }

            archivist.AddNews(newNewsMaterial);
        }