//public void ScheduleWithThreads(List<List<string>> urls)
//{
//    // NOTE: abandoned approach — Parallel.ForEach does not await async lambdas,
//    // so these crawls would run fire-and-forget and their exceptions could be lost.
//    try
//    {
//        // Use ConcurrentQueue to enable safe enqueueing from multiple threads.
//        var exceptions = new ConcurrentQueue<Exception>();
//        double maxExceptionToleranceNumber = urls.Count * urls[0].Count * 0.2;
//        Parallel.ForEach(urls, async (urlsToProcessInThisThread) =>
//        {
//            foreach (var uri in urlsToProcessInThisThread)
//            {
//                try
//                {
//                    //Console.WriteLine($"Processing {uri} on thread {Thread.CurrentThread.ManagedThreadId}");
//                    Crawler crawler = new Crawler();
//                    var pageBooks = await crawler.ProcessAsync(uri);
//                    if (pageBooks != null && pageBooks.Any())
//                        await _context.InsertManyAsync(pageBooks);
//                }
//                catch (Exception e)
//                {
//                    exceptions.Enqueue(e);
//                    // Give up once more than 20% of the urls have failed.
//                    if (exceptions.Count > maxExceptionToleranceNumber) throw new AggregateException(exceptions);
//                }
//            }
//        });
//    }
//    catch (AggregateException ae)
//    {
//        var ignoredExceptions = new List<Exception>();
//        // This is where you can choose which exceptions to handle.
//        foreach (var ex in ae.Flatten().InnerExceptions)
//        {
//            if (ex is XPathNotFoundException)
//                Console.WriteLine(ex.Message);
//            else
//                ignoredExceptions.Add(ex);
//        }
//        if (ignoredExceptions.Count > 0) throw new AggregateException(ignoredExceptions);
//    }
//}

public async Task<int> ScheduleSingleThread(List<string> urls)
{
    int counter = 0;
    foreach (var url in urls)
    {
        try
        {
            // Get the page books.
            Crawler crawler = new Crawler();
            Console.WriteLine($"Processing {url}");
            Log.Information("Processing page {0}", url);
            var pageBooks = await crawler.ProcessAsync(url);
            if (pageBooks != null && pageBooks.Any())
            {
                // Download images for the book url.
                //foreach (var book in pageBooks)
                //{
                //    var firstImage = book.Images.FirstOrDefault();
                //    if (firstImage != null)
                //    {
                //        FileDownload fileDownload = new FileDownload();
                //        firstImage.Content = await fileDownload.Download(firstImage.Url);
                //    }
                //}

                // Finally, save to db.
                await _context.InsertManyAsync(pageBooks);
                Log.Information("Saved books to db");
                counter++;
            }
        }
        catch (Exception ex)
        {
            Log.Error("{0}", ex);
            return counter; // stop on first error
        }
    }
    return counter;
}
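
// A minimal sketch of what the abandoned ScheduleWithThreads idea could look like
// on .NET 6+, where Parallel.ForEachAsync actually awaits its async body (unlike
// Parallel.ForEach above). This method is illustrative only, not part of the
// original API; it reuses the Crawler and _context members assumed above.
public async Task<int> ScheduleWithForEachAsync(List<string> urls)
{
    int counter = 0;
    var options = new ParallelOptions { MaxDegreeOfParallelism = 2 };
    await Parallel.ForEachAsync(urls, options, async (url, cancellationToken) =>
    {
        Crawler crawler = new Crawler();
        var pageBooks = await crawler.ProcessAsync(url);
        if (pageBooks != null && pageBooks.Any())
        {
            await _context.InsertManyAsync(pageBooks);
            // The body runs concurrently, so increment atomically.
            Interlocked.Increment(ref counter);
        }
    });
    return counter;
}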
public async Task<int> ScheduleWithSemaphore(List<string> urls)
{
    int counter = 0;
    using (var semaphore = new SemaphoreSlim(2, 2))
    {
        List<Task> trackedTasks = new List<Task>();
        foreach (var url in urls)
        {
            // Throttle: at most two crawls run at any time.
            await semaphore.WaitAsync().ConfigureAwait(false);
            trackedTasks.Add(Task.Run(async () =>
            {
                try
                {
                    // Get the page books.
                    Crawler crawler = new Crawler();
                    Console.WriteLine($"Processing {url}");
                    Log.Information("Processing page {0}", url);
                    var pageBooks = await crawler.ProcessAsync(url);
                    if (pageBooks != null && pageBooks.Any())
                    {
                        // Download images for the book url.
                        //foreach (var book in pageBooks)
                        //{
                        //    var firstImage = book.Media.FirstOrDefault();
                        //    if (firstImage != null)
                        //    {
                        //        FileDownload fileDownload = new FileDownload();
                        //        firstImage.Content = await fileDownload.Download(firstImage.Url);
                        //    }
                        //}

                        // Finally, save to db.
                        await _context.InsertManyAsync(pageBooks);
                        Log.Information("Saved books to db");
                        // counter is shared across tasks, so increment atomically.
                        Interlocked.Increment(ref counter);
                    }
                }
                finally
                {
                    // Release in a finally block so a failed crawl cannot leak
                    // the slot and starve the remaining urls.
                    semaphore.Release();
                }
            }));
        }
        try
        {
            await Task.WhenAll(trackedTasks);
        }
        catch (Exception ex)
        {
            // A crawl task failed; report it and return what completed.
            Log.Error("{0}", ex);
        }
        return counter;
    }
}
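
// A hypothetical caller for the schedulers above. The Scheduler class name,
// its constructor, and the page-url pattern are illustrative assumptions;
// only the ScheduleWithSemaphore call itself comes from this file.
//
//   var scheduler = new Scheduler(context); // hypothetical ctor
//   var urls = Enumerable.Range(1, 50)
//       .Select(page => $"https://example.com/catalogue/page-{page}.html")
//       .ToList();
//   int saved = await scheduler.ScheduleWithSemaphore(urls);
//   Log.Information("Crawled and saved {0} pages", saved);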