コード例 #1
0
        //public void ScheduleWithThreads(
        //    List<List<string>> urls)
        //{
        //    try
        //    {
        //        // Use ConcurrentQueue to enable safe enqueueing from multiple threads.
        //        var exceptions = new ConcurrentQueue<Exception>();
        //        double maxExceptionToleranceNumber = urls.Count() * urls[0].Count * 0.2;

        //        Parallel.ForEach(urls, async (urlsToProcessInThisThread) =>
        //        {
        //            foreach (var uri in urlsToProcessInThisThread)
        //            {
        //                try
        //                {
        //                    //Console.WriteLine($"Processing {uri} on thread {Thread.CurrentThread.ManagedThreadId}");
        //                    Crawler crawler = new Crawler();
        //                    var pageBooks = await crawler.ProcessAsync(uri);
        //                    if (pageBooks != null && pageBooks.Any())
        //                        await _context.InsertManyAsync(pageBooks);
        //                }
        //                catch (Exception e)
        //                {
        //                    exceptions.Enqueue(e);
        //                    if (exceptions.Count > maxExceptionToleranceNumber) throw new AggregateException(exceptions);
        //                }
        //            }
        //        });
        //    }
        //    catch (AggregateException ae)
        //    {
        //        var ignoredExceptions = new List<Exception>();
        //        // This is where you can choose which exceptions to handle.
        //        foreach (var ex in ae.Flatten().InnerExceptions)
        //        {
        //            if (ex is XPathNotFoundException)
        //                Console.WriteLine(ex.Message);
        //            else
        //                ignoredExceptions.Add(ex);
        //        }
        //        if (ignoredExceptions.Count > 0) throw new AggregateException(ignoredExceptions);
        //    }
        //}

        /// <summary>
        /// Crawls the given page URLs one after another and stores the books found on each page.
        /// </summary>
        /// <param name="urls">Page URLs to crawl, processed in list order.</param>
        /// <returns>
        /// The number of pages for which books were inserted. Processing stops at the first
        /// exception, which is logged, and the count reached so far is returned.
        /// </returns>
        public async Task <int> ScheduleSingleThread(List <string> urls)
        {
            int savedPages = 0;

            foreach (var pageUrl in urls)
            {
                try
                {
                    // A fresh crawler is created for every page.
                    var pageCrawler = new Crawler();

                    Console.WriteLine($"Processing {pageUrl}");
                    Log.Information("Processing page {0}", pageUrl);

                    var books = await pageCrawler.ProcessAsync(pageUrl);

                    // Nothing to persist for this page; move on to the next URL.
                    if (books == null || !books.Any())
                    {
                        continue;
                    }

                    // Image download for the books is currently disabled; only the records are saved.
                    await _context.InsertManyAsync(books);

                    Log.Information("Saved books to db");
                    savedPages += 1;
                }
                catch (System.Exception failure)
                {
                    Log.Error("{0}", failure);
                    break; // stop on first error
                }
            }

            return savedPages;
        }
コード例 #2
0
        /// <summary>
        /// Crawls the given page URLs with at most two pages in flight at a time and stores
        /// the books found on each page.
        /// </summary>
        /// <param name="urls">Page URLs to crawl.</param>
        /// <returns>
        /// The number of pages for which books were inserted. When a page fails, the error is
        /// logged, no further pages are scheduled, pages already in flight are allowed to
        /// finish, and the count reached is returned.
        /// </returns>
        public async Task <int> ScheduleWithSemaphore(List <string> urls)
        {
            int counter = 0;

            // Set to 1 by the first worker that fails; checked by the scheduling loop.
            int failed = 0;

            using (var semaphore = new SemaphoreSlim(2, 2))
            {
                List <Task> trackedTasks = new List <Task>();

                foreach (var url in urls)
                {
                    await semaphore.WaitAsync().ConfigureAwait(false);

                    if (Volatile.Read(ref failed) != 0)
                    {
                        // Hand back the permit we just took; no worker will be started for it.
                        semaphore.Release();
                        break; // stop on first error
                    }

                    trackedTasks.Add(Task.Run(async() =>
                    {
                        try
                        {
                            //get the page books
                            Crawler crawler = new Crawler();

                            Console.WriteLine($"Processing {url}");
                            Log.Information("Processing page {0}", url);
                            var pageBooks = await crawler.ProcessAsync(url);

                            if (pageBooks != null && pageBooks.Any())
                            {
                                // Image download for the books is currently disabled; only the records are saved.
                                await _context.InsertManyAsync(pageBooks);
                                Log.Information("Saved books to db");

                                // Workers run concurrently, so the shared counter must be updated atomically.
                                Interlocked.Increment(ref counter);
                            }
                        }
                        catch (Exception ex)
                        {
                            // Failures happen inside the worker, so they have to be handled here;
                            // a try/catch around Task.Run in the loop would never see them.
                            Log.Error("{0}", ex);
                            Volatile.Write(ref failed, 1);
                        }
                        finally
                        {
                            // Always return the permit, otherwise two failed pages would leave
                            // the scheduling loop waiting on the semaphore forever.
                            semaphore.Release();
                        }
                    }));
                }

                // Wait for every started worker before leaving the using block, so the semaphore
                // is never disposed while a worker still has to release it.
                await Task.WhenAll(trackedTasks).ConfigureAwait(false);

                return(counter);
            }
        }