Example #1
0
        /// <summary>
        /// Console entry point: prompts for keywords and a page (or page range), prepares the
        /// query via <c>UserInput</c>, runs the scraper, and prints the elapsed time.
        /// </summary>
        /// <param name="args">Command-line arguments; not used by this method.</param>
        static void Main(string[] args)
        {
            Console.Title = "Quote Generator by Mike";

            // Prompt for the search keywords first, then for the page selection.
            Console.WriteLine("Generate quotes by keyword (separate with spaces/commas): ");
            var keywordInput = Console.ReadLine();

            Console.WriteLine("\nEnter the page to query.\nTo get a range of pages, enter the start and end numbers of the range, separated by commas: ");
            var pageInput = Console.ReadLine();

            // The query object starts from the base address; UserInput fills in pages and the final URL.
            var query = new PageAndUrl("https://www.goodreads.com/quotes");
            UserInput.GetPages(query, pageInput);
            UserInput.GetUrl(query, keywordInput);

            var scraper = new Scraper();

            // Only the scrape itself is timed; this call blocks until the async scrape finishes.
            // `stopwatch` is declared outside this method.
            stopwatch.Start();
            scraper.PageScraperAsync(query).Wait();
            stopwatch.Stop();

            Console.WriteLine($"\nFinished. Time Elapsed: {stopwatch.Elapsed}");

            // Wait for a key press before returning.
            Console.ReadKey();
        }
Example #2
0
        /// <summary>
        /// Builds the query URL stem from the user's keywords and stores it back in
        /// <c>pageAndUrl.url</c>. The result always ends in <c>page=</c> so that a page
        /// number can be appended directly by the caller.
        /// </summary>
        /// <param name="pageAndUrl">
        /// Holder whose <c>url</c> is read as the base address and overwritten with the built stem.
        /// </param>
        /// <param name="keywords">
        /// Raw keyword input, split on <c>delim</c>. May be null, empty, or consist only of
        /// separators; in all of those cases no tag filter is added.
        /// </param>
        public static void GetUrl(PageAndUrl pageAndUrl, string keywords)
        {
            StringBuilder newUrl = new StringBuilder();

            newUrl.Append(pageAndUrl.url);

            // Input made only of separators yields an empty array, so treat it like "no keywords"
            // instead of indexing into it.
            string[] keywordList = string.IsNullOrEmpty(keywords)
                ? new string[0]
                : keywords.Split(delim, StringSplitOptions.RemoveEmptyEntries);

            if (keywordList.Length > 0)
            {
                for (int i = 0; i < keywordList.Length; i++)
                {
                    // First keyword opens the query string; the rest are joined with '+'.
                    newUrl.Append(i == 0 ? "/tag?id=" : "+");

                    // Escape each keyword so characters such as '&', '#', or '+' cannot
                    // break out of the id parameter.
                    newUrl.Append(Uri.EscapeDataString(keywordList[i]));
                }

                newUrl.Append("&page=");
            }
            else
            {
                // No "?id=..." was written, so the page parameter must start the query string.
                newUrl.Append("?page=");
            }

            pageAndUrl.url = newUrl.ToString();
        }
Example #3
0
        /// <summary>
        /// Downloads the requested page(s), extracts the quote/author nodes from every
        /// downloaded document, and then parses those nodes via <c>GetQuotesAndAuthorsText</c>.
        /// Progress messages are written to the console after each stage.
        /// </summary>
        /// <param name="pageAndUrl">
        /// Supplies the URL stem (<c>url</c>, to which the page number is appended) and the
        /// page list (<c>pages</c>): one entry for a single page, otherwise the first two
        /// entries are used as the start and end of a range.
        /// </param>
        /// <returns>A task that completes when all three stages have finished.</returns>
        public async Task PageScraperAsync(PageAndUrl pageAndUrl)
        {
            #region Asynchronously loads web pages
            if (pageAndUrl.pages.Count == 1)
            {
                int startPage;

                if (pageAndUrl.pages[0] == 1)
                {
                    // Page 1 is used as-is, without calling MaxPossiblePageCheck.
                    startPage = pageAndUrl.pages[0];
                }
                else
                {
                    // NOTE(review): MaxPossiblePageCheck presumably limits the request to the
                    // last available page — confirm against its implementation.
                    startPage = await MaxPossiblePageCheck(pageAndUrl.url, pageAndUrl.pages[0]);
                }

                string urlPage = pageAndUrl.url + $"{startPage}&utf8=✓";
                htmlDocsList.Add(Task.Run(() => web.LoadFromWebAsync(urlPage)));
            }
            else
            {
                int startPage = pageAndUrl.pages[0];
                int endPage   = await MaxPossiblePageCheck(pageAndUrl.url, pageAndUrl.pages[1]);

                for (int i = startPage; i <= endPage; i++)
                {
                    // Build the URL now, in a per-iteration local. A lambda that captures the
                    // `for` variable `i` directly shares one variable across all iterations,
                    // so tasks that start late would request the wrong (or a duplicate) page.
                    string urlPage = pageAndUrl.url + $"{i}&utf8=✓";
                    htmlDocsList.Add(Task.Run(() => web.LoadFromWebAsync(urlPage)));
                }
            }

            var allHtmlDocuments = await Task.WhenAll(htmlDocsList);

            Console.WriteLine("\nWebpages Downloaded.\n");
            #endregion

            #region Extracts quote and author html nodes from each page
            foreach (HtmlDocument document in allHtmlDocuments)
            {
                QuoteAndAuthorNodes.Add(GetQuoteAndAuthorNode(document));
            }

            // Iterate over an array copy rather than the QuoteAndAuthorNodes collection itself.
            QuoteHtmlData[] QuoteAuthorNodesArray = QuoteAndAuthorNodes.ToArray();
            Console.WriteLine("Nodes extracted.\n");
            #endregion

            #region Stores quotes & authors into quoteData
            foreach (QuoteHtmlData node in QuoteAuthorNodesArray)
            {
                GetQuotesAndAuthorsText(node);
            }

            Console.WriteLine("Quotes and authors parsed.\n");
            #endregion
        }
Example #4
0
        /// <summary>
        /// Parses the user's page input into a single page or a (start, end) pair and stores
        /// it in <c>pageAndUrl.pages</c>.
        /// </summary>
        /// <remarks>
        /// Only the first token and the first later token that differs from it are used.
        /// Tokens that are not valid integers, and values of zero or less, are replaced by
        /// page 1. If the second page is smaller than the first, the two are swapped via
        /// <c>SwapPages</c>. Duplicate values are removed, so an input such as "3,3" yields a
        /// single page. Input that contains no tokens at all yields page 1.
        /// </remarks>
        /// <param name="pageAndUrl">Holder whose <c>pages</c> list is overwritten with the result.</param>
        /// <param name="numOfPagesString">
        /// Raw page input; passed through <c>PageInputNullCheck</c> and then split on <c>stringSplit</c>.
        /// </param>
        public static void GetPages(PageAndUrl pageAndUrl, string numOfPagesString)
        {
            numOfPagesString = PageInputNullCheck(numOfPagesString);

            string[]   tokens = numOfPagesString.Split(stringSplit, StringSplitOptions.RemoveEmptyEntries);
            List <int> pages  = new List <int>();

            if (tokens.Length == 0)
            {
                // Input consisted only of separators: fall back to the first page instead of
                // indexing into an empty array.
                pages.Add(1);
            }
            else
            {
                pages.Add(ParsePageOrDefault(tokens[0]));

                // The range end is the first later token whose text differs from the first one.
                for (int i = 1; i < tokens.Length; i++)
                {
                    if (!tokens[0].Equals(tokens[i]))
                    {
                        pages.Add(ParsePageOrDefault(tokens[i]));
                        break;
                    }
                }
            }

            if (pages.Count > 1 && pages[1] < pages[0])
            {
                SwapPages(pages);
            }

            pageAndUrl.pages = pages.Distinct().ToList();
        }

        // Converts one input token to a page number; unparsable or non-positive values become 1.
        private static int ParsePageOrDefault(string token)
        {
            int page;

            return int.TryParse(token, out page) && page > 0 ? page : 1;
        }