Exemple #1
0
        /// <summary>
        /// Handler for the web view's DOM-content-loaded notification: evaluates a script in the
        /// headless web view to read the body text and forwards that text to <c>PageProcessed</c>.
        /// </summary>
        /// <param name="sender">Source of the notification (not used here).</param>
        /// <param name="e">Event data for the DOM-loaded notification (not used here).</param>
        private async void HandleDomLoaded(object sender, WebViewControlDOMContentLoadedEventArgs e)
        {
            // Script passed to the page's JavaScript "eval"; it yields the inner text of the document body.
            string[] scriptArguments = { "document.body.innerText" };
            var pageText = await _headlessWebView.InvokeScriptAsync("eval", scriptArguments);

            // Forward the extracted text to whoever is listening; do nothing when nobody is.
            var handler = PageProcessed;
            if (handler != null)
            {
                handler.Invoke(pageText);
            }
        }
Exemple #2
0
        /// <summary>
        /// Runs a full scrape of <paramref name="site"/>: downloads every page layout of the site,
        /// then processes each downloaded page and passes the results to the output pipeline.
        /// </summary>
        /// <remarks>
        /// If the site is not contained in <c>sites</c> a message is logged and nothing else happens.
        /// Pages whose download result carries the <c>Failed</c> flag are logged and skipped.
        /// <c>PageDownloaded</c> is raised once per downloaded raw page and <c>PageProcessed</c> once
        /// per processed page; both are raised only when a handler is attached.
        /// </remarks>
        /// <param name="site">The site to scrape; its status and start/finish timestamps are updated as the run progresses.</param>
        public void Run(Site site)
        {
            if (!sites.Contains(site))
            {
                site.Log("Site is not registered in Scraper List!");
                return;
            }

            // Buffer that carries the downloaded pages from the download phase to the processing phase.
            Queue<RawPage> rawPages = new Queue<RawPage>();

            site.Status    = SiteStatus.Downloading;
            site.SiteStart = DateTime.Now;

            foreach (PageLayout page in site.Pages.Values)
            {
                // NOTE(review): the log messages in this loop print site.URL, not the URL of the
                // individual page being downloaded - confirm that this is intended.
                site.Log("Downloading " + site.URL + "...", LogType.Downloader);

                DownloadResult result = downloadManager.Next(new Uri(page.URL + page.Path), page.SearchElement, page.JSExecution, page.XPathFilter, page.PageDelay);

                // An error flag is only reported; the result is still used unless it is also flagged as failed.
                if (result.Status.HasFlag(DownloadStatus.ErrorOccurred))
                {
                    site.Log("Error occurred in " + site.URL, LogType.Downloader);
                }

                if (result.Status.HasFlag(DownloadStatus.Failed))
                {
                    site.Log("Failed to download " + site.URL + " skipped..", LogType.Downloader);
                    continue;
                }

                foreach (RawPage downloadedPage in result.Results)
                {
                    // Null-conditional invoke: raising the event with no handler attached must not throw.
                    PageDownloaded?.Invoke(downloadedPage, EventArgs.Empty);
                    rawPages.Enqueue(downloadedPage);
                }

                site.Log("Downloaded " + site.URL + "!", LogType.Downloader);
            }

            site.Status = SiteStatus.Processing;
            while (rawPages.Count > 0)
            {
                // Work through the downloaded pages in the order they were queued.
                RawPage rawPage = rawPages.Dequeue();

                List<NodeResult> results = pageProcessor.Next(rawPage, site, downloadManager);
                PageProcessed?.Invoke(results, EventArgs.Empty);

                // Hand the processed results to the pipeline, keyed by the local path of the page URL.
                outputPipeline.Output(results, site, rawPage.URL.LocalPath);
            }

            site.Status       = SiteStatus.Finished;
            site.SiteFinished = DateTime.Now; // Together with SiteStart this brackets the total running time of the site.
        }
 /// <summary>
 /// Notifies the handlers attached to <c>PageProcessed</c>, passing this instance as the sender
 /// and <see cref="EventArgs.Empty"/> as the event data. Does nothing when no handler is attached.
 /// </summary>
 protected virtual void OnPageProcessed()
 {
     // Take a local copy so the null check and the call operate on the same delegate instance.
     var handler = PageProcessed;
     if (handler != null)
     {
         handler.Invoke(this, EventArgs.Empty);
     }
 }