/// <summary>
/// Handler for the web view's DOM-content-loaded notification: reads the visible text of the
/// loaded document and forwards it to the <c>PageProcessed</c> callback.
/// </summary>
/// <param name="sender">The object that raised the notification (unused).</param>
/// <param name="e">The DOM-content-loaded event data (unused).</param>
private async void HandleDomLoaded(object sender, WebViewControlDOMContentLoadedEventArgs e)
{
    // Evaluate "document.body.innerText" inside the web view through its script "eval" entry point.
    string[] scriptArguments = { "document.body.innerText" };
    var pageText = await _headlessWebView.InvokeScriptAsync("eval", scriptArguments);

    // Snapshot the callback so the null check and the call see the same delegate.
    var callback = PageProcessed;
    if (callback != null)
    {
        callback.Invoke(pageText);
    }
}
/// <summary>
/// Runs a full scrape of <paramref name="site"/> in two phases: every page layout in
/// <c>site.Pages</c> is downloaded first, then each downloaded page is processed and its
/// results are handed to the output pipeline.
/// </summary>
/// <remarks>
/// <c>PageDownloaded</c> is raised once per downloaded raw page and <c>PageProcessed</c> once per
/// processed page; both are skipped when there are no subscribers. Pages whose download
/// reports <c>DownloadStatus.Failed</c> are logged and skipped.
/// </remarks>
/// <param name="site">
/// The site to scrape. If it is not contained in <c>sites</c>, a message is logged on the site
/// and the method returns without doing any work.
/// </param>
public void Run(Site site)
{
    if (!sites.Contains(site))
    {
        site.Log("Site is not registered in Scraper List!");
        return;
    }

    // Raw pages are buffered here between the download phase and the processing phase.
    Queue<RawPage> rawPages = new Queue<RawPage>();

    site.Status = SiteStatus.Downloading;
    site.SiteStart = DateTime.Now;

    foreach (PageLayout page in site.Pages.Values)
    {
        // NOTE(review): these messages report site.URL rather than the individual page's URL — confirm that is intended.
        site.Log("Downloading " + site.URL + "...", LogType.Downloader);

        DownloadResult result = downloadManager.Next(
            new Uri(page.URL + page.Path),
            page.SearchElement,
            page.JSExecution,
            page.XPathFilter,
            page.PageDelay);

        // An error is only reported; the result is still used unless the download failed outright.
        if (result.Status.HasFlag(DownloadStatus.ErrorOccurred))
        {
            site.Log("Error occurred in " + site.URL, LogType.Downloader);
        }

        if (result.Status.HasFlag(DownloadStatus.Failed))
        {
            site.Log("Failed to download " + site.URL + " skipped..", LogType.Downloader);
            continue;
        }

        result.Results.ForEach(downloadedPage =>
        {
            // Null-conditional: raising the event with no subscribers must not throw.
            PageDownloaded?.Invoke(downloadedPage, EventArgs.Empty);
            rawPages.Enqueue(downloadedPage);
        });

        site.Log("Downloaded " + site.URL + "!", LogType.Downloader);
    }

    site.Status = SiteStatus.Processing;

    // Drain the buffered pages in download order.
    while (rawPages.Count > 0)
    {
        RawPage rawPage = rawPages.Dequeue();

        List<NodeResult> results = pageProcessor.Next(rawPage, site, downloadManager);
        PageProcessed?.Invoke(results, EventArgs.Empty);

        // Pass the processed results on to the pipeline, keyed by the page's local path.
        outputPipeline.Output(results, site, rawPage.URL.LocalPath);
    }

    site.Status = SiteStatus.Finished;
    site.SiteFinished = DateTime.Now; // Together with SiteStart this brackets the site's total running time.
}
/// <summary>
/// Invokes the <c>PageProcessed</c> handlers, passing this instance as the sender and
/// <see cref="EventArgs.Empty"/> as the event data. Does nothing when no handler is attached.
/// </summary>
protected virtual void OnPageProcessed()
{
    // Take a local copy so the null check and the invocation use the same delegate instance.
    var handler = PageProcessed;
    if (handler == null)
    {
        return;
    }

    handler.Invoke(this, EventArgs.Empty);
}