/// <summary>
/// Lazily enumerates the lines of the embedded resource "Dataset.{dataset}_urls.csv",
/// removing one pair of surrounding double quotes from a line when present.
/// </summary>
/// <param name="dataset">Dataset name; when it is not contained in <c>Datasets</c>, nothing is yielded.</param>
/// <returns>One string per line returned by <c>FilesManager.ReadLinesFromEmbeddedFile</c>.</returns>
internal static IEnumerable<string> ReadDatasetURLs(string dataset)
{
    if (!Datasets.Contains(dataset))
    {
        yield break;
    }

    foreach (var line in FilesManager.ReadLinesFromEmbeddedFile("Dataset." + dataset + "_urls.csv"))
    {
        // Unwrap only when BOTH quotes are there: a line made of a single '"' would make
        // Substring(1, -1) throw, and a line with only an opening quote would lose its
        // last (non-quote) character.
        var isQuoted = line.Length >= 2
            && line[0] == '"'
            && line[line.Length - 1] == '"';

        if (isQuoted)
        {
            yield return line.Substring(1, line.Length - 2);
        }
        else
        {
            yield return line;
        }
    }
}
/// <summary>
/// Creates the main window: initializes the components, copies the output folder path
/// to the clipboard, loads the URLs of the configured dataset, sets the starting
/// counter and starts the web view initialization.
/// </summary>
public MainWindow()
{
    InitializeComponent();

    // Make the output folder path available through the clipboard
    var outputFolder = FilesManager.GetOutputFolderPath();
    Clipboard.SetText(outputFolder);

    // Materialize the list of URLs to capture for the configured dataset
    var datasetUrls = URLsSource.ReadDatasetURLs(DATASET);
    urlsToCapture = datasetUrls.ToList();

    // A start counter of 1 means "continue after the last saved counter";
    // any other value positions the counter just before the requested start.
    if (STARTCOUNTER == 1)
    {
        counter = FilesManager.GetLastSavedCounter();
    }
    else
    {
        counter = STARTCOUNTER - 1;
    }

    // Started without awaiting its result (a constructor cannot await)
    InitializeWebViewAsync();
}
/// <summary>
/// Event handler that captures the current page and then navigates to the next URL.
/// When INTERACTIVE is true it runs one capture/navigate cycle; otherwise it keeps
/// cycling. Any exception is shown in a message box (interactive) or written to
/// "{counter:D5}_error.log" (non-interactive), and never propagates to the caller.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event arguments (unused).</param>
private async void CaptureScreenshotsAndNavigateToNextURL(object sender, RoutedEventArgs e)
{
    while (true)
    {
        try
        {
            await DoCaptureScreenshots();
            await DoNavigateToNextUrl();
        }
        catch (Exception failure)
        {
            if (!INTERACTIVE)
            {
                // Unattended run: record the failure next to the captures and carry on
                var logFileName = counter.ToString("D5") + "_error.log";
                FilesManager.WriteTextToFile(logFileName, failure.Message);
            }
            else
            {
                MessageBox.Show(failure.Message);
            }
        }

        // Interactive mode handles exactly one URL per invocation
        if (INTERACTIVE)
        {
            break;
        }
    }
}
/// <summary>
/// Type initializer: stores in <c>javascriptDefinitions</c> the value returned by
/// <c>FilesManager.ReadStringFromEmbeddedFile</c> for <c>JS_FILE_PATH</c>.
/// </summary>
static JavascriptInterop()
{
    var embeddedScript = FilesManager.ReadStringFromEmbeddedFile(JS_FILE_PATH);
    javascriptDefinitions = embeddedScript;
}
/// <summary>
/// Captures the page currently loaded in the web view.
/// Steps: build a file name prefixed with the 5-digit counter; skip pages whose name
/// contains "chromewebdata"; report navigation errors; otherwise resize the view to the
/// content size, save the text bounding boxes and a screenshot, display both images,
/// and finally restore (interactive) or randomize (non-interactive) the view width.
/// </summary>
/// <returns>A task that completes when the capture attempt is finished.</returns>
/// <remarks>
/// Exceptions raised while capturing are not rethrown: they are shown in a message box
/// when INTERACTIVE is true, or written to "{fileName}_error.log" otherwise.
/// </remarks>
private async Task DoCaptureScreenshots()
{
    // Get unique file name for the current URL
    var fileName = await JavascriptInterop.GetUniqueFileNameFromURLAsync(webview.CoreWebView2);
    fileName = counter.ToString("D5") + "_" + fileName;

    // Skip URLs with security problems
    if (fileName.Contains("chromewebdata"))
    {
        FilesManager.WriteTextToFile(fileName + "_error.log", "Skipping this URL because of security issues : connexion is not safe");
        return;
    }

    // Navigation failed: report it and do not attempt a capture
    if ((int)currentStatusCode >= 400 && currentErrorStatus != CoreWebView2WebErrorStatus.Unknown)
    {
        var errorString = Enum.GetName(typeof(CoreWebView2WebErrorStatus), currentErrorStatus);
        var errorMessage = "Error navigating to " + currentURL + " => " + currentStatusCode + " / " + errorString;
        if (INTERACTIVE)
        {
            MessageBox.Show(errorMessage);
        }
        else
        {
            FilesManager.WriteTextToFile(fileName + "_error.log", errorMessage);
        }
        return;
    }

    // Get view and content dimensions
    var viewDimensions = ScreenCapture.GetViewDimensions(webview);
    var contentDimensions = await ScreenCapture.GetContentDimensionsAsync(webview.CoreWebView2);

    // Resize view to content size
    ScreenCapture.SetViewDimensions(webview, contentDimensions);

    // Wait 1 second for display to adjust.
    // Awaiting a delay (instead of Thread.Sleep) keeps the calling thread free during
    // the wait; a blocking sleep would hold that thread for the entire second.
    await Task.Delay(1000);

    try
    {
        PageElement pageElementsTree = null;
        int screenshotHeight = 0;
        int screenshotWidth = 0;
        int retryCount = 0;
        do
        {
            retryCount++;

            // Capture a description of all chars/words/lines/blocks bounding boxes
            pageElementsTree = await ScreenCapture.CreateAndSaveTextBoundingBoxes(webview.CoreWebView2, fileName);

            // Capture a screenshot
            var screenFile = await ScreenCapture.CreateAndSaveScreenshotAsync(webview.CoreWebView2, fileName);

            // Draw the bounding boxes on a second screenshot
            var boxesFile = MaskGenerator.DrawBoundingBoxes(screenFile, pageElementsTree);

            // Display both screenshots on screen
            var imageInfo = SixLabors.ImageSharp.Image.Identify(screenFile);
            screenshotHeight = imageInfo.Height;
            screenshotWidth = imageInfo.Width;
            DisplayScreenshot(screenFile, captureScreen);
            DisplayScreenshot(boxesFile, captureBoxes);
        }
        // Check consistency: try again while the screenshot size differs from the root
        // bounding box size. retryCount is incremented before the check, so the body
        // runs at most 4 times.
        while (retryCount <= 3
            && (screenshotHeight != pageElementsTree.boundingBox.height
                || screenshotWidth != pageElementsTree.boundingBox.width));
    }
    catch (Exception ex)
    {
        if (INTERACTIVE)
        {
            MessageBox.Show(ex.Message);
        }
        else
        {
            // Unattended run: keep as much diagnostic detail as possible in the log
            var message = ex.Message;
            message += "\n" + ex.StackTrace;
            if (ex.InnerException != null)
            {
                message += "\n" + ex.InnerException.Message;
                message += "\n" + ex.InnerException.StackTrace;
            }
            FilesManager.WriteTextToFile(fileName + "_error.log", message);
        }
    }

    if (INTERACTIVE)
    {
        // Reset view to its original size
        ScreenCapture.SetViewDimensions(webview, viewDimensions);
    }
    else
    {
        // Choose random width for next image
        viewDimensions.width = ScreenCapture.GetRandowWidth();
        ScreenCapture.SetViewDimensions(webview, viewDimensions);
    }
}