/// <summary>
/// Runs this scanner on a single file and packages the outcome into a <see cref="ScanResult"/>.
/// Any exception raised by the scan is caught, logged as a warning and stored in the result
/// instead of being propagated to the caller.
/// </summary>
/// <param name="filename">Path of the file to scan.</param>
/// <returns>
/// A <see cref="ScanResult"/> holding the scan output (null when the scan failed), the file's full
/// path, this scanner, the start and end timestamps, the success flag and the caught exception
/// (null when the scan succeeded).
/// </returns>
public async Task<ScanResult> ScanAsync(string filename)
{
    FileInfo target = new FileInfo(filename);

    // The child logging section is named after the file (without its directory).
    LoggingSection log = logging.CreateChild(target.Name);
    log.Verbose($"Starting {GetType().Name} on {target.FullName}");

    DateTime startedAt = DateTime.Now;
    string scanOutput = null;
    Exception failure = null;

    try
    {
        scanOutput = await DoScanAsync(target.FullName, log);
        log.Verbose("Scan was successfull");
    }
    catch (Exception e)
    {
        failure = e;
        log.Warning($"Scan failed: {e.Message}{Environment.NewLine}{e.StackTrace}");
    }

    DateTime endedAt = DateTime.Now;
    log.Dispose();

    // The scan counts as successful exactly when nothing was caught above.
    bool succeeded = failure == null;
    return new ScanResult(scanOutput, target.FullName, this, startedAt, endedAt, succeeded, failure);
}
/// <summary>
/// Checks that <c>LoggingSection.Verbose</c> hands back a non-null message whose level is
/// <c>LogLevel.VERBOSE</c>.
/// </summary>
public void Log_ReturnedLogMessage_ShouldHaveLogLevelVerbose()
{
    // Arrange: a logging section owned by this test instance.
    LoggingSection section = new LoggingSection(this);

    // Act: emit one verbose message and keep what the call returns.
    LogMessage returnedMessage = section.Verbose("Test");

    // Assert: a message came back and it carries the verbose level.
    Assert.NotNull(returnedMessage);
    Assert.Equal(LogLevel.VERBOSE, returnedMessage.LogLevel);
}
/// <summary>
/// Constructs a FileLogHandler
/// </summary>
/// <remarks>
/// The parent directory of <paramref name="path"/> is created when it does not exist yet.
/// If the file cannot be opened, the failure is logged as an error and the internal writer is
/// left as null; the exception is not propagated to the caller.
/// </remarks>
/// <param name="path">Path to the file that should be written into</param>
/// <param name="format">Pattern according to which a LogMessage is converted into a string. If not provided, a default pattern will be used.</param>
/// <param name="enabledLogLevels">Enabled LogLevels. If not provided, all LogLevels are enabled by default.</param>
public FileLogHandler(string path, string format = null, LogLevel[] enabledLogLevels = null) : base(format, null, enabledLogLevels)
{
    using LoggingSection log = new LoggingSection(this);

    // Directory.GetParent returns null when the path denotes a root directory. Guard against
    // that so the handled "failed to open" path below is reached instead of a NullReferenceException.
    DirectoryInfo parent = Directory.GetParent(path);
    if (parent != null && !parent.Exists)
    {
        log.Verbose($"Creating directory {parent.FullName}.");
        parent.Create();
    }

    try
    {
        // NOTE(review): FileMode.OpenOrCreate neither truncates nor appends, so writing starts at
        // the beginning of an existing file and may leave stale content behind newer, shorter
        // output. Confirm whether FileMode.Append or FileMode.Create is the intended behavior.
        streamWriter = new StreamWriter(File.Open(path, FileMode.OpenOrCreate, FileAccess.Write), Encoding.UTF8);
        streamWriter.AutoFlush = true;
    }
    catch (Exception e)
    {
        streamWriter = null;
        log.Error($"Failed to open FileStream on {path}: {e.Message}.");
    }
}
/// <summary>
/// Extracts the text of a PDF file. For every page the embedded text elements are read first;
/// afterwards every image XObject referenced by the page's resources is run through Tesseract OCR
/// and the recognized text is appended.
/// </summary>
/// <param name="filename">Path of the PDF file to open.</param>
/// <param name="log">Logging section that receives progress, debug and error messages.</param>
/// <returns>
/// The collected text, trimmed, with runs of spaces collapsed to a single space and runs of
/// "\n" collapsed to a single "\n".
/// </returns>
protected override string DoScan(string filename, LoggingSection log)
{
    PdfDocument pdfDocument = PdfReader.Open(filename);
    StringBuilder stringBuilder = new StringBuilder();

    for (int pageIndex = 0; pageIndex < pdfDocument.PageCount; pageIndex++)
    {
        log.Verbose($"Scanning page {pageIndex + 1} of {pdfDocument.PageCount}");
        PdfPage pdfPage = pdfDocument.Pages[pageIndex];

        // Extract text from text elements
        stringBuilder.Append($"{ExtractTextFromPdfPage(pdfPage)}{Environment.NewLine}");

        // Extract text from image elements with Tesseract OCR
        PdfDictionary xObjects = pdfPage.Elements.GetDictionary("/Resources")?.Elements.GetDictionary("/XObject");
        if (xObjects != null)
        {
            foreach (PdfItem item in xObjects.Elements.Values)
            {
                // Only indirect references that resolve to an image dictionary are OCR candidates.
                if (item is PdfReference reference
                    && reference.Value is PdfDictionary xObject
                    && xObject.Elements.GetString("/Subtype") == "/Image")
                {
                    AppendImageOcrText(xObject, stringBuilder, pageIndex + 1, filename, log);
                }
            }
        }

        stringBuilder.Append("\n");
    }

    log.Debug("Trimming text");
    string documentText = stringBuilder.ToString().Trim();

    // Collapse runs of spaces; repeated until no double space is left.
    while (documentText.Contains("  "))
    {
        documentText = documentText.Replace("  ", " ");
    }

    // Collapse runs of line feeds the same way.
    while (documentText.Contains("\n\n"))
    {
        documentText = documentText.Replace("\n\n", "\n");
    }

    // Return the cleaned-up text, not the raw builder content.
    return documentText;
}

/// <summary>
/// Converts one PDF image element to a bitmap, preprocesses it (rotate, upscale, grayscale,
/// denoise), runs OCR on it and appends any non-blank recognized text to <paramref name="output"/>.
/// Elements whose bitmap cannot be extracted are logged and skipped.
/// </summary>
/// <param name="imageElement">PDF dictionary of the image XObject.</param>
/// <param name="output">Builder that receives the recognized text (line feeds replaced by spaces).</param>
/// <param name="pageNumber">One-based page number, used only for log messages.</param>
/// <param name="filename">Path of the scanned file, used only for log messages.</param>
/// <param name="log">Logging section that receives debug and error messages.</param>
private void AppendImageOcrText(PdfDictionary imageElement, StringBuilder output, int pageNumber, string filename, LoggingSection log)
{
    Bitmap bitmap = PdfImageToBitmap(imageElement);
    if (bitmap == null)
    {
        log.Error("Could not extract bitmap from PDF image element. Seems like the PDF image filter type is not supported. Skipping element!");
        return;
    }

    try
    {
        log.Debug("Rotating image");
        bitmap.RotateFlip(RotateFlipType.Rotate90FlipNone);
        log.Debug("Upscaling image 2x");
        BitmapUtils.Scale(ref bitmap, 2);
        log.Debug("Grayscaling image");
        BitmapUtils.GrayscaleWithLockBits(bitmap);
        log.Debug("Denoising image");
        BitmapUtils.DenoiseWithLockBits(bitmap);

        log.Debug("Applying OCR on image");
        using (Pix pix = PixConverter.ToPix(bitmap))
        {
            TesseractEngine tesseractEngine = Services.OCRProvider.AwaitResource();
            try
            {
                // The page is disposed before the engine goes back to the provider.
                // NOTE(review): the Tesseract wrapper is understood to allow only one live page
                // per engine - confirm against its documentation.
                using (Page tesseractPage = tesseractEngine.Process(pix))
                {
                    try
                    {
                        string text = tesseractPage.GetText();
                        log.Debug($"Text is {text.Length} characters long");
                        if (!string.IsNullOrWhiteSpace(text))
                        {
                            output.Append(text.Replace("\n", " "));
                        }
                    }
                    catch (InvalidOperationException e)
                    {
                        log.Error($"OCR failed on Page {pageNumber} of file {filename}:\n{e.StackTrace}");
                    }
                }
            }
            finally
            {
                // Always hand the engine back, even when processing throws, so the provider
                // does not lose the resource.
                Services.OCRProvider.Feed(tesseractEngine);
            }
        }
    }
    finally
    {
        // Scale may have replaced the instance through the ref parameter; release whichever
        // bitmap is current.
        bitmap?.Dispose();
    }
}