예제 #1
0
파일: Scanner.cs 프로젝트: lanicon/HawkEye
        /// <summary>
        /// Runs this scanner on the given file and packages the outcome into a <see cref="ScanResult"/>.
        /// A failing scan does not throw; the exception is logged as a warning and carried in the result.
        /// </summary>
        /// <param name="filename">Path of the file to scan.</param>
        /// <returns>
        /// A result holding the scanned text (null if the scan failed), the full file path, this scanner,
        /// start and end timestamps, the success flag and the caught exception (null on success).
        /// </returns>
        public async Task<ScanResult> ScanAsync(string filename)
        {
            FileInfo       target  = new FileInfo(filename);
            LoggingSection section = logging.CreateChild(target.Name);
            string         path    = target.FullName;

            section.Verbose($"Starting {GetType().Name} on {path}");

            DateTime  begin   = DateTime.Now;
            string    text    = null;
            Exception failure = null;

            try
            {
                text = await DoScanAsync(path, section);
                section.Verbose("Scan was successfull");
            }
            catch (Exception e)
            {
                failure = e;
                section.Warning($"Scan failed: {e.Message}{Environment.NewLine}{e.StackTrace}");
            }

            DateTime finish = DateTime.Now;

            // The scan succeeded exactly when nothing was caught above.
            bool succeeded = failure == null;

            // The child logging section is released before the result is built.
            section.Dispose();

            return new ScanResult(text, path, this, begin, finish, succeeded, failure);
        }
예제 #2
0
        public void Log_ReturnedLogMessage_ShouldHaveLogLevelVerbose()
        {
            // Checks that Verbose() hands back a message object tagged with LogLevel.VERBOSE.

            //Arrange
            LoggingSection section = new LoggingSection(this);

            //Act
            LogMessage returned = section.Verbose("Test");

            //Assert
            Assert.NotNull(returned);
            Assert.Equal(LogLevel.VERBOSE, returned.LogLevel);
        }
예제 #3
0
        /// <summary>
        /// Constructs a FileLogHandler.
        /// The parent directory of <paramref name="path"/> is created if it does not exist yet.
        /// If the file cannot be opened for writing, the failure is logged as an error and the
        /// internal stream writer is left as null instead of an exception being thrown.
        /// </summary>
        /// <param name="path">Path to the file that should be written into</param>
        /// <param name="format">Pattern according to which a LogMessage is converted into a string. If not provided, a default pattern will be used.</param>
        /// <param name="enabledLogLevels">Enabled LogLevels. If not provided, all LogLevels are enabled by default.</param>
        public FileLogHandler(string path, string format = null, LogLevel[] enabledLogLevels = null) : base(format, null, enabledLogLevels)
        {
            using LoggingSection log = new LoggingSection(this);

            // Directory.GetParent returns null when the path denotes a root directory,
            // in which case there is nothing to create.
            DirectoryInfo parent = Directory.GetParent(path);

            if (parent != null && !parent.Exists)
            {
                log.Verbose($"Creating directory {parent.FullName}.");
                parent.Create();
            }

            FileStream fileStream = null;

            try
            {
                fileStream             = File.Open(path, FileMode.OpenOrCreate, FileAccess.Write);
                streamWriter           = new StreamWriter(fileStream, Encoding.UTF8);
                streamWriter.AutoFlush = true;
            }
            catch (Exception e)
            {
                // Release the file handle if the stream was opened but wrapping/configuring it failed;
                // otherwise the file would stay locked until the finalizer runs.
                fileStream?.Dispose();
                streamWriter = null;
                log.Error($"Failed to open FileStream on {path}: {e.Message}.");
            }
        }
예제 #4
0
        /// <summary>
        /// Extracts the text of a PDF document: the text elements of every page plus, via Tesseract OCR,
        /// the text recognized in the image XObjects referenced by the page's resources.
        /// </summary>
        /// <param name="filename">Path of the PDF file to scan.</param>
        /// <param name="log">Logging section that receives progress, debug and error messages.</param>
        /// <returns>
        /// The extracted text, trimmed, with consecutive spaces collapsed to a single space and
        /// consecutive line feeds collapsed to a single line feed.
        /// </returns>
        protected override string DoScan(string filename, LoggingSection log)
        {
            PdfDocument   pdfDocument   = PdfReader.Open(filename);
            StringBuilder stringBuilder = new StringBuilder();

            for (int pageIndex = 0; pageIndex < pdfDocument.PageCount; pageIndex++)
            {
                log.Verbose($"Scanning page {pageIndex + 1} of {pdfDocument.PageCount}");
                PdfPage pdfPage = pdfDocument.Pages[pageIndex];
                //Extract text from text elements
                stringBuilder.Append($"{ExtractTextFromPdfPage(pdfPage)}{Environment.NewLine}");

                //Extract text from image elements with Tesseract OCR - awesome! :)
                PdfDictionary resources = pdfPage.Elements.GetDictionary("/Resources");
                PdfDictionary xObjects  = resources?.Elements.GetDictionary("/XObject");
                if (xObjects != null)
                {
                    foreach (PdfItem item in xObjects.Elements.Values)
                    {
                        //Only indirect references that resolve to image dictionaries are of interest
                        PdfDictionary xObject = (item as PdfReference)?.Value as PdfDictionary;
                        if (xObject == null || xObject.Elements.GetString("/Subtype") != "/Image")
                        {
                            continue;
                        }

                        Bitmap bitmap = PdfImageToBitmap(xObject);
                        if (bitmap == null)
                        {
                            log.Error("Could not extract bitmap from PDF image element. Seems like the PDF image filter type is not supported. Skipping element!");
                            continue;
                        }

                        Pix             pix             = null;
                        TesseractEngine tesseractEngine = null;
                        Page            tesseractPage   = null;
                        try
                        {
                            log.Debug("Rotating image");
                            bitmap.RotateFlip(RotateFlipType.Rotate90FlipNone);
                            log.Debug("Upscaling image 2x");
                            BitmapUtils.Scale(ref bitmap, 2);
                            log.Debug("Grayscaling image");
                            BitmapUtils.GrayscaleWithLockBits(bitmap);
                            log.Debug("Denoising image");
                            BitmapUtils.DenoiseWithLockBits(bitmap);
                            log.Debug("Applying OCR on image");
                            pix             = PixConverter.ToPix(bitmap);
                            tesseractEngine = Services.OCRProvider.AwaitResource();
                            tesseractPage   = tesseractEngine.Process(pix);
                            try
                            {
                                string text = tesseractPage.GetText();
                                log.Debug($"Text is {text.Length} characters long");
                                if (!string.IsNullOrWhiteSpace(text))
                                {
                                    stringBuilder.Append(text.Replace("\n", " "));
                                }
                            }
                            catch (InvalidOperationException e)
                            {
                                //Report the page 1-based, consistent with the progress message above
                                log.Error($"OCR failed on Page {pageIndex + 1} of file {filename}:\n{e.StackTrace}");
                            }
                        }
                        finally
                        {
                            //Release the page before the engine goes back to the pool, so the next
                            //consumer of the engine can start a new recognition on it.
                            tesseractPage?.Dispose();
                            if (tesseractEngine != null)
                            {
                                Services.OCRProvider.Feed(tesseractEngine);
                            }
                            pix?.Dispose();
                            bitmap?.Dispose();
                        }
                    }
                }
                stringBuilder.Append("\n");
            }

            log.Debug("Trimming text");
            string documentText = stringBuilder.ToString().Trim();

            while (documentText.Contains("  "))
            {
                documentText = documentText.Replace("  ", " ");
            }
            while (documentText.Contains("\n\n"))
            {
                documentText = documentText.Replace("\n\n", "\n");
            }

            //Return the cleaned-up text, not the raw builder content
            return documentText;
        }