/// <summary>
/// Opens the document at <paramref name="path"/> with fixed page dimensions (1080, 1920),
/// opens the requested page, and returns whatever <c>GetModifiedImage</c> produces for it.
/// </summary>
/// <param name="library">Document library used to open the file.</param>
/// <param name="path">Path of the document to open.</param>
/// <param name="pageIndex">Index of the page handed to <c>GetPageReader</c>.</param>
/// <returns>The byte array returned by <c>GetModifiedImage</c> for the page.</returns>
/// <remarks>
/// NOTE(review): no grayscale or annotation option is passed in this method itself;
/// presumably <c>GetModifiedImage</c> is where that behaviour lives - confirm.
/// </remarks>
internal static byte[] ConvertPageToGrayscaleImageIncludeAnnotations(IDocLib library, string path, int pageIndex)
{
    var dimensions = new PageDimensions(1080, 1920);

    // Both readers are disposed (page first, then document) when the block exits.
    using (var document = library.GetDocReader(path, dimensions))
    using (var page = document.GetPageReader(pageIndex))
    {
        return GetModifiedImage(page);
    }
}
/// <summary>
/// Opens the document at <paramref name="path"/> using the single-argument
/// <c>PageDimensions(1.337)</c> constructor, opens the requested page, and returns
/// whatever <c>GetModifiedImage</c> produces for it.
/// </summary>
/// <param name="library">Document library used to open the file.</param>
/// <param name="path">Path of the document to open.</param>
/// <param name="pageIndex">Index of the page handed to <c>GetPageReader</c>.</param>
/// <returns>The byte array returned by <c>GetModifiedImage</c> for the page.</returns>
internal static byte[] ConvertPageToSimpleImageWithLetterOutlinesUsingScaling(IDocLib library, string path, int pageIndex)
{
    // presumably a scaling factor, per the method name - TODO confirm against PageDimensions
    var dimensions = new PageDimensions(1.337);

    // Both readers are disposed (page first, then document) when the block exits.
    using (var document = library.GetDocReader(path, dimensions))
    using (var page = document.GetPageReader(pageIndex))
    {
        return GetModifiedImage(page);
    }
}
/// <summary>
/// Opens the document at <paramref name="path"/> with fixed page dimensions (1080, 1920),
/// opens the requested page, and returns whatever <c>GetModifiedImage</c> produces for it.
/// </summary>
/// <param name="library">Document library used to open the file.</param>
/// <param name="path">Path of the document to open.</param>
/// <param name="pageIndex">Index of the page handed to <c>GetPageReader</c>.</param>
/// <returns>The byte array returned by <c>GetModifiedImage</c> for the page.</returns>
/// <remarks>
/// NOTE(review): transparency is not handled in this method itself; presumably
/// <c>GetModifiedImage</c> is responsible for it - confirm.
/// </remarks>
internal static byte[] ConvertPageToSimpleImageWithoutTransparency(IDocLib library, string path, int pageIndex)
{
    var dimensions = new PageDimensions(1080, 1920);

    // Both readers are disposed (page first, then document) when the block exits.
    using (var document = library.GetDocReader(path, dimensions))
    using (var page = document.GetPageReader(pageIndex))
    {
        return GetModifiedImage(page);
    }
}
/// <summary>
/// Renders the first page of a PDF to a PNG, compositing the rendered page over a white
/// backdrop, and returns the PNG in a memory stream.
/// </summary>
/// <param name="filepdf">Stream containing the PDF; it is read via <c>ReadToEnd</c>.</param>
/// <returns>
/// A <see cref="MemoryStream"/> holding the PNG data, positioned at 0. The caller owns
/// the stream and is responsible for disposing it.
/// </returns>
/// <remarks>
/// Any exception raised while reading, rendering or encoding propagates to the caller;
/// the intermediate images and the output stream are disposed before it does.
/// </remarks>
public MemoryStream PdfToImage(Stream filepdf)
{
    var pdfBytes = ReadToEnd(filepdf);
    MagickColor backdropColor = MagickColors.White; // replace transparent pixels with this color
    const int pdfPageNum = 0;                       // first page is 0

    var memoryStream = new MemoryStream();
    try
    {
        // NOTE(review): this disposes DocLib.Instance at the end of every call - confirm that
        // is intended if the instance is shared with other callers.
        using (IDocLib pdfLibrary = DocLib.Instance)
        using (var docReader = pdfLibrary.GetDocReader(pdfBytes, new PageDimensions(1.0d)))
        using (var pageReader = docReader.GetPageReader(pdfPageNum))
        {
            // The raw page image is reordered so it can be read with PixelMapping.RGBA below.
            var rawBytes = RearrangeBytesToRGBA(pageReader.GetImage());
            var width = pageReader.GetPageWidth();
            var height = pageReader.GetPageHeight();
            var pixelReadSettings = new PixelReadSettings(width, height, StorageType.Char, PixelMapping.RGBA);

            // Both images live in using blocks so they are released even when Composite or
            // Write throws; previously the backdrop was only disposed on the success path.
            using (var imgPdfOverlay = new MagickImage(rawBytes, pixelReadSettings))
            using (var imgBackdrop = new MagickImage(backdropColor, width, height))
            {
                imgBackdrop.Composite(imgPdfOverlay, CompositeOperator.Over);
                imgBackdrop.Write(memoryStream, MagickFormat.Png);
            }
        }
    }
    catch
    {
        // The stream never reaches the caller on failure, so release it here and rethrow.
        memoryStream.Dispose();
        throw;
    }

    memoryStream.Position = 0;
    return memoryStream;
}
/// <summary>
/// Searches every <c>*.pdf</c> file under the configured path for the configured keyword,
/// updating <c>StatusName</c>, <c>ResultInfo</c>, <c>ItemsReady</c> and <c>Stats</c> as it goes.
/// </summary>
/// <remarks>
/// For each page the extracted text is searched via <c>SearchPage</c>. When OCR is enabled
/// and a page yields no text, the page is rendered to a bitmap and its encoded bytes are
/// passed to <c>ImageToText</c> instead. If the keyword is blank no files are read and the
/// run finishes immediately. Exceptions from file discovery or document reading are not
/// caught here and propagate to the caller.
/// </remarks>
void StartReading()
{
    var searchOptions = Options;

    // Reset the state left over from any previous run.
    Results.Clear();
    StatusName = _infoService.GetSearchStatus(SearchStatus.Initializing);
    ItemsReady = false;
    Stats.IsReady = false;
    Stats.FilesAnalyzed = "0/0";
    Stats.PagesAnalyzed = 0;
    Stats.ExecutionTime = "...";

    var fileCounter = 0;
    var watch = System.Diagnostics.Stopwatch.StartNew();

    if (!string.IsNullOrWhiteSpace(searchOptions.Keyword))
    {
        // The two-argument GetFiles overload searches the top directory only, so a single
        // call with an explicit SearchOption covers both cases.
        var searchScope = searchOptions.UseSubfolders
            ? SearchOption.AllDirectories
            : SearchOption.TopDirectoryOnly;
        string[] discoveredFiles = Directory.GetFiles(searchOptions.Path, "*.pdf", searchScope);

        // GetFiles never returns null, so "nothing found" has to be detected by length.
        // Fall through rather than return so the status below still reaches Ready.
        if (discoveredFiles.Length == 0)
        {
            System.Diagnostics.Debug.WriteLine("No files found... ");
        }

        StatusName = _infoService.GetSearchStatus(SearchStatus.Running);
        ResultInfo = _infoService.GetSearchInfo(SearchInfo.Init);
        Stats.FilesAnalyzed = $"0/{discoveredFiles.Length}";

        foreach (var pdf in discoveredFiles)
        {
            System.Diagnostics.Debug.WriteLine("Checking " + pdf);

            // Resolve the precision settings once per file instead of on every use.
            var precision = _optionService.TranslatePrecision(searchOptions.SelectedPrecisionOCR);

            using var docReader = _docLib.GetDocReader(pdf, precision.Item1);
            var pageCount = docReader.GetPageCount();

            for (var i = 0; i < pageCount; i++)
            {
                using var pageReader = docReader.GetPageReader(i);
                var parsedText = pageReader.GetText().ToString();

                // OCR fallback: only when enabled and the page produced no text.
                if (searchOptions.UseOCR && string.IsNullOrWhiteSpace(parsedText))
                {
                    var rawBytes = pageReader.GetImage(precision.Item2);
                    var width = pageReader.GetPageWidth();
                    var height = pageReader.GetPageHeight();

                    using var bmp = new Bitmap(width, height, precision.Item3);
                    AddBytes(bmp, rawBytes);

                    using var stream = new MemoryStream();
                    bmp.Save(stream, precision.Item4);

                    parsedText = ImageToText(
                        stream.ToArray(),
                        searchOptions.SelectedLanguageOCR,
                        searchOptions.SelectedPrecisionOCR);
                }

                SearchPage(parsedText, searchOptions.Keyword, pdf, i, searchOptions.CaseSensitive);
                Stats.PagesAnalyzed += 1;
            }

            fileCounter += 1;
            Stats.FilesAnalyzed = $"{fileCounter}/{discoveredFiles.Length}";
        }
    }

    watch.Stop();
    var elapsedMs = watch.ElapsedMilliseconds;
    System.Diagnostics.Debug.WriteLine("Total execution " + elapsedMs);

    Stats.ExecutionTime = (elapsedMs / 1000.0).ToString() + " " + _infoService.GetSecondsString();
    Stats.IsReady = true;
    StatusName = _infoService.GetSearchStatus(SearchStatus.Ready);

    if (!Results.Any())
    {
        ResultInfo = _infoService.GetSearchInfo(SearchInfo.NoResults);
        ItemsReady = false;
    }
}