예제 #1
0
파일: ExampleFour.cs 프로젝트: bseay/docnet
        /// <summary>
        /// Opens the document at <paramref name="path"/>, reads a single page and returns the
        /// bytes produced by <c>GetModifiedImage</c> for that page.
        /// </summary>
        /// <param name="library">Library used to open the document.</param>
        /// <param name="path">Path handed to <c>GetDocReader</c>.</param>
        /// <param name="pageIndex">Index of the page handed to <c>GetPageReader</c>.</param>
        /// <returns>The byte array returned by <c>GetModifiedImage</c>.</returns>
        /// <remarks>
        /// The actual image conversion happens inside <c>GetModifiedImage</c>, which is not
        /// part of this method; this method only manages the reader lifetimes.
        /// </remarks>
        internal static byte[] ConvertPageToGrayscaleImageIncludeAnnotations(IDocLib library, string path, int pageIndex)
        {
            // The document is opened with the two-value dimensions (1080, 1920).
            var dimensions = new PageDimensions(1080, 1920);

            // Both readers are disposed (page first, then document) once the bytes are produced.
            using (var document = library.GetDocReader(path, dimensions))
            using (var page = document.GetPageReader(pageIndex))
            {
                return GetModifiedImage(page);
            }
        }
예제 #2
0
        /// <summary>
        /// Opens the document at <paramref name="path"/> using a single-value
        /// <c>PageDimensions(1.337)</c>, reads one page and returns the bytes produced by
        /// <c>GetModifiedImage</c> for that page.
        /// </summary>
        /// <param name="library">Library used to open the document.</param>
        /// <param name="path">Path handed to <c>GetDocReader</c>.</param>
        /// <param name="pageIndex">Index of the page handed to <c>GetPageReader</c>.</param>
        /// <returns>The byte array returned by <c>GetModifiedImage</c>.</returns>
        /// <remarks>
        /// The actual image conversion happens inside <c>GetModifiedImage</c>, which is not
        /// part of this method; this method only manages the reader lifetimes.
        /// </remarks>
        internal static byte[] ConvertPageToSimpleImageWithLetterOutlinesUsingScaling(IDocLib library, string path, int pageIndex)
        {
            // Presumably 1.337 is a scaling factor (per the method name) - confirm against PageDimensions.
            var dimensions = new PageDimensions(1.337);

            // Both readers are disposed (page first, then document) once the bytes are produced.
            using (var document = library.GetDocReader(path, dimensions))
            using (var page = document.GetPageReader(pageIndex))
            {
                return GetModifiedImage(page);
            }
        }
예제 #3
0
        /// <summary>
        /// Opens the document at <paramref name="path"/>, reads a single page and returns the
        /// bytes produced by <c>GetModifiedImage</c> for that page.
        /// </summary>
        /// <param name="library">Library used to open the document.</param>
        /// <param name="path">Path handed to <c>GetDocReader</c>.</param>
        /// <param name="pageIndex">Index of the page handed to <c>GetPageReader</c>.</param>
        /// <returns>The byte array returned by <c>GetModifiedImage</c>.</returns>
        /// <remarks>
        /// The actual image conversion happens inside <c>GetModifiedImage</c>, which is not
        /// part of this method; this method only manages the reader lifetimes.
        /// </remarks>
        internal static byte[] ConvertPageToSimpleImageWithoutTransparency(IDocLib library, string path, int pageIndex)
        {
            // The document is opened with the two-value dimensions (1080, 1920).
            var dimensions = new PageDimensions(1080, 1920);

            // Both readers are disposed (page first, then document) once the bytes are produced.
            using (var document = library.GetDocReader(path, dimensions))
            using (var page = document.GetPageReader(pageIndex))
            {
                return GetModifiedImage(page);
            }
        }
예제 #4
0
        /// <summary>
        /// Renders the first page (index 0) of a PDF to a PNG image composited over a white
        /// backdrop, so that transparent pixels come out white.
        /// </summary>
        /// <param name="filepdf">Stream containing the PDF; it is read via <c>ReadToEnd</c>.</param>
        /// <returns>
        /// A <see cref="MemoryStream"/> holding the PNG data, positioned at 0. The caller owns
        /// the stream.
        /// </returns>
        public MemoryStream PdfToImage(Stream filepdf)
        {
            var          pdfBytes      = ReadToEnd(filepdf);
            MemoryStream memoryStream  = new MemoryStream();
            MagickColor  backdropColor = MagickColors.White; // replace transparent pixels with this color
            int          pdfPageNum    = 0;                  // first page is 0

            // NOTE(review): this disposes DocLib.Instance on every call, which looks like a shared
            // instance - confirm that tearing it down per call is intended (kept as in the original).
            using (IDocLib pdfLibrary = DocLib.Instance)
            using (var docReader = pdfLibrary.GetDocReader(pdfBytes, new PageDimensions(1.0d)))
            using (var pageReader = docReader.GetPageReader(pdfPageNum))
            {
                // Presumably reorders the raw channels so they match PixelMapping.RGBA below - confirm.
                var rawBytes = RearrangeBytesToRGBA(pageReader.GetImage());
                var width    = pageReader.GetPageWidth();
                var height   = pageReader.GetPageHeight();

                PixelReadSettings pixelReadSettings = new PixelReadSettings(width, height, StorageType.Char, PixelMapping.RGBA);

                // Both images are scoped with using so neither leaks if Composite or Write throws;
                // previously the backdrop was only disposed on the success path.
                using (MagickImage imgPdfOverlay = new MagickImage(rawBytes, pixelReadSettings))
                using (MagickImage imgBackdrop = new MagickImage(backdropColor, width, height))
                {
                    imgBackdrop.Composite(imgPdfOverlay, CompositeOperator.Over);
                    imgBackdrop.Write(memoryStream, MagickFormat.Png);
                }
            }

            // Rewind so the caller can read the PNG from the beginning.
            memoryStream.Position = 0;
            return memoryStream;
        }
예제 #5
0
        /// <summary>
        /// Resets the search state, scans the PDF files under <c>Options.Path</c> for
        /// <c>Options.Keyword</c> page by page, and then publishes the final status and timing.
        /// </summary>
        /// <remarks>
        /// When the keyword is blank no files are scanned, but the final status/timing update
        /// still runs. When a page yields no text and <c>UseOCR</c> is set, the page is rendered
        /// to a bitmap and passed to <c>ImageToText</c> instead.
        /// </remarks>
        void StartReading()
        {
            var searchOptions = Options;

            // Reset everything a previous run may have left behind.
            Results.Clear();
            StatusName          = _infoService.GetSearchStatus(SearchStatus.Initializing);
            ItemsReady          = false;
            Stats.IsReady       = false;
            Stats.FilesAnalyzed = "0/0";
            Stats.PagesAnalyzed = 0;
            Stats.ExecutionTime = "...";

            var fileCounter = 0;
            var watch       = System.Diagnostics.Stopwatch.StartNew();

            if (!string.IsNullOrWhiteSpace(searchOptions.Keyword))
            {
                var searchScope     = searchOptions.UseSubfolders ? SearchOption.AllDirectories : SearchOption.TopDirectoryOnly;
                var discoveredFiles = Directory.GetFiles(searchOptions.Path, "*.pdf", searchScope);

                // GetFiles never returns null, so "nothing found" means an empty array.
                // Deliberately no early return: falling through lets the code below publish
                // the Ready / NoResults state instead of leaving the status at Initializing.
                if (discoveredFiles.Length == 0)
                {
                    System.Diagnostics.Debug.WriteLine("No files found... ");
                }

                StatusName          = _infoService.GetSearchStatus(SearchStatus.Running);
                ResultInfo          = _infoService.GetSearchInfo(SearchInfo.Init);
                Stats.FilesAnalyzed = $"0/{discoveredFiles.Length}";

                foreach (var pdf in discoveredFiles)
                {
                    System.Diagnostics.Debug.WriteLine("Checking " + pdf);

                    // Resolve the precision settings once per file so that all four items used
                    // below come from the same translation instead of up to four calls per page.
                    var precision = _optionService.TranslatePrecision(searchOptions.SelectedPrecisionOCR);

                    using var docReader = _docLib.GetDocReader(pdf, precision.Item1);
                    var pageCount = docReader.GetPageCount();

                    for (var i = 0; i < pageCount; i++)
                    {
                        using var pageReader = docReader.GetPageReader(i);
                        var parsedText = pageReader.GetText().ToString();

                        // Fall back to OCR only when the page has no extractable text.
                        if (searchOptions.UseOCR && string.IsNullOrWhiteSpace(parsedText))
                        {
                            var rawBytes = pageReader.GetImage(precision.Item2);
                            var width    = pageReader.GetPageWidth();
                            var height   = pageReader.GetPageHeight();
                            using var bmp = new Bitmap(width, height, precision.Item3);

                            AddBytes(bmp, rawBytes);
                            using var stream = new MemoryStream();
                            bmp.Save(stream, precision.Item4);

                            parsedText = ImageToText(stream.ToArray(), searchOptions.SelectedLanguageOCR, searchOptions.SelectedPrecisionOCR);
                        }

                        SearchPage(parsedText, searchOptions.Keyword, pdf, i, searchOptions.CaseSensitive);
                        Stats.PagesAnalyzed += 1;
                    }

                    fileCounter        += 1;
                    Stats.FilesAnalyzed = $"{fileCounter}/{discoveredFiles.Length}";
                }
            }

            watch.Stop();
            var elapsedMs = watch.ElapsedMilliseconds;

            System.Diagnostics.Debug.WriteLine("Total execution " + elapsedMs);
            Stats.ExecutionTime = $"{elapsedMs / 1000.0} {_infoService.GetSecondsString()}";
            Stats.IsReady       = true;
            StatusName          = _infoService.GetSearchStatus(SearchStatus.Ready);

            if (!Results.Any())
            {
                ResultInfo = _infoService.GetSearchInfo(SearchInfo.NoResults);
                ItemsReady = false;
            }
        }