/// <summary>
/// Renders the page at <c>_currentPage</c> (if one remains in the document) across the full
/// page bounds, advances the page counter, and reports whether further pages should be printed.
/// </summary>
/// <param name="e">Print event data supplying the target graphics and page bounds.</param>
protected override void OnPrintPage(PrintPageEventArgs e)
        {
            if (_currentPage < _document.PageCount)
            {
                // Advance the counter before rendering, exactly as the call-site increment would.
                int pageIndex = _currentPage++;

                var surface = e.Graphics;
                float dpiX = surface.DpiX;
                float dpiY = surface.DpiY;

                // PageBounds values are divided by 100 and multiplied by the device DPI
                // to obtain the render rectangle in device pixels.
                int pixelWidth = (int)((e.PageBounds.Width / 100.0) * dpiX);
                int pixelHeight = (int)((e.PageBounds.Height / 100.0) * dpiY);
                var target = new Rectangle(0, 0, pixelWidth, pixelHeight);

                _document.Render(pageIndex, surface, dpiX, dpiY, target, true);
            }

            // A ToPage of 0 means no upper limit was selected; otherwise clamp to the document length.
            int lastPage;
            if (PrinterSettings.ToPage == 0)
            {
                lastPage = _document.PageCount;
            }
            else
            {
                lastPage = Math.Min(PrinterSettings.ToPage, _document.PageCount);
            }

            e.HasMorePages = _currentPage < lastPage;
        }
예제 #2
0
 /// <summary>
 /// Renders one page of <paramref name="document"/> at the configured <c>dpi</c> and saves it as a PNG file.
 /// </summary>
 /// <param name="document">Document to render from.</param>
 /// <param name="pageNumber">Index into <c>document.PageSizes</c> of the page to render.</param>
 /// <param name="destFileName">Path of the PNG file to write.</param>
 private static void SavePng(PdfDocument document, int pageNumber, string destFileName)
 {
     SizeF pageSize = document.PageSizes[pageNumber];

     // The page size is scaled by dpi / 72 to get pixel dimensions
     // (presumably because page sizes are in 1/72-inch points).
     int pixelWidth = (int)Math.Round(pageSize.Width * (float)dpi / 72F);
     int pixelHeight = (int)Math.Round(pageSize.Height * (float)dpi / 72F);

     using (Image rendered = document.Render(pageNumber, pixelWidth, pixelHeight, dpi, dpi, true))
     {
         rendered.Save(destFileName, ImageFormat.Png);
     }
 }
예제 #3
0
 /// <summary>
 /// Draws a single document page onto <paramref name="graphics"/> inside
 /// <paramref name="pageBounds"/>, using the resolution reported by the graphics object.
 /// </summary>
 /// <param name="graphics">Drawing surface to render onto.</param>
 /// <param name="page">Page index passed to the document renderer.</param>
 /// <param name="pageBounds">Rectangle the page is rendered into.</param>
 private void DrawPageImage(Graphics graphics, int page, Rectangle pageBounds)
 {
     float horizontalDpi = graphics.DpiX;
     float verticalDpi = graphics.DpiY;

     // Final argument is passed as false; presumably the "for printing" switch - confirm against the Render overload.
     const bool forPrinting = false;

     _document.Render(page, graphics, horizontalDpi, verticalDpi, pageBounds, forPrinting);
 }
예제 #4
0
        /// <summary>
        /// Renders the page at <c>_currentPage</c> (if one remains) onto the printer graphics.
        /// The target rectangle depends on <c>_printMode</c>, and its axes are exchanged when the
        /// PDF page orientation differs from the orientation of the printed page. Afterwards the
        /// method reports whether further pages should be printed.
        /// </summary>
        /// <param name="e">Print event data supplying graphics, page bounds and page settings.</param>
        protected override void OnPrintPage(PrintPageEventArgs e)
        {
            if (_currentPage < _document.PageCount)
            {
                var documentOrientation = GetOrientation(_document.PageSizes[_currentPage]);
                var paperOrientation = GetOrientation(e.PageBounds.Size);

                e.PageSettings.Landscape = documentOrientation == Orientation.Landscape;

                double offsetX;
                double offsetY;
                double extentX;
                double extentY;

                if (_printMode != PdfPrintMode.ShrinkToMargin)
                {
                    // Use the whole page bounds, shifted back by the hard margins.
                    offsetX = -e.PageSettings.HardMarginX;
                    offsetY = -e.PageSettings.HardMarginY;
                    extentX = e.PageBounds.Width;
                    extentY = e.PageBounds.Height;
                }
                else
                {
                    // Shrink mode: start at the origin and subtract the hard margin on both sides.
                    offsetX = 0;
                    offsetY = 0;
                    extentX = e.PageBounds.Width - e.PageSettings.HardMarginX * 2;
                    extentY = e.PageBounds.Height - e.PageSettings.HardMarginY * 2;
                }

                // When orientations disagree, horizontal and vertical values trade places.
                bool axesSwapped = documentOrientation != paperOrientation;
                double rectLeft = axesSwapped ? offsetY : offsetX;
                double rectTop = axesSwapped ? offsetX : offsetY;
                double rectWidth = axesSwapped ? extentY : extentX;
                double rectHeight = axesSwapped ? extentX : extentY;

                // Advance the counter before rendering, exactly as the call-site increment would.
                int pageIndex = _currentPage++;
                var surface = e.Graphics;

                var target = new Rectangle(
                    AdjustDpi(surface.DpiX, rectLeft),
                    AdjustDpi(surface.DpiY, rectTop),
                    AdjustDpi(surface.DpiX, rectWidth),
                    AdjustDpi(surface.DpiY, rectHeight));

                _document.Render(
                    pageIndex,
                    surface,
                    surface.DpiX,
                    surface.DpiY,
                    target,
                    PdfRenderFlags.ForPrinting | PdfRenderFlags.Annotations);
            }

            // A ToPage of 0 means no upper limit was selected; otherwise clamp to the document length.
            int lastPage;
            if (PrinterSettings.ToPage == 0)
            {
                lastPage = _document.PageCount;
            }
            else
            {
                lastPage = Math.Min(PrinterSettings.ToPage, _document.PageCount);
            }

            e.HasMorePages = _currentPage < lastPage;
        }
예제 #5
0
        /// <summary>
        /// Extracts the text of a PDF and returns it in serialized form.
        /// Pages for which <c>PageTextLine</c> yields more than two lines are handled as text pages;
        /// every other page is rendered to an image and processed by the image-based helpers.
        /// </summary>
        /// <param name="pdfFile">Raw bytes of the PDF file.</param>
        /// <param name="extractedAuditLetterText">Receives the structured result that was serialized.</param>
        /// <param name="ts">Elapsed time written under the "Ocr" label of the timing log (used only when no page is image-based).</param>
        /// <param name="dt1">Start time from which the "Reader" elapsed time of the timing log is computed.</param>
        /// <param name="num">Number prefixed to the timing log line.</param>
        /// <returns>The value of <c>SerializeExtractedAuditLetterText()</c> for the collected text.</returns>
        public static string ExtractTextFromRawPdf(byte[] pdfFile, out ExtractedAuditLetterText extractedAuditLetterText, TimeSpan ts, DateTime dt1, int num)
        {
            List <MergedTraversedLine> txtFormattedLines = new List <MergedTraversedLine>();
            List <int> txtFormattedNum   = new List <int>();
            List <int> imageFormattedNum = new List <int>();

            // NOTE(review): imgDoc is never read in this method. It is kept because constructing it
            // may validate the input; confirm, then remove or dispose it.
            Spire.Pdf.PdfDocument imgDoc = new Spire.Pdf.PdfDocument(pdfFile);

            pdfRead.pdfObject.PdfDocument doc = new pdfRead.pdfObject.PdfDocument(pdfFile);

            // NOTE(review): the result of this call is discarded; presumably it is made for a side
            // effect on doc (e.g. populating lineFontSize) - confirm before removing.
            doc.PageTextLine(doc.pages.Count - 1);

            // Classify every page: more than two text lines => text page, otherwise image page.
            for (int i = 0; i < doc.pages.Count; i++)
            {
                List <PdfTextLine> textLines = doc.PageTextLine(i);
                if (textLines.Count > 2)
                {
                    txtFormattedNum.Add(i);
                }
                else
                {
                    imageFormattedNum.Add(i);
                }
            }

            ExtractedAuditLetterText extractedTexts      = new ExtractedAuditLetterText();
            ExtractedAuditLetterText extractedImageTexts = new ExtractedAuditLetterText();

            // Get text from txt-formatted pdf
            if (txtFormattedNum.Count != 0)
            {
                // Lines come grouped by font size; flatten them with a running index.
                Dictionary <double, List <PdfTextLine> > linesKeyValue = doc.lineFontSize;
                int index = 0;
                foreach (var sizeGroup in linesKeyValue)
                {
                    foreach (var textLine in sizeGroup.Value)
                    {
                        var bdcLines = new MergedTraversedLine();
                        bdcLines.Index = index;
                        bdcLines.Text  = textLine.text;
                        index++;
                        txtFormattedLines.Add(bdcLines);
                    }
                }

                // NOTE(review): the result is unused; the call is retained in case the conversion
                // has side effects - confirm and remove.
                PdfExtractor.Utilities.ConvertToExtractedAuditLetterTexts(txtFormattedLines);

                // Text found in images embedded in the document goes to Others.
                var littleImages = doc.ExtractImages();
                foreach (var img in littleImages)
                {
                    var lines = Utilities.ExtractLinesFromImage(img);
                    foreach (var line in lines)
                    {
                        string other = string.Join(" ", line.Words.Select(r => r.Text));
                        if (other == "auren")
                        {
                            other = "Auren";
                        }
                        extractedTexts.Others.Add(other);
                    }
                }
            }

            // Get text from txt-image-formatted pdf
            if (imageFormattedNum.Count != 0)
            {
                int titlePageStartIndexNum = 0;
                int titlePageEndIndexNum   = -1;

                // Render the image pages, then release the PDF document and its stream; the rendered
                // images are only used after this point.
                // NOTE(review): the rendered images themselves are never disposed - confirm that no
                // helper keeps a reference, then dispose them after the loop below.
                List <Image> imagePdf = new List <Image>();
                using (MemoryStream pdfStream = new MemoryStream(pdfFile))
                using (PdfiumViewer.PdfDocument document = PdfiumViewer.PdfDocument.Load(pdfStream))
                {
                    foreach (int i in imageFormattedNum)
                    {
                        Image image = document.Render(i, Consts.DpiX, Consts.DpiY, PdfRenderFlags.CorrectFromDpi);
                        imagePdf.Add(image);
                    }
                }

                List <List <Line> > linesOfImagePdf = new List <List <Line> >();
                foreach (var img in imagePdf)
                {
                    linesOfImagePdf.Add(Utilities.ExtractLinesFromImage(img));
                }

                extractedTexts.Others.AddRange(Utilities.GetRedundantLines(imagePdf, linesOfImagePdf));

                for (int i = 0; i < imagePdf.Count; i++)
                {
                    // Add small region text to ExtractedAuditLetterText.others
                    extractedTexts.Others.AddRange(Utilities.RemoveSmallRegion(imagePdf[i]));

                    // Get raw context in main body
                    List <TraversedLine> rawImageTexts = PdfExtractor.Utilities.GetContents(imagePdf[i]);

                    // On lines mentioning "sha-" or "thumb", map letter look-alikes to digits
                    // (presumably these lines carry hash/thumbprint values - confirm).
                    foreach (var line in rawImageTexts)
                    {
                        if (line.Text.IndexOf("sha-", StringComparison.OrdinalIgnoreCase) >= 0 || (line.Text.IndexOf("thumb", StringComparison.OrdinalIgnoreCase) >= 0))
                        {
                            line.Text = line.Text.Replace("O", "0").Replace("o", "0").Replace("i", "1").Replace("I", "1").Replace("l", "1");
                        }
                    }

                    // Merge raw context by paragraph
                    List <MergedTraversedLine> mergedImageTexts = PdfExtractor.Utilities.MergeTraversedLines(rawImageTexts);

                    titlePageEndIndexNum += mergedImageTexts.Count;
                    List <MergedTraversedLine> titleLines = mergedImageTexts.Where(x => x.IsTitle == true).ToList <MergedTraversedLine>();

                    // If this is a title page, record the page's start/end index in TitlePageRanges
                    // and each title's index (offset by the page start) in TitleRanges.
                    if (titleLines.Count > 0)
                    {
                        extractedImageTexts.TitlePageRanges.Add(new KeyValuePair <int, int>(titlePageStartIndexNum, titlePageEndIndexNum));
                        foreach (MergedTraversedLine title in titleLines)
                        {
                            extractedImageTexts.TitleRanges.Add(new KeyValuePair <int, int>(title.Index + titlePageStartIndexNum, title.Index + titlePageStartIndexNum));
                        }
                    }
                    titlePageStartIndexNum += mergedImageTexts.Count;

                    // Add titles, contents
                    ExtractedAuditLetterText pageText = PdfExtractor.Utilities.ConvertToExtractedAuditLetterTexts(mergedImageTexts);

                    extractedImageTexts.Titles.AddRange(pageText.Titles);
                    extractedImageTexts.Contents.AddRange(pageText.Contents);
                }
            }

            // NOTE(review): extractedImageTexts.TitlePageRanges is filled above but never copied
            // into extractedTexts in any branch below - confirm whether that is intended.

            if (imageFormattedNum.Count == 0)
            {
                // Text-only document: return early and append a timing line to the diagnostics log.
                ExtractedAuditLetterText tempText = PdfExtractor.Utilities.ConvertToExtractedAuditLetterTexts(txtFormattedLines);
                extractedTexts.Titles.AddRange(tempText.Titles);
                extractedTexts.Contents.AddRange(tempText.Contents);
                StringBuilder sb = new StringBuilder();
                sb.Append(num + "Ocr:     " + ts.TotalMilliseconds.ToString() + "  ");

                string serialzedAuditLetterText2 = extractedTexts.SerializeExtractedAuditLetterText();
                extractedAuditLetterText = extractedTexts;
                DateTime dt2 = System.DateTime.Now;
                ts = dt2.Subtract(dt1);

                sb.Append("   Reader:     " + ts.TotalMilliseconds.ToString() + "\n");
                string fileTime = @"C:\Users\t-holu\Documents\AuditLetter\JixiProjectData\comTextTime.txt";

                // The log path is machine-specific. Skip the write when its directory is absent so
                // that extraction does not fail with DirectoryNotFoundException on other machines.
                if (Directory.Exists(Path.GetDirectoryName(fileTime)))
                {
                    File.AppendAllText(fileTime, sb.ToString());
                }
                return(serialzedAuditLetterText2);
            }
            else if (txtFormattedNum.Count == 0)
            {
                // Image-only document.
                extractedTexts.Titles.AddRange(extractedImageTexts.Titles);
                extractedTexts.Contents.AddRange(extractedImageTexts.Contents);
                extractedTexts.TitleRanges.AddRange(extractedImageTexts.TitleRanges);
            }
            else
            {
                // Mixed document: whichever kind owns the earliest page is appended first.
                bool textComesFirst = txtFormattedNum[0] < imageFormattedNum[0];

                if (textComesFirst)
                {
                    ExtractedAuditLetterText tempText = PdfExtractor.Utilities.ConvertToExtractedAuditLetterTexts(txtFormattedLines);
                    extractedTexts.Titles.AddRange(tempText.Titles);
                    extractedTexts.Contents.AddRange(tempText.Contents);
                }

                extractedTexts.Titles.AddRange(extractedImageTexts.Titles);
                extractedTexts.Contents.AddRange(extractedImageTexts.Contents);
                extractedTexts.TitleRanges.AddRange(extractedImageTexts.TitleRanges);

                if (!textComesFirst)
                {
                    ExtractedAuditLetterText tempText = PdfExtractor.Utilities.ConvertToExtractedAuditLetterTexts(txtFormattedLines);
                    extractedTexts.Titles.AddRange(tempText.Titles);
                    extractedTexts.Contents.AddRange(tempText.Contents);
                }
            }

            string serialzedAuditLetterText = extractedTexts.SerializeExtractedAuditLetterText();

            extractedAuditLetterText = extractedTexts;

            return(serialzedAuditLetterText);
        }