/// <summary>
/// Renders the next document page onto the printer surface and tells the
/// print controller whether further pages follow.
/// </summary>
/// <param name="e">Print event data supplying the target graphics and page bounds.</param>
protected override void OnPrintPage(PrintPageEventArgs e)
{
    if (_currentPage < _document.PageCount)
    {
        // Consume the page index up front so the counter advances even if rendering fails.
        int pageIndex = _currentPage++;

        Graphics surface = e.Graphics;
        float dpiX = surface.DpiX;
        float dpiY = surface.DpiY;

        // PageBounds is divided by 100 and scaled by the device DPI to obtain pixels
        // (PageBounds is documented by .NET as being in hundredths of an inch).
        int pixelWidth = (int)((e.PageBounds.Width / 100.0) * dpiX);
        int pixelHeight = (int)((e.PageBounds.Height / 100.0) * dpiY);
        Rectangle target = new Rectangle(0, 0, pixelWidth, pixelHeight);

        // NOTE(review): the trailing `true` is presumably the "for printing" switch - confirm against Render.
        _document.Render(pageIndex, surface, dpiX, dpiY, target, true);
    }

    // A ToPage of 0 is treated as "no upper limit": print up to the end of the document.
    int lastPage;
    if (PrinterSettings.ToPage == 0)
    {
        lastPage = _document.PageCount;
    }
    else
    {
        lastPage = Math.Min(PrinterSettings.ToPage, _document.PageCount);
    }

    e.HasMorePages = _currentPage < lastPage;
}
/// <summary>
/// Renders one page of <paramref name="document"/> to a bitmap and writes it as a PNG file.
/// </summary>
/// <param name="document">Document to render from.</param>
/// <param name="pageNumber">Index used both for the page size lookup and for rendering.</param>
/// <param name="destFileName">Path of the PNG file to write.</param>
private static void SavePng(PdfDocument document, int pageNumber, string destFileName)
{
    SizeF pageSize = document.PageSizes[pageNumber];

    // Page size is treated as points: multiply by the dpi and divide by 72 to get pixels.
    float resolution = (float)dpi;
    double exactWidth = pageSize.Width * resolution / 72F;
    double exactHeight = pageSize.Height * resolution / 72F;
    int pixelWidth = (int)Math.Round(exactWidth);
    int pixelHeight = (int)Math.Round(exactHeight);

    // NOTE(review): the trailing `true` is presumably the "for printing" switch - confirm against Render.
    using (Image rendered = document.Render(pageNumber, pixelWidth, pixelHeight, dpi, dpi, true))
    {
        rendered.Save(destFileName, ImageFormat.Png);
    }
}
/// <summary>
/// Draws a single document page into <paramref name="pageBounds"/> on the given graphics surface,
/// using the surface's own horizontal and vertical DPI.
/// </summary>
/// <param name="graphics">Surface to draw on.</param>
/// <param name="page">Index of the page to draw.</param>
/// <param name="pageBounds">Destination rectangle passed through to the renderer.</param>
private void DrawPageImage(Graphics graphics, int page, Rectangle pageBounds)
{
    float horizontalDpi = graphics.DpiX;
    float verticalDpi = graphics.DpiY;

    // NOTE(review): the trailing `false` is presumably the "for printing" switch (off here) - confirm against Render.
    _document.Render(page, graphics, horizontalDpi, verticalDpi, pageBounds, false);
}
/// <summary>
/// Renders the next document page onto the printer surface, sized according to the
/// configured print mode, and tells the print controller whether further pages follow.
/// </summary>
/// <param name="e">Print event data supplying the target graphics, page bounds and page settings.</param>
protected override void OnPrintPage(PrintPageEventArgs e)
{
    if (_currentPage < _document.PageCount)
    {
        var pageOrientation = GetOrientation(_document.PageSizes[_currentPage]);
        var printOrientation = GetOrientation(e.PageBounds.Size);

        // Must happen before the hard margins are read below, exactly as in the original sequence.
        e.PageSettings.Landscape = pageOrientation == Orientation.Landscape;

        float hardMarginX = e.PageSettings.HardMarginX;
        float hardMarginY = e.PageSettings.HardMarginY;

        double left;
        double top;
        double width;
        double height;

        if (_printMode != PdfPrintMode.ShrinkToMargin)
        {
            // Use the full page bounds, shifted back by the hard margins.
            left = -hardMarginX;
            top = -hardMarginY;
            width = e.PageBounds.Width;
            height = e.PageBounds.Height;
        }
        else
        {
            // Start at the origin and shrink the area by the hard margin on both sides.
            left = 0;
            top = 0;
            width = e.PageBounds.Width - hardMarginX * 2;
            height = e.PageBounds.Height - hardMarginY * 2;
        }

        // When the page and the paper disagree on orientation, exchange the two axes.
        bool swapAxes = pageOrientation != printOrientation;
        double targetLeft = swapAxes ? top : left;
        double targetTop = swapAxes ? left : top;
        double targetWidth = swapAxes ? height : width;
        double targetHeight = swapAxes ? width : height;

        // Consume the page index before the rectangle is built so the counter
        // advances at the same point as in a single-expression call.
        int pageIndex = _currentPage++;

        Graphics surface = e.Graphics;
        Rectangle target = new Rectangle(
            AdjustDpi(surface.DpiX, targetLeft),
            AdjustDpi(surface.DpiY, targetTop),
            AdjustDpi(surface.DpiX, targetWidth),
            AdjustDpi(surface.DpiY, targetHeight)
        );

        _document.Render(
            pageIndex,
            surface,
            surface.DpiX,
            surface.DpiY,
            target,
            PdfRenderFlags.ForPrinting | PdfRenderFlags.Annotations
        );
    }

    // A ToPage of 0 is treated as "no upper limit": print up to the end of the document.
    int lastPage;
    if (PrinterSettings.ToPage == 0)
    {
        lastPage = _document.PageCount;
    }
    else
    {
        lastPage = Math.Min(PrinterSettings.ToPage, _document.PageCount);
    }

    e.HasMorePages = _currentPage < lastPage;
}
/// <summary>
/// Extracts the text of an audit-letter PDF and returns it in serialized form.
/// </summary>
/// <remarks>
/// Every page is classified by the number of text lines reported for it: pages with more than
/// two lines are read through the text layer, all other pages are rendered to an image and read
/// from that image. The two partial results are then merged, with the group whose first page
/// comes earlier placed first. When the document has no image pages, a timing entry is also
/// appended to a fixed diagnostics file; failure to write that file is traced and otherwise ignored.
/// </remarks>
/// <param name="pdfFile">Raw bytes of the PDF document.</param>
/// <param name="extractedAuditLetterText">Receives the structured result that the return value was serialized from.</param>
/// <param name="ts">Elapsed time reported as the "Ocr" figure in the timing entry (text-only path only).</param>
/// <param name="dt1">Start time used to compute the "Reader" figure in the timing entry (text-only path only).</param>
/// <param name="num">Number written at the start of the timing entry (text-only path only).</param>
/// <returns>The serialized form of <paramref name="extractedAuditLetterText"/>.</returns>
public static string ExtractTextFromRawPdf(byte[] pdfFile, out ExtractedAuditLetterText extractedAuditLetterText, TimeSpan ts, DateTime dt1, int num)
{
    List<MergedTraversedLine> txtFormattedLines = new List<MergedTraversedLine>();
    List<int> txtFormattedNum = new List<int>();
    List<int> imageFormattedNum = new List<int>();

    pdfRead.pdfObject.PdfDocument doc = new pdfRead.pdfObject.PdfDocument(pdfFile);

    // NOTE(review): the result of this call is discarded. It is kept because PageTextLine may
    // populate state on the document (e.g. lineFontSize) - TODO confirm and remove if it is pure.
    doc.PageTextLine(doc.pages.Count - 1);

    // Classify each page: more than two text lines => text page, otherwise image page.
    for (int i = 0; i < doc.pages.Count; i++)
    {
        List<PdfTextLine> textLines = doc.PageTextLine(i);
        if (textLines.Count > 2)
        {
            txtFormattedNum.Add(i);
        }
        else
        {
            imageFormattedNum.Add(i);
        }
    }

    ExtractedAuditLetterText extractedTexts = new ExtractedAuditLetterText();
    ExtractedAuditLetterText extractedImageTexts = new ExtractedAuditLetterText();

    // ---- Text pages -------------------------------------------------------------------------
    if (txtFormattedNum.Count != 0)
    {
        // Lines of the document grouped by font size; flatten them into one indexed list.
        Dictionary<double, List<PdfTextLine>> linesKeyValue = doc.lineFontSize;
        int index = 0;
        foreach (var key in linesKeyValue)
        {
            foreach (var value in key.Value)
            {
                var bdcLines = new MergedTraversedLine();
                bdcLines.Index = index;
                bdcLines.Text = value.text;
                index++;
                txtFormattedLines.Add(bdcLines);
            }
        }

        // NOTE(review): the result is unused here; the call is retained in case the conversion
        // has side effects on the lines it is given - TODO confirm and remove if it is pure.
        PdfExtractor.Utilities.ConvertToExtractedAuditLetterTexts(txtFormattedLines);

        // Text found inside images embedded in the document goes to Others.
        var littleImages = doc.ExtractImages();
        foreach (var img in littleImages)
        {
            var lines = Utilities.ExtractLinesFromImage(img);
            foreach (var line in lines)
            {
                string other = string.Join(" ", line.Words.Select(r => r.Text));
                if (other == "auren")
                {
                    other = "Auren";
                }
                extractedTexts.Others.Add(other);
            }
        }
    }

    // ---- Image pages ------------------------------------------------------------------------
    if (imageFormattedNum.Count != 0)
    {
        int titlePageStartIndexNum = 0;
        int titlePageEndIndexNum = -1;

        List<Image> imagePdf = RenderPagesToImages(pdfFile, imageFormattedNum);
        try
        {
            List<List<Line>> linesOfImagePdf = new List<List<Line>>();
            foreach (var img in imagePdf)
            {
                linesOfImagePdf.Add(Utilities.ExtractLinesFromImage(img));
            }

            extractedTexts.Others.AddRange(Utilities.GetRedundantLines(imagePdf, linesOfImagePdf));

            for (int i = 0; i < imagePdf.Count; i++)
            {
                // Small-region text goes to Others. This call stays ahead of GetContents
                // because it may modify the image - TODO confirm.
                extractedTexts.Others.AddRange(Utilities.RemoveSmallRegion(imagePdf[i]));

                // Raw lines of the main body.
                List<TraversedLine> rawImageTexts = PdfExtractor.Utilities.GetContents(imagePdf[i]);
                foreach (var line in rawImageTexts)
                {
                    bool looksLikeHash =
                        line.Text.IndexOf("sha-", StringComparison.OrdinalIgnoreCase) >= 0 ||
                        line.Text.IndexOf("thumb", StringComparison.OrdinalIgnoreCase) >= 0;
                    if (looksLikeHash)
                    {
                        // On lines mentioning "sha-" or "thumb", map letters that resemble digits to digits.
                        line.Text = line.Text.Replace("O", "0").Replace("o", "0").Replace("i", "1").Replace("I", "1").Replace("l", "1");
                    }
                }

                // Merge the raw lines into paragraphs.
                List<MergedTraversedLine> mergedImageTexts = PdfExtractor.Utilities.MergeTraversedLines(rawImageTexts);
                titlePageEndIndexNum += mergedImageTexts.Count;

                List<MergedTraversedLine> titleLines = mergedImageTexts.Where(x => x.IsTitle == true).ToList();

                // A page containing titles contributes its index range to TitlePageRanges
                // and each title's offset index to TitleRanges.
                if (titleLines.Count > 0)
                {
                    extractedImageTexts.TitlePageRanges.Add(new KeyValuePair<int, int>(titlePageStartIndexNum, titlePageEndIndexNum));
                    foreach (MergedTraversedLine title in titleLines)
                    {
                        int titleIndex = title.Index + titlePageStartIndexNum;
                        extractedImageTexts.TitleRanges.Add(new KeyValuePair<int, int>(titleIndex, titleIndex));
                    }
                }
                titlePageStartIndexNum += mergedImageTexts.Count;

                ExtractedAuditLetterText tempText = PdfExtractor.Utilities.ConvertToExtractedAuditLetterTexts(mergedImageTexts);
                extractedImageTexts.Titles.AddRange(tempText.Titles);
                extractedImageTexts.Contents.AddRange(tempText.Contents);
            }
        }
        finally
        {
            // Rendered page bitmaps are only needed while they are being read.
            foreach (Image image in imagePdf)
            {
                image.Dispose();
            }
        }
    }

    // ---- Merge ------------------------------------------------------------------------------
    if (imageFormattedNum.Count == 0)
    {
        // Text-only document: add the text results, log timings and return.
        AppendTextResults(extractedTexts, txtFormattedLines);

        StringBuilder sb = new StringBuilder();
        sb.Append(num + "Ocr: " + ts.TotalMilliseconds.ToString() + " ");
        string serialzedAuditLetterText2 = extractedTexts.SerializeExtractedAuditLetterText();
        extractedAuditLetterText = extractedTexts;
        DateTime dt2 = System.DateTime.Now;
        ts = dt2.Subtract(dt1);
        sb.Append(" Reader: " + ts.TotalMilliseconds.ToString() + "\n");

        // NOTE(review): this is a machine-specific diagnostics path. Writing it is best-effort so
        // that a missing directory or denied access does not fail the extraction itself.
        string fileTime = @"C:\Users\t-holu\Documents\AuditLetter\JixiProjectData\comTextTime.txt";
        try
        {
            File.AppendAllText(fileTime, sb.ToString());
        }
        catch (IOException ex)
        {
            System.Diagnostics.Trace.TraceWarning("Could not write timing entry: " + ex.Message);
        }
        catch (UnauthorizedAccessException ex)
        {
            System.Diagnostics.Trace.TraceWarning("Could not write timing entry: " + ex.Message);
        }

        return serialzedAuditLetterText2;
    }

    // NOTE(review): TitlePageRanges collected for image pages is not copied into the result
    // (only Titles, Contents and TitleRanges are) - confirm whether that is intended.
    if (txtFormattedNum.Count == 0)
    {
        // Image-only document.
        AppendImageResults(extractedTexts, extractedImageTexts);
    }
    else
    {
        // Mixed document: the group whose first page comes earlier is placed first.
        bool textFirst = txtFormattedNum[0] < imageFormattedNum[0];
        if (textFirst)
        {
            AppendTextResults(extractedTexts, txtFormattedLines);
        }
        AppendImageResults(extractedTexts, extractedImageTexts);
        if (!textFirst)
        {
            AppendTextResults(extractedTexts, txtFormattedLines);
        }
    }

    string serialzedAuditLetterText = extractedTexts.SerializeExtractedAuditLetterText();
    extractedAuditLetterText = extractedTexts;
    return serialzedAuditLetterText;
}

/// <summary>
/// Renders the given pages of the PDF to images, releasing the PDF document and its stream afterwards.
/// </summary>
private static List<Image> RenderPagesToImages(byte[] pdfFile, List<int> pageIndexes)
{
    List<Image> rendered = new List<Image>();
    try
    {
        using (MemoryStream pdfStream = new MemoryStream(pdfFile))
        using (PdfiumViewer.PdfDocument document = PdfiumViewer.PdfDocument.Load(pdfStream))
        {
            foreach (int pageIndex in pageIndexes)
            {
                rendered.Add(document.Render(pageIndex, Consts.DpiX, Consts.DpiY, PdfRenderFlags.CorrectFromDpi));
            }
        }
    }
    catch
    {
        // Do not leak the pages that were rendered before the failure.
        foreach (Image image in rendered)
        {
            image.Dispose();
        }
        throw;
    }
    return rendered;
}

/// <summary>
/// Converts the text-layer lines and appends the resulting titles and contents to <paramref name="target"/>.
/// </summary>
private static void AppendTextResults(ExtractedAuditLetterText target, List<MergedTraversedLine> textLines)
{
    ExtractedAuditLetterText converted = PdfExtractor.Utilities.ConvertToExtractedAuditLetterTexts(textLines);
    target.Titles.AddRange(converted.Titles);
    target.Contents.AddRange(converted.Contents);
}

/// <summary>
/// Appends the titles, contents and title ranges gathered from image pages to <paramref name="target"/>.
/// </summary>
private static void AppendImageResults(ExtractedAuditLetterText target, ExtractedAuditLetterText imageResults)
{
    target.Titles.AddRange(imageResults.Titles);
    target.Contents.AddRange(imageResults.Contents);
    target.TitleRanges.AddRange(imageResults.TitleRanges);
}