/// <summary>
/// Initializes the instance by storing each supplied argument in the
/// member of the same name.
/// </summary>
/// <param name="Database">Value stored in <c>this.Database</c>.</param>
/// <param name="SessionId">Value stored in <c>this.SessionId</c>.</param>
/// <param name="PdfiumDocument">Value stored in <c>this.PdfiumDocument</c>.</param>
/// <param name="PageIndex">Value stored in <c>this.PageIndex</c>.</param>
public ImportPage(
    DiscoDataContext Database,
    string SessionId,
    PdfDocument PdfiumDocument,
    int PageIndex)
{
    // Parameters share their names with the members, hence the explicit "this.".
    this.Database = Database;
    this.SessionId = SessionId;
    this.PdfiumDocument = PdfiumDocument;
    this.PageIndex = PageIndex;
}
/// <summary>
/// Renders a MigraDoc document to PDF in memory, loads the resulting bytes
/// with PdfiumViewer and hands the loaded document to the viewer's renderer.
/// Any exception is shown to the user in a message box instead of propagating.
/// </summary>
/// <param name="document">The MigraDoc document to render.</param>
private void createPDF(Document document)
{
    try
    {
        // Create a renderer for the MigraDoc document.
        var pdfRenderer = new PdfDocumentRenderer(false) { Document = document };

        // Layout and render document to PDF.
        pdfRenderer.RenderDocument();

        // Save the document into the in-memory stream.
        using (pdf = new MemoryStream())
        {
            pdfRenderer.PdfDocument.Save(pdf);

            // ToArray() copies exactly the bytes that were written. GetBuffer()
            // would expose the whole backing array, including unused capacity
            // past Length, so the loader would see trailing junk after the PDF.
            // The reader stream is intentionally not disposed here: it is handed
            // to the loaded document, which is kept in the pdfDocument field.
            var pdfReader = new MemoryStream(pdf.ToArray());

            // NOTE(review): the original author reported that pdfium.dll could
            // not be found at this point - confirm the native library is deployed.
            pdfDocument = PdfiumViewer.PdfDocument.Load(pdfReader);
            pdfViewerShape.Renderer.Load(pdfDocument);
        }
    }
    catch (Exception ex)
    {
        MessageBox.Show(ex.ToString());
    }
}
/// <summary>
/// Creates a print document for the given PDF document and print mode.
/// </summary>
/// <param name="document">Document to print; must not be null.</param>
/// <param name="printMode">Print mode stored for later use.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="document"/> is null.
/// </exception>
public PdfPrintDocument(PdfDocument document, PdfPrintMode printMode)
{
    if (document == null)
    {
        throw new ArgumentNullException("document");
    }

    _printMode = printMode;
    _document = document;
}
/// <summary>
/// Renders one page of <paramref name="document"/> and writes it to
/// <paramref name="destFileName"/> as a PNG image.
/// </summary>
/// <param name="document">Document that supplies the page size and the rendering.</param>
/// <param name="pageNumber">Index used for both <c>PageSizes</c> and <c>Render</c>.</param>
/// <param name="destFileName">Path of the PNG file to write.</param>
private static void SavePng(PdfDocument document, int pageNumber, string destFileName)
{
    SizeF pageSize = document.PageSizes[pageNumber];

    // Page sizes are scaled by dpi / 72 to get the pixel dimensions.
    int pixelWidth = (int)Math.Round(pageSize.Width * (float)dpi / 72F);
    int pixelHeight = (int)Math.Round(pageSize.Height * (float)dpi / 72F);

    using (Image rendered = document.Render(pageNumber, pixelWidth, pixelHeight, dpi, dpi, true))
    {
        rendered.Save(destFileName, ImageFormat.Png);
    }
}
/// <summary>
/// Loads the PDF at <paramref name="pdfPath"/> into the renderer control,
/// records it as the current file, updates the window title and opens a
/// <c>PdfReader</c> on the same path.
/// </summary>
/// <param name="pdfPath">Path of the PDF file to load.</param>
/// <param name="changeOriginal">
/// When true, <c>originalFile</c> is also set to <paramref name="pdfPath"/>.
/// </param>
private void loadPdf(string pdfPath, bool changeOriginal)
{
    // Show the document in the viewer control.
    pdfRenderer1.Load(PdfiumViewer.PdfDocument.Load(pdfPath));
    pdfRenderer1.Show();

    // Track which file is open.
    currentFile = pdfPath;
    if (changeOriginal)
    {
        originalFile = pdfPath;
    }

    // The title always shows the original file, not necessarily the current one.
    Text = "BetaSign - " + originalFile;

    reader = new PdfReader(pdfPath);
}
/// <summary>
/// Loads a <see cref="PdfDocument"/> into the control, resets the display
/// rectangle location to the origin and reloads the view.
/// </summary>
/// <param name="document">Document to load.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="document"/> is null.
/// </exception>
/// <exception cref="ArgumentException">
/// <paramref name="document"/> has no pages.
/// </exception>
public void Load(PdfDocument document)
{
    if (document == null)
    {
        throw new ArgumentNullException("document");
    }

    if (document.PageCount == 0)
    {
        throw new ArgumentException("Document does not contain any pages", "document");
    }

    _document = document;

    Point origin = new Point(0, 0);
    SetDisplayRectLocation(origin);
    ReloadDocument();
}
/// <summary>
/// Loads the PDF document at <paramref name="path"/> into <c>doc</c> and then
/// runs <c>preRender</c>.
/// </summary>
/// <param name="path">Path of the PDF file passed to <c>PdfDocument.Load</c>.</param>
public PageYielder(string path)
{
    // The document must be assigned before preRender() is called.
    doc = PdfDocument.Load(path);

    preRender();
}
/// <summary>
/// Extracts text from a raw PDF and returns it in serialized form.
/// Pages with more than two text lines are handled as text pages; every other
/// page is rendered to an image and read through the image-based helpers.
/// </summary>
/// <param name="pdfFile">Raw bytes of the PDF file.</param>
/// <param name="extractedAuditLetterText">Receives the extracted text object that was serialized.</param>
/// <param name="ts">Elapsed time written to the timing log under the "Ocr" label (text-only documents).</param>
/// <param name="dt1">Start time from which the "Reader" elapsed time in the timing log is computed.</param>
/// <param name="num">Number written at the start of the timing log entry.</param>
/// <returns>The result of <c>SerializeExtractedAuditLetterText()</c> on the extracted text.</returns>
public static string ExtractTextFromRawPdf(byte[] pdfFile, out ExtractedAuditLetterText extractedAuditLetterText, TimeSpan ts, DateTime dt1, int num)
{
    List<MergedTraversedLine> txtFormattedLines = new List<MergedTraversedLine>();
    List<int> txtFormattedNum = new List<int>();
    List<int> imageFormattedNum = new List<int>();

    pdfRead.pdfObject.PdfDocument doc = new pdfRead.pdfObject.PdfDocument(pdfFile);

    // NOTE(review): the result of this call was discarded in the original as well.
    // It is kept in case it has a side effect on doc - confirm and remove if not.
    doc.PageTextLine(doc.pages.Count - 1);

    // Classify every page: more than two text lines => text page, otherwise image page.
    for (int i = 0; i < doc.pages.Count; i++)
    {
        List<PdfTextLine> textLines = doc.PageTextLine(i);
        if (textLines.Count > 2)
        {
            txtFormattedNum.Add(i);
        }
        else
        {
            imageFormattedNum.Add(i);
        }
    }

    ExtractedAuditLetterText extractedTexts = new ExtractedAuditLetterText();
    ExtractedAuditLetterText extractedImageTexts = new ExtractedAuditLetterText();

    // Get text from txt-formatted pdf.
    if (txtFormattedNum.Count != 0)
    {
        // Lines come from the document grouped by font size; they are numbered
        // consecutively in enumeration order.
        Dictionary<double, List<PdfTextLine>> linesKeyValue = doc.lineFontSize;
        int index = 0;
        foreach (var fontGroup in linesKeyValue)
        {
            foreach (var textLine in fontGroup.Value)
            {
                txtFormattedLines.Add(new MergedTraversedLine { Index = index, Text = textLine.text });
                index++;
            }
        }

        // NOTE(review): the original stored this result in an unused local. The
        // call is kept in case the conversion touches the lines - confirm.
        PdfExtractor.Utilities.ConvertToExtractedAuditLetterTexts(txtFormattedLines);

        // Text found inside embedded images goes to Others.
        var littleImages = doc.ExtractImages();
        foreach (var img in littleImages)
        {
            var lines = Utilities.ExtractLinesFromImage(img);
            foreach (var line in lines)
            {
                string other = string.Join(" ", line.Words.Select(r => r.Text));
                if (other == "auren")
                {
                    other = "Auren";
                }
                extractedTexts.Others.Add(other);
            }
        }
    }

    // Get text from image-formatted pdf pages.
    if (imageFormattedNum.Count != 0)
    {
        int titlePageStartIndexNum = 0;
        int titlePageEndIndexNum = -1;
        List<Image> imagePdf = new List<Image>();
        try
        {
            // The Pdfium document and its stream are only needed while rendering,
            // so they are released as soon as the page images exist.
            using (MemoryStream pdfStream = new MemoryStream(pdfFile))
            using (PdfiumViewer.PdfDocument document = PdfiumViewer.PdfDocument.Load(pdfStream))
            {
                foreach (int i in imageFormattedNum)
                {
                    imagePdf.Add(document.Render(i, Consts.DpiX, Consts.DpiY, PdfRenderFlags.CorrectFromDpi));
                }
            }

            List<List<Line>> linesOfImagePdf = new List<List<Line>>();
            foreach (var img in imagePdf)
            {
                linesOfImagePdf.Add(Utilities.ExtractLinesFromImage(img));
            }

            extractedTexts.Others.AddRange(Utilities.GetRedundantLines(imagePdf, linesOfImagePdf));

            for (int i = 0; i < imagePdf.Count; i++)
            {
                // Add small region text to ExtractedAuditLetterText.Others.
                extractedTexts.Others.AddRange(Utilities.RemoveSmallRegion(imagePdf[i]));

                // Get raw context in main body.
                List<TraversedLine> rawImageTexts = PdfExtractor.Utilities.GetContents(imagePdf[i]);
                foreach (var line in rawImageTexts)
                {
                    bool mentionsSha = line.Text.IndexOf("sha-", StringComparison.OrdinalIgnoreCase) >= 0;
                    bool mentionsThumb = line.Text.IndexOf("thumb", StringComparison.OrdinalIgnoreCase) >= 0;
                    if (mentionsSha || mentionsThumb)
                    {
                        // On these lines, letters that look like digits are replaced by the digits.
                        line.Text = line.Text.Replace("O", "0").Replace("o", "0").Replace("i", "1").Replace("I", "1").Replace("l", "1");
                    }
                }

                // Merge raw context by paragraph.
                List<MergedTraversedLine> mergedImageTexts = PdfExtractor.Utilities.MergeTraversedLines(rawImageTexts);
                titlePageEndIndexNum += mergedImageTexts.Count;
                List<MergedTraversedLine> titleLines = mergedImageTexts.Where(x => x.IsTitle == true).ToList<MergedTraversedLine>();

                // If this is a title page, record the page's start/end index in TitlePageRanges
                // and each title's index in TitleRanges.
                // NOTE(review): TitlePageRanges is collected here but never copied into the
                // returned object below - confirm whether that is intended.
                if (titleLines.Count > 0)
                {
                    extractedImageTexts.TitlePageRanges.Add(new KeyValuePair<int, int>(titlePageStartIndexNum, titlePageEndIndexNum));
                    foreach (MergedTraversedLine title in titleLines)
                    {
                        extractedImageTexts.TitleRanges.Add(new KeyValuePair<int, int>(title.Index + titlePageStartIndexNum, title.Index + titlePageStartIndexNum));
                    }
                }
                titlePageStartIndexNum += mergedImageTexts.Count;

                // Add titles and contents of this page.
                ExtractedAuditLetterText pageTexts = PdfExtractor.Utilities.ConvertToExtractedAuditLetterTexts(mergedImageTexts);
                extractedImageTexts.Titles.AddRange(pageTexts.Titles);
                extractedImageTexts.Contents.AddRange(pageTexts.Contents);
            }
        }
        finally
        {
            // Rendered page bitmaps are only needed during extraction.
            foreach (Image image in imagePdf)
            {
                image.Dispose();
            }
        }
    }

    if (imageFormattedNum.Count == 0)
    {
        // Text-only document: no image results to merge; also record timing.
        AppendAuditLetterTextLines(extractedTexts, txtFormattedLines);

        StringBuilder sb = new StringBuilder();
        sb.Append(num + "Ocr: " + ts.TotalMilliseconds.ToString() + " ");
        string serialzedAuditLetterText2 = extractedTexts.SerializeExtractedAuditLetterText();
        extractedAuditLetterText = extractedTexts;
        DateTime dt2 = System.DateTime.Now;
        ts = dt2.Subtract(dt1);
        sb.Append(" Reader: " + ts.TotalMilliseconds.ToString() + "\n");
        TryAppendAuditLetterTimingLog(sb.ToString());
        return serialzedAuditLetterText2;
    }

    if (txtFormattedNum.Count == 0)
    {
        // Image-only document.
        AppendAuditLetterImageTexts(extractedTexts, extractedImageTexts);
    }
    else if (txtFormattedNum[0] < imageFormattedNum[0])
    {
        // Mixed document whose first text page precedes its first image page.
        AppendAuditLetterTextLines(extractedTexts, txtFormattedLines);
        AppendAuditLetterImageTexts(extractedTexts, extractedImageTexts);
    }
    else
    {
        // Mixed document that starts with an image page.
        AppendAuditLetterImageTexts(extractedTexts, extractedImageTexts);
        AppendAuditLetterTextLines(extractedTexts, txtFormattedLines);
    }

    string serialzedAuditLetterText = extractedTexts.SerializeExtractedAuditLetterText();
    extractedAuditLetterText = extractedTexts;
    return serialzedAuditLetterText;
}

/// <summary>Converts the text-page lines and appends their titles and contents to <paramref name="target"/>.</summary>
private static void AppendAuditLetterTextLines(ExtractedAuditLetterText target, List<MergedTraversedLine> lines)
{
    ExtractedAuditLetterText converted = PdfExtractor.Utilities.ConvertToExtractedAuditLetterTexts(lines);
    target.Titles.AddRange(converted.Titles);
    target.Contents.AddRange(converted.Contents);
}

/// <summary>Appends the titles, contents and title ranges gathered from image pages to <paramref name="target"/>.</summary>
private static void AppendAuditLetterImageTexts(ExtractedAuditLetterText target, ExtractedAuditLetterText imageTexts)
{
    target.Titles.AddRange(imageTexts.Titles);
    target.Contents.AddRange(imageTexts.Contents);
    target.TitleRanges.AddRange(imageTexts.TitleRanges);
}

/// <summary>Appends a timing entry to the diagnostics file; a failure to write is ignored.</summary>
private static void TryAppendAuditLetterTimingLog(string entry)
{
    string fileTime = @"C:\Users\t-holu\Documents\AuditLetter\JixiProjectData\comTextTime.txt";
    try
    {
        File.AppendAllText(fileTime, entry);
    }
    catch (IOException)
    {
        // Diagnostics only: the hard-coded directory does not exist on most
        // machines, and that must not fail the extraction.
    }
    catch (UnauthorizedAccessException)
    {
        // Diagnostics only: see above.
    }
}