Exemplo n.º 1
0
 /// <summary>
 /// Initializes an <c>ImportPage</c> by storing the supplied database context,
 /// session identifier, PDF document and page index on the instance.
 /// </summary>
 /// <param name="Database">Database context kept on the instance.</param>
 /// <param name="SessionId">Session identifier kept on the instance.</param>
 /// <param name="PdfiumDocument">PDF document kept on the instance.</param>
 /// <param name="PageIndex">Page index kept on the instance (presumably a page of <paramref name="PdfiumDocument"/> - confirm).</param>
 public ImportPage(DiscoDataContext Database, string SessionId, PdfDocument PdfiumDocument, int PageIndex)
 {
     // Parameters share the member names, so "this." is required to disambiguate.
     this.Database = Database;
     this.SessionId = SessionId;
     this.PdfiumDocument = PdfiumDocument;
     this.PageIndex = PageIndex;
 }
Exemplo n.º 2
0
        /// <summary>
        /// Renders a MigraDoc document to PDF in memory, loads the result with
        /// PdfiumViewer and shows it in the viewer control.
        /// </summary>
        /// <param name="document">MigraDoc document to render.</param>
        /// <remarks>
        /// Any exception raised while rendering or loading is shown to the user in a
        /// message box instead of being propagated.
        /// </remarks>
        private void createPDF(Document document)
        {
            try
            {
                // Create a renderer for the MigraDoc document.
                var pdfRenderer = new PdfDocumentRenderer(false)
                {
                    Document = document
                };

                // Layout and render document to PDF.
                pdfRenderer.RenderDocument();

                // Save the document into an in-memory stream.
                using (pdf = new MemoryStream())
                {
                    pdfRenderer.PdfDocument.Save(pdf);

                    // ToArray() copies exactly the bytes that were written. GetBuffer()
                    // would expose the whole internal buffer, including unused capacity
                    // past the end of the PDF data.
                    var pdfReader = new MemoryStream(pdf.ToArray());

                    // pdfReader is intentionally not disposed here: presumably the loaded
                    // document keeps reading from it while it is displayed - confirm.
                    pdfDocument = PdfiumViewer.PdfDocument.Load(pdfReader);

                    // NOTE(review): the original author reported a "pdfium.dll not found"
                    // failure at this call; make sure the native library is deployed
                    // next to the application.
                    pdfViewerShape.Renderer.Load(pdfDocument);
                }
            }
            catch (Exception ex)
            {
                MessageBox.Show(ex.ToString());
            }
        }
Exemplo n.º 3
0
        /// <summary>
        /// Initializes a print document for the given PDF document and print mode.
        /// </summary>
        /// <param name="document">PDF document to print; must not be null.</param>
        /// <param name="printMode">Print mode to remember for this print document.</param>
        /// <exception cref="ArgumentNullException"><paramref name="document"/> is null.</exception>
        public PdfPrintDocument(PdfDocument document, PdfPrintMode printMode)
        {
            // Reject a missing document before touching any state.
            if (document == null)
            {
                throw new ArgumentNullException("document");
            }

            _printMode = printMode;
            _document = document;
        }
Exemplo n.º 4
0
 /// <summary>
 /// Renders one page of a PDF document and writes it to disk as a PNG image.
 /// </summary>
 /// <param name="document">Document that contains the page.</param>
 /// <param name="pageNumber">Index of the page in <c>document.PageSizes</c>.</param>
 /// <param name="destFileName">Path of the PNG file to write.</param>
 private static void SavePng(PdfDocument document, int pageNumber, string destFileName)
 {
     // The page size is scaled by dpi / 72 to obtain the bitmap size in pixels
     // (presumably PageSizes is in points, 72 per inch - confirm).
     SizeF pagePoints = document.PageSizes[pageNumber];
     float resolution = (float)dpi;
     int pixelWidth = (int)Math.Round(pagePoints.Width * resolution / 72F);
     int pixelHeight = (int)Math.Round(pagePoints.Height * resolution / 72F);

     Image rendered = document.Render(pageNumber, pixelWidth, pixelHeight, dpi, dpi, true);
     try
     {
         rendered.Save(destFileName, ImageFormat.Png);
     }
     finally
     {
         // Release the bitmap whether or not saving succeeded.
         if (rendered != null)
         {
             rendered.Dispose();
         }
     }
 }
Exemplo n.º 5
0
        /// <summary>
        /// Opens a PDF file, shows it in the renderer control and makes it the current file.
        /// </summary>
        /// <param name="pdfPath">Path of the PDF file to open.</param>
        /// <param name="changeOriginal">
        /// When true, <paramref name="pdfPath"/> also becomes the original file; the
        /// window title always shows the original file.
        /// </param>
        private void loadPdf(string pdfPath, bool changeOriginal)
        {
            // Hand the file to the on-screen renderer first.
            var viewerDocument = PdfiumViewer.PdfDocument.Load(pdfPath);
            pdfRenderer1.Load(viewerDocument);
            pdfRenderer1.Show();

            // Track which file is displayed and, optionally, which one is the original.
            currentFile = pdfPath;
            if (changeOriginal)
                originalFile = pdfPath;

            this.Text = string.Concat("BetaSign - ", originalFile);

            // A second reader over the same file is kept in the 'reader' member.
            reader = new PdfReader(pdfPath);
        }
Exemplo n.º 6
0
        /// <summary>
        /// Displays the given <see cref="PdfDocument"/> in this control, resetting the
        /// display rectangle to the origin.
        /// </summary>
        /// <param name="document">Document to display; must contain at least one page.</param>
        /// <exception cref="ArgumentNullException"><paramref name="document"/> is null.</exception>
        /// <exception cref="ArgumentException"><paramref name="document"/> has no pages.</exception>
        public void Load(PdfDocument document)
        {
            if (document == null)
            {
                throw new ArgumentNullException("document");
            }

            if (document.PageCount == 0)
            {
                throw new ArgumentException("Document does not contain any pages", "document");
            }

            _document = document;

            // Start at the top-left corner before the document is reloaded.
            var origin = new Point(0, 0);
            SetDisplayRectLocation(origin);

            ReloadDocument();
        }
Exemplo n.º 7
0
        /// <summary>
        /// Loads the PDF document at <paramref name="path"/> into <c>doc</c> and then
        /// runs <c>preRender</c>.
        /// </summary>
        /// <param name="path">Path of the PDF file to load.</param>
        public PageYielder(string path) {
            doc = PdfDocument.Load(path);
            // preRender is defined elsewhere; presumably it prepares pages from 'doc' - confirm.
            preRender();

        }
Exemplo n.º 8
0
        /// <summary>
        /// Extracts the text of a PDF given as raw bytes and returns it both as an
        /// <see cref="ExtractedAuditLetterText"/> and in serialized form.
        /// </summary>
        /// <remarks>
        /// Pages with more than two text lines are read through the text layer; all other
        /// pages are rendered to images and passed through the image-based extraction
        /// utilities. When both kinds of page are present, the results of the kind whose
        /// first page comes first are appended first.
        /// </remarks>
        /// <param name="pdfFile">Raw bytes of the PDF file.</param>
        /// <param name="extractedAuditLetterText">Receives the extracted text.</param>
        /// <param name="ts">Elapsed time written as the "Ocr" figure of the timing log (text-only documents).</param>
        /// <param name="dt1">Start time used to compute the "Reader" figure of the timing log (text-only documents).</param>
        /// <param name="num">Number prefixed to the timing log entry (text-only documents).</param>
        /// <returns>The serialized form of <paramref name="extractedAuditLetterText"/>.</returns>
        public static string ExtractTextFromRawPdf(byte[] pdfFile, out ExtractedAuditLetterText extractedAuditLetterText, TimeSpan ts, DateTime dt1, int num)
        {
            List<MergedTraversedLine> txtFormattedLines = new List<MergedTraversedLine>();
            List<int> txtFormattedNum = new List<int>();
            List<int> imageFormattedNum = new List<int>();

            pdfRead.pdfObject.PdfDocument doc = new pdfRead.pdfObject.PdfDocument(pdfFile);

            // The result of this call is discarded; presumably it primes state inside
            // 'doc' before the per-page scan - TODO confirm whether it is still needed.
            doc.PageTextLine(doc.pages.Count - 1);

            // Classify every page: more than two text lines means the text layer is usable.
            for (int i = 0; i < doc.pages.Count; i++)
            {
                List<PdfTextLine> textLines = doc.PageTextLine(i);
                if (textLines.Count > 2)
                {
                    txtFormattedNum.Add(i);
                }
                else
                {
                    imageFormattedNum.Add(i);
                }
            }

            ExtractedAuditLetterText extractedTexts = new ExtractedAuditLetterText();
            ExtractedAuditLetterText extractedImageTexts = new ExtractedAuditLetterText();

            // Get text from text-formatted pages.
            if (txtFormattedNum.Count != 0)
            {
                // Lines from the PDF, grouped by font size; flatten them with a running index.
                Dictionary<double, List<PdfTextLine>> linesKeyValue = doc.lineFontSize;
                int index = 0;
                foreach (var group in linesKeyValue)
                {
                    foreach (var textLine in group.Value)
                    {
                        var bdcLine = new MergedTraversedLine();
                        bdcLine.Index = index;
                        bdcLine.Text = textLine.text;
                        index++;
                        txtFormattedLines.Add(bdcLine);
                    }
                }

                // Text found inside images embedded in the document goes to Others.
                var littleImages = doc.ExtractImages();
                foreach (var img in littleImages)
                {
                    var lines = Utilities.ExtractLinesFromImage(img);
                    foreach (var line in lines)
                    {
                        string other = string.Join(" ", line.Words.Select(r => r.Text));
                        if (other == "auren")
                        {
                            other = "Auren";
                        }
                        extractedTexts.Others.Add(other);
                    }
                }
            }

            // Get text from image-formatted pages.
            if (imageFormattedNum.Count != 0)
            {
                int titlePageStartIndexNum = 0;
                int titlePageEndIndexNum = -1;

                List<Image> imagePdf = new List<Image>();
                try
                {
                    // Render the image pages; the document and its backing stream are
                    // released as soon as rendering is done.
                    using (MemoryStream pdfStream = new MemoryStream(pdfFile))
                    using (PdfiumViewer.PdfDocument document = PdfiumViewer.PdfDocument.Load(pdfStream))
                    {
                        foreach (int i in imageFormattedNum)
                        {
                            Image image = document.Render(i, Consts.DpiX, Consts.DpiY, PdfRenderFlags.CorrectFromDpi);
                            imagePdf.Add(image);
                        }
                    }

                    List<List<Line>> linesOfImagePdf = new List<List<Line>>();
                    foreach (var img in imagePdf)
                    {
                        linesOfImagePdf.Add(Utilities.ExtractLinesFromImage(img));
                    }

                    extractedTexts.Others.AddRange(Utilities.GetRedundantLines(imagePdf, linesOfImagePdf));

                    for (int i = 0; i < imagePdf.Count; i++)
                    {
                        // Add small region text to ExtractedAuditLetterText.Others.
                        extractedTexts.Others.AddRange(Utilities.RemoveSmallRegion(imagePdf[i]));

                        // Get raw context in main body.
                        List<TraversedLine> rawImageTexts = PdfExtractor.Utilities.GetContents(imagePdf[i]);

                        // On lines mentioning "sha-" or "thumb", map letters that resemble
                        // digits (O/o -> 0, i/I/l -> 1).
                        foreach (var line in rawImageTexts)
                        {
                            if (line.Text.IndexOf("sha-", StringComparison.OrdinalIgnoreCase) >= 0 || (line.Text.IndexOf("thumb", StringComparison.OrdinalIgnoreCase) >= 0))
                            {
                                line.Text = line.Text.Replace("O", "0").Replace("o", "0").Replace("i", "1").Replace("I", "1").Replace("l", "1");
                            }
                        }

                        // Merge raw context by paragraph.
                        List<MergedTraversedLine> mergedImageTexts = PdfExtractor.Utilities.MergeTraversedLines(rawImageTexts);

                        titlePageEndIndexNum += mergedImageTexts.Count;
                        List<MergedTraversedLine> titleLines = mergedImageTexts.Where(x => x.IsTitle == true).ToList<MergedTraversedLine>();

                        // If this is a title page, record its start/end index in TitlePageRanges
                        // and the index of each title in TitleRanges.
                        if (titleLines.Count > 0)
                        {
                            extractedImageTexts.TitlePageRanges.Add(new KeyValuePair<int, int>(titlePageStartIndexNum, titlePageEndIndexNum));
                            foreach (MergedTraversedLine title in titleLines)
                            {
                                extractedImageTexts.TitleRanges.Add(new KeyValuePair<int, int>(title.Index + titlePageStartIndexNum, title.Index + titlePageStartIndexNum));
                            }
                        }
                        titlePageStartIndexNum += mergedImageTexts.Count;

                        // Add titles, contents.
                        ExtractedAuditLetterText tempText = PdfExtractor.Utilities.ConvertToExtractedAuditLetterTexts(mergedImageTexts);

                        extractedImageTexts.Titles.AddRange(tempText.Titles);
                        extractedImageTexts.Contents.AddRange(tempText.Contents);
                    }
                }
                finally
                {
                    // The rendered bitmaps are only needed during extraction.
                    foreach (Image image in imagePdf)
                    {
                        if (image != null)
                        {
                            image.Dispose();
                        }
                    }
                }
            }

            // NOTE(review): TitlePageRanges collected for image pages are never copied
            // into the returned object below - confirm whether that is intended.
            if (imageFormattedNum.Count == 0)
            {
                // Text-only document: this path also records timing figures.
                AppendTextPageResults(extractedTexts, txtFormattedLines);

                StringBuilder timing = new StringBuilder();
                timing.Append(num + "Ocr:     " + ts.TotalMilliseconds.ToString() + "  ");

                string serializedTextOnly = extractedTexts.SerializeExtractedAuditLetterText();
                extractedAuditLetterText = extractedTexts;

                DateTime dt2 = System.DateTime.Now;
                ts = dt2.Subtract(dt1);

                timing.Append("   Reader:     " + ts.TotalMilliseconds.ToString() + "\n");
                TryAppendTimingLog(timing.ToString());
                return serializedTextOnly;
            }
            else if (txtFormattedNum.Count == 0)
            {
                AppendImagePageResults(extractedTexts, extractedImageTexts);
            }
            else if (txtFormattedNum[0] < imageFormattedNum[0])
            {
                // The first text page precedes the first image page: text results go first.
                AppendTextPageResults(extractedTexts, txtFormattedLines);
                AppendImagePageResults(extractedTexts, extractedImageTexts);
            }
            else
            {
                AppendImagePageResults(extractedTexts, extractedImageTexts);
                AppendTextPageResults(extractedTexts, txtFormattedLines);
            }

            string serializedAuditLetterText = extractedTexts.SerializeExtractedAuditLetterText();

            extractedAuditLetterText = extractedTexts;

            return serializedAuditLetterText;
        }

        /// <summary>
        /// Converts the collected text-layer lines and appends their titles and contents to <paramref name="target"/>.
        /// </summary>
        private static void AppendTextPageResults(ExtractedAuditLetterText target, List<MergedTraversedLine> textLines)
        {
            ExtractedAuditLetterText converted = PdfExtractor.Utilities.ConvertToExtractedAuditLetterTexts(textLines);
            target.Titles.AddRange(converted.Titles);
            target.Contents.AddRange(converted.Contents);
        }

        /// <summary>
        /// Appends the titles, contents and title ranges gathered from image pages to <paramref name="target"/>.
        /// </summary>
        private static void AppendImagePageResults(ExtractedAuditLetterText target, ExtractedAuditLetterText imageResults)
        {
            target.Titles.AddRange(imageResults.Titles);
            target.Contents.AddRange(imageResults.Contents);
            target.TitleRanges.AddRange(imageResults.TitleRanges);
        }

        /// <summary>
        /// Appends one entry to the timing log, skipping the write when the log directory is unavailable.
        /// </summary>
        private static void TryAppendTimingLog(string entry)
        {
            string fileTime = @"C:\Users\t-holu\Documents\AuditLetter\JixiProjectData\comTextTime.txt";
            try
            {
                // The log location is machine-specific; only write where it exists.
                if (Directory.Exists(Path.GetDirectoryName(fileTime)))
                {
                    File.AppendAllText(fileTime, entry);
                }
            }
            catch (IOException)
            {
                // Timing output is diagnostic only and must not fail the extraction.
            }
            catch (UnauthorizedAccessException)
            {
                // Timing output is diagnostic only and must not fail the extraction.
            }
        }