Exemplo n.º 1
0
        /// <summary>
        /// Indexes the PDF file at <c>Location</c>: the text of every page is extracted and passed
        /// to <c>AddToIndex</c>. Progress and the elapsed time are written to the console.
        /// </summary>
        protected override void Index()
        {
            var startTime = DateTime.Now;

            Console.Write($"Indexing {Name}");

            #region PDF
            // Each disposable is owned by its own using so the file handle is released even when
            // the reader/document constructor or the text extraction throws.
            using (var stream = new FileStream(Location, FileMode.Open, FileAccess.Read))
            using (var pdfReader = new Pdf.PdfReader(stream))
            using (var pdfDocument = new Pdf.PdfDocument(pdfReader))
            {
                var totalPageNumber = pdfDocument.GetNumberOfPages();

                // iText page numbers are 1-based.
                for (var i = 1; i <= totalPageNumber; i++)
                {
                    var text = PdfTextExtractor.GetTextFromPage(pdfDocument.GetPage(i));

                    this.AddToIndex(texts: text);
                }
            }
            #endregion

            Console.Write($" >==> {Thumbnail.Count} unique words. {(DateTime.Now - startTime).TotalMilliseconds}ms\n");
        }
Exemplo n.º 2
0
        //////////////////////////////////////////////////////////
        //// public properties
        #region public properties

        /// <summary>
        /// Extracts the text of every page of a PDF stream and returns it as a single string.
        /// </summary>
        /// <param name="file">Stream containing the PDF document.</param>
        /// <param name="errors">
        /// Receives the exceptions raised while processing individual pages, or <c>null</c> when
        /// every page was processed without an exception.
        /// </param>
        /// <returns>The cleaned text of all pages that could be read, in page order.</returns>
        public static string Convert(Stream file, out List <Exception> errors)
        {
            errors = null;

            StringBuilder collected = new StringBuilder();

            using (iText7.PdfReader pdfReader = new iText7.PdfReader(file))
            using (iText7.PdfDocument document = new iText7.PdfDocument(pdfReader))
            {
                int lastPage = document.GetNumberOfPages();

                // Pages are numbered from 1; a failing page is recorded and skipped so the
                // remaining pages are still converted.
                for (int pageNumber = 1; pageNumber <= lastPage; ++pageNumber)
                {
                    try
                    {
                        string rawText = iText7.Canvas.Parser.PdfTextExtractor.GetTextFromPage(document.GetPage(pageNumber));
                        collected.Append(Common.CleanPdfText(rawText));
                    }
                    catch (Exception pageFailure)
                    {
                        // The list is created lazily so callers can test for null on success.
                        if (errors == null)
                        {
                            errors = new List <Exception>();
                        }
                        errors.Add(pageFailure);
                    }
                }
            }

            return collected.ToString();
        }
Exemplo n.º 3
0
        /// <summary>
        /// Extracts the text of all pages of the PDF at <paramref name="sFilePath"/>.
        /// </summary>
        /// <param name="sFilePath">Path of the PDF file to read.</param>
        /// <returns>
        /// The concatenated page text, or <c>null</c> when reading fails (the error message is
        /// shown to the user in a <c>MessageDialog</c> first).
        /// </returns>
        private async Task <string> pdfTextExtract(string sFilePath)
        {
            try
            {
                // StringBuilder avoids re-copying the accumulated text for every page.
                var texto = new System.Text.StringBuilder();

                // The usings close the reader and document on every path, including when
                // extraction of a page throws.
                using (PdfReader reader = new PdfReader(sFilePath))
                using (iText.Kernel.Pdf.PdfDocument pdf = new iText.Kernel.Pdf.PdfDocument(reader))
                {
                    int pageCount = pdf.GetNumberOfPages();
                    for (int page = 1; page <= pageCount; page++)
                    {
                        // A fresh strategy per page so text from earlier pages is not carried over.
                        ITextExtractionStrategy its = new SimpleTextExtractionStrategy();
                        texto.Append(PdfTextExtractor.GetTextFromPage(pdf.GetPage(page), its));
                    }
                }

                return(texto.ToString());
            }
            catch (Exception Ex)
            {
                await new MessageDialog("Error al abrir archivo: " + Ex.Message).ShowAsync();
                return(null);
            }
        }
Exemplo n.º 4
0
        /// <summary>
        /// Merges several in-memory PDF documents into one PDF.
        /// </summary>
        /// <param name="pdfs">The PDF files to merge, one byte array each; all pages of each file are appended in enumeration order.</param>
        /// <returns>The bytes of the merged PDF document.</returns>
        public static byte[] Combine(IEnumerable <byte[]> pdfs)
        {
            using (var output = new MemoryStream())
            {
                using (var outputWriter = new PdfWriter(output))
                using (var target = new iText.Kernel.Pdf.PdfDocument(outputWriter))
                {
                    var pdfMerger = new PdfMerger(target);

                    foreach (var sourceBytes in pdfs)
                    {
                        using (var input = new MemoryStream(sourceBytes))
                        using (var sourceReader = new iText.Kernel.Pdf.PdfReader(input))
                        {
                            // Unethical reading must be enabled, otherwise opening the document fails with a password error.
                            sourceReader.SetUnethicalReading(true);

                            using (var source = new iText.Kernel.Pdf.PdfDocument(sourceReader))
                            {
                                // Copy the full page range: first page (1) through the last page.
                                int lastPage = source.GetNumberOfPages();
                                pdfMerger.Merge(source, 1, lastPage);
                            }
                        }
                    }
                }

                // Read the buffer only after the target document has been closed above, so the
                // merged PDF is complete.
                return output.ToArray();
            }
        }
Exemplo n.º 5
0
        /// <summary>
        /// Copies the PDF at <c>PATH</c> to <c>PATH_PAGED</c>, stamping a "Strona X z Y"
        /// label near the bottom-right corner of every page.
        /// </summary>
        public static void AddPages()
        {
            // The reader is opened first so a missing input fails before the output file is created.
            // Every resource is in a using so file handles are released even if stamping throws.
            using (PdfReader reader = new PdfReader(PATH))
            using (FileStream output = new FileStream(PATH_PAGED, FileMode.Create, FileAccess.Write))
            using (PdfWriter writer = new PdfWriter(output))
            using (PdfDocument pdfDocument = new PdfDocument(reader, writer))
            {
                Document doc = new Document(pdfDocument);

                int numberOfPages = pdfDocument.GetNumberOfPages();

                for (int i = 1; i <= numberOfPages; i++)
                {
                    // Use the size of the page being stamped (not page 1's) so the label stays
                    // 50 units from the right edge when pages differ in size.
                    var size = pdfDocument.GetPage(i).GetPageSize();

                    // Write right-aligned text anchored at the given point of page i.
                    doc.ShowTextAligned(new Paragraph("Strona " + i + " z " + numberOfPages),
                                        size.GetWidth() - 50, 20, i, TextAlignment.RIGHT, VerticalAlignment.BOTTOM, 0);
                }

                doc.Close();
            }
        }
Exemplo n.º 6
0
        /// <summary>
        /// Writes a copy of a PDF file with a "Page X of Y" label stamped on every page.
        /// </summary>
        /// <param name="fileName">Path of the PDF file to read.</param>
        /// <returns>
        /// The name (without directory) of the numbered copy, which is written under
        /// <c>ReportHeaderDetails.WWWroot</c>.
        /// </returns>
        public static string AddPageNumberToPdf(string fileName)
        {
            using PdfReader source = new PdfReader(fileName);

            // Output name is derived from the current file time, offset by 1001.
            long stamp = DateTime.Now.ToFileTimeUtc() + 1001;
            string numberedName = "cashBook_" + stamp + ".pdf";
            string numberedPath = Path.Combine(ReportHeaderDetails.WWWroot, numberedName);

            using PdfWriter target = new PdfWriter(numberedPath);
            using PdfDocument stampedPdf = new PdfDocument(source, target);

            Document layout = new Document(stampedPdf);
            int pageTotal = stampedPdf.GetNumberOfPages();

            int page = 1;
            while (page <= pageTotal)
            {
                // Right-aligned label anchored at the fixed point (559, 806) of the page.
                Paragraph label = new Paragraph($"Page {page} of {pageTotal}");
                layout.ShowTextAligned(label, 559, 806, page, TextAlignment.RIGHT, VerticalAlignment.BOTTOM, 0);
                page++;
            }

            layout.Close();
            return numberedName;
        }
Exemplo n.º 7
0
 /// <summary>
 /// Extracts the text of every page of a PDF and returns it as one string.
 /// </summary>
 /// <param name="url">Location of the PDF, passed directly to <c>PdfReader</c>.</param>
 /// <returns>
 /// The concatenated page text, or the literal "Error reading file" when anything fails.
 /// The result is also stored in <c>text</c>, which is declared outside this method.
 /// </returns>
 public string GetTextFromPDF(string url)
 {
     try
     {
         StringBuilder ster = new StringBuilder();

         // Dispose the reader and document so the file is released on every path.
         using (PdfReader read = new PdfReader(url))
         using (iText.Kernel.Pdf.PdfDocument doc = new iText.Kernel.Pdf.PdfDocument(read))
         {
             int pageCount = doc.GetNumberOfPages();

             // iText pages are 1-based; starting at 0 makes GetPage throw, which sent every
             // call into the catch block below.
             for (int i = 1; i <= pageCount; i++)
             {
                 ster.Append(PdfTextExtractor.GetTextFromPage(doc.GetPage(i)));
             }
         }

         text = ster.ToString();
         return(text);
     }
     catch (Exception)
     {
         text = "Error reading file";
         return(text);
     }
 }
Exemplo n.º 8
0
        /// <summary>
        /// Lazily enumerates the paragraphs of a PDF document, page by page.
        /// </summary>
        /// <remarks>
        /// A paragraph is each non-empty piece of a page's cleaned text after splitting on
        /// <see cref="Environment.NewLine"/>. Because this is an iterator, the stream is only
        /// read while the returned sequence is being enumerated.
        /// </remarks>
        /// <param name="file">Stream containing the PDF document.</param>
        /// <returns>An <see cref="IEnumerable{T}"/> of paragraph strings in document order.</returns>
        public static IEnumerable <string> Paragraphs(Stream file)
        {
            string[] lineBreaks = { Environment.NewLine };

            using (iText7.PdfReader pdfReader = new iText7.PdfReader(file))
            using (iText7.PdfDocument document = new iText7.PdfDocument(pdfReader))
            {
                int lastPage = document.GetNumberOfPages();

                for (int pageNumber = 1; pageNumber <= lastPage; ++pageNumber)
                {
                    iText7.PdfPage current = document.GetPage(pageNumber);
                    string cleaned = Common.CleanPdfText(iText7.Canvas.Parser.PdfTextExtractor.GetTextFromPage(current));

                    // Empty entries are dropped so blank lines never become paragraphs.
                    foreach (string paragraph in cleaned.Split(lineBreaks, StringSplitOptions.RemoveEmptyEntries))
                    {
                        yield return paragraph;
                    }
                }
            }
        }
Exemplo n.º 9
0
        /// <summary>
        /// Renders the cash book entries as an A4 PDF table, then writes a second copy of that
        /// PDF with a "Page X of Y" label stamped on every page.
        /// </summary>
        /// <param name="cbList">Cash book entries; each one becomes a table row, in list order.</param>
        /// <returns>
        /// The file name (without directory) of the page-numbered PDF, which is written under
        /// <c>ReportHeaderDetails.WWWroot</c>.
        /// </returns>
        public static string PrintCashBook(List <CashBook> cbList)
        {
            // Local helpers for the cell shapes that repeat below.
            Paragraph CenteredCaption(string caption)
            {
                return new Paragraph(caption).SetTextAlignment(TextAlignment.CENTER);
            }

            Cell ShadedHeading(Paragraph caption)
            {
                return new Cell().SetBackgroundColor(new DeviceGray(0.75f)).Add(caption);
            }

            Cell CenteredValue(string value)
            {
                return new Cell().SetTextAlignment(TextAlignment.CENTER).Add(new Paragraph(value));
            }

            // ---- Pass 1: build the table document (without page numbers). ----
            string unnumberedName = "cashBook_" + DateTime.Now.ToFileTimeUtc() + ".pdf";
            string unnumberedPath = Path.Combine(ReportHeaderDetails.WWWroot, unnumberedName);

            using PdfWriter tableWriter = new PdfWriter(unnumberedPath);
            using PdfDocument tablePdf  = new PdfDocument(tableWriter);
            using Document layout       = new Document(tablePdf, PageSize.A4);

            // Two-line, centered, red report title.
            Paragraph title = new Paragraph(ReportHeaderDetails.FirstLine + "\n");
            title.SetTextAlignment(iText.Layout.Properties.TextAlignment.CENTER);
            title.SetFontColor(ColorConstants.RED);
            title.Add(ReportHeaderDetails.SecondLine + "\n");
            layout.Add(title);

            // Six columns with relative (percentage) widths.
            Table grid = new Table(UnitValue.CreatePercentArray(new float[] { 1, 5, 15, 5, 5, 5 }));
            grid.SetBorder(new OutsetBorder(2));

            // Banner cell spanning all six columns.
            PdfFont helvetica = PdfFontFactory.CreateFont(StandardFonts.HELVETICA);
            Cell banner = new Cell(1, 6)
                          .Add(new Paragraph(ReportHeaderDetails.CashBook))
                          .SetFont(helvetica)
                          .SetFontSize(13)
                          .SetFontColor(DeviceGray.WHITE)
                          .SetBackgroundColor(DeviceGray.BLACK)
                          .SetTextAlignment(TextAlignment.CENTER);
            grid.AddHeaderCell(banner);

            // Column headings; the "#" caption is the only one that is not centered.
            Cell[] columnHeadings =
            {
                ShadedHeading(new Paragraph("#")),
                ShadedHeading(CenteredCaption("Date")),
                ShadedHeading(CenteredCaption("Particulars")),
                ShadedHeading(CenteredCaption("In")),
                ShadedHeading(CenteredCaption("Out")),
                ShadedHeading(CenteredCaption("Balance"))
            };

            // Footer: report header text (4 columns) plus the generation timestamp (2 columns).
            Cell[] footerCells =
            {
                new Cell(1, 4).Add(new Paragraph(ReportHeaderDetails.FirstLine + " / " + ReportHeaderDetails.SecondLine).SetFontColor(DeviceGray.GRAY)),
                new Cell(1, 2).Add(new Paragraph($"D:{DateTime.Now}").SetFontColor(DeviceGray.GRAY)),
            };

            for (int h = 0; h < columnHeadings.Length; h++)
            {
                grid.AddHeaderCell(columnHeadings[h]);
            }
            for (int f = 0; f < footerCells.Length; f++)
            {
                grid.AddFooterCell(footerCells[f]);
            }

            // One row per entry, numbered from 1.
            int rowNumber = 0;
            foreach (CashBook entry in cbList)
            {
                rowNumber++;
                grid.AddCell(CenteredValue(rowNumber.ToString()));
                grid.AddCell(CenteredValue(entry.EDate.ToShortDateString()));
                grid.AddCell(CenteredValue($"{entry.Particulars}"));
                grid.AddCell(CenteredValue(entry.CashIn.ToString("0.##")));
                grid.AddCell(CenteredValue(entry.CashOut.ToString("0.##")));
                grid.AddCell(CenteredValue(entry.CashBalance.ToString("0.##")));
            }
            layout.Add(grid);

            // Close explicitly: the file must be complete before it is re-opened below.
            layout.Close();

            // ---- Pass 2: re-open the finished file and stamp page numbers into a new file. ----
            using PdfReader numberingReader = new PdfReader(unnumberedPath);
            string numberedName = "cashBook_" + (DateTime.Now.ToFileTimeUtc() + 1001) + ".pdf";
            using PdfWriter numberingWriter = new PdfWriter(Path.Combine(ReportHeaderDetails.WWWroot, numberedName));

            using PdfDocument numberedPdf = new PdfDocument(numberingReader, numberingWriter);
            Document numberedLayout = new Document(numberedPdf);

            int pageTotal = numberedPdf.GetNumberOfPages();
            for (int page = 1; page <= pageTotal; page++)
            {
                // Right-aligned label anchored at the fixed point (559, 806) of the page.
                Paragraph label = new Paragraph($"Page {page} of {pageTotal}");
                numberedLayout.ShowTextAligned(label, 559, 806, page, TextAlignment.RIGHT, VerticalAlignment.BOTTOM, 0);
            }

            numberedLayout.Close();

            return numberedName;
        }
Exemplo n.º 10
0
        /// <summary>
        /// Rewrites file-URI link annotations in the PDF so that links pointing at one of the
        /// source HTML files become internal "go to page" links, then replaces the original
        /// PDF with the rewritten copy.
        /// </summary>
        /// <param name="pdfFilePath">The PDF file path.</param>
        /// <param name="htmlToPdfFiles">The HTML to PDF files; a link is matched against each file's <c>Input</c>, ignoring case.</param>
        /// <param name="logger">The logger; receives a debug entry for each file link with no matching HTML file.</param>
        /// <exception cref="PdfPageNotFoundException">The page recorded for a linked HTML file cannot be retrieved from the PDF.</exception>
        internal static void UpdateLinks(
            string pdfFilePath,
            IReadOnlyCollection <HtmlToPdfFile> htmlToPdfFiles,
            ILogger logger)
        {
            string tempFilePath = Path.GetTempFileName();

            try
            {
                using (PdfReader pdfReader = new PdfReader(pdfFilePath))
                using (PdfWriter pdfWriter = new PdfWriter(tempFilePath))
                using (iText.Kernel.Pdf.PdfDocument pdfDocument = new iText.Kernel.Pdf.PdfDocument(pdfReader, pdfWriter))
                {
                    int pageCount = pdfDocument.GetNumberOfPages();
                    for (int i = 1; i <= pageCount; i++)
                    {
                        // get page
                        PdfPage pdfPage = pdfDocument.GetPage(i);

                        // get link annotations
                        IEnumerable <PdfLinkAnnotation> linkAnnotations = pdfPage.GetAnnotations().OfType <PdfLinkAnnotation>();
                        foreach (PdfLinkAnnotation linkAnnotation in linkAnnotations)
                        {
                            // get action
                            PdfDictionary action = linkAnnotation.GetAction();
                            if (action == null)
                            {
                                continue;
                            }

                            // Value comparison (also false for a missing /S entry) instead of
                            // relying on both names being the same object instance.
                            if (!PdfName.URI.Equals(action.GetAsName(PdfName.S)))
                            {
                                continue;
                            }

                            // A URI action without a string /URI entry has nothing to rewrite.
                            PdfString uriPdfString = action.GetAsString(PdfName.URI);
                            if (uriPdfString == null)
                            {
                                continue;
                            }

                            if (!Uri.TryCreate(uriPdfString.GetValue(), UriKind.RelativeOrAbsolute, out Uri uri))
                            {
                                continue;
                            }

                            // IsFile and LocalPath throw InvalidOperationException on relative
                            // URIs, so those must be filtered out first.
                            if (!uri.IsAbsoluteUri || !uri.IsFile)
                            {
                                continue;
                            }

                            string htmlFilePath = uri.LocalPath;

                            // Single case-insensitive lookup. Previously the existence check
                            // ignored case but the retrieval compared exactly against a
                            // lower-cased path, which threw for inputs containing upper-case
                            // characters.
                            HtmlToPdfFile linkedHtmlToPdfFile = htmlToPdfFiles.FirstOrDefault(
                                x => string.Equals(x.Input, htmlFilePath, StringComparison.OrdinalIgnoreCase));
                            if (linkedHtmlToPdfFile == null)
                            {
                                // ex. when printing PDF from TOC.html by itself
                                logger.LogDebug($"Could not find '{htmlFilePath}'. Referenced in '{pdfFilePath}' on page {i}.");
                                continue;
                            }

                            int linkedPageNumber = linkedHtmlToPdfFile.OutputPdfFilePageNumber;

                            PdfPage linkedPage;
                            try
                            {
                                linkedPage = pdfDocument.GetPage(linkedPageNumber);
                            }
                            catch (Exception ex)
                            {
                                throw new PdfPageNotFoundException(linkedPageNumber, linkedHtmlToPdfFile.Input, ex);
                            }

                            // Jump to the top edge of the linked page, fitting its width.
                            float top = linkedPage.GetPageSize().GetTop();
                            PdfExplicitDestination destination = PdfExplicitDestination.CreateFitH(linkedPage, top);
                            PdfAction newAction = PdfAction.CreateGoTo(destination);

                            linkAnnotation.SetAction(newAction);
                        }
                    }
                }
            }
            catch
            {
                // Rewriting failed: the original PDF is untouched, so only the temporary file
                // needs removing before the exception propagates.
                if (File.Exists(tempFilePath))
                {
                    File.Delete(tempFilePath);
                }

                throw;
            }

            // Replace the original with the rewritten copy. This stays outside the try so a
            // failed move never deletes the only remaining copy of the document.
            File.Delete(pdfFilePath);
            File.Move(tempFilePath, pdfFilePath);
        }
Exemplo n.º 11
0
        /// <summary>
        /// Finds and sets the page numbers of links mapped to HTML headings in the specified PDF file.
        /// </summary>
        /// <remarks>
        /// Only file-URI link annotations whose query string carries both <c>headingLevel</c>
        /// (an integer) and <c>headingText</c> are considered. The matching entry of
        /// <c>TitleAndHeadings</c> gets its <c>Page</c> set to the page holding the link.
        /// </remarks>
        /// <param name="htmlToPdfFile">The HTML to PDF file.</param>
        internal static void SetHeadingPageNumbers(HtmlToPdfFile htmlToPdfFile)
        {
            using (PdfReader pdfReader = new PdfReader(htmlToPdfFile.PdfFilePath))
            using (iText.Kernel.Pdf.PdfDocument pdfDocument = new iText.Kernel.Pdf.PdfDocument(pdfReader))
            {
                int pageCount = pdfDocument.GetNumberOfPages();
                for (int i = 1; i <= pageCount; i++)
                {
                    // get page
                    PdfPage pdfPage = pdfDocument.GetPage(i);

                    // get link annotations
                    IEnumerable <PdfLinkAnnotation> linkAnnotations = pdfPage.GetAnnotations().OfType <PdfLinkAnnotation>();
                    foreach (PdfLinkAnnotation linkAnnotation in linkAnnotations)
                    {
                        // get action
                        PdfDictionary action = linkAnnotation.GetAction();
                        if (action == null)
                        {
                            continue;
                        }

                        // Value comparison (also false for a missing /S entry) instead of
                        // relying on both names being the same object instance.
                        if (!PdfName.URI.Equals(action.GetAsName(PdfName.S)))
                        {
                            continue;
                        }

                        // A URI action without a string /URI entry cannot be a heading link.
                        PdfString uriPdfString = action.GetAsString(PdfName.URI);
                        if (uriPdfString == null)
                        {
                            continue;
                        }

                        if (!Uri.TryCreate(uriPdfString.GetValue(), UriKind.RelativeOrAbsolute, out Uri uri))
                        {
                            continue;
                        }

                        // IsFile and Query throw InvalidOperationException on relative URIs,
                        // so those must be filtered out first.
                        if (!uri.IsAbsoluteUri || !uri.IsFile)
                        {
                            continue;
                        }

                        // get query string
                        NameValueCollection queryString = HttpUtility.ParseQueryString(uri.Query);

                        // ex. ?headingLevel={level}&headingText
                        string headingLevel = queryString["headingLevel"];
                        if (headingLevel == null)
                        {
                            continue;
                        }

                        if (!int.TryParse(headingLevel, out int level))
                        {
                            continue;
                        }

                        string headingText = queryString["headingText"];
                        if (headingText == null)
                        {
                            continue;
                        }

                        // SingleOrDefault throws if several headings share the same level and text.
                        HtmlHeading htmlHeading = htmlToPdfFile.TitleAndHeadings.SingleOrDefault(x => (x.Level == level) && (x.Text == headingText));
                        if (htmlHeading == null)
                        {
                            continue;
                        }

                        htmlHeading.Page = i;
                    }
                }
            }
        }
Exemplo n.º 12
0
        /// <summary>
        /// Builds a letter-size PDF report with one section per entry of <paramref name="lista"/>
        /// (metadata, answered questions, comments, risk box and downloaded image), stamps two
        /// logo images at the top of every page, and returns the PDF as a file result.
        /// </summary>
        /// <param name="lista">The reports to render, in order.</param>
        /// <param name="nombreAsada">Name appended to the "Reporte " title.</param>
        /// <returns>A <c>FileStreamResult</c> with content type "application/pdf".</returns>
        public ActionResult buildPDF(List <InformeResponse> lista, string nombreAsada)
        {
            // Scale factors applied to the downloaded images.
            const float escalaImagen    = 0.4f;
            const float escalaLogoSersa = 0.08f;
            const float escalaLogoTec   = 0.4f;

            MemoryStream ms = new MemoryStream();
            PdfWriter    pw = new PdfWriter(ms);

            PdfDocument pdfDocument = new PdfDocument(pw);
            // Third argument (immediateFlush) is false: pages must stay editable until the
            // logos have been stamped on them in the final loop.
            Document doc = new Document(pdfDocument, PageSize.LETTER, false);

            Paragraph header  = new Paragraph("");
            Paragraph header2 = new Paragraph("");

            // One WebClient serves every download and is disposed when done; previously a new,
            // never-disposed client was created for each image.
            using (WebClient webClient = new WebClient())
            {
                doc.Add(new Paragraph("Reporte " + nombreAsada).SetFontSize(20).SetTextAlignment(TextAlignment.CENTER).SetFontColor(new DeviceRgb(4, 124, 188)));
                foreach (InformeResponse item in lista)
                {
                    Preguntas preguntasObj = TipoFormulario(item.tipo);

                    doc.Add(new Paragraph(item.acueducto).SetFontSize(15).SetBold());
                    doc.Add(new Paragraph("Fecha: " + item.fecha).SetFontSize(12));
                    doc.Add(new Paragraph("Encargado: " + item.encargado).SetFontSize(12).SetPaddingBottom(2));
                    doc.Add(new Paragraph("Respuestas ").SetFontSize(12).SetUnderline());

                    var infra = JsonConvert.DeserializeObject <Dictionary <string, string> >(item.infraestructura);

                    // Deserialization yields null for a JSON "null" payload; treat it as no answers.
                    if (infra != null)
                    {
                        foreach (var kv in infra)
                        {
                            // Map answer keys P1..P9 to their question text; other keys are ignored.
                            string linea;
                            switch (kv.Key)
                            {
                                case "P1": linea = preguntasObj.p1 + ": " + kv.Value; break;
                                case "P2": linea = preguntasObj.p2 + ": " + kv.Value; break;
                                case "P3": linea = preguntasObj.p3 + ": " + kv.Value; break;
                                case "P4": linea = preguntasObj.p4 + ": " + kv.Value; break;
                                case "P5": linea = preguntasObj.p5 + ": " + kv.Value; break;
                                case "P6": linea = preguntasObj.p6 + ": " + kv.Value; break;
                                case "P7": linea = preguntasObj.p7 + ": " + kv.Value; break;
                                case "P8": linea = preguntasObj.p8 + ": " + kv.Value; break;
                                case "P9": linea = preguntasObj.p9 + ": " + kv.Value; break;
                                default: continue;
                            }

                            doc.Add(new Paragraph(linea).SetFontSize(10));
                        }
                    }

                    doc.Add(new Paragraph("Comentarios: " + item.comentarios).SetFontSize(12));
                    doc.Add(new Paragraph("Tipo de formulario: " + preguntasObj.tipo).SetFontSize(12));

                    // Risk box: border and background both use the color returned by colorRiesgo.
                    Cell cell = new Cell();
                    cell.Add(new Paragraph("Riesgo " + item.riesgo).SetBorder(new SolidBorder(colorRiesgo(item.riesgo), 1)).SetBackgroundColor(colorRiesgo(item.riesgo)).SetTextAlignment(iText.Layout.Properties.TextAlignment.CENTER).SetFontSize(14).SetBold());
                    doc.Add(cell);

                    // Image of the report, downloaded from item.imagen.
                    byte[]    data      = webClient.DownloadData(item.imagen);
                    ImageData imageData = ImageDataFactory.Create(data);
                    Image     image     = new Image(imageData);
                    doc.Add(image.Scale(escalaImagen, escalaImagen).SetHorizontalAlignment(HorizontalAlignment.CENTER).SetMarginBottom(15).SetMarginTop(15));
                }

                // SERSA logo image (left header).
                byte[]    data2      = webClient.DownloadData(logoletra);
                ImageData imageData2 = ImageDataFactory.Create(data2);
                Image     image2     = new Image(imageData2);
                header.Add(image2.Scale(escalaLogoSersa, escalaLogoSersa).SetMarginBottom(15));

                // TEC logo image (right header).
                byte[]    data3      = webClient.DownloadData(logotec);
                ImageData imageData3 = ImageDataFactory.Create(data3);
                Image     image3     = new Image(imageData3);
                header2.Add(image3.Scale(escalaLogoTec, escalaLogoTec)).SetMarginBottom(10);
            }

            // Stamp both logos near the top edge of every page.
            int totalPaginas = pdfDocument.GetNumberOfPages();
            for (int i = 1; i <= totalPaginas; i++)
            {
                Rectangle pageSize = pdfDocument.GetPage(i).GetPageSize();
                float     x1       = 20;
                float     y1       = pageSize.GetTop() - 55;
                float     x2       = pageSize.GetRight() - 30;
                float     y2       = pageSize.GetTop() - 40;
                doc.ShowTextAligned(header, x1, y1, i, TextAlignment.LEFT, VerticalAlignment.BOTTOM, 0);
                doc.ShowTextAligned(header2, x2, y2, i, TextAlignment.RIGHT, VerticalAlignment.BOTTOM, 0);
            }

            doc.Close();

            // ToArray is called on ms after doc.Close(); the finished bytes are wrapped in a
            // fresh readable stream positioned at 0 for the result.
            return(new FileStreamResult(new MemoryStream(ms.ToArray()), "application/pdf"));
        }