Ejemplo n.º 1
0
        /// <summary>
        /// Extracts the text of every page of the PDF file at <paramref name="sFilePath"/>
        /// and returns it as a single string.
        /// </summary>
        /// <param name="sFilePath">Path of the PDF file to read.</param>
        /// <returns>
        /// The concatenated text of all pages, or <c>null</c> when the file could not be
        /// read (in which case an error dialog is shown first).
        /// </returns>
        private async Task <string> pdfTextExtract(string sFilePath)
        {
            try
            {
                // StringBuilder avoids re-copying the accumulated text for every page.
                var texto = new System.Text.StringBuilder();

                // Both objects are disposed even when extraction throws, so the
                // underlying file handle is always released.
                using (PdfReader reader = new PdfReader(sFilePath))
                using (iText.Kernel.Pdf.PdfDocument pdf = new iText.Kernel.Pdf.PdfDocument(reader))
                {
                    int pageCount = pdf.GetNumberOfPages();

                    // iText page numbers are 1-based.
                    for (int page = 1; page <= pageCount; page++)
                    {
                        // A fresh strategy per page so text from earlier pages is not carried over.
                        ITextExtractionStrategy its = new SimpleTextExtractionStrategy();
                        texto.Append(PdfTextExtractor.GetTextFromPage(pdf.GetPage(page), its));
                    }
                }

                return texto.ToString();
            }
            catch (Exception Ex)
            {
                await new MessageDialog("Error al abrir archivo: " + Ex.Message).ShowAsync();
                return null;
            }
        }
Ejemplo n.º 2
0
        //////////////////////////////////////////////////////////
        //// public properties
        #region public properties

        /// <summary>
        /// Extracts the text of a Portable Document, page by page.
        /// </summary>
        /// <param name="file">The portable document file stream.</param>
        /// <param name="errors">
        /// Exceptions raised while reading individual pages, in page order.
        /// Null if successful.
        /// </param>
        /// <returns>The cleaned text of every page that could be read, concatenated.</returns>
        public static string Convert(Stream file, out List <Exception> errors)
        {
            errors = null;

            StringBuilder text = new StringBuilder();

            using (iText7.PdfReader pdfReader = new iText7.PdfReader(file))
            using (iText7.PdfDocument document = new iText7.PdfDocument(pdfReader))
            {
                int pageCount  = document.GetNumberOfPages();
                int pageNumber = 1;

                while (pageNumber <= pageCount)
                {
                    try
                    {
                        string raw = iText7.Canvas.Parser.PdfTextExtractor.GetTextFromPage(document.GetPage(pageNumber));
                        text.Append(Common.CleanPdfText(raw));
                    }
                    catch (Exception failure)
                    {
                        // A failing page is recorded and skipped; the remaining pages are still read.
                        if (errors == null)
                        {
                            errors = new List <Exception>();
                        }

                        errors.Add(failure);
                    }

                    pageNumber++;
                }
            }

            return text.ToString();
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Reads the PDF at <c>Location</c> page by page and feeds each page's text to
        /// <c>AddToIndex</c>, reporting progress and elapsed time on the console.
        /// </summary>
        protected override void Index()
        {
            // Stopwatch is monotonic; DateTime.Now differences can be skewed by clock adjustments.
            var stopwatch = System.Diagnostics.Stopwatch.StartNew();

            Console.Write($"Indexing {Name}");

            #region PDF
            // Get the PDF document from a `FileStream` via `PdfReader`.
            // Both are disposed so the file handle is released even if extraction throws.
            using (var stream = new FileStream(Location, FileMode.Open, FileAccess.Read))
            using (var pdfDocument = new Pdf.PdfDocument(new Pdf.PdfReader(stream)))
            {
                var totalPageNumber = pdfDocument.GetNumberOfPages();

                // iText page numbers are 1-based.
                for (var i = 1; i <= totalPageNumber; i++)
                {
                    var text = PdfTextExtractor.GetTextFromPage(pdfDocument.GetPage(i));

                    this.AddToIndex(texts: text);
                }
            }
            #endregion

            Console.Write($" >==> {Thumbnail.Count} unique words. {stopwatch.Elapsed.TotalMilliseconds}ms\n");
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Copies the PDF at <c>PATH</c> to <c>PATH_PAGED</c>, stamping a
        /// "Strona i z n" page counter near the bottom-right corner of every page.
        /// </summary>
        public static void AddPages()
        {
            // The using block guarantees the document (and with it the reader and the
            // output stream) is closed even if stamping a page throws.
            using (PdfDocument pdfDocument = new PdfDocument(new PdfReader(PATH), new PdfWriter(new FileStream(PATH_PAGED, FileMode.Create, FileAccess.Write))))
            {
                Document doc = new Document(pdfDocument);

                int numberOfPages = pdfDocument.GetNumberOfPages();

                for (int i = 1; i <= numberOfPages; i++)
                {
                    // Use each page's own size so the label stays right-aligned
                    // in documents whose pages differ in width.
                    var size = pdfDocument.GetPage(i).GetPageSize();

                    // Write aligned text to the specified by parameters point
                    doc.ShowTextAligned(new Paragraph("Strona " + i + " z " + numberOfPages),
                                        size.GetWidth() - 50, 20, i, TextAlignment.RIGHT, VerticalAlignment.BOTTOM, 0);
                }

                doc.Close();
            }
        }
        /// <summary>
        /// Places the image stored in <paramref name="filename"/> on page
        /// <paramref name="pagina"/>, scaled to fit and positioned from the widget
        /// rectangle of the button field <paramref name="toSet"/>, then clears the
        /// field's value.
        /// </summary>
        /// <param name="toSet">The form field to replace; must be a <c>PdfButtonFormField</c>.</param>
        /// <param name="pdfDoc">The document the image is added to.</param>
        /// <param name="filename">Path of the image file.</param>
        /// <param name="pagina">1-based number of the page that receives the image.</param>
        /// <exception cref="ArgumentException">
        /// <paramref name="toSet"/> is not a button field or exposes no widget rectangle.
        /// </exception>
        private void SetLogo(PdfFormField toSet, iText.Kernel.Pdf.PdfDocument pdfDoc, string filename, int pagina)
        {
            var b = toSet as PdfButtonFormField;

            if (b == null)
            {
                throw new ArgumentException("The field must be a button form field.", "toSet");
            }

            // Flattened widget rectangle values; the first four are read below.
            var afmetingen = b.GetWidgets().SelectMany(f => f.GetRectangle()).ToArray();

            if (afmetingen.Length < 4)
            {
                throw new ArgumentException("The field has no widget rectangle.", "toSet");
            }

            // Parse with the invariant culture: the values are written with '.' as the
            // decimal separator, so parsing must not depend on the machine's locale.
            Func <int, int> coordinate = index =>
                (int)Convert.ToDouble(afmetingen[index].ToString(), System.Globalization.CultureInfo.InvariantCulture);

            var x = coordinate(0);

            if (x < 10)
            {
                x = 100;
            }

            var y         = coordinate(1);
            var wWidth    = coordinate(2);
            var pageWidth = (int)pdfDoc.GetPage(1).GetPageSizeWithRotation().GetWidth();

            // Keep a 20-unit margin from the page edge.
            if (wWidth > pageWidth - 20)
            {
                wWidth = pageWidth - 20;
            }

            var wHeight = coordinate(3);

            if (pagina == 1)
            {
                wHeight -= 10;
            }

            ImageData img          = ImageDataFactory.Create(filename);
            var       pdfImage     = new iText.Layout.Element.Image(img);
            var       scaled       = pdfImage.ScaleToFit(wWidth, wHeight - y);
            var       scaledWidth  = scaled.GetImageScaledWidth();
            var       scaledHeight = scaled.GetImageScaledHeight();

            // Deliberately not closed here: closing the layout Document would also
            // close pdfDoc, which is owned by the caller.
            Document d = new Document(pdfDoc);

            var berekendeX = (x + wWidth - scaledWidth) / 2;
            var berekendeY = (y + wHeight - scaledHeight) / 2;

            scaled.SetFixedPosition(pagina, berekendeX, berekendeY);
            d.Add(scaled);
            b.SetValue("");
        }
Ejemplo n.º 6
0
 /// <summary>
 /// Extracts the text of every page of the PDF at <paramref name="url"/>.
 /// The result is also stored in the <c>text</c> member.
 /// </summary>
 /// <param name="url">Location of the PDF, as accepted by <c>PdfReader</c>.</param>
 /// <returns>
 /// The concatenated page text, or the literal "Error reading file" when the
 /// document could not be read.
 /// </returns>
 public string GetTextFromPDF(string url)
 {
     try
     {
         StringBuilder ster = new StringBuilder();

         // Dispose reader and document so the file is released even on failure.
         using (PdfReader read = new PdfReader(url))
         using (iText.Kernel.Pdf.PdfDocument doc = new iText.Kernel.Pdf.PdfDocument(read))
         {
             int pageCount = doc.GetNumberOfPages();

             // iText page numbers are 1-based; starting at 0 made GetPage throw
             // on the first iteration, so every call ended in the catch block.
             for (int i = 1; i <= pageCount; i++)
             {
                 ster.Append(PdfTextExtractor.GetTextFromPage(doc.GetPage(i)));
             }
         }

         text = ster.ToString();
         return(text);
     }
     catch (Exception)
     {
         text = "Error reading file";
         return(text);
     }
 }
Ejemplo n.º 7
0
        /// <summary>
        /// Lazily enumerates the paragraphs of a portable document, page by page.
        /// Each page's cleaned text is split on <see cref="Environment.NewLine"/> and
        /// empty entries are dropped.
        /// </summary>
        /// <param name="file">The portable document file stream.</param>
        /// <returns>A <see cref="IEnumerable{T}"/> of paragraphs.</returns>
        public static IEnumerable <string> Paragraphs(Stream file)
        {
            using (iText7.PdfReader pdfReader = new iText7.PdfReader(file))
            using (iText7.PdfDocument document = new iText7.PdfDocument(pdfReader))
            {
                int pageCount = document.GetNumberOfPages();

                for (int pageNumber = 1; pageNumber <= pageCount; pageNumber++)
                {
                    iText7.PdfPage current = document.GetPage(pageNumber);
                    string         cleaned = Common.CleanPdfText(iText7.Canvas.Parser.PdfTextExtractor.GetTextFromPage(current));

                    // One paragraph per non-empty line of the page text.
                    string[] lines = cleaned.Split(new[] { Environment.NewLine }, StringSplitOptions.RemoveEmptyEntries);

                    for (int index = 0; index < lines.Length; index++)
                    {
                        yield return lines[index];
                    }
                }
            }
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Rewrites the file-URI link annotations of a PDF so that links pointing at
        /// one of the converted HTML inputs become in-document "go to page" actions.
        /// The PDF is rewritten through a temporary file that then replaces the original.
        /// </summary>
        /// <param name="pdfFilePath">The PDF file path.</param>
        /// <param name="htmlToPdfFiles">The HTML to PDF files.</param>
        /// <param name="logger">The logger.</param>
        /// <exception cref="PdfPageNotFoundException">
        /// The page number recorded for a linked HTML file cannot be retrieved from the PDF.
        /// </exception>
        internal static void UpdateLinks(
            string pdfFilePath,
            IReadOnlyCollection <HtmlToPdfFile> htmlToPdfFiles,
            ILogger logger)
        {
            string tempFilePath = Path.GetTempFileName();
            bool   rewritten    = false;

            try
            {
                using (PdfReader pdfReader = new PdfReader(pdfFilePath))
                using (PdfWriter pdfWriter = new PdfWriter(tempFilePath))
                using (iText.Kernel.Pdf.PdfDocument pdfDocument = new iText.Kernel.Pdf.PdfDocument(pdfReader, pdfWriter))
                {
                    int pageCount = pdfDocument.GetNumberOfPages();
                    for (int i = 1; i <= pageCount; i++)
                    {
                        // get page
                        PdfPage pdfPage = pdfDocument.GetPage(i);

                        // get link annotations
                        IEnumerable <PdfLinkAnnotation> linkAnnotations = pdfPage.GetAnnotations().OfType <PdfLinkAnnotation>();
                        foreach (PdfLinkAnnotation linkAnnotation in linkAnnotations)
                        {
                            // get action
                            PdfDictionary action = linkAnnotation.GetAction();
                            if (action == null)
                            {
                                continue;
                            }

                            // Value comparison: reference equality only holds when the
                            // parsed name happens to be the shared static instance.
                            PdfName s = action.GetAsName(PdfName.S);
                            if (!PdfName.URI.Equals(s))
                            {
                                continue;
                            }

                            // A URI action without a /URI string entry is malformed; skip it.
                            PdfString uriPdfString = action.GetAsString(PdfName.URI);
                            if (uriPdfString == null)
                            {
                                continue;
                            }

                            if (!Uri.TryCreate(uriPdfString.GetValue(), UriKind.RelativeOrAbsolute, out Uri uri))
                            {
                                continue;
                            }

                            // Uri.IsFile throws for relative URIs, so test absoluteness first.
                            if (!uri.IsAbsoluteUri || !uri.IsFile)
                            {
                                continue;
                            }

                            string htmlFilePath = uri.LocalPath;

                            // One case-insensitive lookup. Previously a case-insensitive
                            // existence check was followed by a case-sensitive Single()
                            // on a lower-cased path, which threw whenever Input contained
                            // upper-case characters.
                            HtmlToPdfFile linkedHtmlToPdfFile = htmlToPdfFiles.FirstOrDefault(
                                x => string.Equals(x.Input, htmlFilePath, StringComparison.OrdinalIgnoreCase));

                            if (linkedHtmlToPdfFile == null)
                            {
                                // ex. when printing PDF from TOC.html by itself
                                logger.LogDebug($"Could not find '{htmlFilePath}'. Referenced in '{pdfFilePath}' on page {i}.");
                                continue;
                            }

                            int linkedPageNumber = linkedHtmlToPdfFile.OutputPdfFilePageNumber;

                            PdfPage linkedPage;
                            try
                            {
                                linkedPage = pdfDocument.GetPage(linkedPageNumber);
                            }
                            catch (Exception ex)
                            {
                                throw new PdfPageNotFoundException(linkedPageNumber, linkedHtmlToPdfFile.Input, ex);
                            }

                            // Jump to the top of the linked page, fitting its width.
                            float top = linkedPage.GetPageSize().GetTop();
                            PdfExplicitDestination destination = PdfExplicitDestination.CreateFitH(linkedPage, top);
                            PdfAction newAction = PdfAction.CreateGoTo(destination);

                            linkAnnotation.SetAction(newAction);
                        }
                    }
                }

                rewritten = true;
            }
            finally
            {
                // Do not leave the temp file behind when rewriting failed.
                if (!rewritten)
                {
                    File.Delete(tempFilePath);
                }
            }

            File.Delete(pdfFilePath);
            File.Move(tempFilePath, pdfFilePath);
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Finds and sets the page numbers of links mapped to HTML headings in the specified PDF file.
        /// </summary>
        /// <remarks>
        /// Only file-URI link annotations whose query string carries both
        /// <c>headingLevel</c> (an integer) and <c>headingText</c> are considered. When
        /// such a link matches an entry of <c>TitleAndHeadings</c> by level and text,
        /// that entry's <c>Page</c> is set to the page the link is on.
        /// </remarks>
        /// <param name="htmlToPdfFile">The HTML to PDF file.</param>
        internal static void SetHeadingPageNumbers(HtmlToPdfFile htmlToPdfFile)
        {
            using (PdfReader pdfReader = new PdfReader(htmlToPdfFile.PdfFilePath))
            using (iText.Kernel.Pdf.PdfDocument pdfDocument = new iText.Kernel.Pdf.PdfDocument(pdfReader))
            {
                int pageCount = pdfDocument.GetNumberOfPages();
                for (int i = 1; i <= pageCount; i++)
                {
                    // get page
                    PdfPage pdfPage = pdfDocument.GetPage(i);

                    // get link annotations
                    IEnumerable <PdfLinkAnnotation> linkAnnotations = pdfPage.GetAnnotations().OfType <PdfLinkAnnotation>();
                    foreach (PdfLinkAnnotation linkAnnotation in linkAnnotations)
                    {
                        // get action
                        PdfDictionary action = linkAnnotation.GetAction();
                        if (action == null)
                        {
                            continue;
                        }

                        // Value comparison: reference equality only holds when the
                        // parsed name happens to be the shared static instance.
                        PdfName s = action.GetAsName(PdfName.S);
                        if (!PdfName.URI.Equals(s))
                        {
                            continue;
                        }

                        // A URI action without a /URI string entry is malformed; skip it.
                        PdfString uriPdfString = action.GetAsString(PdfName.URI);
                        if (uriPdfString == null)
                        {
                            continue;
                        }

                        if (!Uri.TryCreate(uriPdfString.GetValue(), UriKind.RelativeOrAbsolute, out Uri uri))
                        {
                            continue;
                        }

                        // Uri.IsFile (and Uri.Query below) throw for relative URIs,
                        // so test absoluteness first.
                        if (!uri.IsAbsoluteUri || !uri.IsFile)
                        {
                            continue;
                        }

                        // get query string
                        NameValueCollection queryString = HttpUtility.ParseQueryString(uri.Query);

                        // ex. ?headingLevel={level}&headingText
                        string headingLevel = queryString["headingLevel"];
                        if (headingLevel == null)
                        {
                            continue;
                        }

                        if (!int.TryParse(headingLevel, out int level))
                        {
                            continue;
                        }

                        string headingText = queryString["headingText"];
                        if (headingText == null)
                        {
                            continue;
                        }

                        // NOTE(review): SingleOrDefault throws if two headings share the same
                        // level and text — confirm whether duplicates are possible upstream.
                        HtmlHeading htmlHeading = htmlToPdfFile.TitleAndHeadings.SingleOrDefault(x => (x.Level == level) && (x.Text == headingText));
                        if (htmlHeading == null)
                        {
                            continue;
                        }

                        htmlHeading.Page = i;
                    }
                }
            }
        }
Ejemplo n.º 10
0
        /// <summary>
        /// Builds a PDF report with one section per entry of <paramref name="lista"/>
        /// (metadata, answers, comments, risk and a downloaded image), stamps two logo
        /// images at the top of every page and returns the result as a PDF stream.
        /// </summary>
        /// <param name="lista">The report entries to render.</param>
        /// <param name="nombreAsada">Name appended to the "Reporte " title.</param>
        /// <returns>A <c>FileStreamResult</c> with content type "application/pdf".</returns>
        public ActionResult buildPDF(List <InformeResponse> lista, string nombreAsada)
        {
            const float ItemImageScale = 0.4f;
            const float SersaLogoScale = 0.08f;
            const float TecLogoScale   = 0.4f;

            MemoryStream ms = new MemoryStream();
            PdfWriter    pw = new PdfWriter(ms);

            PdfDocument pdfDocument = new PdfDocument(pw);
            Document    doc         = new Document(pdfDocument, PageSize.LETTER, false);

            // A single WebClient serves every download and is disposed when done;
            // previously one undisposed client was created per image.
            using (WebClient webClient = new WebClient())
            {
                doc.Add(new Paragraph("Reporte " + nombreAsada).SetFontSize(20).SetTextAlignment(TextAlignment.CENTER).SetFontColor(new DeviceRgb(4, 124, 188)));
                foreach (InformeResponse item in lista)
                {
                    Preguntas preguntasObj = TipoFormulario(item.tipo);

                    doc.Add(new Paragraph(item.acueducto).SetFontSize(15).SetBold());
                    doc.Add(new Paragraph("Fecha: " + item.fecha).SetFontSize(12));
                    doc.Add(new Paragraph("Encargado: " + item.encargado).SetFontSize(12).SetPaddingBottom(2));
                    doc.Add(new Paragraph("Respuestas ").SetFontSize(12).SetUnderline());

                    // Answers arrive as a JSON object keyed "P1".."P9"; other keys are ignored.
                    var infra = JsonConvert.DeserializeObject <Dictionary <string, string> >(item.infraestructura);

                    // DeserializeObject yields null for a JSON "null" payload.
                    if (infra != null)
                    {
                        foreach (var kv in infra)
                        {
                            string linea = null;

                            switch (kv.Key)
                            {
                                case "P1": linea = preguntasObj.p1 + ": " + kv.Value; break;
                                case "P2": linea = preguntasObj.p2 + ": " + kv.Value; break;
                                case "P3": linea = preguntasObj.p3 + ": " + kv.Value; break;
                                case "P4": linea = preguntasObj.p4 + ": " + kv.Value; break;
                                case "P5": linea = preguntasObj.p5 + ": " + kv.Value; break;
                                case "P6": linea = preguntasObj.p6 + ": " + kv.Value; break;
                                case "P7": linea = preguntasObj.p7 + ": " + kv.Value; break;
                                case "P8": linea = preguntasObj.p8 + ": " + kv.Value; break;
                                case "P9": linea = preguntasObj.p9 + ": " + kv.Value; break;
                            }

                            if (linea != null)
                            {
                                doc.Add(new Paragraph(linea).SetFontSize(10));
                            }
                        }
                    }

                    doc.Add(new Paragraph("Comentarios: " + item.comentarios).SetFontSize(12));
                    doc.Add(new Paragraph("Tipo de formulario: " + preguntasObj.tipo).SetFontSize(12));

                    // Risk label: border and background both use the risk colour.
                    Cell cell = new Cell();
                    cell.Add(new Paragraph("Riesgo " + item.riesgo).SetBorder(new SolidBorder(colorRiesgo(item.riesgo), 1)).SetBackgroundColor(colorRiesgo(item.riesgo)).SetTextAlignment(iText.Layout.Properties.TextAlignment.CENTER).SetFontSize(14).SetBold());
                    doc.Add(cell);

                    // Image attached to this report entry.
                    byte[]    data      = webClient.DownloadData(item.imagen);
                    ImageData imageData = ImageDataFactory.Create(data);
                    Image     image     = new Image(imageData);
                    doc.Add(image.Scale(ItemImageScale, ItemImageScale).SetHorizontalAlignment(HorizontalAlignment.CENTER).SetMarginBottom(15).SetMarginTop(15));
                }

                // SERSA logo image (left header)
                byte[]    data2      = webClient.DownloadData(logoletra);
                ImageData imageData2 = ImageDataFactory.Create(data2);
                Image     image2     = new Image(imageData2);
                Paragraph header     = new Paragraph("");

                header.Add(image2.Scale(SersaLogoScale, SersaLogoScale).SetMarginBottom(15));

                // TEC logo image (right header)
                byte[]    data3      = webClient.DownloadData(logotec);
                ImageData imageData3 = ImageDataFactory.Create(data3);
                Image     image3     = new Image(imageData3);
                Paragraph header2    = new Paragraph("");

                header2.Add(image3.Scale(TecLogoScale, TecLogoScale)).SetMarginBottom(10);

                // Stamp both headers on every page produced by the layout above.
                int pageCount = pdfDocument.GetNumberOfPages();
                for (int i = 1; i <= pageCount; i++)
                {
                    Rectangle pageSize = pdfDocument.GetPage(i).GetPageSize();
                    float     x1       = 20;
                    float     y1       = pageSize.GetTop() - 55;
                    float     x2       = pageSize.GetRight() - 30;
                    float     y2       = pageSize.GetTop() - 40;
                    doc.ShowTextAligned(header, x1, y1, i, TextAlignment.LEFT, VerticalAlignment.BOTTOM, 0);
                    doc.ShowTextAligned(header2, x2, y2, i, TextAlignment.RIGHT, VerticalAlignment.BOTTOM, 0);
                }
            }

            doc.Close();

            // The bytes are copied into a fresh stream, positioned at 0, for the result
            // (the original did the same after Close(), which presumably closes `ms`).
            byte[] bytesStream = ms.ToArray();

            return(new FileStreamResult(new MemoryStream(bytesStream), "application/pdf"));
        }