Пример #1
0
        /// <summary>
        /// Searches every page of a PDF for <paramref name="pSearch"/>, paints a white rectangle over each
        /// match and writes <paramref name="replacingText"/> on top of it, saving the result to
        /// <paramref name="DestinationFile"/>.
        /// </summary>
        /// <remarks>
        /// Nothing happens when <paramref name="SourceFile"/> does not exist. Exceptions are not propagated
        /// to the caller; they are reported through <see cref="System.Diagnostics.Trace"/> instead.
        /// </remarks>
        /// <param name="pSearch">Text to look for on each page.</param>
        /// <param name="replacingText">Text drawn over every match.</param>
        /// <param name="SC">String comparison rule passed to the text-location strategy (e.g. to ignore case).</param>
        /// <param name="SourceFile">Path of the source PDF file.</param>
        /// <param name="DestinationFile">Path of the PDF file to create (overwritten if it already exists).</param>
        public static void PDFTextGetter(string pSearch, string replacingText, StringComparison SC, string SourceFile, string DestinationFile)
        {
            // The cover rectangle uses a fixed width rather than the width of the matched text.
            const float coverWidth          = 60f;
            const float replacementFontSize = 9f;

            try
            {
                if (!System.IO.File.Exists(SourceFile))
                {
                    return;
                }

                PdfReader pReader = new PdfReader(SourceFile);
                try
                {
                    // The stamper is what actually writes the modified document to DestinationFile.
                    iTextSharp.text.pdf.PdfStamper stamper = new iTextSharp.text.pdf.PdfStamper(
                        pReader, new System.IO.FileStream(DestinationFile, System.IO.FileMode.Create));
                    try
                    {
                        // The same font is used for every replacement, so create it once.
                        iTextSharp.text.pdf.BaseFont bf = BaseFont.CreateFont(BaseFont.HELVETICA_BOLD, BaseFont.CP1252, BaseFont.NOT_EMBEDDED);

                        for (int page = 1; page <= pReader.NumberOfPages; page++)
                        {
                            myLocationTextExtractionStrategy strategy = new myLocationTextExtractionStrategy();
                            iTextSharp.text.pdf.PdfContentByte canvas = stamper.GetOverContent(page);

                            // Hand the content layer's text state to the strategy; it needs these values
                            // to compute the location of the matches.
                            strategy.UndercontentCharacterSpacing  = (int)canvas.CharacterSpacing;
                            strategy.UndercontentHorizontalScaling = (int)canvas.HorizontalScaling;

                            // The returned text is not needed, but this call must ALWAYS be made:
                            // it is what feeds all text chunks of the page into the strategy object.
                            PdfTextExtractor.GetTextFromPage(pReader, page, strategy);

                            List<iTextSharp.text.Rectangle> matchesFound = strategy.GetTextLocations(pSearch, SC);

                            foreach (iTextSharp.text.Rectangle rect in matchesFound)
                            {
                                // Set the fill color for every match: the text drawn below switches it
                                // to black on this same layer, which would otherwise leak into the next
                                // cover rectangle. No border is used because it would enlarge the rect.
                                canvas.SetColorFill(BaseColor.WHITE);
                                canvas.Rectangle(rect.Left, rect.Bottom, coverWidth, rect.Height);
                                canvas.Fill();

                                // Write the replacement text, left-aligned at the match's lower-left corner.
                                canvas.SetColorFill(BaseColor.BLACK);
                                canvas.SetFontAndSize(bf, replacementFontSize);
                                canvas.BeginText();
                                canvas.ShowTextAligned(iTextSharp.text.pdf.PdfContentByte.ALIGN_LEFT, replacingText, rect.Left, rect.Bottom, 0);
                                canvas.EndText();
                            }
                        }
                    }
                    finally
                    {
                        // Closing the stamper flushes the document to the destination stream.
                        stamper.Close();
                    }
                }
                finally
                {
                    pReader.Close();
                }
            }
            catch (Exception ex)
            {
                // Keep the method's "never throws" behavior, but do not lose the failure silently.
                System.Diagnostics.Trace.TraceError("PDFTextGetter failed for '{0}': {1}", SourceFile, ex);
            }
        }
Пример #2
0
        /// <summary>
        /// Highlights every occurrence of <paramref name="pSearch"/> in a PDF by drawing a pink rectangle
        /// on the under-content layer of each page, and saves the result to <paramref name="DestinationFile"/>.
        /// </summary>
        /// <remarks>
        /// Nothing is written when <paramref name="SourceFile"/> does not exist. While the method runs the
        /// wait cursor is shown and <c>PB</c> is advanced by one step per processed page. Exceptions
        /// propagate to the caller; the cursor is restored and the PDF objects are closed in all cases.
        /// </remarks>
        /// <param name="pSearch">Text to look for on each page.</param>
        /// <param name="SC">String comparison rule passed to the text-location strategy.</param>
        /// <param name="SourceFile">Path of the source PDF file.</param>
        /// <param name="DestinationFile">Path of the PDF file to create (overwritten if it already exists).</param>
        public void PDFTextGetter(string pSearch, StringComparison SC, string SourceFile, string DestinationFile)
        {
            this.Cursor = Cursors.WaitCursor;
            try
            {
                if (!File.Exists(SourceFile))
                {
                    return;
                }

                PdfReader pReader = new PdfReader(SourceFile);
                try
                {
                    iTextSharp.text.pdf.PdfStamper stamper =
                        new iTextSharp.text.pdf.PdfStamper(pReader, new System.IO.FileStream(DestinationFile, FileMode.Create));
                    try
                    {
                        PB.Value   = 0;
                        PB.Maximum = pReader.NumberOfPages;

                        for (int page = 1; page <= pReader.NumberOfPages; page++)
                        {
                            myLocationTextExtractionStrategy strategy = new myLocationTextExtractionStrategy();
                            iTextSharp.text.pdf.PdfContentByte cb = stamper.GetUnderContent(page);

                            // Hand the content layer's text state to the strategy; it needs these values
                            // to compute the location of the matches.
                            strategy.UndercontentCharacterSpacing  = cb.CharacterSpacing;
                            strategy.UndercontentHorizontalScaling = cb.HorizontalScaling;

                            // The returned text is not needed, but this call must ALWAYS be made:
                            // it is what feeds all text chunks of the page into the strategy object.
                            PdfTextExtractor.GetTextFromPage(pReader, page, strategy);

                            List<iTextSharp.text.Rectangle> matchesFound = strategy.GetTextLocations(pSearch, SC);

                            // Fill only, no border: a border would make the highlight larger than the text.
                            cb.SetColorFill(BaseColor.PINK);
                            foreach (iTextSharp.text.Rectangle rect in matchesFound)
                            {
                                cb.Rectangle(rect.Left, rect.Bottom, rect.Width, rect.Height);
                            }
                            // One fill paints all rectangles collected for this page.
                            cb.Fill();

                            PB.Value += 1;
                        }
                    }
                    finally
                    {
                        // Closing the stamper flushes the document to the destination stream.
                        stamper.Close();
                    }
                }
                finally
                {
                    pReader.Close();
                }
            }
            finally
            {
                // Restore the cursor even when processing fails, so the form is not left "busy".
                this.Cursor = Cursors.Default;
            }
        }
Пример #3
0
        /// <summary>
        /// Extracts the text of every page of a PDF file and returns it as one string.
        /// </summary>
        /// <param name="fileName">Path of the PDF file to read.</param>
        /// <returns>
        /// The text of all pages, concatenated in page order; an empty string when
        /// <paramref name="fileName"/> does not exist.
        /// </returns>
        public string ReadPdfFile(string fileName)
        {
            StringBuilder text = new StringBuilder();

            if (!File.Exists(fileName))
            {
                return text.ToString();
            }

            PdfReader pdfReader = new PdfReader(fileName);
            try
            {
                // Page numbers are 1-based.
                for (int page = 1; page <= pdfReader.NumberOfPages; page++)
                {
                    myLocationTextExtractionStrategy strategy = new myLocationTextExtractionStrategy();

                    // The extracted text is already a .NET string, so it is appended unchanged;
                    // pushing it through the system ANSI code page would replace every character
                    // that code page cannot represent.
                    text.Append(PdfTextExtractor.GetTextFromPage(pdfReader, page, strategy));
                }
            }
            finally
            {
                // Close the reader once, after all pages were read (or on failure).
                pdfReader.Close();
            }

            return text.ToString();
        }