Beispiel #1
0
        /// <summary>
        /// Extracts the text of three rectangular regions from page 1 of <c>test.pdf</c> in a single
        /// content-stream pass and compares each region's text with its expected value.
        /// </summary>
        public virtual void Test()
        {
            String[] expectedText = new String[]
            {
                "PostScript Compatibility",
                "Because the PostScript language does not support the transparent imaging \n"
                + "model, PDF 1.4 consumer applications must have some means for converting the \n"
                + "appearance of a document that uses transparency to a purely opaque description \n"
                + "for printing on PostScript output devices. Similar techniques can also be used to \n"
                + "convert such documents to a form that can be correctly viewed by PDF 1.3 and \n"
                + "earlier consumers. ",
                "Otherwise, flatten the colors to some assumed device color space with pre-\n"
                + "determined calibration. In the generated PostScript output, paint the flattened \n"
                + "colors in a CIE-based color space having that calibration. "
            };
            Rectangle[] regions = new Rectangle[]
            {
                new Rectangle(90, 581, 130, 24),
                new Rectangle(80, 486, 370, 92),
                new Rectangle(103, 143, 357, 53)
            };

            // The using block closes the document (and its reader) even when an assertion fails.
            using (PdfDocument pdfDocument = new PdfDocument(new PdfReader(sourceFolder + "test.pdf")))
            {
                FilteredEventListener listener = new FilteredEventListener();
                LocationTextExtractionStrategy[] extractionStrategies = new LocationTextExtractionStrategy[regions.Length];

                // One extraction strategy per region, all fed by the same listener.
                for (int i = 0; i < regions.Length; i++)
                {
                    extractionStrategies[i] = listener.AttachEventListener(
                        new LocationTextExtractionStrategy(), new TextRegionEventFilter(regions[i]));
                }

                new PdfCanvasProcessor(listener).ProcessPageContent(pdfDocument.GetPage(1));

                for (int i = 0; i < regions.Length; i++)
                {
                    String actualText = extractionStrategies[i].GetResultantText();
                    NUnit.Framework.Assert.AreEqual(expectedText[i], actualText);
                }
            }
        }
Beispiel #2
0
        /// <summary>
        /// For every PDF in <c>c:\temp\ime</c>, extracts the text inside a fixed rectangle on page 2,
        /// prints it to the console and writes it to a <c>.txt</c> file next to the PDF.
        /// </summary>
        public static void ExtractPhysicalAddress()
        {
            var di = new DirectoryInfo(@"c:\temp\ime");

            foreach (var file in di.GetFiles("*.pdf"))
            {
                String actualText;

                // The using block closes the document even if page 2 is missing or parsing fails.
                using (PdfDocument pdfDoc = new PdfDocument(new PdfReader(file.FullName)))
                {
                    Rectangle             rect         = new Rectangle(300, 470, 70, 150);
                    TextRegionEventFilter regionFilter = new TextRegionEventFilter(rect);

                    FilteredEventListener listener = new FilteredEventListener();
                    LocationTextExtractionStrategy extractionStrategy =
                        listener.AttachEventListener(new LocationTextExtractionStrategy(), regionFilter);

                    new PdfCanvasProcessor(listener).ProcessPageContent(pdfDoc.GetPage(2));

                    actualText = extractionStrategy.GetResultantText();
                }

                Console.WriteLine(file.Name);
                Console.WriteLine(actualText);

                // Path.ChangeExtension only touches the real extension. A plain Replace(".pdf", ".txt")
                // is case-sensitive (a "X.PDF" match would keep its name and be overwritten by the text)
                // and would also rewrite ".pdf" inside directory names.
                string textPath = Path.ChangeExtension(file.FullName, ".txt");
                using (StreamWriter writer = new StreamWriter(textPath))
                {
                    writer.Write(actualText);
                }
            }
        }
        /// <summary>
        /// Builds the text for <paramref name="rectangle"/> from the render events held in <c>textRenderList</c>.
        /// </summary>
        /// <param name="rectangle">Region used to construct the <c>TextRegionEventFilter</c>.</param>
        /// <returns>The resultant text of the events that were forwarded to the extraction strategy.</returns>
        private string GetTextFromRectangle(Rectangle rectangle)
        {
            TextRegionEventFilter          regionFilter = new TextRegionEventFilter(rectangle);
            LocationTextExtractionStrategy collector    = new LocationTextExtractionStrategy();

            foreach (IEventData renderEvent in textRenderList)
            {
                // Whole chunk passes the IsInsideRectangle check: forward it unchanged.
                if (regionFilter.IsInsideRectangle(renderEvent, EventType.RENDER_TEXT))
                {
                    collector.EventOccurred(renderEvent, EventType.RENDER_TEXT);
                    continue;
                }

                // Chunk rejected by the filter altogether: nothing to collect from it.
                if (!regionFilter.Accept(renderEvent, EventType.RENDER_TEXT))
                {
                    continue;
                }

                // Accepted by the filter but not by IsInsideRectangle: test each character on its own.
                IList <TextRenderInfo> characters = ((TextRenderInfo)renderEvent).GetCharacterRenderInfos();
                foreach (TextRenderInfo character in characters)
                {
                    if (regionFilter.IsInsideRectangle(character, EventType.RENDER_TEXT))
                    {
                        collector.EventOccurred(character, EventType.RENDER_TEXT);
                    }
                }
            }

            return collector.GetResultantText();
        }
        /// <summary>
        /// Tries each region in <c>StatementPeriodLocations</c> on the first page and returns the first
        /// date range that could be extracted and converted.
        /// </summary>
        /// <param name="pdfDocument">Document whose first page is read.</param>
        /// <returns>
        /// The first successfully converted date range, or <c>null</c> when no region yielded one.
        /// </returns>
        public override string GetDateRange(PdfDocument pdfDocument)
        {
            var possibleDateRanges = new List <string>();

            foreach (var statementPeriodLocation in StatementPeriodLocations)
            {
                try
                {
                    var    regionFilter  = new TextRegionEventFilter(statementPeriodLocation);
                    var    strategy      = new FilteredTextEventListener(new LocationTextExtractionStrategy(), regionFilter);
                    string textDateRange = PdfTextExtractor.GetTextFromPage(pdfDocument.GetFirstPage(), strategy);

                    // Expect "<start> - <end>". Skip this region explicitly when the text is missing or
                    // has no separator, instead of relying on a NullReference/IndexOutOfRange exception.
                    var textDateRangeArray = textDateRange?.Split('-');
                    if (textDateRangeArray == null || textDateRangeArray.Length < 2)
                    {
                        continue;
                    }

                    var textStartDate = textDateRangeArray[0].Trim();
                    var textEndDate   = textDateRangeArray[1].Trim();

                    possibleDateRanges.Add(ConvertToDateRangeFormat(textStartDate, textEndDate));
                }
                catch (Exception e)
                {
                    // Best effort: a failing region must not abort the remaining ones, but leave a trace
                    // (debug builds only) rather than swallowing the failure silently.
                    System.Diagnostics.Debug.WriteLine($"GetDateRange: region skipped: {e.Message}");
                }
            }

            return possibleDateRanges.FirstOrDefault();
        }
Beispiel #5
0
        /// <summary>
        /// Extracts six regions from page 1 of <c>test01.pdf</c> with actual-text usage enabled on the
        /// extraction strategies and compares each region with its expected text.
        /// </summary>
        public virtual void Test01()
        {
            String[] expectedText = new String[]
            {
                "\u0928\u093F\u0930\u094D\u0935\u093E\u091A\u0915",
                "\u0928\u0917\u0930\u0928\u093F\u0917\u092E / "
                + "\u0928\u0917\u0930\u092A\u0930\u093F\u0937\u0926" + " / \u0928\u0917\u0930\u092A\u093E\u0932\u093F\u0915\u093E \u0915\u093E \u0928\u093E\u092E",
                "\u0935 " + "\u0938\u0902\u0916\u094D\u092F\u093E",
                "\u0938\u0902\u0915\u094D\u0937\u093F\u092A\u094D\u0924 \u092A\u0941\u0928\u0930\u0940\u0915\u094D\u0937\u0923",
                "\u092E\u0924\u0926\u093E\u0928 " + "\u0915\u0947\u0928\u094D\u0926\u094D\u0930" + "\u0915\u093E",
                "\u0906\u0930\u0902\u092D\u093F\u0915 "
                + "\u0915\u094D\u0930\u092E\u0938\u0902\u0916\u094D\u092F\u093E"
            };
            Rectangle[] regions = new Rectangle[]
            {
                new Rectangle(30, 779, 45, 20),
                new Rectangle(30, 745, 210, 20),
                new Rectangle(30, 713, 42, 20),
                new Rectangle(30, 679, 80, 20),
                new Rectangle(30, 647, 73, 20),
                new Rectangle(30, 612, 93, 20)
            };

            // The using block closes the document (and its reader) even when an assertion fails.
            using (PdfDocument pdfDocument = new PdfDocument(new PdfReader(sourceFolder + "test01.pdf")))
            {
                FilteredEventListener listener = new FilteredEventListener();
                LocationTextExtractionStrategy[] extractionStrategies = new LocationTextExtractionStrategy[regions.Length];

                // One extraction strategy per region, all fed by the same listener.
                for (int i = 0; i < regions.Length; i++)
                {
                    extractionStrategies[i] = listener.AttachEventListener(
                        new LocationTextExtractionStrategy().SetUseActualText(true),
                        new TextRegionEventFilter(regions[i]));
                }

                new PdfCanvasProcessor(listener).ProcessPageContent(pdfDocument.GetPage(1));

                for (int i = 0; i < regions.Length; i++)
                {
                    String actualText = extractionStrategies[i].GetResultantText();
                    NUnit.Framework.Assert.AreEqual(expectedText[i], actualText);
                }
            }
        }
Beispiel #6
0
        /// <summary>
        /// Extracts three regions from page 1 of <c>in02.pdf</c> with right-to-left run direction
        /// enabled on the extraction strategies and compares each region with its expected text.
        /// </summary>
        public virtual void Test02()
        {
            // Again not completely correct. see test04()
            //TODO DEVSIX-2648
            String[] expectedText = new String[]
            {
                "1879 ",
                "\u05D4\u05D0\u05D5\u05E4\u05E0\u05D5\u05E2",
                ")\u05D2\u05D5\u05D8\u05DC\u05D9\u05D1 \u05D3\u05D9\u05D9\u05DE\u05DC\u05E8 \u05D5\u05D5\u05D9\u05DC\u05D4\u05DC\u05DD \u05DE\u05D9\u05D9\u05D1\u05D0\u05DA,1885 (,"
            };
            Rectangle[] regions = new Rectangle[]
            {
                new Rectangle(493, 765, 23, 11),
                new Rectangle(522, 784, 38, 12),
                new Rectangle(332, 784, 185, 12)
            };

            // The using block closes the document (and its reader) even when an assertion fails.
            using (PdfDocument pdfDocument = new PdfDocument(new PdfReader(sourceFolder + "in02.pdf")))
            {
                FilteredEventListener listener = new FilteredEventListener();
                LocationTextExtractionStrategy[] extractionStrategies = new LocationTextExtractionStrategy[regions.Length];

                // One extraction strategy per region, all fed by the same listener.
                for (int i = 0; i < regions.Length; i++)
                {
                    extractionStrategies[i] = listener.AttachEventListener(
                        new LocationTextExtractionStrategy().SetRightToLeftRunDirection(true),
                        new TextRegionEventFilter(regions[i]));
                }

                new PdfCanvasProcessor(listener).ProcessPageContent(pdfDocument.GetPage(1));

                for (int i = 0; i < regions.Length; i++)
                {
                    String actualText = extractionStrategies[i].GetResultantText();
                    NUnit.Framework.Assert.AreEqual(expectedText[i], actualText);
                }
            }
        }
Beispiel #7
0
        /// <summary>
        /// Reads the Oy axis area of a page: returns its non-blank text lines and stores the
        /// second-pass strategy's <c>TextResult</c> in <c>PositionOyAxis</c>.
        /// </summary>
        /// <param name="page">Page whose content stream is processed</param>
        /// <returns>Non-blank lines extracted from the axis area, one per appended line</returns>
        internal StringBuilder ParsingOyAxis(PdfPage page)
        {
            // Read area: 20 units wide, starting at the left margin, 60 above the bottom margin.
            TextRegionEventFilter axisFilter = new TextRegionEventFilter(
                new Rectangle(Margin.Left, Margin.Bottom + 60, 20,
                              page.GetPageSize().GetHeight() - Margin.Bottom - 160));
            FilteredEventListener dispatcher = new FilteredEventListener();

            // First pass: plain text of the axis area.
            LocationTextExtractionStrategy axisText =
                dispatcher.AttachEventListener(new LocationTextExtractionStrategy(), axisFilter);

            lock (block)
            {
                PdfCanvasProcessor firstPass = new PdfCanvasProcessor(dispatcher);
                firstPass.ProcessPageContent(page);
                firstPass.Reset();
            }

            // Keep only lines that still contain something after trimming.
            StringBuilder nonBlankLines = new StringBuilder();
            foreach (string row in axisText.GetResultantText().Split('\n'))
            {
                if (row.Trim().Length == 0)
                {
                    continue;
                }

                nonBlankLines.AppendLine(row);
            }

            // Second pass over the same page with the custom strategy attached to the same listener.
            TextExtractionStrategy positionStrategy =
                dispatcher.AttachEventListener(new TextExtractionStrategy(), axisFilter);

            lock (block)
            {
                PdfCanvasProcessor secondPass = new PdfCanvasProcessor(dispatcher);
                secondPass.ProcessPageContent(page);
                secondPass.Reset();
            }

            PositionOyAxis = positionStrategy.TextResult.ToArray();

            return nonBlankLines;
        }
        /// <summary>
        /// Extracts the text inside one rectangle on page 1 of <c>user10.pdf</c> and checks it
        /// against the expected header text.
        /// </summary>
        public virtual void Test()
        {
            Rectangle    rectangle = new Rectangle(71, 708, 154, 9);
            IEventFilter filter    = new TextRegionEventFilter(rectangle);

            // The using block closes the document (and its reader) even when the assertion fails.
            using (PdfDocument pdfDocument = new PdfDocument(new PdfReader(sourceFolder + "user10.pdf")))
            {
                String txt = PdfTextExtractor.GetTextFromPage(
                    pdfDocument.GetPage(1),
                    new FilteredTextEventListener(new LocationTextExtractionStrategy(), filter));

                NUnit.Framework.Assert.AreEqual("Pname Dname Email Address", txt);
            }
        }
Beispiel #9
0
        // Private helper, not callable from outside the class. Takes an in-memory PdfDocument and the
        // rectangle to extract text from.
        private string GetTextFromArea(PdfDocument PdfDoc, Rectangle Rectan)
        {
            // The page number comes from the class-level PageID; when it is 0, use the first page.
            PdfPage targetPage;
            if (PageID == 0)
            {
                targetPage = PdfDoc.GetFirstPage();
            }
            else
            {
                targetPage = PdfDoc.GetPage(PageID);
            }

            // Event filter restricted to the requested rectangle.
            IEventFilter[] regionFilters = { new TextRegionEventFilter(Rectan) };

            // Location-based extraction strategy that only sees events passing the filter.
            var regionListener = new FilteredTextEventListener(new LocationTextExtractionStrategy(), regionFilters);

            // Extract the rectangle's text and return it trimmed.
            string extracted = PdfTextExtractor.GetTextFromPage(targetPage, regionListener);
            return extracted.Trim();
        }
Beispiel #10
0
        /// <summary>
        /// Reads the text inside a fixed rectangle on the first page of <paramref name="pdfDoc"/>
        /// and stores it in <c>period</c>.
        /// </summary>
        /// <param name="pdfDoc">Document whose first page is processed.</param>
        public virtual void ExtractPlanPeriodText(PdfDocument pdfDoc)
        {
            var planPeriodArea = new Rectangle(563, 638, 9, 111);
            var areaFilter     = new TextRegionEventFilter(planPeriodArea);

            ITextExtractionStrategy areaText =
                new FilteredTextEventListener(new LocationTextExtractionStrategy(), areaFilter);

            // Note: If you want to re-use the PdfCanvasProcessor, you must call PdfCanvasProcessor.reset()
            PdfCanvasProcessor processor = new PdfCanvasProcessor(areaText);
            processor.ProcessPageContent(pdfDoc.GetFirstPage());

            period = areaText.GetResultantText();
        }
Beispiel #11
0
        /// <summary>
        /// Extracts the text inside the rectangle (0, 0, 595, 50) of every page of a PDF and hands it
        /// to <c>analizeText</c>. Pages with no text there are logged as errors and flagged in grid row
        /// <paramref name="j"/>.
        /// </summary>
        /// <param name="src">Path of the PDF file to analyse.</param>
        /// <param name="j">Index of the <c>dataGridView1</c> row that represents this file.</param>
        private void pdfAnalize(string src, int j)
        {
            // The using block closes the document even when a page fails to parse; the final
            // "finished" messages are still written before the close, as before.
            using (iText.Kernel.Pdf.PdfDocument pdfDoc = new PdfDocument(new PdfReader(src)))
            {
                int n = pdfDoc.GetNumberOfPages();

                logger.Info($"********** Document:{src} --> Total Pages:{n} **********");
                _textBoxListener.WriteLine($"INFO: ********** Document:{src} --> Total Pages:{n} **********");
                logTextBox.SelectionStart = logTextBox.TextLength;
                logTextBox.ScrollToCaret();

                iText.Kernel.Geom.Rectangle rect         = new iText.Kernel.Geom.Rectangle(0, 0, 595, 50);
                TextRegionEventFilter       regionFilter = new TextRegionEventFilter(rect);

                for (int i = 1; i <= n; i++)
                {
                    totalPages++;
                    PdfPage page = pdfDoc.GetPage(i);
                    ITextExtractionStrategy strategy = new FilteredTextEventListener(new LocationTextExtractionStrategy(), regionFilter);
                    string text = PdfTextExtractor.GetTextFromPage(page, strategy);

                    if (text.Length > 0)
                    {
                        analizeText(i, text, j);
                        continue;
                    }

                    logger.Error($"Page: {i} --NO TEXT HAVE BEEN FOUND");
                    _textBoxListener.WriteLine($"ERROR: Page: {i} --NO TEXT HAVE BEEN FOUND");
                    logTextBox.SelectionStart = logTextBox.TextLength;
                    logTextBox.ScrollToCaret();

                    var row = dataGridView1.Rows[j];
                    row.Cells[2].Value           = "KO";
                    row.Cells[2].Style.BackColor = Color.Red;

                    // A cell that was never written holds null; treat it like an empty note instead of
                    // crashing on Value.ToString().
                    string existingNote = row.Cells[3].Value?.ToString();
                    if (string.IsNullOrEmpty(existingNote))
                    {
                        row.Cells[3].Value = $" Check=> Page:{i}";
                    }
                    else
                    {
                        row.Cells[3].Value = existingNote + " | " + $"Page:{i}";
                    }
                    dataGridView1.Refresh();
                }

                logger.Info($"********** Document:{src} --> Extract Pages finished **********");
                _textBoxListener.WriteLine($"INFO: ********** Document:{src} --> Extract Pages finished **********");
                logTextBox.SelectionStart = logTextBox.TextLength;
                logTextBox.ScrollToCaret();
            }
        }
Beispiel #12
0
        /// <summary>
        /// Stores the region values and source file name, opens the PDF and prepares a
        /// region-filtered text extraction strategy.
        /// </summary>
        /// <remarks>
        /// NOTE(review): <paramref name="x2"/> and <paramref name="y2"/> are passed as the third and
        /// fourth <c>Rectangle</c> constructor arguments, which presumably are width and height rather
        /// than a second corner; confirm against callers.
        /// </remarks>
        public SetRectangle(int x1, int y1, int x2, int y2, string sourceFileName)
        {
            // Remember the raw inputs.
            this.x1 = x1;
            this.y1 = y1;
            this.x2 = x2;
            this.y2 = y2;
            sn      = sourceFileName;

            // Open the source document.
            pdfReader = new PdfReader(sourceFileName);
            pdfDoc    = new PdfDocument(pdfReader);

            // Extraction strategy limited to the requested region.
            var regionFilter = new TextRegionEventFilter(new Rectangle(x1, y1, x2, y2));
            strategy = new FilteredTextEventListener(new LocationTextExtractionStrategy(), regionFilter);
        }
        /// <summary>
        /// Extracts the text inside <c>AccountNumberLocation</c> on the first page and returns it
        /// after trimming and passing it through <c>StripOutBsb</c>.
        /// </summary>
        /// <param name="pdfDocument">Document whose first page is read.</param>
        /// <returns>The result of <c>StripOutBsb</c> for the trimmed region text.</returns>
        public override string GetAccountNumber(PdfDocument pdfDocument)
        {
            // No try/catch here: the previous catch block only rethrew, so exceptions reach the
            // caller exactly as before.
            var    regionFilter  = new TextRegionEventFilter(AccountNumberLocation);
            var    strategy      = new FilteredTextEventListener(new LocationTextExtractionStrategy(), regionFilter);
            string accountNumber = PdfTextExtractor.GetTextFromPage(pdfDocument.GetFirstPage(), strategy).Trim();

            return StripOutBsb(accountNumber);
        }
        /// <summary>
        /// Extracts the text inside <c>AccountNumberLocation</c> on the first page and returns it
        /// after trimming and passing it through <c>StripOutBsb</c>.
        /// </summary>
        /// <param name="pdfDocument">Document whose first page is read.</param>
        /// <returns>
        /// The stripped account number, or an empty string when extraction fails (a message box
        /// reports the failure).
        /// </returns>
        public override string GetAccountNumber(PdfDocument pdfDocument)
        {
            string result;

            try
            {
                var areaFilter = new TextRegionEventFilter(AccountNumberLocation);
                var areaText   = new FilteredTextEventListener(new LocationTextExtractionStrategy(), areaFilter);

                string rawText = PdfTextExtractor.GetTextFromPage(pdfDocument.GetFirstPage(), areaText);
                result = StripOutBsb(rawText.Trim());
            }
            catch (Exception e)
            {
                // Any failure is reported to the user and mapped to an empty result.
                MessageBox.Show($"There was a problem trying to extract the Account Number: {e.Message}");
                result = "";
            }

            return result;
        }
Beispiel #15
0
        /// <summary>
        /// Walks every page of <c>sourceFileName</c> and extracts the text inside the configured
        /// region, split on spaces.
        /// </summary>
        public void SplitPDF()
        {
            Rectangle             rect         = new Rectangle(x1, y1, x2, y2);
            TextRegionEventFilter regionFilter = new TextRegionEventFilter(rect);

            // The using block closes the document (and its reader) even when a page fails to parse.
            using (PdfDocument pdfDoc = new PdfDocument(new PdfReader(sourceFileName)))
            {
                int pageCount = pdfDoc.GetNumberOfPages();

                for (int page = 1; page <= pageCount; page++)
                {
                    // A fresh strategy per page so text from earlier pages does not accumulate.
                    ITextExtractionStrategy strategy = new FilteredTextEventListener(new LocationTextExtractionStrategy(), regionFilter);

                    // NOTE(review): preFilename is computed but not consumed in the visible code;
                    // the method looks unfinished.
                    string[] preFilename = PdfTextExtractor.GetTextFromPage(pdfDoc.GetPage(page), strategy).Split(' ');
                }
            }
        }
        /// <summary>
        /// Reads <c>EndDateRegion</c> on the last page and returns the last line that parses as a date.
        /// </summary>
        /// <param name="pdfDocument">Document whose last page is read.</param>
        /// <returns>The trimmed text of the bottom-most parsable line, or an empty string if none parses.</returns>
        private string GetEndDate(PdfDocument pdfDocument)
        {
            var areaFilter = new TextRegionEventFilter(EndDateRegion);
            var areaText   = new FilteredTextEventListener(new LocationTextExtractionStrategy(), areaFilter);

            string[] rows = PdfTextExtractor.GetTextFromPage(pdfDocument.GetLastPage(), areaText)
                            .Trim()
                            .Split('\n');

            // Scan from the last row upwards; the first row that parses wins.
            for (int i = rows.Length - 1; i >= 0; i--)
            {
                string candidate = rows[i].Trim();
                if (DateTime.TryParse(candidate, out DateTime parsed))
                {
                    return candidate;
                }
            }

            return "";
        }
Beispiel #17
0
        /// <summary>
        /// Reads the column-name area of a page.
        /// </summary>
        /// <param name="page">Page whose content stream is processed</param>
        /// <returns>Every line extracted from the column-name area, one per appended line</returns>
        internal StringBuilder ParsingColumns(PdfPage page)
        {
            // Read area: 10 units high, 70 below the top margin, a quarter of the usable width.
            TextRegionEventFilter headerFilter = new TextRegionEventFilter(
                new Rectangle(Margin.Left,
                              page.GetPageSize().GetHeight() - Margin.Top - 70,
                              (page.GetPageSize().GetWidth() - Margin.Right) / 4, 10));
            FilteredEventListener dispatcher = new FilteredEventListener();

            // Text extraction restricted to the header area.
            LocationTextExtractionStrategy headerText =
                dispatcher.AttachEventListener(new LocationTextExtractionStrategy(), headerFilter);

            lock (block)
            {
                PdfCanvasProcessor processor = new PdfCanvasProcessor(dispatcher);
                processor.ProcessPageContent(page);
                processor.Reset();
            }

            // Copy every extracted row, blank ones included.
            StringBuilder columnNames = new StringBuilder();
            foreach (string row in headerText.GetResultantText().Split('\n'))
            {
                columnNames.AppendLine(row);
            }

            return columnNames;
        }
Beispiel #18
0
        /// <summary>
        /// For each page of the PDF at <paramref name="path"/>, extracts the full page text and the
        /// text inside a 200 x 15 strip at y = 375 derived from the page size rectangle.
        /// </summary>
        /// <param name="path">Path of the PDF file to scan.</param>
        public static void LocScan(string path)
        {
            // The using block closes the document (and its reader) even when a page fails to parse.
            using (PdfDocument doc = new PdfDocument(new PdfReader(path)))
            {
                int pageCount = doc.GetNumberOfPages();

                for (int i = 1; i <= pageCount; i++)
                {
                    PdfPage d    = doc.GetPage(i);
                    string  full = PdfTextExtractor.GetTextFromPage(d, new LocationTextExtractionStrategy());

                    // Start from the page size rectangle and shrink it to the strip of interest.
                    Rectangle z = d.GetPageSize();
                    z.SetHeight(15);
                    z.SetWidth(200);
                    z.SetY(375);

                    TextRegionEventFilter     filter = new TextRegionEventFilter(z);
                    FilteredTextEventListener list   = new FilteredTextEventListener(new LocationTextExtractionStrategy(), filter);

                    // NOTE(review): neither 'full' nor 'half' is consumed in the visible code;
                    // presumably they are inspected in a debugger.
                    string half = PdfTextExtractor.GetTextFromPage(d, list);
                }
            }
        }
        /// <summary>
        /// Reads <c>StartDateRegion</c> on the first page and returns the second of its two lines.
        /// </summary>
        /// <param name="pdfDocument">Document whose first page is read.</param>
        /// <returns>The trimmed second line of the region text.</returns>
        /// <exception cref="IndexOutOfRangeException">The region text does not consist of exactly two lines.</exception>
        private string GetStartDate(PdfDocument pdfDocument)
        {
            var areaFilter = new TextRegionEventFilter(StartDateRegion);
            var areaText   = new FilteredTextEventListener(new LocationTextExtractionStrategy(), areaFilter);

            string[] rows = PdfTextExtractor.GetTextFromPage(pdfDocument.GetFirstPage(), areaText)
                            .Trim()
                            .Split('\n');

            // Exactly two rows are expected: the 'Date' heading row, then the start date of the
            // transactions. The region spans both because addresses in the PDF can slightly nudge
            // the starting positions of these rows.
            if (rows.Length != 2)
            {
                throw new IndexOutOfRangeException("Could not extract start date correctly");
            }

            return rows[1].Trim();
        }
Beispiel #20
0
        /// <summary>
        /// Attaches one extraction strategy guarded by four region filters and checks that processing
        /// page 1 of <c>test.pdf</c> through a <c>FilteredEventListener</c> yields the same text as
        /// <c>PdfTextExtractor</c> with a <c>FilteredTextEventListener</c> using the same filters.
        /// </summary>
        public virtual void MultipleFiltersForOneRegionTest()
        {
            Rectangle[] regions = new Rectangle[]
            {
                new Rectangle(0, 0, 500, 650),
                new Rectangle(0, 0, 400, 400),
                new Rectangle(200, 200, 300, 400),
                new Rectangle(100, 100, 350, 300)
            };
            TextRegionEventFilter[] regionFilters = new TextRegionEventFilter[regions.Length];
            for (int i = 0; i < regions.Length; i++)
            {
                regionFilters[i] = new TextRegionEventFilter(regions[i]);
            }

            // The using block closes the document (and its reader) even when the assertion fails.
            using (PdfDocument pdfDocument = new PdfDocument(new PdfReader(sourceFolder + "test.pdf")))
            {
                FilteredEventListener          listener           = new FilteredEventListener();
                LocationTextExtractionStrategy extractionStrategy =
                    listener.AttachEventListener(new LocationTextExtractionStrategy(), regionFilters);

                new PdfCanvasProcessor(listener).ProcessPageContent(pdfDocument.GetPage(1));
                String actualText   = extractionStrategy.GetResultantText();
                String expectedText = PdfTextExtractor.GetTextFromPage(
                    pdfDocument.GetPage(1),
                    new FilteredTextEventListener(new LocationTextExtractionStrategy(), regionFilters));

                NUnit.Framework.Assert.AreEqual(expectedText, actualText);
            }
        }
Beispiel #21
0
        /// <summary>
        /// Extracts the text of three fixed regions of <paramref name="pdfPage"/> and returns the
        /// three results concatenated in that order.
        /// </summary>
        /// <param name="pdfPage">Page whose content stream is processed once per region.</param>
        /// <returns>Concatenated text of the three regions, with no separator added.</returns>
        public virtual String ExtractPageDutys(PdfPage pdfPage)
        {
            // Processing order: "left", "middle", "right" duty column.
            Rectangle[] dutyColumns =
            {
                new Rectangle(0, 554, 490, 290),
                new Rectangle(0, 290, 490, 255),
                new Rectangle(0, 0, 490, 290)
            };

            String combined = "";
            foreach (Rectangle column in dutyColumns)
            {
                // Each column gets its own filter, strategy and processor.
                ITextExtractionStrategy columnText = new FilteredTextEventListener(
                    new LocationTextExtractionStrategy(), new TextRegionEventFilter(column));
                new PdfCanvasProcessor(columnText).ProcessPageContent(pdfPage);
                combined += columnText.GetResultantText();
            }

            return combined;
        }
Beispiel #22
0
        /// <summary>
        /// Searches the PDF files listed in <paramref name="sources"/> (from the third entry on) for
        /// <paramref name="SearchStr"/> and prints one console line per page that contains it.
        /// </summary>
        /// <param name="SearchStr">Text to look for in each page's extracted text.</param>
        /// <param name="sources">Candidate file paths; the first two entries are ignored.</param>
        public void FindTextInPdf(string SearchStr, string[] sources)
        {
            // Skip(2) yields nothing for arrays with two or fewer entries, so no length check is needed.
            foreach (string candidate in sources.Skip(2))
            {
                if (!File.Exists(candidate))
                {
                    continue;
                }

                using (PdfReader reader = new PdfReader(candidate))
                using (PdfDocument doc = new PdfDocument(reader))
                {
                    int lastPage = doc.GetNumberOfPages();

                    for (int pageNo = 1; pageNo <= lastPage; pageNo++)
                    {
                        PdfPage page = doc.GetPage(pageNo);

                        // The search region is a copy of the page's crop box.
                        var cropBox = page.GetCropBox();
                        var region  = new Rectangle(cropBox.GetX(), cropBox.GetY(), cropBox.GetWidth(), cropBox.GetHeight());

                        IEventFilter[] filters = { new TextRegionEventFilter(region) };
                        ITextExtractionStrategy strategy = new FilteredTextEventListener(new LocationTextExtractionStrategy(), filters);

                        string pageText = PdfTextExtractor.GetTextFromPage(page, strategy);
                        if (!pageText.Contains(SearchStr))
                        {
                            continue;
                        }

                        Console.WriteLine("Searched text found in file:[ " + candidate + " ] page : [ " + pageNo + " ]");
                    }
                }
            }
        }