//static const Regex.Replace(s, @"\t|\n|\r", "");

/// <summary>
/// Console entry point. Opens a PDF and, for every page, prints the crop-box
/// width and height, feeds the page's content stream through an
/// <c>SBTextRenderer</c> listener, then runs a
/// <c>LocationTextExtractionStrategyEx2</c> over the page and prints each
/// value of its <c>Columbs</c> collection.
/// </summary>
/// <param name="args">
/// Optional command-line arguments. When at least one is supplied, the first
/// is used as the PDF path; otherwise the original default path
/// <c>D:\mobi.pdf</c> is used.
/// </param>
static void Main(string[] args)
{
    // Keep the historical hard-coded path as the default so that runs
    // without arguments behave exactly as before.
    string fileName = (args != null && args.Length > 0) ? args[0] : @"D:\mobi.pdf";

    using (PdfReader reader = new PdfReader(fileName))
    {
        // A single TextRenderEx instance is handed to every per-page listener.
        var sb = new TextRenderEx();

        // Pages are visited from 1 through reader.NumberOfPages inclusive.
        for (int page = 1; page <= reader.NumberOfPages; page++)
        {
            var size = reader.GetCropBox(page);
            Console.WriteLine(size.Width);
            Console.WriteLine(size.Height);

            // First pass: push the raw content stream of this page, together
            // with the page's resource dictionary, through the listener.
            IRenderListener listener = new SBTextRenderer(sb);
            PdfContentStreamProcessor processor = new PdfContentStreamProcessor(listener);
            PdfDictionary pageDic = reader.GetPageN(page);
            PdfDictionary resourcesDic = pageDic.GetAsDict(PdfName.RESOURCES);
            processor.ProcessContent(ContentByteUtils.GetContentBytesForPage(reader, page), resourcesDic);

            // Second pass: the returned text is not needed here; the call is
            // kept because the strategy object is read right afterwards
            // (presumably the extraction fills its.Columbs -- confirm against
            // LocationTextExtractionStrategyEx2).
            var its = new LocationTextExtractionStrategyEx2();
            PdfTextExtractor.GetTextFromPage(reader, page, its);

            foreach (var column in its.Columbs.Values)
            {
                Console.WriteLine(column.ToString());
            }
        }
    }
}
/// <summary>
/// Creates a renderer bound to the given <c>TextRenderEx</c> instance.
/// </summary>
/// <param name="builder">
/// The instance stored in <c>obj</c>. It is stored as-is, without validation
/// or copying.
/// </param>
public SBTextRenderer(TextRenderEx builder)
{
    obj = builder;
}