Ejemplo n.º 1
0
        //static const Regex.Replace(s, @"\t|\n|\r", "");
        /// <summary>
        /// Demo entry point: opens a fixed PDF file and, for every page, prints the
        /// crop-box width and height followed by each entry of the <c>Columbs</c>
        /// collection filled in by <c>LocationTextExtractionStrategyEx2</c>.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            string fileName = @"D:\mobi.pdf";

            // The using statement disposes the reader when the block exits.
            using (PdfReader reader = new PdfReader(fileName))
            {
                // One TextRenderEx instance is handed to every per-page SBTextRenderer.
                // Whatever it accumulates is not printed by this method.
                var sb = new TextRenderEx();

                // The loop treats page numbers as 1-based.
                for (int page = 1; page <= reader.NumberOfPages; page++)
                {
                    var size = reader.GetCropBox(page);
                    Console.WriteLine(size.Width);
                    Console.WriteLine(size.Height);

                    // Pass 1: push the page's raw content stream through the custom listener.
                    IRenderListener listener = new SBTextRenderer(sb);
                    PdfContentStreamProcessor processor = new PdfContentStreamProcessor(listener);
                    PdfDictionary pageDic = reader.GetPageN(page);
                    PdfDictionary resourcesDic = pageDic.GetAsDict(PdfName.RESOURCES);
                    processor.ProcessContent(ContentByteUtils.GetContentBytesForPage(reader, page), resourcesDic);

                    // Pass 2: run the extraction strategy. The returned string is not needed;
                    // the call is made so that the strategy object gets populated.
                    var its = new LocationTextExtractionStrategyEx2();
                    PdfTextExtractor.GetTextFromPage(reader, page, its);

                    foreach (var t in its.Columbs.Values)
                    {
                        Console.WriteLine(t.ToString());
                    }
                }
            }
        }
Ejemplo n.º 2
0
 /// <summary>
 /// Creates a renderer bound to the supplied <c>TextRenderEx</c> instance.
 /// </summary>
 /// <param name="builder">Instance stored in <c>obj</c>; no null check is performed.</param>
 public SBTextRenderer(TextRenderEx builder)
 {
     this.obj = builder;
 }