/// <summary>
/// Writes a textual dump of one PDF page to <paramref name="outp"/>.
/// Each section (page dictionary, XObject summary, raw content stream,
/// extracted text, and three text-block layouts) is emitted only when the
/// corresponding <c>_output*</c> flag is set.
/// </summary>
/// <param name="reader">Reader over the PDF document being dumped.</param>
/// <param name="pageNum">Page number passed to <c>GetPageN</c> / <c>GetPageContent</c>.</param>
/// <param name="outp">Destination writer for the dump.</param>
public static void xpdfPage(PdfReader reader, int pageNum, TextWriter outp)
{
    outp.WriteLine("==============Page " + pageNum + "====================");

    PdfDictionary pageDictionary = reader.GetPageN(pageNum);

    if (_outputDictionary)
    {
        outp.WriteLine("- - - - - Dictionary - - - - - -");
        string s = GetDictionaryDetail(pageDictionary);
        outp.WriteLine(s);
    }

    if (_outputXObject)
    {
        outp.WriteLine("- - - - - XObject summary - - - - - -");
        outp.WriteLine(PdfContentReaderTool.GetXObjectDetail(pageDictionary.GetAsDict(PdfName.RESOURCES)));
    }

    if (_outputContentStream)
    {
        outp.WriteLine("- - - - - Content stream - - - - - -");

        RandomAccessFileOrArray f = reader.SafeFile;
        byte[] contentBytes;
        try
        {
            contentBytes = reader.GetPageContent(pageNum, f);
        }
        finally
        {
            // Close the file view even when reading the page content fails,
            // so the underlying handle is not leaked.
            f.Close();
        }

        outp.Flush();
        // Each byte is written as a single char (no decoding is applied).
        foreach (byte b in contentBytes)
        {
            outp.Write((char)b);
        }
        outp.Flush();
    }

    // The strategy is processed unconditionally: every text section below
    // reads its results (resultant text or text blocks) from it.
    Test_iTextSharp.LocationTextExtractionStrategy strategy = new Test_iTextSharp.LocationTextExtractionStrategy();
    Test_iTextSharp.PdfTools.ProcessContentPage(reader, pageNum, strategy);

    if (_outputText)
    {
        outp.WriteLine("- - - - - Text extraction - - - - - -");
        string extractedText = strategy.GetResultantText();
        if (extractedText.Length != 0)
        {
            outp.WriteLine(extractedText);
            outp.WriteLine();
        }
        else
        {
            outp.WriteLine("No text found on page " + pageNum);
        }
    }

    if (_outputTextBlocks1)
    {
        outp.WriteLine("- - - - - Text blocks extraction 1 - - - - - -");
        foreach (Test_iTextSharp.TextBlock textBlock in strategy.textBlocks)
        {
            PrintTextBlock(outp, textBlock, 0);
        }
        outp.WriteLine();
    }

    if (_outputTextBlocks2)
    {
        outp.WriteLine("- - - - - Text blocks extraction 2 - - - - - -");
        foreach (Test_iTextSharp.TextBlock textBlock in strategy.textBlocks)
        {
            outp.Write("block ");
            outp.WriteLine(textBlock.GetText());
            // Only the top-level block text is printed here; flag nested blocks.
            if (textBlock.childs.Count > 0)
            {
                outp.WriteLine(" **** warning childs blocks not printed ****");
            }
        }
        outp.WriteLine();
    }

    if (_outputTextBlocks3)
    {
        outp.WriteLine("- - - - - Text blocks extraction 3 - - - - - -");
        foreach (Test_iTextSharp.TextBlock textBlock in strategy.textBlocks)
        {
            // The first line of each block gets the "block " prefix,
            // subsequent lines get a single-space prefix.
            bool first = true;
            foreach (string s in textBlock.GetTextByLines(_outputMaxCol))
            {
                if (first)
                {
                    outp.Write("block ");
                    first = false;
                }
                else
                {
                    outp.Write(" ");
                }
                outp.WriteLine(s);
            }
            if (textBlock.childs.Count > 0)
            {
                outp.WriteLine(" **** warning childs blocks not printed ****");
            }
        }
        outp.WriteLine();
    }

    outp.WriteLine();
}
/// <summary>
/// Dumps a single PDF page to <paramref name="outp"/> as text. Sections are
/// written in a fixed order (dictionary, XObject summary, content stream,
/// text extraction, text blocks 1/2/3), each gated by its own
/// <c>_output*</c> flag.
/// </summary>
/// <param name="reader">Reader over the PDF document to inspect.</param>
/// <param name="pageNum">Page number forwarded to the reader and to the extraction helper.</param>
/// <param name="outp">Writer that receives the dump.</param>
public static void xpdfPage(PdfReader reader, int pageNum, TextWriter outp)
{
    outp.WriteLine("==============Page " + pageNum + "====================");

    PdfDictionary pageDictionary = reader.GetPageN(pageNum);

    if (_outputDictionary)
    {
        outp.WriteLine("- - - - - Dictionary - - - - - -");
        string s = GetDictionaryDetail(pageDictionary);
        outp.WriteLine(s);
    }

    if (_outputXObject)
    {
        outp.WriteLine("- - - - - XObject summary - - - - - -");
        outp.WriteLine(PdfContentReaderTool.GetXObjectDetail(pageDictionary.GetAsDict(PdfName.RESOURCES)));
    }

    if (_outputContentStream)
    {
        outp.WriteLine("- - - - - Content stream - - - - - -");

        byte[] contentBytes;
        RandomAccessFileOrArray f = reader.SafeFile;
        try
        {
            contentBytes = reader.GetPageContent(pageNum, f);
        }
        finally
        {
            // Guarantee the file view is released, including when
            // GetPageContent throws.
            f.Close();
        }

        outp.Flush();
        // Raw bytes are emitted one char per byte, without any decoding.
        foreach (byte b in contentBytes)
        {
            outp.Write((char)b);
        }
        outp.Flush();
    }

    // Run the extraction once, regardless of flags; the text and text-block
    // sections below all read from this strategy instance.
    Test_iTextSharp.LocationTextExtractionStrategy strategy = new Test_iTextSharp.LocationTextExtractionStrategy();
    Test_iTextSharp.PdfTools.ProcessContentPage(reader, pageNum, strategy);

    if (_outputText)
    {
        outp.WriteLine("- - - - - Text extraction - - - - - -");
        string extractedText = strategy.GetResultantText();
        if (extractedText.Length != 0)
        {
            outp.WriteLine(extractedText);
            outp.WriteLine();
        }
        else
        {
            outp.WriteLine("No text found on page " + pageNum);
        }
    }

    if (_outputTextBlocks1)
    {
        outp.WriteLine("- - - - - Text blocks extraction 1 - - - - - -");
        foreach (Test_iTextSharp.TextBlock textBlock in strategy.textBlocks)
        {
            PrintTextBlock(outp, textBlock, 0);
        }
        outp.WriteLine();
    }

    if (_outputTextBlocks2)
    {
        outp.WriteLine("- - - - - Text blocks extraction 2 - - - - - -");
        foreach (Test_iTextSharp.TextBlock textBlock in strategy.textBlocks)
        {
            outp.Write("block ");
            outp.WriteLine(textBlock.GetText());
            // Child blocks are not expanded in this layout; emit a warning instead.
            if (textBlock.childs.Count > 0)
            {
                outp.WriteLine(" **** warning childs blocks not printed ****");
            }
        }
        outp.WriteLine();
    }

    if (_outputTextBlocks3)
    {
        outp.WriteLine("- - - - - Text blocks extraction 3 - - - - - -");
        foreach (Test_iTextSharp.TextBlock textBlock in strategy.textBlocks)
        {
            // "block " prefixes the first line of a block; later lines are
            // prefixed with a single space.
            bool first = true;
            foreach (string s in textBlock.GetTextByLines(_outputMaxCol))
            {
                if (first)
                {
                    outp.Write("block ");
                    first = false;
                }
                else
                {
                    outp.Write(" ");
                }
                outp.WriteLine(s);
            }
            if (textBlock.childs.Count > 0)
            {
                outp.WriteLine(" **** warning childs blocks not printed ****");
            }
        }
        outp.WriteLine();
    }

    outp.WriteLine();
}