Exemplo n.º 1
0
Arquivo: xpdf.cs Projeto: 24/source_04
        /// <summary>
        /// Writes a diagnostic dump of a single PDF page to <paramref name="outp"/>.
        /// A page banner is always written; each following section (page dictionary,
        /// XObject summary, raw content stream, extracted text, and three text-block
        /// layouts) is emitted only when its corresponding <c>_output*</c> flag is set.
        /// </summary>
        /// <param name="reader">Reader for the PDF document being dumped.</param>
        /// <param name="pageNum">Page number passed through to the reader (iTextSharp pages are presumably 1-based; confirm against caller).</param>
        /// <param name="outp">Destination writer for the dump.</param>
        public static void xpdfPage(PdfReader reader, int pageNum, TextWriter outp)
        {
            outp.WriteLine("==============Page " + pageNum + "====================");
            PdfDictionary pageDictionary = reader.GetPageN(pageNum);

            if (_outputDictionary)
            {
                outp.WriteLine("- - - - - Dictionary - - - - - -");
                outp.WriteLine(GetDictionaryDetail(pageDictionary));
            }

            if (_outputXObject)
            {
                outp.WriteLine("- - - - - XObject summary - - - - - -");
                outp.WriteLine(PdfContentReaderTool.GetXObjectDetail(pageDictionary.GetAsDict(PdfName.RESOURCES)));
            }

            if (_outputContentStream)
            {
                outp.WriteLine("- - - - - Content stream - - - - - -");

                byte[] contentBytes;
                RandomAccessFileOrArray f = reader.SafeFile;
                try
                {
                    contentBytes = reader.GetPageContent(pageNum, f);
                }
                finally
                {
                    // Release the file handle even when reading the page content throws.
                    f.Close();
                }

                outp.Flush();

                // Each content byte is written as the char with the same code value
                // (no text decoding is applied to the stream).
                foreach (byte b in contentBytes)
                {
                    outp.Write((char)b);
                }

                outp.Flush();
            }

            // The page content is processed unconditionally, exactly once; every text
            // section below reads its data from this single strategy instance.
            Test_iTextSharp.LocationTextExtractionStrategy strategy = new Test_iTextSharp.LocationTextExtractionStrategy();
            Test_iTextSharp.PdfTools.ProcessContentPage(reader, pageNum, strategy);

            if (_outputText)
            {
                outp.WriteLine("- - - - - Text extraction - - - - - -");
                string extractedText = strategy.GetResultantText();
                if (extractedText.Length != 0)
                {
                    outp.WriteLine(extractedText);
                    outp.WriteLine();
                }
                else
                {
                    outp.WriteLine("No text found on page " + pageNum);
                }
            }

            if (_outputTextBlocks1)
            {
                outp.WriteLine("- - - - - Text blocks extraction 1 - - - - - -");
                foreach (Test_iTextSharp.TextBlock textBlock in strategy.textBlocks)
                {
                    PrintTextBlock(outp, textBlock, 0);
                }
                outp.WriteLine();
            }

            if (_outputTextBlocks2)
            {
                outp.WriteLine("- - - - - Text blocks extraction 2 - - - - - -");
                foreach (Test_iTextSharp.TextBlock textBlock in strategy.textBlocks)
                {
                    outp.Write("block  ");
                    outp.WriteLine(textBlock.GetText());
                    // Only the top-level block text is printed here; flag any children that were skipped.
                    if (textBlock.childs.Count > 0)
                    {
                        outp.WriteLine("   **** warning childs blocks not printed ****");
                    }
                }
                outp.WriteLine();
            }

            if (_outputTextBlocks3)
            {
                outp.WriteLine("- - - - - Text blocks extraction 3 - - - - - -");
                foreach (Test_iTextSharp.TextBlock textBlock in strategy.textBlocks)
                {
                    // The first line of a block gets the "block" label; later lines get
                    // a same-width blank prefix so the text stays aligned.
                    bool first = true;
                    foreach (string s in textBlock.GetTextByLines(_outputMaxCol))
                    {
                        if (first)
                        {
                            outp.Write("block  ");
                            first = false;
                        }
                        else
                        {
                            outp.Write("       ");
                        }
                        outp.WriteLine(s);
                    }
                    if (textBlock.childs.Count > 0)
                    {
                        outp.WriteLine("   **** warning childs blocks not printed ****");
                    }
                }
                outp.WriteLine();
            }

            outp.WriteLine();
        }
Exemplo n.º 2
0
        /// <summary>
        /// Dumps diagnostic information about one page of a PDF to <paramref name="outp"/>.
        /// The page banner is always printed; the dictionary, XObject, content-stream,
        /// text and text-block sections are each gated by their own <c>_output*</c> flag.
        /// </summary>
        /// <param name="reader">Reader giving access to the PDF document.</param>
        /// <param name="pageNum">Number of the page to dump, forwarded as-is to the reader.</param>
        /// <param name="outp">Writer that receives the dump.</param>
        public static void xpdfPage(PdfReader reader, int pageNum, TextWriter outp)
        {
            outp.WriteLine("==============Page " + pageNum + "====================");
            PdfDictionary pageDictionary = reader.GetPageN(pageNum);
            if (_outputDictionary)
            {
                outp.WriteLine("- - - - - Dictionary - - - - - -");
                string s = GetDictionaryDetail(pageDictionary);
                outp.WriteLine(s);
            }

            if (_outputXObject)
            {
                outp.WriteLine("- - - - - XObject summary - - - - - -");
                outp.WriteLine(PdfContentReaderTool.GetXObjectDetail(pageDictionary.GetAsDict(PdfName.RESOURCES)));
            }

            if (_outputContentStream)
            {
                outp.WriteLine("- - - - - Content stream - - - - - -");
                RandomAccessFileOrArray f = reader.SafeFile;
                byte[] contentBytes;
                try
                {
                    contentBytes = reader.GetPageContent(pageNum, f);
                }
                finally
                {
                    // Always close the file, including when GetPageContent throws.
                    f.Close();
                }

                outp.Flush();

                // Bytes are emitted one-to-one as chars; the stream is not decoded.
                foreach (byte b in contentBytes)
                {
                    outp.Write((char)b);
                }

                outp.Flush();
            }

            // Processed once, regardless of flags; all text sections below share this strategy.
            Test_iTextSharp.LocationTextExtractionStrategy strategy = new Test_iTextSharp.LocationTextExtractionStrategy();
            Test_iTextSharp.PdfTools.ProcessContentPage(reader, pageNum, strategy);

            if (_outputText)
            {
                outp.WriteLine("- - - - - Text extraction - - - - - -");
                string extractedText = strategy.GetResultantText();
                if (extractedText.Length != 0)
                {
                    outp.WriteLine(extractedText);
                    outp.WriteLine();
                }
                else
                    outp.WriteLine("No text found on page " + pageNum);
            }

            if (_outputTextBlocks1)
            {
                outp.WriteLine("- - - - - Text blocks extraction 1 - - - - - -");
                foreach (Test_iTextSharp.TextBlock textBlock in strategy.textBlocks)
                    PrintTextBlock(outp, textBlock, 0);
                outp.WriteLine();
            }

            if (_outputTextBlocks2)
            {
                outp.WriteLine("- - - - - Text blocks extraction 2 - - - - - -");
                foreach (Test_iTextSharp.TextBlock textBlock in strategy.textBlocks)
                {
                    outp.Write("block  ");
                    outp.WriteLine(textBlock.GetText());
                    // Child blocks are not expanded in this layout; warn when some exist.
                    if (textBlock.childs.Count > 0)
                        outp.WriteLine("   **** warning childs blocks not printed ****");
                }
                outp.WriteLine();
            }

            if (_outputTextBlocks3)
            {
                outp.WriteLine("- - - - - Text blocks extraction 3 - - - - - -");
                foreach (Test_iTextSharp.TextBlock textBlock in strategy.textBlocks)
                {
                    // Label only the first line of each block; pad the rest to the same width.
                    bool first = true;
                    foreach (string s in textBlock.GetTextByLines(_outputMaxCol))
                    {
                        if (first)
                        {
                            outp.Write("block  ");
                            first = false;
                        }
                        else
                            outp.Write("       ");
                        outp.WriteLine(s);
                    }
                    if (textBlock.childs.Count > 0)
                        outp.WriteLine("   **** warning childs blocks not printed ****");
                }
                outp.WriteLine();
            }

            outp.WriteLine();
        }