Esempio n. 1
0
        /// <summary>
        /// Reads the PDF at a fixed path, extracts the text of every page and stores the
        /// result in <c>document</c> as a list of (page number, page text) tuples.
        /// </summary>
        /// <remarks>
        /// Page numbers in the resulting tuples are 1-based, matching the page index passed
        /// to <c>PdfDocument.GetPage</c>. <c>document</c> is only assigned after every page
        /// has been extracted; if extraction throws, it is left untouched.
        /// </remarks>
        void  LoadDocument()
        {
            // NOTE(review): the input path is hard-coded; consider passing it in from the caller.
            byte[] pdfBytes = convertDocToByteArray(@"D:\Code Project\Subrip\Document\HSK Book.pdf");
            List <Tuple <int, string> > Contents = new List <Tuple <int, string> >();

            using (MemoryStream memory = new MemoryStream(pdfBytes))
            {
                iText.Kernel.Pdf.PdfReader   iTextReader = new iText.Kernel.Pdf.PdfReader(memory);
                iText.Kernel.Pdf.PdfDocument pdfDoc      = new iText.Kernel.Pdf.PdfDocument(iTextReader);

                try
                {
                    int numberofpages = pdfDoc.GetNumberOfPages();

                    for (int page = 1; page <= numberofpages; page++)
                    {
                        // A fresh strategy per page so text collected for one page never
                        // carries over into the next.
                        iText.Kernel.Pdf.Canvas.Parser.Listener.ITextExtractionStrategy strategy = new iText.Kernel.Pdf.Canvas.Parser.Listener.LocationTextExtractionStrategy();

                        string currentText = iText.Kernel.Pdf.Canvas.Parser.PdfTextExtractor.GetTextFromPage(pdfDoc.GetPage(page), strategy);

                        // UTF-8 round-trip: leaves well-formed text unchanged and replaces
                        // unpaired surrogates with U+FFFD. Equivalent to the previous
                        // UTF-8 -> UTF-8 Encoding.Convert call, without the extra pass.
                        currentText = Encoding.UTF8.GetString(Encoding.UTF8.GetBytes(currentText));

                        Contents.Add(new Tuple <int, string>(page, currentText));
                    }
                }
                finally
                {
                    // Always release the PDF document, even when extraction fails part-way.
                    pdfDoc.Close();
                }
            }

            document = Contents;
        }