예제 #1
0
        /// <summary>
        /// Extracts the text of every page of a hard-coded PDF file into a local buffer.
        /// </summary>
        /// <remarks>
        /// The collected text is neither returned nor stored; the method only exercises the
        /// extraction code. Nothing happens when the file does not exist.
        /// </remarks>
        private void IText7ReadPDF()
        {
            StringBuilder text     = new StringBuilder();
            string        fileName = @"C:\Users\Administrator\Desktop\巨力电梯x(3).pdf";

            if (!File.Exists(fileName))
            {
                return;
            }

            // The using block releases the reader even when extraction throws part-way through.
            using (PdfReader pdfReader = new PdfReader(fileName))
            {
                for (int page = 1; page <= pdfReader.NumberOfPages; page++)
                {
                    ITextExtractionStrategy strategy = new SimpleTextExtractionStrategy();

                    // The extracted text is already a .NET string, so it is appended as-is.
                    // Round-tripping it through Encoding.Default would replace every character
                    // that is missing from the ANSI code page with '?'.
                    text.Append(PdfTextExtractor.GetTextFromPage(pdfReader, page, strategy));
                }
            }
        }
예제 #2
0
        /// <summary>
        /// Extracts the text of every file in <paramref name="folder"/> and shows the combined
        /// text in <paramref name="TB"/>.
        /// </summary>
        /// <param name="folder">Directory whose files are opened as PDF documents.</param>
        /// <param name="TB">Text box that receives the concatenated text of all documents.</param>
        /// <returns>The number of files that were processed.</returns>
        private static int LoadAllPDFs(string folder, TextBox TB)
        {
            int           PDFCounter = 0;
            StringBuilder text       = new StringBuilder();
            var           files      = Directory.GetFiles(folder + @"\");

            foreach (var file in files)
            {
                // The using block closes the reader, so no explicit Close() call is needed.
                using (PdfReader pdfReader = new PdfReader(file))
                {
                    for (int page = 1; page <= pdfReader.NumberOfPages; page++)
                    {
                        ITextExtractionStrategy strategy = new SimpleTextExtractionStrategy();

                        // Appended as-is: re-encoding the string through Encoding.Default would
                        // replace characters outside the ANSI code page with '?'.
                        text.Append(PdfTextExtractor.GetTextFromPage(pdfReader, page, strategy));
                    }
                }

                PDFCounter++;

                // Marker line that separates the text of consecutive documents.
                text.Append("\n{NEWARTICLE}\n");
            }

            TB.Text = text.ToString();
            return PDFCounter;
        }
예제 #3
0
        /// <summary>
        /// Extracts the text of every page of "main.pdf" (resolved against the current directory).
        /// </summary>
        /// <returns>
        /// The concatenated page text, or an empty string when the file cannot be read or any
        /// page fails to extract.
        /// </returns>
        public static string Mainlog()
        {
            StringBuilder pages = new StringBuilder();

            try
            {
                // The using block releases the reader even when a page fails to extract.
                using (PdfReader reader = new PdfReader("main.pdf"))
                {
                    for (int page = 1; page <= reader.NumberOfPages; page++)
                    {
                        ITextExtractionStrategy its = new iTextSharp.text.pdf.parser.LocationTextExtractionStrategy();

                        // Appended as-is: re-encoding the string through Encoding.Default would
                        // replace characters outside the ANSI code page with '?'.
                        pages.Append(PdfTextExtractor.GetTextFromPage(reader, page, its));
                    }
                }

                return pages.ToString();
            }
            catch (Exception ex)
            {
                // Callers still get an empty string on failure, but the cause is now traced
                // instead of being silently discarded.
                System.Diagnostics.Trace.TraceError("Mainlog: could not read main.pdf: " + ex);
                return string.Empty;
            }
        }
예제 #4
0
        /// <summary>
        /// Records the name, size, extension and document type of <paramref name="filename"/>
        /// and computes its statistics.
        /// </summary>
        /// <remarks>
        /// For PDF files the page count and word count are computed here from the extracted
        /// text. Every other extension is delegated to <c>ComputeStatistics</c>; extensions other
        /// than .pdf, .odt and .docx are labelled as Word 97-2003 documents.
        /// </remarks>
        /// <param name="filename">Path of the document to inspect.</param>
        public static void SetProperties(string filename)
        {
            Filename = filename;
            FileInfo fi = new FileInfo(Filename);

            // Size in MiB, rounded to two decimals (1048576 = 1024 * 1024).
            Size      = Math.Round(Convert.ToDouble(fi.Length) / (1048576), 2);
            Extension = fi.Extension;

            // Extensions are compared case-insensitively so that "REPORT.PDF" is not
            // misclassified as a .doc file by the fallback branch.
            if (string.Equals(Extension, ".pdf", StringComparison.OrdinalIgnoreCase))
            {
                DocumentType = "Portable Document Format(.pdf)";

                StringBuilder pdfText = new StringBuilder();

                // The using block releases the reader even when a page fails to extract.
                using (PdfReader pdfr = new PdfReader(Filename))
                {
                    TotalPages = pdfr.NumberOfPages;

                    for (int page = 1; page <= pdfr.NumberOfPages; page++)
                    {
                        ITextExtractionStrategy strategy = new SimpleTextExtractionStrategy();

                        // Appended as-is: re-encoding the string through Encoding.Default would
                        // replace characters outside the ANSI code page with '?'.
                        pdfText.Append(PdfTextExtractor.GetTextFromPage(pdfr, page, strategy));
                    }
                }

                // A "word" is a maximal run of ASCII letters and digits.
                NoOfWords = Regex.Matches(pdfText.ToString(), @"[A-Za-z0-9]+").Count;
            }
            else if (string.Equals(Extension, ".odt", StringComparison.OrdinalIgnoreCase))
            {
                DocumentType = "Open Document Format(.odt)";
                ComputeStatistics();
            }
            else if (string.Equals(Extension, ".docx", StringComparison.OrdinalIgnoreCase))
            {
                DocumentType = "Microsoft Word Document(.docx)";
                ComputeStatistics();
            }
            else
            {
                DocumentType = "Word 97-2003 document(.doc)";
                ComputeStatistics();
            }
        }
        /// <summary>
        /// Parses the invoice items of every page of the PDF at <paramref name="fileName"/>.
        /// </summary>
        /// <remarks>
        /// The value returned by <c>GetVencimento</c> for the first page (presumably the invoice
        /// due date; confirm against that helper) is passed to <c>ParsePage</c> for every page.
        /// Processing is best-effort: when reading or parsing fails, the items collected so far
        /// are returned and the failure is traced.
        /// </remarks>
        /// <param name="fileName">Path of the invoice PDF.</param>
        /// <returns>The items parsed before the end of the document or the first failure.</returns>
        private List <InvoiceItem> GetFaturaItems(string fileName)
        {
            var result = new List <InvoiceItem>();

            try
            {
                // The using block releases the reader even when parsing throws.
                using (PdfReader reader = new PdfReader(fileName))
                {
                    // Page text is used as returned by the extractor; re-encoding it through
                    // Encoding.Default would replace characters outside the ANSI code page with '?'.
                    string firstPageText = PdfTextExtractor.GetTextFromPage(
                        reader, 1, new iTextSharp.text.pdf.parser.SimpleTextExtractionStrategy());

                    var vencimento = GetVencimento(firstPageText);

                    for (int pageNumber = 1; pageNumber <= reader.NumberOfPages; pageNumber++)
                    {
                        string pageText = PdfTextExtractor.GetTextFromPage(
                            reader, pageNumber, new iTextSharp.text.pdf.parser.SimpleTextExtractionStrategy());

                        result.AddRange(ParsePage(pageText, vencimento));
                    }
                }
            }
            catch (Exception ex)
            {
                // Still best-effort for callers, but the failure is no longer hidden.
                System.Diagnostics.Trace.TraceError("GetFaturaItems: failed to parse '" + fileName + "': " + ex);
            }

            return result;
        }
예제 #6
0
        /// <summary>
        /// Extracts the text of every page of the PDF referenced by <c>_fileName</c>.
        /// </summary>
        /// <returns>The page texts in order, each followed by "\r\n".</returns>
        /// <exception cref="FileNotFoundException">The file does not exist.</exception>
        public string Parse()
        {
            if (!File.Exists(_fileName))
            {
                // The file name is attached so the caller can tell which file was missing.
                throw new FileNotFoundException("PDF file not found.", _fileName);
            }

            StringBuilder text = new StringBuilder();

            // The using block releases the reader even when a page fails to extract.
            using (PdfReader pdfReader = new PdfReader(_fileName))
            {
                for (int page = 1; page <= pdfReader.NumberOfPages; page++)
                {
                    ITextExtractionStrategy strategy = new SimpleTextExtractionStrategy();

                    // Appended as-is: re-encoding the string through Encoding.Default would
                    // replace characters outside the ANSI code page with '?'.
                    text.Append(PdfTextExtractor.GetTextFromPage(pdfReader, page, strategy));
                    text.Append("\r\n");
                }
            }

            return text.ToString();
        }
예제 #7
0
        /// <summary>
        /// Lets the user pick a PDF file and shows its extracted text in <c>richTextBox1</c>.
        /// </summary>
        /// <remarks>
        /// When extraction fails part-way through, the text of the pages read so far is still
        /// shown and the error message is displayed in a message box.
        /// </remarks>
        private void button1_Click(object sender, EventArgs e)
        {
            string filePath;

            // The dialog holds native resources, so it is disposed as soon as the path is known.
            using (OpenFileDialog dlg = new OpenFileDialog())
            {
                dlg.Filter = "PDF Files(*.PDF)|*.PDF|All Files(*.*)|*.*";
                if (dlg.ShowDialog() != DialogResult.OK)
                {
                    return;
                }

                filePath = dlg.FileName;
            }

            StringBuilder text      = new StringBuilder();
            int           pagesRead = 0;
            Exception     failure   = null;

            try
            {
                // The using block releases the reader even when a page fails to extract.
                using (PdfReader reader = new PdfReader(filePath))
                {
                    for (int page = 1; page <= reader.NumberOfPages; page++)
                    {
                        ITextExtractionStrategy its = new iTextSharp.text.pdf.parser.LocationTextExtractionStrategy();

                        // Appended as-is: re-encoding the string through Encoding.Default would
                        // replace characters outside the ANSI code page with '?'.
                        text.Append(PdfTextExtractor.GetTextFromPage(reader, page, its));
                        pagesRead++;
                    }
                }
            }
            catch (Exception ex)
            {
                failure = ex;
            }

            // The text box is updated once instead of once per page; it is left untouched when
            // not even the first page could be read.
            if (pagesRead > 0)
            {
                richTextBox1.Text = text.ToString();
            }

            if (failure != null)
            {
                MessageBox.Show(failure.Message);
            }
        }
예제 #8
0
        /// <summary>
        /// Converts a PDF file to text by extracting just the text of each page.
        /// </summary>
        /// <remarks>
        /// The page-by-page extraction loop follows the approach credited to the iTextSharp
        /// developers and asturcon at
        /// http://www.codeproject.com/Questions/770857/Convert-PDF-tp-text-formatted-using-iTextSharp-csh
        /// Before this was used, manual conversion was done with Adobe Acrobat or Microsoft Word,
        /// which both converted badly (missing spaces, linefeeds, reversed lines, etc.).
        /// </remarks>
        /// <param name="infile">Path of the PDF file to convert.</param>
        /// <returns>The page texts in order, each followed by "\n".</returns>
        public static string Convert(string infile)
        {
            StringBuilder strPdfContent = new StringBuilder();

            // The using block releases the reader even when a page fails to extract.
            using (PdfReader reader = new PdfReader(infile))
            {
                for (int i = 1; i <= reader.NumberOfPages; i++)
                {
                    ITextExtractionStrategy objExtractStrategy = new SimpleTextExtractionStrategy();

                    // Appended as-is: the extractor already returns a .NET string, and
                    // re-encoding it through Encoding.Default would replace characters outside
                    // the ANSI code page with '?'.
                    strPdfContent.Append(PdfTextExtractor.GetTextFromPage(reader, i, objExtractStrategy));
                    strPdfContent.Append("\n");
                }
            }

            return strPdfContent.ToString();
        }
예제 #9
0
        /// <summary>
        /// Extracts the text of every page of a PDF and dumps its form fields to "test.xml".
        /// </summary>
        /// <remarks>
        /// The form fields are serialized as a list of "name: value" strings to the file
        /// <c>_path + "test.xml"</c>.
        /// </remarks>
        /// <param name="fileName">Path of the PDF file.</param>
        /// <returns>The concatenated page text, or an empty string when the file does not exist.</returns>
        public string ReadPdfFile(string fileName)
        {
            StringBuilder text = new StringBuilder();

            if (!File.Exists(fileName))
            {
                return text.ToString();
            }

            // The using block releases the reader even when extraction or serialization throws.
            using (PdfReader pdfReader = new PdfReader(fileName))
            {
                List <string> fieldDump = pdfReader.AcroFields.Fields
                                          .Select(x => x.Key + ": " + pdfReader.AcroFields.GetField(x.Key))
                                          .ToList();

                // The field list does not depend on the page, so the XML file is written once
                // rather than being rewritten with identical content for every page.
                XmlSerializer serializer = new XmlSerializer(typeof(List <string>));
                using (TextWriter writer = new StreamWriter(_path + "test.xml"))
                {
                    serializer.Serialize(writer, fieldDump);
                }

                for (int page = 1; page <= pdfReader.NumberOfPages; page++)
                {
                    ITextExtractionStrategy strategy = new SimpleTextExtractionStrategy();

                    // Appended as-is: re-encoding the string through Encoding.Default would
                    // replace characters outside the ANSI code page with '?'.
                    text.Append(PdfTextExtractor.GetTextFromPage(pdfReader, page, strategy));
                }
            }

            return text.ToString();
        }
예제 #10
0
        /// <summary>
        /// Extracts the text of every page of a PDF into <c>texts</c> (one entry per page) and
        /// <c>PDFText</c> (all pages concatenated).
        /// </summary>
        /// <remarks>
        /// When the full text contains <c>JPM_TAG</c>, <c>data</c> is set to a new
        /// <c>JPMData</c>. Nothing happens when the file does not exist.
        /// </remarks>
        /// <param name="fileName">Path of the PDF file.</param>
        public void GetPDFText(string fileName)
        {
            if (!File.Exists(fileName))
            {
                return;
            }

            StringBuilder text = new StringBuilder();

            // The using block releases the reader even when a page fails to extract.
            using (PdfReader pdfReader = new PdfReader(fileName))
            {
                for (int i = 1; i <= pdfReader.NumberOfPages; i++)
                {
                    ITextExtractionStrategy strategy = new SimpleTextExtractionStrategy();

                    // Used as returned: re-encoding the string through Encoding.Default would
                    // replace characters outside the ANSI code page with '?'.
                    string currentText = PdfTextExtractor.GetTextFromPage(pdfReader, i, strategy);

                    text.Append(currentText);
                    texts.Add(currentText);
                }

                PDFText = text.ToString();
            }

            if (PDFText.Contains(JPM_TAG))
            {
                data = new JPMData();
            }
        }
예제 #11
0
        /// <summary>
        /// Lets the user pick a .txt, .rtf or .pdf file and shows its text in <paramref name="docBox"/>.
        /// </summary>
        /// <remarks>
        /// .txt and .rtf files are read as plain text with code page 1251, so RTF markup is shown
        /// verbatim rather than interpreted. For .pdf files the text of every page is extracted.
        /// Files with any other extension only update the file-name label.
        /// </remarks>
        /// <param name="docBox">Rich text box that receives the loaded text.</param>
        /// <param name="LabelShowFileName">Label that receives the selected file's name.</param>
        /// <returns>The selected file path; the dialog's current FileName when it was cancelled.</returns>
        public string Open(RichTextBox docBox, Label LabelShowFileName)
        {
            OpenFileDialog openFileDialog = new OpenFileDialog();

            openFileDialog.Filter = "TXT File (*.txt)|*.txt|RTF File (*.rtf)|*.rtf|PDF File (*.pdf)|*.pdf";

            if (openFileDialog.ShowDialog() == true)
            {
                LabelShowFileName.Content = System.IO.Path.GetFileName(openFileDialog.FileName);

                // Invariant lower-casing keeps the extension match independent of the UI culture.
                var textType = System.IO.Path.GetExtension(openFileDialog.FileName).ToLowerInvariant();

                // Stays null for extensions that are not handled, leaving the document untouched.
                string content = null;

                switch (textType)
                {
                case ".rtf":
                case ".txt":
                    content = File.ReadAllText(openFileDialog.FileName, Encoding.GetEncoding(1251));
                    break;

                case ".pdf":
                {
                    StringBuilder pdfText = new StringBuilder();

                    // The using block releases the reader even when a page fails to extract.
                    using (PdfReader reader = new PdfReader(openFileDialog.FileName))
                    {
                        for (int page = 1; page <= reader.NumberOfPages; page++)
                        {
                            ITextExtractionStrategy its = new iTextSharp.text.pdf.parser.LocationTextExtractionStrategy();

                            // Appended as-is: re-encoding the string through Encoding.Default
                            // would replace characters outside the ANSI code page with '?'.
                            pdfText.Append(PdfTextExtractor.GetTextFromPage(reader, page, its));
                        }
                    }

                    content = pdfText.ToString();
                }
                break;
                }

                if (content != null)
                {
                    FlowDocument flowDocument = new FlowDocument();
                    flowDocument.Blocks.Add(new System.Windows.Documents.Paragraph(new Run(content)));
                    docBox.Document = flowDocument;
                }
            }

            return openFileDialog.FileName;
        }
예제 #12
0
        /// <summary>
        /// Reads a PDF file and returns the text of all of its pages.
        /// </summary>
        /// <remarks>
        /// Errors are not propagated: the error message is shown in <c>lblMsjs</c> and the text
        /// extracted before the failure is returned.
        /// </remarks>
        /// <param name="Filename">Path of the PDF file; must be a <see cref="string"/>.</param>
        /// <returns>The concatenated page text (possibly partial or empty after an error).</returns>
        public string ReadPdfFile(object Filename)
        {
            StringBuilder text = new StringBuilder();

            try
            {
                // The file is opened once for the whole document instead of once per page, and
                // the using block releases the reader even when a page fails to extract.
                using (PdfReader reader = new PdfReader((string)Filename))
                {
                    for (int page = 1; page <= reader.NumberOfPages; page++)
                    {
                        ITextExtractionStrategy its = new iTextSharp.text.pdf.parser.SimpleTextExtractionStrategy();

                        // Appended as-is: re-encoding the string through Encoding.Default would
                        // replace characters outside the ANSI code page with '?'.
                        text.Append(PdfTextExtractor.GetTextFromPage(reader, page, its));
                    }
                }
            }
            catch (Exception ex)
            {
                // Show the error to the user instead of throwing.
                lblMsjs.ForeColor = Color.Crimson;
                lblMsjs.Text      = ex.Message;
            }

            return text.ToString();
        }
예제 #13
0
        /// <summary>
        /// Reads the text of every page of the given PDF file into a new StringBuilder.
        /// </summary>
        /// <param name="file">Path of the PDF file.</param>
        /// <returns>
        /// A StringBuilder holding the concatenated page text; it is empty when the file does
        /// not exist.
        /// </returns>
        private StringBuilder ReadText(string file)
        {
            StringBuilder textBuilder = new StringBuilder();

            if (!File.Exists(file))
            {
                return textBuilder;
            }

            // The using block releases the reader even when a page fails to extract.
            using (PdfReader pdfReader = new PdfReader(file))
            {
                for (int page = 1; page <= pdfReader.NumberOfPages; page++)
                {
                    ITextExtractionStrategy strategy = new SimpleTextExtractionStrategy();

                    // Used as returned: re-encoding the string through Encoding.Default would
                    // replace characters outside the ANSI code page with '?'.
                    string currentText = PdfTextExtractor.GetTextFromPage(pdfReader, page, strategy);

                    if (!String.IsNullOrEmpty(currentText))
                    {
                        textBuilder.Append(currentText);
                    }
                }
            }

            return textBuilder;
        }
예제 #14
0
        /// <summary>
        /// Appends the extracted text of one PDF page to a text file, tagging lines that look
        /// like titles.
        /// </summary>
        /// <remarks>
        /// A line is treated as a title when it is non-empty, shorter than 54 characters, does
        /// not end with punctuation and does not start with a digit. A separator line is written
        /// after the page.
        /// </remarks>
        /// <param name="reader">Open reader for the PDF file.</param>
        /// <param name="page">Number of the page to extract.</param>
        /// <param name="its">Extraction strategy used for the page.</param>
        /// <param name="outPath">Path of the text file that is appended to.</param>
        private static void WriteInfile(PdfReader reader, int page, ITextExtractionStrategy its, string outPath)
        {
            // Used as returned: re-encoding the string through Encoding.Default would replace
            // characters outside the ANSI code page with '?'.
            string   strText = PdfTextExtractor.GetTextFromPage(reader, page, its);
            string[] lines   = strText.Split('\n');

            // One writer for the whole page instead of reopening the file for every line.
            using (System.IO.StreamWriter file = new System.IO.StreamWriter(outPath, true))
            {
                foreach (string line in lines)
                {
                    // The length check comes first so that empty lines (common after Split)
                    // never index into the string; the previous code read position -1 for them
                    // and threw IndexOutOfRangeException.
                    bool looksLikeTitle = line.Length > 0 &&
                                          line.Length < 54 &&
                                          !char.IsPunctuation(line[line.Length - 1]) &&
                                          !char.IsDigit(line[0]);

                    if (looksLikeTitle)
                    {
                        // The logged number stays "line length + 1", as in the previous output.
                        Console.WriteLine("TITLE = " + line + "  " + (line.Length + 1));
                        file.Write("Title - - - - - ");
                        file.WriteLine(line + "\n");
                    }
                    else
                    {
                        file.WriteLine(line);
                    }
                }

                file.WriteLine("- - - - - - - - - - - - - - - - - - - - - - - - - - - ");
            }
        }
예제 #15
0
        /// <summary>
        /// Gets the text of a single PDF page.
        /// </summary>
        /// <param name="filepath">Path of the PDF file.</param>
        /// <param name="page">1-based page number (defaults to the first page).</param>
        /// <returns>The text of the requested page.</returns>
        /// <exception cref="Exception">
        /// The file does not exist, or <paramref name="page"/> is outside the document's page range.
        /// </exception>
        public static string GetPDFPageAsText(string filepath, int page = 1)
        {
            if (!System.IO.File.Exists(filepath))
            {
                throw new Exception(Properties.Resources.FileNotFoundError);
            }

            // The reader was previously never closed; the using block releases it on every
            // path, including the page-range error below.
            using (PdfReader myPdfReader = new PdfReader(filepath))
            {
                if (page < 1 || page > myPdfReader.NumberOfPages)
                {
                    throw new Exception(Properties.Resources.WrongPageNumber);
                }

                // Returned as-is: re-encoding the string through Encoding.Default would replace
                // characters outside the ANSI code page with '?'.
                return PdfTextExtractor.GetTextFromPage(myPdfReader, page, new SimpleTextExtractionStrategy());
            }
        }
예제 #16
0
        /// <summary>
        /// Extracts the text of every page of the PDF at <paramref name="path"/>.
        /// </summary>
        /// <param name="path">Path of the PDF file.</param>
        /// <returns>The concatenated page text.</returns>
        /// <exception cref="Exception">
        /// Reading or extraction failed; the original exception is available as InnerException.
        /// </exception>
        public string ReadTextPDF(string path)
        {
            try
            {
                StringBuilder textPages = new StringBuilder();

                // The using block releases the reader even when a page fails to extract.
                using (PdfReader reader = new PdfReader(path))
                {
                    int numberPage = reader.NumberOfPages;

                    for (int pageNumber = 1; pageNumber <= numberPage; pageNumber++)
                    {
                        ITextExtractionStrategy textExtractionStrategy = new iTextSharp.text.pdf.parser.SimpleTextExtractionStrategy();

                        // Appended as-is: re-encoding the string through Encoding.Default would
                        // replace characters outside the ANSI code page with '?'.
                        textPages.Append(PdfTextExtractor.GetTextFromPage(reader, pageNumber, textExtractionStrategy));
                    }
                }

                return textPages.ToString();
            }
            catch (Exception ex)
            {
                // Same exception type and message as before, but the original exception is now
                // attached so its type and stack trace are not lost.
                throw new Exception(ex.Message, ex);
            }
        }
예제 #17
0
        /// <summary>
        /// Gets the text of all pages of a PDF.
        /// </summary>
        /// <param name="filepath">Path of the PDF file.</param>
        /// <returns>The page texts in order, separated by "\n".</returns>
        /// <exception cref="Exception">The file does not exist.</exception>
        public static string GetEntirePDFAsText(string filepath)
        {
            if (!System.IO.File.Exists(filepath))
            {
                throw new Exception(Properties.Resources.FileNotFoundError);
            }

            StringBuilder content = new StringBuilder();

            // The reader was previously never closed; the using block releases it even when a
            // page fails to extract.
            using (PdfReader myPdfReader = new PdfReader(filepath))
            {
                for (int i = 1; i <= myPdfReader.NumberOfPages; i++)
                {
                    // A line break goes between pages, not before the first one.
                    if (i > 1)
                    {
                        content.Append("\n");
                    }

                    // Appended as-is: re-encoding the string through Encoding.Default would
                    // replace characters outside the ANSI code page with '?'.
                    content.Append(PdfTextExtractor.GetTextFromPage(myPdfReader, i, new SimpleTextExtractionStrategy()));
                }
            }

            return content.ToString();
        }
예제 #18
0
        /// <summary>
        /// Extracts a 6-character value from a PDF: the characters that start 8 positions after
        /// the beginning of the first occurrence of "Number" in the document text.
        /// </summary>
        /// <param name="filename">Path of the PDF file.</param>
        /// <returns>The 6 characters following the "Number" marker.</returns>
        /// <exception cref="FileNotFoundException">The file does not exist.</exception>
        /// <exception cref="InvalidDataException">
        /// The document text has no "Number" marker, or fewer than 6 characters follow it.
        /// </exception>
        public static string ParsePdf(string filename)
        {
            if (!File.Exists(filename))
            {
                // The previous message was the literal text "fileName"; report the actual path.
                throw new FileNotFoundException("PDF file not found.", filename);
            }

            const string marker       = "Number";
            const int    valueOffset  = 8; // distance from the start of the marker to the value
            const int    valueLength  = 6;

            StringBuilder sb = new StringBuilder();

            using (PdfReader textreader = new PdfReader(filename))
            {
                for (int page = 1; page <= textreader.NumberOfPages; page++)
                {
                    // A fresh strategy per page: SimpleTextExtractionStrategy keeps accumulating
                    // text, so a shared instance would return all earlier pages again.
                    ITextExtractionStrategy strategy = new SimpleTextExtractionStrategy();

                    string text = PdfTextExtractor.GetTextFromPage(textreader, page, strategy);
                    if (!string.IsNullOrWhiteSpace(text))
                    {
                        // Appended as-is: re-encoding the string through Encoding.Default would
                        // replace characters outside the ANSI code page with '?'.
                        sb.Append(text);
                    }
                }
            }

            string allText     = sb.ToString();
            int    markerIndex = allText.IndexOf(marker, StringComparison.Ordinal);

            // Without this check a missing marker (index -1) silently returned unrelated
            // characters from the start of the document.
            if (markerIndex < 0)
            {
                throw new InvalidDataException("The text of '" + filename + "' does not contain the marker \"" + marker + "\".");
            }

            int startValue = markerIndex + valueOffset;
            if (startValue + valueLength > allText.Length)
            {
                throw new InvalidDataException("The text of '" + filename + "' ends before the value that follows \"" + marker + "\".");
            }

            return allText.Substring(startValue, valueLength);
        }
예제 #19
0
        /// <summary>
        /// Prints <paramref name="someText"/>, its ASCII bytes, the same bytes converted to
        /// UTF-8, and the string decoded back from the UTF-8 bytes.
        /// </summary>
        /// <remarks>
        /// Only characters in the range 0x00 - 0x7F survive the ASCII step unchanged.
        /// </remarks>
        /// <param name="someText">Text to run through the conversion steps.</param>
        private void UTF8Encoding(string someText)
        {
            // Step 1: the input itself.
            Console.WriteLine(someText);

            // Step 2: the input encoded as ASCII, shown as hyphen-separated hex.
            byte[] asciiPayload = Encoding.ASCII.GetBytes(someText);
            string asciiHex     = BitConverter.ToString(asciiPayload);
            Console.WriteLine(asciiHex);

            // Step 3: the ASCII bytes re-expressed as UTF-8 bytes.
            byte[] utf8Payload = Encoding.Convert(Encoding.ASCII, Encoding.UTF8, asciiPayload);
            string utf8Hex     = BitConverter.ToString(utf8Payload);
            Console.WriteLine(utf8Hex);

            // Step 4: the string rebuilt from the UTF-8 bytes.
            Console.WriteLine(Encoding.UTF8.GetString(utf8Payload));
        }
예제 #20
0
        /// <summary>
        /// Downloads a PDF from <paramref name="url"/> to <paramref name="filename"/> and
        /// extracts the text of every page.
        /// </summary>
        /// <param name="url">Address the PDF is downloaded from.</param>
        /// <param name="filename">Local path the download is saved to.</param>
        /// <returns>
        /// The concatenated page text, or an empty string when the downloaded file is not found.
        /// </returns>
        private static async Task <string> ParsePDF(string url, string filename)
        {
            var text = new StringBuilder();

            await Client.DownloadFileTaskAsync(
                url, filename);

            if (!File.Exists(filename))
            {
                return text.ToString();
            }

            Console.WriteLine("file exists");

            // The using block releases the reader even when a page fails to extract.
            using (var pdfReader = new PdfReader(filename))
            {
                for (var page = 1; page <= pdfReader.NumberOfPages; page++)
                {
                    ITextExtractionStrategy strategy = new LocationTextExtractionStrategy();

                    // Appended as-is: re-encoding the string through Encoding.Default would
                    // replace characters outside the ANSI code page with '?'.
                    text.Append(PdfTextExtractor.GetTextFromPage(pdfReader, page, strategy));
                }
            }

            return text.ToString();
        }
예제 #21
0
        //[Dependency]
        //public IF0413Repository F0413Repository { get; set; }
        //[Dependency]
        //public IF0413Business F0413Business { get; set; }
        //[Dependency]
        //public IF0414Business F0414Business { get; set; }

        /// <summary>
        /// Extracts the text of every page of the PDF at <paramref name="nombreArchivo"/>.
        /// </summary>
        /// <param name="nombreArchivo">Path of the PDF file.</param>
        /// <returns>The concatenated page text.</returns>
        /// <exception cref="ExportException">An I/O error occurred while reading the file.</exception>
        public String BuscarDatosPdf(string nombreArchivo)
        {
            try
            {
                StringBuilder texto = new StringBuilder();

                // The file is opened once for the whole document instead of once per page, and
                // the using block releases the reader even when a page fails to extract.
                using (PdfReader reader = new PdfReader(nombreArchivo))
                {
                    for (int page = 1; page <= reader.NumberOfPages; page++)
                    {
                        ITextExtractionStrategy its = new iTextSharp.text.pdf.parser.SimpleTextExtractionStrategy();

                        // Appended as-is: re-encoding the string through Encoding.Default would
                        // replace characters outside the ANSI code page with '?'.
                        texto.Append(PdfTextExtractor.GetTextFromPage(reader, page, its));
                    }
                }

                return texto.ToString();
            }
            catch (IOException ex)
            {
                throw new ExportException("Error: No se encuentra el archivo o recurso: " + nombreArchivo + ". --> Traza Original: " + ex.StackTrace);
            }
        }
예제 #22
0
        /// <summary>
        /// Opens the PDF at <paramref name="fileName"/> and returns the text of page <c>pPage</c>.
        /// </summary>
        /// <remarks>
        /// As a side effect the method replaces <c>pReader</c> with a reader for the file and
        /// updates <c>totalPage</c> and <c>size</c>. The new reader is left open in <c>pReader</c>.
        /// </remarks>
        /// <param name="fileName">Path of the PDF file.</param>
        /// <returns>
        /// The page text; an empty string when <paramref name="fileName"/> is null or empty; or
        /// "Not found file: " followed by the path when the file does not exist.
        /// </returns>
        public string ParsePdfPage(string fileName)
        {
            if (string.IsNullOrEmpty(fileName))
            {
                return "";
            }

            if (!File.Exists(fileName))
            {
                return "Not found file: " + fileName;
            }

            // The new reader is opened first so that a failure leaves the previous one in place;
            // only then is the replaced reader closed instead of being abandoned while open.
            PdfReader previous = pReader;
            pReader = new PdfReader(fileName);
            if (previous != null)
            {
                previous.Close();
            }

            totalPage = pReader.NumberOfPages;
            size      = new FileInfo(fileName).Length;

            ITextExtractionStrategy strategy = new SimpleTextExtractionStrategy();

            // Returned as-is: re-encoding the string through Encoding.Default would replace
            // characters outside the ANSI code page with '?'.
            return PdfTextExtractor.GetTextFromPage(pReader, pPage, strategy);
        }
        /// <summary>
        /// Converts the PDF named in <c>textboxFilePath</c> to text and shows it in
        /// <c>rtxt_Paragraph</c>.
        /// </summary>
        /// <remarks>
        /// After a conversion that read at least one page, the file name from
        /// <c>textboxFileName</c> is appended to <c>labelFilePath</c>. When extraction fails
        /// part-way through, the pages read so far are still shown and the error message is
        /// displayed in a message box.
        /// </remarks>
        private void btn_Convert_Click(object sender, EventArgs e)
        {
            string filePath = textboxFilePath.Text;

            StringBuilder text      = new StringBuilder();
            int           pagesRead = 0;
            Exception     failure   = null;

            try
            {
                // The using block releases the reader even when a page fails to extract.
                using (PdfReader reader = new PdfReader(filePath))
                {
                    for (int page = 1; page <= reader.NumberOfPages; page++)
                    {
                        ITextExtractionStrategy its = new iTextSharp.text.pdf.parser.LocationTextExtractionStrategy();

                        // Appended as-is: re-encoding the string through Encoding.Default would
                        // replace characters outside the ANSI code page with '?'.
                        text.Append(PdfTextExtractor.GetTextFromPage(reader, page, its));
                        pagesRead++;
                    }
                }
            }
            catch (Exception ex)
            {
                failure = ex;
            }

            if (pagesRead > 0)
            {
                rtxt_Paragraph.Text = text.ToString();

                // Appended once per conversion; doing this inside the page loop repeated the
                // file name once for every page of the document.
                labelFilePath.Text = labelFilePath.Text + @"\" + textboxFileName.Text;
            }

            if (failure != null)
            {
                MessageBox.Show(failure.Message);
            }
        }
예제 #24
0
        /// <summary>
        /// Extracts the text of every page of the PDF at <paramref name="fileName"/>.
        /// </summary>
        /// <remarks>
        /// As a side effect the method updates <c>totalPage</c> and <c>size</c>.
        /// </remarks>
        /// <param name="fileName">Path of the PDF file.</param>
        /// <returns>
        /// The concatenated page text, or "Not found file: " followed by the path when the file
        /// does not exist.
        /// </returns>
        public string ParsePdf(string fileName)
        {
            if (!File.Exists(fileName))
            {
                return "Not found file: " + fileName;
            }

            // The using block closes the reader, so no explicit Close() call is needed.
            using (PdfReader reader = new PdfReader(fileName))
            {
                StringBuilder sb = new StringBuilder();

                totalPage = reader.NumberOfPages;
                size      = new FileInfo(fileName).Length;

                for (int page = 1; page <= totalPage; page++)
                {
                    // A fresh strategy per page: SimpleTextExtractionStrategy keeps accumulating
                    // text, so a shared instance returned all earlier pages again and every
                    // page after the first was duplicated in the result.
                    ITextExtractionStrategy strategy = new SimpleTextExtractionStrategy();

                    // Appended as-is: re-encoding the string through Encoding.Default would
                    // replace characters outside the ANSI code page with '?'.
                    sb.Append(PdfTextExtractor.GetTextFromPage(reader, page, strategy));
                }

                return sb.ToString();
            }
        }
예제 #25
0
        /// <summary>
        /// Reads a PDF and returns its text so callers can search the document for
        /// specific content.
        /// </summary>
        /// <param name="path">Path of the PDF file to read.</param>
        /// <returns>
        /// The concatenated text of all pages. An empty string is returned when the
        /// file does not exist; when reading fails part-way, a message box is shown
        /// and whatever text was extracted before the failure is returned.
        /// </returns>
        public string GetPDF(string path)
        {
            StringBuilder text = new StringBuilder();

            try
            {
                if (File.Exists(path))
                {
                    // using guarantees the reader (and its file handle) is released
                    // even when extraction of a page throws.
                    using (PdfReader pdfReader = new PdfReader(path))
                    {
                        for (int page = 1; page <= pdfReader.NumberOfPages; page++)
                        {
                            ITextExtractionStrategy strategy = new SimpleTextExtractionStrategy();

                            // The extracted text is already a .NET string; converting it
                            // through Encoding.Default would only risk turning characters
                            // outside an ANSI code page into '?', so it is used as-is.
                            text.Append(PdfTextExtractor.GetTextFromPage(pdfReader, page, strategy));
                        }
                    }
                }
            }
            catch (Exception ex)
            {
                System.Windows.MessageBox.Show("Impossible de lire le fichier" + ex.ToString());
            }

            return(text.ToString());
        }
예제 #26
0
        /// <summary>
        /// Extracts the text of a PDF located under the web root.
        /// </summary>
        /// <param name="fileName">
        /// A <see cref="string"/> that is appended verbatim to
        /// <c>_hostingEnvironment.WebRootPath</c> to form the full file path.
        /// </param>
        /// <returns>
        /// The concatenated text of all pages, or an empty string when the resulting
        /// path does not exist.
        /// </returns>
        /// <exception cref="InvalidCastException">
        /// <paramref name="fileName"/> is not a string.
        /// </exception>
        public string ReadPdfFile(object fileName)
        {
            string fullPath = _hostingEnvironment.WebRootPath + (string)fileName;

            // Makes the code-page encodings available before the PDF is parsed.
            Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);

            StringBuilder text = new StringBuilder();

            if (!File.Exists(fullPath))
            {
                return(text.ToString());
            }

            // using guarantees the reader (and its file handle) is released even
            // when extraction of a page throws.
            using (PdfReader pdfReader = new PdfReader(fullPath))
            {
                for (int page = 1; page <= pdfReader.NumberOfPages; page++)
                {
                    ITextExtractionStrategy strategy = new SimpleTextExtractionStrategy();

                    // The extracted text is already a .NET string; re-encoding it via
                    // Encoding.Default is at best a no-op and at worst lossy, so it is
                    // appended unchanged.
                    text.Append(PdfTextExtractor.GetTextFromPage(pdfReader, page, strategy));
                }
            }

            return(text.ToString());
        }
예제 #27
0
        /// <summary>
        /// Click handler: extracts the text of the PDF whose path is in
        /// <c>textBox6</c> and shows it in a message box.
        /// </summary>
        /// <param name="sender">Control that raised the event.</param>
        /// <param name="e">Event data (unused).</param>
        private void button3_Click(object sender, EventArgs e)
        {
            String archivo = textBox6.Text;

            StringBuilder text = new StringBuilder();

            // using guarantees the reader (and its file handle) is released even
            // when extraction of a page throws.
            using (PdfReader inputDocument = new PdfReader(archivo))
            {
                for (int page = 1; page <= inputDocument.NumberOfPages; page++)
                {
                    ITextExtractionStrategy strategy = new SimpleTextExtractionStrategy();

                    // The extracted text is already a .NET string; re-encoding it via
                    // Encoding.Default would only risk replacing characters outside an
                    // ANSI code page with '?', so it is appended unchanged.
                    text.Append(PdfTextExtractor.GetTextFromPage(inputDocument, page, strategy));
                }
            }

            MessageBox.Show(text.ToString());
        }
예제 #28
0
        /// <summary>
        /// Entry point. For each page of the PDF at <c>fileName</c> it calls
        /// <c>ImageText()</c>, extracts the page text, splits it into lines, passes
        /// the lines to <c>GetInvoiceItems</c> and hands the result, together with
        /// the page number as text, to <c>WriteExcel</c>.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            // Nothing to do when the input file is missing.
            if (!File.Exists(fileName))
            {
                return;
            }

            // using guarantees the reader (and its file handle) is released even
            // when processing of a page throws.
            using (PdfReader pdfReader = new PdfReader(fileName))
            {
                for (var page = 1; page <= pdfReader.NumberOfPages; page++)
                {
                    ImageText();

                    // The extracted text is already a .NET string; re-encoding it via
                    // Encoding.Default would only risk replacing characters outside an
                    // ANSI code page with '?', so it is used unchanged.
                    var currentText  = PdfTextExtractor.GetTextFromPage(pdfReader, page);
                    var invoiceItems = GetInvoiceItems(currentText.Split('\n'));

                    WriteExcel(page.ToString(), invoiceItems);
                }
            }
        }
예제 #29
0
        /// <summary>
        /// Loads the text of a local PDF log into a rich text box.
        /// </summary>
        /// <param name="type">
        /// <c>"toDo"</c> selects <c>todo.pdf</c>, <c>"done"</c> selects
        /// <c>done.pdf</c>. Any other value leaves the file name empty, which is
        /// presumably rejected by the reader and so ends in the same message box as
        /// a missing log.
        /// </param>
        /// <param name="rt">Rich text box that receives the extracted text.</param>
        public static void ReadPDFLog(string type, RichTextBox rt)
        {
            string name = "";

            if (type == "toDo")
            {
                name = "todo.pdf";
            }
            else if (type == "done")
            {
                name = "done.pdf";
            }

            try
            {
                StringBuilder logText = new StringBuilder();

                // using guarantees the reader (and its file handle) is released even
                // when extraction of a page throws.
                using (PdfReader reader = new PdfReader(name))
                {
                    for (int page = 1; page <= reader.NumberOfPages; page++)
                    {
                        ITextExtractionStrategy its = new iTextSharp.text.pdf.parser.LocationTextExtractionStrategy();

                        // The extracted text is already a .NET string; re-encoding it
                        // via Encoding.Default would only risk replacing characters
                        // outside an ANSI code page with '?'.
                        logText.Append(PdfTextExtractor.GetTextFromPage(reader, page, its));
                    }
                }

                // Assign once after all pages are read instead of rebuilding the
                // string and updating the control on every page.
                rt.Text = logText.ToString();
            }
            catch (Exception)
            {
                MessageBox.Show("No local logs found, please create new");
            }
        }
예제 #30
0
        /// <summary>
        /// Reads a file as text and returns its content with every line feed
        /// replaced by a space.
        /// </summary>
        /// <param name="readText">
        /// Path of the file. A path ending in "pdf" (any letter case) is parsed as
        /// a PDF and its page text is extracted; any other path is read as a plain
        /// text file.
        /// </param>
        /// <returns>The file's text with each '\n' replaced by ' '.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="readText"/> is null.
        /// </exception>
        public String readTextData(String readText)
        {
            if (readText == null)
            {
                throw new ArgumentNullException("readText");
            }

            String str;

            // EndsWith is safe for paths shorter than three characters (Substring
            // threw there) and also recognises upper-case extensions such as ".PDF".
            if (readText.EndsWith("pdf", StringComparison.OrdinalIgnoreCase))
            {
                StringBuilder pdfText = new StringBuilder();

                // using guarantees the reader (and its file handle) is released
                // even when extraction of a page throws.
                using (PdfReader reader = new PdfReader(readText))
                {
                    for (int page = 1; page <= reader.NumberOfPages; page++)
                    {
                        ITextExtractionStrategy its = new iTextSharp.text.pdf.parser.SimpleTextExtractionStrategy();

                        // The extracted text is already a .NET string; re-encoding it
                        // via Encoding.Default would only risk replacing characters
                        // outside an ANSI code page with '?'.
                        pdfText.Append(PdfTextExtractor.GetTextFromPage(reader, page, its));
                    }
                }

                str = pdfText.ToString();
            }
            else
            {
                str = System.IO.File.ReadAllText(readText);
            }

            // Strings are immutable: Replace returns a new string, so its result
            // must be returned rather than discarded.
            return(str.Replace("\n", " "));
        }