/// <summary>
/// Extracts the text of every page of a hard-coded PDF on the Administrator
/// desktop into a local buffer.
/// </summary>
/// <remarks>
/// The collected text is neither returned nor stored, so the method has no
/// observable result beyond reading the file. When the file does not exist the
/// method does nothing.
/// </remarks>
private void IText7ReadPDF()
{
    string fileName = @"C:\Users\Administrator\Desktop\巨力电梯x(3).pdf";
    if (!File.Exists(fileName))
    {
        return;
    }

    StringBuilder text = new StringBuilder();
    PdfReader pdfReader = new PdfReader(fileName);
    try
    {
        for (int page = 1; page <= pdfReader.NumberOfPages; page++)
        {
            ITextExtractionStrategy strategy = new SimpleTextExtractionStrategy();

            // Append the extracted string as-is: .NET strings are already Unicode, and
            // the former Encoding.Default -> UTF-8 round trip could only lose characters
            // that Encoding.Default cannot represent (e.g. the CJK text this file holds).
            text.Append(PdfTextExtractor.GetTextFromPage(pdfReader, page, strategy));
        }
    }
    finally
    {
        // Close the reader even when extraction throws.
        pdfReader.Close();
    }
}
/// <summary>
/// Opens every file in <paramref name="folder"/> as a PDF, extracts its text and
/// shows the combined text in <paramref name="TB"/>.
/// </summary>
/// <param name="folder">Directory whose files are read. Every file found is opened as a PDF.</param>
/// <param name="TB">Text box whose <c>Text</c> receives the combined output.</param>
/// <returns>The number of files processed.</returns>
/// <remarks>
/// The text of each file is followed by a <c>{NEWARTICLE}</c> marker line.
/// </remarks>
private static int LoadAllPDFs(string folder, TextBox TB)
{
    int PDFCounter = 0;
    StringBuilder text = new StringBuilder();

    foreach (var file in Directory.GetFiles(folder + @"\"))
    {
        // The using block disposes the reader; no explicit Close() is needed.
        using (PdfReader pdfReader = new PdfReader(file))
        {
            for (int page = 1; page <= pdfReader.NumberOfPages; page++)
            {
                ITextExtractionStrategy strategy = new SimpleTextExtractionStrategy();

                // Append the extracted string as-is: .NET strings are already Unicode,
                // and the former Encoding.Default -> UTF-8 round trip could only lose
                // characters that Encoding.Default cannot represent.
                text.Append(PdfTextExtractor.GetTextFromPage(pdfReader, page, strategy));
            }
        }

        PDFCounter++;
        text.Append("\n{NEWARTICLE}\n");
    }

    TB.Text = text.ToString();
    return PDFCounter;
}
/// <summary>
/// Reads the text of every page of <c>main.pdf</c> (resolved against the current
/// directory).
/// </summary>
/// <returns>
/// The concatenated page text, or an empty string when the file cannot be read
/// or extraction fails part-way through.
/// </returns>
public static string Mainlog()
{
    string main = "";
    try
    {
        PdfReader reader = new PdfReader("main.pdf");
        try
        {
            StringBuilder pages = new StringBuilder();
            for (int page = 1; page <= reader.NumberOfPages; page++)
            {
                ITextExtractionStrategy its = new iTextSharp.text.pdf.parser.LocationTextExtractionStrategy();

                // Append the extracted string as-is: .NET strings are already Unicode,
                // and the former Encoding.Default -> UTF-8 round trip could only lose
                // characters that Encoding.Default cannot represent.
                pages.Append(PdfTextExtractor.GetTextFromPage(reader, page, its));
            }

            // Publish the result only once every page was read, so a failure
            // part-way through still yields the empty string.
            main = pages.ToString();
        }
        finally
        {
            // Close the reader even when extraction throws.
            reader.Close();
        }
    }
    catch (Exception ex)
    {
        // Best effort: callers get an empty string, but the failure is no longer invisible.
        System.Diagnostics.Trace.TraceError("Mainlog: could not read main.pdf: " + ex);
    }

    return main;
}
/// <summary>
/// Populates the static document properties (<c>Filename</c>, <c>Size</c>,
/// <c>Extension</c>, <c>DocumentType</c> and the statistics) for the given file.
/// </summary>
/// <param name="filename">Path of the document to inspect.</param>
/// <remarks>
/// For PDFs the page count and word count are computed here. For every other
/// extension <c>ComputeStatistics()</c> is called. Any extension other than
/// .pdf, .odt and .docx is labelled as a Word 97-2003 document.
/// </remarks>
public static void SetProperties(string filename)
{
    Filename = filename;
    FileInfo fi = new FileInfo(Filename);
    Size = Math.Round(Convert.ToDouble(fi.Length) / (1024 * 1024), 2); // bytes -> MiB
    Extension = fi.Extension;

    // Extensions are compared case-insensitively so "REPORT.PDF" is treated as a PDF
    // instead of falling through to the .doc branch.
    if (string.Equals(Extension, ".pdf", StringComparison.OrdinalIgnoreCase))
    {
        DocumentType = "Portable Document Format(.pdf)";

        StringBuilder pdfText = new StringBuilder();
        PdfReader pdfr = new PdfReader(Filename);
        try
        {
            TotalPages = pdfr.NumberOfPages;
            for (int page = 1; page <= pdfr.NumberOfPages; page++)
            {
                ITextExtractionStrategy strategy = new SimpleTextExtractionStrategy();

                // Append the extracted string as-is: .NET strings are already Unicode,
                // and the former Encoding.Default -> UTF-8 round trip could only lose
                // characters that Encoding.Default cannot represent.
                pdfText.Append(PdfTextExtractor.GetTextFromPage(pdfr, page, strategy));
            }
        }
        finally
        {
            // Close the reader even when extraction throws.
            pdfr.Close();
        }

        // A "word" is a run of ASCII letters and digits.
        NoOfWords = Regex.Matches(pdfText.ToString(), @"[A-Za-z0-9]+").Count;
    }
    else if (string.Equals(Extension, ".odt", StringComparison.OrdinalIgnoreCase))
    {
        DocumentType = "Open Document Format(.odt)";
        ComputeStatistics();
    }
    else if (string.Equals(Extension, ".docx", StringComparison.OrdinalIgnoreCase))
    {
        DocumentType = "Microsoft Word Document(.docx)";
        ComputeStatistics();
    }
    else
    {
        DocumentType = "Word 97-2003 document(.doc)";
        ComputeStatistics();
    }
}
/// <summary>
/// Parses the invoice items out of every page of the given PDF.
/// </summary>
/// <param name="fileName">Path of the PDF to read.</param>
/// <returns>
/// The items produced by <c>ParsePage</c> for each page, in page order. If reading
/// fails, the items collected before the failure are returned (possibly none).
/// </returns>
/// <remarks>
/// The value returned by <c>GetVencimento</c> for the text of page 1 is passed to
/// <c>ParsePage</c> for every page.
/// </remarks>
private List<InvoiceItem> GetFaturaItems(string fileName)
{
    var result = new List<InvoiceItem>();
    try
    {
        PdfReader reader = new PdfReader(fileName);
        try
        {
            // The extracted strings are used as-is: .NET strings are already Unicode,
            // and the former Encoding.Default -> UTF-8 round trip could only lose
            // characters that Encoding.Default cannot represent.
            string firstPageText = PdfTextExtractor.GetTextFromPage(
                reader, 1, new iTextSharp.text.pdf.parser.SimpleTextExtractionStrategy());
            var vencimento = GetVencimento(firstPageText);

            for (int pageNumber = 1; pageNumber <= reader.NumberOfPages; pageNumber++)
            {
                // Page 1 was already extracted above; reuse it instead of parsing it twice.
                string pageText = pageNumber == 1
                    ? firstPageText
                    : PdfTextExtractor.GetTextFromPage(
                        reader, pageNumber, new iTextSharp.text.pdf.parser.SimpleTextExtractionStrategy());

                result.AddRange(ParsePage(pageText, vencimento));
            }
        }
        finally
        {
            // Close the reader even when extraction or parsing throws.
            reader.Close();
        }
    }
    catch (Exception ex)
    {
        // Best effort: keep returning what was parsed, but record the failure
        // instead of hiding it completely.
        System.Diagnostics.Trace.TraceError("GetFaturaItems: failed to read '" + fileName + "': " + ex);
    }

    return result;
}
/// <summary>
/// Extracts the text of every page of the PDF at <c>_fileName</c>.
/// </summary>
/// <returns>The page texts in order, each followed by a CR/LF.</returns>
/// <exception cref="FileNotFoundException">The file at <c>_fileName</c> does not exist.</exception>
public string Parse()
{
    if (!File.Exists(_fileName))
    {
        // Carry the path so the failure can be diagnosed from the exception alone.
        throw new FileNotFoundException("PDF file not found.", _fileName);
    }

    StringBuilder text = new StringBuilder();
    PdfReader pdfReader = new PdfReader(_fileName);
    try
    {
        for (int page = 1; page <= pdfReader.NumberOfPages; page++)
        {
            ITextExtractionStrategy strategy = new SimpleTextExtractionStrategy();

            // Append the extracted string as-is: .NET strings are already Unicode,
            // and the former Encoding.Default -> UTF-8 round trip could only lose
            // characters that Encoding.Default cannot represent.
            text.Append(PdfTextExtractor.GetTextFromPage(pdfReader, page, strategy));
            text.Append("\r\n");
        }
    }
    finally
    {
        // Close the reader even when extraction throws.
        pdfReader.Close();
    }

    return text.ToString();
}
/// <summary>
/// Lets the user pick a PDF and shows its extracted text in <c>richTextBox1</c>.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
/// <remarks>
/// Any failure is reported in a message box. Text extracted before the failure
/// is still shown.
/// </remarks>
private void button1_Click(object sender, EventArgs e)
{
    string filePath;
    // The dialog is disposable; release it as soon as the path has been read.
    using (OpenFileDialog dlg = new OpenFileDialog())
    {
        dlg.Filter = "PDF Files(*.PDF)|*.PDF|All Files(*.*)|*.*";
        if (dlg.ShowDialog() != DialogResult.OK)
        {
            return;
        }

        filePath = dlg.FileName;
    }

    StringBuilder extracted = new StringBuilder();
    try
    {
        PdfReader reader = new PdfReader(filePath);
        try
        {
            for (int page = 1; page <= reader.NumberOfPages; page++)
            {
                ITextExtractionStrategy its = new iTextSharp.text.pdf.parser.LocationTextExtractionStrategy();

                // Append the extracted string as-is: .NET strings are already Unicode,
                // and the former Encoding.Default -> UTF-8 round trip could only lose
                // characters that Encoding.Default cannot represent.
                extracted.Append(PdfTextExtractor.GetTextFromPage(reader, page, its));
            }
        }
        finally
        {
            // Close the reader even when extraction throws.
            reader.Close();
        }

        // Update the control once rather than once per page.
        richTextBox1.Text = extracted.ToString();
    }
    catch (Exception ex)
    {
        // Keep whatever was read before the failure visible.
        if (extracted.Length > 0)
        {
            richTextBox1.Text = extracted.ToString();
        }

        MessageBox.Show(ex.Message);
    }
}
/// <summary>
/// Converts a PDF file to text by extracting just the text of each page.
/// </summary>
/// <param name="infile">Path of the PDF to read.</param>
/// <returns>The page texts in order, each followed by a line feed.</returns>
/// <remarks>
/// The extraction approach is credited to the developers of iTextSharp and to asturcon at
/// http://www.codeproject.com/Questions/770857/Convert-PDF-tp-text-formatted-using-iTextSharp-csh
/// Before this was used, manual conversion was done with Adobe Acrobat or Microsoft Word,
/// which both converted badly (missing spaces, linefeeds, reversed lines, etc.). For background see
/// https://www.informit.com/guides/content.aspx?g=dotnet&amp;seqNum=163
/// </remarks>
public static string Convert(string infile)
{
    StringBuilder strPdfContent = new StringBuilder();
    PdfReader reader = new PdfReader(infile);
    try
    {
        for (int i = 1; i <= reader.NumberOfPages; i++)
        {
            ITextExtractionStrategy objExtractStrategy = new SimpleTextExtractionStrategy();

            // Append the extracted string as-is: .NET strings are already Unicode,
            // and the former Encoding.Default -> UTF-8 round trip could only lose
            // characters that Encoding.Default cannot represent.
            strPdfContent.Append(PdfTextExtractor.GetTextFromPage(reader, i, objExtractStrategy));
            strPdfContent.Append("\n");
        }
    }
    finally
    {
        // Close the reader even when extraction throws.
        reader.Close();
    }

    return strPdfContent.ToString();
}
/// <summary>
/// Extracts the text of every page of a PDF and, as a side effect, writes the
/// document's form fields to <c>_path + "test.xml"</c>.
/// </summary>
/// <param name="fileName">Path of the PDF to read.</param>
/// <returns>
/// The concatenated page text, or an empty string when the file does not exist.
/// </returns>
/// <remarks>
/// The XML file holds one "key: value" string per AcroForm field, serialized as a
/// <c>List&lt;string&gt;</c>.
/// </remarks>
public string ReadPdfFile(string fileName)
{
    StringBuilder text = new StringBuilder();
    if (!File.Exists(fileName))
    {
        return text.ToString();
    }

    PdfReader pdfReader = new PdfReader(fileName);
    try
    {
        List<string> fieldLines = pdfReader.AcroFields.Fields
            .Select(x => x.Key + ": " + pdfReader.AcroFields.GetField(x.Key))
            .ToList();

        // The field list does not depend on the page, so it is serialized once
        // instead of being rewritten with identical content for every page.
        XmlSerializer serializer = new XmlSerializer(typeof(List<string>));
        using (TextWriter writer = new StreamWriter(_path + "test.xml"))
        {
            serializer.Serialize(writer, fieldLines);
        }

        for (int page = 1; page <= pdfReader.NumberOfPages; page++)
        {
            ITextExtractionStrategy strategy = new SimpleTextExtractionStrategy();

            // Append the extracted string as-is: .NET strings are already Unicode,
            // and the former Encoding.Default -> UTF-8 round trip could only lose
            // characters that Encoding.Default cannot represent.
            text.Append(PdfTextExtractor.GetTextFromPage(pdfReader, page, strategy));
        }
    }
    finally
    {
        // Close the reader even when extraction or serialization throws.
        pdfReader.Close();
    }

    return text.ToString();
}
/// <summary>
/// Extracts the text of a PDF into <c>PDFText</c> (whole document) and
/// <c>texts</c> (one entry per page).
/// </summary>
/// <param name="fileName">Path of the PDF to read. Nothing happens when it does not exist.</param>
/// <remarks>
/// When the document text contains <c>JPM_TAG</c>, <c>data</c> is set to a new
/// <c>JPMData</c> instance.
/// </remarks>
public void GetPDFText(string fileName)
{
    if (!File.Exists(fileName))
    {
        return;
    }

    StringBuilder text = new StringBuilder();
    PdfReader pdfReader = new PdfReader(fileName);
    try
    {
        for (int i = 1; i <= pdfReader.NumberOfPages; i++)
        {
            ITextExtractionStrategy strategy = new SimpleTextExtractionStrategy();

            // Use the extracted string as-is: .NET strings are already Unicode,
            // and the former Encoding.Default -> UTF-8 round trip could only lose
            // characters that Encoding.Default cannot represent.
            string currentText = PdfTextExtractor.GetTextFromPage(pdfReader, i, strategy);
            text.Append(currentText);
            texts.Add(currentText);
        }

        PDFText = text.ToString();
    }
    finally
    {
        // Close the reader even when extraction throws.
        pdfReader.Close();
    }

    if (PDFText.Contains(JPM_TAG))
    {
        data = new JPMData();
    }
}
/// <summary>
/// Lets the user pick a .txt, .rtf or .pdf file and shows its text in
/// <paramref name="docBox"/>.
/// </summary>
/// <param name="docBox">Rich text box whose document is replaced with the loaded text.</param>
/// <param name="LabelShowFileName">Label that receives the bare file name of the chosen file.</param>
/// <returns>
/// The dialog's <c>FileName</c>, returned whether or not the user confirmed the dialog.
/// </returns>
/// <remarks>
/// .txt and .rtf files are read as plain text using code page 1251. A file with any
/// other extension only updates the label.
/// NOTE(review): .rtf content is shown as raw text, markup included — confirm this is intended.
/// </remarks>
public string Open(RichTextBox docBox, Label LabelShowFileName)
{
    OpenFileDialog openFileDialog = new OpenFileDialog();
    openFileDialog.Filter = "TXT File (*.txt)|*.txt|RTF File (*.rtf)|*.rtf|PDF File (*.pdf)|*.pdf";

    if (openFileDialog.ShowDialog() == true)
    {
        string path = openFileDialog.FileName;
        LabelShowFileName.Content = System.IO.Path.GetFileName(path);

        // Stays null for extensions that are not handled, so the document is left untouched.
        string text = null;
        switch (System.IO.Path.GetExtension(path).ToLower())
        {
            case ".rtf":
            case ".txt":
                text = File.ReadAllText(path, Encoding.GetEncoding(1251));
                break;

            case ".pdf":
                StringBuilder pages = new StringBuilder();
                PdfReader reader = new PdfReader(path);
                try
                {
                    for (int page = 1; page <= reader.NumberOfPages; page++)
                    {
                        ITextExtractionStrategy its = new iTextSharp.text.pdf.parser.LocationTextExtractionStrategy();

                        // Append the extracted string as-is: .NET strings are already
                        // Unicode, and the former Encoding.Default -> UTF-8 round trip could
                        // only lose characters that Encoding.Default cannot represent.
                        pages.Append(PdfTextExtractor.GetTextFromPage(reader, page, its));
                    }
                }
                finally
                {
                    // Close the reader even when extraction throws.
                    reader.Close();
                }

                text = pages.ToString();
                break;
        }

        if (text != null)
        {
            FlowDocument flowDocument = new FlowDocument();
            flowDocument.Blocks.Add(new System.Windows.Documents.Paragraph(new Run(text)));
            docBox.Document = flowDocument;
        }
    }

    return openFileDialog.FileName;
}
/// <summary>
/// Reads the text of every page of a PDF.
/// </summary>
/// <param name="Filename">Path of the PDF; must be a <see cref="string"/>.</param>
/// <returns>
/// The concatenated page text. If an error occurs, the text read so far is returned
/// and the error message is shown in <c>lblMsjs</c>.
/// </returns>
public string ReadPdfFile(object Filename)
{
    StringBuilder extracted = new StringBuilder();
    try
    {
        // One reader serves the whole document. Previously a second reader was opened
        // for every page and the outer one was never closed.
        PdfReader reader = new PdfReader((string)Filename);
        try
        {
            for (int page = 1; page <= reader.NumberOfPages; page++)
            {
                ITextExtractionStrategy its = new iTextSharp.text.pdf.parser.SimpleTextExtractionStrategy();

                // Append the extracted string as-is: .NET strings are already Unicode,
                // and the former Encoding.Default -> UTF-8 round trip could only lose
                // characters that Encoding.Default cannot represent.
                extracted.Append(PdfTextExtractor.GetTextFromPage(reader, page, its));
            }
        }
        finally
        {
            // Close the reader even when extraction throws.
            reader.Close();
        }
    }
    catch (Exception ex)
    {
        // Show the error to the user.
        lblMsjs.ForeColor = Color.Crimson;
        lblMsjs.Text = ex.Message.ToString();
    }

    return extracted.ToString();
}
/// <summary>
/// Reads the text of every page of the given PDF into a new <see cref="StringBuilder"/>.
/// </summary>
/// <param name="file">Path of the PDF to read.</param>
/// <returns>
/// A builder holding the concatenated page text. It is empty when the file does not exist.
/// </returns>
private StringBuilder ReadText(string file)
{
    StringBuilder textBuilder = new StringBuilder();
    if (!File.Exists(file))
    {
        return textBuilder;
    }

    PdfReader pdfReader = new PdfReader(file);
    try
    {
        for (int page = 1; page <= pdfReader.NumberOfPages; page++)
        {
            ITextExtractionStrategy strategy = new SimpleTextExtractionStrategy();

            // Use the extracted string as-is: .NET strings are already Unicode,
            // and the former Encoding.Default -> UTF-8 round trip could only lose
            // characters that Encoding.Default cannot represent.
            string currentText = PdfTextExtractor.GetTextFromPage(pdfReader, page, strategy);
            if (!String.IsNullOrEmpty(currentText))
            {
                textBuilder.Append(currentText);
            }
        }
    }
    finally
    {
        // Close the reader even when extraction throws.
        pdfReader.Close();
    }

    return textBuilder;
}
/// <summary>
/// Appends the text extracted from one PDF page to a text file, marking lines
/// that look like titles.
/// </summary>
/// <param name="reader">Open reader for the PDF being processed.</param>
/// <param name="page">1-based number of the page to extract.</param>
/// <param name="its">Extraction strategy used for the page.</param>
/// <param name="outPath">Path of the text file to append to.</param>
/// <remarks>
/// A line is written as a title ("Title - - - - - " prefix plus a blank line after
/// it) when it is 1 to 53 characters long, does not end in punctuation and does not
/// start with a digit. Each title is also echoed to the console. A dashed
/// separator line is written after the page.
/// </remarks>
private static void WriteInfile(PdfReader reader, int page, ITextExtractionStrategy its, string outPath)
{
    // Use the extracted string as-is: .NET strings are already Unicode, and the
    // former Encoding.Default -> UTF-8 round trip could only lose characters that
    // Encoding.Default cannot represent.
    string strText = PdfTextExtractor.GetTextFromPage(reader, page, its);
    string[] lines = strText.Split('\n');

    // One writer for the whole page instead of reopening the file for every line.
    using (System.IO.StreamWriter file = new System.IO.StreamWriter(outPath, true))
    {
        foreach (string line in lines)
        {
            // The length check comes first so empty lines are never indexed. The old
            // code read the character before the start of an empty line and threw.
            bool looksLikeTitle = line.Length > 0
                && line.Length < 54
                && !char.IsPunctuation(line[line.Length - 1])
                && !char.IsDigit(line[0]);

            if (looksLikeTitle)
            {
                // The number printed is the line length plus one, as before.
                Console.WriteLine("TITLE = " + line + " " + (line.Length + 1));
                file.Write("Title - - - - - ");
                file.WriteLine(line + "\n");
            }
            else
            {
                file.WriteLine(line);
            }
        }

        file.WriteLine("- - - - - - - - - - - - - - - - - - - - - - - - - - - ");
    }
}
/// <summary>
/// Gets the text of a single PDF page.
/// </summary>
/// <param name="filepath">Path of the PDF file.</param>
/// <param name="page">1-based page number (defaults to the first page).</param>
/// <returns>The page content as text.</returns>
/// <exception cref="Exception">
/// The file does not exist, or <paramref name="page"/> is outside the document's page range.
/// </exception>
public static string GetPDFPageAsText(string filepath, int page = 1)
{
    if (!System.IO.File.Exists(filepath))
    {
        throw new Exception(Properties.Resources.FileNotFoundError);
    }

    PdfReader myPdfReader = new PdfReader(filepath);
    try
    {
        if (page < 1 || page > myPdfReader.NumberOfPages)
        {
            throw new Exception(Properties.Resources.WrongPageNumber);
        }

        // Return the extracted string as-is: .NET strings are already Unicode, and
        // the former Encoding.Default -> UTF-8 round trip could only lose characters
        // that Encoding.Default cannot represent.
        return PdfTextExtractor.GetTextFromPage(myPdfReader, page, new SimpleTextExtractionStrategy());
    }
    finally
    {
        // The reader was previously never closed, and the MemoryStream that was
        // disposed instead was never used.
        myPdfReader.Close();
    }
}
/// <summary>
/// Extracts the text of every page of a PDF.
/// </summary>
/// <param name="path">Path of the PDF to read.</param>
/// <returns>The concatenated page text.</returns>
/// <exception cref="Exception">
/// Reading failed. The message is that of the underlying error, which is available
/// as <see cref="Exception.InnerException"/>.
/// </exception>
public string ReadTextPDF(string path)
{
    try
    {
        PdfReader reader = new PdfReader(path);
        try
        {
            int numberPage = reader.NumberOfPages;
            StringBuilder textPages = new StringBuilder();
            for (int pageNumber = 1; pageNumber <= numberPage; pageNumber++)
            {
                ITextExtractionStrategy textExtractionStrategy = new iTextSharp.text.pdf.parser.SimpleTextExtractionStrategy();

                // Append the extracted string as-is: .NET strings are already Unicode,
                // and the former Encoding.Default -> UTF-8 round trip could only lose
                // characters that Encoding.Default cannot represent.
                textPages.Append(PdfTextExtractor.GetTextFromPage(reader, pageNumber, textExtractionStrategy));
            }

            return textPages.ToString();
        }
        finally
        {
            // Close the reader even when extraction throws.
            reader.Close();
        }
    }
    catch (Exception ex)
    {
        // Same exception type and message as before, but the original exception
        // (type and stack trace) is now preserved as the inner exception.
        throw new Exception(ex.Message, ex);
    }
}
/// <summary>
/// Gets the text of every page of a PDF.
/// </summary>
/// <param name="filepath">Path of the PDF file.</param>
/// <returns>The page texts in order, separated by a single line feed.</returns>
/// <exception cref="Exception">The file does not exist.</exception>
public static string GetEntirePDFAsText(string filepath)
{
    if (!System.IO.File.Exists(filepath))
    {
        throw new Exception(Properties.Resources.FileNotFoundError);
    }

    StringBuilder content = new StringBuilder();
    PdfReader myPdfReader = new PdfReader(filepath);
    try
    {
        for (int i = 1; i <= myPdfReader.NumberOfPages; i++)
        {
            if (i > 1)
            {
                // Page break between pages, none before the first.
                content.Append("\n");
            }

            // Append the extracted string as-is: .NET strings are already Unicode,
            // and the former Encoding.Default -> UTF-8 round trip could only lose
            // characters that Encoding.Default cannot represent.
            content.Append(PdfTextExtractor.GetTextFromPage(myPdfReader, i, new SimpleTextExtractionStrategy()));
        }
    }
    finally
    {
        // The reader was previously never closed, and the MemoryStream that was
        // disposed instead was never used.
        myPdfReader.Close();
    }

    return content.ToString();
}
/// <summary>
/// Derives a new file name from a PDF's text: the six characters that start eight
/// characters after the beginning of the first occurrence of "Number".
/// </summary>
/// <param name="filename">Path of the PDF to read.</param>
/// <returns>The six-character value found after the "Number" marker.</returns>
/// <exception cref="FileNotFoundException">The file does not exist.</exception>
/// <exception cref="System.InvalidOperationException">The text contains no "Number" marker.</exception>
/// <exception cref="System.ArgumentOutOfRangeException">
/// Fewer than six characters are available at the expected position after the marker.
/// </exception>
public static string ParsePdf(string filename)
{
    if (!File.Exists(filename))
    {
        throw new FileNotFoundException("PDF file not found.", filename);
    }

    using (PdfReader textreader = new PdfReader(filename))
    {
        StringBuilder sb = new StringBuilder();
        for (int page = 1; page <= textreader.NumberOfPages; page++)
        {
            // A new strategy per page: a SimpleTextExtractionStrategy keeps appending to
            // its internal result, so a shared instance would return the text of all
            // previous pages again on every call.
            ITextExtractionStrategy strategy = new SimpleTextExtractionStrategy();
            string text = PdfTextExtractor.GetTextFromPage(textreader, page, strategy);
            if (!string.IsNullOrWhiteSpace(text))
            {
                // Append the extracted string as-is: .NET strings are already Unicode,
                // and the former Encoding.Default -> UTF-8 round trip could only lose
                // characters that Encoding.Default cannot represent.
                sb.Append(text);
            }
        }

        string sb_final = sb.ToString();
        int new_file_name_index = sb_final.IndexOf("Number", System.StringComparison.Ordinal);
        if (new_file_name_index < 0)
        {
            // Previously -1 was used as an offset, so an arbitrary piece of the
            // document was returned as the file name.
            throw new System.InvalidOperationException(
                "The text 'Number' was not found in '" + filename + "'.");
        }

        // Skip the marker plus two further characters (8 in total).
        int startValue = new_file_name_index + 8;
        return sb_final.Substring(startValue, 6);
    }
}
/// <summary>
/// Prints an ASCII-to-UTF-8 conversion of <paramref name="someText"/> to the console,
/// one line each: the input, its ASCII bytes in hex, those bytes converted to UTF-8
/// in hex, and the string decoded from the UTF-8 bytes.
/// </summary>
/// <param name="someText">
/// Text to convert. Only characters in the range 0x00 - 0x7F survive the ASCII
/// encoding step without loss.
/// </param>
private void UTF8Encoding(string someText)
{
    Console.WriteLine(someText);

    // Step 1: the string as ASCII bytes.
    byte[] asciiBytes = Encoding.ASCII.GetBytes(someText);
    string asciiHex = BitConverter.ToString(asciiBytes);
    Console.WriteLine(asciiHex);

    // Step 2: the ASCII bytes re-encoded as UTF-8 bytes.
    byte[] utf8Bytes = Encoding.Convert(Encoding.ASCII, Encoding.UTF8, asciiBytes);
    string utf8Hex = BitConverter.ToString(utf8Bytes);
    Console.WriteLine(utf8Hex);

    // Step 3: the UTF-8 bytes decoded back into a string.
    string roundTripped = Encoding.UTF8.GetString(utf8Bytes);
    Console.WriteLine(roundTripped);
}
/// <summary>
/// Downloads a PDF to <paramref name="filename"/> and extracts the text of every page.
/// </summary>
/// <param name="url">Address the PDF is downloaded from.</param>
/// <param name="filename">Local path the download is written to and then read from.</param>
/// <returns>
/// The concatenated page text, or an empty string when the file does not exist
/// after the download.
/// </returns>
private static async Task<string> ParsePDF(string url, string filename)
{
    var text = new StringBuilder();
    await Client.DownloadFileTaskAsync(url, filename);

    if (!File.Exists(filename))
    {
        return text.ToString();
    }

    Console.WriteLine("file exists");
    var pdfReader = new PdfReader(filename);
    try
    {
        for (var page = 1; page <= pdfReader.NumberOfPages; page++)
        {
            ITextExtractionStrategy strategy = new LocationTextExtractionStrategy();

            // Append the extracted string as-is: .NET strings are already Unicode,
            // and the former Encoding.Default -> UTF-8 round trip could only lose
            // characters that Encoding.Default cannot represent.
            text.Append(PdfTextExtractor.GetTextFromPage(pdfReader, page, strategy));
        }
    }
    finally
    {
        // Close the reader even when extraction throws.
        pdfReader.Close();
    }

    return text.ToString();
}
//[Dependency]
//public IF0413Repository F0413Repository { get; set; }
//[Dependency]
//public IF0413Business F0413Business { get; set; }
//[Dependency]
//public IF0414Business F0414Business { get; set; }

/// <summary>
/// Extracts the text of every page of a PDF.
/// </summary>
/// <param name="nombreArchivo">Path of the PDF to read.</param>
/// <returns>The concatenated page text.</returns>
/// <exception cref="ExportException">
/// An <see cref="IOException"/> occurred while reading the file.
/// </exception>
public String BuscarDatosPdf(string nombreArchivo)
{
    try
    {
        // One reader serves the whole document. Previously a second reader was opened
        // for every page, and neither was closed when an exception occurred.
        PdfReader reader = new PdfReader(nombreArchivo);
        try
        {
            StringBuilder extracted = new StringBuilder();
            for (int page = 1; page <= reader.NumberOfPages; page++)
            {
                ITextExtractionStrategy its = new iTextSharp.text.pdf.parser.SimpleTextExtractionStrategy();

                // Append the extracted string as-is: .NET strings are already Unicode,
                // and the former Encoding.Default -> UTF-8 round trip could only lose
                // characters that Encoding.Default cannot represent.
                extracted.Append(PdfTextExtractor.GetTextFromPage(reader, page, its));
            }

            return extracted.ToString();
        }
        finally
        {
            reader.Close();
        }
    }
    catch (IOException ex)
    {
        throw new ExportException("Error: No se encuentra el archivo o recurso: " + nombreArchivo + ". --> Traza Original: " + ex.StackTrace);
    }
}
/// <summary>
/// Opens the given PDF, records its page count and size, and returns the text of
/// page <c>pPage</c>.
/// </summary>
/// <param name="fileName">Path of the PDF to read.</param>
/// <returns>
/// The text of page <c>pPage</c>; an empty string when <paramref name="fileName"/> is
/// null or empty; or a "Not found file: ..." message when the file does not exist.
/// </returns>
/// <remarks>
/// Side effects: <c>pReader</c>, <c>totalPage</c> and <c>size</c> are overwritten.
/// NOTE(review): the reader stored in <c>pReader</c> is left open here, and any reader
/// previously held in that field is not closed — confirm who owns its lifetime.
/// </remarks>
public string ParsePdfPage(string fileName)
{
    if (string.IsNullOrEmpty(fileName))
    {
        return "";
    }

    if (!File.Exists(fileName))
    {
        return "Not found file: " + fileName;
    }

    pReader = new PdfReader(fileName);
    totalPage = pReader.NumberOfPages;
    size = new FileInfo(fileName).Length;

    ITextExtractionStrategy strategy = new SimpleTextExtractionStrategy();

    // Return the extracted string as-is: .NET strings are already Unicode, and the
    // former Encoding.Default -> UTF-8 round trip could only lose characters that
    // Encoding.Default cannot represent.
    return PdfTextExtractor.GetTextFromPage(pReader, pPage, strategy);
}
/// <summary>
/// Converts the PDF named in <c>textboxFilePath</c> to text and shows it in
/// <c>rtxt_Paragraph</c>.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
/// <remarks>
/// After a successful conversion the file name from <c>textboxFileName</c> is appended
/// to <c>labelFilePath</c>. Failures are reported in a message box. Text extracted
/// before the failure is still shown.
/// </remarks>
private void btn_Convert_Click(object sender, EventArgs e)
{
    string filePath = textboxFilePath.Text;
    StringBuilder extracted = new StringBuilder();
    try
    {
        PdfReader reader = new PdfReader(filePath);
        try
        {
            for (int page = 1; page <= reader.NumberOfPages; page++)
            {
                ITextExtractionStrategy its = new iTextSharp.text.pdf.parser.LocationTextExtractionStrategy();

                // Append the extracted string as-is: .NET strings are already Unicode,
                // and the former Encoding.Default -> UTF-8 round trip could only lose
                // characters that Encoding.Default cannot represent.
                extracted.Append(PdfTextExtractor.GetTextFromPage(reader, page, its));
            }
        }
        finally
        {
            // Close the reader even when extraction throws.
            reader.Close();
        }

        // Update the controls once per conversion. Doing this inside the page loop
        // appended the file name to the label once for every page of the document.
        rtxt_Paragraph.Text = extracted.ToString();
        labelFilePath.Text = labelFilePath.Text + @"\" + textboxFileName.Text;
    }
    catch (Exception ex)
    {
        // Keep whatever was read before the failure visible.
        if (extracted.Length > 0)
        {
            rtxt_Paragraph.Text = extracted.ToString();
        }

        MessageBox.Show(ex.Message);
    }
}
/// <summary>
/// Extracts the text of every page of a PDF and records its page count and size.
/// </summary>
/// <param name="fileName">Path of the PDF to read.</param>
/// <returns>
/// The concatenated page text, or a "Not found file: ..." message when the file
/// does not exist.
/// </returns>
/// <remarks>
/// Side effects: <c>totalPage</c> and <c>size</c> are set from the file.
/// </remarks>
public string ParsePdf(string fileName)
{
    if (!File.Exists(fileName))
    {
        return "Not found file: " + fileName;
    }

    // The using block disposes the reader; no explicit Close() is needed.
    using (PdfReader reader = new PdfReader(fileName))
    {
        totalPage = reader.NumberOfPages;
        size = new FileInfo(fileName).Length;

        StringBuilder sb = new StringBuilder();
        for (int page = 1; page <= totalPage; page++)
        {
            // A new strategy per page: a SimpleTextExtractionStrategy keeps appending to
            // its internal result, so the shared instance used before returned the text
            // of all previous pages again on every call and duplicated it in the output.
            ITextExtractionStrategy strategy = new SimpleTextExtractionStrategy();

            // Append the extracted string as-is: .NET strings are already Unicode,
            // and the former Encoding.Default -> UTF-8 round trip could only lose
            // characters that Encoding.Default cannot represent.
            sb.Append(PdfTextExtractor.GetTextFromPage(reader, page, strategy));
        }

        return sb.ToString();
    }
}
/// <summary>
/// Reads the text of a PDF so that it can be searched for specific content.
/// </summary>
/// <param name="path">Path of the PDF to read.</param>
/// <returns>
/// The concatenated page text. It is empty when the file does not exist. If an error
/// occurs, a message box is shown and the text read so far is returned.
/// </returns>
public string GetPDF(string path)
{
    StringBuilder text = new StringBuilder();
    try
    {
        if (File.Exists(path))
        {
            PdfReader pdfReader = new PdfReader(path);
            try
            {
                for (int page = 1; page <= pdfReader.NumberOfPages; page++)
                {
                    ITextExtractionStrategy strategy = new SimpleTextExtractionStrategy();

                    // Append the extracted string as-is: .NET strings are already
                    // Unicode, and the former Encoding.Default -> UTF-8 round trip could
                    // only lose characters that Encoding.Default cannot represent.
                    text.Append(PdfTextExtractor.GetTextFromPage(pdfReader, page, strategy));
                }
            }
            finally
            {
                // Close the reader even when extraction throws.
                pdfReader.Close();
            }
        }
    }
    catch (Exception ex)
    {
        System.Windows.MessageBox.Show("Impossible de lire le fichier" + ex.ToString());
    }

    return text.ToString();
}
/// <summary>
/// Extracts the text of a PDF located under the web root.
/// </summary>
/// <param name="fileName">
/// Path relative to <c>_hostingEnvironment.WebRootPath</c>; must be a <see cref="string"/>.
/// </param>
/// <returns>
/// The concatenated page text, or an empty string when the file does not exist.
/// </returns>
/// <remarks>
/// NOTE(review): <paramref name="fileName"/> is appended to the web root unchecked. If it
/// can come from a request, confirm that values such as "..\..\file" are rejected upstream.
/// </remarks>
public string ReadPdfFile(object fileName)
{
    var filename = _hostingEnvironment.WebRootPath + (string)fileName;

    // Kept from the original: makes the code-page encodings available to this process.
    Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);

    StringBuilder text = new StringBuilder();
    if (!File.Exists(filename))
    {
        return text.ToString();
    }

    PdfReader pdfReader = new PdfReader(filename);
    try
    {
        for (int page = 1; page <= pdfReader.NumberOfPages; page++)
        {
            ITextExtractionStrategy strategy = new SimpleTextExtractionStrategy();

            // Append the extracted string as-is: .NET strings are already Unicode,
            // and the former Encoding.Default -> UTF-8 round trip could only lose
            // characters that Encoding.Default cannot represent.
            text.Append(PdfTextExtractor.GetTextFromPage(pdfReader, page, strategy));
        }
    }
    finally
    {
        // Close the reader even when extraction throws.
        pdfReader.Close();
    }

    return text.ToString();
}
/// <summary>
/// Extracts the text of the PDF whose path is in <c>textBox6</c> and shows it in a
/// message box.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button3_Click(object sender, EventArgs e)
{
    String archivo = textBox6.Text;
    StringBuilder text = new StringBuilder();

    PdfReader inputDocument = new PdfReader(archivo);
    try
    {
        for (int page = 1; page <= inputDocument.NumberOfPages; page++)
        {
            ITextExtractionStrategy strategy = new SimpleTextExtractionStrategy();

            // Append the extracted string as-is: .NET strings are already Unicode,
            // and the former Encoding.Default -> UTF-8 round trip could only lose
            // characters that Encoding.Default cannot represent.
            text.Append(PdfTextExtractor.GetTextFromPage(inputDocument, page, strategy));
        }
    }
    finally
    {
        // Close the reader even when extraction throws.
        inputDocument.Close();
    }

    MessageBox.Show(text.ToString());
}
/// <summary>
/// Entry point: reads the PDF at <c>fileName</c> page by page, turns each page's
/// lines into invoice items and writes them out with <c>WriteExcel</c>.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
/// <remarks>
/// <c>ImageText()</c> is called once per page before the page text is extracted.
/// Nothing happens when the file does not exist.
/// </remarks>
static void Main(string[] args)
{
    if (!File.Exists(fileName))
    {
        return;
    }

    PdfReader pdfReader = new PdfReader(fileName);
    try
    {
        for (var page = 1; page <= pdfReader.NumberOfPages; page++)
        {
            ImageText();

            // Use the extracted string as-is: .NET strings are already Unicode, and
            // the former Encoding.Default -> UTF-8 round trip could only lose characters
            // that Encoding.Default cannot represent.
            var currentText = PdfTextExtractor.GetTextFromPage(pdfReader, page);

            var strings = currentText.Split('\n');
            var invoiceItems = GetInvoiceItems(strings);

            // The page number is passed as the first argument of WriteExcel.
            WriteExcel(page.ToString(), invoiceItems);
        }
    }
    finally
    {
        // Close the reader even when a page fails.
        pdfReader.Close();
    }
}
/// <summary>
/// Loads the text of a local PDF log into a rich text box.
/// </summary>
/// <param name="type">
/// "toDo" selects <c>todo.pdf</c> and "done" selects <c>done.pdf</c>. Any other value
/// selects no file, which ends in the "No local logs found" message.
/// </param>
/// <param name="rt">Rich text box that receives the log text.</param>
/// <remarks>
/// Any failure to read the file is reported with a message box. Text extracted
/// before the failure is still shown.
/// </remarks>
public static void ReadPDFLog(string type, RichTextBox rt)
{
    string name = "";
    if (type == "toDo")
    {
        name = "todo.pdf";
    }
    else if (type == "done")
    {
        name = "done.pdf";
    }

    StringBuilder extracted = new StringBuilder();
    try
    {
        PdfReader reader = new PdfReader(name);
        try
        {
            for (int page = 1; page <= reader.NumberOfPages; page++)
            {
                ITextExtractionStrategy its = new iTextSharp.text.pdf.parser.LocationTextExtractionStrategy();

                // Append the extracted string as-is: .NET strings are already Unicode,
                // and the former Encoding.Default -> UTF-8 round trip could only lose
                // characters that Encoding.Default cannot represent.
                extracted.Append(PdfTextExtractor.GetTextFromPage(reader, page, its));
            }
        }
        finally
        {
            // Close the reader even when extraction throws.
            reader.Close();
        }

        // Update the control once rather than once per page.
        rt.Text = extracted.ToString();
    }
    catch (Exception)
    {
        // Keep whatever was read before the failure visible.
        if (extracted.Length > 0)
        {
            rt.Text = extracted.ToString();
        }

        MessageBox.Show("No local logs found, please create new");
    }
}
/// <summary>
/// Reads a file as text and returns it on a single line.
/// </summary>
/// <param name="readText">
/// Path of the file. Paths ending in "pdf" (any letter case) are read as a PDF and
/// everything else is read as a plain text file.
/// </param>
/// <returns>The file's text with every line feed replaced by a space.</returns>
public String readTextData(String readText)
{
    string content;

    // EndsWith copes with paths shorter than three characters, which made the
    // previous Substring call throw, and also accepts ".PDF".
    if (readText.EndsWith("pdf", StringComparison.OrdinalIgnoreCase))
    {
        StringBuilder pages = new StringBuilder();
        PdfReader reader = new PdfReader(readText);
        try
        {
            for (int page = 1; page <= reader.NumberOfPages; page++)
            {
                ITextExtractionStrategy its = new iTextSharp.text.pdf.parser.SimpleTextExtractionStrategy();

                // Append the extracted string as-is: .NET strings are already Unicode,
                // and the former Encoding.Default -> UTF-8 round trip could only lose
                // characters that Encoding.Default cannot represent.
                pages.Append(PdfTextExtractor.GetTextFromPage(reader, page, its));
            }
        }
        finally
        {
            // Close the reader even when extraction throws.
            reader.Close();
        }

        content = pages.ToString();
    }
    else
    {
        content = System.IO.File.ReadAllText(readText);
    }

    // Strings are immutable: Replace returns a new string. The result used to be
    // discarded, so the line feeds were never actually removed.
    return content.Replace("\n", " ");
}