/// <summary>
/// Lets the user pick a PDF file and shows the text of all its pages in <c>richTextBox1</c>.
/// </summary>
/// <param name="sender">Control that raised the click event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button2_Click(object sender, EventArgs e)
{
    string filepath;
    using (OpenFileDialog dlg = new OpenFileDialog())
    {
        dlg.Filter = "PDF Files(*.PDF)|*.PDF|All Files(*.*)|*.*";
        if (dlg.ShowDialog() != DialogResult.OK)
        {
            return;
        }
        filepath = dlg.FileName;
    }

    StringBuilder extracted = new StringBuilder();
    bool anyPageRead = false;
    try
    {
        PdfReader reader = new PdfReader(filepath);
        try
        {
            for (int page = 1; page <= reader.NumberOfPages; page++)
            {
                // A fresh strategy per page keeps earlier pages out of the result.
                ITextExtractionStrategy its = new iTextSharp.text.pdf.parser.LocationTextExtractionStrategy();

                // The extracted text is already a .NET string. The former
                // Encoding.Default -> UTF8 byte round-trip could only replace characters
                // outside the ANSI code page with '?', so it is intentionally omitted.
                extracted.Append(PdfTextExtractor.GetTextFromPage(reader, page, its));
                anyPageRead = true;
            }
        }
        finally
        {
            // Release the file even when a page fails to parse.
            reader.Close();
        }
    }
    catch (Exception ex)
    {
        MessageBox.Show(ex.Message);
    }

    // Update the control once instead of once per page; pages read before a failure are still shown.
    if (anyPageRead)
    {
        richTextBox1.Text = extracted.ToString();
    }
}
/// <summary>
/// Extracts the text of every page of a PDF, writes it to <c>c:\test.txt</c> and returns it.
/// </summary>
/// <param name="path">Path of the PDF file to read.</param>
/// <returns>The concatenated text of all pages.</returns>
public static string ExtractTextFromPdf(string path)
{
    using (PdfReader reader = new PdfReader(path))
    {
        StringBuilder text = new StringBuilder();
        for (int page = 1; page <= reader.NumberOfPages; page++)
        {
            ITextExtractionStrategy strategy = new SimpleTextExtractionStrategy();

            // No Encoding.Default -> UTF8 round-trip: the text is already a .NET string and
            // the conversion could only turn characters outside the ANSI code page into '?'.
            text.Append(PdfTextExtractor.GetTextFromPage(reader, page, strategy));
        }

        // NOTE(review): the hard-coded dump file is kept because callers may rely on it;
        // consider making the destination a parameter or removing it.
        using (System.IO.StreamWriter file = new System.IO.StreamWriter("c:\\test.txt"))
        {
            file.WriteLine(text);
        }

        return text.ToString();
    }
}
/// <summary>
/// Parses the PDF at <c>Context.Path</c> into a <c>ToxyDocument</c> with one paragraph per text line.
/// </summary>
/// <returns>The document holding every extracted line, in page order.</returns>
/// <exception cref="FileNotFoundException">The file at <c>Context.Path</c> does not exist.</exception>
public ToxyDocument Parse()
{
    if (!File.Exists(Context.Path))
    {
        throw new FileNotFoundException("File " + Context.Path + " is not found");
    }

    ToxyDocument rdoc = new ToxyDocument();
    using (PdfReader reader = new PdfReader(this.Context.Path))
    {
        for (int i = 1; i <= reader.NumberOfPages; i++)
        {
            // The strategy must be created per page: a LocationTextExtractionStrategy keeps the
            // chunks it has already seen, so a shared instance returns page 1..i text for page i.
            ITextExtractionStrategy its = new iTextSharp.text.pdf.parser.LocationTextExtractionStrategy();
            string thePage = PdfTextExtractor.GetTextFromPage(reader, i, its);

            foreach (string theLine in thePage.Split('\n'))
            {
                ToxyParagraph para = new ToxyParagraph();
                para.Text = theLine;
                rdoc.Paragraphs.Add(para);
            }
        }
    }
    return rdoc;
}
/// <summary>
/// Returns the text of every page of the PDF held by <c>pdfReader</c> and then closes the reader.
/// </summary>
/// <returns>The concatenated text of all pages.</returns>
public override string GetFileContent()
{
    StringBuilder text = new StringBuilder();
    try
    {
        for (int page = 1; page <= pdfReader.NumberOfPages; page++)
        {
            ITextExtractionStrategy strategy = new iTextSharp.text.pdf.parser.LocationTextExtractionStrategy();

            // No Encoding.Default -> UTF8 round-trip: it could only replace characters
            // outside the ANSI code page with '?'.
            text.Append(PdfTextExtractor.GetTextFromPage(pdfReader, page, strategy));
        }
    }
    finally
    {
        // Close once, after the last page. The reader used to be closed inside the loop,
        // i.e. right after page 1 while later iterations still used it.
        pdfReader.Close();
    }
    return text.ToString();
}
/// <summary>
/// Reads the PDF at <c>Context.Path</c> and returns its text, one page after another,
/// each page terminated by a line break.
/// </summary>
/// <returns>The text of all pages.</returns>
public string Parse()
{
    StringBuilder pages = new StringBuilder();
    using (PdfReader pdf = new PdfReader(this.Context.Path))
    {
        int pageNumber = 1;
        while (pageNumber <= pdf.NumberOfPages)
        {
            // Each page gets its own extraction strategy instance.
            ITextExtractionStrategy layout = new iTextSharp.text.pdf.parser.LocationTextExtractionStrategy();
            pages.AppendLine(PdfTextExtractor.GetTextFromPage(pdf, pageNumber, layout));
            pageNumber++;
        }
    }
    return pages.ToString();
}
/// <summary>
/// Extracts the text of each page of a PDF file.
/// </summary>
/// <param name="src">The path to the PDF file.</param>
/// <returns>One string per page, in page order.</returns>
public string[] ParsePdfToText(string src)
{
    iTextSharp.text.pdf.PdfReader reader = new iTextSharp.text.pdf.PdfReader(src);
    try
    {
        int pageCount = reader.NumberOfPages;
        List<string> text = new List<string>(pageCount);
        for (int pg = 1; pg <= pageCount; pg++)
        {
            // One strategy per page: a shared LocationTextExtractionStrategy keeps the chunks
            // of earlier pages, so entry N would contain pages 1..N instead of page N only.
            LocationTextExtractionStrategy strategy = new iTextSharp.text.pdf.parser.LocationTextExtractionStrategy();
            text.Add(PdfTextExtractor.GetTextFromPage(reader, pg, strategy));
        }
        return text.ToArray();
    }
    finally
    {
        // The reader was never closed before; release the file in every case.
        reader.Close();
    }
}
/// <summary>
/// Returns the text of the PDF at <c>Context.Path</c>; every page is followed by a line break.
/// </summary>
/// <returns>The text of all pages.</returns>
/// <exception cref="FileNotFoundException">The file at <c>Context.Path</c> does not exist.</exception>
public string Parse()
{
    bool missing = !File.Exists(Context.Path);
    if (missing)
    {
        throw new FileNotFoundException(string.Concat("File ", Context.Path, " is not found"));
    }

    using (PdfReader document = new PdfReader(this.Context.Path))
    {
        StringBuilder buffer = new StringBuilder();
        for (int pageNo = 1; pageNo <= document.NumberOfPages; ++pageNo)
        {
            // A separate strategy instance is used for every page.
            ITextExtractionStrategy layout = new iTextSharp.text.pdf.parser.LocationTextExtractionStrategy();
            string pageText = PdfTextExtractor.GetTextFromPage(document, pageNo, layout);
            buffer.AppendLine(pageText);
        }
        return buffer.ToString();
    }
}
/// <summary>
/// Reads aloud the file configured under the <c>FilePath</c> app setting (PDF or plain text).
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    try
    {
        var filePath = ConfigurationManager.AppSettings["FilePath"];
        var extention = System.IO.Path.GetExtension(filePath);
        if (extention != null)
        {
            // Match ".PDF" / ".Txt" as well; a missing setting still falls through to "Error!!".
            extention = extention.ToLowerInvariant();
        }

        using (SpeechSynthesizer synthesizer = new SpeechSynthesizer())
        {
            synthesizer.SelectVoiceByHints(VoiceGender.Male, VoiceAge.Adult);
            synthesizer.Volume = 100; // (0 - 100)
            synthesizer.Rate = -1;    // (-10 - 10)

            switch (extention)
            {
                case ".pdf":
                    using (PdfReader pdfReader = new PdfReader(filePath))
                    {
                        for (int i = 1; i <= pdfReader.NumberOfPages; i++)
                        {
                            // New strategy per page: a shared instance accumulates text, so every
                            // page would be spoken together with all pages before it.
                            ITextExtractionStrategy its = new iTextSharp.text.pdf.parser.LocationTextExtractionStrategy();
                            string thePage = PdfTextExtractor.GetTextFromPage(pdfReader, i, its);
                            synthesizer.Speak(thePage);
                        }
                    }
                    break;

                case ".txt":
                    synthesizer.Speak(System.IO.File.ReadAllText(filePath));
                    break;

                default:
                    Console.WriteLine("Error!!");
                    break;
            }
        }
    }
    catch (Exception ex)
    {
        Console.WriteLine("Exceptoion!!\n" + ex.Message);
    }
}
/// <summary>
/// Asks the user for a PDF file, caches the text of each page in <c>_PagesText</c>
/// and shows the first page in <c>RiTe_View</c>.
/// </summary>
/// <param name="sender">Control that raised the click event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void BuLoadIn_Click(object sender, EventArgs e)
{
    OpenFileDialog picker = new OpenFileDialog();
    picker.Filter = FileTypes;
    if (picker.ShowDialog() != DialogResult.OK)
    {
        return;
    }

    try
    {
        using (PdfReader document = new PdfReader(picker.FileName))
        {
            _PagesText = new List<string>(document.NumberOfPages);
            _PageCount = document.NumberOfPages;

            for (int pageNumber = 1; pageNumber <= document.NumberOfPages; pageNumber++)
            {
                // The page text is stored exactly as extracted (no re-encoding step).
                ITextExtractionStrategy layout = new iTextSharp.text.pdf.parser.LocationTextExtractionStrategy();
                _PagesText.Add(PdfTextExtractor.GetTextFromPage(document, pageNumber, layout));
            }

            // Reset the pager to the first page.
            _Page = 0;
            Bu_Prev.Enabled = false;
            La_PageNumber.Text = La_PagerNumberQuick + 1;
            Bu_Next.Enabled = _PageCount > 1;
            RiTe_View.Text = _PagesText[_Page];
        }
    }
    catch (Exception failure)
    {
        MessageBox.Show(failure.Message);
    }
}
/// <summary>
/// Reads a user-selected PDF file into a rich text box, writes the box content to
/// <c>pdf_to_text.txt</c> and opens that text file.
/// </summary>
/// <param name="rt">Rich text box that receives the extracted text.</param>
public static void read_pd_file(RichTextBox rt)
{
    using (OpenFileDialog op = new OpenFileDialog())
    {
        op.Filter = "PDF Files(*.PDF)|*.PDF|All files(*.*)|*.*";
        if (op.ShowDialog() == DialogResult.OK)
        {
            StringBuilder extracted = new StringBuilder();
            bool anyPageRead = false;
            try
            {
                // Add the PDF text to the rich text box.
                PdfReader reader = new PdfReader(op.FileName);
                try
                {
                    for (int page = 1; page <= reader.NumberOfPages; page++)
                    {
                        ITextExtractionStrategy its = new iTextSharp.text.pdf.parser.LocationTextExtractionStrategy();

                        // No Encoding.Default -> UTF8 round-trip: it could only replace
                        // characters outside the ANSI code page with '?'.
                        extracted.Append(PdfTextExtractor.GetTextFromPage(reader, page, its));
                        anyPageRead = true;
                    }
                }
                finally
                {
                    // Release the file even when a page fails to parse.
                    reader.Close();
                }
            }
            catch (Exception ex)
            {
                MessageBox.Show(ex.Message);
            }

            // Update the control once instead of once per page.
            if (anyPageRead)
            {
                rt.Text = extracted.ToString();
            }
        }
    }

    // Convert to a text file. As before, this also runs when the dialog was cancelled
    // and then exports whatever the box currently contains.
    using (StreamWriter sw = new StreamWriter("pdf_to_text.txt"))
    {
        sw.Write(rt.Text);
    }
    System.Diagnostics.Process.Start("pdf_to_text.txt");
}
/// <summary>
/// Reads a PDF file and hands the text of each page to a <c>PageReaderHelper</c>.
/// </summary>
/// <param name="pdf_file">Name of the PDF file to be processed.</param>
private static void ReadPDF(string pdf_file)
{
    using (PdfReader document = new PdfReader(pdf_file))
    {
        PageReaderHelper helper = new PageReaderHelper();

        // Walk the document page by page (page numbers start at 1).
        for (int pageNumber = 1; pageNumber <= document.NumberOfPages; pageNumber++)
        {
            // The strategy is created inside the loop on purpose, see
            // https://stackoverflow.com/questions/35911062/why-are-gettextfrompage-from-itextsharp-returning-longer-and-longer-strings
            ITextExtractionStrategy layout = new iTextSharp.text.pdf.parser.LocationTextExtractionStrategy();

            // Fetch the whole page content and process it.
            string pageText = PdfTextExtractor.GetTextFromPage(document, pageNumber, layout);
            helper.ProcessPage(pageText);
        }
    }
}
/// <summary>
/// Extracts the text of a PDF file, one output line per extracted text line.
/// </summary>
/// <param name="path">Path of the PDF file to read.</param>
/// <returns>All lines of all pages, each terminated by <c>Environment.NewLine</c>.</returns>
public static string ExtractTextFromPdf(string path)
{
    using (PdfReader reader = new PdfReader(path))
    {
        StringBuilder text = new StringBuilder();
        for (int i = 1; i <= reader.NumberOfPages; i++)
        {
            // One strategy per page: a shared LocationTextExtractionStrategy keeps earlier
            // chunks, which made every page repeat the text of all pages before it.
            ITextExtractionStrategy its = new iTextSharp.text.pdf.parser.LocationTextExtractionStrategy();
            string thePage = PdfTextExtractor.GetTextFromPage(reader, i, its);

            foreach (string theLine in thePage.Split('\n'))
            {
                text.AppendLine(theLine);
            }
        }
        return text.ToString();
    }
}
/// <summary>
/// Extracts the text of the PDF at <c>Context.Path</c>, appending a line break after each page.
/// </summary>
/// <returns>The text of all pages.</returns>
/// <exception cref="FileNotFoundException">The file at <c>Context.Path</c> does not exist.</exception>
public string Parse()
{
    if (File.Exists(Context.Path) == false)
    {
        throw new FileNotFoundException("File " + Context.Path + " is not found");
    }

    StringBuilder collected = new StringBuilder();
    using (PdfReader pdf = new PdfReader(this.Context.Path))
    {
        int done = 0;
        while (done < pdf.NumberOfPages)
        {
            done++; // now the 1-based number of the page being read

            // Every page is extracted with its own strategy object.
            ITextExtractionStrategy perPage = new iTextSharp.text.pdf.parser.LocationTextExtractionStrategy();
            collected.AppendLine(PdfTextExtractor.GetTextFromPage(pdf, done, perPage));
        }
    }
    return collected.ToString();
}
/// <summary>
/// Gets the requested table page of the PDF file as a string.
/// </summary>
/// <param name="pagina">Number of the page to get.</param>
/// <returns>The text extracted from that page.</returns>
/// <exception cref="FileNotFoundException">The file at <c>ruta</c> does not exist.</exception>
public string ObtenerPaginaTabla(int pagina)
{
    if (!File.Exists(ruta))
    {
        // Report the actual path; the message used to be the literal text "ruta".
        throw new FileNotFoundException("PDF file not found: " + ruta, ruta);
    }

    using (PdfReader reader = new PdfReader(ruta))
    {
        ITextExtractionStrategy its = new iTextSharp.text.pdf.parser.LocationTextExtractionStrategy();

        // Returned as extracted: the former Encoding.Default -> UTF8 round-trip could only
        // replace characters outside the ANSI code page with '?'.
        return PdfTextExtractor.GetTextFromPage(reader, pagina, its);
    }
}
/// <summary>
/// Reads the PDF at <paramref name="path"/> and prints every extracted line to the console.
/// </summary>
/// <param name="path">Path of the PDF file to read.</param>
public CitestePDF(string path)
{
    using (PdfReader reader = new PdfReader(path))
    {
        for (int i = 1; i <= reader.NumberOfPages; i++)
        {
            Console.WriteLine("CitestePDF()");

            // One strategy per page: a shared LocationTextExtractionStrategy keeps earlier
            // chunks, so each page would print the lines of all previous pages again.
            ITextExtractionStrategy Strategy = new iTextSharp.text.pdf.parser.LocationTextExtractionStrategy();
            string page = PdfTextExtractor.GetTextFromPage(reader, i, Strategy);

            foreach (string line in page.Split('\n'))
            {
                Console.WriteLine(line);
            }
        }
    }
}
/// <summary>
/// Reads the PDF at <paramref name="path"/> and shows every extracted line in a message box.
/// </summary>
/// <param name="path">Path of the PDF file to read.</param>
public void ExtractTextFromPdf(string path)
{
    using (PdfReader reader = new PdfReader(path))
    {
        for (int i = 1; i <= reader.NumberOfPages; i++)
        {
            // One strategy per page: a shared LocationTextExtractionStrategy keeps earlier
            // chunks, so each page would show the lines of all previous pages again.
            ITextExtractionStrategy Strategy = new iTextSharp.text.pdf.parser.LocationTextExtractionStrategy();
            string page = PdfTextExtractor.GetTextFromPage(reader, i, Strategy);

            foreach (string line in page.Split('\n'))
            {
                MessageBox.Show(line);
            }
        }
    }
}
/// <summary>
/// Extracts the text of a PDF on a background worker, reporting progress per page, and
/// stores the result in <c>MainForm.finaltext</c>.
/// </summary>
/// <param name="backgroundWorker">Worker used for progress reporting and cancellation.</param>
/// <param name="path">Path of the PDF file to read; also stored in <c>mpath</c>.</param>
public void iTextSharp(BackgroundWorker backgroundWorker, string path)
{
    using (PdfReader reader = new PdfReader(path))
    {
        mpath = path;
        StringBuilder builder = new StringBuilder();
        int pageCount = reader.NumberOfPages;

        for (int i = 1; i <= pageCount; i++)
        {
            if (backgroundWorker.CancellationPending)
            {
                // Stop right away instead of idling through the remaining page numbers;
                // the text gathered so far is still published below, as before.
                break;
            }

            backgroundWorker.ReportProgress((i * 100) / pageCount);

            ITextExtractionStrategy its = new iTextSharp.text.pdf.parser.LocationTextExtractionStrategy();
            builder.Append(PdfTextExtractor.GetTextFromPage(reader, i, its));
        }

        MainForm.finaltext = builder.ToString();
    }
}
/// <summary>
/// Shows the page given by the <c>track</c> query-string value of a PDF book in
/// <c>lblTexto</c> and records that page number in the user's reading-progress file.
/// </summary>
/// <param name="book">Physical path of the PDF file.</param>
private void getContenido(string book)
{
    page = Convert.ToInt32(Request.QueryString["track"]);
    if (!File.Exists(book))
    {
        return;
    }

    lblTexto.Text = "";

    string ExtractedData;
    PdfReader reader = new PdfReader(book);
    try
    {
        ITextExtractionStrategy strategy = new iTextSharp.text.pdf.parser.LocationTextExtractionStrategy();
        ExtractedData = PdfTextExtractor.GetTextFromPage(reader, page, strategy);
    }
    finally
    {
        // The reader was never closed before, which kept the book file open.
        reader.Close();
    }

    // Every line, including the last one, ends with a line break (same output shape as the
    // former split/re-join). The PDF text is HTML-encoded first so that markup inside a book
    // is displayed literally instead of being interpreted by the browser.
    lblTexto.Text = Server.HtmlEncode(ExtractedData + "\n").Replace("\n", "<br/>");

    // Remember the page the user is currently reading.
    using (StreamWriter writer = new StreamWriter(Server.MapPath("~/LibrosPortadas/" + Session["Usuario"] + "/Reading/" + tit + ".txt")))
    {
        writer.WriteLine(page);
    }
}
/// <summary>
/// Shows the given page of a PDF book in <c>lblTexto</c> and records the page number in the
/// user's reading-progress file.
/// </summary>
/// <param name="book">Physical path of the PDF file.</param>
/// <param name="page">Number of the page to show.</param>
protected void getNextPage(string book, int page)
{
    if (!File.Exists(book))
    {
        return;
    }

    lblTexto.Text = "";

    string ExtractedData;
    PdfReader reader = new PdfReader(book);
    try
    {
        ITextExtractionStrategy strategy = new iTextSharp.text.pdf.parser.LocationTextExtractionStrategy();
        ExtractedData = PdfTextExtractor.GetTextFromPage(reader, page, strategy);
    }
    finally
    {
        // The reader was never closed before, which kept the book file open.
        reader.Close();
    }

    // Every line, including the last one, ends with a line break (same output shape as the
    // former split/re-join). The PDF text is HTML-encoded first so that markup inside a book
    // is displayed literally instead of being interpreted by the browser.
    lblTexto.Text = Server.HtmlEncode(ExtractedData + "\n").Replace("\n", "<br/>");

    // Remember the page the user is currently reading.
    using (StreamWriter writer = new StreamWriter(Server.MapPath("~/LibrosPortadas/" + Session["Usuario"] + "/Reading/" + tit + ".txt")))
    {
        writer.WriteLine(page);
    }
}
/// <summary>
/// Returns the text of the given page of a PDF book and records the page number in the
/// user's reading-progress file.
/// </summary>
/// <param name="book">Physical path of the PDF file.</param>
/// <param name="page">Number of the page to read.</param>
/// <param name="UserName">User whose reading-progress folder is written to.</param>
/// <param name="NameBook">Book name; spaces are replaced by underscores for the file name.</param>
/// <returns>
/// The page text with every line terminated by <c>Environment.NewLine</c>, or an empty
/// string when <paramref name="book"/> does not exist.
/// </returns>
protected string getNextPage(string book, int page, string UserName, string NameBook)
{
    if (!File.Exists(book))
    {
        return "";
    }

    string ExtractedData;
    PdfReader reader = new PdfReader(book);
    try
    {
        ITextExtractionStrategy strategy = new iTextSharp.text.pdf.parser.LocationTextExtractionStrategy();
        ExtractedData = PdfTextExtractor.GetTextFromPage(reader, page, strategy);
    }
    finally
    {
        // The reader was never closed before, which kept the book file open.
        reader.Close();
    }

    // Same result as appending "line + NewLine" for each line, without quadratic concatenation.
    string texto = string.Join(Environment.NewLine, ExtractedData.Split('\n')) + Environment.NewLine;

    // NOTE(review): UserName and NameBook go into the path unchecked; confirm callers
    // cannot pass path separators or "..".
    using (StreamWriter writer = new StreamWriter(Server.MapPath("~/LibrosPortadas/" + UserName + "/Reading/" + NameBook.Replace(" ", "_") + ".txt")))
    {
        writer.WriteLine(page);
    }

    return texto;
}
/// <summary>
/// Lets the user pick a PDF file, shows its text in <c>richTextBox1</c> and writes the
/// content of the box to <c>PDF_to_Text.rtf</c>.
/// </summary>
/// <param name="sender">Control that raised the click event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button1_Click(object sender, EventArgs e)
{
    using (OpenFileDialog dlg = new OpenFileDialog())
    {
        dlg.Filter = "PDF Files(*.PDF)|*.PDF|All files (*.*)|*.*";
        if (dlg.ShowDialog() == DialogResult.OK)
        {
            StringBuilder extracted = new StringBuilder();
            bool anyPageRead = false;
            try
            {
                PdfReader reader = new PdfReader(dlg.FileName);
                try
                {
                    for (int page = 1; page <= reader.NumberOfPages; page++)
                    {
                        ITextExtractionStrategy its = new iTextSharp.text.pdf.parser.LocationTextExtractionStrategy();

                        // No Encoding.Default -> UTF8 round-trip: it could only replace
                        // characters outside the ANSI code page with '?'.
                        extracted.Append(PdfTextExtractor.GetTextFromPage(reader, page, its));
                        anyPageRead = true;
                    }
                }
                finally
                {
                    // Release the file even when a page fails to parse.
                    reader.Close();
                }
            }
            catch (Exception ex)
            {
                MessageBox.Show(ex.Message);
            }

            // Update the control once instead of once per page.
            if (anyPageRead)
            {
                richTextBox1.Text = extracted.ToString();
            }
        }
    }

    // As before, the export also runs when the dialog was cancelled. The local is no longer
    // called "File", which shadowed System.IO.File inside this method.
    using (StreamWriter output = new StreamWriter("PDF_to_Text.rtf"))
    {
        output.Write(richTextBox1.Text);
    }
}
/// <summary>
/// Loads the text of the PDF named <c>id + ".pdf"</c> into a rich text box.
/// </summary>
/// <param name="id">File name of the PDF without the <c>.pdf</c> extension.</param>
/// <param name="rt">Rich text box that receives the extracted text.</param>
public static void ReadPDFLog(string id, RichTextBox rt)
{
    StringBuilder extracted = new StringBuilder();
    bool anyPageRead = false;
    try
    {
        // Add the PDF text to the rich text box.
        PdfReader reader = new PdfReader(id + ".pdf");
        try
        {
            for (int page = 1; page <= reader.NumberOfPages; page++)
            {
                ITextExtractionStrategy its = new iTextSharp.text.pdf.parser.LocationTextExtractionStrategy();

                // No Encoding.Default -> UTF8 round-trip: it could only replace
                // characters outside the ANSI code page with '?'.
                extracted.Append(PdfTextExtractor.GetTextFromPage(reader, page, its));
                anyPageRead = true;
            }
        }
        finally
        {
            // Release the file even when a page fails to parse.
            reader.Close();
        }
    }
    catch (Exception ex)
    {
        MessageBox.Show(ex.Message);
    }

    // Update the control once instead of once per page; pages read before a failure are still shown.
    if (anyPageRead)
    {
        rt.Text = extracted.ToString();
    }
}
/// <summary>
/// Extracts the text of several PDF files.
/// </summary>
/// <param name="files">Paths of the PDF files to read.</param>
/// <returns>A dictionary mapping each file path to the concatenated text of its pages.</returns>
private static Dictionary<string, string> GetFileContentsThroughIText(string[] files)
{
    Dictionary<string, string> contents = new Dictionary<string, string>();
    foreach (var file in files)
    {
        StringBuilder text = new StringBuilder();
        using (PdfReader reader = new PdfReader(file))
        {
            for (int i = 1; i <= reader.NumberOfPages; i++)
            {
                // One strategy per page: a shared LocationTextExtractionStrategy keeps earlier
                // chunks, so the text of page 1 would be appended again for every later page.
                ITextExtractionStrategy Strategy = new iTextSharp.text.pdf.parser.LocationTextExtractionStrategy();
                text.Append(PdfTextExtractor.GetTextFromPage(reader, i, Strategy));
            }
        }
        contents.Add(file, text.ToString());
    }
    return contents;
}
/// <summary>
/// Appends the text of the first pages of a PDF, line by line, to a text file.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    string filePath = @"Your said path\the file name.pdf";
    string outPath = @"the output said path\the text file name.txt";
    int pagesToScan = 2;

    try
    {
        PdfReader reader = new PdfReader(filePath);
        try
        {
            // Never ask for more pages than the document has.
            // Use reader.NumberOfPages alone to scan all the pages of a PDF.
            int lastPage = Math.Min(pagesToScan, reader.NumberOfPages);

            // Open the output once (append mode) instead of once per line.
            using (System.IO.StreamWriter file = new System.IO.StreamWriter(outPath, true))
            {
                for (int page = 1; page <= lastPage; page++)
                {
                    ITextExtractionStrategy its = new iTextSharp.text.pdf.parser.LocationTextExtractionStrategy();

                    // No Encoding.Default -> UTF8 round-trip: it could only replace
                    // characters outside the ANSI code page with '?'.
                    string strText = PdfTextExtractor.GetTextFromPage(reader, page, its);

                    // Store the page line by line.
                    foreach (string line in strText.Split('\n'))
                    {
                        file.WriteLine(line);
                    }
                }
            }
        }
        finally
        {
            // Release the PDF even when extraction or writing fails.
            reader.Close();
        }
    }
    catch (Exception ex)
    {
        Console.Write(ex);
    }
}
/// <summary>
/// Exports the text blocks of every page of a PDF to a "&lt;name&gt;_blocks.txt" file.
/// </summary>
/// <param name="file">Path of the PDF file to export.</param>
public static void Test_GetPdfText_05(String file)
{
    string outputFile = zpath.PathSetFileNameWithExtension(file, Path.GetFileNameWithoutExtension(file) + "_blocks.txt");
    Trace.WriteLine("export pdf file \"{0}\" to \"{1}\"", file, outputFile);

    // using blocks close the output file even when a page fails to process.
    using (FileStream fs = new FileStream(outputFile, FileMode.Create, FileAccess.Write, FileShare.Read))
    using (StreamWriter sw = new StreamWriter(fs, Encoding.Default))
    {
        sw.WriteLine("export pdf text blocks of \"{0}\"", file);
        sw.WriteLine();

        iTextSharp.text.pdf.PdfReader reader = new iTextSharp.text.pdf.PdfReader(file);
        try
        {
            // Fully qualified because 'LocationTextExtractionStrategy' is ambiguous between
            // iTextSharp.text.pdf.parser and Test_iTextSharp.
            // NOTE(review): the same strategy instance is reused for all pages, as before.
            // Whether textBlocks is reset per page is not visible here; confirm.
            iTextSharp.text.pdf.parser.LocationTextExtractionStrategy strategy = new iTextSharp.text.pdf.parser.LocationTextExtractionStrategy();

            for (int page = 1; page <= reader.NumberOfPages; page++)
            {
                sw.WriteLine("================ page {0} ================", page);
                Test_iTextSharp.PdfTools.ProcessContentPage(reader, page, strategy);
                PrintTextBlocks(sw, strategy.textBlocks);
                sw.WriteLine();
            }
        }
        finally
        {
            reader.Close();
        }
    }
}
/// <summary>
/// Imports a bank statement PDF (bank id "004") into an <c>OFXDocument</c>: account data,
/// statement period, generation date and the list of transactions.
/// </summary>
/// <param name="pathFilename">Path of the statement PDF.</param>
/// <returns>The populated document.</returns>
/// <exception cref="Exception">
/// A line could not be parsed; the message is "&lt;line index&gt; -> &lt;cause&gt;" and the
/// original exception is available as <c>InnerException</c>.
/// </exception>
public static OFXDocument Import(string pathFilename)
{
    OFXDocument document = new OFXDocument();
    document.Account = new Account();
    document.Account.BankID = "004";
    document.Transactions = new List<Transaction>();
    document.SignOn = new SignOn();

    bool tratamento = false;          // true while a multi-line memo is being collected
    bool dataGeracaoExtrato = false;  // true once the statement generation date was found
    string textTemp = String.Empty;   // memo continuation text collected so far

    // Read all pages; every page is prefixed with '\n', so line 0 is empty and
    // line 1 is the first line of the first page.
    StringBuilder allText = new StringBuilder();
    PdfReader reader = new PdfReader((string)pathFilename);
    try
    {
        for (int page = 1; page <= reader.NumberOfPages; page++)
        {
            ITextExtractionStrategy its = new iTextSharp.text.pdf.parser.LocationTextExtractionStrategy();
            String pageText = PdfTextExtractor.GetTextFromPage(reader, page, its);

            // NOTE(review): this Default -> UTF8 round-trip is lossy for characters outside the
            // ANSI code page; kept because the parser below was written against its output.
            pageText = Encoding.UTF8.GetString(ASCIIEncoding.Convert(Encoding.Default, Encoding.UTF8, Encoding.Default.GetBytes(pageText)));
            allText.Append("\n").Append(pageText);
        }
    }
    finally
    {
        // Release the file even when a page fails to parse.
        reader.Close();
    }

    List<string> temp = allText.ToString().Split('\n').ToList();
    bool head = false; // true once the transaction table header was seen
    Transaction transaction = new Transaction();

    for (int i = 0; i < temp.Count; i++)
    {
        string line = temp[i];
        string value = String.Empty;
        try
        {
            if (i == 1)
            {
                // [Statement generation date]
                if (iniciaComData(line))
                {
                    value = line.TrimStart().Substring(0, 10);
                    try
                    {
                        document.SignOn.DTServer = Convert.ToDateTime(value);
                        dataGeracaoExtrato = true;
                    }
                    catch
                    {
                        // failed to read the statement generation date
                    }
                }
            }
            else if (!head && line.Trim().StartsWith("Agência:"))
            {
                value = line.Substring(line.IndexOf("Agência:") + 8);
                document.Account.BranchID = value.Trim().Substring(0, value.Trim().IndexOf(" "));
                if (value.Contains("Conta Corrente:"))
                {
                    value = line.Substring((line.IndexOf("Corrente:") + 9), 9);
                    document.Account.AccountID = value.Trim();
                }
            }
            else if (!head && line.Trim().StartsWith("Conta Corrente:"))
            {
                value = line.Substring((line.IndexOf("Corrente:") + 9), 9);
                document.Account.AccountID = value.Trim();
            }
            else if (!head && line.Trim().Contains("Período:"))
            {
                value = line.TrimEnd();
                value = value.Substring(value.IndexOf("Período:") + 8);
                int indiceAte = value.IndexOf("até");
                try
                {
                    document.StatementStart = Convert.ToDateTime(value.Substring(0, indiceAte).Trim());
                }
                catch
                {
                    // failed to read the start date of the statement period
                }
                try
                {
                    document.StatementEnd = Convert.ToDateTime(value.Substring(indiceAte + 3).Trim());
                }
                catch
                {
                    // failed to read the end date of the statement period
                }
            }
            else if (!head && line.Trim().Contains("Data Histórico Documento Valor R$ Saldo R$"))
            {
                head = true;
            }
            else if (head
                     && line.IndexOf("https") < 0
                     && !line.EndsWith("Nordeste Eletrônico")
                     && !line.StartsWith("Nordeste Eletrônico Page")
                     && !line.StartsWith("Importante:")
                     && !line.StartsWith("Não constam valores de aplicações e resgates efetuados no dia.")
                     && !line.StartsWith("Banco do Nordeste - Cliente Consulta | Ouvidoria:"))
            {
                String cipherText = Encoding.UTF8.GetString(ASCIIEncoding.Convert(Encoding.Default, Encoding.UTF8, Encoding.Default.GetBytes(line)));
                if (iniciaComData(cipherText))
                {
                    try
                    {
                        transaction.Date = DateTime.ParseExact(cipherText.Substring(0, 10) + " 00:00:00.000", "dd/MM/yyyy HH:mm:ss.fff", CultureInfo.InvariantCulture);
                    }
                    catch
                    {
                        throw new Exception("'" + cipherText.Substring(0, 10) + "' não corresponde a uma data válida (1)");
                    }

                    // Look for the amount and the document number, scanning from the right.
                    string auxiliar = cipherText.Substring(10).TrimEnd();
                    int index = auxiliar.LastIndexOf(" ");
                    if (index > 1 && auxiliar[index - 1] == '-')
                    {
                        auxiliar = auxiliar.Substring(0, index) + auxiliar.Substring(index + 1);
                        index -= 2; // a negative value has its '-' separated by a space
                    }

                    decimal amount = new decimal(0.0);
                    try
                    {
                        amount = Convert.ToDecimal(auxiliar.Substring(index + 1));
                    }
                    catch
                    {
                        throw new Exception("'" + auxiliar.Substring(index + 1) + "' não corresponde a um valor monetário (1)");
                    }

                    auxiliar = auxiliar.Substring(0, index).TrimEnd();
                    index = auxiliar.LastIndexOf(" ");
                    if (index > 1 && auxiliar[index - 1] == '-')
                    {
                        auxiliar = auxiliar.Substring(0, index) + auxiliar.Substring(index + 1);
                        index -= 2; // a negative value has its '-' separated by a space
                    }

                    try
                    {
                        // The next token must be the (integer) document number ...
                        Convert.ToInt32(auxiliar.Substring(index + 1));
                    }
                    catch
                    {
                        // ... otherwise it is taken as the amount and the document number is
                        // expected one token further to the left.
                        amount = Convert.ToDecimal(auxiliar.Substring(index + 1));
                        auxiliar = auxiliar.Substring(0, index).TrimEnd();
                        index = auxiliar.LastIndexOf(" ");
                        if (index > 1 && auxiliar[index - 1] == '-')
                        {
                            auxiliar = auxiliar.Substring(0, index) + auxiliar.Substring(index + 1);
                            index -= 2; // a negative value has its '-' separated by a space
                        }
                        Convert.ToInt32(auxiliar.Substring(index + 1));
                    }

                    transaction.CheckNum = auxiliar.Substring(index + 1);
                    transaction.Amount = amount;
                    transaction.Memo = auxiliar.Substring(0, index).TrimEnd();
                    transaction.TransType = transaction.Amount > 0 ? OFXTransactionType.CREDIT : OFXTransactionType.DEBIT;

                    if (!tratamento && !transaction.Memo.Trim().Equals(""))
                    {
                        document.Transactions.Add(transaction);
                        transaction = new Transaction();
                    }
                }
                else
                {
                    // Line without a leading date: part of a memo spread over several lines.
                    textTemp += " " + cipherText;
                    if (!tratamento)
                    {
                        tratamento = true;
                    }
                    else if (transaction.Memo != null)
                    {
                        tratamento = false;
                        transaction.Memo = textTemp.Trim();
                        document.Transactions.Add(transaction);
                        textTemp = String.Empty;
                        transaction = new Transaction();
                    }
                }
            }
            else if (!dataGeracaoExtrato && line.IndexOf("https") >= 0)
            {
                value = line.Trim();
                value = value.Substring(value.LastIndexOf(" ") + 1);
                try
                {
                    document.SignOn.DTServer = Convert.ToDateTime(value);
                    dataGeracaoExtrato = true;
                }
                catch
                {
                    // failed to read the statement generation date
                }
            }
        }
        catch (Exception e)
        {
            // Same type and message as before; the cause is now kept as InnerException.
            throw new Exception(i.ToString() + " -> " + e.Message, e);
        }
    }

    return document;
}
/// <summary>
/// Scans every PDF CV in the hard-coded folder, derives skill and qualification values from
/// its text and stores them in the <c>test.skillcivil</c> and <c>test.qualificationcivil</c> tables.
/// </summary>
/// <param name="sender">Control that raised the click event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void upload_Click(object sender, EventArgs e)
{
    string path = @"F:\civil CV\";

    // NOTE(review): credentials are hard-coded in source; move them to configuration.
    string myConnection = "datasource=localhost;port=3306;username=root;password=root";

    foreach (string file in Directory.EnumerateFiles(path, "*.pdf"))
    {
        // The file name without extension is the numeric id of the CV.
        int id;
        if (!int.TryParse(System.IO.Path.GetFileNameWithoutExtension(file), out id))
        {
            MessageBox.Show("Skipping '" + file + "': the file name is not a numeric id.");
            continue;
        }

        string strText;
        try
        {
            strText = ExtractCvText(file);
        }
        catch (Exception ex)
        {
            // Without text there is nothing meaningful to store for this CV.
            MessageBox.Show(ex.Message);
            MessageBox.Show(id.ToString());
            continue;
        }

        // Keep only characters the keyword searches need and drop dotted sequences such as
        // dates or version numbers so they are not mistaken for a CGPA.
        strText = Regex.Replace(strText, @"[^A-Za-z0-9#:+.@ ]+", " ");
        strText = Regex.Replace(strText, @"[0-9]+\.[0-9]+\.[0-9]+", " ");

        // All searches are case-insensitive whole-word matches (\b ... \b).
        if (CvMentionsAny(strText, "\\bAutoCAD\\b", "\\bCAD\\b", "\\bComputer Aided Design\\b"))
        {
            cad = 1;
        }
        else
        {
            cad = 0;
        }

        if (CvMentionsAny(strText, "\\bWord\\b"))
        {
            word = 2;
        }
        else
        {
            word = 0;
        }

        if (CvMentionsAny(strText, "\\bExcel\\b"))
        {
            excel = 2;
        }
        else
        {
            excel = 0;
        }

        if (CvMentionsAny(strText, "\\bPower Point\\b"))
        {
            powerpoint = 2;
        }
        else
        {
            powerpoint = 0;
        }

        if (CvMentionsAny(strText, "\\bReal Estate\\b"))
        {
            realestate = 1;
        }
        else
        {
            realestate = 0;
        }

        if (CvMentionsAny(strText, "\\bDeveloper\\b"))
        {
            developer = 1;
        }
        else
        {
            developer = 0;
        }

        if (CvMentionsAny(strText, "\\bEngineering Firms\\b", "\\bEngineering firm\\b", "\\bConsulting firms\\b", "\\bConsulting Firm\\b"))
        {
            firm = 1;
        }
        else
        {
            firm = 0;
        }

        if (CvMentionsAny(strText, "\\bNGO\\b"))
        {
            ngo = 1;
        }
        else
        {
            ngo = 0;
        }

        if (CvMentionsAny(strText, "\\bMultinational Companies\\b", "\\bMultinational company\\b"))
        {
            multinationalcompany = 1;
        }
        else
        {
            multinationalcompany = 0;
        }

        if (CvMentionsAny(strText, "\\bBOQ\\b", "\\bBill of quantity\\b"))
        {
            boq = 1;
        }
        else
        {
            boq = 0;
        }

        if (CvMentionsAny(strText, "\\bCivil\\b"))
        {
            dept = 1;
        }
        else
        {
            dept = 0;
        }

        exp = ParseCvExperience(strText);
        cgpa = ParseCvCgpa(strText);

        try
        {
            string Query = "insert into test.skillcivil(id,autocad,word,excel,powerpoint,realestate,developer,engineeringfirm,ngo,multinationalcompany,boq) values(?id,?autocad,?word,?excel,?powerpoint,?realestate,?developer,?engineeringfirm,?ngo,?multinationalcompany,?boq);";
            using (MySqlConnection MyConn = new MySqlConnection(myConnection))
            using (MySqlCommand cmd = new MySqlCommand(Query, MyConn))
            {
                MyConn.Open();
                cmd.Parameters.AddWithValue("?id", id);
                cmd.Parameters.AddWithValue("?autocad", cad);
                cmd.Parameters.AddWithValue("?word", word);
                cmd.Parameters.AddWithValue("?excel", excel);
                cmd.Parameters.AddWithValue("?powerpoint", powerpoint);
                cmd.Parameters.AddWithValue("?realestate", realestate);
                cmd.Parameters.AddWithValue("?developer", developer);
                cmd.Parameters.AddWithValue("?engineeringfirm", firm);
                cmd.Parameters.AddWithValue("?ngo", ngo);
                cmd.Parameters.AddWithValue("?multinationalcompany", multinationalcompany);
                cmd.Parameters.AddWithValue("?boq", boq);
                cmd.ExecuteNonQuery();
            }
        }
        catch (Exception ex)
        {
            MessageBox.Show(ex.Message);
            MessageBox.Show(id.ToString());
        }

        try
        {
            string Query1 = "insert into test.qualificationcivil(id,department,experience,cgpa) values(?id,?department,?experience,?cgpa);";
            using (MySqlConnection MyConn1 = new MySqlConnection(myConnection))
            using (MySqlCommand cmd1 = new MySqlCommand(Query1, MyConn1))
            {
                MyConn1.Open();
                cmd1.Parameters.AddWithValue("?id", id);
                cmd1.Parameters.AddWithValue("?department", dept);
                cmd1.Parameters.AddWithValue("?experience", exp);
                cmd1.Parameters.AddWithValue("?cgpa", cgpa);
                cmd1.ExecuteNonQuery();
            }
        }
        catch (Exception ex)
        {
            MessageBox.Show(ex.Message);
        }
    }

    MessageBox.Show("All info Saved");
}

/// <summary>Returns the concatenated text of all pages of a PDF and always closes the reader.</summary>
private static string ExtractCvText(string file)
{
    System.Text.StringBuilder text = new System.Text.StringBuilder();
    PdfReader reader = new PdfReader(file);
    try
    {
        for (int page = 1; page <= reader.NumberOfPages; page++)
        {
            ITextExtractionStrategy its = new iTextSharp.text.pdf.parser.LocationTextExtractionStrategy();

            // No Encoding.Default -> UTF8 round-trip: it could only replace characters outside
            // the ANSI code page with '?', and those are blanked by the caller anyway.
            text.Append(PdfTextExtractor.GetTextFromPage(reader, page, its));
        }
    }
    finally
    {
        reader.Close();
    }
    return text.ToString();
}

/// <summary>Returns true when any of the regex patterns matches the text, ignoring case.</summary>
private static bool CvMentionsAny(string text, params string[] patterns)
{
    foreach (string pattern in patterns)
    {
        if (Regex.IsMatch(text, pattern, RegexOptions.IgnoreCase))
        {
            return true;
        }
    }
    return false;
}

/// <summary>
/// Returns the first digit found within the ':' of the first "Experience:"/"experience:"
/// label and the three characters after it, or 0 when there is none.
/// </summary>
private static int ParseCvExperience(string text)
{
    Match label = Regex.Match(text, "[Ee]xperience:");
    if (!label.Success)
    {
        return 0;
    }

    // Start at the ':' itself (offset 10 of the label) and never read past the end of the
    // text; the previous index arithmetic threw when the label was near the end.
    int start = label.Index + 10;
    int end = Math.Min(start + 4, text.Length);
    for (int y = start; y < end; y++)
    {
        char c = text[y];
        // NOTE(review): '9' is not accepted, exactly as before; confirm whether that is intended.
        if (c >= '0' && c <= '8')
        {
            return (int)Char.GetNumericValue(c);
        }
    }
    return 0;
}

/// <summary>
/// Returns the first number of the form 2.x or 3.x found in the text, or 0 when there is none.
/// </summary>
private static float ParseCvCgpa(string text)
{
    Match first = Regex.Match(text, @"[2-3]\.[0-9]+");
    if (!first.Success)
    {
        // Previously a CV without such a number raised an index error and lost its skill row.
        return 0f;
    }

    // The match always uses '.', so parse independently of the machine's culture.
    return float.Parse(first.Value, System.Globalization.CultureInfo.InvariantCulture);
}
// Upload CV info: scans every PDF in the CV folder, derives keyword flags, years of
// experience and CGPA from its text, and stores them in the skill/qualification tables.
// Each PDF is expected to be named after its numeric id (e.g. "12.pdf").
private void upload_Click(object sender, EventArgs e)
{
    // NOTE(review): folder and database credentials are hard-coded; move them to configuration.
    const string path = @"F:\eee CV\";
    const string connectionString = "datasource=localhost;port=3306;username=root;password=root";
    const string skillQuery =
        "insert into test.skillee(id,autocad,office,picmicrocontroller,arduino,raspberrypi,factory,garments,operationandmaintenance,installation,generator,lift,substation) " +
        "values(?id,?autocad,?office,?picmicrocontroller,?arduino,?raspberrypi,?factory,?garments,?operationandmaintenance,?installation,?generator,?lift,?substation);";
    const string qualificationQuery =
        "insert into test.qualificationee(id,department,experience,cgpa) values(?id,?department,?experience,?cgpa);";

    foreach (string file in Directory.EnumerateFiles(path, "*.pdf"))
    {
        // The id is the file name without directory and extension.
        int id;
        if (!int.TryParse(System.IO.Path.GetFileNameWithoutExtension(file), out id))
        {
            MessageBox.Show("Skipped '" + file + "': file name is not a numeric id.");
            continue;
        }

        string strText;
        try
        {
            strText = ReadCvText(file);
        }
        catch (Exception ex)
        {
            // Without text there is nothing to score; skip the inserts so that values
            // computed for the previous CV are not stored under this id.
            MessageBox.Show(ex.Message);
            MessageBox.Show(id.ToString());
            continue;
        }

        // Keyword flags. Matching is case-insensitive and on whole words/phrases.
        cad = ContainsAnyKeyword(strText, "AutoCAD", "CAD", "Computer Aided Design") ? 1 : 0;
        office = ContainsAnyKeyword(strText, "Microsoft Office", "Ms Office", "Word", "Excel", "Power Point") ? 2 : 0;
        factory = ContainsAnyKeyword(strText, "Factory", "Plant", "Facility Management") ? 1 : 0;
        garment = ContainsAnyKeyword(strText, "Garments", "Garment", "textiles", "textile") ? 1 : 0;
        op = ContainsAnyKeyword(strText, "operation and maintenance") ? 1 : 0;
        pic = ContainsAnyKeyword(strText, "PIC") ? 1 : 0;
        ardunio = ContainsAnyKeyword(strText, "Arduino") ? 1 : 0;
        raspberry = ContainsAnyKeyword(strText, "Raspberry Pi") ? 1 : 0;
        installation = ContainsAnyKeyword(strText, "installation") ? 1 : 0;
        generator = ContainsAnyKeyword(strText, "generator") ? 1 : 0;
        lift = ContainsAnyKeyword(strText, "lift") ? 1 : 0;
        substation = ContainsAnyKeyword(strText, "substation", "Sub station", "Sub-station") ? 1 : 0;
        dept = ContainsAnyKeyword(strText, "Electrical") ? 1 : 0;

        // Years of experience: the first digit found within the three characters that
        // follow the first "Experience:" / "experience:" label; 0 when there is none.
        exp = 0;
        Match label = Regex.Match(strText, "[Ee]xperience:");
        if (label.Success)
        {
            int start = label.Index + label.Length;
            int end = Math.Min(start + 3, strText.Length); // never read past the end of the text
            for (int k = start; k < end; k++)
            {
                char c = strText[k];
                if (c >= '0' && c <= '9')
                {
                    exp = c - '0';
                    break;
                }
            }
        }

        // CGPA: the first number of the form 2.x or 3.x; 0 when the CV contains none.
        Match cgpaMatch = Regex.Match(strText, @"[2-3]\.[0-9]+");
        cgpa = cgpaMatch.Success
            ? float.Parse(cgpaMatch.Value, System.Globalization.CultureInfo.InvariantCulture)
            : 0f;

        try
        {
            using (MySqlConnection conn = new MySqlConnection(connectionString))
            using (MySqlCommand cmd = new MySqlCommand(skillQuery, conn))
            {
                cmd.Parameters.AddWithValue("?id", id);
                cmd.Parameters.AddWithValue("?autocad", cad);
                cmd.Parameters.AddWithValue("?office", office);
                cmd.Parameters.AddWithValue("?picmicrocontroller", pic);
                cmd.Parameters.AddWithValue("?arduino", ardunio);
                cmd.Parameters.AddWithValue("?raspberrypi", raspberry);
                cmd.Parameters.AddWithValue("?factory", factory);
                cmd.Parameters.AddWithValue("?garments", garment);
                cmd.Parameters.AddWithValue("?operationandmaintenance", op);
                cmd.Parameters.AddWithValue("?installation", installation);
                cmd.Parameters.AddWithValue("?generator", generator);
                cmd.Parameters.AddWithValue("?lift", lift);
                cmd.Parameters.AddWithValue("?substation", substation);
                conn.Open();
                cmd.ExecuteNonQuery();
            }
        }
        catch (Exception ex)
        {
            MessageBox.Show(ex.Message);
            MessageBox.Show(id.ToString());
        }

        // The qualification row is attempted even if the skill row failed.
        try
        {
            using (MySqlConnection conn = new MySqlConnection(connectionString))
            using (MySqlCommand cmd = new MySqlCommand(qualificationQuery, conn))
            {
                cmd.Parameters.AddWithValue("?id", id);
                cmd.Parameters.AddWithValue("?department", dept);
                cmd.Parameters.AddWithValue("?experience", exp);
                cmd.Parameters.AddWithValue("?cgpa", cgpa);
                conn.Open();
                cmd.ExecuteNonQuery();
            }
        }
        catch (Exception ex)
        {
            MessageBox.Show(ex.Message);
        }
    }

    MessageBox.Show("All info Saved");
}

// Extracts the text of all pages of a PDF and normalises it for keyword matching:
// every run of characters outside [A-Za-z0-9#:+.@ ] becomes one space, and dotted
// triples such as "1.2.3" are blanked so they are not mistaken for a CGPA.
private static string ReadCvText(string file)
{
    StringBuilder raw = new StringBuilder();
    PdfReader reader = new PdfReader(file);
    try
    {
        for (int page = 1; page <= reader.NumberOfPages; page++)
        {
            ITextExtractionStrategy its = new iTextSharp.text.pdf.parser.LocationTextExtractionStrategy();
            string s = PdfTextExtractor.GetTextFromPage(reader, page, its);
            s = Encoding.UTF8.GetString(ASCIIEncoding.Convert(Encoding.Default, Encoding.UTF8, Encoding.Default.GetBytes(s)));
            raw.Append(s);
        }
    }
    finally
    {
        reader.Close(); // release the file even when a page fails to parse
    }

    string text = Regex.Replace(raw.ToString(), @"[^A-Za-z0-9#:+.@ ]+", " ");
    return Regex.Replace(text, @"[0-9]+\.[0-9]+\.[0-9]+", " ");
}

// Returns true when the text contains at least one of the phrases as a whole
// word/phrase (\b...\b), ignoring case.
private static bool ContainsAnyKeyword(string text, params string[] phrases)
{
    foreach (string phrase in phrases)
    {
        if (Regex.IsMatch(text, @"\b" + Regex.Escape(phrase) + @"\b", RegexOptions.IgnoreCase))
        {
            return true;
        }
    }
    return false;
}
/// <summary>
/// Extracts the text of every page of a PDF file and writes it, pages separated by a
/// space, to a UTF-8 text file. A progress bar of '#' characters (68 in total) is
/// written to the console while the pages are processed.
/// </summary>
/// <param name="inFileName">the full path to the pdf file.</param>
/// <param name="outFileName">the output file name.</param>
/// <returns>
/// <c>true</c> when the file was processed; <c>false</c> when the PDF or the output
/// file could not be opened or another exception escaped the page loop (the message is
/// written to the console). A page that fails to extract is logged and skipped and does
/// not by itself make the method return <c>false</c>.
/// </returns>
public bool ExtractText(string inFileName, string outFileName)
{
    const long totalLen = 68; // width of the progress bar in characters

    PdfReader reader = null;
    StreamWriter outFile = null;
    try
    {
        // Open the PDF first so that a bad input does not create/truncate the output file.
        reader = new PdfReader(inFileName);
        outFile = new StreamWriter(outFileName, false, System.Text.Encoding.UTF8);

        Console.Write("Processing: ");

        int pageCount = reader.NumberOfPages;
        float charUnit = (float)totalLen / (float)pageCount; // bar characters per page
        long totalWritten = 0;
        float curUnit = 0;

        for (int page = 1; page <= pageCount; page++)
        {
            try
            {
                outFile.Write(PdfTextExtractor.GetTextFromPage(reader, page) + " ");
            }
            catch (Exception e)
            {
                // Best effort: keep going with the remaining pages.
                Console.WriteLine(e);
            }

            // Write the progress.
            if (charUnit >= 1.0f)
            {
                int step = (int)charUnit;
                Console.Write(new string('#', step));
                totalWritten += step;
            }
            else
            {
                // Less than one character per page: accumulate until a whole character is due.
                curUnit += charUnit;
                if (curUnit >= 1.0f)
                {
                    int step = (int)curUnit;
                    Console.Write(new string('#', step));
                    totalWritten += step;
                    curUnit = 0;
                }
            }
        }

        // Pad the bar to its full width (rounding leaves it short).
        if (totalWritten < totalLen)
        {
            Console.Write(new string('#', (int)(totalLen - totalWritten)));
        }

        return true;
    }
    catch (Exception e)
    {
        Console.WriteLine(e.Message);
        return false;
    }
    finally
    {
        if (outFile != null)
        {
            outFile.Close();
        }
        if (reader != null)
        {
            reader.Close();
        }
    }
}
// Compares, for every PDF in the project folder, the page details derived from the PDF
// itself with the rows of cats.xml ("content" table) whose idnumber equals the first
// run of digits in the file name, and prints whether they match.
static void Main(string[] args)
{
    DirectoryInfo d = new DirectoryInfo(@"E:\cats project\CEDR_I_71_06_P");
    FileInfo[] files = d.GetFiles("*.pdf");

    // Page details keyed by the index of the file they were read from, so that a PDF
    // that fails to parse cannot shift the details of the files that follow it.
    Dictionary<int, PDFApp2.PageDetails> detailsByFile = new Dictionary<int, PDFApp2.PageDetails>();
    for (int i = 0; i < files.Length; i++)
    {
        PDFApp2.PageDetails details;
        if (TryReadPageDetails(files[i].FullName, out details))
        {
            detailsByFile[i] = details;
        }
    }

    DataSet ds = new DataSet();
    ds.ReadXml(@"E:\cats project\CEDR_I_71_06_P\cats.xml");
    DataTable dt = ds.Tables["content"];

    for (int i = 0; i < files.Length; i++)
    {
        // The id is the first run of digits in the file's string form ("" when there is none).
        string id = Regex.Match(files[i].ToString(), @"\d+").Value;

        var contents = from content in dt.AsEnumerable()
                       where content.Field<string>("idnumber") == id
                       select new
                       {
                           firstpage = content.Field<string>("firstpage"),
                           lastpage = content.Field<string>("lastpage"),
                           numtypesetpages = content.Field<string>("numtypesetpages")
                       };

        foreach (var item in contents)
        {
            Console.WriteLine(files[i]);

            PDFApp2.PageDetails actual;
            bool matched = detailsByFile.TryGetValue(i, out actual)
                && item.firstpage == actual.firstpage
                && item.lastpage == actual.LastPage
                && item.numtypesetpages == actual.Pagecount;

            Console.WriteLine(matched ? "page Details Matched" : "page Details Not Matched");
        }
    }

    Console.ReadLine();
}

// Reads page 2 of the PDF and derives its page details: the first page is the first
// number on that page's first line minus one, the last page is first page + page
// count - 1. Returns false (after reporting the error on stderr) when the PDF cannot
// be read, has no page 2, or the first line holds no number.
private static bool TryReadPageDetails(string pdfPath, out PDFApp2.PageDetails details)
{
    const int headerPage = 2;
    details = default(PDFApp2.PageDetails);

    PdfReader reader = null;
    try
    {
        reader = new PdfReader(pdfPath);

        ITextExtractionStrategy its = new iTextSharp.text.pdf.parser.LocationTextExtractionStrategy();
        string text = PdfTextExtractor.GetTextFromPage(reader, headerPage, its);
        text = Encoding.UTF8.GetString(ASCIIEncoding.Convert(Encoding.Default, Encoding.UTF8, Encoding.Default.GetBytes(text)));

        string[] lines = text.Split('\n');
        int firstPage = Convert.ToInt32(Regex.Match(lines[0], @"\d+").Value) - 1;
        int lastPage = firstPage + reader.NumberOfPages - 1;

        details = new PDFApp2.PageDetails
        {
            firstpage = firstPage.ToString(),
            LastPage = lastPage.ToString(),
            Pagecount = reader.NumberOfPages.ToString()
        };
        return true;
    }
    catch (Exception ex)
    {
        Console.Error.WriteLine("Could not read page details from '" + pdfPath + "': " + ex.Message);
        return false;
    }
    finally
    {
        if (reader != null)
        {
            reader.Close();
        }
    }
}
/// <summary>
/// Imports a bank statement PDF (BankID "033") into an <see cref="OFXDocument"/>:
/// branch and account from line 1, statement period (and server date, when present)
/// from the period line, then one transaction per statement row after the
/// "Data Histórico Docto. Valor R$" header.
/// </summary>
/// <param name="pathFilename">Path of the statement PDF.</param>
/// <returns>The populated document.</returns>
/// <exception cref="Exception">
/// Any failure while interpreting a line is rethrown as an <see cref="Exception"/> whose
/// message is "&lt;line index&gt; -&gt; &lt;original message&gt;"; the original exception
/// is available as <see cref="Exception.InnerException"/>.
/// </exception>
/// <remarks>
/// Amounts, document numbers and the server date are parsed with the current culture,
/// exactly as before. NOTE(review): presumably this expects a pt-BR culture; confirm.
/// </remarks>
public static OFXDocument Import(string pathFilename)
{
    OFXDocument document = new OFXDocument();
    document.Account = new Account();
    document.Account.BankID = "033";
    document.Transactions = new List<Transaction>();
    document.SignOn = new SignOn();

    // Every page is prefixed with '\n', so lines[0] is always empty and the first
    // extracted line is lines[1].
    StringBuilder allText = new StringBuilder();
    PdfReader reader = new PdfReader(pathFilename);
    try
    {
        for (int page = 1; page <= reader.NumberOfPages; page++)
        {
            ITextExtractionStrategy its = new iTextSharp.text.pdf.parser.LocationTextExtractionStrategy();
            string pageText = PdfTextExtractor.GetTextFromPage(reader, page, its);
            pageText = Encoding.UTF8.GetString(ASCIIEncoding.Convert(Encoding.Default, Encoding.UTF8, Encoding.Default.GetBytes(pageText)));
            allText.Append('\n').Append(pageText);
        }
    }
    finally
    {
        reader.Close(); // release the file even when a page fails to parse
    }

    string[] lines = allText.ToString().Split('\n');

    // Lines containing any of these texts are not transactions and are ignored.
    string[] ignoredTexts =
    {
        "Posição em:",
        "Provisão Encargos",
        "Limite Santander Master",
        "a = Bloqueio Dia",
        "b = Bloqueado",
        "p = Lançamento Provisionado",
        "Superlinha",
        "Saldo Bloqueado",
        "Saldo Bloqueio Dia",
        "Saldo Total de Conta Corrente",
        "Saldo de Conta Corrente",
        "Saldo em Investimentos com Resgate",
        "Saldo Disponível (",
        "Saldo Disponível Conta Corrente",
        "Saldo Disponível Total",
        "Ouvidoria 0800"
    };

    int headerShift = 0;           // 1 when the branch/account line was found one line later than expected
    bool head = false;             // true once the column header of the statement has been seen
    bool wrapped = false;          // true while collecting the three lines of a wrapped row
    int wrappedRows = 0;
    string wrappedFirst = String.Empty;
    string wrappedMain = String.Empty;

    for (int i = 0; i < lines.Length; i++)
    {
        try
        {
            if (i == 1)
            {
                // [Branch and account]
                if (!lines[i].Contains("Agência:"))
                {
                    i++;
                    headerShift++;
                }

                string line = lines[i];
                document.Account.BranchID = line.Substring(line.IndexOf("Agência:") + 8, 6).Trim();

                string account = line.Substring(line.IndexOf("Corrente:") + 9, 12).Trim();
                // Two different dashes mean an operation code precedes the account number; drop it.
                if (account.IndexOf("-") != account.LastIndexOf("-"))
                {
                    account = account.Substring(account.IndexOf("-") + 1);
                }
                document.Account.AccountID = account;
            }
            else if (i == 3 + headerShift)
            {
                // [Statement period]
                if (lines[i].Contains("Período:"))
                {
                    string periodo = lines[i].Substring(lines[i].IndexOf("Período:") + "Período:".Length);
                    if (periodo.Contains("Data/Hora"))
                    {
                        // DTSERVER: the text after "Data/Hora", up to "às" when present.
                        string dtServer = periodo.Substring(periodo.IndexOf("Data/Hora") + 10);
                        if (dtServer.Contains("às"))
                        {
                            dtServer = dtServer.Substring(0, dtServer.IndexOf("às"));
                        }
                        document.SignOn.DTServer = Convert.ToDateTime(dtServer.Trim());

                        periodo = periodo.Substring(0, periodo.IndexOf("Data/Hora"));
                    }

                    int separator = periodo.IndexOf(" a ");
                    string dtInicio = periodo.Substring(0, separator).Trim();
                    string dtFim = periodo.Substring(separator + 3).Trim();
                    document.StatementStart = DateTime.ParseExact(dtInicio + " 00:00:00.000", "dd/MM/yyyy HH:mm:ss.fff", CultureInfo.InvariantCulture);
                    document.StatementEnd = DateTime.ParseExact(dtFim + " 00:00:00.000", "dd/MM/yyyy HH:mm:ss.fff", CultureInfo.InvariantCulture);
                }
            }
            else if (i == 4 + headerShift)
            {
                // [Start of the statement rows]
                if (lines[i].Contains("Data Histórico Docto. Valor R$"))
                {
                    head = true;
                }
            }
            else if (head && lines[i].IndexOf("SALDO ANTERIOR") < 0) // previous-balance rows are not transactions
            {
                // [Statement rows]
                string text;
                try
                {
                    text = Encoding.UTF8.GetString(ASCIIEncoding.Convert(Encoding.Default, Encoding.UTF8, Encoding.Default.GetBytes(lines[i])));
                }
                catch
                {
                    text = String.Empty;
                }

                bool ignored = false;
                foreach (string ignoredText in ignoredTexts)
                {
                    if (text.IndexOf(ignoredText) >= 0)
                    {
                        ignored = true;
                        break;
                    }
                }
                if (ignored)
                {
                    continue;
                }

                if (iniciaComData(text) && !wrapped)
                {
                    document.Transactions.Add(ParseTransactionLine(text, 1));
                }
                else if (text.StartsWith("https://www.") || text.StartsWith("Internet Banking Página"))
                {
                    continue; // page footer
                }
                else
                {
                    // A row spread over three extracted lines: text before the main row,
                    // the main row itself, and text after it. NOTE(review): this reading
                    // of the three-line case is inferred from how the lines are merged
                    // below; confirm against a sample statement.
                    wrapped = true;
                    wrappedRows++;
                    if (wrappedRows == 1)
                    {
                        wrappedFirst = text.Replace('/', '|');
                    }
                    else if (wrappedRows == 2)
                    {
                        wrappedMain = text;
                    }
                    else if (wrappedRows == 3)
                    {
                        if (wrappedFirst.Length > 10)
                        {
                            string wrappedLast = text.Replace('/', '|');
                            // First 11 characters of the main row, then both extra lines,
                            // then the remainder of the main row.
                            string merged = wrappedMain.Substring(0, 11) + wrappedFirst + " " + wrappedLast + " " + wrappedMain.Substring(11, wrappedMain.Length - 11);
                            document.Transactions.Add(ParseTransactionLine(merged, 2));
                        }
                        wrappedRows = 0;
                        wrapped = false;
                    }
                }
            }
        }
        catch (Exception e)
        {
            // Same message as before; the original exception is now kept as InnerException.
            throw new Exception(i.ToString() + " -> " + e.Message, e);
        }
    }

    return document;
}

// Parses one statement row "dd/MM/yyyy <memo> <document number> <amount> [<trailing number>]"
// into a Transaction. `step` only tags the error messages, as "(1)" or "(2)".
private static Transaction ParseTransactionLine(string line, int step)
{
    Transaction transaction = new Transaction();
    try
    {
        transaction.Date = DateTime.ParseExact(line.Substring(0, 10) + " 00:00:00.000", "dd/MM/yyyy HH:mm:ss.fff", CultureInfo.InvariantCulture);
    }
    catch
    {
        throw new Exception("'" + line.Substring(0, 10) + "' não corresponde a uma data válida (" + step + ")");
    }

    // Find amount and document number, reading tokens from the right.
    string rest = line.Substring(10).TrimEnd();
    int index = rest.LastIndexOf(" ");
    decimal amount;
    try
    {
        amount = Convert.ToDecimal(rest.Substring(index + 1));
    }
    catch
    {
        throw new Exception("'" + rest.Substring(index + 1) + "' não corresponde a um valor monetário (" + step + ")");
    }

    rest = rest.Substring(0, index).TrimEnd();
    index = rest.LastIndexOf(" ");

    int ignoredNumber;
    if (!int.TryParse(rest.Substring(index + 1), out ignoredNumber))
    {
        // The token before the last one is not an integer, so it is taken as the amount
        // and the token before it must be the document number (presumably the last
        // token was a balance column; TODO confirm).
        amount = Convert.ToDecimal(rest.Substring(index + 1));
        rest = rest.Substring(0, index).TrimEnd();
        index = rest.LastIndexOf(" ");
        Convert.ToInt32(rest.Substring(index + 1)); // throws when the document number is not numeric
    }

    transaction.CheckNum = rest.Substring(index + 1);
    transaction.Amount = amount;

    // Drop a trailing legend marker (" a", " b" or " p"). Only the suffix is removed;
    // an earlier occurrence of the same text inside the memo is left untouched.
    string memo = rest.Substring(0, index).TrimEnd();
    if (memo.EndsWith(" a") || memo.EndsWith(" b") || memo.EndsWith(" p"))
    {
        memo = memo.Substring(0, memo.Length - 2);
    }
    transaction.Memo = memo;

    transaction.TransType = transaction.Amount > 0 ? OFXTransactionType.CREDIT : OFXTransactionType.DEBIT;
    return transaction;
}
// Processes the PDF at mainI.pdfPath page by page: for each page on which findDataPdf
// finds both a name ("nameCognome") and a date ("data"), the page is imported under
// that name (first occurrence only) and a row is added to lv1. Afterwards the results
// are optionally uploaded to Google Drive and a completion message is shown.
public void gestiscoPDF()
{
    PdfReader reader = new PdfReader(mainI.pdfPath);
    try
    {
        for (int page = 1; page <= reader.NumberOfPages; page++)
        {
            ITextExtractionStrategy its = new iTextSharp.text.pdf.parser.LocationTextExtractionStrategy();
            PdfTextExtractor.GetTextFromPage(reader, page, its);
            string strPage = its.GetResultantText();

            IDictionary<string, string> res = this.findDataPdf(strPage);
            if (string.IsNullOrEmpty(res["nameCognome"]) || string.IsNullOrEmpty(res["data"]))
            {
                continue;
            }

            string nameCognome = res["nameCognome"].ToLower().Trim();
            string data = res["data"].Trim();

            // Save name + page for the first page of each person; later pages with the
            // same name are duplicates and are only listed.
            bool isNew = !mainI.result.ContainsKey(nameCognome);
            if (isNew)
            {
                mainI.result[nameCognome] = importPage(reader, page, nameCognome);
            }

            // Last column: "1" for an imported page, "0" for a duplicate.
            ListViewItem item = new ListViewItem("2", 0);
            item.SubItems.Add(nameCognome);
            item.SubItems.Add(isNew ? "1" : "0");
            lv1.Items.Add(item);
            lv1.EnsureVisible(lv1.Items.Count - 1);

            // Remember the first date found. Pages carrying a different date are
            // currently not treated specially.
            if (string.IsNullOrEmpty(mainI.data))
            {
                mainI.data = data;
            }
        }
    }
    finally
    {
        reader.Close(); // release the file even when a page fails
    }

    // Optional upload to Drive.
    if (this.cbDrive.Checked)
    {
        GoogleDrive.uploadOnDrive(mainI.result, lv1, checkBoxCreateFolder.Checked, pb);
    }

    MessageBox.Show("Procedimento terminato", "PdfDrive");
}
/// <summary>
/// Reads pages 5 to 9 of the report PDF and, for the segment <c>pdfNames[page]</c>,
/// stores the values found after fixed text markers: industry unit demand, growth
/// rate, age, MTBF, Pfmn, size and the round number. When <c>Button2Click</c> is set,
/// the survey shares per product are stored as well.
/// </summary>
/// <param name="pdfFilePath">Path of the PDF to read.</param>
/// <param name="outputPath">Not used by this method.</param>
/// <returns>Always an empty string; the results are stored in the class's fields.</returns>
/// <remarks>
/// NOTE(review): a marker that is missing from a page is not detected; the value is
/// then cut from an unrelated position of the page, exactly as before.
/// Numbers are converted with the current culture.
/// </remarks>
public string SplitCourier(string pdfFilePath, string outputPath)
{
    char[] lineBreaks = { '\r', '\n' };

    PdfReader reader = new PdfReader(pdfFilePath);
    try
    {
        for (int pageNumber = 5; pageNumber <= 9; pageNumber += 1)
        {
            string currSegment = pdfNames[pageNumber];
            string thePage = PdfTextExtractor.GetTextFromPage(reader, pageNumber);

            // Total industry unit demand: first token after the marker, without its
            // last character and without any non-digit.
            string demand = TokenAfterMarker(thePage, "Total Industry Unit Demand ");
            demand = demand.Remove(demand.Length - 1);
            demand = Regex.Replace(demand, "[^0-9]", "");
            IndustryUnitDemand[currSegment] = Convert.ToInt32(demand);

            // Next year's segment growth rate: rest of the line after the marker.
            string growthRate = TextAfterMarker(thePage, "Next Year's Segment Growth Rate |");
            GrothRate[currSegment] = growthRate.Split(lineBreaks).FirstOrDefault();

            // Age
            string age = TokenAfterMarker(thePage, "Age = ");
            d_Age[currSegment] = age.Split(lineBreaks).FirstOrDefault();

            // MTBF: every '0' character is removed from the token.
            string mtbf = Regex.Replace(TokenAfterMarker(thePage, "MTBF "), "0", "");
            d_mtbf[currSegment] = mtbf.Split(lineBreaks).FirstOrDefault();

            // Pfmn
            string pfmn = TokenAfterMarker(thePage, "Pfmn ");
            d_pfmn[currSegment] = Convert.ToDouble(pfmn.Split(lineBreaks).FirstOrDefault());

            // Size
            string size = TokenAfterMarker(thePage, " Size ");
            d_size[currSegment] = Convert.ToDouble(size.Split(lineBreaks).FirstOrDefault());

            // Round: rest of the line after the marker.
            string roundText = TextAfterMarker(thePage, " Round: ");
            round = Convert.ToInt32(roundText.Split(lineBreaks).FirstOrDefault());

            if (Button2Click)
            {
                Dictionary<string, int> ourUserProducts = new Dictionary<string, int>();
                Dictionary<string, double> surveyShares = new Dictionary<string, double>();

                string survey = TextAfterMarker(thePage, "Survey\n");
                totalSurvey = SumSurvey(survey, ourUserProducts);

                // Share of each product = its survey value / total of the segment.
                foreach (KeyValuePair<string, int> entry in ourUserProducts)
                {
                    surveyShares.Add(entry.Key, (double)entry.Value / (double)totalSurvey);
                }
                SegmentSurveyRate.Add(currSegment, surveyShares);
            }
        }
    }
    finally
    {
        reader.Close(); // release the file even when a page cannot be interpreted
    }

    return string.Empty;
}

// Returns the part of the page that follows the first occurrence of the marker.
private static string TextAfterMarker(string page, string marker)
{
    return page.Substring(page.IndexOf(marker) + marker.Length);
}

// Returns the text between the marker and the first space that follows it.
private static string TokenAfterMarker(string page, string marker)
{
    string tail = TextAfterMarker(page, marker);
    return tail.Substring(0, tail.IndexOf(" "));
}
// Opens the document type selected in comboBox1. A text file is read completely and
// shown in a preview window; a PDF is extracted page by page and each page's text is
// appended to convertedTextFromPdf.
private void button1_Click(object sender, EventArgs e)
{
    if (comboBox1.Text == "Text(.txt)")
    {
        try
        {
            using (OpenFileDialog ofd = new OpenFileDialog())
            {
                ofd.CheckFileExists = true;
                ofd.CheckPathExists = true;
                ofd.DefaultExt = "txt";
                ofd.DereferenceLinks = true;
                ofd.Filter = "Text files (*.txt)|*.txt|" + "RTF files (*.rtf)|*.rtf|" + "Works 6 and 7 (*.wps)|*.wps|" + "Windows Write (*.wri)|*.wri|" + "WordPerfect document (*.wpd)|*.wpd";
                ofd.Multiselect = false;
                ofd.RestoreDirectory = true;
                ofd.ShowHelp = true;
                ofd.ShowReadOnly = false;
                ofd.Title = "select a file";
                ofd.ValidateNames = true;

                if (ofd.ShowDialog() == DialogResult.OK)
                {
                    string bookText;
                    // Disposing the reader also closes the stream opened by the dialog.
                    using (StreamReader sr = new StreamReader(ofd.OpenFile()))
                    {
                        bookText = sr.ReadToEnd();
                    }

                    frmPreviewText pvt = new frmPreviewText();
                    pvt.setRichBox(bookText);
                    pvt.Show();

                    string fileName = System.IO.Path.GetFileName(ofd.FileName);
                    lblSaveFileName.Text = "File Name: " + fileName;
                }
            }
        }
        catch (Exception)
        {
            MessageBox.Show("can not open the file", "Text to Speak", MessageBoxButtons.OK, MessageBoxIcon.Error);
        }
    }
    else if (comboBox1.Text == "PDF(.PDF)")
    {
        ispdf = true;

        using (OpenFileDialog dlg = new OpenFileDialog())
        {
            dlg.Filter = "PDF Files(*.PDF)|*.PDF|ALL Files(*.*)|*.*";
            if (dlg.ShowDialog() != DialogResult.OK)
            {
                return;
            }

            PdfReader pdf = null;
            try
            {
                pdf = new PdfReader(dlg.FileName);
                for (int page = 1; page <= pdf.NumberOfPages; page++)
                {
                    ITextExtractionStrategy its = new iTextSharp.text.pdf.parser.LocationTextExtractionStrategy();
                    string s = PdfTextExtractor.GetTextFromPage(pdf, page, its);
                    s = Encoding.UTF8.GetString(ASCIIEncoding.Convert(Encoding.Default, Encoding.UTF8, Encoding.Default.GetBytes(s)));
                    // NOTE(review): the list is appended to, never cleared here; pages of a
                    // previously opened PDF stay in it. Confirm that this is intended.
                    convertedTextFromPdf.Add(s);
                }
            }
            catch (Exception ex)
            {
                MessageBox.Show(ex.Message);
            }
            finally
            {
                if (pdf != null)
                {
                    pdf.Close(); // release the file, also when a page fails
                }
            }
        }
    }
}
// Lets the user pick a PDF or Word document. A Word document is converted to a PDF in
// the temporary folder and shown in the viewer; a PDF has its text extracted into
// pdfText (line breaks removed) and is then shown in the viewer.
private void open_btn_Click(object sender, EventArgs e)
{
    OpenFileDialog ofd = new OpenFileDialog();
    ofd.Filter = "PDF Files(*.pdf)|*.pdf|DOC Files(*.doc)|*.doc|DOCX Files(*.docx)|*.docx";
    ofd.Title = "Select a Document";
    if (ofd.ShowDialog() != DialogResult.OK)
    {
        return;
    }

    reader.Dispose();

    string file = ofd.FileName;
    // Size of the file in bytes (not the length of its path).
    long size = new System.IO.FileInfo(file).Length;
    string extension = System.IO.Path.GetExtension(file);
    MessageBox.Show(size.ToString());

    bool isWordDocument =
        string.Equals(extension, ".doc", StringComparison.OrdinalIgnoreCase) ||
        string.Equals(extension, ".docx", StringComparison.OrdinalIgnoreCase);

    if (isWordDocument)
    {
        speak("This is a Microsoft Word Document, It'll take me some minutes to convert it to my readable format!!!");

        // The converted PDF goes to the temp folder. The former path was built from a
        // folder dialog that was never shown and contained a form-feed escape ("\f").
        string outputFile = System.IO.Path.Combine(System.IO.Path.GetTempPath(), "file.pdf");
        bool converted = false;

        Microsoft.Office.Interop.Word.Application app = null;
        Microsoft.Office.Interop.Word.Document doc = null;
        try
        {
            app = new Microsoft.Office.Interop.Word.Application();
            doc = app.Documents.Open(file, Type.Missing, false);

            // convert doc to pdf
            doc.ExportAsFixedFormat(outputFile, WdExportFormat.wdExportFormatPDF);
            converted = true;
        }
        catch (Exception ex)
        {
            MessageBox.Show(ex.Message);
        }
        finally
        {
            // close doc file and quit app word, also when the conversion failed
            if (doc != null)
            {
                doc.Close(false, Type.Missing, Type.Missing);
                System.Runtime.InteropServices.Marshal.ReleaseComObject(doc);
            }
            if (app != null)
            {
                app.Quit(false, false, false);
                System.Runtime.InteropServices.Marshal.ReleaseComObject(app);
            }
        }

        // Show the result only after Word has been closed.
        if (converted)
        {
            axAcroPDF1.src = outputFile;
        }
    }
    else
    {
        string text = string.Empty;
        PdfReader pdf = null;
        try
        {
            pdf = new PdfReader(file);
            for (int page = 1; page <= pdf.NumberOfPages; page++)
            {
                ITextExtractionStrategy its = new iTextSharp.text.pdf.parser.LocationTextExtractionStrategy();
                String s = PdfTextExtractor.GetTextFromPage(pdf, page, its);
                s = Encoding.UTF8.GetString(ASCIIEncoding.Convert(Encoding.Default, Encoding.UTF8, Encoding.Default.GetBytes(s)));

                // Strings are immutable: the result of Replace has to be kept.
                // NOTE(review): removing "\n" joins the last word of a line with the
                // first word of the next one; confirm that this is the wanted text.
                text = text + s.Replace("\n", "");
                pdfText = text; // updated per page, so a later failure keeps the pages read so far
            }
        }
        catch (Exception ex)
        {
            MessageBox.Show(ex.Message);
        }
        finally
        {
            if (pdf != null)
            {
                pdf.Close();
            }
        }

        axAcroPDF1.src = file;
    }
}
// Reads the PDF at pathFilename, extracts its text and builds an OFXDocument from it.
//
// The text of all pages is split into lines. Until the transaction table header is seen,
// lines are checked for the account ("Agência/Conta"), the server date/time ("Data" /
// "Horário") and the statement period ("Extrato de ... até ..."). After the header, every
// line that is not a page marker or a balance ("SALDO") line becomes one Transaction.
//
// Throws Exception with the message "<line index> -> <reason>" when a line cannot be
// parsed; the original exception is available as InnerException.
public static OFXDocument Import(string pathFilename)
{
    OFXDocument document = new OFXDocument();
    document.Account = new Account();
    document.Account.BankID = "341";
    document.Transactions = new List<Transaction>();
    document.SignOn = new SignOn();

    string strText;
    int maxLength;

    PdfReader reader = new PdfReader(pathFilename);
    try
    {
        StringBuilder pages = new StringBuilder();
        for (int page = 1; page <= reader.NumberOfPages; page++)
        {
            ITextExtractionStrategy its = new iTextSharp.text.pdf.parser.LocationTextExtractionStrategy();
            string pageText = PdfTextExtractor.GetTextFromPage(reader, page, its);

            // Round-trips the text through Encoding.Default and back via UTF-8 (kept from the original).
            // NOTE(review): characters the default code page cannot encode do not survive this step.
            pageText = Encoding.UTF8.GetString(Encoding.Convert(Encoding.Default, Encoding.UTF8, Encoding.Default.GetBytes(pageText)));

            pages.Append("\n").Append(pageText);
        }

        strText = pages.ToString();

        // Number of digits in the page count. Lines no longer than this are skipped below -
        // presumably bare page-number lines; confirm against a sample statement.
        maxLength = Convert.ToString(reader.NumberOfPages).Length;
    }
    finally
    {
        // Release the file even when text extraction throws.
        reader.Close();
    }

    string[] temp = strText.Split('\n');
    bool head = false; // becomes true once the transaction table header has been seen
    Transaction transaction = new Transaction();

    for (int i = 0; i < temp.Length; i++)
    {
        string line = temp[i];
        string value = string.Empty;
        try
        {
            if (!head && line.Contains("Nome") && line.Contains("Agência/Conta"))
            {
                // Everything after "Agência/Conta:" is "<branch> / <account>".
                value = line.Trim().Substring(line.IndexOf("Agência/Conta:") + 14);
                document.Account.BranchID = value.Trim().Substring(0, value.IndexOf("/") - 1);
                document.Account.AccountID = value.Trim().Substring(value.IndexOf("/"));
            }
            else if (!head && line.Contains("Data") && line.Contains("Horário"))
            {
                value = line.Trim();
                string auxDt = value.Trim().Substring(line.IndexOf("Data") + 5, 12).Trim();
                string auxHora = value.Trim().Substring(line.IndexOf("Horário:") + 9).Trim();
                try
                {
                    document.SignOn.DTServer = DateTime.ParseExact(auxDt + " " + auxHora + ".000", "dd/MM/yyyy HH:mm:ss.fff", CultureInfo.InvariantCulture);
                }
                catch
                {
                    // Best effort, as in the original: an unparsable server date/time is
                    // ignored and DTServer is left unset.
                }
            }
            else if (!head && line.Contains("Extrato de") && line.Contains("até"))
            {
                value = line.Trim();

                string auxInicio = value.Trim().Substring(line.IndexOf("Extrato de") + 10).Trim().Substring(0, 10) + " 00:00:00.000";
                try
                {
                    document.StatementStart = DateTime.ParseExact(auxInicio, "dd/MM/yyyy HH:mm:ss.fff", CultureInfo.InvariantCulture);
                }
                catch (Exception ex)
                {
                    throw new Exception("'" + auxInicio + "' não corresponde a uma data válida (1)", ex);
                }

                // The end date is the last 10 characters of the line.
                string auxFinal = value.Trim().Substring(value.Length - 10).Trim() + " 00:00:00.000";
                try
                {
                    document.StatementEnd = DateTime.ParseExact(auxFinal, "dd/MM/yyyy HH:mm:ss.fff", CultureInfo.InvariantCulture);
                }
                catch (Exception ex)
                {
                    throw new Exception("'" + auxFinal + "' não corresponde a uma data válida (2)", ex);
                }
            }
            else if (!head && line.Contains("Data") && line.Contains("Lançamento") && line.Contains("Ag./Origem") && line.Contains("Valor") && line.Contains("Saldo") && line.Contains("(R$)"))
            {
                head = true;
            }
            else if (head && !line.Contains("ItaúEmpresas") && !(line == "") && !(line.Trim().Length <= maxLength) && !(line.Trim().Contains("SALDO")))
            {
                value = line.Trim();

                // The line starts with "dd/MM"; the year is taken from the statement start date.
                string aux = value.Trim().Substring(0, 5).Trim() + "/" + document.StatementStart.ToString("yyyy") + " 00:00:00.000";
                try
                {
                    transaction.Date = DateTime.ParseExact(aux, "dd/MM/yyyy HH:mm:ss.fff", CultureInfo.InvariantCulture);
                }
                catch (Exception ex)
                {
                    throw new Exception("'" + aux + "' não corresponde a uma data válida (3)", ex);
                }

                value = value.Trim().Substring(5).Trim();
                string[] colunas = value.Split(' ');

                // The three look-ahead branches below all read the amount from the last non-empty
                // column BEFORE the final one - presumably because the final column then holds a
                // balance; confirm against a sample statement. They differ only in the number
                // reported in the error message.
                // NOTE(review): temp[i + k] is not bounds-checked; running past the end of the
                // text raises here and is rethrown by the outer catch with the line index.
                if (temp[i + 1].Contains("SALDO FINAL"))
                {
                    value = ImportReadAmountBeforeLastColumn(transaction, colunas, value, "(1)");
                }
                else if (temp[i + 2].Contains("(-) SALDO A LIBERAR")
                    || (temp[i + 1].Contains("SALDO PARCIAL") && temp[i + 4].Contains("(-) SALDO A LIBERAR"))
                    || (temp[i + 2].Contains("ItaúEmpresas") && temp[i + 5].Contains("(-) SALDO A LIBERAR"))
                    || (temp[i + 3].Contains("ItaúEmpresas") && temp[i + 5].Contains("(-) SALDO A LIBERAR"))
                    || (temp[i + 2].Contains("ItaúEmpresas") && temp[i + 4].Contains("SALDO PARCIAL") && temp[i + 7].Contains("(-) SALDO A LIBERAR"))
                    || (temp[i + 1].Contains("SALDO PARCIAL") && temp[i + 3].Contains("ItaúEmpresas") && temp[i + 7].Contains("(-) SALDO A LIBERAR"))
                    || (temp[i + 1].Contains("SALDO PARCIAL") && temp[i + 4].Contains("ItaúEmpresas") && temp[i + 7].Contains("(-) SALDO A LIBERAR"))
                    || (temp[i + 1].Contains("SALDO PARCIAL") && temp[i + 5].Contains("ItaúEmpresas") && temp[i + 7].Contains("(-) SALDO A LIBERAR")))
                {
                    value = ImportReadAmountBeforeLastColumn(transaction, colunas, value, "(2)");
                }
                else if (temp[i + 1] != "" && !(temp[i + 1].Trim().Length <= maxLength)
                    // The first 5 characters (the "dd/MM" prefix) of this line differ from those
                    // of the next relevant line, skipping page-marker / partial-balance lines.
                    && ((temp[i].Substring(0, 5).Trim() != temp[i + 1].Substring(0, 5).Trim())
                        || (temp[i + 2] == "ItaúEmpresas" && temp[i].Substring(0, 5).Trim() != temp[i + 4].Substring(0, 5).Trim())
                        || (temp[i + 1].Contains("SALDO PARCIAL") && temp[i].Substring(0, 5).Trim() != temp[i + 3].Substring(0, 5).Trim())
                        || (temp[i + 2] == "ItaúEmpresas" && temp[i + 4].Contains("SALDO PARCIAL") && temp[i].Substring(0, 5).Trim() != temp[i + 6].Substring(0, 5).Trim())
                        || (temp[i + 1].Contains("SALDO PARCIAL") && temp[i + 3] == "ItaúEmpresas" && temp[i].Substring(0, 5).Trim() != temp[i + 6].Substring(0, 5).Trim())
                        || (temp[i + 1].Contains("SALDO PARCIAL") && temp[i + 4] == "ItaúEmpresas" && temp[i].Substring(0, 5).Trim() != temp[i + 6].Substring(0, 5).Trim())))
                {
                    value = ImportReadAmountBeforeLastColumn(transaction, colunas, value, "(3)");
                }
                else
                {
                    // Otherwise the amount is the last column.
                    string ultimaColuna = colunas[colunas.Length - 1];
                    try
                    {
                        transaction.Amount = Convert.ToDecimal(ultimaColuna);
                    }
                    catch (Exception ex)
                    {
                        throw new Exception("'" + ultimaColuna + "' não corresponde a um número válido (4)", ex);
                    }
                    value = value.Replace(ultimaColuna, "").Trim();
                }

                // No document number is available.
                transaction.CheckNum = String.Empty;
                // What is left of the line is the description.
                transaction.Memo = value;
                transaction.TransType = transaction.Amount > 0 ? OFXTransactionType.CREDIT : OFXTransactionType.DEBIT;

                document.Transactions.Add(transaction);
                transaction = new Transaction();
            }
        }
        catch (Exception e)
        {
            // Same message as before ("<line index> -> <reason>"); the cause is now kept as InnerException.
            throw new Exception(i.ToString() + " -> " + e.Message, e);
        }
    }

    return document;
}

// Scans colunas from the second-to-last entry backwards, stores the first non-empty entry
// as transaction.Amount and returns value cut off just before that entry (trimmed).
// Returns value unchanged when no such entry exists. errorTag is appended to the error
// message when the entry is not a valid number.
private static string ImportReadAmountBeforeLastColumn(Transaction transaction, string[] colunas, string value, string errorTag)
{
    for (int j = colunas.Length - 2; j >= 0; j--)
    {
        if (colunas[j] != "")
        {
            try
            {
                transaction.Amount = Convert.ToDecimal(colunas[j]);
            }
            catch (Exception ex)
            {
                throw new Exception("'" + colunas[j] + "' não corresponde a um número válido " + errorTag, ex);
            }
            return value.Trim().Substring(0, value.IndexOf(colunas[j])).Trim();
        }
    }
    return value;
}
// Click handler: lets the user pick one or more PDF pay reports, extracts the driver name
// and the grand total from each, and writes them to the spreadsheet wrapper xl
// (driver in column 0 at driverRow, rate in column 1 at rateRow). Both row counters
// advance once per file, whether or not the file could be processed. A failure on one
// file is reported in a message box and the remaining files are still processed.
// The sheet is saved and closed after the last file.
private void button1_Click(object sender, EventArgs e)
{
    using (OpenFileDialog dlg = new OpenFileDialog())
    {
        dlg.Multiselect = true;
        dlg.Filter = "PDF Files(*.PDF)|*.PDF|All Files(*.*)|*.*";
        if (dlg.ShowDialog() != DialogResult.OK)
        {
            return;
        }

        foreach (string file in dlg.FileNames)
        {
            try
            {
                StringBuilder pages = new StringBuilder();
                PdfReader reader = new PdfReader(file);
                try
                {
                    for (int page = 1; page <= reader.NumberOfPages; page++)
                    {
                        ITextExtractionStrategy its = new iTextSharp.text.pdf.parser.LocationTextExtractionStrategy();
                        string s = PdfTextExtractor.GetTextFromPage(reader, page, its);

                        // Round-trips the text through Encoding.Default and back via UTF-8 (kept from the original).
                        // NOTE(review): characters the default code page cannot encode do not survive this step.
                        s = Encoding.UTF8.GetString(Encoding.Convert(Encoding.Default, Encoding.UTF8, Encoding.Default.GetBytes(s)));

                        pages.Append(s);
                    }
                }
                finally
                {
                    // Previously the reader was closed only after the parsing below succeeded,
                    // so any parse failure leaked the file handle.
                    reader.Close();
                }

                string strText = pages.ToString();
                richTextBox1.Text = strText;

                //<======== Adjust string location below ========>

                // A missing marker used to turn IndexOf's -1 into a bogus offset (or an
                // out-of-range error); each marker is now checked and reported by name.
                int headingAt = strText.IndexOf("Driver Pay Report", StringComparison.Ordinal);
                if (headingAt < 0)
                {
                    throw new FormatException("\"Driver Pay Report\" heading not found.");
                }
                // The heading is 17 characters long; +18 also skips the character that follows it.
                string driver = strText.Substring(headingAt + 18);

                int addressAt = driver.IndexOf("Address", StringComparison.Ordinal);
                if (addressAt < 1)
                {
                    throw new FormatException("\"Address\" label not found after the heading.");
                }
                // Cut from one character before "Address" to the end.
                driver = driver.Remove(addressAt - 1);

                int driverLabelAt = driver.IndexOf("Driver: ", StringComparison.Ordinal);
                if (driverLabelAt < 0)
                {
                    throw new FormatException("\"Driver: \" label not found.");
                }
                // Drop the 8-character "Driver: " label itself.
                driver = driver.Remove(driverLabelAt, 8);

                int totalAt = strText.IndexOf("Grand Total:", StringComparison.Ordinal);
                if (totalAt < 0)
                {
                    throw new FormatException("\"Grand Total:\" label not found.");
                }
                // The label is 12 characters long; +14 also skips the two characters that follow it.
                string rate = strText.Substring(totalAt + 14);

                int currencyAt = rate.IndexOf(" USD", StringComparison.Ordinal);
                if (currencyAt < 0)
                {
                    throw new FormatException("\" USD\" suffix not found after the grand total.");
                }
                rate = rate.Remove(currencyAt);

                xl.WriteToCell(driverRow, 0, driver); //Column is 0
                xl.WriteToCell(rateRow, 1, rate); //Column is 1
            }
            catch (Exception ex)
            {
                // The file path used to be glued directly onto the message; put it on its own line.
                MessageBox.Show(ex.Message + Environment.NewLine + file);
            }

            driverRow++;
            rateRow++;
        }

        xl.Save();
        xl.Close();
    }
}
// Click handler: stops playback, lets the user pick a PDF, extracts its text with the
// strategy selected by radioButton3 (checked = simple, otherwise location-based), shows
// the text in MyText, splits it on mychar into words, sizes progressBar1 to the number
// of pieces and finally opens the PDF with its associated application.
// Any failure is reported in a message box.
private void button1_Click(object sender, EventArgs e)
{
    iTextSharp.text.FontFactory.RegisterDirectories();
    timer1.Stop();
    button5.Text = "P L A Y";

    using (OpenFileDialog dlg = new OpenFileDialog())
    {
        dlg.Filter = "PDF files(*.PDF)|*.PDF|All files(*.*)|*.*";
        if (dlg.ShowDialog() != DialogResult.OK)
        {
            return;
        }

        string filepath = dlg.FileName;
        try
        {
            StringBuilder extracted = new StringBuilder();
            PdfReader reader = new PdfReader(filepath);
            try
            {
                for (int page = 1; page <= reader.NumberOfPages; page++)
                {
                    ITextExtractionStrategy its;
                    if (radioButton3.Checked)
                    {
                        its = new iTextSharp.text.pdf.parser.SimpleTextExtractionStrategy();
                    }
                    else
                    {
                        its = new iTextSharp.text.pdf.parser.LocationTextExtractionStrategy();
                    }

                    string s = PdfTextExtractor.GetTextFromPage(reader, page, its);

                    // Round-trips the text through Encoding.Default and back via UTF-8 (kept from the original).
                    // NOTE(review): characters the default code page cannot encode do not survive this step.
                    string ss = Encoding.UTF8.GetString(Encoding.Convert(Encoding.Default, Encoding.UTF8, Encoding.Default.GetBytes(s)));

                    // Pages are joined with a single leading space each.
                    extracted.Append(" ").Append(ss);
                }
            }
            finally
            {
                // Release the file even when a page fails to extract.
                reader.Close();
            }

            string strText = extracted.ToString();
            MyText.Text = strText;

            i = 0;

            // Whitespace normalisation before splitting.
            strText = strText.Replace("\t", " ");
            // NOTE(review): as written this replaces a space with a space (no effect); the first
            // argument may originally have been a different whitespace character - confirm.
            strText = strText.Replace(" ", " ");
            strText = strText.Replace("\u2000", " ");
            strText = strText.Replace(Environment.NewLine, "\n");

            if (mychar == '\n')
            {
                // When splitting on newlines, drop the existing line breaks and start a new
                // piece after every full stop instead.
                strText = strText.Replace("\n", " ");
                strText = strText.Replace(".", ". " + mychar);
            }

            words = strText.Split(new char[] { mychar }, StringSplitOptions.RemoveEmptyEntries);
            progressBar1.Maximum = words.Length;

            timer1.Stop();
            button5.Text = "P L A Y";

            System.Diagnostics.Process.Start(filepath);
        }
        catch (Exception ex)
        {
            MessageBox.Show(ex.Message);
        }
    }
}
// Click handler: walks every *.pdf in the hard-coded rutaArchivos folder, pulls one
// "Persona" row out of each record block found in the raw page content streams and hands
// the rows to InsertarBBDD (in batches of 500000, plus a final call for the remainder).
// The elapsed time in minutes is written to lblTiempoTranscurrido when done.
//
// City, province and region are read once per file from fixed line positions of the
// first page's extracted text and copied onto every row of that file.
private void btnProcesar_Click(object sender, EventArgs e)
{
    // A record: from a "BT" to the first following "sc" + newline, at least 200 characters apart.
    const string recordPattern = "(?=BT).{200,}?(?<=sc\\n)";
    // A field inside a record: from a "BT" to the first following "ET" + newline.
    const string fieldPattern = "(?=BT).*?(?<=ET\\n)";
    // The text between parentheses inside a field.
    const string literalPattern = @"\((.*?)\)";
    const RegexOptions regexFlags = RegexOptions.IgnoreCase | RegexOptions.Singleline;

    Stopwatch stopwatch = new Stopwatch();
    stopwatch.Start();

    DataTable personaTable = new DataTable("Persona");
    personaTable.Columns.Add("Id", typeof(Int32));
    personaTable.Columns.Add("Nombre", typeof(String));
    personaTable.Columns.Add("Rut", typeof(String));
    personaTable.Columns.Add("Genero", typeof(String));
    personaTable.Columns.Add("Direccion", typeof(String));
    personaTable.Columns.Add("Circunscripcion", typeof(String));
    personaTable.Columns.Add("Region", typeof(String));
    personaTable.Columns.Add("Provincia", typeof(String));
    personaTable.Columns.Add("Ciudad", typeof(String));

    string rutaArchivos = @"C:\Users\BrunoAlonsoPalmaÁvil\Desktop\Escritorio\Leer PDF excel\Nueva carpeta\Datos comunas\asd\";

    foreach (string file in Directory.EnumerateFiles(rutaArchivos, "*.pdf"))
    {
        Application.DoEvents();

        PdfReader reader = new PdfReader(file);
        try
        {
            string ciudad = "";
            string region = "";
            string provincia = "";

            for (int i = 1; i <= reader.NumberOfPages; i++)
            {
                byte[] contentBytes = iTextSharp.text.pdf.parser.ContentByteUtils.GetContentBytesForPage(reader, i);

                // The trailing line break mirrors the StringWriter.WriteLine call the original
                // used to build this string, so the regex input is unchanged.
                string resultadoPaginaBytes = System.Text.Encoding.UTF8.GetString(contentBytes) + Environment.NewLine;

                if (i == 1)
                {
                    string primeraPagina = PdfTextExtractor.GetTextFromPage(reader, i, new SimpleTextExtractionStrategy()) + Environment.NewLine;
                    try
                    {
                        // Fixed positions in the first page: line 3 (from its 14th character) is
                        // the city, line 7 the region, line 4 the province.
                        string[] lineas = primeraPagina.Split('\n');
                        ciudad = lineas[3].Substring(13);
                        region = lineas[7];
                        provincia = lineas[4];
                    }
                    catch (Exception ex)
                    {
                        // Values assigned before the failure are kept; the rest stay empty.
                        Console.WriteLine(ex.Message);
                    }
                }

                MatchCollection matches = Regex.Matches(resultadoPaginaBytes, recordPattern, regexFlags);
                foreach (Match match in matches)
                {
                    string registro = match.Value;

                    // Blocks containing these markers are skipped.
                    if (registro.Contains("CIRCUNSCRIPCION") || registro.Contains("VICIO ELEC"))
                    {
                        continue;
                    }

                    DataRow personaRow = personaTable.NewRow();
                    // NOTE(review): throws ArgumentOutOfRangeException when region has no ':' (for
                    // example when the first-page parsing above failed) - behaviour kept as is.
                    personaRow["Region"] = region.Substring(0, region.IndexOf(":") - 1);
                    personaRow["Provincia"] = provincia;
                    personaRow["Ciudad"] = ciudad;

                    MatchCollection matches2 = Regex.Matches(registro, fieldPattern, regexFlags);
                    int contador = 0;
                    foreach (Match match2 in matches2)
                    {
                        contador++;
                        string campo = match2.Value;

                        // A field's value is the concatenation of all its parenthesised parts.
                        string valor = "";
                        foreach (Match match3 in Regex.Matches(campo, literalPattern, regexFlags))
                        {
                            valor += match3.Groups[1].ToString().Replace("�", "Ñ");
                        }

                        Application.DoEvents();

                        // Fields are positional: 1 = name, 2 = RUT, 3 = gender, 4 = address,
                        // 5 = circumscription; any further field is ignored.
                        switch (contador)
                        {
                            case 1:
                                if (valor == "")
                                {
                                    // No closed "(...)" part: take everything after the first "(".
                                    valor = campo.Substring(campo.IndexOf("(") + 1);
                                }
                                personaRow["Nombre"] = valor;
                                break;
                            case 2:
                                personaRow["Rut"] = valor;
                                break;
                            case 3:
                                personaRow["Genero"] = valor;
                                break;
                            case 4:
                                personaRow["Direccion"] = valor;
                                break;
                            case 5:
                                if (matches2.Count != 7 && valor == "EL TRANSITO")
                                {
                                    Console.WriteLine(valor);
                                }
                                personaRow["Circunscripcion"] = valor;
                                break;
                        }
                    }

                    personaTable.Rows.Add(personaRow);

                    // Flush in batches of 500000 rows.
                    if (personaTable.Rows.Count == 500000)
                    {
                        InsertarBBDD(personaTable);
                        personaTable.Rows.Clear();
                    }
                }
            }
        }
        finally
        {
            // The original never closed the reader, leaking one open file per processed PDF.
            reader.Close();
        }
    }

    InsertarBBDD(personaTable);
    stopwatch.Stop();
    lblTiempoTranscurrido.Text = stopwatch.Elapsed.TotalMinutes.ToString();
}