/// <summary>
/// Builds a <see cref="Statement"/> from the tab-separated text produced from the block list of
/// <paramref name="filename"/>.
/// </summary>
/// <param name="filename">Path handed to <c>Routines.ExtractBlockList</c>.</param>
/// <param name="generateInterimFiles">
/// When true, the intermediate text stream is passed to <c>Routines.WriteStreamToFile</c> with the
/// target name <c>filename + ".txt"</c>.
/// </param>
/// <returns>
/// The statement with its date (when a date line was found) and every recognised transaction except
/// rows described as "PAYMENT THANK YOU".
/// </returns>
public Statement ExtractStatement(string filename, bool generateInterimFiles)
{
    var blocklist = Routines.ExtractBlockList(filename);
    var textStream = Routines.GetStream(Routines.BlockListToString(blocklist));
    if (generateInterimFiles)
    {
        Routines.WriteStreamToFile(textStream, filename + ".txt");
    }

    using (var textReader = new StreamReader(textStream))
    {
        var statement = new Statement();
        var statementDateNoted = false;
        while (!textReader.EndOfStream)
        {
            var line = textReader.ReadLine();
            var items = line?.Split('\t');
            if (!statementDateNoted && line == "transactions")
            {
                // The line following the "transactions" marker carries day, month and year
                // as its first three tab-separated fields.
                line = textReader.ReadLine();
                items = line?.Split('\t');

                // The marker may be the last line, or the next line may be too short; in that
                // case leave the date unset and keep scanning instead of dereferencing null
                // or indexing past the end of the array.
                if (items != null && items.Length >= 3)
                {
                    statement.StatementDate = new DateTime(
                        Routines.StrToInt(items[2]),
                        Routines.StrToInt(items[1]),
                        Routines.StrToInt(items[0]));
                    statementDateNoted = true;
                }
            }
            else if ((items?.Length == 7)
                     && Routines.IsNumeric(items[0])
                     && Routines.IsNumeric(items[1])
                     && Routines.IsNumeric(items[2])
                     && Routines.IsNumeric(items[3])
                     && Routines.IsNumeric(items[4])
                     && Routines.IsNumeric(items[6]))
            {
                // Transaction row: field 0 is the day, field 1 the month, field 5 the
                // description and field 6 the amount.
                var month = Routines.StrToInt(items[1]);
                var day = Routines.StrToInt(items[0]);

                // Rows carry no year. A December row on a January statement belongs to the
                // previous year; everything else uses the statement's year.
                var year = (statement.StatementDate.Month == 1 && month == 12)
                    ? statement.StatementDate.Year - 1
                    : statement.StatementDate.Year;

                var t = new Transaction
                {
                    DateTime = new DateTime(year, month, day),
                    Description = items[5],
                    Value = -Routines.StrToNumber(items[6]) // stored with the sign flipped
                };

                // Ignoring payments
                if (!string.Equals(t.Description, "PAYMENT THANK YOU", StringComparison.InvariantCultureIgnoreCase))
                {
                    statement.Transactions.Add(t);
                }
            }
        }

        return statement;
    }
}
/// <summary>
/// Builds a <see cref="Statement"/> from the text extracted from <paramref name="filename"/> with
/// <c>Routines.ExtractPdfText</c>.
/// </summary>
/// <param name="filename">Path handed to <c>Routines.ExtractPdfText</c>.</param>
/// <param name="generateInterimFiles">
/// When true, the extracted text stream is passed to <c>Routines.WriteStreamToFile</c> with the
/// target name <c>filename + ".txt"</c>.
/// </param>
/// <returns>
/// The statement with its date (taken from a "Statement From ... TO ..." field) and every
/// recognised transaction row.
/// </returns>
public Statement ExtractStatement(string filename, bool generateInterimFiles)
{
    const string StatementDatePrefix = "Statement From ";

    var textStream = Routines.ExtractPdfText(filename, new TextExtractStrategy());
    if (generateInterimFiles)
    {
        Routines.WriteStreamToFile(textStream, filename + ".txt");
    }

    using (var textReader = new StreamReader(textStream))
    {
        var statement = new Statement();
        var statementDateNoted = false;
        while (!textReader.EndOfStream)
        {
            var line = textReader.ReadLine();
            var items = line?.Split(new[] { '\t' }, StringSplitOptions.RemoveEmptyEntries);

            // RemoveEmptyEntries turns a blank (or tab-only) line into an empty array, and a
            // null line yields null; neither has a first field to inspect.
            if (items == null || items.Length == 0)
            {
                continue;
            }

            if (!statementDateNoted && items[0].StartsWith("Statement From", StringComparison.InvariantCultureIgnoreCase))
            {
                // The match above does not include the trailing space, so the field can be
                // shorter than the prefix being stripped; Substring would throw in that case.
                if (items[0].Length >= StatementDatePrefix.Length)
                {
                    var dates = items[0]
                        .Substring(StatementDatePrefix.Length)
                        .Split(new[] { " TO " }, StringSplitOptions.RemoveEmptyEntries);

                    // Expect exactly "<start> TO <end>"; the end date becomes the statement date.
                    if (dates.Length == 2)
                    {
                        statement.StatementDate = DateTime.Parse(dates[1]);
                        statementDateNoted = true;
                    }
                }
            }
            else if (items.Length == 4
                     && Routines.IsNumeric(items[3])
                     && Routines.IsShortMonthAndDay(items[0])
                     && Routines.IsShortMonthAndDay(items[1]))
            {
                // Transaction row: two month/day fields, a description and an amount.
                // The first date field is the one used for the transaction.
                var tokens = items[0].Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
                var month = Routines.ShortMonthToInt(tokens[0]);
                var day = Routines.StrToInt(tokens[1]);

                // Rows carry no year. A December row on a January statement belongs to the
                // previous year; everything else uses the statement's year.
                var year = statement.StatementDate.Year
                           - ((statement.StatementDate.Month == 1 && month == 12) ? 1 : 0);

                var t = new Transaction();
                t.DateTime = new DateTime(year, month, day);
                t.Description = items[2];
                t.Value = -Routines.StrToNumber(items[3]); // stored with the sign flipped
                statement.Transactions.Add(t);
            }
        }

        return statement;
    }
}
/// <summary>
/// Builds a <see cref="Statement"/> from the block list of <paramref name="filename"/> after
/// filtering out blocks that are not part of the transaction table.
/// </summary>
/// <param name="filename">Path handed to <c>Routines.ExtractBlockList</c>.</param>
/// <param name="generateInterimFiles">
/// When true, the intermediate text stream is passed to <c>Routines.WriteStreamToFile</c> with the
/// target name <c>filename + ".txt"</c>.
/// </param>
/// <returns>
/// The statement with its date (taken from a "From ... to ..." field) and every transaction row
/// found after the "Balance ($)" header row.
/// </returns>
public Statement ExtractStatement(string filename, bool generateInterimFiles)
{
    var blocklist = Routines.ExtractBlockList(filename);
    foreach (var blocks in blocklist)
    {
        // Removing barcode and code on the left side
        var item1 = Routines.FindTextBlock(blocks, "Details of your account activity", Routines.TextSearchOptions.StartsWith);
        if (item1 != null)
        {
            // Materialise the matches first so the collection is not modified while enumerated.
            foreach (var block in blocks.Where(b => b.Left < item1.Left).ToList())
            {
                blocks.Remove(block);
            }
        }

        // Removing the numbers on the right side of the form
        var item2 = Routines.FindTextBlock(blocks, "Balance ($)", Routines.TextSearchOptions.StartsWith);
        if (item2 != null)
        {
            foreach (var block in blocks.Where(b => b.Left > item2.Right).ToList())
            {
                blocks.Remove(block);
            }
        }

        // Adding minus to Withdrawals: blocks whose Top is less than the header's Top and whose
        // right edge is within 2 units of the header's right edge.
        var withdrawals = Routines.FindTextBlock(blocks, "Withdrawals ($)", Routines.TextSearchOptions.ExactMatch);
        if (withdrawals != null)
        {
            foreach (var block in blocks.Where(b => b.Top < withdrawals.Top && Math.Abs(b.Right - withdrawals.Right) < 2).ToList())
            {
                block.Text = $"-{block.Text}";
            }
        }

        // Removing Balance amounts: same column test as above, against the balance header.
        var balance = Routines.FindTextBlock(blocks, "Balance ($)", Routines.TextSearchOptions.ExactMatch);
        if (balance != null)
        {
            foreach (var block in blocks.Where(b => b.Top < balance.Top && Math.Abs(b.Right - balance.Right) < 2).ToList())
            {
                blocks.Remove(block);
            }
        }
    }

    var textStream = Routines.GetStream(Routines.BlockListToString(blocklist));
    if (generateInterimFiles)
    {
        Routines.WriteStreamToFile(textStream, filename + ".txt");
    }

    using (var textReader = new StreamReader(textStream))
    {
        var statement = new Statement();
        var previousTransaction = new Transaction();
        var statementDateNoted = false;
        var readTransactions = false;
        while (!textReader.EndOfStream)
        {
            var line = textReader.ReadLine();
            if (line == null || line.Length == 1) // Ignoring 1 character lines
            {
                continue;
            }

            var items = line.Split(new[] { '\t' }, StringSplitOptions.RemoveEmptyEntries);

            // RemoveEmptyEntries turns an empty (or tab-only) line into an empty array, which
            // has neither a first nor a last field to inspect.
            if (items.Length == 0)
            {
                continue;
            }

            // Position of the "to " separator in a "From ... to ..." field, or -1 when this row
            // is not a usable statement-date row (already noted, wrong prefix, or no separator).
            var separatorIndex = !statementDateNoted && items[0].StartsWith("From ", StringComparison.InvariantCultureIgnoreCase)
                ? items[0].IndexOf("to ", StringComparison.Ordinal)
                : -1;

            if (separatorIndex >= 0)
            {
                // Everything after "to " is the end of the period and becomes the statement date.
                statement.StatementDate = DateTime.Parse(items[0].Substring(separatorIndex + 3));
                statementDateNoted = true;
            }
            else if (!readTransactions && items.Last() == "Balance ($)")
            {
                // Header row of the table; transaction rows are only accepted after it.
                readTransactions = true;
            }
            else if (readTransactions
                     && Routines.IsNumeric(items.Last())
                     && (items.Length == 2 || (items.Length == 3 && Routines.IsDayAndShortMonth(items.First()))))
            {
                var t = new Transaction();
                if (items.Length == 3) // Process date
                {
                    // Dated row: "<day> <month>", description, amount.
                    var tokens = items[0].Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
                    var month = Routines.ShortMonthToInt(tokens[1]);
                    var day = Routines.StrToInt(tokens[0]);

                    // Rows carry no year. A December row on a January statement belongs to the
                    // previous year; everything else uses the statement's year.
                    var year = statement.StatementDate.Year
                               - ((statement.StatementDate.Month == 1 && month == 12) ? 1 : 0);

                    t.DateTime = new DateTime(year, month, day);
                    t.Description = items[1];
                    t.Value = Routines.StrToNumber(items[2]);
                }
                else
                {
                    // Undated row (description, amount): reuse the date of the previous row.
                    t.DateTime = previousTransaction.DateTime;
                    t.Description = items[0];
                    t.Value = Routines.StrToNumber(items[1]);
                }

                statement.Transactions.Add(t);
                previousTransaction = t;
            }
        }

        return statement;
    }
}