/// <summary>
/// Reads the downloaded account-history HTML and returns a statement object for it.
/// Transaction extraction is not implemented yet: the rows matched by the regular
/// expression are enumerated, but this method adds nothing to the statement.
/// </summary>
/// <param name="account">The account the data belongs to (not used by this method).</param>
/// <param name="reader">Reader supplying the HTML response; it is read to the end.</param>
/// <returns>A newly created statement that this method leaves unpopulated.</returns>
public override IOnlineStatement Parse( OnlineAccount account, StreamReader reader )
{
    // Format of this data is HTML
    string page = reader.ReadToEnd();
    OnlineStatement result = new OnlineStatement();

    // One match per row of the history table. Capture groups:
    //   1 = "history sequence number" - hopefully a unique ID
    //   2 = date (two digits / two digits / four digits)
    //   3 = description
    //   4 = debit column text
    //   5 = credit column text
    //   6 = balance column text
    const string RowPattern = @"<td>\s*<a class=""blue noul m_link m_type""\s*onclick="".*?""\s*href=""/myaccount/.*?&historySequenceNumber=(\d+?)&.*?"">(\d\d/\d\d/\d\d\d\d)</a>\s*</td>\s*<td>\s*<div class=""s16 mb0 mr0"">\s*<div>\s*<a class=""blue m_link m_type"" onclick="".*?""\s*href=""/myaccount/.*?"">\s*<span class=""transaction_description"">\s*(.*?)\s*</span>\s*</a>\s*<div>\s*</div>\s*</td>\s*<td class=""right"">\s*<a class=""blue noul m_link m_type""\s*onclick="".*?""\s*href=""/myaccount/.*?"">(.*?)</a>\s*</td>\s*<td class=""right"">\s*<a class=""blue noul m_link m_type""\s*onclick="".*?""\s*href=""/myaccount/.*?"">(.*?)</a>\s*</td>\s*<td class=""right"">\s*<a class=""blue noul m_link m_type""\s*onclick="".*?""\s*href=""/myaccount/.*?"">(.*?)</a>\s*</td>";
    Regex rowRegex = new Regex( RowPattern, RegexOptions.Singleline );

    MatchCollection rows = rowRegex.Matches( page );
    foreach( Match row in rows )
    {
        // Placeholder: a transaction is created per row but is neither filled in
        // nor added to the statement yet.
        OnlineTransaction transaction = new OnlineTransaction();
    }

    // ---- Legacy CSV parser, kept commented out for reference ----
    // Columns:
    //   0 = bank id
    //   1 = account number
    //   2 = account type (CHECKING)
    //   3 = balance
    //   4 = start date (YYYY-MM-DD)
    //   5 = end date (YYYY-MM-DD)
    //   6 = transaction type (CREDIT or DEBIT)
    //   7 = transaction date (YYYY-MM-DD)
    //   8 = transaction amount
    //   9 = transaction id
    //   10 = transaction description
    //
    //reader.ReadLine(); // Skip headers
    //DateTime lastDate = DateTime.MinValue;
    //int lastSequence = 0;
    //string line;
    //while( (line = reader.ReadLine()) != null )
    //{
    //    string[] columns = line.Split( ',' );
    //    OnlineTransaction record = new OnlineTransaction();
    //    record.Amount = decimal.Parse( columns[8] );
    //    // Form BankId from account number and transaction id
    //    record.BankId = columns[1] + "-" + columns[9];
    //    record.Date = DateTime.Parse( columns[7] );
    //    record.Description = columns[10];
    //    record.Number = null;
    //    // Strip the "- Debit" suffix ING puts on the description
    //    if( record.Description.EndsWith( " - Debit" ) )
    //        record.Description = record.Description.Substring( 0, record.Description.Length - 8 );
    //    if( record.Date > lastDate ) lastSequence = -1;
    //    record.Sequence = ++lastSequence;
    //    // Transactions are given to us in descending order.
    //    // Reverse it so they are in ascending order
    //    // by inserting at the top of the list each time.
    //    statement.Transactions.Insert( 0, record );
    //}

    // TODO: Parse balances
    return result;
}
/// <summary>
/// Builds a statement for the mortgage account from its HTML page. Only the
/// balance is extracted; no transactions are added by this method.
/// </summary>
/// <param name="html">Full HTML text of the mortgage page.</param>
/// <returns>
/// A statement whose <c>Balance</c> and <c>AvailableBalance</c> are both set to
/// the amount captured from the page.
/// </returns>
private IOnlineStatement ParseMortgagePage( string html )
{
    OnlineStatement statement = new OnlineStatement();

    // The pattern captures the text after the dollar sign in the cell that
    // follows the "litBalance" label.
    // NOTE(review): presumably RegexCapture returns the first capture group;
    // its behaviour when nothing matches is not visible here - confirm.
    string balance = this.browser.RegexCapture( @"<td><label for=""litBalance"">.*?</label></td>\s+<td class=""currency"">\$([\d,.]*?)</td>", html );

    // The pattern only admits digits, ',' and '.', i.e. a US-formatted amount.
    // Parse with the invariant culture so the result does not depend on the
    // regional settings of the machine running the import.
    statement.Balance = decimal.Parse( balance, System.Globalization.CultureInfo.InvariantCulture );
    statement.AvailableBalance = statement.Balance;

    return statement;
}
/// <summary>
/// Parses the Visa account's HTML activity page into a statement. Only rows that
/// match the purchase layout are picked up; payments and finance charges are not
/// matched by the pattern. Rows whose date or amount cannot be parsed are logged
/// as warnings and skipped.
/// </summary>
/// <param name="account">The account being parsed (not used by this method).</param>
/// <param name="reader">Reader supplying the HTML response; it is read to the end.</param>
/// <returns>A statement containing one transaction per successfully parsed row, in page order.</returns>
public IOnlineStatement ParseVisa( OnlineAccount account, StreamReader reader )
{
    // Dates and amounts on the page are in fixed US notation (M/dd/yy, 1,234.56),
    // so parse them independently of the current thread culture.
    System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

    // Format is HTML
    string html = reader.ReadToEnd();
    OnlineStatement statement = new OnlineStatement();

    // Rather complex regex expression...
    // This regex only matches purchases, not payments. Capture groups:
    //   1 = first date column
    //   2 = second date column
    //   3 = description
    //   4 = amount (digits after the dollar sign)
    Regex regex = new Regex( @"<TR class="".*?"">\s+<TD vAlign=""middle"" height=""6"" width=""13%"">\s+<FONT color=""#000000"" size=""1"" face=""Arial"">(\d{1,2}/\d\d/\d\d)</FONT>\s+</TD><TD vAlign=""middle"" height=""6"" width=""13%"">\s+<FONT color=""#000000"" size=""1"" face=""Arial"">(\d{1,2}/\d\d/\d\d)</FONT>\s+</TD><TD vAlign=""middle"" height=""6"" width=""55%"">\s+<FONT color=""#000000"" size=""1"" face=""Arial"">(.*?)</FONT>\s+</TD><TD vAlign=""middle"" height=""6"" width=""12%"">\s+<FONT color=""#000000"" size=""1"" face=""Arial"">\s+\$([\d,.]+)</FONT>" );
    MatchCollection matches = regex.Matches( html );

    // Resulting list should be in order from oldest to newest.
    // Fortunately that is how the transactions are given to us.
    // TODO: Pick out payments and finance charges, too!
    foreach( Match match in matches )
    {
        OnlineTransaction otrans = new OnlineTransaction();
        try
        {
            // The transaction date is taken from the SECOND date column (group 2).
            // NOTE(review): the original comment said "first column"; which of the
            // two dates is intended cannot be determined from this code - confirm.
            otrans.Date = DateTime.Parse( Cleanse( match.Groups[2].Value ), invariant );
            otrans.Description = Cleanse( match.Groups[3].Value.Trim() ).Trim();
            otrans.Amount = Decimal.Parse( Cleanse( match.Groups[4].Value ), invariant );
            otrans.BankId = this.CreateBankId( otrans, otrans.Description );

            statement.Transactions.Add( otrans );
        }
        catch( FormatException ex )
        {
            // Cannot parse transaction - just skip it.
            this.LogWrite(
                OnlineServiceEventType.Warning,
                string.Format( "Skipping {0}, cannot parse: {1}", match.Groups[0], ex.Message ) );
        }
    }

    // TODO: Parse balances
    return statement;
}
/// <summary>
/// Parses an HTML account-history page into a statement. Accounts whose name
/// starts with "Mortgage" are delegated to <c>ParseMortgagePage</c>; names starting
/// with "Checking" or "Equity" are parsed row by row with a layout-specific
/// pattern. Rows marked PENDING are ignored.
/// </summary>
/// <param name="account">Account being parsed; its name selects the page layout and its type controls the sign of amounts.</param>
/// <param name="reader">Reader supplying the HTML response; it is read to the end.</param>
/// <returns>A statement whose transactions are in the reverse of page order, each with a per-date sequence number.</returns>
/// <exception cref="OnlineServiceException">The account name does not start with a supported prefix.</exception>
public override IOnlineStatement Parse( OnlineAccount account, StreamReader reader )
{
    // Dates (MM/dd/yyyy) and amounts (1,234.56) on the page are in fixed US
    // notation, so parse them independently of the current thread culture.
    System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

    string html = reader.ReadToEnd();

    // Mortgage is a special case
    if( account.Name.StartsWith( "Mortgage", StringComparison.Ordinal ) )
    {
        return this.ParseMortgagePage( html );
    }

    // Work out the account kind once instead of on every row.
    bool isChecking = account.Name.StartsWith( "Checking", StringComparison.Ordinal );
    bool isEquity = account.Name.StartsWith( "Equity", StringComparison.Ordinal );

    OnlineStatement statement = new OnlineStatement();

    //string balance = this.browser.RegexCapture( @"Balance: \$[\d,.]+?</div>", html );
    //statement.Balance = decimal.Parse( balance );

    // Capture groups shared by both patterns:
    //   1 = date (or PENDING on the checking page)
    //   2 = ss_tempTransaction token from the link
    //   3 = description
    //   4 = first currency column  (treated as a debit below)
    //   5 = second currency column (treated as a credit below)
    // The checking pattern additionally captures a third currency column as
    // group 6, which is not used here.
    string regex;
    if( isChecking )
    {
        regex = @"<td\s?>\s+(\d\d/\d\d/\d\d\d\d|PENDING)</td>\s+<td>.*?</td>\s+<td><span id="".*?""><a class="""" ID="".*?"" href='.*?ss_tempTransaction=(PD94.*?)&.*?'>(.*?)\s*</a></span></td>\s+<td class=""currency""\s*?>\s+\$?([\d.,]*?)</td>\s+<td class=""currency""\s*?>\s+\$?([\d.,]*?)</td>\s+<td class=""currency""\s*?>\s+(\$?[\d.,]*?)</td>";
    }
    else if( isEquity )
    {
        regex = @"<td>(\d\d/\d\d/\d\d\d\d)</td>\s+<td>.*?</td>\s+<td><span id="".*?""><a class="""" ID="".*?"" href='.*?ss_tempTransaction=(PD94.*?)&.*?'>(.*?)\s*</a></span></td>\s+<td class=""currency"">\$?([\d.,]*?)</td>\s+<td class=""currency"">\$?([\d.,]*?)</td>";
    }
    else
    {
        throw new OnlineServiceException( "Parsing the " + account.Name + " account is not supported at this time." );
    }

    MatchCollection matches = Regex.Matches( html, regex );
    foreach( Match match in matches )
    {
        if( match.Groups[1].Value == "PENDING" )
        {
            // Pending rows are not reported as transactions.
            continue;
        }

        OnlineTransaction otrans = new OnlineTransaction();
        otrans.Date = DateTime.Parse( match.Groups[1].Value, invariant );
        otrans.Description = this.Cleanse( match.Groups[3].Value );

        // Parse check numbers
        if( isChecking && otrans.Description.StartsWith( "CHECK #", StringComparison.Ordinal ) )
        {
            // Everything after the 7-character "CHECK #" prefix is the number.
            otrans.Number = otrans.Description.Substring( 7 ).Trim();
        }

        if( isEquity )
        {
            // Parse out any 3+ digit number beginning with hash mark (#110)
            Match checkmatch = Regex.Match( otrans.Description, @"[^#]*?#(\d\d\d+)[^#]*?" );
            if( checkmatch.Success )
            {
                otrans.Number = checkmatch.Groups[1].Value;
            }
        }

        // Compute positive or negative amount based on position in debit or credit column.
        // If both columns are empty the amount keeps its default value.
        if( match.Groups[4].Value.Length > 0 )
        {
            otrans.Amount = -Decimal.Parse( match.Groups[4].Value, invariant );
        }
        else if( match.Groups[5].Value.Length > 0 )
        {
            otrans.Amount = Decimal.Parse( match.Groups[5].Value, invariant );
        }

        // Liability accounts use the opposite sign convention.
        if( account.AccountType == OnlineAccountType.Liability )
        {
            otrans.Amount = -otrans.Amount;
        }

        // Add to beginning of list to reverse the order from oldest to newest
        statement.Transactions.Insert( 0, otrans );
    }

    // Calculate the Sequence: a counter that restarts at 0 whenever the date
    // advances and increments for each further transaction on the same date.
    DateTime lastDate = DateTime.MinValue;
    int lastSequence = 0;
    foreach( OnlineTransaction otrans in statement.Transactions )
    {
        if( otrans.Date > lastDate )
        {
            lastSequence = -1;
        }

        otrans.Sequence = ++lastSequence;
        lastDate = otrans.Date;
    }

    return statement;
}
/// <summary>
/// Parses a downloaded account export into a statement. The account named "Visa"
/// is delegated to <c>ParseVisa</c>; every other account is treated as quoted CSV
/// text. Records dated before 2009-12-05 are dropped, and records whose date or
/// amount cannot be parsed are logged as warnings and skipped.
/// </summary>
/// <param name="account">Account being parsed; only its name is examined here.</param>
/// <param name="reader">Reader supplying the response; it is read to the end.</param>
/// <returns>A statement containing the accepted transactions in file order, each with a per-date sequence number.</returns>
public override IOnlineStatement Parse( OnlineAccount account, StreamReader reader )
{
    // The Visa account has its own parser.
    if( account.Name == "Visa" )
    {
        return this.ParseVisa( account, reader );
    }

    // Dates (M/dd/yyyy) and amounts (1,234.56) in the export are in fixed US
    // notation, so parse them independently of the current thread culture.
    System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

    // HACK: Reject dates before 12/5 because that is when the new Vacu
    // site came online.
    DateTime cutoff = new DateTime( 2009, 12, 5 );

    // Format is actually CSV, not HTML
    string text = reader.ReadToEnd();
    OnlineStatement statement = new OnlineStatement();

    // 1 = date
    // 2 = description
    // 3 = comments, with newlines
    // 4 = check number
    // 5 = debit "(" or credit ""
    // 6 = amount
    // 7 = balance
    string regexString = @"""(\d{1,2}/\d\d/\d\d\d\d)"",""(.*?)"",""(.*?)"",""(.*?)"",""(\(?)\$([\d,.-]*?)\)?"",""(.*?)""";

    // Singleline lets '.' span the newlines that can occur inside the comments field.
    Regex regex = new Regex( regexString, RegexOptions.Singleline );
    MatchCollection matches = regex.Matches( text );

    // Resulting list should be in order from oldest to newest.
    // Fortunately that is how the transactions are given to us.
    DateTime lastDate = DateTime.MinValue;
    int lastSequence = 0;

    foreach( Match match in matches )
    {
        OnlineTransaction otrans = new OnlineTransaction();
        try
        {
            // First column is the date
            otrans.Date = DateTime.Parse( Cleanse( match.Groups[1].Value ), invariant );
            if( otrans.Date < cutoff )
            {
                continue;
            }

            string kind = match.Groups[2].Value;
            if( kind.StartsWith( "DBT PURCHASE", StringComparison.Ordinal )
                || kind.StartsWith( "POS PURCHASE", StringComparison.Ordinal ) )
            {
                // Leave off the "DBT"/"POS" information as it is somewhat useless.
                otrans.Description = Cleanse( match.Groups[3].Value ).Trim();
            }
            else
            {
                otrans.Description = ( Cleanse( kind ) + " " + Cleanse( match.Groups[3].Value ) ).Trim();
            }

            // Drop everything from the "CARD NBR:" marker onwards, if present.
            int i = otrans.Description.IndexOf( "CARD NBR:", StringComparison.Ordinal );
            if( i >= 0 )
            {
                otrans.Description = otrans.Description.Substring( 0, i ).Trim();
            }

            if( match.Groups[4].Value.Length > 0 )
            {
                otrans.Number = match.Groups[4].Value;
            }

            otrans.Amount = Decimal.Parse( Cleanse( match.Groups[6].Value ), invariant );

            // Values displayed in parens are negative (debits).
            if( match.Groups[5].Value == "(" )
            {
                otrans.Amount = -otrans.Amount;
            }

            // The running balance is passed to CreateBankId as the distinguishing
            // value (the original note says a hash of it serves as the unique
            // identifier).
            otrans.BankId = this.CreateBankId( otrans, Cleanse( match.Groups[7].Value ) );

            // Calculate the Sequence: restarts at 0 whenever the date advances and
            // increments for each further transaction on the same date.
            if( otrans.Date > lastDate )
            {
                lastSequence = -1;
            }

            otrans.Sequence = ++lastSequence;
            lastDate = otrans.Date;

            statement.Transactions.Add( otrans );
        }
        catch( FormatException ex )
        {
            // Cannot parse transaction for some reason - just skip it.
            this.LogWrite(
                OnlineServiceEventType.Warning,
                string.Format( "Skipping {0}, cannot parse: {1}", match.Groups[0], ex.Message ) );
        }
    }

    // TODO: Parse balances
    return statement;
}