/// <summary>
/// Downloads every linked bank statement into a temporary directory, converts it
/// to text and collects the items parsed from it.
/// </summary>
/// <param name="fromDate">Not used by this implementation.</param>
/// <param name="toDate">Not used by this implementation.</param>
/// <returns>
/// The items parsed so far. Any exception raised while processing is logged and
/// swallowed, so the result may be partial or empty.
/// </returns>
protected override IEnumerable<IBankovniPolozka> DoParse(DateTime? fromDate = default(DateTime?), DateTime? toDate = default(DateTime?))
{
    var statementItems = new List<IBankovniPolozka>();
    var tmpPath = Path.Combine(Path.GetTempPath(), Path.GetRandomFileName());
    Directory.CreateDirectory(tmpPath);
    try
    {
        var pdfUrls = GetBankStatementLinks();
        foreach (var statementUrl in pdfUrls.Keys)
        {
            var monthOfPdf = pdfUrls[statementUrl];

            // Each statement gets its own sub-directory named after its month.
            var localRoot = Path.Combine(tmpPath, monthOfPdf.ToString("MM-yyyy"));
            Directory.CreateDirectory(localRoot);

            var pdfFile = DownloadStatement(statementUrl, localRoot);
            var textFile = ConvertStatementToText(pdfFile, localRoot);
            statementItems.AddRange(ParseStatement(textFile, monthOfPdf, statementUrl));
        }
    }
    catch (Exception e)
    {
        TULogger.Error("CSOB Parse", e);
    }
    finally
    {
        // Cleanup is best-effort: a locked or already removed temporary file must not
        // throw out of the finally block and discard the items that were parsed.
        try
        {
            Directory.Delete(tmpPath, true);
        }
        catch (Exception e) when (e is IOException || e is UnauthorizedAccessException)
        {
            TULogger.Error("CSOB Parse: temporary directory cleanup failed", e);
        }
    }

    return statementItems;
}
/// <summary>
/// Converts text in the "d. M. yyyy" format to a <see cref="DateTime"/>.
/// </summary>
/// <param name="value">Text to convert.</param>
/// <returns>The parsed date.</returns>
/// <exception cref="ApplicationException">
/// Thrown, after the same message has been logged, when the conversion yields no value.
/// </exception>
private DateTime ParseDate(string value)
{
    var parsed = ParseTools.ToDateTime(value, "d. M. yyyy");
    if (!parsed.HasValue)
    {
        var message = $"KB: chybejici datum pro ucet {Ucet.CisloUctu}";
        TULogger.Error(message);
        throw new ApplicationException(message);
    }

    return parsed.Value;
}
/// <summary>
/// Converts an HTML-encoded amount to a decimal after stripping the " CZK" suffix
/// and the separator characters.
/// </summary>
/// <param name="value">HTML-encoded amount text.</param>
/// <param name="date">Date of the transaction; only used in the error message.</param>
/// <returns>The parsed amount.</returns>
/// <exception cref="ApplicationException">
/// Thrown, after the same message has been logged, when the conversion yields no value.
/// </exception>
private decimal ParsePrice(string value, DateTime date)
{
    var decoded = WebUtility.HtmlDecode(value);
    var cleaned = decoded.Replace(" CZK", "").Replace(" ", "");
    var parsed = ParseTools.ToDecimal(cleaned);
    if (!parsed.HasValue)
    {
        var message = $"KB: chybejici castka pro ucet {Ucet.CisloUctu} dne {date}";
        TULogger.Error(message);
        throw new ApplicationException(message);
    }

    return parsed.Value;
}
/// <summary>
/// Reads the transactions of the account page by page from the JSON endpoint and
/// maps each one to a <see cref="SimpleBankovniPolozka"/>.
/// </summary>
/// <param name="fromDate">
/// Optional lower bound; combined with the account's <c>transparencyFrom</c> via <c>GetNewest</c>.
/// </param>
/// <param name="toDate">Not used by this implementation.</param>
/// <returns>
/// All transactions that were read, or an empty list when the account detail has no
/// account number.
/// </returns>
/// <exception cref="ApplicationException">
/// Thrown when a page cannot be deserialized or when the number of items read differs
/// from the record count reported by the service.
/// </exception>
protected override IEnumerable<IBankovniPolozka> DoParse(DateTime? fromDate = null, DateTime? toDate = null)
{
    var polozky = new List<IBankovniPolozka>();
    var accountDetail = GetAccountDetail();
    if (accountDetail?.accountNumber == null)
    {
        // The original message carried a stray '$' in front of the account number.
        TULogger.Warning($"Account {Ucet.CisloUctu} was probably canceled");
        return polozky;
    }

    var from = GetNewest(fromDate, accountDetail.transparencyFrom);
    var page = 0;
    var totalRecords = 0;
    do
    {
        var content = GetContent(GetTransactionsPageUrl(accountDetail.accountNumber, from, page));
        var result = JsonConvert.DeserializeObject<CSResult>(content);
        if (result == null)
        {
            // Empty or "null" content deserializes to null; report it explicitly
            // instead of failing with a NullReferenceException below.
            var message = $"No data received for account {Ucet.CisloUctu} (page {page})";
            TULogger.Error(message);
            throw new ApplicationException(message);
        }

        totalRecords = result.recordCount;
        page = result.nextPage;

        foreach (var t in result.transactions ?? new Transaction[0])
        {
            polozky.Add(new SimpleBankovniPolozka
            {
                CisloUctu = Ucet.CisloUctu,
                Castka = t.amount.value,
                CisloProtiuctu = t.sender.accountNumber + "/" + t.sender.bankCode,
                Datum = t.processingDate,
                KS = t.sender.constantSymbol,
                NazevProtiuctu = t.sender.name,
                PopisTransakce = t.typeDescription,
                SS = t.sender.specificSymbol,
                VS = t.sender.variableSymbol,
                ZdrojUrl = "https://www.csas.cz/cs/transparentni-ucty#/" + accountDetail.accountNumber,
                ZpravaProPrijemce = t.sender.description
            });
        }
    } while (page > 0); // paging stops once the service reports no next page

    // Consistency check against the record count reported by the last page.
    if (totalRecords != polozky.Count)
    {
        TULogger.Error($"WE read {polozky.Count} records for account {Ucet.CisloUctu} instead of {totalRecords}");
        throw new ApplicationException($"We read {polozky.Count} records for account {Ucet.CisloUctu} instead of {totalRecords}");
    }

    return polozky;
}
/// <summary>
/// Picks the bank-specific parser registered in <c>BanksFactories</c> for the second
/// non-empty "/"-separated segment of the account URL and delegates to it.
/// </summary>
/// <param name="fromDate">Passed through to the selected parser.</param>
/// <param name="toDate">Passed through to the selected parser.</param>
/// <returns>
/// The items returned by the selected parser, or an empty array (after a fatal log
/// entry) when no parser is registered for the URL.
/// </returns>
protected override IEnumerable<IBankovniPolozka> DoParse(DateTime? fromDate = default(DateTime?), DateTime? toDate = default(DateTime?))
{
    // Invariant lower-casing keeps the lookup key stable regardless of the current culture.
    var segments = Ucet.Url?.ToLowerInvariant().Split(new[] { "/" }, StringSplitOptions.RemoveEmptyEntries);

    // A URL with fewer than two segments has no usable key; fall through to the
    // "no parser" branch instead of throwing IndexOutOfRangeException.
    var domain = segments != null && segments.Length > 1 ? segments[1] : "";

    BanksFactories.TryGetValue(domain, out var factory);
    if (factory != null)
    {
        return factory(Ucet).GetPolozky(fromDate, toDate);
    }

    TULogger.Fatal("Not parser for " + Ucet.Url, new NotImplementedException());
    return new SimpleBankovniPolozka[] { };
}
/// <summary>
/// Parses the statement for the given date range. When the statement is rejected as
/// too long, the range is split into smaller consecutive sub-ranges that are processed
/// recursively.
/// </summary>
/// <param name="actFromDate">First day of the range; only the date part is sent in the URL.</param>
/// <param name="actToDate">Last day of the range; only the date part is sent in the URL.</param>
/// <param name="splitDays">
/// Number of days added to a sub-range start to get its end when the range has to be
/// split; halved on every recursion level.
/// </param>
/// <returns>
/// The parsed items. An empty list is returned when parsing fails with any other
/// exception (which is logged) or when a single day is still too long to be displayed.
/// </returns>
private IEnumerable<IBankovniPolozka> ProcessStatement(DateTime actFromDate, DateTime actToDate, int splitDays = 30)
{
    var url = $"{Ucet.Url}&f={actFromDate:dd.MM.yyyy}&t={actToDate:dd.MM.yyyy}";
    try
    {
        return ParseStatement(url);
    }
    catch (StatementTooLongException)
    {
        var rangeStart = actFromDate.Date;
        var rangeEnd = actToDate.Date;

        // A single day cannot be split any further; retrying the same URL would
        // recurse forever.
        if (rangeStart >= rangeEnd)
        {
            TULogger.Error($"FIO: statement for account {Ucet.CisloUctu} is too long even for a single day. Url: {url}");
            return new List<IBankovniPolozka>();
        }

        // Every sub-range must be strictly shorter than the range that just failed,
        // otherwise the very same request would be repeated.
        var totalDays = (rangeEnd - rangeStart).Days;
        var step = Math.Min(Math.Max(splitDays, 0), totalDays - 1);

        var statementItems = new List<IBankovniPolozka>();
        for (var chunkFrom = rangeStart; chunkFrom <= rangeEnd; chunkFrom = chunkFrom.AddDays(step + 1))
        {
            // Clamp the end so a sub-range never reaches past the requested range;
            // an overshooting end overlaps the next chunk of the enclosing split.
            var chunkTo = chunkFrom.AddDays(step);
            if (chunkTo > rangeEnd)
            {
                chunkTo = rangeEnd;
            }

            statementItems.AddRange(ProcessStatement(chunkFrom, chunkTo, step / 2));
        }

        return statementItems;
    }
    catch (Exception e)
    {
        TULogger.Error("FIO parse", e);
        return new List<IBankovniPolozka>();
    }
}
/// <summary>
/// Loads the account page, extracts the portlet identifiers embedded in its HTML and
/// then requests the transactions page by page as JSON until a page has no transactions.
/// </summary>
/// <param name="fromDate">Start of the period; defaults to one year back plus one day.</param>
/// <param name="toDate">End of the period; defaults to today.</param>
/// <returns>
/// The items read so far. When a page cannot be processed the error is logged and the
/// items collected until then are returned.
/// </returns>
protected override IEnumerable<IBankovniPolozka> DoParse(DateTime? fromDate = null, DateTime? toDate = null)
{
    List<IBankovniPolozka> polozky = new List<IBankovniPolozka>();
    if (!fromDate.HasValue)
    {
        fromDate = DateTime.Now.Date.AddYears(-1).AddDays(1);
    }
    if (!toDate.HasValue)
    {
        toDate = DateTime.Now.Date;
    }

    // The dates go into a machine-readable URL, so they are formatted with the
    // invariant culture (the current culture may use a non-Gregorian calendar).
    var invariant = System.Globalization.CultureInfo.InvariantCulture;
    string dateFrom = fromDate.Value.ToString("yyyy-M-d", invariant);
    string dateTo = toDate.Value.ToString("yyyy-M-d", invariant);

    int page = 0;

    // Example request URLs:
    //https://www.rb.cz/o-nas/povinne-uverejnovane-informace/transparentni-ucty?p_p_id=Transparentaccountportlet_WAR_Transparentaccountportlet_INSTANCE_e6cf4781&p_p_lifecycle=2&p_p_state=normal&p_p_mode=view&p_p_resource_id=nextTransactions&p_p_cacheability=cacheLevelPage&p_p_col_id=_DynamicNestedPortlet_INSTANCE_f5c4beca__column-1-1&p_p_col_count=1&idBankAccount=24389217&fromIndex=51&dateFrom=2016-3-1&dateTo=2018-3-9&q=
    //https://www.rb.cz/o-nas/povinne-uverejnovane-informace/transparentni-ucty?p_p_id=Transparentaccountportlet_WAR_Transparentaccountportlet_INSTANCE_e6cf4781&p_p_lifecycle=2&p_p_state=normal&p_p_mode=view&p_p_resource_id=nextTransactions&p_p_cacheability=cacheLevelPage&p_p_col_id=_DynamicNestedPortlet_INSTANCE_f5c4beca__column-1-1&p_p_col_count=1&idBankAccount=24389217&fromIndex=0&dateFrom=2016-3-1&dateTo=2018-3-17&q=
    using (Devmasters.Net.Web.URLContent baseUrl = new Devmasters.Net.Web.URLContent(this.Ucet.Url))
    {
        baseUrl.IgnoreHttpErrors = true;
        var html = baseUrl.GetContent(Encoding.UTF8);

        // Identifiers needed to build the JSON request are taken from the page HTML.
        var webReqInstance = HlidacStatu.Util.ParseTools.GetRegexGroupValue(html.Text, "Transparentaccountportlet_INSTANCE_(?<inst>[a-z0-9]*)_", "inst");
        var dynamicInst = HlidacStatu.Util.ParseTools.GetRegexGroupValue(html.Text, "p_p_id_DynamicNestedPortlet_INSTANCE_(?<inst>[a-z0-9]*)_", "inst");
        var internalIdBankAccount = HlidacStatu.Util.ParseTools.GetRegexGroupValue(html.Text, @"idBankAccount=(?<id>\d*)", "id");

        if (!string.IsNullOrEmpty(webReqInstance))
        {
            bool getSomeData = true;
            do
            {
                string url = string.Format(invariant,
                    @"https://www.rb.cz/o-nas/povinne-uverejnovane-informace/transparentni-ucty?"
                    + "p_p_id=Transparentaccountportlet_WAR_Transparentaccountportlet_INSTANCE_{0}&p_p_lifecycle=2&p_p_state=normal"
                    + "&p_p_mode=view&p_p_resource_id=nextTransactions&p_p_cacheability=cacheLevelPage"
                    + "&p_p_col_id=_DynamicNestedPortlet_INSTANCE_{1}__column-1-1&p_p_col_count=1"
                    + "&idBankAccount={2}&fromIndex={3}&dateFrom={4}&dateTo={5}&q=",
                    webReqInstance, dynamicInst, internalIdBankAccount, page * 20 + 1, dateFrom, dateTo);

                // The follow-up request reuses the context of the initial page load.
                using (Devmasters.Net.Web.URLContent net = new Devmasters.Net.Web.URLContent(url, html.Context))
                {
                    net.IgnoreHttpErrors = true;
                    var json = net.GetContent().Text;
                    try
                    {
                        RBData data = Newtonsoft.Json.JsonConvert.DeserializeObject<RBData>(json);
                        page++;
                        if (data.transactions != null && data.transactions.Any())
                        {
                            getSomeData = true;
                            polozky.AddRange(
                                data.transactions
                                    .Select(m => new SimpleBankovniPolozka()
                                    {
                                        Castka = HlidacStatu.Util.ParseTools.ToDecimal(m.amount) ?? 0,
                                        CisloProtiuctu = "",
                                        CisloUctu = this.Ucet.CisloUctu,
                                        Datum = HlidacStatu.Util.ParseTools.ToDateTime(m.datumDate, "dd.MM.yyyy").Value,
                                        KS = m.constSymbol,
                                        NazevProtiuctu = m.accountName,
                                        PopisTransakce = m.type,
                                        SS = m.specSymbol,
                                        VS = m.varSymbol,
                                        ZdrojUrl = baseUrl.Url,
                                        ZpravaProPrijemce = m.info,
                                    })
                            );
                        }
                        else
                        {
                            // A page without transactions ends the paging loop.
                            getSomeData = false;
                        }
                    }
                    catch (Exception e)
                    {
                        TULogger.Error("RB parser JSON error", e);
                        return polozky;
                    }
                }
            } while (getSomeData);
        }
    }

    return polozky;
}
/// <summary>
/// Walks the paged HTML listing of the account and converts every table row into a
/// bank item until a stop condition is met.
/// </summary>
/// <param name="fromDate">
/// Optional lower bound; reading stops at the first row dated before it.
/// </param>
/// <param name="toDate">Optional upper bound; rows dated after it are skipped.</param>
/// <returns>
/// The collected items. Reading ends when a page has no rows, when a row older than
/// <paramref name="fromDate"/> is reached, or after more than five already known items
/// without a new item being added in between.
/// </returns>
protected override IEnumerable<IBankovniPolozka> DoParse(DateTime? fromDate = null, DateTime? toDate = null)
{
    TULogger.Info($"Zpracovavam ucet {Ucet.CisloUctu} s url {Ucet.Url}");
    var polozky = new List<IBankovniPolozka>();
    var page = 0;
    var duplications = 0;

    // The client is used for every page request and disposed on every exit path.
    using (var httpClient = new HttpClient())
    {
        do
        {
            var doc = new HtmlDocument();
            doc.LoadHtml(MakeRequest(++page, httpClient));
            var rows = GetTransactionItems(doc);
            if (rows == null || rows.Length == 0)
            {
                TULogger.Warning($"Nenalezeny zadne zaznamy pro ucet {Ucet.CisloUctu}");
                return polozky;
            }

            foreach (var row in rows)
            {
                var cells = row.Descendants("td").Select(c => c.InnerHtml).ToArray();
                if (cells.Length == 0)
                {
                    continue; // skip this, it's not a row with data
                }

                IBankovniPolozka p = new SimpleBankovniPolozka();
                p.CisloUctu = Ucet.CisloUctu;
                p.Datum = ParseDate(cells[0]);
                p.Castka = ParsePrice(cells[1], p.Datum);

                // Third cell: symbols separated by '/', a dash stands for "not set".
                var symbols = cells[2].Split('/').Select(TextUtil.NormalizeToBlockText).ToArray();
                p.VS = symbols.Length > 0 && symbols[0] != "—" ? symbols[0] : string.Empty;
                p.KS = symbols.Length > 1 && symbols[1] != "—" ? symbols[1] : string.Empty;
                p.SS = symbols.Length > 2 && symbols[2] != "—" ? symbols[2] : string.Empty;

                // Fourth cell: lines separated by <br>; the first line is "name (account)".
                var descriptions = cells[3].Split(new[] { "<br>" }, StringSplitOptions.None)
                    .Select(d => TextUtil.NormalizeToBlockText(WebUtility.HtmlDecode(d)))
                    .ToArray();
                if (descriptions.Length > 0)
                {
                    var account = descriptions[0].Split(new[] { "(", ")" }, StringSplitOptions.None)
                        .Select(TextUtil.NormalizeToBlockText)
                        .ToArray();
                    p.NazevProtiuctu = account.Length > 0 ? account[0] : string.Empty;
                    p.CisloProtiuctu = account.Length > 1 ? account[1] : string.Empty;
                }

                p.PopisTransakce = descriptions.Length > 1 ? descriptions[1] : string.Empty;
                p.ZpravaProPrijemce = descriptions.Length > 2
                    ? string.Join("; ", descriptions.Skip(2))
                    : string.Empty;
                p.ZdrojUrl = Ucet.Url;

                if (fromDate.HasValue && p.Datum < fromDate)
                {
                    return polozky;
                }

                if (IsAlreadyExist(polozky, p))
                {
                    duplications++;
                    if (duplications > 5)
                    {
                        return polozky;
                    }
                }
                else if (!(toDate.HasValue && p.Datum > toDate.Value))
                {
                    duplications = 0;
                    polozky.Add(p);
                }
            }

            // The list can still be empty here (e.g. every row so far is newer than
            // toDate), so Last() would throw; LastOrDefault() leaves the date blank.
            var lastDate = polozky.LastOrDefault()?.Datum;
            var progress = $"[{page}] {Ucet.CisloUctu} - {lastDate} / celkem {polozky.Count}";
            TULogger.Debug(progress);
            Console.WriteLine(progress);
        } while (true);
    }
}
/// <summary>
/// Downloads the statement page at <paramref name="url"/>, reads the summary row and
/// every transaction row, and validates the parsed items against the summary.
/// </summary>
/// <param name="url">Address of the statement page.</param>
/// <returns>
/// The distinct parsed items, or an empty list when the page contains no summary row.
/// </returns>
/// <exception cref="StatementTooLongException">
/// Thrown when the page asks for a smaller date range.
/// </exception>
private IEnumerable<IBankovniPolozka> ParseStatement(string url)
{
    var items = new HashSet<IBankovniPolozka>();
    using (var net = new Devmasters.Net.HttpClient.URLContent(url))
    {
        net.IgnoreHttpErrors = true;
        var pageText = net.GetContent(Encoding.UTF8).Text;
        if (pageText.Contains("Některé pohyby nemusí být zobrazeny. Zmenšete datumový rozsah."))
        {
            throw new StatementTooLongException();
        }

        var doc = new Devmasters.XPath(pageText);

        // Decoded HTML of a node with <br> turned into line breaks, normalized to block text.
        string BlockText(string xpath) =>
            Devmasters.TextUtil.NormalizeToBlockText(
                System.Net.WebUtility.HtmlDecode(doc.GetNodeHtml(xpath))?.Replace("<br>", " \n"));

        // Decoded plain text of a node.
        string DecodedText(string xpath) =>
            System.Net.WebUtility.HtmlDecode(doc.GetNodeText(xpath));

        var summaryPath = "//div[contains(@class, 'pohybySum')]/table/tbody/tr";
        var summaryNodes = doc.GetNodes(summaryPath);
        if (summaryNodes == null || summaryNodes.Count == 0)
        {
            TULogger.Warning($"FIO: Account statement page was not found for account {Ucet.CisloUctu}. Account has been probably canceled. Url: {url}");
            return new List<IBankovniPolozka>();
        }

        var overview = new StatementOverview();
        overview.OpeningBalance = parseAmount(doc.GetNodeText(summaryPath + "/td[1]"));
        overview.FinalBalance = parseAmount(doc.GetNodeText(summaryPath + "/td[2]"));
        overview.CreditSum = parseAmount(doc.GetNodeText(summaryPath + "/td[3]"));
        overview.DebitSum = parseAmount(doc.GetNodeText(summaryPath + "/td[4]"));

        var rowsPath = "//table[@class='table' and starts-with(@id,'id')]/tbody/tr";
        var rowNodes = doc.GetNodes(rowsPath);
        var rowCount = rowNodes == null ? 0 : rowNodes.Count;

        // XPath positions are 1-based.
        for (var index = 0; index < rowCount; index++)
        {
            var rowPath = $"{rowsPath}[{index + 1}]";

            var item = new SimpleBankovniPolozka();
            item.CisloUctu = Ucet.CisloUctu;
            item.Datum = Devmasters.DT.Util.ToDateTime(doc.GetNodeText(rowPath + "/td[1]"), "dd.MM.yyyy").Value;
            item.Castka = parseAmount(DecodedText(rowPath + "/td[2]"));
            item.PopisTransakce = DecodedText(rowPath + "/td[3]");
            item.NazevProtiuctu = DecodedText(rowPath + "/td[4]");
            item.ZpravaProPrijemce = BlockText(rowPath + "/td[5]");

            // The ninth cell is appended to the message unless it repeats it.
            var note = BlockText(rowPath + "/td[9]");
            if (note != item.ZpravaProPrijemce)
            {
                item.ZpravaProPrijemce += " " + note;
            }

            item.KS = doc.GetNodeText(rowPath + "/td[6]");
            item.VS = doc.GetNodeText(rowPath + "/td[7]");
            item.SS = doc.GetNodeText(rowPath + "/td[8]");
            item.ZdrojUrl = net.Url;
            item.CisloProtiuctu = ""; // not available

            // Add is a no-op for an item that is already in the set.
            items.Add(item);
        }

        ValidateParsedItems(items, overview);
    }

    return items;
}