/// <summary>
/// Downloads the account's transactions page by page and maps every transaction
/// to a <see cref="SimpleBankovniPolozka"/>.
/// </summary>
/// <param name="fromDate">
/// Optional start date; it is passed together with <c>accountDetail.transparencyFrom</c>
/// to <c>GetNewest</c> to obtain the date used when building the page URL.
/// </param>
/// <param name="toDate">Not used by this implementation.</param>
/// <returns>
/// All parsed items; an empty list when the account detail or its account number is missing.
/// </returns>
/// <exception cref="ApplicationException">
/// A page could not be deserialized, or the number of items read differs from the
/// record count reported by the service.
/// </exception>
protected override IEnumerable<IBankovniPolozka> DoParse(DateTime? fromDate = null, DateTime? toDate = null)
{
    var polozky = new List<IBankovniPolozka>();

    var accountDetail = GetAccountDetail();
    if (accountDetail?.accountNumber == null)
    {
        // Fixed: the original "${...}" emitted a stray literal '$' in front of the account number.
        TULogger.Warning($"Account {Ucet.CisloUctu} was probably canceled");
        return polozky;
    }

    var from = GetNewest(fromDate, accountDetail.transparencyFrom);
    var page = 0;
    var totalRecords = 0;

    do
    {
        var content = GetContent(GetTransactionsPageUrl(accountDetail.accountNumber, from, page));
        var result = JsonConvert.DeserializeObject<CSResult>(content);
        if (result == null)
        {
            // DeserializeObject yields null for an empty or literal "null" payload;
            // fail with a descriptive error instead of a NullReferenceException below.
            var message = $"Empty or unparsable response for account {Ucet.CisloUctu} on page {page}";
            TULogger.Error(message);
            throw new ApplicationException(message);
        }

        totalRecords = result.recordCount;
        page = result.nextPage;

        // A page without a transactions array contributes no items.
        if (result.transactions != null)
        {
            foreach (var t in result.transactions)
            {
                polozky.Add(new SimpleBankovniPolozka
                {
                    CisloUctu = Ucet.CisloUctu,
                    Castka = t.amount.value,
                    CisloProtiuctu = t.sender.accountNumber + "/" + t.sender.bankCode,
                    Datum = t.processingDate,
                    KS = t.sender.constantSymbol,
                    NazevProtiuctu = t.sender.name,
                    PopisTransakce = t.typeDescription,
                    SS = t.sender.specificSymbol,
                    VS = t.sender.variableSymbol,
                    ZdrojUrl = "https://www.csas.cz/cs/transparentni-ucty#/" + accountDetail.accountNumber,
                    ZpravaProPrijemce = t.sender.description
                });
            }
        }
    } while (page > 0); // the loop continues only while the service reports a positive next page

    // Sanity check against the record count reported by the last page.
    if (totalRecords != polozky.Count)
    {
        TULogger.Error($"WE read {polozky.Count} records for account {Ucet.CisloUctu} instead of {totalRecords}");
        throw new ApplicationException($"We read {polozky.Count} records for account {Ucet.CisloUctu} instead of {totalRecords}");
    }

    return polozky;
}
/// <summary>
/// Reads the account's transaction listing page by page (starting with page 1) and
/// converts each table row into a <see cref="SimpleBankovniPolozka"/>.
/// </summary>
/// <param name="fromDate">
/// Optional lower bound; parsing stops and the items collected so far are returned
/// as soon as a row dated before this value is encountered.
/// </param>
/// <param name="toDate">Optional upper bound; rows dated after this value are not added.</param>
/// <returns>
/// The collected items. Parsing ends when a page has no rows, when a row older than
/// <paramref name="fromDate"/> is found, or when more than five already-collected
/// items are seen without a new item being added in between.
/// </returns>
protected override IEnumerable<IBankovniPolozka> DoParse(DateTime? fromDate = null, DateTime? toDate = null)
{
    TULogger.Info($"Zpracovavam ucet {Ucet.CisloUctu} s url {Ucet.Url}");
    var polozky = new List<IBankovniPolozka>();
    var page = 0;
    var duplications = 0;

    // The client is disposed on every exit path (the original never released it).
    using (var httpClient = new HttpClient())
    {
        do
        {
            var doc = new HtmlDocument();
            doc.LoadHtml(MakeRequest(++page, httpClient));
            var rows = GetTransactionItems(doc);
            if (rows == null || rows.Length == 0)
            {
                TULogger.Warning($"Nenalezeny zadne zaznamy pro ucet {Ucet.CisloUctu}");
                return polozky;
            }

            foreach (var row in rows)
            {
                var cells = row.Descendants("td").Select(c => c.InnerHtml).ToArray();
                if (cells.Length == 0)
                {
                    continue; //skip this, it's not row with data
                }

                IBankovniPolozka p = new SimpleBankovniPolozka();
                p.CisloUctu = Ucet.CisloUctu;
                p.Datum = ParseDate(cells[0]);
                p.Castka = ParsePrice(cells[1], p.Datum);

                // Third cell holds up to three '/'-separated symbols; a dash marks a missing one.
                var symbols = cells[2].Split('/').Select(TextUtil.NormalizeToBlockText).ToArray();
                p.VS = symbols.Length > 0 && symbols[0] != "—" ? symbols[0] : string.Empty;
                p.KS = symbols.Length > 1 && symbols[1] != "—" ? symbols[1] : string.Empty;
                p.SS = symbols.Length > 2 && symbols[2] != "—" ? symbols[2] : string.Empty;

                // Fourth cell is split on <br>: first part "name (account)", second the
                // transaction description, the rest is joined into the message.
                var descriptions = cells[3].Split(new[] { "<br>" }, StringSplitOptions.None)
                    .Select(d => TextUtil.NormalizeToBlockText(WebUtility.HtmlDecode(d)))
                    .ToArray();
                if (descriptions.Length > 0)
                {
                    var account = descriptions[0].Split(new[] { "(", ")" }, StringSplitOptions.None)
                        .Select(TextUtil.NormalizeToBlockText)
                        .ToArray();
                    p.NazevProtiuctu = account.Length > 0 ? account[0] : string.Empty;
                    p.CisloProtiuctu = account.Length > 1 ? account[1] : string.Empty;
                }

                p.PopisTransakce = descriptions.Length > 1 ? descriptions[1] : string.Empty;
                p.ZpravaProPrijemce = descriptions.Length > 2
                    ? string.Join("; ", descriptions.Skip(2))
                    : string.Empty;
                p.ZdrojUrl = Ucet.Url;

                if (fromDate.HasValue && p.Datum < fromDate)
                {
                    return polozky;
                }

                if (IsAlreadyExist(polozky, p))
                {
                    duplications++;
                    if (duplications > 5)
                    {
                        return polozky;
                    }
                }
                else if (!(toDate.HasValue && p.Datum > toDate.Value))
                {
                    duplications = 0;
                    polozky.Add(p);
                }
            }

            // Nothing may have been accepted yet (e.g. every row so far is newer than toDate);
            // Last() would throw on the empty list, so fall back to an empty date in the message.
            var lastDate = polozky.LastOrDefault()?.Datum;
            var progress = $"[{page}] {Ucet.CisloUctu} - {lastDate} / celkem {polozky.Count}";
            TULogger.Debug(progress);
            Console.WriteLine(progress);
        } while (true);
    }
}
/// <summary>
/// Downloads the statement page at <paramref name="url"/>, reads the overview totals and
/// turns every row of the transaction table into a <see cref="SimpleBankovniPolozka"/>.
/// </summary>
/// <param name="url">Address of the statement page to download.</param>
/// <returns>
/// The set of parsed items, or an empty list when the overview table is not present on the page.
/// </returns>
/// <exception cref="StatementTooLongException">
/// The page contains the notice that some movements may not be shown for the requested range.
/// </exception>
private IEnumerable<IBankovniPolozka> ParseStatement(string url)
{
    const string overviewPath = "//div[contains(@class, 'pohybySum')]/table/tbody/tr";
    const string transactionPath = "//table[@class='table' and starts-with(@id,'id')]/tbody/tr";

    var items = new HashSet<IBankovniPolozka>();

    using (var net = new Devmasters.Net.HttpClient.URLContent(url))
    {
        net.IgnoreHttpErrors = true;
        var html = net.GetContent(Encoding.UTF8).Text;

        if (html.Contains("Některé pohyby nemusí být zobrazeny. Zmenšete datumový rozsah."))
        {
            throw new StatementTooLongException();
        }

        var doc = new Devmasters.XPath(html);

        // Plain-text cell, HTML-decoded.
        System.Func<string, string> decodedText = path =>
            System.Net.WebUtility.HtmlDecode(doc.GetNodeText(path));

        // Cell markup, HTML-decoded, with <br> turned into line breaks and then normalized.
        System.Func<string, string> decodedBlock = path =>
            Devmasters.TextUtil.NormalizeToBlockText(
                System.Net.WebUtility.HtmlDecode(doc.GetNodeHtml(path))?.Replace("<br>", " \n"));

        var overviewCount = doc.GetNodes(overviewPath)?.Count ?? 0;
        if (overviewCount == 0)
        {
            TULogger.Warning($"FIO: Account statement page was not found for account {Ucet.CisloUctu}. Account has been probably canceled. Url: {url}");
            return new List<IBankovniPolozka>();
        }

        var overview = new StatementOverview();
        overview.OpeningBalance = parseAmount(doc.GetNodeText(overviewPath + "/td[1]"));
        overview.FinalBalance = parseAmount(doc.GetNodeText(overviewPath + "/td[2]"));
        overview.CreditSum = parseAmount(doc.GetNodeText(overviewPath + "/td[3]"));
        overview.DebitSum = parseAmount(doc.GetNodeText(overviewPath + "/td[4]"));

        var rowCount = doc.GetNodes(transactionPath)?.Count ?? 0;

        // XPath positions are 1-based.
        for (var index = 1; index <= rowCount; index++)
        {
            var rowPath = $"{transactionPath}[{index}]";

            var item = new SimpleBankovniPolozka();
            item.CisloUctu = Ucet.CisloUctu;
            item.Datum = Devmasters.DT.Util.ToDateTime(doc.GetNodeText(rowPath + "/td[1]"), "dd.MM.yyyy").Value;
            item.Castka = parseAmount(decodedText(rowPath + "/td[2]"));
            item.PopisTransakce = decodedText(rowPath + "/td[3]");
            item.NazevProtiuctu = decodedText(rowPath + "/td[4]");
            item.ZpravaProPrijemce = decodedBlock(rowPath + "/td[5]");

            // The ninth column is appended to the message unless it merely repeats it.
            var note = decodedBlock(rowPath + "/td[9]");
            if (note != item.ZpravaProPrijemce)
            {
                item.ZpravaProPrijemce = item.ZpravaProPrijemce + " " + note;
            }

            item.KS = doc.GetNodeText(rowPath + "/td[6]");
            item.VS = doc.GetNodeText(rowPath + "/td[7]");
            item.SS = doc.GetNodeText(rowPath + "/td[8]");
            item.ZdrojUrl = net.Url;
            item.CisloProtiuctu = ""; // not available on this page

            // HashSet.Add leaves the set untouched when an equal item is already present.
            items.Add(item);
        }

        ValidateParsedItems(items, overview);
    }

    return items;
}