/// <summary>
/// Downloads the account listing page by page and converts every data row of the
/// transaction table into an <see cref="IBankovniPolozka"/>.
/// </summary>
/// <param name="fromDate">Optional lower bound; parsing stops as soon as an item dated before it is met.</param>
/// <param name="toDate">Optional upper bound; items dated after it are not added to the result.</param>
/// <returns>
/// Items collected until paging stopped: a page without data, an item older than
/// <paramref name="fromDate"/>, or more than five consecutive duplicates.
/// </returns>
protected override IEnumerable<IBankovniPolozka> DoParse(DateTime? fromDate = null, DateTime? toDate = null) {
    TULogger.Info($"Zpracovavam ucet {Ucet.CisloUctu} s url {Ucet.Url}");

    var polozky = new List<IBankovniPolozka>();
    var page = 0;
    var duplications = 0;

    // The client is shared by all page requests and released on every exit path.
    using (var httpClient = new HttpClient()) {
        do {
            var doc = new HtmlDocument();
            doc.LoadHtml(MakeRequest(++page, httpClient));

            var rows = GetTransactionItems(doc);
            if (rows == null || rows.Length == 0) {
                TULogger.Warning($"Nenalezeny zadne zaznamy pro ucet {Ucet.CisloUctu}");
                return polozky;
            }

            var dataRows = 0;
            foreach (var row in rows) {
                var cells = row.Descendants("td").Select(c => c.InnerHtml).ToArray();
                if (cells.Length < 4) {
                    // Not a data row (header, separator, short filler row): indices 0..3 are required below.
                    continue;
                }
                dataRows++;

                IBankovniPolozka p = new SimpleBankovniPolozka();
                p.CisloUctu = Ucet.CisloUctu;
                p.Datum = ParseDate(cells[0]);
                p.Castka = ParsePrice(cells[1], p.Datum);

                // Third cell holds "VS / KS / SS"; a dash marks a missing symbol.
                var symbols = cells[2].Split('/').Select(TextUtil.NormalizeToBlockText).ToArray();
                p.VS = symbols.Length > 0 && symbols[0] != "—" ? symbols[0] : string.Empty;
                p.KS = symbols.Length > 1 && symbols[1] != "—" ? symbols[1] : string.Empty;
                p.SS = symbols.Length > 2 && symbols[2] != "—" ? symbols[2] : string.Empty;

                // Fourth cell: "<counterparty> (<account>)<br>description<br>message parts...".
                var descriptions = cells[3].Split(new[] { "<br>" }, StringSplitOptions.None)
                                   .Select(d => TextUtil.NormalizeToBlockText(WebUtility.HtmlDecode(d)))
                                   .ToArray();
                if (descriptions.Length > 0) {
                    var account = descriptions[0].Split(new[] { "(", ")" }, StringSplitOptions.None)
                                  .Select(TextUtil.NormalizeToBlockText)
                                  .ToArray();
                    p.NazevProtiuctu = account.Length > 0 ? account[0] : string.Empty;
                    p.CisloProtiuctu = account.Length > 1 ? account[1] : string.Empty;
                }
                p.PopisTransakce = descriptions.Length > 1 ? descriptions[1] : string.Empty;
                p.ZpravaProPrijemce = descriptions.Length > 2
                    ? string.Join("; ", descriptions.Skip(2))
                    : string.Empty;
                p.ZdrojUrl = Ucet.Url;

                // NOTE(review): stopping here presumes the listing is ordered newest first - confirm.
                if (fromDate.HasValue && p.Datum < fromDate) {
                    return polozky;
                }

                if (IsAlreadyExist(polozky, p)) {
                    // More than five duplicates in a row ends the crawl.
                    duplications++;
                    if (duplications > 5) {
                        return polozky;
                    }
                } else if (!(toDate.HasValue && p.Datum > toDate.Value)) {
                    duplications = 0;
                    polozky.Add(p);
                }
            }

            if (dataRows == 0) {
                // The page had rows but none with data; asking for further pages could go on forever.
                TULogger.Warning($"Nenalezeny zadne zaznamy pro ucet {Ucet.CisloUctu}");
                return polozky;
            }

            // LastOrDefault keeps the progress log safe while nothing has been collected yet
            // (for example when every row so far is newer than toDate).
            var progress = $"[{page}] {Ucet.CisloUctu} - {polozky.LastOrDefault()?.Datum} / celkem {polozky.Count}";
            TULogger.Debug(progress);
            Console.WriteLine(progress);
        } while (true);
    }
}
/// <summary>
/// Reads the transparent-account table starting at <c>Ucet.Url</c> and follows the
/// "lastTransactionDate" marker found in each response to request further pages.
/// </summary>
/// <param name="fromDate">Optional lower bound; rows dated before it are skipped.</param>
/// <param name="toDate">Optional upper bound; rows dated after it are skipped.</param>
/// <returns>All rows within the date range from every page that was visited.</returns>
protected override IEnumerable<IBankovniPolozka> DoParse(DateTime? fromDate = null, DateTime? toDate = null) {
    var polozky = new List<IBankovniPolozka>();
    List<IBankovniPolozka> onPage;
    var currentUrl = Ucet.Url;
    string previousLastTransactionDate = null;
    bool hasNextPage;

    do {
        onPage = new List<IBankovniPolozka>();
        hasNextPage = false;

        using (var url = new URLContent(currentUrl)) {
            url.Referer = Ucet.Url;
            url.IgnoreHttpErrors = true;
            var s = url.GetContent(Encoding.UTF8).Text;
            var doc = new XPath(s);
            var rows = doc.GetNodes("//table[@id='transparentAccountTable']/tbody/tr")
                       ?? doc.GetNodes("//tr")
                       ?? new List<HtmlNode>();

            foreach (var row in rows) {
                var cols = row.ChildNodes.Where(n => n.Name == "td").Select(n => n.InnerHtml).ToArray();
                if (cols.Length < 5) {
                    // Header or filler row: columns 0, 1, 3 and 4 are read below.
                    continue;
                }

                var date = ParseTools.ToDateTime(cols[0], "d.M.yyyy");
                if (!date.HasValue
                    || (fromDate.HasValue && date.Value < fromDate.Value)
                    || (toDate.HasValue && date.Value > toDate.Value)) {
                    continue; // not a data row, or outside the requested range
                }

                var p = new SimpleBankovniPolozka();
                p.CisloUctu = Ucet.CisloUctu;
                p.Datum = date.Value;

                // string.Split never returns null, so no fallback array is needed.
                var details = cols[1].Split(new string[] { "</br>", "<br>" }, StringSplitOptions.None)
                              .Select(v => WebUtility.HtmlDecode(v))
                              .ToArray();
                // NOTE(review): the first part is HTML-decoded twice (kept as in the original) - confirm it is intended.
                p.NazevProtiuctu = TextUtil.NormalizeToBlockText(WebUtility.HtmlDecode(details[0]));
                if (details.Length == 3) {
                    p.ZpravaProPrijemce = TextUtil.NormalizeToBlockText(details[2]);
                }

                p.VS = TextUtil.NormalizeToBlockText(cols[3]);
                if (p.VS.Contains("---------")) {
                    p.VS = "";
                }

                p.Castka = ParseTools.ToDecimal(
                    WebUtility.HtmlDecode(cols[4])
                    .Replace(" CZK", "")
                    .Replace(" ", "")
                    ).Value;
                p.ZdrojUrl = Ucet.Url;
                onPage.Add(p);
            }
            polozky.AddRange(onPage);

            // The response carries a "lastTransactionDate: '<value>'" line that is used as the paging cursor.
            var lineWithLastTransactionDate = s
                .Split(new[] { "\n" }, StringSplitOptions.RemoveEmptyEntries)
                .FirstOrDefault(l => l.Contains("lastTransactionDate"));
            if (lineWithLastTransactionDate != null) {
                var markerParts = lineWithLastTransactionDate.Trim().Split(':');
                if (markerParts.Length >= 2) {
                    var lastTransactionDate = markerParts[1].Replace("'", "").Trim();
                    // A repeated cursor would request the very same data again, so it ends the paging.
                    if (lastTransactionDate != previousLastTransactionDate) {
                        previousLastTransactionDate = lastTransactionDate;
                        currentUrl = $"https://transparentniucty.moneta.cz/homepage?p_p_id=TransparentAccountPortlet_WAR_monetaportletsportlet&p_p_lifecycle=2&p_p_state=normal&p_p_mode=view&p_p_resource_id=serveTableData&p_p_cacheability=cacheLevelPage&p_p_col_id=column-8&p_p_col_count=1&_TransparentAccountPortlet_WAR_monetaportletsportlet_accountNumber={Ucet.CisloUctu.Replace("/0600", "")}&_TransparentAccountPortlet_WAR_monetaportletsportlet_reverse=NORMAL&_TransparentAccountPortlet_WAR_monetaportletsportlet_transactionNumber=1&_TransparentAccountPortlet_WAR_monetaportletsportlet_transactionDate={lastTransactionDate}&_={(long)DateTime.UtcNow.Subtract(new DateTime(1970, 1, 1)).TotalMilliseconds}";
                        hasNextPage = true;
                    }
                }
            }
        }

        // Without a fresh cursor the URL stays the same; looping again would re-read and
        // re-append the same page without end.
        // NOTE(review): a page whose rows are all outside the date range also ends the paging,
        // as in the original - verify this is acceptable when toDate lies in the past.
    } while (hasNextPage && onPage.Count > 0);

    return polozky;
}
/// <summary>
/// Downloads one statement page and turns every row of its movements table into a bank item.
/// </summary>
/// <param name="url">Address of the statement page to download.</param>
/// <returns>
/// The distinct items found on the page, or an empty list when the summary table is missing.
/// </returns>
/// <exception cref="StatementTooLongException">
/// The page reports that some movements may not be displayed for the requested range.
/// </exception>
private IEnumerable<IBankovniPolozka> ParseStatement(string url) {
    const string summaryRowPath = "//div[contains(@class, 'pohybySum')]/table/tbody/tr";
    const string itemRowPath = "//table[@class='table' and starts-with(@id,'id')]/tbody/tr";

    var polozky = new HashSet<IBankovniPolozka>();

    using (var net = new Devmasters.Net.HttpClient.URLContent(url)) {
        net.IgnoreHttpErrors = true;
        var html = net.GetContent(Encoding.UTF8).Text;
        if (html.Contains("Některé pohyby nemusí být zobrazeny. Zmenšete datumový rozsah.")) {
            throw new StatementTooLongException();
        }

        var doc = new Devmasters.XPath(html);

        // No summary row means there is no statement to read on this page.
        var summaryNodes = doc.GetNodes(summaryRowPath);
        if (summaryNodes == null || summaryNodes.Count == 0) {
            TULogger.Warning($"FIO: Account statement page was not found for account {Ucet.CisloUctu}. Account has been probably canceled. Url: {url}");
            return new List<IBankovniPolozka>();
        }

        // Totals from the summary table, handed to ValidateParsedItems together with the parsed rows.
        var opening = parseAmount(doc.GetNodeText(summaryRowPath + "/td[1]"));
        var closing = parseAmount(doc.GetNodeText(summaryRowPath + "/td[2]"));
        var credits = parseAmount(doc.GetNodeText(summaryRowPath + "/td[3]"));
        var debits = parseAmount(doc.GetNodeText(summaryRowPath + "/td[4]"));
        var overview = new StatementOverview {
            OpeningBalance = opening,
            FinalBalance = closing,
            CreditSum = credits,
            DebitSum = debits
        };

        // Cell readers: HTML-decoded plain text, and decoded markup with <br> turned into line breaks.
        System.Func<string, string> decodedText =
            path => System.Net.WebUtility.HtmlDecode(doc.GetNodeText(path));
        System.Func<string, string> blockText =
            path => Devmasters.TextUtil.NormalizeToBlockText(
                System.Net.WebUtility.HtmlDecode(doc.GetNodeHtml(path))?.Replace("<br>", " \n"));

        var itemNodes = doc.GetNodes(itemRowPath);
        var itemCount = itemNodes == null ? 0 : itemNodes.Count;

        // XPath positions are 1-based.
        for (var position = 1; position <= itemCount; position++) {
            var rowPath = itemRowPath + "[" + position + "]";

            var p = new SimpleBankovniPolozka();
            p.CisloUctu = Ucet.CisloUctu;
            p.Datum = Devmasters.DT.Util.ToDateTime(doc.GetNodeText(rowPath + "/td[1]"), "dd.MM.yyyy").Value;
            p.Castka = parseAmount(decodedText(rowPath + "/td[2]"));
            p.PopisTransakce = decodedText(rowPath + "/td[3]");
            p.NazevProtiuctu = decodedText(rowPath + "/td[4]");
            p.ZpravaProPrijemce = blockText(rowPath + "/td[5]");

            // The ninth column is appended to the message unless it merely repeats it.
            var note = blockText(rowPath + "/td[9]");
            if (!string.Equals(note, p.ZpravaProPrijemce)) {
                p.ZpravaProPrijemce = p.ZpravaProPrijemce + " " + note;
            }

            p.KS = doc.GetNodeText(rowPath + "/td[6]");
            p.VS = doc.GetNodeText(rowPath + "/td[7]");
            p.SS = doc.GetNodeText(rowPath + "/td[8]");
            p.ZdrojUrl = net.Url;
            p.CisloProtiuctu = ""; // not available on this page

            // HashSet.Add leaves the set untouched when an equal item is already present.
            polozky.Add(p);
        }

        ValidateParsedItems(polozky, overview);
    }

    return polozky;
}