Exemple #1
0
        /// <summary>
        /// Downloads the account's transaction listing page by page and converts every data row
        /// into an <see cref="IBankovniPolozka"/>.
        /// </summary>
        /// <param name="fromDate">Optional lower bound; parsing stops at the first row dated before it.</param>
        /// <param name="toDate">Optional upper bound; rows dated after it are not added to the result.</param>
        /// <returns>
        /// The items collected until a page without rows is returned, a row older than
        /// <paramref name="fromDate"/> is met, or more than five duplicates are seen without a new item in between.
        /// </returns>
        protected override IEnumerable<IBankovniPolozka> DoParse(DateTime? fromDate = null, DateTime? toDate = null)
        {
            // A data row must provide date, amount, symbols and description cells (indexes 0..3).
            const int requiredCellCount = 4;

            TULogger.Info($"Zpracovavam ucet {Ucet.CisloUctu} s url {Ucet.Url}");
            var polozky = new List<IBankovniPolozka>();
            var page = 0;
            var duplications = 0;

            // The client is only needed while paging, so release it on every exit path.
            using (var httpClient = new HttpClient())
            {
                while (true)
                {
                    var doc = new HtmlDocument();
                    doc.LoadHtml(MakeRequest(++page, httpClient));

                    var rows = GetTransactionItems(doc);
                    if (rows == null || rows.Length == 0)
                    {
                        TULogger.Warning($"Nenalezeny zadne zaznamy pro ucet {Ucet.CisloUctu}");
                        return polozky;
                    }

                    foreach (var row in rows)
                    {
                        var cells = row.Descendants("td").Select(c => c.InnerHtml).ToArray();
                        if (cells.Length < requiredCellCount)
                        {
                            // Not a data row (no cells or too few of them); indexing it would throw.
                            continue;
                        }

                        IBankovniPolozka p = new SimpleBankovniPolozka();
                        p.CisloUctu = Ucet.CisloUctu;
                        p.Datum = ParseDate(cells[0]);
                        p.Castka = ParsePrice(cells[1], p.Datum);

                        // Symbols are "VS / KS / SS"; a dash placeholder means the symbol is not set.
                        var symbols = cells[2].Split('/').Select(TextUtil.NormalizeToBlockText).ToArray();
                        p.VS = symbols.Length > 0 && symbols[0] != "—" ? symbols[0] : string.Empty;
                        p.KS = symbols.Length > 1 && symbols[1] != "—" ? symbols[1] : string.Empty;
                        p.SS = symbols.Length > 2 && symbols[2] != "—" ? symbols[2] : string.Empty;

                        // The description cell holds several <br>-separated lines:
                        // line 0 = "counter-account name (number)", line 1 = transaction description,
                        // remaining lines = message for the recipient.
                        var descriptions = cells[3].Split(new[] { "<br>" }, StringSplitOptions.None)
                                           .Select(d => TextUtil.NormalizeToBlockText(WebUtility.HtmlDecode(d)))
                                           .ToArray();

                        if (descriptions.Length > 0)
                        {
                            var account = descriptions[0].Split(new[] { "(", ")" }, StringSplitOptions.None)
                                          .Select(TextUtil.NormalizeToBlockText)
                                          .ToArray();
                            p.NazevProtiuctu = account.Length > 0 ? account[0] : string.Empty;
                            p.CisloProtiuctu = account.Length > 1 ? account[1] : string.Empty;
                        }
                        p.PopisTransakce = descriptions.Length > 1 ? descriptions[1] : string.Empty;
                        p.ZpravaProPrijemce = descriptions.Length > 2 ? string.Join("; ", descriptions.Skip(2)) : string.Empty;

                        p.ZdrojUrl = Ucet.Url;

                        // NOTE(review): stopping here presumably relies on the listing being sorted newest first - confirm.
                        if (fromDate.HasValue && p.Datum < fromDate)
                        {
                            return polozky;
                        }

                        if (IsAlreadyExist(polozky, p))
                        {
                            duplications++;
                            if (duplications > 5)
                            {
                                return polozky;
                            }
                        }
                        else if (!(toDate.HasValue && p.Datum > toDate.Value))
                        {
                            duplications = 0;
                            polozky.Add(p);
                        }
                    }

                    // The list may still be empty here (e.g. every row so far is newer than toDate),
                    // so Last() would throw; LastOrDefault() keeps the log text identical otherwise.
                    var lastDate = polozky.LastOrDefault()?.Datum;
                    var progress = $"[{page}] {Ucet.CisloUctu} - {lastDate} / celkem {polozky.Count}";
                    TULogger.Debug(progress);
                    Console.WriteLine(progress);
                }
            }
        }
Exemple #2
0
        /// <summary>
        /// Downloads the transparent-account transaction table page by page and converts each
        /// data row whose date lies inside the requested range into a bank item.
        /// </summary>
        /// <param name="fromDate">Optional lower bound; rows dated before it are skipped.</param>
        /// <param name="toDate">Optional upper bound; rows dated after it are skipped.</param>
        /// <returns>All in-range items found on the visited pages.</returns>
        /// <remarks>
        /// The next page is addressed by the "lastTransactionDate" value found in the current response.
        /// Paging ends when that value is missing or has already been requested (the same request
        /// would be repeated forever otherwise), or when a page yields no in-range row and no row
        /// newer than <paramref name="toDate"/>.
        /// </remarks>
        protected override IEnumerable<IBankovniPolozka> DoParse(DateTime? fromDate = null, DateTime? toDate = null)
        {
            // Highest column index read below is 4 (amount), so a data row needs at least 5 cells.
            const int requiredColumnCount = 5;

            var polozky = new List<IBankovniPolozka>();
            var seenMarkers = new HashSet<string>(StringComparer.Ordinal);
            var currentUrl = Ucet.Url;
            bool continuePaging;

            do
            {
                var onPage = new List<IBankovniPolozka>();
                var sawRowAfterToDate = false;
                string nextMarker = null;

                using (var url = new URLContent(currentUrl))
                {
                    url.Referer = Ucet.Url;
                    url.IgnoreHttpErrors = true;
                    var s = url.GetContent(Encoding.UTF8).Text;
                    var doc = new XPath(s);
                    var rows = doc.GetNodes("//table[@id='transparentAccountTable']/tbody/tr")
                               ?? doc.GetNodes("//tr")
                               ?? new List<HtmlNode>();

                    foreach (var row in rows)
                    {
                        var cols = row.ChildNodes.Where(n => n.Name == "td").Select(n => n.InnerHtml).ToArray();
                        if (cols.Length < requiredColumnCount)
                        {
                            // Header or otherwise incomplete row; indexing it would throw.
                            continue;
                        }

                        var date = ParseTools.ToDateTime(cols[0], "d.M.yyyy");
                        if (!date.HasValue)
                        {
                            continue; // not a row with data
                        }
                        if (toDate.HasValue && date.Value > toDate.Value)
                        {
                            // Remember it: the requested range may only start on a later page.
                            sawRowAfterToDate = true;
                            continue;
                        }
                        if (fromDate.HasValue && date.Value < fromDate.Value)
                        {
                            continue;
                        }

                        var p = new SimpleBankovniPolozka();
                        p.CisloUctu = Ucet.CisloUctu;
                        p.Datum = date.Value;

                        // string.Split never returns null, so the parts array always has at least one element.
                        var parts = cols[1].Split(new string[] { "</br>", "<br>" }, StringSplitOptions.None)
                                    .Select(v => WebUtility.HtmlDecode(v))
                                    .ToArray();

                        // NOTE(review): parts[0] is HTML-decoded a second time here, exactly as before;
                        // kept to leave the produced names unchanged - confirm whether it is intended.
                        p.NazevProtiuctu = TextUtil.NormalizeToBlockText(WebUtility.HtmlDecode(parts[0]));

                        if (parts.Length == 3)
                        {
                            p.ZpravaProPrijemce = TextUtil.NormalizeToBlockText(parts[2]);
                        }

                        p.VS = TextUtil.NormalizeToBlockText(cols[3]);
                        if (p.VS.Contains("---------"))
                        {
                            // A run of dashes is the placeholder for "no variable symbol".
                            p.VS = "";
                        }

                        p.Castka = ParseTools.ToDecimal(
                            WebUtility.HtmlDecode(cols[4])
                            .Replace(" CZK", "")
                            .Replace(" ", "")
                            ).Value;
                        p.ZdrojUrl = Ucet.Url;

                        onPage.Add(p);
                    }
                    polozky.AddRange(onPage);

                    // The response carries a "lastTransactionDate: '...'" line that addresses the next page.
                    var lineWithLastTransactionDate = s.Split(new[] { "\n" }, StringSplitOptions.RemoveEmptyEntries)
                                                      .FirstOrDefault(l => l.Contains("lastTransactionDate"));
                    if (lineWithLastTransactionDate != null)
                    {
                        var markerParts = lineWithLastTransactionDate.Trim().Split(':');
                        if (markerParts.Length >= 2)
                        {
                            nextMarker = markerParts[1].Replace("'", "").Trim();
                        }
                    }
                }

                // Continue only when this page was useful AND the next request differs from every
                // previous one; HashSet.Add returns false for a marker that was already requested.
                continuePaging = (onPage.Count > 0 || sawRowAfterToDate)
                                 && nextMarker != null
                                 && seenMarkers.Add(nextMarker);

                if (continuePaging)
                {
                    var accountNumber = Ucet.CisloUctu.Replace("/0600", "");
                    var cacheBuster = (long)DateTime.UtcNow.Subtract(new DateTime(1970, 1, 1)).TotalMilliseconds;
                    currentUrl = $"https://transparentniucty.moneta.cz/homepage?p_p_id=TransparentAccountPortlet_WAR_monetaportletsportlet&p_p_lifecycle=2&p_p_state=normal&p_p_mode=view&p_p_resource_id=serveTableData&p_p_cacheability=cacheLevelPage&p_p_col_id=column-8&p_p_col_count=1&_TransparentAccountPortlet_WAR_monetaportletsportlet_accountNumber={accountNumber}&_TransparentAccountPortlet_WAR_monetaportletsportlet_reverse=NORMAL&_TransparentAccountPortlet_WAR_monetaportletsportlet_transactionNumber=1&_TransparentAccountPortlet_WAR_monetaportletsportlet_transactionDate={nextMarker}&_={cacheBuster}";
                }
            } while (continuePaging);

            return polozky;
        }
Exemple #3
0
        /// <summary>
        /// Downloads one account-statement page and converts its transaction table into bank items.
        /// </summary>
        /// <param name="url">Address of the statement page to download.</param>
        /// <returns>
        /// The distinct items found on the page, or an empty list when the page contains no
        /// statement overview table.
        /// </returns>
        /// <exception cref="StatementTooLongException">
        /// Thrown when the page reports that the requested date range is too wide to show all movements.
        /// </exception>
        private IEnumerable<IBankovniPolozka> ParseStatement(string url)
        {
            var polozky = new HashSet<IBankovniPolozka>();

            using (var net = new Devmasters.Net.HttpClient.URLContent(url))
            {
                net.IgnoreHttpErrors = true;
                var content = net.GetContent(Encoding.UTF8).Text;
                if (content.Contains("Některé pohyby nemusí být zobrazeny. Zmenšete datumový rozsah."))
                {
                    throw new StatementTooLongException();
                }
                var doc = new Devmasters.XPath(content);

                var xoverviewRows = "//div[contains(@class, 'pohybySum')]/table/tbody/tr";
                var overviewRows = doc.GetNodes(xoverviewRows)?.Count ?? 0;
                if (overviewRows == 0)
                {
                    TULogger.Warning($"FIO: Account statement page was not found for account {Ucet.CisloUctu}. Account has been probably canceled. Url: {url}");
                    return(new List<IBankovniPolozka>());
                }

                // Summary figures of the statement; passed to ValidateParsedItems together with the parsed rows.
                var overview = new StatementOverview
                {
                    OpeningBalance = parseAmount(doc.GetNodeText(xoverviewRows + "/td[1]")),
                    FinalBalance   = parseAmount(doc.GetNodeText(xoverviewRows + "/td[2]")),
                    CreditSum      = parseAmount(doc.GetNodeText(xoverviewRows + "/td[3]")),
                    DebitSum       = parseAmount(doc.GetNodeText(xoverviewRows + "/td[4]"))
                };

                var xrows = "//table[@class='table' and starts-with(@id,'id')]/tbody/tr";
                var rows = doc.GetNodes(xrows)?.Count ?? 0;

                // XPath positions are 1-based, hence the loop from 1 to rows inclusive.
                for (var row = 1; row <= rows; row++)
                {
                    var xroot = xrows + "[" + row + "]";

                    var p = new SimpleBankovniPolozka
                    {
                        CisloUctu         = Ucet.CisloUctu,
                        Datum             = Devmasters.DT.Util.ToDateTime(doc.GetNodeText(xroot + "/td[1]"), "dd.MM.yyyy").Value,
                        Castka            = parseAmount(System.Net.WebUtility.HtmlDecode(doc.GetNodeText(xroot + "/td[2]"))),
                        PopisTransakce    = System.Net.WebUtility.HtmlDecode(doc.GetNodeText(xroot + "/td[3]")),
                        NazevProtiuctu    = System.Net.WebUtility.HtmlDecode(doc.GetNodeText(xroot + "/td[4]")),
                        ZpravaProPrijemce = Devmasters.TextUtil.NormalizeToBlockText(
                            System.Net.WebUtility.HtmlDecode(doc.GetNodeHtml(xroot + "/td[5]"))
                            ?.Replace("<br>", " \n")
                            )
                    };

                    var poznamka = Devmasters.TextUtil.NormalizeToBlockText(
                        System.Net.WebUtility.HtmlDecode(doc.GetNodeHtml(xroot + "/td[9]"))
                        ?.Replace("<br>", " \n")
                        );

                    // Append the note (column 9) only when it adds something: an empty note used to
                    // leave a trailing space, and an empty message used to leave a leading one.
                    if (!string.IsNullOrEmpty(poznamka) && poznamka != p.ZpravaProPrijemce)
                    {
                        p.ZpravaProPrijemce = string.IsNullOrEmpty(p.ZpravaProPrijemce)
                            ? poznamka
                            : p.ZpravaProPrijemce + " " + poznamka;
                    }

                    p.KS       = doc.GetNodeText(xroot + "/td[6]");
                    p.VS       = doc.GetNodeText(xroot + "/td[7]");
                    p.SS       = doc.GetNodeText(xroot + "/td[8]");
                    p.ZdrojUrl = net.Url;

                    p.CisloProtiuctu = ""; // counter-account number is not available on this page

                    // HashSet.Add already ignores an element that is present, so no Contains check is needed.
                    polozky.Add(p);
                }

                ValidateParsedItems(polozky, overview);
            }

            return(polozky);
        }