Beispiel #1
0
        /// <summary>
        /// Scrapes the current balances of the provider behind <paramref name="scraper"/> and stores
        /// them, unless today's stored states already cover every account that had a state yesterday.
        /// </summary>
        /// <param name="scraper">Scraper to run; its <c>ProviderName</c> selects the stored states to compare against.</param>
        /// <param name="scraperConfig">
        /// Configuration handed to the scraper. Its <c>LastSuccessfulBalanceScraping</c> is set to the
        /// current local time after a scrape has been indexed.
        /// </param>
        private void ScrapeCurrentStateImpl(GenericScraper scraper, ScraperConfigurationModel scraperConfig)
        {
            // Read the clock once: evaluating DateTime.UtcNow inside each predicate could make
            // "today" and "yesterday" disagree if the run straddles UTC midnight.
            var today = DateTime.UtcNow.Date;
            var yesterday = today.AddDays(-1);

            var currentState = _objectRepository.Set<MoneyStateModel>();

            // Accounts of this provider that had a state recorded yesterday.
            var yesterdayAccounts = currentState
                .Where(s => s.Column.Provider == scraper.ProviderName && s.When.Date == yesterday)
                .Select(s => s.Column.AccountName)
                .Distinct()
                .ToList();

            // Accounts of this provider that already have a state recorded today.
            var todayAccounts = currentState
                .Where(s => s.Column.Provider == scraper.ProviderName && s.When.Date == today)
                .Select(s => s.Column.AccountName)
                .ToHashSet();

            // Scrape when there is nothing to compare against, or when at least one of
            // yesterday's accounts has no state for today yet.
            var toScrape = yesterdayAccounts.Count == 0 || yesterdayAccounts.Except(todayAccounts).Any();

            if (!toScrape)
            {
                _logger.LogInformation("For today there are already scraped items, continuing...");
                return;
            }

            _logger.LogInformation("No cached items, scraping...");

            // Materialize once: the result is counted and then iterated, and a lazily evaluated
            // sequence would otherwise be enumerated (and possibly re-scraped) twice.
            var items = scraper.Scrape(scraperConfig, _chrome).ToList();

            _logger.LogInformation($"Found {items.Count} items, indexing...");

            foreach (var item in items)
            {
                _logger.LogInformation(
                    $" - {item.Column.Provider} / {item.Column.AccountName}: {item.Amount} ({item.Ccy})");

                // Items without a provider are logged but not stored.
                if (string.IsNullOrWhiteSpace(item.Column.Provider))
                {
                    continue;
                }

                // Accounts that already had a state for today before this run are left as they are.
                if (todayAccounts.Contains(item.Column.AccountName))
                {
                    continue;
                }

                _objectRepository.Add(item);
            }

            scraperConfig.LastSuccessfulBalanceScraping = DateTime.Now;
            _logger.LogInformation("Indexed...");
        }
Beispiel #2
0
        /// <summary>
        /// Scrapes the statement of the provider behind <paramref name="scraper"/> and reconciles it
        /// with the stored payments: new statement lines are added, lines matching a stored payment
        /// are linked to it, and stored payments inside the scraped window that carry a statement
        /// reference but were not matched are removed.
        /// </summary>
        /// <param name="scraper">Scraper to run; its <c>ProviderName</c> selects the stored payments to reconcile.</param>
        /// <param name="scraperConfig">
        /// Configuration handed to the scraper. A non-default <c>LastSuccessfulStatementScraping</c>
        /// drives the once-per-24-hours throttle, and the property is set to the current local time
        /// after a successful reconciliation.
        /// </param>
        private void ScrapeStatementsImpl(GenericScraper scraper, ScraperConfigurationModel scraperConfig)
        {
            // How far before the reference date the statement is requested.
            const int lookBackDays = 21;

            var candidateDates = new[]
            {
                // Most recent payment of this provider.
                _objectRepository.Set<PaymentModel>()
                .Where(v => v.Column?.Provider == scraper.ProviderName)
                .OrderByDescending(v => v.When)
                .FirstOrDefault()?.When,
                // Oldest stored money state (any provider).
                _objectRepository.Set<MoneyStateModel>().OrderBy(v => v.When)
                .FirstOrDefault()?.When,
                // Oldest stored payment (any provider).
                _objectRepository.Set<PaymentModel>().OrderBy(v => v.When)
                .FirstOrDefault()?.When
            };

            // Earliest of the available candidates; Min() skips nulls and yields null when all are null.
            var lastPayment = candidateDates.Min() ?? DateTime.MinValue;

            // Clamp the look-back: with nothing stored yet lastPayment is DateTime.MinValue and
            // subtracting days from it would throw ArgumentOutOfRangeException.
            var scrapingSince = lastPayment >= DateTime.MinValue.AddDays(lookBackDays)
                ? lastPayment.AddDays(-lookBackDays)
                : DateTime.MinValue;

            // The throttle below prefers the recorded time of the last successful scrape, if any.
            if (scraperConfig.LastSuccessfulStatementScraping != default)
            {
                lastPayment = scraperConfig.LastSuccessfulStatementScraping;
            }

            // Let's not scrape statements too often - it's hard
            if (lastPayment.AddHours(24) >= DateTime.Now)
            {
                return;
            }

            _logger.LogInformation(
                $"Scraping statement for {scraper.ProviderName} since {scrapingSince}...");

            var statements = scraper.ScrapeStatement(scraperConfig, _chrome, scrapingSince)
                             .Where(v => v != null)
                             .ToList();

            _logger.LogInformation($"Got statement of {statements.Count} items...");

            if (statements.Count == 0)
            {
                _logger.LogWarning("Suspicious statement count. For the sake of not deleting existing statements - not doing anything for now...");
                return;
            }

            // Stored payments of this provider inside the scraped window that came from a statement.
            // Whatever is still in this list after matching is removed from the repository.
            var excessiveStatements = _objectRepository.Set<PaymentModel>()
                                      .Where(v =>
                                             v.When > scrapingSince &&
                                             v.Column?.Provider == scraper.ProviderName &&
                                             !string.IsNullOrEmpty(v.StatementReference))
                                      .ToList();

            var gotStatementReferences = statements.Select(v => v.StatementReference)
                                         .Where(v => v != null)
                                         .ToHashSet();

            // Need to reimport changes done to this item from bank statement. i.e., if user edited statement which was HOLD - now there can be another statement about the same transaction with other reference
            foreach (var item in excessiveStatements.Where(v => v.UserEdited && !gotStatementReferences.Contains(v.StatementReference)))
            {
                item.StatementReference = null;
            }

            foreach (var s in statements)
            {
                // Match by statement reference first; otherwise fall back to the oldest stored
                // payment without a reference that is within 4 days, has the same currency and
                // (within 0.01) the same amount.
                var existingItem =
                    excessiveStatements.FirstOrDefault(v => v.StatementReference == s.StatementReference)
                    ?? _objectRepository.Set<PaymentModel>().OrderBy(v => v.When)
                    .FirstOrDefault(v =>
                                    Math.Abs((v.When.Date - s.When.Date).TotalDays) <= 4 &&
                                    Math.Abs(v.Amount - s.Amount) < 0.01 &&
                                    v.Ccy == s.Ccy &&
                                    v.StatementReference == null);

                if (existingItem == null)
                {
                    _objectRepository.Add(s);
                    continue;
                }

                // Matched: keep the stored payment and fill in whatever it is missing.
                excessiveStatements.Remove(existingItem);

                if (existingItem.Column == null)
                {
                    existingItem.Column = _objectRepository.Set<MoneyColumnMetadataModel>()
                                          .FirstOrDefault(v => v.Id == s.ColumnId);
                }

                if (existingItem.StatementReference == null)
                {
                    existingItem.StatementReference = s.StatementReference;
                }
            }

            _objectRepository.RemoveRange(excessiveStatements);

            // statements is known to be non-empty here (see the early return above).
            scraperConfig.LastSuccessfulStatementScraping = DateTime.Now;
        }