/// <summary>
/// Scrapes the current balances of <paramref name="scraper"/> and stores them, unless every
/// account of this provider that had a state yesterday (UTC) already has a state today (UTC).
/// </summary>
/// <param name="scraper">Scraper to run; its <c>ProviderName</c> selects the stored states to compare against.</param>
/// <param name="scraperConfig">
/// Configuration handed to the scraper. Its <c>LastSuccessfulBalanceScraping</c> is set to the
/// current local time after a scraping run.
/// </param>
private void ScrapeCurrentStateImpl(GenericScraper scraper, ScraperConfigurationModel scraperConfig)
{
    // Read the clock once so that "today" and "yesterday" are consistent across both queries,
    // even when this method runs around UTC midnight.
    var today = DateTime.UtcNow.Date;
    var yesterday = today.AddDays(-1);

    var currentState = _objectRepository.Set<MoneyStateModel>();

    var yesterdayAccounts = currentState
        .Where(s => s.Column.Provider == scraper.ProviderName && s.When.Date == yesterday)
        .Select(s => s.Column.AccountName)
        .Distinct()
        .ToList();

    // A set gives constant-time membership checks in the loop below.
    var todayAccounts = currentState
        .Where(s => s.Column.Provider == scraper.ProviderName && s.When.Date == today)
        .Select(s => s.Column.AccountName)
        .ToHashSet();

    // Scrape when nothing is known from yesterday, or when some account seen yesterday
    // has no state for today yet.
    var needsScraping = yesterdayAccounts.Count == 0 || yesterdayAccounts.Except(todayAccounts).Any();
    if (!needsScraping)
    {
        _logger.LogInformation("For today there are already scraped items, continuing...");
        return;
    }

    _logger.LogInformation("No cached items, scraping...");

    // Materialize once: the result is counted and then iterated, which would enumerate
    // (and possibly re-run) a lazily evaluated sequence twice.
    var items = scraper.Scrape(scraperConfig, _chrome).ToList();
    _logger.LogInformation($"Found {items.Count} items, indexing...");

    foreach (var item in items)
    {
        _logger.LogInformation(
            $" - {item.Column.Provider} / {item.Column.AccountName}: {item.Amount} ({item.Ccy})");

        // Skip items without a provider and accounts that already have a state for today.
        if (string.IsNullOrWhiteSpace(item.Column.Provider) || todayAccounts.Contains(item.Column.AccountName))
        {
            continue;
        }

        _objectRepository.Add(item);
    }

    scraperConfig.LastSuccessfulBalanceScraping = DateTime.Now;
    _logger.LogInformation("Indexed...");
}
/// <summary>
/// Scrapes the payment statement of <paramref name="scraper"/> and reconciles it with the stored
/// payments: unmatched statement entries are added, matched payments get their missing column and
/// statement reference filled in, and stored payments of this provider inside the scraping window
/// that carry a statement reference but were not matched are removed.
/// </summary>
/// <param name="scraper">Scraper to run; its <c>ProviderName</c> selects the stored payments to reconcile.</param>
/// <param name="scraperConfig">
/// Configuration handed to the scraper. Its <c>LastSuccessfulStatementScraping</c> is used to limit
/// scraping to once per 24 hours and is set to the current local time after a non-empty statement
/// has been processed.
/// </param>
private void ScrapeStatementsImpl(GenericScraper scraper, ScraperConfigurationModel scraperConfig)
{
    const int lookbackDays = 21;

    // Candidates: latest payment of this provider, earliest money state, earliest payment overall.
    var minDates = new[]
    {
        _objectRepository.Set<PaymentModel>()
            .Where(v => v.Column?.Provider == scraper.ProviderName)
            .OrderByDescending(v => v.When)
            .FirstOrDefault()?.When,
        _objectRepository.Set<MoneyStateModel>()
            .OrderBy(v => v.When)
            .FirstOrDefault()?.When,
        _objectRepository.Set<PaymentModel>()
            .OrderBy(v => v.When)
            .FirstOrDefault()?.When
    };

    // Earliest of the known dates; DateTime.MinValue when nothing is stored yet.
    var lastPayment = minDates.Where(v => v != null).OrderBy(v => v).FirstOrDefault() ?? DateTime.MinValue;

    // DateTime.MinValue.AddDays(-21) throws ArgumentOutOfRangeException, so clamp the look-back
    // window to DateTime.MinValue when the date is too close to the lower bound.
    var scrapingSince = lastPayment - DateTime.MinValue > TimeSpan.FromDays(lookbackDays)
        ? lastPayment.AddDays(-lookbackDays)
        : DateTime.MinValue;

    if (scraperConfig.LastSuccessfulStatementScraping != default)
    {
        lastPayment = scraperConfig.LastSuccessfulStatementScraping;
    }

    // Let's not scrape statements too often - it's hard
    if (lastPayment.AddHours(24) >= DateTime.Now)
    {
        return;
    }

    _logger.LogInformation(
        $"Scraping statement for {scraper.ProviderName} since {scrapingSince}...");

    var statements = scraper.ScrapeStatement(scraperConfig, _chrome, scrapingSince)
        .Where(v => v != null)
        .ToList();

    _logger.LogInformation($"Got statement of {statements.Count} items...");

    if (statements.Count == 0)
    {
        _logger.LogWarning("Suspicious statement count. For the sake of not deleting existing statements - not doing anything for now...");
        return;
    }

    // Stored payments of this provider inside the window that came from a statement.
    // Whatever is still in this list after matching is removed at the end.
    var excessiveStatements = _objectRepository.Set<PaymentModel>()
        .Where(v => v.When > scrapingSince
                    && v.Column?.Provider == scraper.ProviderName
                    && !string.IsNullOrEmpty(v.StatementReference))
        .ToList();

    var gotStatementReferences = statements
        .Select(v => v.StatementReference)
        .Where(v => v != null)
        .Distinct()
        .ToHashSet();

    // Need to reimport changes done to this item from bank statement.
    // i.e., if user edited statement which was HOLD - now there can be another statement
    // about the same transaction with other reference.
    // The cleared items stay in excessiveStatements, so they are removed below unless a
    // statement entry matches them.
    foreach (var item in excessiveStatements.Where(v => v.UserEdited && !gotStatementReferences.Contains(v.StatementReference)))
    {
        item.StatementReference = null;
    }

    foreach (var s in statements)
    {
        // Match by statement reference first; otherwise fall back to the earliest stored payment
        // without a reference that has the same currency, an amount within 0.01 and a date
        // within 4 days.
        var existingItem =
            excessiveStatements.FirstOrDefault(v => v.StatementReference == s.StatementReference)
            ?? _objectRepository.Set<PaymentModel>()
                .OrderBy(v => v.When)
                .FirstOrDefault(v =>
                    Math.Abs((v.When.Date - s.When.Date).TotalDays) <= 4
                    && Math.Abs(v.Amount - s.Amount) < 0.01
                    && v.Ccy == s.Ccy
                    && v.StatementReference == null);

        if (existingItem == null)
        {
            _objectRepository.Add(s);
            continue;
        }

        // Matched: keep the stored payment and fill in whatever it is missing.
        excessiveStatements.Remove(existingItem);

        if (existingItem.Column == null)
        {
            existingItem.Column = _objectRepository.Set<MoneyColumnMetadataModel>()
                .FirstOrDefault(v => v.Id == s.ColumnId);
        }

        if (existingItem.StatementReference == null)
        {
            existingItem.StatementReference = s.StatementReference;
        }
    }

    _objectRepository.RemoveRange(excessiveStatements);

    // statements is guaranteed non-empty here because of the early return above.
    scraperConfig.LastSuccessfulStatementScraping = DateTime.Now;
}