/// <summary>
/// Pattern for one entry item line: an optional book, then account, asset and a
/// signed amount, separated by whitespace. Built once and shared, since
/// <see cref="Regex"/> instances are safe to reuse for matching.
/// </summary>
private static readonly Regex EntryItemRegex = new Regex(
    @"^\s*((?<book>.*?)\s+)?(?<account>.*?)\s+(?<asset>.*?)\s+(?<amount>-?[\d,.]+)\s*$");

/// <summary>
/// Reads one journal entry from <paramref name="lines"/>, consuming lines until the
/// enumerator is exhausted or a null/empty line is reached.
/// </summary>
/// <param name="directive">The directive that opened the entry; its <c>Data</c> becomes the entry index.</param>
/// <param name="lines">Line enumerator positioned just before the first line of the entry body.</param>
/// <returns>The entry populated with every item line that was read.</returns>
/// <exception cref="JournalException">
/// Thrown for an unknown directive, for a line that does not match the entry item
/// pattern, or for an amount that cannot be parsed as a decimal.
/// </exception>
private IEntry ReadEntry(Directive directive, IEnumerator<string> lines)
{
    var entry = new Entry();
    entry.Index = directive.Data;

    while (true)
    {
        // End of input or an empty line terminates the entry.
        if (!lines.MoveNext() || IsNullOrEmpty(lines.Current))
        {
            return entry;
        }

        var line = lines.Current;

        if (IsComment(line))
        {
            continue;
        }

        // Use a separate local rather than overwriting the parameter.
        var lineDirective = ReadDirective(line);
        if (lineDirective != null)
        {
            switch (lineDirective.Name)
            {
                case "@note":
                    // TODO
                    break;
                case "@payee":
                    // TODO
                    break;
                default:
                    throw new JournalException($"Unknown directive: {lineDirective.Name}");
            }

            continue;
        }

        var match = EntryItemRegex.Match(line);
        if (!match.Success)
        {
            // Without this check the groups are empty and decimal parsing would
            // fail with an unrelated FormatException.
            throw new JournalException($"Invalid entry item: {line}");
        }

        var bookGroup = match.Groups["book"];
        var book = bookGroup.Success ? new Book(bookGroup.Value) : new Book("default");
        var account = new Account(match.Groups["account"].Value);
        var asset = new Asset(match.Groups["asset"].Value);

        // The pattern accepts any mix of digits, ',' and '.', so the text may still
        // not be a valid number (e.g. "1.2.3").
        // NOTE(review): parsing uses the current culture, as the original
        // decimal.Parse call did; confirm whether journal files should instead be
        // parsed with the invariant culture.
        var amountText = match.Groups["amount"].Value;
        decimal amount;
        if (!decimal.TryParse(
                amountText,
                NumberStyles.AllowLeadingSign | NumberStyles.AllowThousands | NumberStyles.AllowDecimalPoint,
                CultureInfo.CurrentCulture,
                out amount))
        {
            throw new JournalException($"Invalid amount: {amountText}");
        }

        entry.AddItem(book, account, asset, amount);
    }
}
// Registers the items found in a fetched entry page and then marks the entry as crawled.
//
// - NotFound / Found responses skip item extraction and go straight to marking the
//   entry as crawled.
// - Any other non-success response logs a warning and returns without marking the
//   entry as crawled.
// - Otherwise the response body is parsed, each extracted item is looked up by ASIN
//   (and added to the database when missing), attached to the entry if not already
//   present, and counted on the crawl context.
async Task RegisterItemsAsync(
    ApplicationDbContext dbContext,
    CrawlContext crawlContext,
    Entry entry,
    HttpResponseMessage response,
    CancellationToken cancellationToken)
{
    var status = response.StatusCode;
    var skipExtraction = status == HttpStatusCode.NotFound
        || status == HttpStatusCode.Found;

    if (!skipExtraction)
    {
        if (!response.IsSuccessStatusCode)
        {
            logger.LogWarning($"{entry.Url} の取得に失敗しました。({response.StatusCode})");
            return;
        }

        var body = await response.Content.ReadAsStringAsync();
        var document = await parser.ParseAsync(body, cancellationToken);

        foreach (var extracted in ExtractItems(document))
        {
            Item item = await FindByAsinAsync(
                dbContext,
                extracted.Asin,
                cancellationToken);

            if (item == null)
            {
                // A single entry may introduce the same product more than once,
                // so a new product is saved immediately.
                dbContext.Items.Add(extracted);
                await dbContext.SaveChangesAsync(cancellationToken);
                item = extracted;
            }

            if (!entry.HasItem(item))
            {
                entry.AddItem(item);
            }

            crawlContext.AddCount();
            logger.LogInformation($"{crawlContext.Count}\t{extracted.Title}\t{entry.Url}");
        }
    }

    // Final step shared by the skipped and the fully processed paths.
    entry.MarkAsCrawled();
    dbContext.Entries.Update(entry);
    await dbContext.SaveChangesAsync(cancellationToken);

    logger.LogInformation($"{entry.Url} スクレイピング終了");
}