예제 #1
0
        /// <summary>
        /// Reads the body of a single journal entry from <paramref name="lines"/>.
        /// </summary>
        /// <remarks>
        /// Lines are consumed until the enumerator is exhausted or a null/empty
        /// line is reached. Comment lines are skipped. The <c>@note</c> and
        /// <c>@payee</c> directives are recognised but currently ignored; any other
        /// directive is rejected. Every remaining line must be an entry item of
        /// the form <c>[book] account asset amount</c>.
        /// </remarks>
        /// <param name="directive">The directive that opened the entry; its <c>Data</c> is stored as the entry index.</param>
        /// <param name="lines">Line enumerator positioned on the line that opened the entry.</param>
        /// <returns>The entry populated with every item that was read.</returns>
        /// <exception cref="JournalException">
        /// An unknown directive was found, a line is not a well-formed entry item,
        /// or an amount is not a valid decimal number.
        /// </exception>
        private IEntry ReadEntry(Directive directive, IEnumerator<string> lines)
        {
            var entryItemRegex = new Regex(@"^\s*((?<book>.*?)\s+)?(?<account>.*?)\s+(?<asset>.*?)\s+(?<amount>-?[\d,.]+)\s*$");
            var entry          = new Entry();

            entry.Index = directive.Data;

            // The end of input or a blank line terminates the entry.
            while (lines.MoveNext() && !IsNullOrEmpty(lines.Current))
            {
                var line = lines.Current;

                if (IsComment(line))
                {
                    continue;
                }

                // Use a separate local so the parameter that opened the entry is not overwritten.
                var nested = ReadDirective(line);

                if (nested != null)
                {
                    switch (nested.Name)
                    {
                    case "@note":
                        // TODO
                        break;

                    case "@payee":
                        // TODO
                        break;

                    default:
                        throw new JournalException($"Unknown directive: {nested.Name}");
                    }

                    continue;
                }

                var match = entryItemRegex.Match(line);

                // Without this check a malformed line would surface as an opaque
                // FormatException from parsing an empty amount.
                if (!match.Success)
                {
                    throw new JournalException($"Invalid entry item: {line}");
                }

                var book    = match.Groups["book"].Success ? new Book(match.Groups["book"].Value) : new Book("default");
                var account = new Account(match.Groups["account"].Value);
                var asset   = new Asset(match.Groups["asset"].Value);

                // Journal files are machine-readable, so parse with the invariant
                // culture: ',' is always the group separator and '.' the decimal point,
                // regardless of the host's regional settings.
                decimal amount;
                if (!decimal.TryParse(
                        match.Groups["amount"].Value,
                        NumberStyles.AllowLeadingSign | NumberStyles.AllowThousands | NumberStyles.AllowDecimalPoint,
                        CultureInfo.InvariantCulture,
                        out amount))
                {
                    throw new JournalException($"Invalid amount: {match.Groups["amount"].Value}");
                }

                entry.AddItem(book, account, asset, amount);
            }

            return entry;
        }
예제 #2
0
        /// <summary>
        /// Registers the items found in a fetched entry page and marks the entry as crawled.
        /// </summary>
        /// <remarks>
        /// A 404 (NotFound) or 302 (Found) response skips scraping but still marks the
        /// entry as crawled. Any other non-success status logs a warning and returns
        /// without touching the entry.
        /// </remarks>
        /// <param name="dbContext">Database context used to look up and persist items and the entry.</param>
        /// <param name="crawlContext">Crawl state whose counter is advanced once per extracted item.</param>
        /// <param name="entry">The entry whose page was fetched.</param>
        /// <param name="response">The HTTP response for the entry's URL.</param>
        /// <param name="cancellationToken">Token passed to the parser and database calls.</param>
        async Task RegisterItemsAsync(
            ApplicationDbContext dbContext,
            CrawlContext crawlContext,
            Entry entry,
            HttpResponseMessage response,
            CancellationToken cancellationToken)
        {
            // These two statuses mean there is nothing to scrape, yet the entry
            // is still finalised below.
            var nothingToScrape =
                response.StatusCode == HttpStatusCode.NotFound ||
                response.StatusCode == HttpStatusCode.Found;

            if (!nothingToScrape)
            {
                if (!response.IsSuccessStatusCode)
                {
                    logger.LogWarning($"{entry.Url} の取得に失敗しました。({response.StatusCode})");
                    return;
                }

                var body   = await response.Content.ReadAsStringAsync();
                var parsed = await parser.ParseAsync(body, cancellationToken);

                foreach (var candidate in ExtractItems(parsed))
                {
                    Item item = await FindByAsinAsync(dbContext, candidate.Asin, cancellationToken);

                    if (item == null)
                    {
                        // A single entry may feature the same product more than once,
                        // so a new product is saved immediately.
                        dbContext.Items.Add(candidate);
                        await dbContext.SaveChangesAsync(cancellationToken);

                        item = candidate;
                    }

                    if (!entry.HasItem(item))
                    {
                        entry.AddItem(item);
                    }

                    crawlContext.AddCount();

                    logger.LogInformation($"{crawlContext.Count}\t{candidate.Title}\t{entry.Url}");
                }
            }

            entry.MarkAsCrawled();
            dbContext.Entries.Update(entry);
            await dbContext.SaveChangesAsync(cancellationToken);

            logger.LogInformation($"{entry.Url} スクレイピング終了");
        }