Ejemplo n.º 1
0
        /// <summary>
        /// Downloads one page of the item list, then distributes a task for the next page plus
        /// one detail task per listed item that still needs checking.
        /// </summary>
        /// <remarks>
        /// When a <c>DbContextProvider</c> is available, items whose <c>LastCheckDt</c> is within
        /// the last 7 days are skipped. Nothing is distributed when the page reports a
        /// non-positive result count or lists no items.
        /// </remarks>
        /// <param name="taskData">Task payload; its <c>Page</c> is formatted into <c>Options.Value.ListUrlTpl</c>.</param>
        /// <param name="ct">Cancellation token passed to the HTTP requests and the database query.</param>
        /// <exception cref="FormatException">The result-count text on the page is not an integer.</exception>
        protected override async Task HandleInternalUnstatable(TaobaoGetItemListTaskData taskData,
                                                               CancellationToken ct)
        {
            var client = await GetHttpClient();

            var url = string.Format(Options.Value.ListUrlTpl, taskData.Page);

            string html;
            // Dispose each response once its body has been read.
            using (var rsp = await client.GetAsync(url, ct))
            {
                if (rsp.StatusCode == HttpStatusCode.Redirect)
                {
                    // Follow the 302 by hand, keeping the second request cancellable.
                    // EnsureSuccessStatusCode mirrors GetStringAsync's failure behaviour.
                    using (var redirected = await client.GetAsync(rsp.Headers.Location, ct))
                    {
                        redirected.EnsureSuccessStatusCode();
                        html = await redirected.Content.ReadAsStringAsync();
                    }
                }
                else
                {
                    html = await rsp.Content.ReadAsStringAsync();
                }
            }

            // The markup contains backslash-escaped quotes; unescape them before parsing.
            var cq = new CQ(html.Replace("\\\"", "\""));
            var searchResultSpan = cq[".search-result span"];
            var count            = int.Parse(searchResultSpan.Text().Trim());

            if (count <= 0)
            {
                return;
            }

            var itemIds = cq[".shop-filter"].NextAll().Children(".item").Select(t => t.GetAttribute("data-id"))
                          .ToList();
            if (!itemIds.Any())
            {
                return;
            }

            // The next page is always queued as long as this page listed at least one item.
            var newTaskData = new List<TaskData>
            {
                new TaobaoGetItemListTaskData
                {
                    Page = taskData.Page + 1
                }
            };

            if (DbContextProvider != null)
            {
                var db = await DbContextProvider.Get();

                var soonestCheckDt = DateTime.Now.AddDays(-7);
                var skippedItems   = await db.TaobaoItems
                                     .Where(t => itemIds.Contains(t.ItemId) && t.LastCheckDt > soonestCheckDt)
                                     .Select(a => a.ItemId).ToListAsync(ct);

                // Hash-based lookup instead of scanning the skipped list once per item id.
                var skippedSet = new HashSet<string>(skippedItems);
                itemIds.RemoveAll(skippedSet.Contains);
            }

            newTaskData.AddRange(itemIds.Select(t => new TaobaoGetItemTaskData
            {
                ItemId = t
            }));

            await TaskDistributor.Distribute(newTaskData);
        }
        /// <summary>
        /// Downloads an item page, locates and downloads its description script, extracts the
        /// image URLs from the description HTML, and distributes one download task per image.
        /// </summary>
        /// <remarks>
        /// When a <c>DbContextProvider</c> is available, the item's <c>LastCheckDt</c> is set to
        /// the current time (the record is created if missing) before the tasks are distributed.
        /// </remarks>
        /// <param name="taskData">Task payload; its <c>ItemId</c> is formatted into <c>Options.Value.UrlTemplate</c>.</param>
        /// <param name="ct">Cancellation token passed to the HTTP requests and database calls.</param>
        /// <exception cref="InvalidOperationException">
        /// The item page has no <c>data-title</c> attribute or no description URL could be found.
        /// </exception>
        protected override async Task HandleInternalUnstatable(TaobaoGetItemTaskData taskData,
                                                               CancellationToken ct)
        {
            var client = await GetHttpClient();

            // Cancellable replacement for GetStringAsync: same success check, but honours ct
            // and disposes the response.
            Func<string, Task<string>> fetchText = async address =>
            {
                using (var rsp = await client.GetAsync(address, ct))
                {
                    rsp.EnsureSuccessStatusCode();
                    return await rsp.Content.ReadAsStringAsync();
                }
            };

            var url  = string.Format(Options.Value.UrlTemplate, taskData.ItemId);
            var html = await fetchText(url);

            var cq    = new CQ(html);
            var title = cq["#J_Title h3"].Attr("data-title")?.Trim();
            if (title == null)
            {
                throw new InvalidOperationException(
                    $"No data-title attribute found on the page of item {taskData.ItemId}.");
            }

            // First try the "descUrl" form (labelled "china url" in the original code).
            var match = Regex.Match(html, @"descUrl\s*\:\slocation.*(?<url>\/\/.*?)',").Groups["url"];

            // Fall back to the "descUrlSSL" form (labelled "world url" in the original code).
            if (!match.Success)
            {
                match = Regex.Match(html, "descUrlSSL\\s*\\:\\s*\"(?<url>\\/\\/.*?)\".*").Groups["url"];
            }

            var descUrl = match.Value;
            if (string.IsNullOrEmpty(descUrl))
            {
                // Fail with a clear message instead of issuing a request for an empty URL.
                throw new InvalidOperationException(
                    $"No description URL found on the page of item {taskData.ItemId}.");
            }

            // Protocol-relative URL: make it absolute.
            if (descUrl.StartsWith("//"))
            {
                descUrl = $"https:{descUrl}";
            }

            var descJsonp = await fetchText(descUrl);

            // The description response is a script of the form: var desc = '<html…>';
            var descHtml = Regex.Match(descJsonp, @"var\s*desc\s*=\s*'\s*(?<html>[\s\S]*)\s*'\s*;")
                           .Groups["html"].Value;
            var descCq  = new CQ(descHtml);
            var urlList = await ImageUrlListExtractor.ExtractImageUrlList(descCq);

            var newTaskData = urlList.Select((imageUrl, index) =>
            {
                // Drop the query string first so a '/' inside it cannot be mistaken for the
                // last path separator.
                var queryStart = imageUrl.IndexOf('?');
                var path       = queryStart >= 0 ? imageUrl.Substring(0, queryStart) : imageUrl;

                // Prefix with the position in the list.
                var filename = $"{index}_{path.Substring(path.LastIndexOf('/') + 1)}";

                return new DownloadTaskData
                {
                    RelativeFilename = $"{title}/{filename}",
                    Url = imageUrl
                };
            }).ToList();

            if (DbContextProvider != null)
            {
                var db = await DbContextProvider.Get();

                var record = await db.TaobaoItems.FirstOrDefaultAsync(t => t.ItemId == taskData.ItemId, ct);

                if (record == null)
                {
                    record = new TaobaoItem
                    {
                        ItemId = taskData.ItemId
                    };
                    db.Add(record);
                }

                record.LastCheckDt = DateTime.Now;
                await db.SaveChangesAsync(ct);
            }

            await TaskDistributor.Distribute(newTaskData);
        }