/// <summary>
/// Fetches one page of the item list, then enqueues a task for the next page plus one
/// item task for every listed item that has not been checked within the last 7 days.
/// </summary>
/// <param name="taskData">Carries the page number substituted into <c>Options.Value.ListUrlTpl</c>.</param>
/// <param name="ct">Token passed to the list-page request and to the database query.</param>
/// <exception cref="FormatException">The result counter found on the page is not an integer.</exception>
protected override async Task HandleInternalUnstatable(TaobaoGetItemListTaskData taskData, CancellationToken ct)
{
    var client = await GetHttpClient();
    var url = string.Format(Options.Value.ListUrlTpl, taskData.Page);

    string html;
    // The response is disposed as soon as its body (or redirect target) has been read.
    using (var rsp = await client.GetAsync(url, ct))
    {
        // A 302 answer is followed manually via its Location header; otherwise the body is read directly.
        html = rsp.StatusCode == HttpStatusCode.Redirect
            ? await client.GetStringAsync(rsp.Headers.Location)
            : await rsp.Content.ReadAsStringAsync();
    }

    // Unescape \" sequences before handing the markup to the parser.
    var cq = new CQ(html.Replace("\\\"", "\""));

    var countText = cq[".search-result span"].Text().Trim();
    int count;
    if (!int.TryParse(
            countText,
            System.Globalization.NumberStyles.Integer,
            System.Globalization.CultureInfo.InvariantCulture,
            out count))
    {
        // Same exception type int.Parse would raise, but with enough context to diagnose the page.
        throw new FormatException(
            $"Unexpected search result count '{countText}' on list page {taskData.Page} ({url}).");
    }

    if (count <= 0)
    {
        return;
    }

    var itemIds = cq[".shop-filter"].NextAll().Children(".item")
        .Select(t => t.GetAttribute("data-id"))
        .ToList();
    if (!itemIds.Any())
    {
        return;
    }

    // The next page is always queued once the current page listed at least one item.
    var newTaskData = new List<TaskData>
    {
        new TaobaoGetItemListTaskData { Page = taskData.Page + 1 }
    };

    if (DbContextProvider != null)
    {
        var db = await DbContextProvider.Get();
        var soonestCheckDt = DateTime.Now.AddDays(-7);
        var skippedItems = await db.TaobaoItems
            .Where(t => itemIds.Contains(t.ItemId) && t.LastCheckDt > soonestCheckDt)
            .Select(a => a.ItemId)
            .ToListAsync(ct);

        // Hash lookup instead of scanning the skipped list once per item id.
        var skipped = new HashSet<string>(skippedItems);
        itemIds.RemoveAll(id => skipped.Contains(id));
    }

    newTaskData.AddRange(itemIds.Select(id => new TaobaoGetItemTaskData { ItemId = id }));

    await TaskDistributor.Distribute(newTaskData);
}
/// <summary>
/// Downloads an item page, locates and fetches its description script, extracts the image
/// URLs from the description markup, records the check time of the item (when a database
/// provider is configured) and enqueues one download task per image.
/// </summary>
/// <param name="taskData">Carries the item id substituted into <c>Options.Value.UrlTemplate</c>.</param>
/// <param name="ct">Token passed to the database calls. The HTTP calls here are made without it.</param>
/// <exception cref="InvalidOperationException">
/// The item page has no <c>data-title</c> attribute or no description URL could be found in it.
/// </exception>
protected override async Task HandleInternalUnstatable(TaobaoGetItemTaskData taskData, CancellationToken ct)
{
    var client = await GetHttpClient();
    var url = string.Format(Options.Value.UrlTemplate, taskData.ItemId);
    var html = await client.GetStringAsync(url);
    var cq = new CQ(html);

    var rawTitle = cq["#J_Title h3"].Attr("data-title");
    if (rawTitle == null)
    {
        // Fail with context instead of a NullReferenceException on Trim().
        throw new InvalidOperationException($"No item title found on item page {url}.");
    }

    var title = rawTitle.Trim();

    //china url
    var match = Regex.Match(html, @"descUrl\s*\:\slocation.*(?<url>\/\/.*?)',").Groups["url"];
    //world url
    if (!match.Success)
    {
        match = Regex.Match(html, "descUrlSSL\\s*\\:\\s*\"(?<url>\\/\\/.*?)\".*").Groups["url"];
    }

    if (!match.Success)
    {
        // Without this guard an empty string would be handed to the HTTP client.
        throw new InvalidOperationException($"No description URL found on item page {url}.");
    }

    var descUrl = match.Value;
    if (descUrl.StartsWith("//", StringComparison.Ordinal))
    {
        // Both patterns capture a scheme-relative URL; make it absolute.
        descUrl = $"https:{descUrl}";
    }

    // The description is served as a script assigning the markup to a `desc` variable.
    var descJsonp = await client.GetStringAsync(descUrl);
    var descHtml = Regex.Match(descJsonp, @"var\s*desc\s*=\s*'\s*(?<html>[\s\S]*)\s*'\s*;")
        .Groups["html"].Value;
    var descCq = new CQ(descHtml);
    var urlList = await ImageUrlListExtractor.ExtractImageUrlList(descCq);

    // Files are numbered by their position in the list and grouped in a folder named after the title.
    var newTaskData = urlList
        .Select((imageUrl, index) => new DownloadTaskData
        {
            RelativeFilename = $"{title}/{BuildImageFilename(index, imageUrl)}",
            Url = imageUrl
        })
        .ToList();

    if (DbContextProvider != null)
    {
        var db = await DbContextProvider.Get();
        var record = await db.TaobaoItems.FirstOrDefaultAsync(t => t.ItemId == taskData.ItemId, ct);
        if (record == null)
        {
            record = new TaobaoItem { ItemId = taskData.ItemId };
            db.Add(record);
        }

        record.LastCheckDt = DateTime.Now;
        await db.SaveChangesAsync(ct);
    }

    await TaskDistributor.Distribute(newTaskData);
}

/// <summary>
/// Builds "<paramref name="index"/>_&lt;last path segment&gt;" for an image URL, ignoring any query string.
/// </summary>
/// <param name="index">Zero-based position of the image in the extracted list.</param>
/// <param name="imageUrl">Image URL as returned by the extractor.</param>
/// <returns>The file name to use below the item folder.</returns>
private static string BuildImageFilename(int index, string imageUrl)
{
    // Cut the query off first so that a '/' inside it cannot be mistaken for a path separator.
    var queryStart = imageUrl.IndexOf('?');
    var path = queryStart >= 0 ? imageUrl.Substring(0, queryStart) : imageUrl;
    return $"{index}_{path.Substring(path.LastIndexOf('/') + 1)}";
}