// Fetches one list page, extracts every "video" entry from it and stores the entries
// that JavDataBaseManager.HasScan does not report as already recorded.
//
// url     - address of the page to fetch; it is also written to ScanURL.Category.
// cate    - label used only in the progress message.
// current - position of this page in the run (progress message only).
// total   - number of pages in the run (progress message only).
// scans   - optional list that receives the URL of every newly inserted entry.
//
// Nothing is written (and nothing is printed) when the fetch fails or the page
// contains no "video" nodes.
private static void ScanCategoryPageUrl(string url, string cate, int current, int total, List<string> scans = null)
{
    const string siteRoot = "http://www.javlibrary.com/cn/";

    var htmlRes = JavCookieContanierHelper(url);
    if (!htmlRes.Success)
    {
        return;
    }

    HtmlAgilityPack.HtmlDocument htmlDocument = new HtmlAgilityPack.HtmlDocument();
    htmlDocument.LoadHtml(htmlRes.Content);

    var videoNodes = htmlDocument.DocumentNode.SelectNodes("//div[@class='video']");
    if (videoNodes == null)
    {
        return;
    }

    int unScanCount = 0;
    foreach (var node in videoNodes)
    {
        // Indexing ChildNodes[0] on a node without children throws, so check the count first.
        if (node.ChildNodes.Count == 0)
        {
            continue;
        }

        var anchor = node.ChildNodes[0];
        if (anchor == null || anchor.ChildNodes.Count < 3)
        {
            continue;
        }

        // Skip entries without an href instead of failing the whole page on a null dereference.
        var hrefAttribute = anchor.Attributes["href"];
        if (hrefAttribute == null || hrefAttribute.Value == null)
        {
            continue;
        }

        var id = anchor.ChildNodes[0].InnerText.Trim();
        var name = FileUtility.ReplaceInvalidChar(anchor.ChildNodes[2].InnerText.Trim());
        // Relative links ("./...") are turned into absolute ones.
        var avUrl = hrefAttribute.Value.Trim().Replace("./", siteRoot);

        if (string.IsNullOrEmpty(avUrl) || string.IsNullOrEmpty(name) || string.IsNullOrWhiteSpace(id))
        {
            continue;
        }

        // NOTE(review): Category receives the page url here, not `cate` - confirm this is intended.
        ScanURL scan = new ScanURL
        {
            Category = url,
            ID = id,
            IsDownload = false,
            Title = name,
            URL = avUrl
        };

        if (JavDataBaseManager.HasScan(scan))
        {
            continue;
        }

        unScanCount++;
        JavDataBaseManager.InsertScanURL(scan);

        if (scans != null)
        {
            scans.Add(avUrl);
        }
    }

    Console.WriteLine(cate + " " + url + " 扫描了 " + unScanCount + " 未扫描, 进度" + current + " / " + total);
}
// Walks the given pages one after another, extracts the "video" entries of each page
// and stores the entries that JavDataBaseManager.HasScan does not report as already recorded.
//
// urls - maps a page address (Key, passed to the fetch call) to the value that is written
//        to ScanURL.Category and shown in the progress message (Value).
private static void ScanCategoryPageUrlSingleThread(Dictionary<string, string> urls)
{
    const string siteRoot = "http://www.javlibrary.com/cn/";
    const int maxAttempts = 5;

    int index = 1;
    foreach (var url in urls)
    {
        // Advance the page counter once per url, so the "x / total" progress stays correct
        // even when a page fails to load or contains no entries.
        int page = index++;

        var htmlRes = new Utils.HtmlResponse();

        // Fetch at most maxAttempts times; the cookie is renewed after every response flagged as expired.
        for (int attempt = 1; attempt <= maxAttempts; attempt++)
        {
            htmlRes = HtmlManager.GetHtmlWebClientWithRenewCC(siteRoot, url.Key, cc);
            if (!htmlRes.IsExpire)
            {
                break;
            }

            GetJavCookie();
        }

        if (!htmlRes.Success)
        {
            Console.WriteLine("获取列表页 " + url.Key + " 内容失败");
            continue;
        }

        HtmlAgilityPack.HtmlDocument htmlDocument = new HtmlAgilityPack.HtmlDocument();
        htmlDocument.LoadHtml(htmlRes.Content);

        var videoNodes = htmlDocument.DocumentNode.SelectNodes("//div[@class='video']");
        if (videoNodes == null)
        {
            continue;
        }

        int unScanCount = 0;
        foreach (var node in videoNodes)
        {
            // Indexing ChildNodes[0] on a node without children throws, so check the count first.
            if (node.ChildNodes.Count == 0)
            {
                continue;
            }

            var anchor = node.ChildNodes[0];
            if (anchor == null || anchor.ChildNodes.Count < 3)
            {
                continue;
            }

            // Skip entries without an href instead of failing the whole run on a null dereference.
            var hrefAttribute = anchor.Attributes["href"];
            if (hrefAttribute == null || hrefAttribute.Value == null)
            {
                continue;
            }

            var id = anchor.ChildNodes[0].InnerText.Trim();
            var name = FileUtility.ReplaceInvalidChar(anchor.ChildNodes[2].InnerText.Trim());
            // Relative links ("./...") are turned into absolute ones.
            var avUrl = hrefAttribute.Value.Trim().Replace("./", siteRoot);

            if (string.IsNullOrEmpty(avUrl) || string.IsNullOrEmpty(name) || string.IsNullOrWhiteSpace(id))
            {
                continue;
            }

            ScanURL scan = new ScanURL
            {
                Category = url.Value,
                ID = id,
                IsDownload = false,
                Title = name,
                URL = avUrl
            };

            if (!JavDataBaseManager.HasScan(scan))
            {
                unScanCount++;
                JavDataBaseManager.InsertScanURL(scan);
            }
        }

        Console.WriteLine(url.Value + " 第 " + page + " / " + urls.Count + " 页, 加入" + unScanCount + " 条未扫描AV");
    }
}
// Processes one list page: refreshes the cookie, extracts the entries matched by listPattern,
// stores the ones JavDataBaseManager.HasScan does not report as recorded, and reports where to go next.
//
// url              - page to fetch.
// category         - value written to ScanURL.Category and shown in the progress message.
// currentCategory  - progress message only.
// totalCategories  - progress message only.
// currentPage      - progress message; also the fallback for the total page count.
// cc               - cookie container handed to UpdateCookie; the container it returns is passed back in the result.
// isUpdate         - selects the "update" variants of the last-page / next-page patterns and
//                    enables collecting the page's entries into ForUpdate.
//
// Returns a RecurModel whose Url is
//   - prefix + the captured next-page link when a next page was found,
//   - "" when there is no next page or the fetch was not successful,
//   - the incoming url when an exception was caught (presumably so the caller can retry - confirm).
public static RecurModel RecursiveHelper(string url, string category, int currentCategory, int totalCategories, int currentPage, CookieContainer cc, bool isUpdate = false)
{
    try
    {
        var ret = InitHelper.InitManager.UpdateCookie(cc, url);
        cc = ret.CC;
        var res = ret.Content;

        if (!res.Success)
        {
            _logger.WriteExceptionLog(url, "Scan failed");
            return new RecurModel { Url = "", Cc = cc };
        }

        const RegexOptions options = RegexOptions.Multiline | RegexOptions.IgnoreCase;

        // The total page count is only used for the progress message below, so an
        // unparsable value must not abort the scan: fall back to the current page.
        int totalPage = currentPage;
        MatchCollection lastPageMatches = Regex.Matches(res.Content, isUpdate ? updateLastPage : listLastPage, options);
        if (lastPageMatches.Count > 0)
        {
            var lastPageLink = lastPageMatches[0].Groups[1].Value.Replace("\\\">", "");
            // The number is whatever follows the last '=' of the captured link.
            var pageText = lastPageLink.Substring(lastPageLink.LastIndexOf('=') + 1);
            int parsedTotal;
            if (int.TryParse(pageText, out parsedTotal))
            {
                totalPage = parsedTotal;
            }
        }

        Console.WriteLine(string.Format("Start process list of {0}, page {1}/{2}, categories {3}/{4}", category, currentPage, totalPage, currentCategory, totalCategories));

        // Collect every entry of the page first, then persist the unknown ones.
        List<ScanURL> pageItems = new List<ScanURL>();
        foreach (Match item in Regex.Matches(res.Content, listPattern, options))
        {
            var id = item.Groups[3].Value;
            pageItems.Add(new ScanURL
            {
                Category = category,
                CreateTime = DateTime.Now,
                ID = id,
                IsDownload = false,
                // Group 2 holds "<id> <title>"; strip the id prefix to keep only the title.
                Title = FileUtility.ReplaceInvalidChar(item.Groups[2].Value.Replace(id + " ", "")),
                URL = item.Groups[1].Value
            });
        }

        foreach (var scan in pageItems)
        {
            if (!JavDataBaseManager.HasScan(scan))
            {
                JavDataBaseManager.InsertScanURL(scan);
            }
        }

        MatchCollection nextPageMatches = Regex.Matches(res.Content, isUpdate ? updatePageNext : listPageNext, options);
        if (nextPageMatches.Count == 0)
        {
            // NOTE(review): in update mode the entries of a page without a next link are
            // not added to ForUpdate - confirm this is intended.
            return new RecurModel { Url = "", Cc = cc };
        }

        if (isUpdate)
        {
            ForUpdate.AddRange(pageItems);
        }

        return new RecurModel { Url = prefix + nextPageMatches[0].Groups[1].Value, Cc = cc };
    }
    catch (Exception e)
    {
        _logger.WriteExceptionLog(url, string.Format("Scan error {0}", e.ToString()));
    }

    // Reached only after an exception: hand the same url back together with the latest cookie container.
    return new RecurModel { Url = url, Cc = cc };
}