/// <summary>
/// Refreshes the cookie for <paramref name="url"/>, extracts the AV metadata from the
/// returned page content with the <c>detail*Pattern</c> regular expressions, stores the AV
/// and any directors, companies, publishers and actresses that are not yet in the database,
/// downloads the cover picture if it is not already on disk, and finally marks
/// <paramref name="oriURL"/> as scanned.
/// </summary>
/// <param name="url">URL handed to <c>InitHelper.InitManager.UpdateCookie</c>; also stored as the AV's URL and used as the log key.</param>
/// <param name="oriURL">URL shown in the progress message and passed to <c>JavDataBaseManager.UpdateScanURL</c>.</param>
/// <param name="currentItem">Index of this item; only used in the progress message.</param>
/// <param name="totalItem">Total item count; only used in the progress message.</param>
/// <param name="cc">Cookie container passed to <c>UpdateCookie</c>.</param>
/// <returns>The cookie container reported back by <c>UpdateCookie</c>.</returns>
/// <remarks>
/// Any exception raised while parsing or persisting is written to the exception log and
/// swallowed; the cookie container is returned in every case. Exceptions thrown by
/// <c>UpdateCookie</c> itself are not caught here.
/// </remarks>
public static CookieContainer StartDownload(string url, string oriURL, int currentItem, int totalItem, CookieContainer cc)
{
    const RegexOptions options = RegexOptions.Multiline | RegexOptions.IgnoreCase;

    var ret = InitHelper.InitManager.UpdateCookie(cc, url);
    cc = ret.CC;
    var res = ret.Content;

    try
    {
        if (!res.Success)
        {
            _logger.WriteExceptionLog(url, string.Format("Download failed {0}", url));
            return cc;
        }

        var html = res.Content;
        _logger.WriteLog(url, string.IsNullOrWhiteSpace(html) ? "空" : html);
        Console.WriteLine(string.Format("Start to download {0}, {1}/{2}", oriURL, currentItem, totalItem));

        if (html == null)
        {
            // Regex.Matches rejects null input; report the failure explicitly instead of
            // relying on the catch-all below to swallow an ArgumentNullException.
            _logger.WriteExceptionLog(url, string.Format("Download failed {0}, empty content", url));
            return cc;
        }

        // Parse machine-formatted values independently of the OS locale.
        var invariant = System.Globalization.CultureInfo.InvariantCulture;
        AV av = new AV();

        // When a pattern matches several times, the last match wins for scalar fields.
        foreach (Match item in Regex.Matches(html, detailIDPattern, options))
        {
            var data = item.Groups[1].Value;
            Console.WriteLine(string.Format("Get AV {0}, ID -> {1}", url, data));
            av.ID = data;
        }

        foreach (Match item in Regex.Matches(html, detailTitlePattern, options))
        {
            var data = item.Groups[2].Value;
            // Strip "<ID> " only when an ID was actually found; with an empty ID the search
            // text would be a single space and every space in the title would be removed.
            if (!string.IsNullOrEmpty(av.ID))
            {
                data = data.Replace(av.ID + " ", "");
            }
            Console.WriteLine(string.Format("Get AV {0}, Title -> {1}", url, data));
            av.Name = FileUtility.ReplaceInvalidChar(data);
        }

        foreach (Match item in Regex.Matches(html, detailImgPattern, options))
        {
            var raw = item.Groups[1].Value;
            // Values that do not start with "http" get an "http:" prefix.
            var data = raw.StartsWith("http", StringComparison.Ordinal) ? raw : "http:" + raw;
            Console.WriteLine(string.Format("Get AV {0}, IMG -> {1}", url, data));
            av.PictureURL = data;
        }

        foreach (Match item in Regex.Matches(html, detailDatePattern, options))
        {
            var data = item.Groups[1].Value;
            Console.WriteLine(string.Format("Get AV {0}, Date -> {1}", url, data));
            av.ReleaseDate = DateTime.Parse(data, invariant);
        }

        foreach (Match item in Regex.Matches(html, detailLengthPattern, options))
        {
            var data = item.Groups[1].Value;
            Console.WriteLine(string.Format("Get AV {0}, Length -> {1}", url, data));
            av.AvLength = int.Parse(data, invariant);
        }

        // Multi-valued fields are accumulated as comma-terminated lists ("a,b,").
        foreach (Match item in Regex.Matches(html, detailDirectorPattern, options))
        {
            var u = item.Groups[1].Value;
            var data = item.Groups[2].Value;
            Console.WriteLine(string.Format("Get AV {0}, Director -> {1}", url, data));
            av.Director += data + ",";

            Director d = new Director
            {
                CreateTime = DateTime.Now,
                Name = data,
                URL = prefix + directorPrefix + u
            };
            if (!JavDataBaseManager.HasDirector(d.URL))
            {
                JavDataBaseManager.InsertDirector(d);
            }
        }

        foreach (Match item in Regex.Matches(html, detailCompanyPattern, options))
        {
            var u = item.Groups[1].Value;
            var data = item.Groups[2].Value;
            Console.WriteLine(string.Format("Get AV {0}, Company -> {1}", url, data));
            av.Company += data + ",";

            Company c = new Company
            {
                CreateTime = DateTime.Now,
                Name = data,
                URL = prefix + companyPrefix + u
            };
            if (!JavDataBaseManager.HasCompany(c.URL))
            {
                JavDataBaseManager.InsertCompany(c);
            }
        }

        foreach (Match item in Regex.Matches(html, detailPublisherPattern, options))
        {
            var u = item.Groups[1].Value;
            var data = item.Groups[2].Value;
            Console.WriteLine(string.Format("Get AV {0}, Publisher -> {1}", url, data));
            av.Publisher += data + ",";

            Publisher p = new Publisher
            {
                CreateTime = DateTime.Now,
                Name = data,
                URL = prefix + publisherPrefix + u
            };
            if (!JavDataBaseManager.HasPublisher(p.URL))
            {
                JavDataBaseManager.InsertPublisher(p);
            }
        }

        foreach (Match item in Regex.Matches(html, detailCategoryPattern, options))
        {
            var data = item.Groups[2].Value;
            Console.WriteLine(string.Format("Get AV {0}, Category -> {1}", url, data));
            av.Category += data + ",";
        }

        foreach (Match item in Regex.Matches(html, detailActressPattern, options))
        {
            var u = item.Groups[1].Value;
            var data = item.Groups[2].Value;
            Console.WriteLine(string.Format("Get AV {0}, Actress -> {1}", url, data));
            av.Actress += data + ",";

            Actress a = new Actress
            {
                CreateTime = DateTime.Now,
                Name = data,
                URL = prefix + actressPrefix + u
            };
            if (!JavDataBaseManager.HasActress(a.URL))
            {
                JavDataBaseManager.InsertActress(a);
            }
        }

        // NOTE: comment extraction (detailCommentPattern / JavDataBaseManager.InsertComment)
        // is currently disabled and therefore not performed here.

        av.URL = url;
        if (!JavDataBaseManager.HasAv(av.URL))
        {
            JavDataBaseManager.InsertAV(av);
        }

        // Build the picture path once so the existence check, the download target and
        // the failure message always refer to the same file.
        var picturePath = imgFolder + av.ID + av.Name + ".jpg";
        string result = "";
        if (!File.Exists(picturePath))
        {
            result = Utils.DownloadHelper.DownloadFile(av.PictureURL, picturePath);
        }

        JavDataBaseManager.UpdateScanURL(oriURL);

        // A non-empty result from DownloadFile is treated as a failed picture download.
        if (!string.IsNullOrEmpty(result))
        {
            _logger.WriteExceptionLog(url, string.Format("Download picture failed {0}", picturePath));
        }
    }
    catch (Exception e)
    {
        _logger.WriteExceptionLog(url, string.Format("Download failed {0}", e.ToString()));
    }

    return cc;
}
/// <summary>
/// Builds an <c>AV</c> model from a detail page's HTML using XPath queries, and inserts any
/// directors, companies, publishers and actresses found on the page that are not yet in
/// the database.
/// </summary>
/// <param name="html">HTML of the detail page.</param>
/// <param name="avUrl">URL stored on the resulting model.</param>
/// <returns>
/// The populated model. The heading text is split at its first space into ID and name; if
/// it contains no space, the whole text becomes the ID and the name is empty. The release
/// date falls back to 2050-01-01 and the length is left at its default when the page value
/// is missing or cannot be parsed.
/// </returns>
/// <remarks>
/// The title heading and the jacket image are dereferenced without a null check, so a page
/// lacking either makes this method throw (NullReferenceException).
/// </remarks>
private static AV GenerateAVModel(string html, string avUrl)
{
    const string siteRoot = "http://www.javlibrary.com/cn/";
    // Parse machine-formatted values independently of the OS locale.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    HtmlAgilityPack.HtmlDocument htmlDocument = new HtmlAgilityPack.HtmlDocument();
    htmlDocument.LoadHtml(html);
    var root = htmlDocument.DocumentNode;

    var titlePath = "//h3[@class='post-title text']";
    var picPath = "//img[@id='video_jacket_img']";
    var releasdPath = "//div[@id='video_date']//td[@class='text']";
    var lengthPath = "//div[@id='video_length']//span[@class='text']";
    var dirPath = "//span[@class='director']//a";
    var comPath = "//span[@class='maker']//a";
    var pubPath = "//span[@class='label']//a";
    var catPath = "//span[@class='genre']//a";
    var staPath = "//span[@class='star']//a";

    AV av = new AV();
    av.URL = avUrl;

    // Heading text is expected to look like "<ID> <title>".
    var title = root.SelectSingleNode(titlePath).InnerText.Trim();
    var spaceIndex = title.IndexOf(' ');
    if (spaceIndex < 0)
    {
        // No separator: Substring(0, -1) would throw, so keep the whole text as the ID.
        av.ID = title;
        av.Name = string.Empty;
    }
    else
    {
        av.ID = title.Substring(0, spaceIndex);
        av.Name = FileUtility.ReplaceInvalidChar(title.Substring(spaceIndex + 1));
    }

    // Values that do not start with "http" get an "http:" prefix.
    var picSrc = root.SelectSingleNode(picPath).Attributes["src"].Value;
    av.PictureURL = picSrc.StartsWith("http", StringComparison.Ordinal) ? picSrc : "http:" + picSrc;

    // Sentinel date used when the page has no usable release date.
    DateTime rDate = new DateTime(2050, 1, 1);
    var release = root.SelectSingleNode(releasdPath);
    if (release != null && !string.IsNullOrEmpty(release.InnerText))
    {
        DateTime parsedDate;
        if (DateTime.TryParse(release.InnerText.Trim(), invariant, System.Globalization.DateTimeStyles.None, out parsedDate)
            && parsedDate > DateTime.MinValue)
        {
            rDate = parsedDate;
        }
    }
    av.ReleaseDate = rDate;

    var length = root.SelectSingleNode(lengthPath);
    if (length != null && !string.IsNullOrEmpty(length.InnerText))
    {
        // Tolerate a malformed length the same way a malformed date is tolerated above.
        int parsedLength;
        if (int.TryParse(length.InnerText.Trim(), System.Globalization.NumberStyles.Integer, invariant, out parsedLength))
        {
            av.AvLength = parsedLength;
        }
    }

    // Multi-valued fields are accumulated as comma-terminated lists ("a,b,").
    var dirNode = root.SelectNodes(dirPath);
    if (dirNode != null)
    {
        foreach (var dir in dirNode)
        {
            var name = dir.InnerHtml.Trim();
            Director d = new Director
            {
                CreateTime = DateTime.Now,
                Name = name,
                URL = siteRoot + dir.Attributes["href"].Value
            };
            if (!JavDataBaseManager.HasDirector(d.URL))
            {
                JavDataBaseManager.InsertDirector(d);
            }
            av.Director += name + ",";
        }
    }

    var comNode = root.SelectNodes(comPath);
    if (comNode != null)
    {
        foreach (var com in comNode)
        {
            var name = com.InnerHtml.Trim();
            Company c = new Company
            {
                CreateTime = DateTime.Now,
                Name = name,
                URL = siteRoot + com.Attributes["href"].Value
            };
            if (!JavDataBaseManager.HasCompany(c.URL))
            {
                JavDataBaseManager.InsertCompany(c);
            }
            av.Company += name + ",";
        }
    }

    var pubNode = root.SelectNodes(pubPath);
    if (pubNode != null)
    {
        foreach (var pub in pubNode)
        {
            var name = pub.InnerHtml.Trim();
            Publisher p = new Publisher
            {
                CreateTime = DateTime.Now,
                Name = name,
                URL = siteRoot + pub.Attributes["href"].Value
            };
            if (!JavDataBaseManager.HasPublisher(p.URL))
            {
                JavDataBaseManager.InsertPublisher(p);
            }
            av.Publisher += name + ",";
        }
    }

    // Categories are only recorded by name; no database entity is created for them.
    var catNodes = root.SelectNodes(catPath);
    if (catNodes != null)
    {
        foreach (var cat in catNodes)
        {
            av.Category += cat.InnerHtml.Trim() + ",";
        }
    }

    var starNodes = root.SelectNodes(staPath);
    if (starNodes != null)
    {
        foreach (var star in starNodes)
        {
            var name = star.InnerHtml.Trim();
            Actress a = new Actress
            {
                CreateTime = DateTime.Now,
                Name = name,
                URL = siteRoot + star.Attributes["href"].Value
            };
            if (!JavDataBaseManager.HasActress(a.URL))
            {
                JavDataBaseManager.InsertActress(a);
            }
            av.Actress += name + ",";
        }
    }

    return av;
}