/// <summary>
/// Queues a single image from a personal page for download, using the
/// image's ALT text (sanitised) as both the target folder name and the
/// file-name stem.
/// </summary>
/// <param name="imgNode">
/// Image tag to inspect. The URL is read from its SRC attribute, or from
/// DATA-CFSRC when SRC is absent; the display name is read from ALT.
/// </param>
/// <param name="fileIndex">
/// Ordinal used in console messages and in the generated
/// "name (index).jpg" file name. It is passed by value, so the caller is
/// responsible for advancing its own counter between calls.
/// </param>
/// <param name="type">
/// 1 saves under <c>saveOneDayOneBeautyBasePath</c>; any other value saves
/// under <c>saveBeautyFlowBasePath</c>. The value 2 (the "beauty flow" pages)
/// additionally drops the first four characters of the ALT text.
/// </param>
static void GetPicUrlsFromBeautyPersonalPage(ImageTag imgNode, int fileIndex, int type)
{
    bool hasSrc = imgNode.Attributes.ContainsKey("SRC");
    if (!hasSrc && !imgNode.Attributes.ContainsKey("DATA-CFSRC"))
    {
        Console.WriteLine("无法获取第" + fileIndex + "张图片!");
        return;
    }

    // Since the 2014-05-16 page-structure change the attribute value is used
    // as the download URL unchanged (no thumbnail-prefix rewriting).
    string imgUrl = hasSrc ? imgNode.GetAttribute("SRC") : imgNode.GetAttribute("DATA-CFSRC");

    // Each URL is queued at most once.
    if (imgFileNameSet.Contains(imgUrl))
    {
        return;
    }

    if (!imgNode.Attributes.ContainsKey("ALT"))
    {
        // Deliberately not recorded in imgFileNameSet, matching the check order above.
        Console.WriteLine("第" + fileIndex + "张图片无法获取alt属性!");
        return;
    }

    string imgName = imgNode.GetAttribute("ALT");

    // type 2 = pages reached through the "beauty flow": the ALT text carries a
    // four-character prefix that is dropped. Guard the length so a shorter
    // ALT no longer throws ArgumentOutOfRangeException; it is kept as-is.
    if (type == 2 && imgName.Length >= 4)
    {
        imgName = imgName.Substring(4);
    }

    imgFileNameSet.Add(imgUrl);

    // The name is used as a directory name and as a file name, so remove
    // every character that is invalid in either. Splitting on the invalid
    // set and re-joining removes all occurrences in one pass.
    imgName = string.Concat(imgName.Split(Path.GetInvalidPathChars()));
    imgName = string.Concat(imgName.Split(Path.GetInvalidFileNameChars()));

    // type selects the base folder the image is stored under.
    string completeImgName = type == 1
        ? saveOneDayOneBeautyBasePath + imgName
        : saveBeautyFlowBasePath + imgName;

    if (!Directory.Exists(completeImgName))
    {
        Directory.CreateDirectory(completeImgName);
    }

    string fileName = imgName + " (" + fileIndex + ").jpg";

    // Remember where this URL is expected to land, then hand it to the downloader.
    currentImgFileNameSet.Add(imgUrl, completeImgName + "\\" + fileName);
    // NOTE(review): the meaning of the trailing arguments ("", "", 1, 0, 1)
    // is defined by thunderAgent.AddTask2 and is not visible here.
    thunderAgent.AddTask2(imgUrl, fileName, "D:\\Download\\" + completeImgName + "\\", "", "", 1, 0, 1);
}
/// <summary>
/// Wraps the given image tag in an <c>ImageData</c> bound to the current
/// page data and appends it to that page's image-link collection.
/// </summary>
/// <param name="obTag">Image tag to record.</param>
private void ProcessImageTag(ImageTag obTag)
{
    // Build the entry first, then register it on the page.
    var imageEntry = new ImageData(m_obPageData, obTag);
    this.m_obPageData.m_ImageLinks.Add(imageEntry);
}