/// <summary>
/// Queues a single download in Thunder (Xunlei) through the ThunderAgent COM
/// object and commits it right away.
/// </summary>
/// <param name="pURL">Target URL. Required.</param>
/// <param name="pFileName">Name to save the file under. Optional; pass an empty string to let Thunder decide.</param>
/// <param name="pPath">Destination directory. Optional; pass an empty string to let Thunder decide.</param>
/// <param name="pComments">Download comment. Optional; may be empty.</param>
/// <param name="pReferURL">Referrer page URL. Optional; may be empty.</param>
/// <param name="nStartMode">Start mode: 0 = start manually, 1 = start immediately, -1 = let Thunder decide.</param>
/// <param name="nOnlyFromOrigin">1 = download only from the original URL, 0 = multi-source download.</param>
/// <param name="nOriginThreadCount">Number of threads for the original address, range 1-10; -1 = let Thunder decide.</param>
/// <exception cref="ArgumentException"><paramref name="pURL"/> is null, empty or whitespace.</exception>
static void AddTask(string pURL, string pFileName, string pPath, string pComments, string pReferURL, int nStartMode, int nOnlyFromOrigin, int nOriginThreadCount)
{
    // The URL is the only mandatory argument; reject a missing one here instead of
    // handing it to the COM component.
    if (string.IsNullOrWhiteSpace(pURL))
    {
        throw new ArgumentException("The target URL is required.", "pURL");
    }

    ThunderAgentLib.AgentClass agentClass = new ThunderAgentLib.AgentClass();

    // Register the task with exactly the settings supplied by the caller.
    agentClass.AddTask(pURL, pFileName, pPath, pComments, pReferURL, nStartMode, nOnlyFromOrigin, nOriginThreadCount);

    // Commit the queued task.
    agentClass.CommitTasks2(1);
}
/// <summary>
/// Entry point. Reads page URLs from url.txt, downloads each page, extracts the
/// picture links it contains and queues every not-yet-seen picture as a Thunder
/// (Xunlei) download task, then commits the tasks and waits for a key press.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    // Run log (highest page id fetched on each date):
    //   2014-03-18: fetched up to 4110
    //   2014-04-18: fetched up to 4174
    //   2014-05-18: fetched up to 4223
    //   2014-06-23: fetched up to 4296
    //   2014-07-30: fetched up to 4380
    //   2014-08-21: fetched up to 4433

    #region Step 1: find every existing page (HTTP 200). Copies of the built program can be run from several folders in parallel; when a run finishes, url.txt in that folder holds all existing page links. Newest page on 2014-03-18 was 4110
    //StreamWriter fw = new StreamWriter("url.txt");
    //string baseUrl = "http://www.meizitu.com/a/";
    //for (int i = 4381; i < 4434; i++)
    //{
    //    string url = baseUrl + i + ".html";
    //    try
    //    {
    //        HttpWebRequest httpWebRequest = HttpWebRequest.CreateHttp(url);
    //        httpWebRequest.Method = "GET";
    //        HttpWebResponse httpWebResponse = (HttpWebResponse)httpWebRequest.GetResponse();
    //        if (httpWebResponse.StatusCode.Equals(HttpStatusCode.OK))
    //        {
    //            fw.WriteLine(url);
    //        }
    //        httpWebResponse.Close();
    //    }
    //    catch (WebException ex)
    //    {
    //        HttpWebResponse response = (HttpWebResponse)ex.Response;
    //        if (response != null) // guard against a null response object
    //        {
    //            Console.WriteLine(response.StatusCode);
    //            response.Close();
    //        }
    //    }
    //    finally
    //    {
    //        Console.WriteLine(i);
    //    }
    //}
    //fw.Close();
    #endregion

    #region Step 2: crawl the pictures on every page listed in url.txt
    const string urlListFile = "url.txt";
    const string titleOpenTag = "<title>";
    const string titleSuffix = " | 妹子图";
    const string picMarker = "src=\"http://www.meizitu.com/wp-content/uploads/";
    const int srcPrefixLength = 5; // length of src=" which precedes the actual URL

    // Fail with a readable message instead of an unhandled FileNotFoundException.
    if (!File.Exists(urlListFile))
    {
        Console.WriteLine("未找到" + urlListFile + "!");
        Console.Read();
        return;
    }

    // Load the page list; the reader is disposed even if reading fails.
    List<string> links = new List<string>();
    using (StreamReader fr = new StreamReader(urlListFile))
    {
        string line;
        while ((line = fr.ReadLine()) != null)
        {
            line = line.Trim();
            if (line.Length > 0) // blank lines are not URLs; skip them
            {
                links.Add(line);
            }
        }
    }

    ThunderAgentLib.AgentClass agent = new ThunderAgentLib.AgentClass();
    char[] invalidNameChars = Path.GetInvalidFileNameChars();

    foreach (string link in links)
    {
        try
        {
            HttpWebRequest httpWebRequest = HttpWebRequest.CreateHttp(link);
            httpWebRequest.Method = "GET";

            // Response and reader are released even when reading throws.
            string htmlContent;
            using (HttpWebResponse httpWebResponse = (HttpWebResponse)httpWebRequest.GetResponse())
            using (StreamReader reader = new StreamReader(new BufferedStream(httpWebResponse.GetResponseStream(), 4 * 200 * 1024), Encoding.GetEncoding("gb2312")))
            {
                htmlContent = reader.ReadToEnd();
            }

            // The title is the text between "<title>" and the site suffix.
            int titleStart = htmlContent.IndexOf(titleOpenTag, StringComparison.Ordinal);
            int titleEnd = titleStart == -1
                ? -1
                : htmlContent.IndexOf(titleSuffix, titleStart + titleOpenTag.Length, StringComparison.Ordinal);
            if (titleEnd == -1)
            {
                Console.WriteLine(link + "出错!" + "未找到页面标题");
                continue;
            }

            string title = htmlContent.Substring(titleStart + titleOpenTag.Length, titleEnd - titleStart - titleOpenTag.Length).Trim();

            // The title becomes part of a file name, so replace characters that are not allowed there.
            foreach (char invalidChar in invalidNameChars)
            {
                title = title.Replace(invalidChar, '_');
            }

            // Collect every uploaded .jpg referenced by a src attribute.
            List<string> picLinks = new List<string>();
            int searchFrom = titleEnd;
            while (true)
            {
                int attrStart = htmlContent.IndexOf(picMarker, searchFrom, StringComparison.Ordinal);
                if (attrStart == -1)
                {
                    break;
                }

                int urlStart = attrStart + srcPrefixLength;
                int quoteEnd = htmlContent.IndexOf('"', urlStart);
                if (quoteEnd == -1)
                {
                    // Unterminated attribute: nothing further can be parsed, keep what was found.
                    break;
                }
                searchFrom = quoteEnd;

                // Only look for ".jpg" inside this attribute value, so a non-jpg upload
                // cannot swallow the markup up to some later ".jpg".
                string srcValue = htmlContent.Substring(urlStart, quoteEnd - urlStart);
                int extIndex = srcValue.IndexOf(".jpg", StringComparison.Ordinal);
                if (extIndex == -1)
                {
                    continue;
                }

                string picLink = srcValue.Substring(0, extIndex + 4);

                // Links containing "limg.jpg" or "hezuo" are excluded.
                if (picLink.IndexOf("limg.jpg", StringComparison.Ordinal) == -1 && picLink.IndexOf("hezuo", StringComparison.Ordinal) == -1)
                {
                    picLinks.Add(picLink);
                }
            }

            int picLinkIndex = 0;
            foreach (string picLink in picLinks)
            {
                //string fileName = basePath + title + "_" + picLinkIndex + ".jpg";
                string fileName = title + "_" + picLinkIndex + ".jpg";

                // shouldDownloadSet (declared elsewhere in this class) maps picture URL -> file name
                // and prevents queuing the same picture twice.
                if (!shouldDownloadSet.ContainsKey(picLink))
                {
                    agent.AddTask(picLink, fileName, "D:\\Download\\", "", "", 1, 0, 1);
                    shouldDownloadSet.Add(picLink, fileName);
                    //WebClient wc = new WebClient();
                    //wc.DownloadFileAsync(new Uri(picLink), fileName);
                    //wc.DownloadFileCompleted += wc_DownloadFileCompleted;
                }

                // The index advances for skipped pictures too, so numbering follows page order.
                picLinkIndex++;
            }
        }
        catch (Exception ex)
        {
            // Best effort per page: report the failure with its cause and move on to the next page.
            Console.WriteLine(link + "出错!" + ex.Message);
        }
    }

    Console.WriteLine("共找到" + shouldDownloadSet.Count + "张图片!");
    agent.CommitTasks2(1);
    Console.WriteLine("开始使用迅雷下载图片!等待下载完成……");
    Console.Read();

    // The code below saves the picture link -> file name mapping to Download.txt so it can be
    // pasted into Thunder by hand; probably unnecessary now that the Thunder API is called directly.
    //StreamWriter fw = new StreamWriter("Download.txt");
    //foreach (KeyValuePair<string, string> fileName in shouldDownloadSet)
    //{
    //    fw.WriteLine(fileName.Key + " " + fileName.Value);
    //}
    //fw.Close();
    //StreamWriter fw2 = new StreamWriter("Download2.txt");
    //foreach (KeyValuePair<string, string> fileName in shouldDownloadSet)
    //{
    //    fw2.WriteLine(fileName.Key);
    //}
    //fw2.Close();
    #endregion

    #region Step 3: using Download.txt and the picture folder, re-download pictures whose file size is 0
    //string[] fileNameList = Directory.GetFiles("MeiZiTu/");
    //List<string> needReDownloadFileList = new List<string>();
    //foreach (string fileName in fileNameList)
    //{
    //    FileInfo file = new FileInfo(fileName);
    //    if (file.Length == 0)
    //    {
    //        needReDownloadFileList.Add(file.Name);
    //    }
    //}
    //StreamReader fr = new StreamReader("Download.txt");
    //while (!fr.EndOfStream)
    //{
    //    string rawStr = fr.ReadLine();
    //    int splitIndex = rawStr.IndexOf(".jpg MeiZiTu");
    //    string key = rawStr.Substring(0, splitIndex + 4);
    //    string value = rawStr.Substring(splitIndex + 13);
    //    if (!shouldDownloadSet.ContainsKey(value))
    //    {
    //        shouldDownloadSet.Add(value, key);
    //    }
    //}
    //foreach (string fileName in needReDownloadFileList)
    //{
    //    string url = shouldDownloadSet[fileName];
    //    WebClient wc = new WebClient();
    //    wc.DownloadFileAsync(new Uri(url), fileName);
    //    wc.DownloadFileCompleted += wc_DownloadFileCompleted;
    //}
    //Console.Read();
    #endregion
}
/// <summary>
/// Entry point. Reads page URLs from url.txt, downloads each page, extracts the
/// picture links it contains and queues every not-yet-seen picture as a Thunder
/// (Xunlei) download task, then commits the tasks and waits for a key press.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    // Run log (highest page id fetched on each date):
    //   2014-03-18: fetched up to 4110
    //   2014-04-18: fetched up to 4174
    //   2014-05-18: fetched up to 4223
    //   2014-06-23: fetched up to 4296
    //   2014-07-30: fetched up to 4380
    //   2014-08-21: fetched up to 4433

    #region Step 1: find every existing page (HTTP 200). Copies of the built program can be run from several folders in parallel; when a run finishes, url.txt in that folder holds all existing page links. Newest page on 2014-03-18 was 4110
    //StreamWriter fw = new StreamWriter("url.txt");
    //string baseUrl = "http://www.meizitu.com/a/";
    //for (int i = 4381; i < 4434; i++)
    //{
    //    string url = baseUrl + i + ".html";
    //    try
    //    {
    //        HttpWebRequest httpWebRequest = HttpWebRequest.CreateHttp(url);
    //        httpWebRequest.Method = "GET";
    //        HttpWebResponse httpWebResponse = (HttpWebResponse)httpWebRequest.GetResponse();
    //        if (httpWebResponse.StatusCode.Equals(HttpStatusCode.OK))
    //        {
    //            fw.WriteLine(url);
    //        }
    //        httpWebResponse.Close();
    //    }
    //    catch (WebException ex)
    //    {
    //        HttpWebResponse response = (HttpWebResponse)ex.Response;
    //        if (response != null) // guard against a null response object
    //        {
    //            Console.WriteLine(response.StatusCode);
    //            response.Close();
    //        }
    //    }
    //    finally
    //    {
    //        Console.WriteLine(i);
    //    }
    //}
    //fw.Close();
    #endregion

    #region Step 2: crawl the pictures on every page listed in url.txt
    const string urlListFile = "url.txt";
    const string titleOpenTag = "<title>";
    const string titleSuffix = " | 妹子图";
    const string picMarker = "src=\"http://www.meizitu.com/wp-content/uploads/";
    const int srcPrefixLength = 5; // length of src=" which precedes the actual URL

    // Fail with a readable message instead of an unhandled FileNotFoundException.
    if (!File.Exists(urlListFile))
    {
        Console.WriteLine("未找到" + urlListFile + "!");
        Console.Read();
        return;
    }

    // Load the page list; the reader is disposed even if reading fails.
    List<string> links = new List<string>();
    using (StreamReader fr = new StreamReader(urlListFile))
    {
        string line;
        while ((line = fr.ReadLine()) != null)
        {
            line = line.Trim();
            if (line.Length > 0) // blank lines are not URLs; skip them
            {
                links.Add(line);
            }
        }
    }

    ThunderAgentLib.AgentClass agent = new ThunderAgentLib.AgentClass();
    char[] invalidNameChars = Path.GetInvalidFileNameChars();

    foreach (string link in links)
    {
        try
        {
            HttpWebRequest httpWebRequest = HttpWebRequest.CreateHttp(link);
            httpWebRequest.Method = "GET";

            // Response and reader are released even when reading throws.
            string htmlContent;
            using (HttpWebResponse httpWebResponse = (HttpWebResponse)httpWebRequest.GetResponse())
            using (StreamReader reader = new StreamReader(new BufferedStream(httpWebResponse.GetResponseStream(), 4 * 200 * 1024), Encoding.GetEncoding("gb2312")))
            {
                htmlContent = reader.ReadToEnd();
            }

            // The title is the text between "<title>" and the site suffix.
            int titleStart = htmlContent.IndexOf(titleOpenTag, StringComparison.Ordinal);
            int titleEnd = titleStart == -1
                ? -1
                : htmlContent.IndexOf(titleSuffix, titleStart + titleOpenTag.Length, StringComparison.Ordinal);
            if (titleEnd == -1)
            {
                Console.WriteLine(link + "出错!" + "未找到页面标题");
                continue;
            }

            string title = htmlContent.Substring(titleStart + titleOpenTag.Length, titleEnd - titleStart - titleOpenTag.Length).Trim();

            // The title becomes part of a file name, so replace characters that are not allowed there.
            foreach (char invalidChar in invalidNameChars)
            {
                title = title.Replace(invalidChar, '_');
            }

            // Collect every uploaded .jpg referenced by a src attribute.
            List<string> picLinks = new List<string>();
            int searchFrom = titleEnd;
            while (true)
            {
                int attrStart = htmlContent.IndexOf(picMarker, searchFrom, StringComparison.Ordinal);
                if (attrStart == -1)
                {
                    break;
                }

                int urlStart = attrStart + srcPrefixLength;
                int quoteEnd = htmlContent.IndexOf('"', urlStart);
                if (quoteEnd == -1)
                {
                    // Unterminated attribute: nothing further can be parsed, keep what was found.
                    break;
                }
                searchFrom = quoteEnd;

                // Only look for ".jpg" inside this attribute value, so a non-jpg upload
                // cannot swallow the markup up to some later ".jpg".
                string srcValue = htmlContent.Substring(urlStart, quoteEnd - urlStart);
                int extIndex = srcValue.IndexOf(".jpg", StringComparison.Ordinal);
                if (extIndex == -1)
                {
                    continue;
                }

                string picLink = srcValue.Substring(0, extIndex + 4);

                // Links containing "limg.jpg" or "hezuo" are excluded.
                if (picLink.IndexOf("limg.jpg", StringComparison.Ordinal) == -1 && picLink.IndexOf("hezuo", StringComparison.Ordinal) == -1)
                {
                    picLinks.Add(picLink);
                }
            }

            int picLinkIndex = 0;
            foreach (string picLink in picLinks)
            {
                //string fileName = basePath + title + "_" + picLinkIndex + ".jpg";
                string fileName = title + "_" + picLinkIndex + ".jpg";

                // shouldDownloadSet (declared elsewhere in this class) maps picture URL -> file name
                // and prevents queuing the same picture twice.
                if (!shouldDownloadSet.ContainsKey(picLink))
                {
                    agent.AddTask(picLink, fileName, "D:\\Download\\", "", "", 1, 0, 1);
                    shouldDownloadSet.Add(picLink, fileName);
                    //WebClient wc = new WebClient();
                    //wc.DownloadFileAsync(new Uri(picLink), fileName);
                    //wc.DownloadFileCompleted += wc_DownloadFileCompleted;
                }

                // The index advances for skipped pictures too, so numbering follows page order.
                picLinkIndex++;
            }
        }
        catch (Exception ex)
        {
            // Best effort per page: report the failure with its cause and move on to the next page.
            Console.WriteLine(link + "出错!" + ex.Message);
        }
    }

    Console.WriteLine("共找到" + shouldDownloadSet.Count + "张图片!");
    agent.CommitTasks2(1);
    Console.WriteLine("开始使用迅雷下载图片!等待下载完成……");
    Console.Read();

    // The code below saves the picture link -> file name mapping to Download.txt so it can be
    // pasted into Thunder by hand; probably unnecessary now that the Thunder API is called directly.
    //StreamWriter fw = new StreamWriter("Download.txt");
    //foreach (KeyValuePair<string, string> fileName in shouldDownloadSet)
    //{
    //    fw.WriteLine(fileName.Key + " " + fileName.Value);
    //}
    //fw.Close();
    //StreamWriter fw2 = new StreamWriter("Download2.txt");
    //foreach (KeyValuePair<string, string> fileName in shouldDownloadSet)
    //{
    //    fw2.WriteLine(fileName.Key);
    //}
    //fw2.Close();
    #endregion

    #region Step 3: using Download.txt and the picture folder, re-download pictures whose file size is 0
    //string[] fileNameList = Directory.GetFiles("MeiZiTu/");
    //List<string> needReDownloadFileList = new List<string>();
    //foreach (string fileName in fileNameList)
    //{
    //    FileInfo file = new FileInfo(fileName);
    //    if (file.Length == 0)
    //    {
    //        needReDownloadFileList.Add(file.Name);
    //    }
    //}
    //StreamReader fr = new StreamReader("Download.txt");
    //while (!fr.EndOfStream)
    //{
    //    string rawStr = fr.ReadLine();
    //    int splitIndex = rawStr.IndexOf(".jpg MeiZiTu");
    //    string key = rawStr.Substring(0, splitIndex + 4);
    //    string value = rawStr.Substring(splitIndex + 13);
    //    if (!shouldDownloadSet.ContainsKey(value))
    //    {
    //        shouldDownloadSet.Add(value, key);
    //    }
    //}
    //foreach (string fileName in needReDownloadFileList)
    //{
    //    string url = shouldDownloadSet[fileName];
    //    WebClient wc = new WebClient();
    //    wc.DownloadFileAsync(new Uri(url), fileName);
    //    wc.DownloadFileCompleted += wc_DownloadFileCompleted;
    //}
    //Console.Read();
    #endregion
}