/// <summary>
/// Reads blog-entry URLs from "links.txt" (one per line), downloads each page and saves it
/// twice: the raw HTML as "{index}.html" and an MHT archive named after the entry title.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    #region Fetch the HTML pages that list all blog-entry links (disabled)
    //string baseQueryUrl = "http://www.douban.com/people/oranjeruud/notes?start=";
    //for (int i = 0; i < 23; i++)
    //{
    //    string queryUrl = baseQueryUrl + (i * 10);
    //    HttpWebRequest httpWebRequest = HttpWebRequest.CreateHttp(queryUrl);
    //    httpWebRequest.UserAgent = "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.95";
    //    HttpWebResponse httpWebResponse = (HttpWebResponse)httpWebRequest.GetResponse(); // get the response
    //    if (httpWebResponse.StatusCode.Equals(HttpStatusCode.OK)) // status 200: success, the data can be analysed
    //    {
    //        StreamReader reader = new StreamReader(new BufferedStream(httpWebResponse.GetResponseStream(), 4 * 200 * 1024));
    //        GetBlogLink(reader.ReadToEnd());
    //        reader.Close();
    //    }
    //    Thread.Sleep(3000);
    //}
    #endregion

    #region Download each linked entry and save it as HTML and MHT
    Message msg = new CDO.MessageClass();
    CDO.Configuration c = new CDO.ConfigurationClass();
    msg.Configuration = c;

    // 'using' guarantees the link file is closed even when a download throws.
    using (StreamReader reader = new StreamReader("links.txt"))
    {
        int i = 0;
        while (!reader.EndOfStream)
        {
            string link = reader.ReadLine();

            // Blank lines would make CreateHttp throw; skip them without consuming an index.
            if (string.IsNullOrWhiteSpace(link))
            {
                continue;
            }

            link = link.Trim();
            i++;

            HttpWebRequest httpWebRequest = HttpWebRequest.CreateHttp(link);
            httpWebRequest.UserAgent = "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.95";

            // Dispose the response so its underlying connection is released after every iteration.
            using (HttpWebResponse httpWebResponse = (HttpWebResponse)httpWebRequest.GetResponse())
            {
                // Status 200 means the page was fetched and can be processed.
                if (httpWebResponse.StatusCode.Equals(HttpStatusCode.OK))
                {
                    string htmlContent;
                    using (StreamReader httpReader = new StreamReader(new BufferedStream(httpWebResponse.GetResponseStream(), 4 * 200 * 1024)))
                    {
                        htmlContent = httpReader.ReadToEnd();
                    }

                    string title = GetBlogTitle(htmlContent);

                    #region Save as an HTML file
                    using (StreamWriter writer = new StreamWriter(i + ".html"))
                    {
                        writer.Write(htmlContent);
                    }
                    #endregion

                    #region Save as an MHT file
                    // The title comes from the page, so it may be empty or contain characters
                    // that are illegal in file names; fall back to the index and replace those.
                    string fileTitle = string.IsNullOrWhiteSpace(title) ? i.ToString() : title;
                    foreach (char invalidChar in Path.GetInvalidFileNameChars())
                    {
                        fileTitle = fileTitle.Replace(invalidChar, '_');
                    }

                    //msg.HTMLBody = htmlContent;
                    msg.CreateMHTMLBody(link, CDO.CdoMHTMLFlags.cdoSuppressNone, "", "");
                    ADODB.Stream stream = msg.GetStream();
                    try
                    {
                        stream.SaveToFile(fileTitle + ".mht", ADODB.SaveOptionsEnum.adSaveCreateOverWrite);
                    }
                    finally
                    {
                        stream.Close();
                    }
                    #endregion
                }
            }

            // Stops after the first processed link.
            // NOTE(review): looks like a debugging limiter - confirm before removing.
            if (i == 1)
            {
                break;
            }
        }
    }
    #endregion
}
/// <summary>
/// Downloads the page at <paramref name="url"/> and saves it as an MHT archive.
/// </summary>
/// <param name="url">Address of the page to archive; must contain a scheme separator ("://").</param>
/// <param name="vpath">Virtual path of the target file. It is passed through SafeSC.PathDeal, then "#" is replaced by "井" and "-" is removed.</param>
/// <returns>The cleaned-up virtual path the archive was saved under.</returns>
/// <exception cref="ArgumentException"><paramref name="url"/> is null, empty, or contains no "://".</exception>
public string DownToMHT(string url, string vpath)
{
    // Reject null/empty up front so callers get the validation error instead of a NullReferenceException.
    if (string.IsNullOrEmpty(url) || !url.Contains("://"))
    {
        throw new ArgumentException("地址必须以http或https开头");
    }

    // Strip characters that make the saved file fail to open on the client (per the original author's note).
    vpath = SafeSC.PathDeal(vpath).Replace("#", "井").Replace("-", "");
    string ppath = function.VToP(vpath); // presumably maps the virtual path to a physical one - verify against function.VToP

    // GetDirectoryName returns null/empty for root or bare file names; CreateDirectory would throw on those.
    // CreateDirectory is a no-op when the directory already exists, so no Exists check is needed.
    string dir = Path.GetDirectoryName(ppath);
    if (!string.IsNullOrEmpty(dir))
    {
        Directory.CreateDirectory(dir);
    }

    CDO.Message msg = new CDO.MessageClass();
    CDO.Configuration c = new CDO.ConfigurationClass();
    msg.Configuration = c;

    // cdoSuppressNone packs every referenced resource into the archive;
    // cdoSuppressAll would keep only the bare page.
    msg.CreateMHTMLBody(url, CdoMHTMLFlags.cdoSuppressNone, "", "");

    ADODB.Stream stream = msg.GetStream();
    try
    {
        stream.SaveToFile(ppath, ADODB.SaveOptionsEnum.adSaveCreateOverWrite);
    }
    finally
    {
        // Close the ADO stream explicitly rather than waiting for the COM wrapper to be finalized.
        stream.Close();
    }

    return vpath;
}
/// <summary>
/// Reads blog-entry URLs from "links.txt" (one per line), downloads each page and saves it
/// twice: the raw HTML as "{index}.html" and an MHT archive named after the entry title.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    #region Fetch the HTML pages that list all blog-entry links (disabled)
    //string baseQueryUrl = "http://www.douban.com/people/oranjeruud/notes?start=";
    //for (int i = 0; i < 23; i++)
    //{
    //    string queryUrl = baseQueryUrl + (i * 10);
    //    HttpWebRequest httpWebRequest = HttpWebRequest.CreateHttp(queryUrl);
    //    httpWebRequest.UserAgent = "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.95";
    //    HttpWebResponse httpWebResponse = (HttpWebResponse)httpWebRequest.GetResponse(); // get the response
    //    if (httpWebResponse.StatusCode.Equals(HttpStatusCode.OK)) // status 200: success, the data can be analysed
    //    {
    //        StreamReader reader = new StreamReader(new BufferedStream(httpWebResponse.GetResponseStream(), 4 * 200 * 1024));
    //        GetBlogLink(reader.ReadToEnd());
    //        reader.Close();
    //    }
    //    Thread.Sleep(3000);
    //}
    #endregion

    #region Download each linked entry and save it as HTML and MHT
    Message msg = new CDO.MessageClass();
    CDO.Configuration c = new CDO.ConfigurationClass();
    msg.Configuration = c;

    // The link file is closed by 'using' even if a request or a save operation fails.
    using (StreamReader reader = new StreamReader("links.txt"))
    {
        int i = 0;
        while (!reader.EndOfStream)
        {
            string link = reader.ReadLine();

            // An empty line is not a valid URL and would make CreateHttp throw; ignore it
            // and do not advance the file index for it.
            if (string.IsNullOrWhiteSpace(link))
            {
                continue;
            }

            link = link.Trim();
            i++;

            HttpWebRequest httpWebRequest = HttpWebRequest.CreateHttp(link);
            httpWebRequest.UserAgent = "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.95";

            // The response holds a network connection; release it deterministically.
            using (HttpWebResponse httpWebResponse = (HttpWebResponse)httpWebRequest.GetResponse())
            {
                // Only a 200 response carries a page worth saving.
                if (httpWebResponse.StatusCode.Equals(HttpStatusCode.OK))
                {
                    string htmlContent;
                    using (StreamReader httpReader = new StreamReader(new BufferedStream(httpWebResponse.GetResponseStream(), 4 * 200 * 1024)))
                    {
                        htmlContent = httpReader.ReadToEnd();
                    }

                    string title = GetBlogTitle(htmlContent);

                    #region Save as an HTML file
                    using (StreamWriter writer = new StreamWriter(i + ".html"))
                    {
                        writer.Write(htmlContent);
                    }
                    #endregion

                    #region Save as an MHT file
                    // Titles are scraped from the page: use the index when the title is blank and
                    // replace characters the file system does not allow in file names.
                    string fileTitle = string.IsNullOrWhiteSpace(title) ? i.ToString() : title;
                    foreach (char invalidChar in Path.GetInvalidFileNameChars())
                    {
                        fileTitle = fileTitle.Replace(invalidChar, '_');
                    }

                    //msg.HTMLBody = htmlContent;
                    msg.CreateMHTMLBody(link, CDO.CdoMHTMLFlags.cdoSuppressNone, "", "");
                    ADODB.Stream stream = msg.GetStream();
                    try
                    {
                        stream.SaveToFile(fileTitle + ".mht", ADODB.SaveOptionsEnum.adSaveCreateOverWrite);
                    }
                    finally
                    {
                        stream.Close();
                    }
                    #endregion
                }
            }

            // Stops after the first processed link.
            // NOTE(review): looks like a debugging limiter - confirm before removing.
            if (i == 1)
            {
                break;
            }
        }
    }
    #endregion
}