Пример #1
0
        /// <summary>
        /// Reads blog-post URLs from <c>links.txt</c> (one URL per line), downloads each page,
        /// and stores it twice: as a numbered <c>.html</c> file and as an <c>.mht</c> archive
        /// named after the post title.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            #region Fetch the HTML pages that contain all blog-post links
            //string baseQueryUrl = "http://www.douban.com/people/oranjeruud/notes?start=";
            //for (int i = 0; i < 23; i++)
            //{
            //    string queryUrl = baseQueryUrl + (i * 10);
            //    HttpWebRequest httpWebRequest = HttpWebRequest.CreateHttp(queryUrl);
            //    httpWebRequest.UserAgent = "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.95";
            //    HttpWebResponse httpWebResponse = (HttpWebResponse)httpWebRequest.GetResponse();// get the response
            //    if (httpWebResponse.StatusCode.Equals(HttpStatusCode.OK))// status 200 means success, so the payload can be parsed
            //    {
            //        StreamReader reader = new StreamReader(new BufferedStream(httpWebResponse.GetResponseStream(), 4 * 200 * 1024));
            //        GetBlogLink(reader.ReadToEnd());
            //        reader.Close();
            //    }
            //    Thread.Sleep(3000);
            //}
            #endregion

            #region Download every linked post and save it as an HTML page and an MHT archive
            Message           msg = new CDO.MessageClass();
            CDO.Configuration c   = new CDO.ConfigurationClass();
            msg.Configuration = c;

            int i = 0; // 1-based index of the link being processed; also the .html file name

            // "using" guarantees the link file is closed even when a download throws.
            using (StreamReader reader = new StreamReader("links.txt"))
            {
                while (!reader.EndOfStream)
                {
                    string link = reader.ReadLine();

                    // A blank line would make CreateHttp throw; skip it without consuming an index.
                    if (string.IsNullOrWhiteSpace(link))
                    {
                        continue;
                    }
                    link = link.Trim();
                    i++;

                    HttpWebRequest httpWebRequest = HttpWebRequest.CreateHttp(link);
                    httpWebRequest.UserAgent = "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.95";

                    // Dispose the response on every path so the underlying connection is released.
                    using (HttpWebResponse httpWebResponse = (HttpWebResponse)httpWebRequest.GetResponse())
                    {
                        // Only a 200 response carries a page worth saving.
                        if (httpWebResponse.StatusCode == HttpStatusCode.OK)
                        {
                            string htmlContent;
                            using (StreamReader httpReader = new StreamReader(new BufferedStream(httpWebResponse.GetResponseStream(), 4 * 200 * 1024)))
                            {
                                htmlContent = httpReader.ReadToEnd();
                            }

                            // GetBlogTitle is defined elsewhere; presumably it extracts the post title
                            // from the HTML - confirm against its implementation.
                            string title = GetBlogTitle(htmlContent);

                            #region Save as an HTML file
                            using (StreamWriter writer = new StreamWriter(i + ".html"))
                            {
                                writer.Write(htmlContent);
                            }
                            #endregion

                            #region Save as an MHT file
                            // Titles may contain characters that are illegal in file names
                            // (?, :, /, ...); replace them so SaveToFile does not fail.
                            string fileTitle = title ?? string.Empty;
                            foreach (char invalidChar in Path.GetInvalidFileNameChars())
                            {
                                fileTitle = fileTitle.Replace(invalidChar, '_');
                            }
                            if (string.IsNullOrWhiteSpace(fileTitle))
                            {
                                // No usable title: fall back to the link index.
                                fileTitle = i.ToString();
                            }

                            //msg.HTMLBody = htmlContent;
                            msg.CreateMHTMLBody(link, CDO.CdoMHTMLFlags.cdoSuppressNone, "", "");
                            ADODB.Stream stream = msg.GetStream();
                            try
                            {
                                stream.SaveToFile(fileTitle + ".mht", ADODB.SaveOptionsEnum.adSaveCreateOverWrite);
                            }
                            finally
                            {
                                stream.Close();
                            }
                            #endregion
                        }
                    }

                    // NOTE(review): processing stops after the first link. This looks like a
                    // debugging limit - confirm before removing it.
                    if (i == 1)
                    {
                        break;
                    }
                }
            }
            #endregion
        }
Пример #2
0
        /// <summary>
        /// Downloads the page at <paramref name="url"/> and saves it as an MHT archive.
        /// </summary>
        /// <param name="url">Address of the page to archive; must contain a scheme separator ("://").</param>
        /// <param name="vpath">Virtual path of the target file. It is cleaned up before use (see the return value).</param>
        /// <returns>The cleaned-up virtual path that was actually used for the saved file.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="url"/> is null.</exception>
        /// <exception cref="ArgumentException"><paramref name="url"/> does not contain "://".</exception>
        public string DownToMHT(string url, string vpath)
        {
            if (url == null)
            {
                throw new ArgumentNullException("url");
            }
            if (!url.Contains("://"))
            {
                // The message is kept verbatim; ArgumentException derives from Exception,
                // so callers that catch Exception keep working.
                throw new ArgumentException("地址必须以http或https开头");
            }

            // SafeSC.PathDeal is a project helper (presumably it normalizes the path - confirm).
            // In addition '#' is replaced with '井' and '-' is removed; per the original note,
            // such characters make the client fail when it opens the file.
            vpath = SafeSC.PathDeal(vpath).Replace("#", "井").Replace("-", "");
            string ppath = function.VToP(vpath); // presumably maps the virtual path to a physical one - confirm
            string dir   = Path.GetDirectoryName(ppath);

            // CreateDirectory does nothing when the directory already exists, so no Exists check
            // is needed; a null/empty directory name (bare file name or root path) is skipped.
            if (!string.IsNullOrEmpty(dir))
            {
                Directory.CreateDirectory(dir);
            }

            CDO.Message       msg = new CDO.MessageClass();
            CDO.Configuration c   = new CDO.ConfigurationClass();
            msg.Configuration = c;
            // cdoSuppressNone packs every referenced resource into the archive;
            // CdoMHTMLFlags.cdoSuppressAll would keep only the bare page.
            msg.CreateMHTMLBody(url, CdoMHTMLFlags.cdoSuppressNone, "", "");

            ADODB.Stream stream = msg.GetStream();
            try
            {
                stream.SaveToFile(ppath, ADODB.SaveOptionsEnum.adSaveCreateOverWrite);
            }
            finally
            {
                // Release the COM stream promptly instead of waiting for finalization.
                stream.Close();
            }
            return vpath;
        }
Пример #3
0
        /// <summary>
        /// Walks through the URLs listed in <c>links.txt</c> (one per line), fetches each page and
        /// writes it out as <c>&lt;index&gt;.html</c> plus an MHT archive named after the post title.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            #region Fetch the HTML pages that contain all blog-post links
            //string baseQueryUrl = "http://www.douban.com/people/oranjeruud/notes?start=";
            //for (int i = 0; i < 23; i++)
            //{
            //    string queryUrl = baseQueryUrl + (i * 10);
            //    HttpWebRequest httpWebRequest = HttpWebRequest.CreateHttp(queryUrl);
            //    httpWebRequest.UserAgent = "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.95";
            //    HttpWebResponse httpWebResponse = (HttpWebResponse)httpWebRequest.GetResponse();// get the response
            //    if (httpWebResponse.StatusCode.Equals(HttpStatusCode.OK))// status 200 means success, so the payload can be parsed
            //    {
            //        StreamReader reader = new StreamReader(new BufferedStream(httpWebResponse.GetResponseStream(), 4 * 200 * 1024));
            //        GetBlogLink(reader.ReadToEnd());
            //        reader.Close();
            //    }
            //    Thread.Sleep(3000);
            //}
            #endregion

            #region Download every linked post and save it as an HTML page and an MHT archive
            Message msg = new CDO.MessageClass();
            CDO.Configuration c = new CDO.ConfigurationClass();
            msg.Configuration = c;

            // 1-based counter of processed links; doubles as the .html file name.
            int i = 0;

            // The using block closes links.txt even if a request or a save throws.
            using (StreamReader reader = new StreamReader("links.txt"))
            {
                while (!reader.EndOfStream)
                {
                    string link = reader.ReadLine();

                    // Blank lines cannot be turned into a request (CreateHttp would throw); ignore them.
                    if (string.IsNullOrWhiteSpace(link))
                    {
                        continue;
                    }
                    link = link.Trim();
                    i++;

                    HttpWebRequest httpWebRequest = HttpWebRequest.CreateHttp(link);
                    httpWebRequest.UserAgent = "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.95";

                    // The response is disposed on every path, including non-200 answers.
                    using (HttpWebResponse httpWebResponse = (HttpWebResponse)httpWebRequest.GetResponse())
                    {
                        // Status 200 means the page was retrieved and can be stored.
                        if (httpWebResponse.StatusCode == HttpStatusCode.OK)
                        {
                            string htmlContent;
                            using (StreamReader httpReader = new StreamReader(new BufferedStream(httpWebResponse.GetResponseStream(), 4 * 200 * 1024)))
                            {
                                htmlContent = httpReader.ReadToEnd();
                            }

                            // GetBlogTitle lives elsewhere in the project; presumably it returns the
                            // post title found in the HTML - confirm against its implementation.
                            string title = GetBlogTitle(htmlContent);

                            #region Save as an HTML file
                            using (StreamWriter writer = new StreamWriter(i + ".html"))
                            {
                                writer.Write(htmlContent);
                            }
                            #endregion

                            #region Save as an MHT file
                            // Strip characters that are not allowed in file names so that a title
                            // such as "What? Why: ..." does not make SaveToFile fail.
                            string fileTitle = title ?? string.Empty;
                            foreach (char invalidChar in Path.GetInvalidFileNameChars())
                            {
                                fileTitle = fileTitle.Replace(invalidChar, '_');
                            }
                            if (string.IsNullOrWhiteSpace(fileTitle))
                            {
                                // Nothing usable left: name the archive after the link index.
                                fileTitle = i.ToString();
                            }

                            //msg.HTMLBody = htmlContent;
                            msg.CreateMHTMLBody(link, CDO.CdoMHTMLFlags.cdoSuppressNone, "", "");
                            ADODB.Stream stream = msg.GetStream();
                            try
                            {
                                stream.SaveToFile(fileTitle + ".mht", ADODB.SaveOptionsEnum.adSaveCreateOverWrite);
                            }
                            finally
                            {
                                stream.Close();
                            }
                            #endregion
                        }
                    }

                    // NOTE(review): the loop ends after the first processed link. This appears to be
                    // a leftover debugging limit - confirm before removing it.
                    if (i == 1)
                    {
                        break;
                    }
                }
            }
            #endregion
        }