예제 #1
0
 /// <summary>
 /// Fetches the page at <paramref name="url"/>, extracts the configured fields
 /// (Title, Author, CreateDate, ImageUrl, Content) into <c>item</c> and, when the
 /// page passes the filters, writes the item to a text file under <c>OUTPUT_PATH</c>.
 /// </summary>
 /// <param name="id">ID of the data, taken from the ID in the web page.</param>
 /// <param name="url">URL to fetch.</param>
 /// <returns>
 /// <c>true</c> when the item was written to disk; <c>false</c> when the response was
 /// empty, the page was rejected by a filter, the title was empty, the content was
 /// missing or not longer than the configured minimum, or an exception was raised
 /// while extracting or writing (its message is shown in <c>txtMessage</c>).
 /// </returns>
 private bool CatchContent(long id, string url)
 {
     dataEncoding = cbbEncoding.Text;
     // NOTE(review): "text\\html" looks like a typo for the MIME type "text/html";
     // left unchanged here because the effect on WebAccess.Request is not visible — confirm.
     data = WebAccess.Request(url, string.Empty, WebAccess.WebAccessMethod.POST, "text\\html", null, dataEncoding);
     if (string.IsNullOrEmpty(data))
     {
         return false;
     }

     // Inclusion filter: when text is given, the page must contain it.
     string includeText = txtFilter.Text;
     if (!string.IsNullOrEmpty(includeText) && !data.Contains(includeText))
     {
         return false;
     }

     // Exclusion filter: only applied when text is given. string.Contains("") is
     // always true, so an empty exclusion box previously rejected every page.
     string excludeText = txtOppFilter.Text;
     if (!string.IsNullOrEmpty(excludeText) && data.Contains(excludeText))
     {
         return false;
     }

     try
     {
         // Extract the fields using the patterns stored in the configuration.
         item.ID         = id.ToString();
         item.Title      = FindText.Find(data, config.GetConfig("Title"));
         item.Author     = FindText.Find(data, config.GetConfig("Author"));
         item.CreateDate = FindText.Find(data, config.GetConfig("CreateDate"));
         item.ImageUrl   = FindText.Find(data, config.GetConfig("ImageUrl"));
         item.Content    = FindText.Find(data, config.GetConfig("Content"));

         // Optionally convert the HTML content to plain text.
         if (cbReplaceHtml.Checked)
         {
             item.Content = HtmlText.ToText(item.Content);
         }

         // A non-numeric minimum length throws here and is reported by the catch below.
         int minContentLength = Convert.ToInt32(txtContentLength.Text);

         // Guard against a missing content match instead of relying on a
         // NullReferenceException being swallowed by the catch block.
         bool hasTitle   = !string.IsNullOrEmpty(item.Title);
         bool longEnough = item.Content != null && item.Content.Length > minContentLength;
         if (hasTitle && longEnough)
         {
             resultFileName = string.Format("{0}//{1}.txt", OUTPUT_PATH, CommonFile.RemoveInvalidChar(item.Title));
             System.IO.File.WriteAllText(resultFileName, item.ToString());
             return true;
         }
     }
     catch (Exception ex)
     {
         // Best effort: surface the failure to the user and report "not saved".
         txtMessage.Text = ex.Message;
     }

     return false;
 }