Ejemplo n.º 1
0
        /// <summary>
        /// Stores a downloaded HTML page as an <c>Articles</c> record and, when the
        /// save succeeds, hands a matching <c>IndexTask</c> to
        /// <c>IndexManager.Instance.AddArticle</c>.
        /// </summary>
        /// <remarks>
        /// The text of the first &lt;title&gt; element (trimmed) is used as both the
        /// title and the summary; it is empty when no title element is found.
        /// Absolute <c>http://</c> host prefixes matched by the URL pattern are
        /// replaced with <c>/File</c> before the content is stored.
        /// Exceptions raised while saving or indexing are logged and not rethrown.
        /// </remarks>
        /// <param name="args">Event data; its <c>Html</c> and <c>Url</c> members are read.</param>
        private static void SaveHtmlEvent(DataReceivedEventArgs args)
        {
            // Extract the page title; stays empty when the page has no <title> element.
            Regex  reg     = new Regex(@"(?m)<title[^>]*>(?<title>(?:\w|\W)*?)</title[^>]*>", RegexOptions.Multiline | RegexOptions.IgnoreCase);
            Match  mc      = reg.Match(args.Html);
            string m_title = String.Empty;

            if (mc.Success)
            {
                m_title = mc.Groups["title"].Value.Trim();
            }

            Regex urlRegex = new Regex(@"(?i)http://(\w+\.){1,3}(com(\.cn)?|cn|net)\b");

            // Strip the scheme and host from matching absolute URLs, replacing them with "/File".
            var shtml = urlRegex.Replace(args.Html, "/File");

            try
            {
                lock (syncRoot)
                {
                    // Persist the article to the database.
                    Articles article = new Articles();
                    article.Id       = PrimaryKeyGen.GuidToLongId();
                    article.IsDelete = false;
                    article.Url      = args.Url;
                    article.Title    = m_title;
                    article.Summary  = m_title;
                    article.Content  = shtml;
                    // "HH" is the 24-hour clock. A 12-hour "hh" value without an AM/PM
                    // designator cannot distinguish morning from afternoon.
                    article.AddTime  = DateTime.Now.ToString("yyyyMMdd HH:mm:ss");
                    var saveResult = _IArticlesService.Add(article);

                    if (saveResult)
                    {
                        // Update the search index only after the article was saved.
                        IndexTask task = new IndexTask();
                        task.TaskId = article.Id;
                        task.Title  = m_title;
                        // Remove line feeds, spaces, tabs and carriage returns from the converted text.
                        task.Content = HtmlConverts.ConvertHtml(shtml).Replace("\n", "").Replace(" ", "").Replace("\t", "").Replace("\r", "");
                        task.Summary = m_title;
                        IndexManager.Instance.AddArticle(task);
                    }
                }
            }
            catch (Exception ex)
            {
                // InnerException is null unless the exception wraps another one; fall back
                // to the exception itself so that logging can never throw on its own.
                Exception cause = ex.InnerException ?? ex;
                log.ErrorFormat("Url:{0};\r\n错误信息{1}", args.Url, cause.Message);
            }
        }
 /// <summary>
 /// Fetches a sample blog page via <c>HttpGet</c>, runs it through
 /// <c>HtmlConverts.ConvertHtml</c> and strips line feeds, spaces, tabs and
 /// carriage returns from the converted text. The result is not inspected.
 /// </summary>
 public void TestHtmlConverts()
 {
     string pageUrl  = "http://blog.csdn.net/wangyi1e/article/details/29204987";
     string pageHtml = HttpGet(pageUrl, "");

     // Convert first, then remove whitespace characters one kind at a time.
     string converted = HtmlConverts.ConvertHtml(pageHtml);
     string compact   = converted
         .Replace("\n", "")
         .Replace(" ", "")
         .Replace("\t", "")
         .Replace("\r", "");
 }