/// <summary>
        /// Runs a Baidu search for one keyword record.
        /// Loads the ignored-domain records from MongoDB and passes them, together with the
        /// keyword, to <c>Queries.DnlBaiduSearchQuery.Query</c>. Exceptions are logged
        /// (message only) and never propagate to the caller.
        /// </summary>
        /// <param name="p">Keyword record to search; its <c>Keyword</c> text seeds the search query.</param>
        public void query(Dnl_Keyword p)
        {
            try
            {
                // An empty filter matches every document, i.e. the full ignored-domain list is loaded.
                var ignoredDomains = MongoDBHelper.Instance
                                     .GetDnl_IgnoreDomain()
                                     .Find(Builders <Dnl_IgnoreDomain> .Filter.Empty)
                                     .ToList();

                try
                {
                    // Run the keyword search.
                    var search = new Queries.DnlBaiduSearchQuery(p.Keyword);
                    search.Query(p, ignoredDomains);
                }
                catch (Exception searchError)
                {
                    log(searchError.Message);
                }
            }
            catch (Exception loadError)
            {
                log(loadError.Message);
            }
        }
        /// <summary>
        /// Builds the Baidu search URL for a keyword.
        /// </summary>
        /// <param name="tsk">Keyword record whose <c>Keyword</c> text is searched.</param>
        /// <returns>
        /// The search URL with the trimmed keyword URL-encoded as UTF-8, or an empty string
        /// when the trimmed keyword is empty.
        /// </returns>
        string get_urls(Dnl_Keyword tsk)
        {
            string keyword = tsk.Keyword.Trim();

            if (string.IsNullOrEmpty(keyword))
            {
                return string.Empty;
            }

            string encodedKeyword = keyword.GetUrlEncodedString("utf-8");
            return "http://www.baidu.com/s?ie=utf-8&wd={0}".FormatStr(encodedKeyword);
        }
        /// <summary>
        /// Saves level-1 links to the collection returned by <c>GetIW2S_level1links</c>, in batches of 100.
        /// Within each batch, entries are de-duplicated by <c>BizId</c> before being inserted.
        /// Exceptions are logged and never propagate to the caller.
        /// </summary>
        /// <param name="links">Links to save; nothing happens when this is null or empty.</param>
        /// <param name="tsk">Keyword record the links belong to; only its <c>Keyword</c> is used, for logging.</param>
        /// <param name="excludedKeywords">Not referenced by this method.</param>
        public void save_level1_links(List <IW2S_level1link> links, Dnl_Keyword tsk, List <Dnl_IgnoreDomain> excludedKeywords)
        {
            try
            {
                // The original guard was inverted: it returned for every non-null list (after logging a
                // false success message) and only continued when links was null, which then threw.
                if (links == null || links.Count == 0)
                {
                    return;
                }

                const int pagesize = 100;
                var col = MongoDBHelper.Instance.GetIW2S_level1links();

                for (int page = 0; page * pagesize < links.Count; page++)
                {
                    var list = links.Skip(page * pagesize).Take(pagesize).ToList();
                    list = ListDistinctBy(list, x => x.BizId);

                    // NOTE(review): the lookup of BizIds already stored in the collection was commented
                    // out in the original, so only in-batch duplicates are removed — confirm whether a
                    // database-side existence check is still wanted.
                    if (list == null || list.Count == 0)
                    {
                        continue;
                    }

                    // Insert only the current batch; the original inserted the whole input on every page.
                    col.InsertMany(list);

                    log("SUCCESS saving " + list.Count + " Level 1 Links for " + tsk.Keyword);
                }
            }
            catch (Exception ex)
            {
                log(ex.Message);
                log("保存出错!");
            }
        }
        /// <summary>
        /// Runs the Baidu search for a keyword: builds the search URL and crawls the result pages.
        /// Exceptions thrown while crawling are logged with message and stack trace.
        /// </summary>
        /// <param name="tsk">Keyword record to search.</param>
        /// <param name="excludedDomains">Ignored-domain records, passed through to <c>GetLinks</c>.</param>
        public void Query(Dnl_Keyword tsk, List <Dnl_IgnoreDomain> excludedDomains)
        {
            // Built outside the try block, so a failure here propagates to the caller.
            string searchUrl = get_urls(tsk);

            try
            {
                GetLinks(searchUrl, tsk, excludedDomains);
            }
            catch (Exception crawlError)
            {
                log(crawlError.Message + crawlError.StackTrace);
            }
        }
        /// <summary>
        /// Stores a link unless a document with the same keyword id and link URL already exists.
        /// The outcome is written to the console.
        /// </summary>
        /// <param name="link">Link record to store.</param>
        /// <param name="task">Keyword record the link was found for.</param>
        public void SaveLink(Dnl_Link_Baidu link, Dnl_Keyword task)
        {
            // A link counts as already saved when both the keyword id and the URL match.
            var filters       = Builders <Dnl_Link_Baidu> .Filter;
            var sameLink      = filters.Eq(x => x.SearchkeywordId, task._id.ToString()) & filters.Eq(x => x.LinkUrl, link.LinkUrl);
            var collection    = MongoDBHelper.Instance.GetDnl_Link_Baidu();
            bool alreadySaved = collection.Find(sameLink).FirstOrDefault() != null;

            if (alreadySaved)
            {
                Console.WriteLine(DateTime.Now + "  :  " + "该链接已保存 - " + task.Keyword);
                return;
            }

            Console.WriteLine(DateTime.Now + "  :  " + "成功保存1条链接 - " + task.Keyword);
            collection.InsertOne(link);
        }
        /// <summary>
        /// Crawls the Baidu result pages for a keyword, starting at <paramref name="link"/>, and passes
        /// every promoted and organic result that has a title or description to <c>HanleTagData</c>.
        /// At most three pages are fetched; crawling also stops when a page cannot be downloaded,
        /// when no result fragments are found, or when there is no next-page link.
        /// </summary>
        /// <param name="link">URL of the first result page.</param>
        /// <param name="tsk">Keyword record being searched.</param>
        /// <param name="excludedDomains">Ignored-domain records, passed through to <c>HanleTagData</c>.</param>
        void GetLinks(string link, Dnl_Keyword tsk, List <Dnl_IgnoreDomain> excludedDomains)
        {
            string searchKeyword = tsk.Keyword.Trim();

            int nohist_pages = 0;       // consecutive pages without a usable result
            int quried_pages = 0;       // pages fetched so far

            // Fetch at most 3 result pages.
            while (!string.IsNullOrEmpty(link) && quried_pages <= 2)
            {
                log(link);
                // Download the result page source.
                var html = get_html(link);
                if (html == null)
                {
                    break;
                }

                var resultArea = html.SubAfter("content_left");

                // Promoted (ad) results: the fragments that follow a `div id="400` marker, up to the
                // first "c-container". The marker is tried with and without the space.
                var propContents = new List <string>();
                foreach (string marker in new[] { "div id=\"400", "divid=\"400" })
                {
                    if (!string.IsNullOrEmpty(resultArea.SubAfter(marker)))
                    {
                        propContents = resultArea.SubAfter(marker).SubBefore("c-container").SplitWith(marker).ToList();
                        break;
                    }
                }
                foreach (var tag in propContents)
                {
                    var a = tag.SubAfter("h3").SubAfter("a");
                    // Title
                    string title = a.SubBefore("</h3>").GetTxtFromHtml2();
                    if (!string.IsNullOrEmpty(title))
                    {
                        title = title.Trim();
                    }
                    string href = a.GetFirstHref2();
                    // Description. The original condition was inverted: it trimmed only null/empty
                    // values, so real text was never trimmed and a null value would throw.
                    string abs = tag.SubAfter("</h3>").SubBefore("</a").GetTxtFromHtml2();
                    if (!string.IsNullOrEmpty(abs))
                    {
                        abs = abs.Trim();
                    }
                    string domain = string.Empty;   // filled in by HanleTagData from the real URL

                    // Skip fragments that carry neither a title nor a description.
                    string txt = "{0}{1}".FormatStr(title, abs);
                    if (string.IsNullOrEmpty(txt))
                    {
                        continue;
                    }

                    HanleTagData(tsk, excludedDomains, searchKeyword, title, href, abs, ref domain, tag, true);
                }

                // Organic results: one fragment per "c-container".
                var tags = resultArea.SplitWith("c-container");
                if (tags == null || tags.Length == 0)
                {
                    log("BLOCKED " + tsk.Keyword);
                    break;
                }
                bool nohit = true;
                foreach (string tag in tags)
                {
                    var a = tag.SubAfter("h3").SubAfter("a");
                    // Title
                    string title = a.SubBefore("</h3>").GetTxtFromHtml2();
                    if (!string.IsNullOrEmpty(title))
                    {
                        title = title.Trim();
                    }
                    string href = a.GetFirstHref2();    // result link
                    // Description (same inverted-condition fix as for promoted results).
                    string description = tag.SubAfter("abstract").SubBefore("</div").GetTxtFromHtml2();
                    if (!string.IsNullOrEmpty(description))
                    {
                        description = description.Trim();
                    }
                    string domain = tag.SubLastStringAfter("\"f13").SubBefore("</span").GetTxtFromHtml2();
                    domain = GetDomain(domain);

                    // Skip fragments that carry neither a title nor a description.
                    string txt = "{0}{1}".FormatStr(title, description);
                    if (string.IsNullOrEmpty(txt))
                    {
                        continue;
                    }

                    // Resolve, enrich and store this result.
                    HanleTagData(tsk, excludedDomains, searchKeyword, title, href, description, ref domain, tag, false);
                    nohit        = false;
                    nohist_pages = 0;
                }

                if (nohit)
                {
                    nohist_pages++;
                }
                // Stop after too many consecutive empty pages. With at most three pages fetched per
                // call this threshold cannot be reached; kept unchanged from the original.
                if (nohist_pages > 3)
                {
                    break;
                }

                quried_pages++;
                pages++;
                // Locate the next-page link and make it absolute if it is relative.
                link = html.SubAfter("fk fk_cur").SubBefore("下一页").GetLastHref2();
                if (!string.IsNullOrEmpty(link) && !link.IsStartWith("http"))
                {
                    if (link.IsStartWith("/"))
                    {
                        link = link.SubAfter("/");
                    }
                    link = "http://www.baidu.com/".GetContact(link);
                }
            }
        }
        // Matches a leading URL scheme fragment; used to strip it from a domain string.
        private static readonly Regex UrlSchemeRegex = new Regex("http://|https://");

        // Matches dates such as 2017-3-5, 2017/03/05 or 2017年3月5日.
        private static readonly Regex PublishDateRegex = new Regex("(20\\d{2}[-/]\\d{1,2}[-/]\\d{1,2})|(20\\d{2}年\\d{1,2}月\\d{1,2}日)");

        /// <summary>
        /// Turns one search-result fragment into a <c>Dnl_Link_Baidu</c> record and passes it to <c>SaveLink</c>.
        /// The result link is followed to obtain the real URL and the page HTML; when no page HTML could
        /// be fetched, nothing is saved.
        /// </summary>
        /// <param name="tsk">Keyword record being searched.</param>
        /// <param name="excludedDomains">Ignored-domain records. Not referenced by this method.</param>
        /// <param name="searchKeywords">Search keyword text, matched against title and description.</param>
        /// <param name="title">Result title.</param>
        /// <param name="href">Result link as found on the result page.</param>
        /// <param name="description">Result description.</param>
        /// <param name="domain">
        /// Domain of the result. When empty on entry it is derived from the real URL; any
        /// <c>http://</c> / <c>https://</c> fragment is removed before it is stored.
        /// </param>
        /// <param name="tag">HTML fragment of this result.</param>
        /// <param name="isMarket">Whether the result is a promoted link.</param>
        private void HanleTagData(Dnl_Keyword tsk, List <Dnl_IgnoreDomain> excludedDomains, string searchKeywords, string title, string href, string description, ref string domain, string tag, bool isMarket)
        {
            string realUrl = null, detailHtml = null;     // real URL and page source

            // Baidu "V" level, read from the icon class present in the fragment.
            int? baiduVStar = null;

            if (tag.Contains("c-icon-v1"))
            {
                baiduVStar = 1;
            }
            else if (tag.Contains("c-icon-v2"))
            {
                baiduVStar = 2;
            }
            else if (tag.Contains("c-icon-v3"))
            {
                baiduVStar = 3;
            }

            // Follow the result link to get the real URL and page source.
            if (!string.IsNullOrWhiteSpace(href))
            {
                FollowResultLink(href, ref realUrl, ref detailHtml, ref domain);
            }
            // If the fetched page is itself a script-driven redirect, follow its first link as well.
            if (!string.IsNullOrEmpty(detailHtml) && detailHtml.Contains("document.getElementById(\"link\").click()"))
            {
                var gourl = detailHtml.GetFirstHref2();
                if (!string.IsNullOrEmpty(gourl))
                {
                    FollowResultLink(gourl, ref realUrl, ref detailHtml, ref domain);
                }
            }

            // Strip the scheme from the domain and look up its collection count.
            // NOTE(review): this lookup runs even when the method returns early below because no
            // page HTML was fetched — confirm whether it can be skipped in that case.
            long collectionNum = 0;

            if (!string.IsNullOrEmpty(domain))
            {
                domain        = UrlSchemeRegex.Replace(domain, "");
                collectionNum = GetDomainCollectionNum(domain);
            }

            if (string.IsNullOrEmpty(realUrl))
            {
                realUrl = href;
            }

            // Without page HTML there is nothing to store.
            if (string.IsNullOrEmpty(detailHtml))
            {
                return;
            }

            string content = GetMainContentHelper.GetMainContent(detailHtml);         // main text of the page

            bool          is_title_matched = title.IsContains2(searchKeywords);       // keyword found in title
            bool          is_desc_matched  = description.IsContains2(searchKeywords); // keyword found in description
            BaiduItemPart part             = is_title_matched && is_desc_matched ? BaiduItemPart.TitleAbstract :
                                             is_title_matched ? BaiduItemPart.Title :
                                             is_desc_matched ? BaiduItemPart.Abstract : BaiduItemPart.None;

            string time = ExtractPublishTime(tag, detailHtml);

            // Build the link record.
            Dnl_Link_Baidu link = new Dnl_Link_Baidu
            {
                Domain          = domain,
                TopDomain       = GetLevel1Domain(domain),
                Keywords        = tsk.Keyword,
                LinkUrl         = realUrl,
                MatchAt         = (byte)part,
                Html            = detailHtml,
                SearchkeywordId = tsk._id.ToString(),
                CreatedAt       = DateTime.UtcNow.AddHours(8),     // UTC shifted by +8 hours
                Description     = description,
                Title           = title,
                IsPromotion     = isMarket,
                PublishTime     = time,
                Content         = content,
                DCNum           = collectionNum
            };

            if (baiduVStar.HasValue)
            {
                link.BaiduVStar = baiduVStar.Value;
            }

            SaveLink(link, tsk);
        }

        /// <summary>
        /// Fetches <paramref name="url"/> via <c>get_htmlUrl</c> and updates the real URL and page HTML
        /// with any non-empty values returned; fills <paramref name="domain"/> from the real URL when
        /// it is still empty.
        /// </summary>
        private void FollowResultLink(string url, ref string realUrl, ref string detailHtml, ref string domain)
        {
            var page = get_htmlUrl(url);

            if (page != null && !string.IsNullOrEmpty(page.Item1))
            {
                realUrl = page.Item1;
            }
            if (page != null && !string.IsNullOrEmpty(page.Item2))
            {
                detailHtml = page.Item2;
            }
            if (!string.IsNullOrEmpty(realUrl) && string.IsNullOrEmpty(domain))
            {
                domain = GetDomain(realUrl);
            }
        }

        /// <summary>
        /// Finds the publish date of a result: first in the result fragment's
        /// <c>newTimeFactor_before_abs</c> span, otherwise in the page HTML.
        /// </summary>
        /// <returns>The matched date text, or an empty string when none was found.</returns>
        private static string ExtractPublishTime(string tag, string detailHtml)
        {
            // Prefer the date shown in the search result itself.
            string timeStr = tag.SubAfter("newTimeFactor_before_abs").SubBefore("</span>");

            if (!string.IsNullOrEmpty(timeStr))
            {
                return PublishDateRegex.Match(timeStr).Value;
            }

            // Otherwise scan the page source.
            foreach (Match candidate in PublishDateRegex.Matches(detailHtml))
            {
                if (string.IsNullOrEmpty(candidate.Value))
                {
                    continue;
                }

                var txt    = detailHtml.SubAfter(candidate.Value);
                var index1 = txt.IndexOf('<');
                var index2 = txt.IndexOf('>');
                var index3 = txt.IndexOf('\"');
                // Accept the date only when the next '<' comes before both the next '>' and the next
                // '"', i.e. the date appears to sit in element text rather than in markup or script.
                if (index1 < index2 && index1 < index3)
                {
                    return candidate.Value;
                }
            }

            return "";
        }