/// <summary>
        /// Downloads <paramref name="url"/> with <c>TaobaoWebHelper.GetContent</c> and keeps
        /// re-downloading it until the response contains the <c>b_content</c> marker.
        /// </summary>
        /// <remarks>
        /// The retry loop has no attempt limit. It stops early only when a response contains
        /// "没有找到内容!", in which case that response is returned unchanged. When a response is
        /// empty, or contains the "您的访问过于频繁" notice, the thread sleeps 1-3 seconds.
        /// NOTE(review): <paramref name="timeout"/> is not used; every request is sent with the
        /// fixed value 8000 - confirm whether callers expect their own value to be honoured.
        /// </remarks>
        /// <param name="url">Address to download.</param>
        /// <param name="timeout">Currently ignored (see remarks).</param>
        /// <param name="cc">Cookie container forwarded to the download helper.</param>
        /// <param name="enc">Encoding forwarded by reference to the download helper.</param>
        /// <param name="Rurl">Assigned by the download helper (presumably the final response URL - confirm).</param>
        /// <param name="cookie">Cookie string forwarded to the download helper.</param>
        /// <param name="cookieColl">Cookie collection forwarded by reference to the download helper.</param>
        /// <param name="cookieCollection">Cookie collection assigned by the download helper.</param>
        /// <returns>The first response that contains <c>b_content</c> or "没有找到内容!".</returns>
        string get_html(string url, int timeout, CookieContainer cc, ref Encoding enc, out string Rurl, string cookie, ref CookieCollection cookieColl, out CookieCollection cookieCollection)
        {
            var html = TaobaoWebHelper.GetContent(url, 8000, cc, ref enc, out Rurl, cookie, ref cookieColl, out cookieCollection);

            bool hasContent = !string.IsNullOrEmpty(html) && html.IsContains2("b_content");
            while (!hasContent)
            {
                html = TaobaoWebHelper.GetContent(url, 8000, cc, ref enc, out Rurl, cookie, ref cookieColl, out cookieCollection);

                if (string.IsNullOrEmpty(html))
                {
                    // Nothing to inspect. Pause before the next attempt so an empty
                    // response does not turn into a tight request loop.
                    Thread.Sleep(new Random().Next(1000, 3000));
                    continue;
                }

                if (html.Contains("没有找到内容!"))
                {
                    return html;
                }

                hasContent = html.IsContains2("b_content");

                if (html.Contains("您的访问过于频繁"))
                {
                    log("您的访问过于频繁,为确认本次访问为正常用户行为,需要您协助验证。");
                    Thread.Sleep(new Random().Next(1000, 3000));
                }
            }
            return html;
        }
예제 #2
0
        /// <summary>
        /// Downloads <paramref name="url"/> once with <c>TaobaoWebHelper.GetContent</c> and returns
        /// the response as-is.
        /// </summary>
        /// <remarks>
        /// A retry loop that re-fetched the page until it contained the <c>wx-rb wx-rb2</c> /
        /// <c>wx-rbwx-rb2</c> markers used to follow the download but had been commented out, so the
        /// marker check no longer influenced the result and has been removed together with the
        /// objects it allocated.
        /// NOTE(review): <paramref name="timeout"/> is not used; the request is sent with the fixed value 8000.
        /// </remarks>
        /// <param name="url">Address to download.</param>
        /// <param name="timeout">Currently ignored (see remarks).</param>
        /// <param name="cc">Cookie container forwarded to the download helper.</param>
        /// <param name="enc">Encoding forwarded by reference to the download helper.</param>
        /// <param name="Rurl">Assigned by the download helper (presumably the final response URL - confirm).</param>
        /// <param name="cookie">Cookie string forwarded to the download helper.</param>
        /// <param name="cookieColl">Cookie collection forwarded by reference to the download helper.</param>
        /// <param name="cookieCollection">Cookie collection assigned by the download helper.</param>
        /// <returns>Whatever the download helper returned for the single request.</returns>
        string get_Nohtml(string url, int timeout, CookieContainer cc, ref Encoding enc, out string Rurl, string cookie, ref CookieCollection cookieColl, out CookieCollection cookieCollection)
        {
            return TaobaoWebHelper.GetContent(url, 8000, cc, ref enc, out Rurl, cookie, ref cookieColl, out cookieCollection);
        }
        /// <summary>
        /// Downloads a detail page with <c>TaobaoWebHelper.GetContent</c>, retrying once when the
        /// first response lacks the <c>profile_inner</c> marker.
        /// </summary>
        /// <remarks>
        /// The first request is always sent, even when <paramref name="url"/> is empty; the retry is
        /// only attempted for a non-empty URL. If the retried response contains the
        /// "您的访问过于频繁" notice, the notice is logged and the thread sleeps 1-3 seconds, but no
        /// further request is made - the throttled response is what gets returned.
        /// NOTE(review): <paramref name="timeout"/> is not used; requests are sent with the fixed value 8000.
        /// </remarks>
        /// <param name="url">Address to download.</param>
        /// <param name="timeout">Currently ignored (see remarks).</param>
        /// <param name="cc">Cookie container forwarded to the download helper.</param>
        /// <param name="enc">Encoding forwarded by reference to the download helper.</param>
        /// <param name="Rurl">Assigned by the download helper (presumably the final response URL - confirm).</param>
        /// <param name="cookie">Cookie string forwarded to the download helper.</param>
        /// <param name="cookieColl">Cookie collection forwarded by reference to the download helper.</param>
        /// <param name="cookieCollection">Cookie collection assigned by the download helper.</param>
        /// <returns>The response of the last request that was made.</returns>
        string get_Detailehtml(string url, int timeout, CookieContainer cc, ref Encoding enc, out string Rurl, string cookie, ref CookieCollection cookieColl, out CookieCollection cookieCollection)
        {
            var html = TaobaoWebHelper.GetContent(url, 8000, cc, ref enc, out Rurl, cookie, ref cookieColl, out cookieCollection);

            bool needsRetry = !string.IsNullOrEmpty(url)
                              && (html == null || !html.IsContains2("profile_inner"));
            if (needsRetry)
            {
                html = TaobaoWebHelper.GetContent(url, 8000, cc, ref enc, out Rurl, cookie, ref cookieColl, out cookieCollection);

                if (!string.IsNullOrEmpty(html) && html.Contains("您的访问过于频繁"))
                {
                    log("您的访问过于频繁,为确认本次访问为正常用户行为,需要您协助验证。");
                    Thread.Sleep(new Random().Next(1000, 3000));
                }
            }
            return html;
        }
        /// <summary>
        /// Walks Bing result pages starting at <paramref name="link"/>, turns every result entry
        /// into an <c>IW2S_Bing_level1link</c> and hands each page's entries to <c>SaveResult</c>.
        /// </summary>
        /// <remarks>
        /// For every page the Bing home page is requested first (only its cookies are kept), then
        /// the result page is fetched through <c>get_html</c>. Each entry's target page is downloaded
        /// with <c>get_Detailehtml</c> after an 8-20 second pause. After a page is processed its
        /// entries are saved and removed from the list, so the list returned at the end carries
        /// nothing that has not already been saved (in practice it is empty).
        /// The next page URL is always rebuilt from <c>searchTsk.Keyword</c> and a result offset that
        /// grows by 10 per page; <paramref name="link"/> is only used for the first page.
        /// The <c>pages</c> counter incremented here is declared outside this method.
        /// </remarks>
        /// <param name="link">URL of the first result page; an empty value skips all work.</param>
        /// <param name="searchTsk">Search task supplying the keywords and ids copied onto each entry.</param>
        /// <returns>The working list after the last save (see remarks).</returns>
        public List<IW2S_Bing_level1link> GetLinks(string link, IW2S_Bing_BaiduCommend searchTsk)
        {
            List<IW2S_Bing_level1link> result = new List<IW2S_Bing_level1link>();
            Random rng          = new Random();
            int    nohist_pages = 0;
            int    quried_pages = 0;
            int    fanye        = 0; // result offset sent as the "first" query parameter

            // NOTE(review): the original comment said "search at most 10 pages", but the
            // condition lets the counter run 0..10, i.e. up to 11 pages - confirm the intent.
            while (!string.IsNullOrEmpty(link) && quried_pages <= 10)
            {
                log(link);
                CookieContainer  cc               = new CookieContainer();
                Encoding         enc              = null;
                CookieCollection cookiesColl      = new CookieCollection();
                CookieCollection cookieCollection = new CookieCollection();
                string           Rurl             = "http://cn.bing.com/";
                string           cookie           = "";

                // Request the home page first; its body is discarded, only the cookies are reused.
                TaobaoWebHelper.GetContentByIndex(Rurl, 8000, cc, ref enc, out Rurl, ref cookiesColl, out cookieCollection);
                cookiesColl = cookieCollection;
                Thread.Sleep(rng.Next(2000, 5000));

                var html = get_html(link, 8000, cc, ref enc, out Rurl, cookie, ref cookiesColl, out cookieCollection);
                cookiesColl = cookieCollection;
                if (html == null || html.Contains("没有找到搜索内容!"))
                {
                    break;
                }

                var tags = html.SubAfter("body").SubBefore("/body").SplitWith("b_content");
                if (tags == null || tags.Length == 0)
                {
                    // Guard the index below: without any segment there is no result list to read.
                    log("BLOCKED " + searchTsk.Keyword + " " + searchTsk.CommendKeyword);
                    break;
                }

                // The result list lives in the last "b_content" segment; one entry per </li>.
                var tagsD = tags[tags.Length - 1].SubAfter("搜索结果").SubBefore("</ol>").ToString().SplitWith("</li>");
                if (tagsD == null || tagsD.Length == 0)
                {
                    log("BLOCKED " + searchTsk.Keyword + " " + searchTsk.CommendKeyword);
                    break;
                }

                bool nohit = true;
                foreach (var tag in tagsD)
                {
                    if (!tag.Contains("h2"))
                    {
                        continue;
                    }

                    var    a     = tag.SubAfter("h2").SubAfter("a");
                    string title = RemoveInivalidChar(a.RemoveSpace().GetLower().SubBefore("</h2>").GetTxtFromHtml2().RemoveSpace().GetLower());
                    string href  = a.GetFirstHref2();
                    if (string.IsNullOrEmpty(title) && string.IsNullOrEmpty(href))
                    {
                        continue;
                    }

                    // href may be missing even though a title was found; treat that as empty
                    // instead of failing on the Replace call.
                    href = (href ?? string.Empty).Replace("amp;", "");

                    string abs    = RemoveInivalidChar(tag.SubAfter("<p>").SubBefore("</p").GetTxtFromHtml2().RemoveSpace().GetLower());
                    string domain = GetDomain(href);

                    int  maxScore = 0;
                    byte appType  = 0;

                    // Title and abstract joined; used for the keyword match flags below.
                    string txt = "{0},{1}".FormatStr(title, abs);
                    if (string.IsNullOrEmpty(txt))
                    {
                        continue;
                    }

                    Thread.Sleep(rng.Next(8000, 20000));
                    var htmldetail = "";
                    try
                    {
                        htmldetail = get_Detailehtml(href, 8000, cc, ref enc, out Rurl, cookie, ref cookiesColl, out cookieCollection);
                    }
                    catch (Exception)
                    {
                        // Best effort: the entry is still recorded without page content.
                        // NOTE(review): the link is prefixed with the Bing host on any failure,
                        // presumably to repair relative links - confirm this is wanted for absolute ones.
                        href = "http://cn.bing.com" + href;
                    }

                    bool          is_title_matched = title.GetLower().IsContains2(searchTsk.Keyword.ToLower(), searchTsk.CommendKeyword.ToLower());
                    bool          is_abstr_matched = abs.GetLower().IsContains2(searchTsk.Keyword.GetLower(), searchTsk.CommendKeyword.GetLower());
                    BaiduItemPart part             = is_title_matched && is_abstr_matched ? BaiduItemPart.TitleAbstract :
                                                     is_title_matched ? BaiduItemPart.Title :
                                                     is_abstr_matched ? BaiduItemPart.Abstract : BaiduItemPart.None;
                    bool is_itm_title_matched = txt.GetLower().IsContains(searchTsk.Keyword.GetLower());
                    bool is_bus_matched       = txt.GetLower().IsContains2(searchTsk.CommendKeyword.GetLower());

                    IW2S_Bing_level1link l1 = new IW2S_Bing_level1link
                    {
                        UsrId           = searchTsk.UsrId,
                        Domain          = domain,
                        TopDomain       = GetLevel1Domain(domain),
                        Keywords        = string.Format("{0} + {1}", searchTsk.Keyword, searchTsk.CommendKeyword),
                        LinkUrl         = href,
                        MatchAt         = (byte)part,
                        Html            = htmldetail,
                        MatchType       = (byte)((is_bus_matched ? 1 : 0) + (is_itm_title_matched ? 2 : 0)),
                        AppType         = appType,
                        BizId           = IDHelper.GetGuid("{0}/{1}/{2}".FormatStr(href, searchTsk.UsrId, searchTsk.Keyword)),
                        SearchkeywordId = searchTsk._id.ToString(),
                        CreatedAt       = DateTime.UtcNow.AddHours(8),
                        Description     = abs,
                        Title           = title,
                        Score           = maxScore,
                        Abstract        = abs,
                        ProjectId       = searchTsk.ProjectId
                    };

                    // Score: both keywords 90, commend keyword only 80, keyword only 50, none 0.
                    if (is_bus_matched && is_itm_title_matched)
                    {
                        l1.Score = 80 + 10;
                    }
                    else if (is_bus_matched)
                    {
                        l1.Score = 80;
                    }
                    else if (is_itm_title_matched)
                    {
                        l1.Score = 50;
                    }

                    result.Add(l1);
                    nohit        = false;
                    nohist_pages = 0;
                }

                if (nohit)
                {
                    nohist_pages++;
                }
                // Stop after a run of pages without any entry.
                // NOTE(review): the original comment said 3 consecutive pages; "> 3" needs 4.
                if (nohist_pages > 3)
                {
                    break;
                }

                quried_pages++;
                pages++;

                // The next page is addressed by offset; the "next" anchor of the page is not used.
                fanye += 10;
                link   = "http://cn.bing.com/search?q={0}&first={1}&FORM=PERE3".FormatStr(searchTsk.Keyword, fanye);
                SaveResult(result);
                result.Clear();

                Thread.Sleep(rng.Next(8000, 15000));
            }
            return result;
        }
예제 #5
0
        /// <summary>
        /// Walks up to 20 Taobao search result pages for <c>tsk.TaskName</c>, converts every item
        /// found in the page data into an <c>XListing</c>, appends it to <c>result.Listings</c>
        /// and saves each page's listings through <c>SaveResult</c>.
        /// </summary>
        /// <remarks>
        /// Items are cut out of the text between <c>itemlist</c> and <c>recommendAuctions</c>, split
        /// on <c>p4pTags</c>; each field is the text between two neighbouring key names. Processing
        /// of a page stops at the first segment that has no <c>raw_title</c> or that fails to parse.
        /// Numeric fields are parsed with the invariant culture because the page data uses "." as
        /// the decimal separator regardless of the machine's regional settings.
        /// <c>pages</c> and <c>NextPage</c> are declared outside this method.
        /// NOTE(review): <c>NextPage</c> is not reset here, so a second call presumably continues
        /// from the previous offset - confirm against the declaring class.
        /// </remarks>
        /// <param name="result">Accumulator whose <c>Listings</c> receives every parsed item.</param>
        /// <param name="tsk">Task supplying the search keyword and the ids copied onto each listing.</param>
        /// <param name="recordId">Forwarded unchanged to <c>SaveResult</c>.</param>
        /// <returns>The same <paramref name="result"/> instance.</returns>
        SnapSearchResult GetLinks(SnapSearchResult result, FreeTask tsk, string recordId)
        {
            List<XListing> xListings = new List<XListing>();
            Random         rng       = new Random();
            var            invariant = System.Globalization.CultureInfo.InvariantCulture;

            const string siteName = "taobao";
            Guid         siteId   = Guid.Parse("A00A672B-DD05-65FB-4EE0-CFA26EBF2ED5");

            string link         = "https://s.taobao.com/search?q={0}&ie=utf8&sort=default".FormatStr(tsk.TaskName);
            int    nohist_pages = 0;
            int    quried_pages = 1;
            int    position     = 1; // running rank of the item across all pages

            // Search at most 20 pages.
            while (!string.IsNullOrEmpty(link) && quried_pages <= 20)
            {
                log(link);
                var html = TaobaoWebHelper.GetSnapshotHtml(link);
                if (html == null)
                {
                    return result;
                }

                try
                {
                    var tagslist = html.SubAfter("itemlist").SubBefore("recommendAuctions");
                    var tags     = tagslist.SubAfter("p4pTags").SplitWith("p4pTags");

                    if (tags == null || tags.Length == 0)
                    {
                        log("BLOCKED " + tsk);
                        break;
                    }
                    bool nohit = true;

                    foreach (var tag in tags)
                    {
                        try
                        {
                            // Check for a missing/blank segment before touching it.
                            if (string.IsNullOrWhiteSpace(tag) || !tag.Contains("raw_title"))
                            {
                                Console.WriteLine(DateTime.Now);
                                break;
                            }

                            string title      = RemoveChar(tag.SubAfter("raw_title").SubBefore("pic_url").GetLower());
                            string nid        = RemoveChar(tag.SubAfter("nid").SubBefore("category"));
                            string pic_url    = RemoveChar(tag.SubAfter("pic_url").SubBefore("detail_url"));
                            string detail_url = Regex.Unescape("https:" + RemoveChar(tag.SubAfter("detail_url").SubBefore("view_price")));

                            var    view_price = RemoveChar(tag.SubAfter("view_price").SubBefore("view_fee"));
                            double price      = 0;
                            if (!string.IsNullOrEmpty(view_price))
                            {
                                price = Convert.ToDouble(view_price, invariant);
                            }

                            string item_loc   = RemoveChar(tag.SubAfter("item_loc").SubBefore("reserve_price"));
                            string view_sales = RemoveChar(tag.SubAfter("view_sales").SubBefore("comment_count")).Replace("人收货", "").Replace("人付款", "");
                            int    days30     = 0;
                            if (!string.IsNullOrEmpty(view_sales))
                            {
                                days30 = Convert.ToInt32(view_sales, invariant);
                            }

                            string comment_count = RemoveChar(tag.SubAfter("comment_count").SubBefore("user_id"));
                            int    commentcount  = 0;
                            if (!string.IsNullOrEmpty(comment_count))
                            {
                                commentcount = Convert.ToInt32(comment_count, invariant);
                            }

                            // Shop fields.
                            string user_id  = RemoveChar(tag.SubAfter("user_id").SubBefore("nick"));
                            string nick     = RemoveChar(tag.SubAfter("nick").SubBefore("shopcard"));
                            string isTmall  = RemoveChar(tag.SubAfter("isTmall").SubBefore("delivery"));
                            string shopLink = Regex.Unescape("https:" + RemoveChar(tag.SubAfter("shopLink").SubBefore("}")));

                            XListing listing = new XListing
                            {
                                ShopContactUrl        = shopLink,
                                ItemDetailUrl         = detail_url,
                                ItemPrice             = price,
                                ItemName              = title,
                                ItemID                = nid,
                                ItemLocation          = item_loc,
                                ItemSold30Days        = days30,
                                Itempic               = pic_url,
                                ItemTotalCommentCount = commentcount,
                                UId              = tsk.UId,
                                ShopID           = user_id,
                                ShopName         = nick,
                                ShopLocation     = item_loc,
                                ShopIsTmall      = isTmall == "true",
                                taskid           = tsk._id,
                                taskName         = tsk.TaskName,
                                SiteName         = siteName,
                                SiteID           = siteId,
                                usrid            = tsk.UsrId,
                                ShopIsAuthorized = false,
                                Position         = position,
                                PageNum          = quried_pages,
                                ProjectId        = tsk.ProjectId
                            };
                            if (listing.ItemDetailUrl != null && listing.ItemName != null)
                            {
                                listing.ItemBotStatus = BotStatus.Ok;
                            }
                            result.Listings.Add(listing);
                            xListings.Add(listing);
                            nohit        = false;
                            nohist_pages = 0;
                            position++;
                        }
                        catch (Exception we)
                        {
                            // A segment that cannot be parsed ends this page; items parsed
                            // so far are still saved below.
                            Console.WriteLine(DateTime.Now + "错误:" + we.Message);
                            break;
                        }
                    }

                    if (nohit)
                    {
                        nohist_pages++;
                    }
                    // Stop after a run of pages without any item.
                    // NOTE(review): the original comment said 3 consecutive pages; "> 3" needs 4.
                    if (nohist_pages > 3)
                    {
                        break;
                    }

                    quried_pages++;
                    pages++;
                    NextPage = NextPage + 44; // offset step between result pages

                    link = "https://s.taobao.com/search?q={0}&ie=utf8&sort=default&s={1}".FormatStr(tsk.TaskName, NextPage);
                    Console.WriteLine(DateTime.Now + "任务名:" + tsk.TaskName + ";开始搜索第" + quried_pages + "页");

                    SaveResult(xListings, BotTypes.ItemSnapshot, recordId, tsk);
                    xListings.Clear();

                    Thread.Sleep(rng.Next(3000, 6000));
                }
                catch (Exception ex)
                {
                    Console.WriteLine(DateTime.Now + "错误:" + ex.Message);
                    break;
                }
            }
            return result;
        }
예제 #6
0
        /// <summary>
        /// Walks Sogou WeChat article result pages starting at <paramref name="link"/>, builds an
        /// <c>IW2S_WX_level1link</c> for every article entry and hands each page's entries to
        /// <c>SaveResult</c>.
        /// </summary>
        /// <remarks>
        /// For every page the Sogou WeChat home page is requested first (only its cookies are kept),
        /// then the result page is fetched through <c>get_html</c>. For each entry three more pages
        /// are downloaded, each after a multi-second pause: the article itself (its first date-like
        /// string becomes <c>PublishTime</c>), the same address with <c>&amp;f=json</c> appended
        /// (its <c>"link"</c> value replaces the article URL), and the account page behind the
        /// entry's source link (account id, QR code, description and icons).
        /// After a page is processed its entries are saved and removed from the list, so the list
        /// returned at the end carries nothing that has not already been saved.
        /// The <c>pages</c> counter incremented here is declared outside this method.
        /// </remarks>
        /// <param name="link">URL of the first result page; an empty value skips all work.</param>
        /// <param name="searchTsk">Search task supplying the keywords and ids copied onto each entry.</param>
        /// <returns>The working list after the last save (see remarks).</returns>
        public List<IW2S_WX_level1link> GetLinks(string link, IW2S_WX_BaiduCommend searchTsk)
        {
            List<IW2S_WX_level1link> result = new List<IW2S_WX_level1link>();
            Random rng          = new Random();
            int    nohist_pages = 0;
            int    quried_pages = 0;

            // Matches dates such as 2016-1-5, 2016/01/05 or 2016年1月5日; built once for all entries.
            Regex dateRegex = new Regex("(20\\d{2}[-/]\\d{1,2}[-/]\\d{1,2})|(20\\d{2}年\\d{1,2}月\\d{1,2}日)");

            // NOTE(review): the original comment said "search at most 10 pages", but the
            // condition stops after the counter passes 2, i.e. 3 pages - confirm the intent.
            while (!string.IsNullOrEmpty(link) && quried_pages <= 2)
            {
                log(link);
                CookieContainer  cc               = new CookieContainer();
                Encoding         enc              = null;
                CookieCollection cookiesColl      = new CookieCollection();
                CookieCollection cookieCollection = new CookieCollection();
                string           Rurl             = "http://weixin.sogou.com/";
                string           cookie           = "";

                // Request the home page first; its body is discarded, only the cookies are reused.
                TaobaoWebHelper.GetContentByIndex(Rurl, 8000, cc, ref enc, out Rurl, ref cookiesColl, out cookieCollection);
                cookiesColl = cookieCollection;
                Thread.Sleep(rng.Next(5000, 8000));

                var html = get_html(link, 8000, cc, ref enc, out Rurl, cookie, ref cookiesColl, out cookieCollection);
                cookiesColl = cookieCollection;
                if (html == null || html.Contains("没有找到相关的微信公众号文章"))
                {
                    break;
                }

                // Entries are delimited by the "wx-rb wx-rb3" class; fall back to the
                // space-less spelling when the first split finds no delimiter.
                var tags = html.SplitWith("wx-rb wx-rb3");
                if (tags == null || tags.Length == 0 || tags.Length == 1)
                {
                    tags = html.SplitWith("wx-rbwx-rb3");
                }
                if (tags == null || tags.Length == 0)
                {
                    log("BLOCKED " + searchTsk.Keyword + " " + searchTsk.CommendKeyword);
                    break;
                }

                bool nohit = true;
                foreach (var tag in tags)
                {
                    if (!tag.Contains("txt-box"))
                    {
                        continue;
                    }

                    string title      = RemoveInivalidChar(tag.SubAfter("<h4").SubBefore("</h4>").GetTxtFromHtml2().RemoveSpace());
                    string href       = tag.SubAfter("<h4").SubBefore("</a>").GetFirstHref2();
                    string abs        = RemoveInivalidChar(tag.SubAfter("<h4>").SubBefore("\"s-p\"").SubBefore("<script>").GetTxtFromHtml2().RemoveSpace());
                    string domain     = tag.SubLastStringAfter("\"s-p\"").SubBefore("</a").GetTxtFromHtml2().SubAfter("(").SubAfter("(").SubBefore(",").Replace('"', ' ').Trim();
                    string SourceLink = tag.SubLastStringAfter("\"s-p\"").SubBefore("</a").GetFirstHref2();
                    string TitleImg   = tag.SubAfter("img_box2").SubBefore("</a").SubAfter("src=").Replace(">", "").Replace('"', ' ').RemoveSpace();

                    // Title and abstract joined; used for the keyword match flags below.
                    string txt = "{0},{1}".FormatStr(title, abs);
                    if (string.IsNullOrEmpty(txt))
                    {
                        continue;
                    }

                    // Site-relative article links get the host prepended; "amp;" residue
                    // of HTML-escaped ampersands is stripped in every case.
                    if (href.IsStartWith("/websearch"))
                    {
                        href = "http://weixin.sogou.com" + href.Replace("amp;", "");
                    }
                    href = href.Replace("amp;", "");

                    Thread.Sleep(rng.Next(8000, 20000));
                    var htmldetail = get_Detailehtml(href, 8000, cc, ref enc, out Rurl, cookie, ref cookiesColl, out cookieCollection);

                    // First date-like string in the article, or "" when there is none
                    // (a missing page is treated as empty text instead of failing the match).
                    Match  dateMatch = dateRegex.Match(htmldetail ?? string.Empty);
                    string time      = dateMatch.Success ? dateMatch.Value : "";

                    // Continue from the address the detail request reported back and ask for
                    // its JSON form to read the article link from it.
                    href = Rurl;
                    var hrefNew          = href + "&f=json";
                    var htmldetailNewUrl = get_Detailehtml(hrefNew, 8000, cc, ref enc, out Rurl, cookie, ref cookiesColl, out cookieCollection);
                    try
                    {
                        href = htmldetailNewUrl.SubAfter("\"link\":").SubBefore(",\"source_url\":").Replace('"', ' ').Replace("\\", "").RemoveSpace();
                    }
                    catch (Exception ex)
                    {
                        // Best effort: keep the address obtained above, but leave a trace.
                        log("article link could not be read from the JSON response: " + ex.Message);
                    }

                    bool          is_title_matched = title.GetLower().IsContains2(searchTsk.Keyword.ToLower(), searchTsk.CommendKeyword.ToLower());
                    bool          is_abstr_matched = abs.GetLower().IsContains2(searchTsk.Keyword.GetLower(), searchTsk.CommendKeyword.GetLower());
                    BaiduItemPart part             = is_title_matched && is_abstr_matched ? BaiduItemPart.TitleAbstract :
                                                     is_title_matched ? BaiduItemPart.Title :
                                                     is_abstr_matched ? BaiduItemPart.Abstract : BaiduItemPart.None;
                    bool is_itm_title_matched = txt.GetLower().IsContains(searchTsk.Keyword.GetLower());
                    bool is_bus_matched       = txt.GetLower().IsContains2(searchTsk.CommendKeyword.GetLower());

                    var no         = "";
                    var qrcode     = "";
                    var function   = "";
                    var NoIcon     = "";
                    var QrcodeIcon = "";
                    SourceLink = SourceLink.Replace("amp;", "");

                    Thread.Sleep(rng.Next(8000, 15000));
                    var htmlNo = get_Nohtml(SourceLink, 8000, cc, ref enc, out Rurl, cookie, ref cookiesColl, out cookieCollection);
                    if (!string.IsNullOrEmpty(htmlNo) && htmlNo.Contains("em_weixinhao"))
                    {
                        no         = htmlNo.SubAfter("em_weixinhao").SubBefore("/label").GetTxtFromHtml2().RemoveSpace();
                        qrcode     = htmlNo.SubAfter("v-box").SubBefore("<em").SubAfter("src=").Replace(">", "").Replace('"', ' ').RemoveSpace();
                        function   = htmlNo.SubAfter("功能介绍:</").SubBefore("/span").GetTxtFromHtml2().RemoveSpace();
                        // From here on SourceLink holds the text after "微信认证:" rather than a URL.
                        SourceLink = htmlNo.SubAfter("微信认证:").SubBefore("/div").GetTxtFromHtml2().RemoveSpace();
                        NoIcon     = htmlNo.SubAfter("img-box").SubBefore("</a").SubAfter("src=").SubBefore("onload").Replace(">", "").Replace('"', ' ').RemoveSpace();
                        QrcodeIcon = htmlNo.SubAfter("img-box").SubBefore("</a").SubAfter("err:").SubBefore(">").Replace(">", "").Replace('"', ' ').Replace("'", "").RemoveSpace();
                    }

                    IW2S_WX_level1link l1 = new IW2S_WX_level1link
                    {
                        BizId           = IDHelper.GetGuid("{0}/{1}/{2}".FormatStr(title, domain, searchTsk.UsrId)),
                        Description     = abs,
                        Domain          = domain,
                        UsrId           = searchTsk.UsrId,
                        LinkUrl         = href,
                        MatchAt         = (byte)part,
                        Title           = title,
                        CreatedAt       = DateTime.Now,
                        DataCleanStatus = 0,
                        Function        = function,
                        SearchkeywordId = searchTsk._id.ToString(),
                        Keywords        = searchTsk.Keyword,
                        PublicNo        = no,
                        QrCode          = qrcode,
                        SourceLink      = SourceLink,
                        TagType         = 0,
                        ImgIcon         = NoIcon,
                        QrCodeIcon      = QrcodeIcon,
                        ProjectId       = searchTsk.ProjectId,
                        TitleImg        = TitleImg,
                        PublishTime     = time,
                        Html            = htmldetail
                    };

                    // Score: both keywords 90, commend keyword only 80, keyword only 50;
                    // otherwise the property keeps its default.
                    if (is_bus_matched && is_itm_title_matched)
                    {
                        l1.Score = 80 + 10;
                    }
                    else if (is_bus_matched)
                    {
                        l1.Score = 80;
                    }
                    else if (is_itm_title_matched)
                    {
                        l1.Score = 50;
                    }

                    result.Add(l1);
                    nohit        = false;
                    nohist_pages = 0;
                }

                if (nohit)
                {
                    nohist_pages++;
                }
                // Stop after a run of pages without any entry.
                // NOTE(review): the original comment said 3 consecutive pages; "> 3" needs 4.
                if (nohist_pages > 3)
                {
                    break;
                }

                quried_pages++;
                pages++;

                // Follow the page's own "next page" anchor; relative links are made absolute.
                link = html.SubAfter("sogou_next").SubBefore("下一页").GetLastHref2();
                if (!string.IsNullOrEmpty(link) && !link.IsStartWith("http"))
                {
                    if (link.IsStartWith("/"))
                    {
                        link = link.SubAfter("/");
                    }
                    link = "http://weixin.sogou.com/weixin".GetContact(link);
                }

                SaveResult(result);
                result.Clear();

                Thread.Sleep(rng.Next(8000, 15000));
            }
            return result;
        }