Ejemplo n.º 1
0
        /// <summary>
        /// Handles Fiddler's before-response event. When the session URL is the Baidu Index
        /// <c>getSocial</c> interface, the response body is decompressed and searched for the
        /// <c>str_age</c> and <c>str_sex</c> groups; the value of every entry in those groups is
        /// appended, after the current keyword, to the output text file. The captured session
        /// list is then cleared and Fiddler is shut down. For any other URL the content field
        /// is simply set to "无".
        /// </summary>
        /// <param name="oSession">The Fiddler session whose response is about to be returned.</param>
        private void FiddlerApplication_BeforeResponse(Session oSession)
        {
            if (!oSession.fullUrl.Contains("index.baidu.com/Interface/Social/getSocial"))
            {
                content = "无";
                return;
            }

            try
            {
                byte[] body    = Decompress(oSession.ResponseBody);
                string jsonstr = System.Text.Encoding.UTF8.GetString(body);

                // Strip the double quotes so the pattern below can match bare key:value text.
                jsonstr = jsonstr.Replace("\"", string.Empty);
                Regex regextemp   = new Regex("str_age:\\{(?<age>.*?)\\},str_sex:\\{(?<sex>.*?)\\}");
                Match matchresult = regextemp.Match(jsonstr);

                // Parse the data.
                content = "";
                if (string.IsNullOrEmpty(keyword))
                {
                    // The keyword field is empty, so nothing is recorded. Note that this
                    // return also bypasses the Fiddler shutdown further down.
                    return;
                }

                if (!matchresult.Success)
                {
                    // Fail with a meaningful message instead of an index-out-of-range error.
                    throw new FormatException("响应中未找到 str_age/str_sex 数据");
                }

                string[] agelist = matchresult.Groups["age"].Value.Split(',');
                string[] sexlist = matchresult.Groups["sex"].Value.Split(',');

                content += keyword + "  ";

                // Age values are written first, then sex values; each entry is "name:value".
                foreach (string[] entries in new[] { agelist, sexlist })
                {
                    foreach (string entry in entries)
                    {
                        string[] pair = entry.Split(':');
                        if (pair.Length < 2)
                        {
                            throw new FormatException("无法解析的数据项:" + entry);
                        }

                        content += pair[1] + "  ";
                    }
                }

                // Append to the text file.
                WriteTxt.WriteAppendTxt("F:\\phicommwork\\斐讯大数据文档\\游戏画像\\百度指数\\baidu.txt", content);
                MessagePipe.ExcuteWriteMessageEvent("添加数据" + content, 0);
                content = string.Empty;
                keyword = string.Empty;

                // lock releases the monitor even if Clear() throws, unlike a bare
                // Monitor.Enter/Monitor.Exit pair.
                lock (oAllSessions)
                {
                    oAllSessions.Clear();
                }
            }
            catch (Exception ex)
            {
                MessagePipe.ExcuteWriteMessageEvent("捕获到需要的请求,但发生异常:" + ex.Message, 1);
            }

            // Shut Fiddler down whether or not the parsing above succeeded.
            try
            {
                Fiddler.FiddlerApplication.Shutdown();
            }
            catch (Exception ex)
            {
                MessagePipe.ExcuteWriteMessageEvent("释放fillder发生异常:" + ex.Message, 1);
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Crawls Baidu Index crowd data for every keyword in the keyword file.
        /// For each keyword it requests the crowd page, extracts the <c>PPval.ppt</c> token,
        /// calls the <c>getSocial</c> interface with that token, parses the <c>str_age</c> and
        /// <c>str_sex</c> groups from the reply, and appends the values to the output text file.
        /// A failure on one keyword is logged and does not stop the remaining keywords; a failure
        /// outside the per-keyword loop (for example reading the keyword file) is logged as well.
        /// </summary>
        public void ExceuteCrawIndex()
        {
            try
            {
                List <string> keywordsList = new List <string>();

                // Read the keywords from the text file, one per line; blank lines are skipped.
                using (System.IO.StreamReader sr = new System.IO.StreamReader("F:\\amekeyword.txt", Encoding.GetEncoding("GB2312")))
                {
                    string str;
                    while ((str = sr.ReadLine()) != null)
                    {
                        if (!string.IsNullOrWhiteSpace(str))
                        {
                            keywordsList.Add(str);
                        }
                    }
                }

                MessagePipe.ExcuteWriteMessageEvent("取到关键词" + keywordsList.Count + "条", 0);

                // Process the keywords one by one.
                foreach (string keyword in keywordsList)
                {
                    try
                    {
                        MessagePipe.ExcuteWriteMessageEvent("开始处理关键词【" + keyword + "】", 0);
                        HttpRequestParam param    = new HttpRequestParam();
                        string           tempword = System.Web.HttpUtility.UrlEncode(keyword, System.Text.Encoding.GetEncoding("GB2312"));
                        tempword                = tempword.ToUpper();
                        param.URL               = string.Format("http://index.baidu.com/?tpl=crowd&word={0}", tempword);
                        param.Method            = "get";
                        param.AllowAutoRedirect = false;
                        param.IsIEProxy         = true;
                        param.Cookie            = this.cookie;
                        param.Timeout           = 7 * 1000;
                        param.UserAgent         = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36";
                        param.ResultType        = ResultType.String;
                        HttpResult result   = HttpHelper.GetHttpRequestData(param);
                        string     temphtml = result.Html;

                        // Remove all whitespace so the token pattern can match across line breaks.
                        temphtml = Regex.Replace(temphtml, @"\r|\n|\s|\t", string.Empty, RegexOptions.IgnoreCase);
                        Regex regextemp   = new Regex("PPval.ppt=\'(?<str>.*?)\',");
                        Match matchresult = regextemp.Match(temphtml);
                        if (!matchresult.Success)
                        {
                            // Without the token the follow-up request is pointless; fail early
                            // with a message that says what is missing.
                            throw new FormatException("页面中未找到 PPval.ppt");
                        }

                        string tempstr = matchresult.Groups["str"].Value;
                        tempstr += "&res2=";

                        // Build the getSocial request; the crowd page URL becomes the referer.
                        string temprefer = param.URL;
                        param.Referer = temprefer;
                        param.URL     = string.Format("http://index.baidu.com/Interface/Social/getSocial/?res={0}", tempstr);
                        param.Header.Add("X-Requested-With", "XMLHttpRequest");
                        result = HttpHelper.GetHttpRequestData(param);

                        // Strip the double quotes so the pattern below can match bare key:value text.
                        string jsonstr = result.Html;
                        jsonstr     = jsonstr.Replace("\"", string.Empty);
                        regextemp   = new Regex("str_age:\\{(?<age>.*?)\\},str_sex:\\{(?<sex>.*?)\\}");
                        matchresult = regextemp.Match(jsonstr);
                        if (!matchresult.Success)
                        {
                            throw new FormatException("响应中未找到 str_age/str_sex 数据");
                        }

                        string[] agelist = matchresult.Groups["age"].Value.Split(',');
                        string[] sexlist = matchresult.Groups["sex"].Value.Split(',');

                        // Parse the data: age values first, then sex values; each entry is "name:value".
                        string content = string.Empty;
                        foreach (string[] entries in new[] { agelist, sexlist })
                        {
                            foreach (string entry in entries)
                            {
                                string[] pair = entry.Split(':');
                                if (pair.Length < 2)
                                {
                                    throw new FormatException("无法解析的数据项:" + entry);
                                }

                                content += pair[1] + "  ";
                            }
                        }

                        // Append to the text file. The literal must be "F:\\baidu.txt": a third
                        // backslash would turn "\b" into a backspace character inside the path.
                        WriteTxt.WriteAppendTxt("F:\\baidu.txt", content);
                        MessagePipe.ExcuteWriteMessageEvent("关键词【" + keyword + "】指数数据添加" + content, 0);
                    }
                    catch (Exception ex)
                    {
                        MessagePipe.ExcuteWriteMessageEvent("处理关键词【" + keyword + "】发生异常:" + ex.Message, 1);
                    }
                }
            }
            catch (Exception ex)
            {
                // Report failures outside the per-keyword loop instead of swallowing them.
                MessagePipe.ExcuteWriteMessageEvent("抓取指数发生异常:" + ex.Message, 1);
            }
        }