Пример #1
0
        /// <summary>
        /// Loads the Weibo search page for a topic and extracts the posts found on it.
        /// </summary>
        /// <param name="topicName">Topic text; appended verbatim to the search URL.</param>
        /// <param name="targetName">
        /// Substring the author name must contain. An empty string (the default) or null keeps every author.
        /// </param>
        /// <returns>
        /// The matching posts ordered by time, newest first. Empty when the page contains no post nodes.
        /// </returns>
        public static List<WeiBoContentItem> GetWeiBoTopicContentV2(string topicName, string targetName = "")
        {
            List<WeiBoContentItem> res = new List<WeiBoContentItem>();
            HtmlWeb webClient = new HtmlWeb();
            HtmlDocument doc = webClient.Load("https://s.weibo.com/weibo/" + topicName + "&Refer=weibo_weibo&xsort=time&realtimeweibo=1");

            doc.DocumentNode.InnerHtml = JavaScriptAnalyzer.Decode(doc.DocumentNode.InnerHtml);

            // SelectNodes yields null (not an empty collection) when nothing matches,
            // so a page without posts must be handled before enumerating.
            HtmlNodeCollection contentList = doc.DocumentNode.SelectNodes("//div[@class='content clearfix']");
            if (contentList == null)
            {
                return res;
            }

            // Build one item per post node.
            foreach (HtmlNode post in contentList)
            {
                var item = new WeiBoContentItem();

                // SelectSingleNode returns the first match or null, so a missing
                // sub-node leaves the corresponding field unset instead of throwing.
                HtmlNode timeNode = post.SelectSingleNode(".//a[@class='W_textb']");
                item.Time = Convert.ToDateTime(timeNode?.InnerText);

                HtmlNode authorNode = post.SelectSingleNode(".//a[@class='W_texta W_fb']");
                item.Author = authorNode?.InnerText.Trim();

                HtmlNode contentNode = post.SelectSingleNode(".//p[@class='comment_txt']");
                item.ContentStr = contentNode?.InnerText.Trim();

                HtmlNode picNode = post.SelectSingleNode(".//img[@action-type='feed_list_media_img']");
                string src = picNode?.Attributes.FirstOrDefault(c => c.Name == "src")?.Value;

                // Only build a picture URL when the post really has one; previously a
                // post without an image ended up with the bare string "https:".
                item.Pic = src == null ? null : "https:" + src.Replace("thumbnail", "large");

                res.Add(item);
            }

            // Null-safe filter: a post without an author or a null targetName must not throw.
            string wanted = targetName ?? "";
            return res.Where(p => (p.Author ?? "").Trim().Contains(wanted))
                      .OrderByDescending(p => p.Time)
                      .ToList();
        }
Пример #2
0
        /// <summary>
        /// Fetches <paramref name="url"/> through the HTTP helper and returns the decoded page text.
        /// Proxy settings and the Cookie header are first copied from the crawler named by ShareCookie,
        /// and every placeholder matched by <c>extract</c> is replaced with the value of the same key
        /// parsed from this instance's URL.
        /// </summary>
        /// <param name="url">Address to request; may contain placeholders.</param>
        /// <param name="code">
        /// Status reported by the HTTP helper, or <see cref="HttpStatusCode.NoContent"/> when no process manager is attached.
        /// </param>
        /// <param name="post">Optional request body passed through to the HTTP helper.</param>
        /// <returns>The decoded content (converted by Parse2XML in super mode), or an empty string when no process manager is attached.</returns>
        public string GetHtml(string url, out HttpStatusCode code,
                              string post = null)
        {
            var placeholders = extract.Matches(url);

            if (SysProcessManager == null)
            {
                code = HttpStatusCode.NoContent;
                return "";
            }

            var sharedCrawler = SysProcessManager.CurrentProcessCollections
                                                 .ToArray()
                                                 .FirstOrDefault(d => d.Name == ShareCookie) as SmartCrawler;

            if (sharedCrawler != null)
            {
                var source = sharedCrawler.Http;
                Http.ProxyIP       = source.ProxyIP;
                Http.ProxyPassword = source.ProxyPassword;
                Http.ProxyUserName = source.ProxyUserName;
                Http.ProxyPort     = sharedCrawler.Http.ProxyPort;

                if (Http.Parameters != sharedCrawler.Http.Parameters)
                {
                    var sharedCookie = sharedCrawler.Http.GetHeaderParameter().Get<string>("Cookie");
                    if (!string.IsNullOrWhiteSpace(sharedCookie))
                    {
                        Http.SetValue("Cookie", sharedCookie);
                    }
                }
            }

            // The instance URL is parsed lazily: only once, and only if a placeholder exists.
            Dictionary<string, string> urlParams = null;
            foreach (Match placeholder in placeholders)
            {
                urlParams = urlParams ?? XPathAnalyzer.ParseUrl(URL);
                if (urlParams == null)
                {
                    break;
                }

                string replacement;
                if (urlParams.TryGetValue(placeholder.Groups[1].Value, out replacement))
                {
                    url = url.Replace(placeholder.Groups[0].Value, replacement);
                }
            }

            WebHeaderCollection responseHeaders;
            var decoded = JavaScriptAnalyzer.Decode(helper.GetHtml(Http, out responseHeaders, out code, url, post));

            return IsSuperMode ? JavaScriptAnalyzer.Parse2XML(decoded) : decoded;
        }
Пример #3
0
        /// <summary>
        /// Fetches <paramref name="url"/> through the HTTP helper and returns the decoded page text.
        /// Every placeholder matched by <c>extract</c> is replaced with the value of the same key
        /// parsed from this instance's URL before the request is made.
        /// </summary>
        /// <param name="url">Address to request; may contain placeholders.</param>
        /// <param name="code">Status reported by the HTTP helper.</param>
        /// <param name="post">Optional request body passed through to the HTTP helper.</param>
        /// <returns>The decoded content, additionally converted by Parse2XML in super mode.</returns>
        public string GetHtml(string url, out HttpStatusCode code,
                              string post = null)
        {
            Dictionary<string, string> urlParams = null;

            foreach (Match placeholder in extract.Matches(url))
            {
                // Parse the instance URL on first use only.
                if (urlParams == null && (urlParams = XPathAnalyzer.ParseUrl(URL)) == null)
                {
                    break;
                }

                string replacement;
                var key = placeholder.Groups[1].Value;
                if (urlParams.TryGetValue(key, out replacement))
                {
                    url = url.Replace(placeholder.Groups[0].Value, replacement);
                }
            }

            WebHeaderCollection responseHeaders;
            var page = helper.GetHtml(Http, out responseHeaders, out code, url, post);
            page = JavaScriptAnalyzer.Decode(page);

            if (!IsSuperMode)
            {
                return page;
            }

            return JavaScriptAnalyzer.Parse2XML(page);
        }
Пример #4
0
        /// <summary>
        /// Handles a completed Fiddler session. When SelectText is blank, or the decoded response body
        /// contains it, the session's request is copied into Http, super mode is switched on, sniffing
        /// is stopped and the result is logged.
        /// </summary>
        /// <param name="oSession">The completed session.</param>
        private void FiddlerApplicationAfterSessionComplete(Session oSession)
        {
            if (oSession.oRequest.headers == null)
            {
                return;
            }

            var captured = new HttpItem();
            captured.Parameters = oSession.oRequest.headers.ToString();

            // Session URLs carry no scheme; pick it from the HTTPS flag.
            var scheme = (oSession.BitFlags & SessionFlags.IsHTTPS) != 0 ? "https://" : "http://";
            captured.URL = scheme + oSession.url;

            captured.Postdata = Encoding.Default.GetString(oSession.RequestBody);

            if (!string.IsNullOrWhiteSpace(SelectText))
            {
                var body = JavaScriptAnalyzer.Decode(oSession.GetResponseBodyAsString());
                if (!body.Contains(SelectText))
                {
                    // Not the response being looked for.
                    return;
                }
            }

            IsSuperMode = true;
            StopVisit();
            captured.DictCopyTo(Http);

            var post = Http.Method == MethodType.POST
                ? "post请求的内容为:\n" + captured.Postdata + "\n"
                : "";

            var window = MainFrm as Window;

            // Raise the main window above other windows while the result is reported.
            ControlExtended.UIInvoke(() =>
            {
                if (window != null)
                {
                    window.Topmost = true;
                }
            });

            var info = $"已经成功获取嗅探字段! 真实请求地址:\n{oSession.url},\n已自动配置了网页采集器,请求类型为{Http.Method}\n {post}已经刷新了网页采集器的内容";
            XLogSys.Print.Info(info);

            ControlExtended.UIInvoke(() =>
            {
                if (window != null)
                {
                    window.Topmost = false;
                }
            });

            URL = oSession.url;
        }
Пример #5
0
        /// <summary>
        /// Gets a list of Weibo posts from the m.weibo.cn container API and converts them to content items.
        /// </summary>
        /// <param name="topicName">
        /// Topic name. NOTE(review): not used by the request — the URL is fixed to
        /// <c>type=uid&amp;value=1761587065</c>; confirm whether the topic should be part of the query.
        /// </param>
        /// <param name="targetName">
        /// Substring the author's screen name must contain. An empty string (the default) or null keeps every author.
        /// </param>
        /// <returns>
        /// The matching posts ordered by time, newest first. Empty when the response carries no cards.
        /// </returns>
        public static List<WeiBoContentItem> GetWeiBoTopicContentV1(string topicName, string targetName = "")
        {
            var res = JavaScriptAnalyzer.Decode(ToolClass.GetAPI("https://m.weibo.cn/api/container/getIndex?type=uid&value=1761587065"));
            var ret = Newtonsoft.Json.JsonConvert.DeserializeObject<WeiBoTopicRes>(res);

            List<WeiBoContentItem> theres = new List<WeiBoContentItem>();

            // A failed or empty response deserializes without cards; return nothing instead of throwing.
            if (ret?.data?.cards == null)
            {
                return theres;
            }

            foreach (var card in ret.data.cards)
            {
                if (card == null || card.card_group == null)
                {
                    continue;
                }

                foreach (var p in card.card_group)
                {
                    // Group entries without a post payload cannot be converted; skip them.
                    if (p == null || p.mblog == null)
                    {
                        continue;
                    }

                    // The post text is HTML; load it so only the plain text is kept.
                    HtmlDocument htmlDocument = new HtmlDocument();
                    htmlDocument.LoadHtml(p.mblog.text);

                    WeiBoContentItem item = new WeiBoContentItem
                    {
                        Pic        = p.mblog.original_pic,
                        Author     = p.mblog.user?.screen_name,
                        ContentStr = htmlDocument.DocumentNode?.InnerText
                    };

                    // created_at is either relative ("N分钟前", "N小时前", "昨天 ...", "前天 ...")
                    // or a value Convert.ToDateTime can parse directly.
                    var createdAt = p.mblog.created_at;
                    if (createdAt.Contains("分钟"))
                    {
                        var minutes = Convert.ToInt32(createdAt.Replace("分钟前", ""));
                        item.Time = DateTime.Now.AddMinutes(-minutes);
                    }
                    else if (createdAt.Contains("小时"))
                    {
                        var hours = Convert.ToInt32(createdAt.Replace("小时前", ""));
                        item.Time = DateTime.Now.AddHours(-hours);
                    }
                    else if (createdAt.Contains("昨天"))
                    {
                        var timeOfDay = Convert.ToDateTime(createdAt.Replace("昨天", "").Trim());
                        item.Time = timeOfDay.AddDays(-1);
                    }
                    else if (createdAt.Contains("前天"))
                    {
                        var timeOfDay = Convert.ToDateTime(createdAt.Replace("前天", "").Trim());
                        item.Time = timeOfDay.AddDays(-2);
                    }
                    else
                    {
                        item.Time = Convert.ToDateTime(createdAt);
                    }

                    theres.Add(item);
                }
            }

            // Null-safe filter: a post without an author or a null targetName must not throw.
            string wanted = targetName ?? "";
            return theres.Where(p => (p.Author ?? "").Trim().Contains(wanted))
                         .OrderByDescending(p => p.Time)
                         .ToList();
        }
Пример #6
0
        /// <summary>
        /// Returns the decoded content of <paramref name="url"/>, which is either a local file path
        /// (drive letter followed by <c>:\</c>) or an address requested through the HTTP helper.
        /// For remote addresses, every placeholder matched by <c>extract</c> is replaced with the value
        /// of the same key parsed from this instance's URL.
        /// </summary>
        /// <param name="url">Local file path or address to request; may contain placeholders.</param>
        /// <param name="code">
        /// <see cref="HttpStatusCode.Accepted"/> when a local file was read,
        /// <see cref="HttpStatusCode.NotFound"/> when the local file does not exist,
        /// <see cref="HttpStatusCode.NoContent"/> when no process manager is attached,
        /// otherwise the status of the HTTP response.
        /// </param>
        /// <param name="post">Optional request body passed through to the HTTP helper.</param>
        /// <returns>The decoded content, additionally converted by Parse2XML in super mode.</returns>
        public string GetHtml(string url, out HttpStatusCode code,
                              string post = null)
        {
            string result = "";
            code = HttpStatusCode.NotFound;

            // Local file. Drive letters are matched in either case so that "c:\page.html"
            // is read from disk instead of being sent to the HTTP helper.
            if (Regex.IsMatch(url, @"^[A-Za-z]:\\"))
            {
                if (File.Exists(url))
                {
                    result = File.ReadAllText(url, AttributeHelper.GetEncoding(this.Http.Encoding));
                    code   = HttpStatusCode.Accepted;
                }
            }
            else
            {
                var mc = extract.Matches(url);
                if (SysProcessManager == null)
                {
                    code = HttpStatusCode.NoContent;
                    return "";
                }
                SetCookie(Http);

                // The instance URL is parsed lazily: only once, and only if a placeholder exists.
                Dictionary<string, string> paramDict = null;
                foreach (Match m in mc)
                {
                    if (paramDict == null)
                    {
                        paramDict = XPathAnalyzer.ParseUrl(URL);
                    }
                    if (paramDict == null)
                    {
                        break;
                    }

                    // Single lookup instead of ContainsKey followed by the indexer.
                    string value;
                    if (paramDict.TryGetValue(m.Groups[1].Value, out value))
                    {
                        url = url.Replace(m.Groups[0].Value, value);
                    }
                }

                // The out parameter prevents this method from being async, so the call blocks here.
                HttpHelper.HttpResponse response = helper.GetHtml(Http, url, post).Result;
                result = response.Html;
                code   = response.Code;
            }

            result = JavaScriptAnalyzer.Decode(result);
            if (IsSuperMode)
            {
                result = JavaScriptAnalyzer.Parse2XML(result);
            }

            return result;
        }
Пример #7
0
        /// <summary>
        /// Gets the posts of a Weibo user from the m.weibo.cn container API and converts them to content items.
        /// </summary>
        /// <param name="Uid">Value sent as the <c>value</c> query parameter.</param>
        /// <param name="ContainerId">Value sent as the <c>containerid</c> query parameter.</param>
        /// <param name="TopicFilter">
        /// Substring the post text must contain. An empty string (the default) or null keeps every post.
        /// </param>
        /// <returns>
        /// The matching posts ordered by time, newest first. Empty when the response carries no cards.
        /// </returns>
        public static List<WeiBoContentItem> GetWeiboByUid(string Uid, string ContainerId, string TopicFilter = "")
        {
            var res = JavaScriptAnalyzer.Decode(ToolClass.GetAPI($"https://m.weibo.cn/api/container/getIndex?type=uid&value={Uid}&containerid={ContainerId}"));
            var ret = Newtonsoft.Json.JsonConvert.DeserializeObject<WeiBoDirectContentItem.WeiBoDirectRes>(res);

            List<WeiBoContentItem> theres = new List<WeiBoContentItem>();

            // A failed or empty response deserializes without cards; return nothing instead of throwing.
            if (ret?.data?.cards == null)
            {
                return theres;
            }

            foreach (var p in ret.data.cards)
            {
                // Cards without a post payload cannot be converted; skip them.
                if (p == null || p.mblog == null)
                {
                    continue;
                }

                // The post text is HTML; load it so only the plain text is kept.
                HtmlDocument htmlDocument = new HtmlDocument();
                htmlDocument.LoadHtml(p.mblog.text);

                WeiBoContentItem item = new WeiBoContentItem
                {
                    Pic        = p.mblog.original_pic,
                    Author     = p.mblog.user?.screen_name,
                    ContentStr = htmlDocument.DocumentNode?.InnerText
                };

                // created_at is either relative ("N分钟前", "N小时前", "昨天 ...", "前天 ...")
                // or a value Convert.ToDateTime can parse directly.
                var createdAt = p.mblog.created_at;
                if (createdAt.Contains("分钟"))
                {
                    var minutes = Convert.ToInt32(createdAt.Replace("分钟前", ""));
                    item.Time = DateTime.Now.AddMinutes(-minutes);
                }
                else if (createdAt.Contains("小时"))
                {
                    var hours = Convert.ToInt32(createdAt.Replace("小时前", ""));
                    item.Time = DateTime.Now.AddHours(-hours);
                }
                else if (createdAt.Contains("昨天"))
                {
                    var timeOfDay = Convert.ToDateTime(createdAt.Replace("昨天", "").Trim());
                    item.Time = timeOfDay.AddDays(-1);
                }
                else if (createdAt.Contains("前天"))
                {
                    var timeOfDay = Convert.ToDateTime(createdAt.Replace("前天", "").Trim());
                    item.Time = timeOfDay.AddDays(-2);
                }
                else
                {
                    item.Time = Convert.ToDateTime(createdAt);
                }

                theres.Add(item);
            }

            // Null-safe filter: a post without text or a null TopicFilter must not throw.
            string wanted = TopicFilter ?? "";
            return theres.Where(p => (p.ContentStr ?? "").Contains(wanted))
                         .OrderByDescending(p => p.Time)
                         .ToList();
        }
Пример #8
0
        /// <summary>
        /// Gets the id of a topic from the Weibo search page.
        /// </summary>
        /// <param name="topicName">Topic text; appended verbatim to the search URL.</param>
        /// <returns>
        /// The part of the first <c>a.W_btn_b6</c> element's <c>action-data</c> attribute that follows
        /// its last ':' (the whole value when it contains no ':'), or null when the page has no such
        /// element or the element has no such attribute.
        /// </returns>
        public static string GetWeiBoTopicId(string topicName)
        {
            HtmlWeb webClient = new HtmlWeb();
            HtmlDocument doc = webClient.Load("https://s.weibo.com/weibo/" + topicName + "&Refer=weibo_weibo&xsort=time&realtimeweibo=1");

            // NOTE(review): the original decoded the page markup with JavaScriptAnalyzer.Decode but never
            // used the result, so the query has always run on the document as loaded. The unused local
            // was removed; confirm whether the decoded markup was meant to be written back first.

            // SelectSingleNode returns null when nothing matches (SelectNodes does as well, which made
            // the former FirstOrDefault call throw before the null check could run).
            HtmlNode item = doc.DocumentNode.SelectSingleNode("//a[@class='W_btn_b6']");
            if (item == null)
            {
                return null;
            }

            var actionData = item.Attributes["action-data"];
            if (actionData == null || actionData.Value == null)
            {
                return null;
            }

            string topicUrl = actionData.Value;
            return topicUrl.Substring(topicUrl.LastIndexOf(':') + 1);
        }
Пример #9
0
        /// <summary>
        /// Handles a completed Fiddler session. When SelectText is set and the decoded response body
        /// contains it, the session's request is copied into Http, the result is logged and
        /// SniffSucceed is raised. Sessions are ignored while SelectText is blank.
        /// </summary>
        /// <param name="oSession">The completed session.</param>
        private void FiddlerApplicationAfterSessionComplete(Session oSession)
        {
            if (oSession.oRequest.headers == null)
            {
                return;
            }

            var captured = new HttpItem();
            captured.Parameters = oSession.oRequest.headers.ToString();

            XLogSys.Print.Debug("visiting... " + oSession.url);

            // Session URLs carry no scheme; pick it from the HTTPS flag.
            var scheme = (oSession.BitFlags & SessionFlags.IsHTTPS) != 0 ? "https://" : "http://";
            captured.URL = scheme + oSession.url;

            if (oSession.RequestMethod.ToLower() == "post")
            {
                captured.Method = MethodType.POST;
            }

            captured.Postdata = Encoding.Default.GetString(oSession.RequestBody);

            // Nothing to look for: no session can match.
            if (string.IsNullOrWhiteSpace(SelectText))
            {
                return;
            }

            var body = JavaScriptAnalyzer.Decode(oSession.GetResponseBodyAsString());
            if (!body.Contains(SelectText))
            {
                return;
            }

            if (ConfigFile.Config.Get<bool>("AutoStartStopFiddler"))
            {
                StopVisit();
            }

            captured.DictCopyTo(Http);

            var post = Http.Method == MethodType.POST
                ? "POST content is:\n" + captured.Postdata + "\n"
                : "";

            var window = MainFrm as Window;

            // Raise the main window above other windows while the result is reported.
            ControlExtended.UIInvoke(() =>
            {
                if (window != null)
                {
                    window.Topmost = true;
                }
            });

            XLogSys.Print.Info(GlobalHelper.FormatArgs("success_get", oSession.url, Http.Method, post));

            ControlExtended.UIInvoke(() =>
            {
                if (window != null)
                {
                    window.Topmost = false;
                }
            });

            SniffSucceed?.Invoke(this, new EventArgs());
            URL = oSession.url;
        }
Пример #10
0
        /// <summary>
        /// Returns the decoded content of <paramref name="url"/>, which is either a local file path
        /// (drive letter followed by <c>:\</c>) or an address requested through the HTTP helper.
        /// For remote addresses the proxy IP and Cookie header are first copied from the crawler selected
        /// by ShareCookie, and every placeholder matched by <c>extract</c> is replaced with the value of
        /// the same key parsed from this instance's URL.
        /// </summary>
        /// <param name="url">Local file path or address to request; may contain placeholders.</param>
        /// <param name="code">
        /// <see cref="HttpStatusCode.Accepted"/> when a local file was read,
        /// <see cref="HttpStatusCode.NotFound"/> when the local file does not exist,
        /// <see cref="HttpStatusCode.NoContent"/> when no process manager is attached,
        /// otherwise the status of the HTTP response.
        /// </param>
        /// <param name="post">Optional request body passed through to the HTTP helper.</param>
        /// <returns>The decoded content, additionally converted by Parse2XML in super mode.</returns>
        public string GetHtml(string url, out HttpStatusCode code,
                              string post = null)
        {
            string result = "";
            code = HttpStatusCode.NotFound;

            // Local file. Drive letters are matched in either case so that "c:\page.html"
            // is read from disk instead of being sent to the HTTP helper.
            if (Regex.IsMatch(url, @"^[A-Za-z]:\\"))
            {
                if (File.Exists(url))
                {
                    result = File.ReadAllText(url, AttributeHelper.GetEncoding(this.Http.Encoding));
                    code   = HttpStatusCode.Accepted;
                }
            }
            else
            {
                var mc = extract.Matches(url);
                if (SysProcessManager == null)
                {
                    code = HttpStatusCode.NoContent;
                    return "";
                }

                var crawler = this.SysProcessManager.GetTask<SmartCrawler>(ShareCookie.SelectItem);
                if (crawler != null)
                {
                    Http.ProxyIP = crawler.Http.ProxyIP;
                    if (Http.Parameters != crawler.Http.Parameters)
                    {
                        var cookie = crawler.Http.GetHeaderParameter().Get<string>("Cookie");
                        if (!string.IsNullOrWhiteSpace(cookie))
                        {
                            Http.SetValue("Cookie", cookie);
                        }
                    }
                }

                // The instance URL is parsed lazily: only once, and only if a placeholder exists.
                Dictionary<string, string> paradict = null;
                foreach (Match m in mc)
                {
                    if (paradict == null)
                    {
                        paradict = XPathAnalyzer.ParseUrl(URL);
                    }
                    if (paradict == null)
                    {
                        break;
                    }

                    // Single lookup instead of ContainsKey followed by the indexer.
                    string value;
                    if (paradict.TryGetValue(m.Groups[1].Value, out value))
                    {
                        url = url.Replace(m.Groups[0].Value, value);
                    }
                }

                // The out parameter prevents this method from being async, so the call blocks here.
                HttpHelper.HttpResponse response = helper.GetHtml(Http, url, post).Result;
                result = response.Html;
                code   = response.Code;
            }

            result = JavaScriptAnalyzer.Decode(result);
            if (IsSuperMode)
            {
                result = JavaScriptAnalyzer.Parse2XML(result);
            }

            return result;
        }