示例#1
0
        /// <summary>
        /// Extracts the Weibo feed entries embedded in this page's HTML.
        /// </summary>
        /// <remarks>
        /// The feed markup is delivered as the JSON argument of a single
        /// <c>&lt;script&gt;FM.view(...)&lt;/script&gt;</c> line in <c>OriginHTML</c>. That JSON is
        /// deserialized into a <c>ViewJson</c> and its <c>html</c> member is parsed with HtmlAgilityPack.
        /// A feed entry that lacks one of the expected sub-nodes or attributes is still returned, with
        /// an empty string for the missing value (and <c>false</c> for the like flag), instead of
        /// aborting the whole call with a <see cref="System.NullReferenceException"/>.
        /// </remarks>
        /// <returns>
        /// One <c>WeiboFeed</c> per node whose <c>action-type</c> attribute is <c>feed_list_item</c>;
        /// an empty list when the parsed markup has no feed container; <c>null</c> when no line starts
        /// with the feed script marker or the JSON carries no <c>html</c> value.
        /// </returns>
        public List <WeiboFeed> GetAllFeeds()
        {
            // The feed markup lives on the line that starts with this marker.
            const string feedScriptPrefix = "<script>FM.view({\"ns\":\"pl.content.homeFeed.index\",\"domid\":\"Pl_Official_MyProfileFeed";

            string feedLine = null;

            foreach (string candidate in OriginHTML.Split('\n'))
            {
                // The line must be strictly longer than the marker: the marker alone has no JSON payload.
                if (candidate.Length > feedScriptPrefix.Length &&
                    candidate.StartsWith(feedScriptPrefix, System.StringComparison.Ordinal))
                {
                    feedLine = candidate;
                    break;
                }
            }

            if (feedLine == null)
            {
                return null;
            }

            // Strip the script wrapper so that only the JSON argument of FM.view(...) remains.
            string   jsonStr  = feedLine.Replace("<script>FM.view(", "").Replace(")</script>", "");
            ViewJson viewJson = JsonConvert.DeserializeObject <ViewJson>(jsonStr);

            if (viewJson.html == null)
            {
                // Without an html payload there is nothing to parse; report it like a missing feed block.
                return null;
            }

            HtmlDocument doc = new HtmlDocument();
            doc.LoadHtml(viewJson.html);

            List <WeiboFeed> wbFeedList = new List <WeiboFeed>();

            // The feed container is taken to be the second top-level node (index 1) of the fragment.
            if (doc.DocumentNode.ChildNodes.Count < 2)
            {
                return wbFeedList;
            }

            HtmlNode topNode = doc.DocumentNode.ChildNodes[1];

            foreach (HtmlNode feedListItem in topNode.ChildNodes)
            {
                if (feedListItem.GetAttributeValue("action-type", "") != "feed_list_item")
                {
                    continue;
                }

                string mid        = feedListItem.GetAttributeValue("mid", "");
                string username   = InnerHtmlOrEmpty(feedListItem, "div[1]/div[@class='WB_detail']/div[1]/a[1]");
                string time       = AttributeOrEmpty(feedListItem, "div[1]/div[@class='WB_detail']/div[2]/a[1]", "title");
                string content    = InnerHtmlOrEmpty(feedListItem, "div[1]/div[@class='WB_detail']/div[3]");
                string likeStatus = AttributeOrEmpty(feedListItem, "div[@class='WB_feed_handle']/div/ul/li[4]/a/span/span/span/i", "class");

                // "> 0" (rather than ">= 0") is kept as in the original check; the comparison is ordinal
                // because this is a CSS class token, not linguistic text.
                bool isLike = likeStatus.IndexOf("_bc", System.StringComparison.Ordinal) > 0;

                wbFeedList.Add(new WeiboFeed(this, mid, username, time, content, isLike));
            }

            return wbFeedList;
        }

        /// <summary>
        /// Returns the inner HTML of the first node matching <paramref name="xpath"/> below
        /// <paramref name="root"/>, or an empty string when nothing matches.
        /// </summary>
        private static string InnerHtmlOrEmpty(HtmlNode root, string xpath)
        {
            HtmlNode match = root.SelectSingleNode(xpath);

            return match != null ? match.InnerHtml : "";
        }

        /// <summary>
        /// Returns the value of <paramref name="attributeName"/> on the first node matching
        /// <paramref name="xpath"/> below <paramref name="root"/>, or an empty string when the node
        /// or the attribute is missing.
        /// </summary>
        private static string AttributeOrEmpty(HtmlNode root, string xpath, string attributeName)
        {
            HtmlNode match = root.SelectSingleNode(xpath);

            return match != null ? match.GetAttributeValue(attributeName, "") : "";
        }
示例#2
0
        /// <summary>
        /// Extracts the Weibo feed entries embedded in this page's HTML.
        /// </summary>
        /// <remarks>
        /// The feed markup is delivered as the JSON argument of a single
        /// <c>&lt;script&gt;FM.view(...)&lt;/script&gt;</c> line in <c>OriginHTML</c>. That JSON is
        /// deserialized into a <c>ViewJson</c> and its <c>html</c> member is parsed with HtmlAgilityPack.
        /// Each entry's timestamp is read from a <c>date</c> attribute holding Unix milliseconds and is
        /// converted to a local-time string. When the expected user name, date or content node is
        /// missing, the user name and content become empty strings and the date is searched for in the
        /// entry's raw markup; if no usable date is found the time is an empty string.
        /// The like flag passed to <c>WeiboFeed</c> is always <c>false</c> in this version.
        /// </remarks>
        /// <returns>
        /// One <c>WeiboFeed</c> per node whose <c>action-type</c> attribute is <c>feed_list_item</c>;
        /// an empty list when the parsed markup has no feed container; <c>null</c> when no line starts
        /// with the feed script marker or the JSON carries no <c>html</c> value.
        /// </returns>
        public List <WeiboFeed> GetAllFeeds()
        {
            // The feed markup lives on the line that starts with this marker.
            const string feedScriptPrefix = "<script>FM.view({\"ns\":\"pl.content.homeFeed.index\",\"domid\":\"Pl_Official_MyProfileFeed";

            string feedLine = null;

            foreach (string candidate in OriginHTML.Split('\n'))
            {
                // The line must be strictly longer than the marker: the marker alone has no JSON payload.
                if (candidate.Length > feedScriptPrefix.Length &&
                    candidate.StartsWith(feedScriptPrefix, StringComparison.Ordinal))
                {
                    feedLine = candidate;
                    break;
                }
            }

            if (feedLine == null)
            {
                return null;
            }

            // Strip the script wrapper so that only the JSON argument of FM.view(...) remains.
            string   jsonStr  = feedLine.Replace("<script>FM.view(", "").Replace(")</script>", "");
            ViewJson viewJson = JsonConvert.DeserializeObject <ViewJson>(jsonStr);

            if (viewJson.html == null)
            {
                // Without an html payload there is nothing to parse; report it like a missing feed block.
                return null;
            }

            HtmlDocument doc = new HtmlDocument();
            doc.LoadHtml(viewJson.html);

            List <WeiboFeed> wbFeedList = new List <WeiboFeed>();

            // The feed container is taken to be the second top-level node (index 1) of the fragment.
            if (doc.DocumentNode.ChildNodes.Count < 2)
            {
                return wbFeedList;
            }

            HtmlNode topNode = doc.DocumentNode.ChildNodes[1];

            // Built once per call instead of once per malformed entry.
            Regex    dateAttributeRegex = new Regex("date=\"\\d+\"");
            DateTime unixEpoch          = new DateTime(1970, 1, 1, 0, 0, 0, DateTimeKind.Utc);

            foreach (HtmlNode feedListItem in topNode.ChildNodes)
            {
                if (feedListItem.GetAttributeValue("action-type", "") != "feed_list_item")
                {
                    continue;
                }

                string mid = feedListItem.GetAttributeValue("mid", "");

                HtmlNode      nameNode      = feedListItem.SelectSingleNode("div[1]/div[@class='WB_detail']/div[1]/a[1]");
                HtmlNode      dateNode      = feedListItem.SelectSingleNode("div[1]/div[@class='WB_detail']/div[2]/a[1]");
                HtmlNode      contentNode   = feedListItem.SelectSingleNode("div[1]/div[@class='WB_detail']/div[3]");
                HtmlAttribute dateAttribute = dateNode != null ? dateNode.Attributes["date"] : null;

                string username;
                string content;
                string date;

                if (nameNode != null && dateAttribute != null && contentNode != null)
                {
                    username = nameNode.InnerHtml;
                    date     = dateAttribute.Value;
                    content  = contentNode.InnerHtml;
                }
                else
                {
                    // Some entries have irregular markup. As before, all three values fall back
                    // together: empty user name and content, date recovered from the raw markup.
                    username = "";
                    content  = "";

                    Match dateMatch = dateAttributeRegex.Match(feedListItem.InnerHtml);

                    // A match looks like date="123": drop the 6-character prefix and the closing quote.
                    date = dateMatch.Success
                           ? dateMatch.Value.Substring(6, dateMatch.Value.Length - 7)
                           : "";
                }

                // Convert Unix milliseconds to a local-time string; leave it empty when no valid
                // timestamp is available rather than throwing for a single bad entry.
                string time = "";
                long   unixDate;

                if (long.TryParse(date,
                                  System.Globalization.NumberStyles.Integer,
                                  System.Globalization.CultureInfo.InvariantCulture,
                                  out unixDate))
                {
                    try
                    {
                        time = unixEpoch.AddMilliseconds(unixDate).ToLocalTime().ToString();
                    }
                    catch (ArgumentOutOfRangeException)
                    {
                        // The value parsed as a number but lies outside the range DateTime can represent.
                        time = "";
                    }
                }

                // NOTE(review): the original never read the like state from the markup (its status
                // string was always empty), so the flag was always false; that result is kept here.
                bool isLike = false;

                wbFeedList.Add(new WeiboFeed(this, mid, username, time, content, isLike));
            }

            return wbFeedList;
        }