Example #1
0
        /// <summary>
        /// Crawls the topic's question listing pages, starting at the page number typed
        /// into <c>txtPageCount</c> and counting down to page 0, and stores every question
        /// whose id has not been seen before. Changes are saved once per page.
        /// </summary>
        /// <param name="sender">The control that raised the click event (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void button2_Click(object sender, EventArgs e)
        {
            // Nothing to do when the page count box is empty.
            if (txtPageCount.Text.Length == 0)
            {
                return;
            }

            // TryParse instead of Parse: non-numeric text must not crash the handler.
            int pageCount;
            if (!int.TryParse(txtPageCount.Text, out pageCount))
            {
                MessageBox.Show("Page count must be a whole number.");
                return;
            }

            // Load the stored ids once; the set then also tracks ids added during this run,
            // so each membership test is a hash lookup instead of a database round trip.
            var knownIds = new HashSet<decimal>(from t in db.QuestionSet select t.Id);

            // NOTE(review): the loop also requests page=0; the sibling top-answers crawler
            // stops at page 1 - confirm whether page 0 is intentional.
            for (int i = pageCount; i >= 0; i--)
            {
                var url = string.Format("http://www.zhihu.com/topic/19776749/questions?page={0}", i);
                HtmlAgilityPack.HtmlDocument doc = htmlWeb.Load(url);

                var nodes = Utility.FindNodesByClass(doc, "question-item");
                foreach (var node in nodes)
                {
                    // AnswerCount is not available in the listing HTML; FollowerCount and
                    // ViewCount are likewise left unset here.
                    Question q = new Question();
                    q.Id = Utility.GetId(node);
                    q.CreateTime = Utility.GetTime(node);
                    q.Title = Utility.GetTitle(node);
                    q.Topic = Utility.GetTopic(node);

                    // HashSet.Add returns false when the id is already known.
                    if (knownIds.Add(q.Id))
                    {
                        db.QuestionSet.AddObject(q);

                        // Keep the UI responsive while crawling on the UI thread.
                        Application.DoEvents();
                    }

                    // Show progress: the title of the question just processed.
                    txtTitle.Text = q.Title;
                }

                db.SaveChanges();
            }
        }
 /// <summary>
 /// Factory method that builds a Question populated from the supplied values.
 /// </summary>
 /// <param name="id">Value assigned to the Id property.</param>
 /// <param name="title">Value assigned to the Title property.</param>
 /// <param name="answerCount">Value assigned to the AnswerCount property.</param>
 /// <param name="topic">Value assigned to the Topic property.</param>
 /// <param name="followerCount">Value assigned to the FollowerCount property.</param>
 /// <param name="viewCount">Value assigned to the ViewCount property.</param>
 /// <param name="questionId">Value assigned to the QuestionId property.</param>
 /// <returns>The newly created Question instance.</returns>
 public static Question CreateQuestion(global::System.Decimal id, global::System.String title, global::System.Decimal answerCount, global::System.String topic, global::System.Decimal followerCount, global::System.Decimal viewCount, global::System.Decimal questionId)
 {
     // Properties are assigned in declaration order of the initializer below.
     return new Question
     {
         Id = id,
         Title = title,
         AnswerCount = answerCount,
         Topic = topic,
         FollowerCount = followerCount,
         ViewCount = viewCount,
         QuestionId = questionId
     };
 }
Example #3
0
        /// <summary>
        /// Crawls the topic's "top answers" pages, from the page number in
        /// <c>numericUpDown1</c> down to page 1. For every feed item it stores the question
        /// if it is new, creates or loads the matching answer, fills the answer from the
        /// listing and from the answer's own detail page, and saves once per page.
        /// </summary>
        /// <param name="sender">The control that raised the click event (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void button2_Click(object sender, EventArgs e)
        {
            // Load stored question ids once; the set also tracks ids added during this run.
            var knownQuestionIds = new HashSet<decimal>(from t in db.QuestionSet select t.Id);

            // Answers already created or loaded during this run, keyed by id. A repeated
            // answer that has not been saved yet cannot be found by querying the database,
            // so it must be served from here.
            var answersThisRun = new Dictionary<decimal, Answer>();

            // Detach before attaching so repeated clicks do not stack up duplicate handlers.
            browser.DocumentCompleted -= browser_DocumentCompleted;
            browser.DocumentCompleted += browser_DocumentCompleted;
            browserDetail.DocumentCompleted -= browserDetail_DocumentCompleted;
            browserDetail.DocumentCompleted += browserDetail_DocumentCompleted;

            var pageCount = (int)numericUpDown1.Value;

            for (int i = pageCount; i > 0; i--)
            {
                var url = string.Format("http://www.zhihu.com/topic/19776749/top-answers?page={0}", i);

                // Reset the flag BEFORE navigating so a stale completion event from an
                // earlier page cannot end the wait early.
                // presumably browser_DocumentCompleted sets ready to true - confirm.
                ready = false;
                browser.Navigate(url);
                while (!ready)
                {
                    // Pump messages so the WebBrowser control can finish loading.
                    Application.DoEvents();
                }

                HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
                doc.LoadHtml(browser.Document.Body.OuterHtml);

                var nodes = Utility.FindNodesByClass(doc, "feed-main");
                foreach (var node in nodes)
                {
                    // Find the question first. AnswerCount is not available in the listing
                    // HTML; CreateTime, FollowerCount and ViewCount are left unset here.
                    Question q = new Question();
                    q.Id = Utility.GetTopId(node);
                    q.Title = Utility.GetTopTitle(node);
                    q.Topic = Utility.GetTopic(node);

                    // HashSet.Add returns false when the id is already known.
                    if (knownQuestionIds.Add(q.Id))
                    {
                        db.QuestionSet.AddObject(q);
                    }

                    Application.DoEvents();
                    txtTitle.Text = string.Format("P:{0},T:{1}", i, q.Title);

                    // Then find the answer: reuse one seen in this run, else load it from
                    // the database, else create it.
                    var answerId = Utility.GetAnswerId(node);
                    Answer a;
                    if (!answersThisRun.TryGetValue(answerId, out a))
                    {
                        a = db.AnswerSet.FirstOrDefault(t => t.Id == answerId);
                        if (a == null)
                        {
                            a = new Answer();
                            a.Id = answerId;
                            db.AnswerSet.AddObject(a);
                        }

                        answersThisRun.Add(answerId, a);
                    }

                    // Fields that can be read from the listing item itself.
                    a.Voteup = Utility.GetVoteCount(node);
                    a.Author = Utility.GetAnswerAuthor(node);
                    a.AuthorId = Utility.GetAnswerAuthorId(node);

                    var answerUrl = string.Format("http://www.zhihu.com/question/{0}/answer/{1}", q.Id, a.Id);

                    // Same wait pattern for the detail browser.
                    // presumably browserDetail_DocumentCompleted sets readyDetail - confirm.
                    readyDetail = false;
                    browserDetail.Navigate(answerUrl);
                    while (!readyDetail)
                    {
                        Application.DoEvents();
                    }

                    HtmlAgilityPack.HtmlDocument answerDoc = new HtmlAgilityPack.HtmlDocument();
                    answerDoc.LoadHtml(browserDetail.Document.Body.OuterHtml);

                    // Fields that are read from the answer's detail page.
                    var rootNode = answerDoc.DocumentNode;
                    a.CollectCount = Utility.GetCollectCount(rootNode);
                    a.CreateTime = Utility.GetAnswerTime(rootNode);
                    a.CommentCount = Utility.GetCommentCount(rootNode);
                    a.QuestionId = q.Id;
                }

                db.SaveChanges();
            }

            MessageBox.Show("Over!");
        }
 /// <summary>
 /// Deprecated way of adding a new object to the QuestionSet EntitySet; prefer the .Add method of the associated ObjectSet&lt;T&gt; property.
 /// </summary>
 /// <param name="question">The Question passed on to the base AddObject call.</param>
 public void AddToQuestionSet(Question question)
 {
     // Name of the entity set handed to the base AddObject call.
     const string entitySetName = "QuestionSet";
     base.AddObject(entitySetName, question);
 }