コード例 #1
0
ファイル: CrawlSina.cs プロジェクト: Ailsa0910026911/Palas
        /// <summary>
        /// Scans <paramref name="content"/> with the <c>RegexContent</c> pattern, builds one
        /// <c>Tweet</c> per match, appends each of them to <c>result.Tweets</c> and returns
        /// the tweets built by this call.
        /// </summary>
        /// <param name="result">
        /// Container that receives every parsed tweet; its <c>Url</c> is handed to
        /// <c>RegexParser.AbsoluteUrl</c> together with the captured tweet link.
        /// </param>
        /// <param name="content">Text that is searched for tweet matches.</param>
        /// <returns>
        /// The tweets parsed by this call, in match order. The array is empty when nothing
        /// matched or when the very first match could not be processed.
        /// </returns>
        private Tweet[] FillUserTweet(UserTweet result, string content)
        {
            List<Tweet>     parsed = new List<Tweet>();
            MatchCollection found  = Regex.Matches(content, RegexContent, RegexOptions.Multiline | RegexOptions.IgnoreCase);

            try
            {
                foreach (Match hit in found)
                {
                    GroupCollection groups = hit.Groups;

                    // TryParse leaves the counter at 0 when the captured text is not a number.
                    int replyCount;
                    int.TryParse(groups["Reply"].Value, out replyCount);
                    int forwardCount;
                    int.TryParse(groups["Forward"].Value, out forwardCount);

                    Tweet item = new Tweet
                    {
                        Comment = replyCount,
                        Content = TextCleaner.FullClean(groups["Content"].Value),
                        Mid     = groups["Mid"].Value,
                        Forward = forwardCount,
                        Source  = groups["Source"].Value,
                        // Fall back to DateTime.MinValue when the date parser yields null.
                        PubDate = DateTimeParser.Parser(groups["PubDate"].Value) ?? DateTime.MinValue,
                        Url     = RegexParser.AbsoluteUrl(groups["Url"].Value, result.Url, true)
                    };

                    result.Tweets.Add(item);
                    parsed.Add(item);
                }
            }
            catch
            {
                // Best effort: any failure ends the scan; tweets collected so far are still returned.
            }

            return parsed.ToArray();
        }
コード例 #2
0
ファイル: CrawlSina.cs プロジェクト: Ailsa0910026911/Palas
        /// <summary>
        /// Downloads the comment pages of <paramref name="tweet"/> one after another and appends
        /// every parsed comment to <c>tweet.Comments</c>.
        /// </summary>
        /// <remarks>
        /// Does nothing when <c>tweet.Comment</c> is 0. Paging starts at page 1 and stops when a
        /// later page does not report the expected current page number, when no response could be
        /// obtained after all attempts, or when any step throws. The method is best effort:
        /// failures are logged and never propagated to the caller.
        /// </remarks>
        /// <param name="tweet">
        /// Tweet whose <c>Mid</c> is formatted into <c>CommentUrlFormat</c>; its <c>Url</c> is
        /// passed to <c>RegexParser.AbsoluteUrl</c> together with each captured author link.
        /// </param>
        /// <param name="site">Site entity forwarded unchanged to <c>GeckoRequestProcessor.DoRequest</c>.</param>
        private void FillTweetComment(Tweet tweet, SiteEntity site)
        {
            if (tweet.Comment == 0)
            {
                return;
            }

            const int maxAttempts = 5;
            int       currentPage = 1;
            string    mid         = tweet.Mid;

            try
            {
                while (true)
                {
                    string url = string.Format(CommentUrlFormat, mid, currentPage);

                    var request = BuildRequest(url);

                    CrawlResponse response = null;
                    for (int attempt = 0; attempt < maxAttempts; attempt++)
                    {
                        try
                        {
                            response = GeckoRequestProcessor.DoRequest(request, site, null, null);
                            AggrSum();
                        }
                        catch (Exception ex)
                        {
                            // Keep retrying; previously a throwing request left response null and the
                            // status check below raised NullReferenceException, ending the whole method.
                            Logger.Info("Comment request attempt failed: Url = " + url + ", Error = " + ex.Message);
                        }

                        if (response != null && response.Status == Enums.CrawlResult.Succ)
                        {
                            break;
                        }

                        // No response object means there is no response.Url to report; use the requested url.
                        Logger.Info("访问页面错误:Url = " + (response != null ? response.Url : url));
                    }

                    if (response == null || response.Content == null)
                    {
                        // Nothing to parse for this page, so paging ends here.
                        return;
                    }

                    // NOTE(review): a response that is still not Succ after all attempts is parsed anyway,
                    // exactly as before; confirm whether paging should stop in that case instead.
                    // Trim(char[]) strips any of the characters '<', '/', 'p', 'r', 'e', '>' from both
                    // ends of the content; it does not remove the literal "</pre>" tag as a unit.
                    CommentJsonResponse tmpResult =
                        JsonConvert.DeserializeObject <CommentJsonResponse>(response.Content.Trim("</pre>".ToArray()));
                    response.Content = HttpUtility.HtmlDecode(tmpResult.data.html);
                    var pageMatch = Regex.Match(response.Content, RegexCommentPage,
                                                RegexOptions.IgnoreCase | RegexOptions.Multiline);

                    // From the second page on, stop as soon as the page does not identify itself
                    // as the page that was requested.
                    if (currentPage != 1 &&
                        (!pageMatch.Success ||
                         pageMatch.Groups["CurrentPageNum"].Value != currentPage.ToString(CultureInfo.InvariantCulture)))
                    {
                        return;
                    }

                    // Fill the tweet with the comments found on this page.
                    var matches = Regex.Matches(response.Content, RegexComment,
                                                RegexOptions.IgnoreCase | RegexOptions.Multiline);

                    foreach (Match match in matches)
                    {
                        Comment comment = new Comment();
                        comment.Author    = match.Groups["Author"].Value;
                        comment.AuthorUrl = RegexParser.AbsoluteUrl(match.Groups["AuthorUrl"].Value, tweet.Url, true);
                        comment.Content   = TextCleaner.FullClean(match.Groups["Content"].Value);
                        // Fall back to DateTime.MinValue when the date parser yields null.
                        comment.PubDate   = DateTimeParser.Parser(match.Groups["PubDate"].Value) ?? DateTime.MinValue;
                        tweet.Comments.Add(comment);
                    }

                    currentPage++;
                }
            }
            catch (Exception ex)
            {
                // Best effort: comments collected so far stay on the tweet; record why paging stopped.
                Logger.Info("FillTweetComment aborted: Mid = " + mid + ", Page = " + currentPage + ", Error = " + ex.Message);
            }
        }