/// <summary>
/// Extracts tweets from <paramref name="content"/> using <c>RegexContent</c>, appends each one
/// to <c>result.Tweets</c>, and returns the tweets extracted by this call.
/// </summary>
/// <param name="result">Receives the parsed tweets; its <c>Url</c> is passed to <c>RegexParser.AbsoluteUrl</c> together with each matched tweet URL.</param>
/// <param name="content">Text that is matched against <c>RegexContent</c>.</param>
/// <returns>The tweets parsed during this call, in match order.</returns>
/// <remarks>
/// Parsing is best-effort: the first exception raised while processing matches stops the loop
/// and is swallowed, so only the tweets collected before the failure are returned.
/// </remarks>
private Tweet[] FillUserTweet(UserTweet result, string content)
{
    MatchCollection found = Regex.Matches(content, RegexContent, RegexOptions.Multiline | RegexOptions.IgnoreCase);
    var parsed = new List<Tweet>();

    try
    {
        foreach (Match hit in found)
        {
            GroupCollection groups = hit.Groups;

            // int.TryParse leaves the counter at 0 when the captured text is not a number.
            int replyCount;
            int.TryParse(groups["Reply"].Value, out replyCount);
            int forwardCount;
            int.TryParse(groups["Forward"].Value, out forwardCount);

            var item = new Tweet
            {
                Comment = replyCount,
                Content = TextCleaner.FullClean(groups["Content"].Value),
                Mid = groups["Mid"].Value,
                Forward = forwardCount,
                Source = groups["Source"].Value,
                PubDate = DateTimeParser.Parser(groups["PubDate"].Value) ?? DateTime.MinValue,
                Url = RegexParser.AbsoluteUrl(groups["Url"].Value, result.Url, true)
            };

            result.Tweets.Add(item);
            parsed.Add(item);
        }
    }
    catch
    {
        // Swallowed on purpose: whatever was parsed before the failure is still returned.
    }

    return parsed.ToArray();
}
/// <summary>
/// Downloads the comment pages of <paramref name="tweet"/> and appends every parsed comment
/// to <c>tweet.Comments</c>.
/// </summary>
/// <param name="tweet">Tweet whose comments are fetched. Nothing is done when <c>tweet.Comment</c> is 0.</param>
/// <param name="site">Site entity forwarded to <c>GeckoRequestProcessor.DoRequest</c>.</param>
/// <remarks>
/// Page URLs are built from <c>CommentUrlFormat</c> with <c>tweet.Mid</c> and a page number that
/// starts at 1. Each page is requested up to five times. Paging stops when a page cannot be
/// fetched successfully, when its payload cannot be read, or when a page after the first has no
/// pager match or reports a current page number different from the requested one.
/// The method is best-effort: unexpected exceptions are logged and end the crawl for this tweet
/// instead of propagating to the caller.
/// </remarks>
private void FillTweetComment(Tweet tweet, SiteEntity site)
{
    if (tweet.Comment == 0)
    {
        return;
    }

    const int maxAttempts = 5;
    string mid = tweet.Mid;
    int currentPage = 1;

    try
    {
        while (true)
        {
            string url = string.Format(CommentUrlFormat, mid, currentPage);
            var request = BuildRequest(url);

            CrawlResponse response = null;
            for (int attempt = 0; attempt < maxAttempts; attempt++)
            {
                // Reset per attempt so a thrown request is never mistaken for an earlier response.
                response = null;
                try
                {
                    response = GeckoRequestProcessor.DoRequest(request, site, null, null);
                    AggrSum();
                }
                catch (Exception ex)
                {
                    Logger.Info("Request failed: Url = " + url + ", error = " + ex.Message);
                }

                if (response == null)
                {
                    // The request itself threw; previously this dereferenced null and,
                    // via the outer catch, silently skipped the remaining retries.
                    continue;
                }

                if (response.Status == Enums.CrawlResult.Succ)
                {
                    break;
                }

                Logger.Info("访问页面错误:Url = " + response.Url);
            }

            if (response == null || response.Status != Enums.CrawlResult.Succ)
            {
                // Every attempt failed: there is no usable payload to parse.
                return;
            }

            // Trim(char[]) strips any of the characters '<', '/', 'p', 'r', 'e', '>' from both
            // ends of the content (not the literal "</pre>" string) before deserialization.
            CommentJsonResponse payload = JsonConvert.DeserializeObject<CommentJsonResponse>(response.Content.Trim("</pre>".ToArray()));
            if (payload == null || payload.data == null)
            {
                return;
            }

            string html = HttpUtility.HtmlDecode(payload.data.html);
            if (html == null)
            {
                return;
            }

            // The decoded HTML is stored back on the response, as the original code did.
            response.Content = html;

            var pageMatch = Regex.Match(html, RegexCommentPage, RegexOptions.IgnoreCase | RegexOptions.Multiline);
            bool isExpectedPage = pageMatch.Success
                && pageMatch.Groups["CurrentPageNum"].Value == currentPage.ToString(CultureInfo.InvariantCulture);
            if (currentPage != 1 && !isExpectedPage)
            {
                // Past the first page, a missing or mismatching pager ends the crawl.
                return;
            }

            // Fill the tweet with the comments found on this page.
            var matches = Regex.Matches(html, RegexComment, RegexOptions.IgnoreCase | RegexOptions.Multiline);
            foreach (Match match in matches)
            {
                Comment comment = new Comment
                {
                    Author = match.Groups["Author"].Value,
                    AuthorUrl = RegexParser.AbsoluteUrl(match.Groups["AuthorUrl"].Value, tweet.Url, true),
                    Content = TextCleaner.FullClean(match.Groups["Content"].Value),
                    PubDate = DateTimeParser.Parser(match.Groups["PubDate"].Value) ?? DateTime.MinValue
                };
                tweet.Comments.Add(comment);
            }

            currentPage++;
        }
    }
    catch (Exception ex)
    {
        // Still best-effort, but no longer silent: record why the crawl stopped.
        Logger.Info("FillTweetComment aborted: Mid = " + mid + ", page = " + currentPage + ", error = " + ex.Message);
    }
}