Пример #1
0
        /// <summary>
        /// Builds a <see cref="Post"/> from the HTML node of a single forum post: content, author account,
        /// numeric post id, post date and content hash.
        /// </summary>
        /// <param name="postNode">Node whose descendants contain the post content, author, permalink and time elements.</param>
        /// <returns>
        /// The populated post. <c>ForumAccount</c> is <c>null</c> when no stored account has the scraped author name.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="postNode"/> is <c>null</c>.</exception>
        /// <exception cref="FormatException">
        /// An expected element or attribute is missing, the permalink contains no number,
        /// or the post id / timestamp text cannot be parsed.
        /// </exception>
        private Post PostParser(HtmlNode postNode)
        {
            if (postNode == null)
            {
                throw new ArgumentNullException("postNode");
            }

            // The post id and the <time datetime="..."> attribute are machine-readable values, so they are
            // parsed independently of the current thread culture (a culture with a non-Gregorian default
            // calendar could otherwise shift the year). Fully qualified to avoid needing a new using directive.
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            var post = new Post();

            // Matches a div having a class token that starts with "js-post__content-text".
            var contentQuery = @".//div[contains(concat(' ', normalize-space(@class), ' '), ' js-post__content-text')]";
            post.Content = SelectRequiredNode(postNode, contentQuery).InnerHtml.Trim();

            // Matches a div having a class token that ends with "author".
            var accountQuery = @".//div[contains(concat(' ', normalize-space(@class), ' '), 'author ')]";
            var vbName       = SelectRequiredNode(postNode, accountQuery).InnerText.Trim();

            ForumAccount acc = _db.Accounts.Where(a => a.VbName == vbName).SingleOrDefault();
            post.ForumAccount = acc;

            // Matches an anchor whose class attribute contains "b-post__count" anywhere.
            var postIdQuery = @".//a[contains(concat(' ', normalize-space(@class), ' '), 'b-post__count')]";
            var postHref    = ReadRequiredAttribute(SelectRequiredNode(postNode, postIdQuery), "href");

            // The post id is taken to be the last run of digits in the permalink.
            var matchedNumbers = Regex.Matches(postHref, @"\d+");
            if (matchedNumbers.Count == 0)
            {
                throw new FormatException(string.Format("Post link contains no numeric id: {0}", postHref));
            }

            post.PostId = int.Parse(matchedNumbers[matchedNumbers.Count - 1].Value, invariant);

            var timeString = ReadRequiredAttribute(SelectRequiredNode(postNode, @".//time"), "datetime").Trim();
            post.PostDate = DateTime.Parse(timeString, invariant);

            post.Hash = _hashProvider.Compute(post.Content);

            return post;
        }

        /// <summary>
        /// Returns the first node under <paramref name="scope"/> matching <paramref name="xpath"/>,
        /// or throws a <see cref="FormatException"/> naming the query when nothing matches.
        /// </summary>
        private static HtmlNode SelectRequiredNode(HtmlNode scope, string xpath)
        {
            var node = scope.SelectSingleNode(xpath);
            if (node == null)
            {
                throw new FormatException(string.Format("Post markup has no node matching XPath: {0}", xpath));
            }

            return node;
        }

        /// <summary>
        /// Returns the value of the named attribute of <paramref name="node"/>,
        /// or throws a <see cref="FormatException"/> when the attribute is absent.
        /// </summary>
        private static string ReadRequiredAttribute(HtmlNode node, string attributeName)
        {
            var attribute = node.Attributes[attributeName];
            if (attribute == null)
            {
                throw new FormatException(string.Format("Post markup node <{0}> has no '{1}' attribute.", node.Name, attributeName));
            }

            return attribute.Value;
        }
Пример #2
0
        /// <summary>
        /// Downloads the profile page of <paramref name="account"/>, reads the entries of its
        /// "last 10 posts" tab and returns those whose hash is not already present in <c>account.Posts</c>.
        /// </summary>
        /// <param name="account">
        /// Account whose <c>ForumId</c> is substituted into the configured profile URL and whose
        /// <c>Posts</c> hashes are used to filter out already-known posts.
        /// </param>
        /// <param name="threads">
        /// Known threads. A new <c>Thread</c> is appended to this list for every scraped thread id that is not in it yet.
        /// </param>
        /// <returns>
        /// A lazily evaluated sequence of new posts, or an empty array when downloading or parsing fails
        /// (the failure is written to the debug output).
        /// </returns>
        private IEnumerable <Post> GetNewPosts(ForumAccount account, List <Thread> threads)
        {
            try
            {
                var url = string.Format(Settings.Default.ProfileUrl, account.ForumId);

                var req = (HttpWebRequest)WebRequest.Create(url);
                req.Method          = "GET";
                req.ContentType     = "application/x-www-form-urlencoded";
                req.UserAgent       = "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.874.121 Safari/535.2";
                req.CookieContainer = new CookieContainer();
                req.CookieContainer.Add(new Cookie(".YAFNET_Authentication", Settings.Default.AuthToken, "/", "forums.white-wolf.com"));

                var document = new HtmlDocument();

                // Dispose the response and its stream deterministically so the underlying connection is released.
                using (var response = req.GetResponse())
                using (var responseStream = response.GetResponseStream())
                {
                    if (responseStream == null)
                    {
                        throw new NoNullAllowedException();
                    }

                    // Load directly from the response. The previous round trip through an unflushed
                    // StreamWriter/MemoryStream could hand the parser a page with its tail missing.
                    document.Load(responseStream, new UTF8Encoding());
                }

                document = CleanHtml(document);

                var postsCollection = new List <Post>();

                const int    maxListedPosts         = 10;
                const string placeholderFragment    = "id('MasterPageContentPlaceHolder_forum_ctl01_ProfileTabs_Last10PostsTab')//table//tr[";
                const string anchorSelectorFragment = "]//td/a/@href";
                const string titleSelectorFragment  = "]//td/a/text()";
                const string dateSelectorFragment   = "]//td/text()[4]";

                for (var i = 0; i < maxListedPosts; i++)
                {
                    // Post i is addressed through every second table row: 1, 3, 5, ...
                    var tableRow = 1 + (2 * i);

                    var threadIdSelector = String.Format("{0}{1}{2}", placeholderFragment, tableRow, anchorSelectorFragment);
                    var titleSelector    = String.Format("{0}{1}{2}", placeholderFragment, tableRow, titleSelectorFragment);
                    var dateSelector     = String.Format("{0}{1}{2}", placeholderFragment, tableRow, dateSelectorFragment);

                    var anchorNode = document.DocumentNode.SelectSingleNode(threadIdSelector);
                    if (anchorNode == null)
                    {
                        // The profile lists fewer than ten posts: keep what was parsed so far instead of
                        // failing on a null node and losing every post through the catch block below.
                        break;
                    }

                    // NOTE(review): reads the first attribute of the selected node, as the original did;
                    // presumably that is the anchor's href - confirm against the cleaned markup.
                    var threadHref = anchorNode.Attributes[0].Value;
                    var threadId   = int.Parse(Regex.Match(threadHref, @"(\d+)$").Groups[0].Value);

                    var threadTitle = document.DocumentNode.SelectSingleNode(titleSelector).InnerHtml.Trim();

                    // NOTE(review): parsed with the current culture because the date format emitted by the
                    // forum is not visible from here - confirm before switching to a fixed format.
                    var postDate = DateTime.Parse(document.DocumentNode.SelectSingleNode(dateSelector).InnerHtml.Trim());

                    var postContent =
                        document.DocumentNode.SelectSingleNode(
                            "id('MasterPageContentPlaceHolder_forum_ctl01_ProfileTabs_Last10PostsTab_LastPosts_MessagePost_" + i + "')").InnerHtml;

                    // Reuse the known thread, or register a new one in the caller's list.
                    Thread thread = threads.SingleOrDefault(s => s.ThreadId == threadId);
                    if (thread == null)
                    {
                        thread = new Thread {
                            ThreadId = threadId, Title = threadTitle
                        };
                        threads.Add(thread);
                    }

                    postsCollection.Add(new Post
                    {
                        Content  = postContent,
                        Hash     = _hashProvider.Compute(postContent),
                        PostDate = postDate,
                        Thread   = thread
                    });
                }

                // Deliberately left lazy, exactly as before: the filter runs when the caller enumerates the
                // result, so it sees account.Posts as it is at that moment (and runs outside this try/catch).
                return postsCollection.Where(newPost => !account.Posts.Select(p => p.Hash).Contains(newPost.Hash));
            }
            catch (Exception ex)
            {
                // Best effort: any download or parse failure is logged and reported as "no new posts".
                Debug.WriteLine(ex.Message + "\n----\n" + ex.StackTrace);
                return new Post[0];
            }
        }