Example #1
0
 /// <summary>
 /// Initializes the endpoint, handing the literal "board", the root URL taken from
 /// <paramref name="settings"/> and <paramref name="httpClient"/> to the base constructor.
 /// </summary>
 /// <param name="settings">Source of the root URL, the theme and the URL parser.</param>
 /// <param name="httpClient">HTTP client forwarded unchanged to the base constructor.</param>
 public BoardEndPoint(ForumSettings settings, IHttpClient httpClient)
     : base("board", settings.RootURL, httpClient)
 {
     // A fresh parser is created per endpoint; theme and URL parser come from the settings.
     this._parser    = new BoardParser();
     this._urlParser = settings.UrlParser;
     this._theme     = settings.Theme;
 }
Example #2
0
        /// <summary>
        /// Builds the list of boards contained in one category body.
        /// </summary>
        /// <param name="categoryBody">Node whose descendants are searched with <c>theme.ForumBoard</c>.</param>
        /// <param name="theme">Supplies the selectors for the board, its name, its description and its child boards.</param>
        /// <returns>
        /// One <see cref="Board"/> per matched node (child-board rows are folded into their parent);
        /// an empty list when the category contains no board nodes.
        /// </returns>
        private List <Board> GetCategoryBoards(HtmlNode categoryBody, ISmfTheme theme)
        {
            var boardCollection = new List <Board>();

            var boardNodes = categoryBody.SelectNodes(theme.ForumBoard);

            // SelectNodes yields null (not an empty collection) when nothing matches,
            // so a category without boards must produce an empty list instead of crashing.
            if (boardNodes == null)
            {
                return boardCollection;
            }

            for (int i = 0; i < boardNodes.Count; i++)
            {
                var board = boardNodes[i];

                // The numeric ID is whatever follows the first six characters of the node's id attribute.
                int    boardID          = Convert.ToInt32(board.Id.Substring(6));
                string boardName        = board.SelectSingleNode(theme.ForumBoardName).InnerText.Trim();
                string boardDescription = board.SelectSingleNode(theme.ForumBoardDescription).InnerText.Trim();

                if (HasChildrenBoards(boardNodes, i))
                {
                    // The node right after this one carries the child boards of the current board.
                    boardCollection.Add(new Board(boardID, boardName, boardDescription, GetBoardChieldBoards(boardNodes[i + 1], theme)));
                    i++; // skip the children node so it is not parsed as a board of its own
                }
                else
                {
                    boardCollection.Add(new Board(boardID, boardName, boardDescription));
                }
            }

            return boardCollection;
        }
Example #3
0
        /// <summary>
        /// Reads the child boards listed inside a board's main frame.
        /// </summary>
        /// <param name="boardMainFrame">Node searched with <c>theme.BoardChildrenBoards</c>.</param>
        /// <param name="theme">Supplies the selectors for the child-board rows and their name node.</param>
        /// <returns>The child boards found; an empty list when no row matches.</returns>
        private List <Board> ParseChildrenBoards(HtmlNode boardMainFrame, ISmfTheme theme)
        {
            var result = new List <Board>();
            var rows   = boardMainFrame.SelectNodes(theme.BoardChildrenBoards);

            if (rows != null)
            {
                for (int index = 0; index < rows.Count; index++)
                {
                    var nameNode = rows[index].SelectSingleNode(theme.BoardChildrenBoardName);

                    // The "name" attribute holds the ID preceded by one character that is dropped.
                    string anchorName = nameNode.Attributes["name"].Value;
                    int    parsedId   = int.Parse(anchorName.Substring(1));

                    result.Add(new Board(parsedId, nameNode.InnerText));
                }
            }

            return result;
        }
Example #4
0
        /// <summary>
        /// Reads the topics listed inside a board's main frame.
        /// </summary>
        /// <param name="boardMainFrame">Node searched with <c>theme.BoardTopicModel</c>.</param>
        /// <param name="theme">Supplies the selectors for the topic rows and their link node.</param>
        /// <param name="urlParser">Extracts the "topic" ID from each link's href.</param>
        /// <returns>The topics found; an empty list when no row matches.</returns>
        private List <Topic> ParseTopics(HtmlNode boardMainFrame, ISmfTheme theme, IUrlParser urlParser)
        {
            var result = new List <Topic>();
            var rows   = boardMainFrame.SelectNodes(theme.BoardTopicModel);

            if (rows != null)
            {
                for (int index = 0; index < rows.Count; index++)
                {
                    var linkNode = rows[index].SelectSingleNode(theme.BoardTopicLink);

                    // ID comes from the link target, the display name from the trimmed link text.
                    string href    = linkNode.Attributes["href"].Value;
                    int    topicId = urlParser.FromURL(href, "topic");

                    result.Add(new Topic(topicId, linkNode.InnerText.Trim()));
                }
            }

            return result;
        }
Example #5
0
 /// <summary>
 /// Initializes the endpoint, handing the literal "topic", the root URL taken from
 /// <paramref name="settings"/> and <paramref name="httpClient"/> to the base constructor.
 /// </summary>
 /// <param name="settings">Source of the root URL, the theme and the URL parser.</param>
 /// <param name="httpClient">HTTP client forwarded unchanged to the base constructor.</param>
 public TopicEndPoint(ForumSettings settings, IHttpClient httpClient)
     : base("topic", settings.RootURL, httpClient)
 {
     // A fresh parser is created per endpoint; theme and URL parser come from the settings.
     this._parser    = new TopicParser();
     this._urlParser = settings.UrlParser;
     this._theme     = settings.Theme;
 }
Example #6
0
        /// <summary>
        /// Reads the child boards linked from the node that follows a parent board.
        /// </summary>
        /// <param name="parentBoard">Node searched with <c>theme.ForumBoardChieldBoard</c>.</param>
        /// <param name="theme">Supplies the selector for the child-board links.</param>
        /// <returns>One <see cref="Board"/> per link; an empty list when no link matches.</returns>
        /// <exception cref="Exception">A link's href does not contain a board ID.</exception>
        private List <Board> GetBoardChieldBoards(HtmlNode parentBoard, ISmfTheme theme)
        {
            var boardCollection = new List <Board>();

            var chieldBoardNotes = parentBoard.SelectNodes(theme.ForumBoardChieldBoard);

            // SelectNodes yields null (not an empty collection) when nothing matches.
            if (chieldBoardNotes == null)
            {
                return boardCollection;
            }

            foreach (var board in chieldBoardNotes)
            {
                var boardURL = board.Attributes["href"].Value;

                // Take every digit after "board" plus one separator character. The former
                // \d{1,2} quantifier cut IDs of 100 and above down to their first two digits.
                var match = Regex.Match(boardURL, @"(?<=board.)\d+");

                if (!match.Success)
                {
                    throw new Exception("Regex couldn't find the child boardID for this URL: " + boardURL);
                }

                int    boardID   = Convert.ToInt32(match.Value);
                string boardName = board.InnerText;

                boardCollection.Add(new Board(boardID, boardName));
            }

            return boardCollection;
        }
Example #7
0
 /// <summary>
 /// Creates forum settings from the given theme, root URL and URL parser, and fills the
 /// remaining members with fixed defaults: a date format string, the "hu-HU" culture and
 /// an empty custom home page URL.
 /// </summary>
 /// <param name="theme">Stored in <c>Theme</c>.</param>
 /// <param name="rootURL">Stored in <c>RootURL</c>.</param>
 /// <param name="urlType">Stored in <c>UrlParser</c>.</param>
 public ForumSettings(ISmfTheme theme, Uri rootURL, IUrlParser urlType)
 {
     // NOTE(review): the format has a doubled ':' between minutes and seconds — confirm it is intended.
     this.DateFormat = "yyyy-MM-dd, HH:mm::ss";
     this.Culture = new CultureInfo("hu-HU");

     this.Theme = theme;
     this.RootURL = rootURL;
     this.UrlParser = urlType;

     this.CustomHomePageURL = "";
 }
Example #8
0
        /// <summary>
        /// Creates the instance with an empty task list, the forum URL, the theme and the
        /// page limit supplied by the caller.
        /// </summary>
        /// <param name="forumHomePageUrl">Stored in <c>ForumURL</c>.</param>
        /// <param name="webpageTheme">Stored in <c>Theme</c>.</param>
        /// <param name="maxPageToCrawl">Stored in <c>MaxPagesToCrawl</c>.</param>
        public NonAuthenticated(Uri forumHomePageUrl, ISmfTheme webpageTheme, int maxPageToCrawl)
        {
            this.Tasks           = new List <Task <Webpage> >();
            this.ForumURL        = forumHomePageUrl;
            this.Theme           = webpageTheme;
            this.MaxPagesToCrawl = maxPageToCrawl;
        }
Example #9
0
        /// <summary>
        /// Parses a board page and stores the child boards and topics found on it in
        /// <paramref name="board"/>.
        /// </summary>
        /// <param name="board">Receives the parsed <c>ChildBoards</c> and <c>Topics</c>.</param>
        /// <param name="sourceCode">HTML of the board page.</param>
        /// <param name="theme">Supplies the selector for the board container and for its content.</param>
        /// <param name="urlParser">Passed on to the topic parsing to extract topic IDs.</param>
        /// <exception cref="NodeNotFoundException">
        /// The page has no node matching <c>theme.BoardContainer</c>.
        /// </exception>
        public void Execute(Board board, string sourceCode, ISmfTheme theme, IUrlParser urlParser)
        {
            var htmlDoc = new HtmlDocument();

            htmlDoc.LoadHtml(sourceCode);

            var boardMainFrameNode = htmlDoc.DocumentNode.SelectSingleNode(theme.BoardContainer);

            // SelectSingleNode yields null when nothing matches. Fail with a descriptive
            // error here rather than with a NullReferenceException inside the helpers.
            if (boardMainFrameNode == null)
            {
                throw new NodeNotFoundException("Could not find the board container in this Board");
            }

            // TODO : This is good, when the board has only one page, but what if not?
            board.ChildBoards = ParseChildrenBoards(boardMainFrameNode, theme);

            board.Topics = ParseTopics(boardMainFrameNode, theme, urlParser);
        }
Example #10
0
        /// <summary>
        /// Parses one message node and appends the resulting <see cref="Message"/> to
        /// <paramref name="messages"/>.
        /// </summary>
        /// <param name="messages">List the parsed message is added to.</param>
        /// <param name="node">Node containing the message link, posted time and body.</param>
        /// <param name="theme">Supplies the selectors for the link, the posted time and the body.</param>
        /// <exception cref="FormatException">
        /// The message link does not contain a "#msg" ID, or the posted time cannot be parsed.
        /// </exception>
        private void ParseMessage(List <Message> messages, HtmlNode node, ISmfTheme theme)
        {
            var msgAnchor = node.SelectSingleNode(theme.TopicMessageLink);
            var msgURL    = msgAnchor.Attributes["href"].Value;

            // Strip the trailing closing guillemet before parsing the date. The first literal
            // is that character mis-decoded (UTF-8 bytes read with a Thai code page) and is kept
            // for compatibility; the escape removes the real U+00BB character.
            var rawPostedTime = node.SelectSingleNode(theme.TopicMessagePostedTime).InnerText
                                    .Replace("ยป", "")
                                    .Replace("\u00BB", "")
                                    .Trim();

            var idMatch = Regex.Match(msgURL, @"(?<=#msg)\d+");

            // Without this check an unmatched URL reaches Convert.ToInt32 as an empty string
            // and fails with a message that does not say which URL was at fault.
            if (!idMatch.Success)
            {
                throw new FormatException("Regex couldn't find the message ID for this URL: " + msgURL);
            }

            var msgID = Convert.ToInt32(idMatch.Value);

            var msgSubject = msgAnchor.InnerText;

            var msgBody = new HtmlCleaner().Remove(node.SelectSingleNode(theme.TopicMessageBody).InnerHtml);

            // NOTE(review): parsed with the current thread culture — confirm that matches the forum's date format.
            var postedTime = DateTime.Parse(rawPostedTime).ToUniversalTime();

            // TODO : This is good, but what if this run multiple times on the same Topic?
            messages.Add(new Message(msgID, msgSubject, msgBody, postedTime));
        }
Example #11
0
        /// <summary>
        /// Parses a topic page and appends every message found on it to
        /// <c>topic.Messages</c>.
        /// </summary>
        /// <param name="topic">Topic whose message list receives the parsed messages.</param>
        /// <param name="sourceCode">HTML of the topic page.</param>
        /// <param name="theme">Supplies the selector for the message nodes.</param>
        /// <param name="urlParser">Not used by this method.</param>
        /// <exception cref="NodeNotFoundException">No message node matches on the page.</exception>
        public void Execute(Topic topic, string sourceCode, ISmfTheme theme, IUrlParser urlParser)
        {
            var document = new HtmlDocument();
            document.LoadHtml(sourceCode);

            var nodes = document.DocumentNode.SelectNodes(theme.TopicMessageModel);
            if (nodes == null)
            {
                throw new NodeNotFoundException("Could not find any message in this Topic");
            }

            // Messages are appended in document order.
            for (int index = 0; index < nodes.Count; index++)
            {
                ParseMessage(topic.Messages, nodes[index], theme);
            }
        }
Example #12
0
        /// <summary>
        /// Gets a list of <see cref="Category"/> from the given source code.
        /// </summary>
        /// <param name="sourceCode">HTML of the forum home page.</param>
        /// <param name="theme">Supplies the selector for the category nodes and for the boards inside them.</param>
        /// <returns>The categories in page order; each carries its boards when it has any.</returns>
        /// <exception cref="NodeNotFoundException">
        /// No category node matches, or the number of matched nodes is odd
        /// (nodes are consumed as header/body pairs).
        /// </exception>
        /// <exception cref="Exception"></exception>
        public List <Category> Execute(string sourceCode, ISmfTheme theme)
        {
            var htmlDoc = new HtmlDocument();

            htmlDoc.LoadHtml(sourceCode);

            var forumCategories = htmlDoc.DocumentNode.SelectNodes(theme.ForumCategories);

            // SelectNodes yields null (not an empty collection) when nothing matches;
            // report that as the documented exception instead of a NullReferenceException.
            if (forumCategories == null)
            {
                throw new NodeNotFoundException("Could not find any category in this Forum");
            }

            if (forumCategories.Count % 2 != 0)
            {
                throw new NodeNotFoundException("forumCategories.Count is not what is should be!");
            }

            var categorieCollection = new List <Category>();

            // Nodes come in pairs: an even index is a category header, the next one its body.
            for (int i = 0; i < forumCategories.Count - 1; i += 2)
            {
                var categoryHeader = forumCategories[i];
                var categoryBody   = forumCategories[i + 1];

                // The numeric ID is whatever follows the first nine characters of the header's id attribute.
                int    categoryID    = Convert.ToInt32(categoryHeader.Id.Substring(9));
                int    categoryOrder = i / 2;
                string categoryName  = categoryHeader.InnerText.Trim();

                var boardCollection = GetCategoryBoards(categoryBody, theme);

                if (boardCollection.Count > 0)
                {
                    categorieCollection.Add(new Category(categoryID, categoryOrder, categoryName, boardCollection));
                }
                else
                {
                    categorieCollection.Add(new Category(categoryID, categoryOrder, categoryName));
                }
            }

            return categorieCollection;
        }
Example #13
0
 /// <summary>
 /// Creates forum settings for the given theme and root URL, delegating to the
 /// three-argument constructor with a new <see cref="EqualSignUrlParser"/> as the URL parser.
 /// </summary>
 /// <param name="theme">Theme passed on to the three-argument constructor.</param>
 /// <param name="rootURL">Root URL passed on to the three-argument constructor.</param>
 public ForumSettings(ISmfTheme theme, Uri rootURL)
     : this(theme, rootURL, new EqualSignUrlParser())
 {
 }
Example #14
0
 /// <summary>
 /// Initializes the endpoint, handing the custom home page URL and the root URL taken from
 /// <paramref name="settings"/>, together with <paramref name="httpClient"/>, to the base constructor.
 /// </summary>
 /// <param name="settings">Source of the custom home page URL, the root URL and the theme.</param>
 /// <param name="httpClient">HTTP client forwarded unchanged to the base constructor.</param>
 public ForumEndPoint(ForumSettings settings, IHttpClient httpClient)
     : base(settings.CustomHomePageURL, settings.RootURL, httpClient)
 {
     // A fresh parser is created per endpoint; the theme comes from the settings.
     this._parser = new ForumParser();
     this._theme  = settings.Theme;
 }
Example #15
0
 /// <summary>
 /// Creates a webpage from source code that is already available.
 /// </summary>
 /// <param name="sourceCode">Stored in <c>SourceCode</c>.</param>
 /// <param name="websiteTheme">Stored in <c>Theme</c>.</param>
 public Webpage(string sourceCode, ISmfTheme websiteTheme)
 {
     this.SourceCode = sourceCode;
     this.Theme = websiteTheme;
 }
Example #16
0
 /// <summary>
 /// Creates the instance for the given forum URL and theme, delegating to the
 /// three-argument constructor with <see cref="int.MaxValue"/> as the page limit.
 /// </summary>
 /// <param name="forumHomePageUrl">Forum URL passed on to the three-argument constructor.</param>
 /// <param name="webpageTheme">Theme passed on to the three-argument constructor.</param>
 public NonAuthenticated(Uri forumHomePageUrl, ISmfTheme webpageTheme)
     : this(forumHomePageUrl, webpageTheme, int.MaxValue)
 {
 }
Example #17
0
 /// <summary>
 /// Creates a webpage identified by its URL; no source code is set here.
 /// </summary>
 /// <param name="webpageURL">Stored in <c>URL</c>.</param>
 /// <param name="websiteTheme">Stored in <c>Theme</c>.</param>
 public Webpage(Uri webpageURL, ISmfTheme websiteTheme)
 {
     this.URL = webpageURL;
     this.Theme = websiteTheme;
 }