/// <summary>
/// Creates a board endpoint. The literal "board", the root URL from
/// <paramref name="settings"/> and <paramref name="httpClient"/> are forwarded
/// to the base constructor.
/// </summary>
/// <param name="settings">Source of the root URL, the theme and the URL parser.</param>
/// <param name="httpClient">HTTP client passed on to the base endpoint.</param>
public BoardEndPoint(ForumSettings settings, IHttpClient httpClient)
    : base("board", settings.RootURL, httpClient)
{
    // Keep the theme and URL parser from the settings; the parser is always a fresh BoardParser.
    this._theme = settings.Theme;
    this._parser = new BoardParser();
    this._urlParser = settings.UrlParser;
}
/// <summary>
/// Builds the list of boards found inside a single category body.
/// </summary>
/// <param name="categoryBody">Node that is queried for the category's board nodes.</param>
/// <param name="theme">Theme supplying the expressions used to locate boards, their names and descriptions.</param>
/// <returns>
/// The boards of the category in document order; an empty list when the
/// category contains no board nodes.
/// </returns>
private List<Board> GetCategoryBoards(HtmlNode categoryBody, ISmfTheme theme)
{
    var boardCollection = new List<Board>();
    var boardNodes = categoryBody.SelectNodes(theme.ForumBoard);

    // SelectNodes yields null (not an empty collection) when nothing matches,
    // so a category without boards must be handled before touching Count.
    if (boardNodes == null)
    {
        return boardCollection;
    }

    for (int i = 0; i < boardNodes.Count; i++)
    {
        var board = boardNodes[i];

        // The numeric ID is whatever follows the first six characters of the node's id.
        int boardID = Convert.ToInt32(board.Id.Substring(6));
        string boardName = board.SelectSingleNode(theme.ForumBoardName).InnerText.Trim();
        string boardDescription = board.SelectSingleNode(theme.ForumBoardDescription).InnerText.Trim();

        if (HasChildrenBoards(boardNodes, i))
        {
            // The node right after the current one is handed over as the holder of its child boards.
            var childBoards = GetBoardChieldBoards(boardNodes[i + 1], theme);
            boardCollection.Add(new Board(boardID, boardName, boardDescription, childBoards));

            // Prevent crawling the children node as a board of its own: step over it.
            i++;
        }
        else
        {
            boardCollection.Add(new Board(boardID, boardName, boardDescription));
        }
    }

    return boardCollection;
}
/// <summary>
/// Collects the child boards listed inside a board's main frame.
/// </summary>
/// <param name="boardMainFrame">Node that is queried for child board nodes.</param>
/// <param name="theme">Theme supplying the expressions for the child board nodes and their name nodes.</param>
/// <returns>One <see cref="Board"/> per child board node; an empty list when none are found.</returns>
private List<Board> ParseChildrenBoards(HtmlNode boardMainFrame, ISmfTheme theme)
{
    var result = new List<Board>();
    var childNodes = boardMainFrame.SelectNodes(theme.BoardChildrenBoards);

    if (childNodes != null)
    {
        foreach (var childNode in childNodes)
        {
            var nameNode = childNode.SelectSingleNode(theme.BoardChildrenBoardName);

            // The "name" attribute carries the ID after a single leading character.
            string rawId = nameNode.Attributes["name"].Value;
            int parsedId = int.Parse(rawId.Substring(1));

            result.Add(new Board(parsedId, nameNode.InnerText));
        }
    }

    return result;
}
/// <summary>
/// Collects the topics listed inside a board's main frame.
/// </summary>
/// <param name="boardMainFrame">Node that is queried for topic nodes.</param>
/// <param name="theme">Theme supplying the expressions for the topic nodes and their link nodes.</param>
/// <param name="urlParser">Parser used to extract the "topic" ID from each topic link.</param>
/// <returns>One <see cref="Topic"/> per topic node; an empty list when none are found.</returns>
private List<Topic> ParseTopics(HtmlNode boardMainFrame, ISmfTheme theme, IUrlParser urlParser)
{
    var result = new List<Topic>();
    var nodes = boardMainFrame.SelectNodes(theme.BoardTopicModel);

    if (nodes != null)
    {
        foreach (var node in nodes)
        {
            var link = node.SelectSingleNode(theme.BoardTopicLink);

            // The topic ID is derived from the link target, the topic name from the link text.
            string href = link.Attributes["href"].Value;
            int topicId = urlParser.FromURL(href, "topic");
            string title = link.InnerText.Trim();

            result.Add(new Topic(topicId, title));
        }
    }

    return result;
}
/// <summary>
/// Creates a topic endpoint. The literal "topic", the root URL from
/// <paramref name="settings"/> and <paramref name="httpClient"/> are forwarded
/// to the base constructor.
/// </summary>
/// <param name="settings">Source of the root URL, the theme and the URL parser.</param>
/// <param name="httpClient">HTTP client passed on to the base endpoint.</param>
public TopicEndPoint(ForumSettings settings, IHttpClient httpClient)
    : base("topic", settings.RootURL, httpClient)
{
    // Keep the theme and URL parser from the settings; the parser is always a fresh TopicParser.
    this._theme = settings.Theme;
    this._parser = new TopicParser();
    this._urlParser = settings.UrlParser;
}
/// <summary>
/// Builds the list of child boards linked from the node that follows a parent board.
/// </summary>
/// <param name="parentBoard">Node that is queried for the child board links.</param>
/// <param name="theme">Theme supplying the expression used to locate the child board links.</param>
/// <returns>
/// One <see cref="Board"/> per child board link; an empty list when no link is found.
/// </returns>
/// <exception cref="Exception">A link's URL does not contain a recognizable board ID.</exception>
private List<Board> GetBoardChieldBoards(HtmlNode parentBoard, ISmfTheme theme)
{
    var boardCollection = new List<Board>();
    var chieldBoardNotes = parentBoard.SelectNodes(theme.ForumBoardChieldBoard);

    // SelectNodes yields null (not an empty collection) when nothing matches.
    if (chieldBoardNotes == null)
    {
        return boardCollection;
    }

    foreach (var board in chieldBoardNotes)
    {
        var boardURL = board.Attributes["href"].Value;

        // Take every digit that follows "board" plus one separator character.
        // The former \d{1,2} quantifier cut IDs of 100 and above down to their first two digits.
        var match = Regex.Match(boardURL, @"(?<=board.)\d+");
        if (!match.Success)
        {
            throw new Exception("Regex couldn't find the child boardID for this URL: " + boardURL);
        }

        int boardID = Convert.ToInt32(match.Value);
        string boardName = board.InnerText;
        boardCollection.Add(new Board(boardID, boardName));
    }

    return boardCollection;
}
/// <summary>
/// Creates forum settings with the given theme, root URL and URL parser, and
/// fills the remaining settings with their defaults: a fixed date format, the
/// "hu-HU" culture and an empty custom home page URL.
/// </summary>
/// <param name="theme">Theme stored in <c>Theme</c>.</param>
/// <param name="rootURL">Root URL stored in <c>RootURL</c>.</param>
/// <param name="urlType">URL parser stored in <c>UrlParser</c>.</param>
public ForumSettings(ISmfTheme theme, Uri rootURL, IUrlParser urlType)
{
    // NOTE(review): the double colon before "ss" looks unintentional — confirm against the forum's real date format.
    this.DateFormat = "yyyy-MM-dd, HH:mm::ss";
    this.Culture = new CultureInfo("hu-HU");

    this.Theme = theme;
    this.RootURL = rootURL;
    this.UrlParser = urlType;

    this.CustomHomePageURL = "";
}
/// <summary>
/// Creates the instance with an empty task list, the forum home page URL,
/// the theme and the page limit.
/// </summary>
/// <param name="forumHomePageUrl">URL stored in <c>ForumURL</c>.</param>
/// <param name="webpageTheme">Theme stored in <c>Theme</c>.</param>
/// <param name="maxPageToCrawl">Value stored in <c>MaxPagesToCrawl</c>.</param>
public NonAuthenticated(Uri forumHomePageUrl, ISmfTheme webpageTheme, int maxPageToCrawl)
{
    this.Tasks = new List<Task<Webpage>>();
    this.ForumURL = forumHomePageUrl;
    this.Theme = webpageTheme;
    this.MaxPagesToCrawl = maxPageToCrawl;
}
/// <summary>
/// Parses a board page and fills the child boards and topics of <paramref name="board"/>.
/// </summary>
/// <param name="board">Board whose <c>ChildBoards</c> and <c>Topics</c> are overwritten.</param>
/// <param name="sourceCode">HTML source of the board page.</param>
/// <param name="theme">Theme supplying the expressions for the board container, child boards and topics.</param>
/// <param name="urlParser">Parser used to extract topic IDs from topic links.</param>
/// <exception cref="NodeNotFoundException">The board container could not be found in the page.</exception>
public void Execute(Board board, string sourceCode, ISmfTheme theme, IUrlParser urlParser)
{
    var htmlDoc = new HtmlDocument();
    htmlDoc.LoadHtml(sourceCode);

    var boardMainFrameNode = htmlDoc.DocumentNode.SelectSingleNode(theme.BoardContainer);

    // Without the container both helpers would dereference null; fail with a
    // descriptive error instead, as the other parsers do for missing nodes.
    if (boardMainFrameNode == null)
    {
        throw new NodeNotFoundException("Could not find the board container in this Board");
    }

    // TODO : This is good, when the board has only one page, but what if not?
    board.ChildBoards = ParseChildrenBoards(boardMainFrameNode, theme);
    board.Topics = ParseTopics(boardMainFrameNode, theme, urlParser);
}
/// <summary>
/// Parses a single message node and appends the resulting <see cref="Message"/>
/// to <paramref name="messages"/>.
/// </summary>
/// <param name="messages">List that receives the parsed message.</param>
/// <param name="node">Node of one message.</param>
/// <param name="theme">Theme supplying the expressions for the message link, posted time and body.</param>
/// <exception cref="FormatException">
/// The message link does not contain a "#msg" ID, or the posted time cannot be parsed.
/// </exception>
private void ParseMessage(List<Message> messages, HtmlNode node, ISmfTheme theme)
{
    var msgAnchor = node.SelectSingleNode(theme.TopicMessageLink);
    var msgURL = msgAnchor.Attributes["href"].Value;

    // Strip the closing guillemet from the posted time. The first literal is the
    // mis-decoded form of that character and is kept for compatibility; the
    // escape covers the real character (U+00BB) independent of file encoding.
    var rawPostedTime = node.SelectSingleNode(theme.TopicMessagePostedTime).InnerText
        .Replace("ยป", "")
        .Replace("\u00BB", "")
        .Trim();

    // The message ID is the run of digits right after "#msg" in the link.
    var idMatch = Regex.Match(msgURL, @"(?<=#msg)\d+");
    if (!idMatch.Success)
    {
        // Same exception type an empty match used to cause, but with a usable message.
        throw new FormatException("Could not find the message ID in this URL: " + msgURL);
    }

    var msgID = Convert.ToInt32(idMatch.Value);
    var msgSubject = msgAnchor.InnerText;
    var msgBody = new HtmlCleaner().Remove(node.SelectSingleNode(theme.TopicMessageBody).InnerHtml);

    // Parsed with DateTime.Parse (no explicit format or culture), then converted to UTC.
    var postedTime = DateTime.Parse(rawPostedTime).ToUniversalTime();

    // TODO : This is good, but what if this run multiple times on the same Topic?
    messages.Add(new Message(msgID, msgSubject, msgBody, postedTime));
}
/// <summary>
/// Parses a topic page and appends every message found to the messages of
/// <paramref name="topic"/>.
/// </summary>
/// <param name="topic">Topic whose <c>Messages</c> list receives the parsed messages.</param>
/// <param name="sourceCode">HTML source of the topic page.</param>
/// <param name="theme">Theme supplying the expression for the message nodes.</param>
/// <param name="urlParser">Not used by this method.</param>
/// <exception cref="NodeNotFoundException">No message node was found in the page.</exception>
public void Execute(Topic topic, string sourceCode, ISmfTheme theme, IUrlParser urlParser)
{
    var document = new HtmlDocument();
    document.LoadHtml(sourceCode);

    var root = document.DocumentNode;
    var nodes = root.SelectNodes(theme.TopicMessageModel);

    // A topic page without any message node is treated as an error.
    if (nodes == null)
    {
        throw new NodeNotFoundException("Could not find any message in this Topic");
    }

    foreach (var current in nodes)
    {
        ParseMessage(topic.Messages, current, theme);
    }
}
/// <summary>
/// Gets a list of <see cref="Category"/> from the given source code.
/// </summary>
/// <param name="sourceCode">HTML source of the forum page.</param>
/// <param name="theme">Theme supplying the expressions for the category nodes and their boards.</param>
/// <returns>The categories in document order, each with its boards when it has any.</returns>
/// <exception cref="NodeNotFoundException">
/// No category node was found, or the number of category nodes is odd.
/// </exception>
/// <exception cref="Exception">A child board URL does not contain a recognizable board ID.</exception>
public List<Category> Execute(string sourceCode, ISmfTheme theme)
{
    var htmlDoc = new HtmlDocument();
    htmlDoc.LoadHtml(sourceCode);

    var forumCategories = htmlDoc.DocumentNode.SelectNodes(theme.ForumCategories);

    // SelectNodes yields null when nothing matches; report that as the
    // documented exception instead of failing on the Count access below.
    if (forumCategories == null)
    {
        throw new NodeNotFoundException("Could not find any category in this Forum");
    }

    // The nodes are consumed as (header, body) pairs, so their number must be even.
    if (forumCategories.Count % 2 != 0)
    {
        throw new NodeNotFoundException("forumCategories.Count is not what is should be!");
    }

    var categorieCollection = new List<Category>();
    for (int i = 0; i < forumCategories.Count - 1; i += 2)
    {
        var categoryHeader = forumCategories[i];
        var categoryBody = forumCategories[i + 1];

        // The numeric ID is whatever follows the first nine characters of the header's id.
        int categoryID = Convert.ToInt32(categoryHeader.Id.Substring(9));
        int categoryOrder = i / 2; // zero-based position of the pair
        string categoryName = categoryHeader.InnerText.Trim();

        var boardCollection = GetCategoryBoards(categoryBody, theme);
        if (boardCollection.Count > 0)
        {
            categorieCollection.Add(new Category(categoryID, categoryOrder, categoryName, boardCollection));
        }
        else
        {
            categorieCollection.Add(new Category(categoryID, categoryOrder, categoryName));
        }
    }

    return categorieCollection;
}
/// <summary>
/// Creates forum settings for the given theme and root URL, delegating to the
/// three-argument constructor with a new <see cref="EqualSignUrlParser"/>.
/// </summary>
/// <param name="theme">Theme passed on unchanged.</param>
/// <param name="rootURL">Root URL passed on unchanged.</param>
public ForumSettings(ISmfTheme theme, Uri rootURL)
    : this(theme, rootURL, new EqualSignUrlParser())
{
}
/// <summary>
/// Creates a forum endpoint. The custom home page URL and root URL from
/// <paramref name="settings"/> and <paramref name="httpClient"/> are forwarded
/// to the base constructor.
/// </summary>
/// <param name="settings">Source of the custom home page URL, the root URL and the theme.</param>
/// <param name="httpClient">HTTP client passed on to the base endpoint.</param>
public ForumEndPoint(ForumSettings settings, IHttpClient httpClient)
    : base(settings.CustomHomePageURL, settings.RootURL, httpClient)
{
    // Keep the theme from the settings; the parser is always a fresh ForumParser.
    this._theme = settings.Theme;
    this._parser = new ForumParser();
}
/// <summary>
/// Creates a webpage from source code that is already available.
/// </summary>
/// <param name="sourceCode">Value stored in <c>SourceCode</c>.</param>
/// <param name="websiteTheme">Theme stored in <c>Theme</c>.</param>
public Webpage(string sourceCode, ISmfTheme websiteTheme)
{
    this.SourceCode = sourceCode;
    this.Theme = websiteTheme;
}
/// <summary>
/// Creates the instance for the given home page URL and theme, delegating to
/// the three-argument constructor with <see cref="int.MaxValue"/> as the page limit.
/// </summary>
/// <param name="forumHomePageUrl">URL passed on unchanged.</param>
/// <param name="webpageTheme">Theme passed on unchanged.</param>
public NonAuthenticated(Uri forumHomePageUrl, ISmfTheme webpageTheme)
    : this(forumHomePageUrl, webpageTheme, int.MaxValue)
{
}
/// <summary>
/// Creates a webpage that is identified by its URL.
/// </summary>
/// <param name="webpageURL">Value stored in <c>URL</c>.</param>
/// <param name="websiteTheme">Theme stored in <c>Theme</c>.</param>
public Webpage(Uri webpageURL, ISmfTheme websiteTheme)
{
    this.URL = webpageURL;
    this.Theme = websiteTheme;
}