/// <summary>
/// Convenience overload: wraps the board name and theme id in a fresh <c>PttTheme</c>
/// and hands it to the <c>parse(PttTheme)</c> overload.
/// </summary>
/// <param name="boardName">Stored in the new theme's <c>boardName</c>.</param>
/// <param name="themeId">Stored in the new theme's <c>code</c>.</param>
/// <returns>Whatever <c>parse(PttTheme)</c> returns for the seeded theme.</returns>
public PttTheme parse(string boardName, string themeId)
{
    PttTheme seed = new PttTheme
    {
        boardName = boardName,
        code = themeId
    };

    return parse(seed);
}
/// <summary>
/// Walks the board index pages from the highest page number down to the lowest and collects
/// every listed theme whose issue date is accepted by <c>isIncludeDateRegion</c>.
/// </summary>
/// <param name="boardName">Board name substituted into <c>PTT_BOARD_URL_FORMAT</c> and stamped on each theme.</param>
/// <param name="fromDate">Passed through to <c>isIncludeDateRegion</c>; may be null.</param>
/// <param name="toDate">Passed through to <c>isIncludeDateRegion</c>; may be null.</param>
/// <param name="initPage">First page to read; values of 0 or less fall back to page 1.</param>
/// <param name="maxPage">Last page to read; values of 0 or less fall back to <c>getMaxPageInTheBoard</c>.</param>
/// <returns>
/// The accepted themes in visiting order: highest page first, and within a page the rows in
/// reverse document order.
/// </returns>
private List<PttTheme> parse(string boardName, DateTime? fromDate, DateTime? toDate, int initPage, int maxPage)
{
    List<PttTheme> result = new List<PttTheme>();

    int ePage = maxPage > 0 ? maxPage : getMaxPageInTheBoard(boardName);
    int sPage = initPage > 0 ? initPage : 1;

    // Newest entries come first: iterate pages downwards and reverse the rows of each page.
    for (int i = ePage; i >= sPage; i--)
    {
        HtmlDocument newDoc = HtmlParser.Core.Utility.downLoadHtmlDoc(
            string.Format(PTT_BOARD_URL_FORMAT, boardName, i), Encoding.UTF8);

        // The sibling parse(PttTheme) null-checks the same helper's result, so a failed
        // download presumably yields null; skip that page instead of crashing the crawl.
        if (newDoc == null)
        {
            continue;
        }

        var collection = newDoc.DocumentNode.SelectNodes(
            "//*[@id=\"main-container\"]/div[contains(@class,'r-list-container')]/div[@class='r-ent']");

        // SelectNodes yields null (not an empty collection) when nothing matches.
        if (collection == null)
        {
            continue;
        }

        foreach (HtmlNode node in collection.AsEnumerable().Reverse())
        {
            PttTheme theme = parseBoard(node);
            if (theme == null)
            {
                continue;
            }

            theme.boardName = boardName;
            if (isIncludeDateRegion(fromDate, toDate, theme.issueDate))
            {
                result.Add(theme);
            }
        }
    }

    return result;
}
/// <summary>
/// Downloads the article page selected by <paramref name="initTheme"/>'s <c>boardName</c> and
/// <c>code</c>, then fills in author, title, issue date, content, url and push comments.
/// </summary>
/// <param name="initTheme">Theme to populate in place; its board name and code build the page URL.</param>
/// <returns>
/// The same <paramref name="initTheme"/> instance. It is returned unchanged when the download
/// yields no document. <c>null</c> is returned when an expected node is missing and the lookup
/// surfaces as a <see cref="NullReferenceException"/>.
/// </returns>
public PttTheme parse(PttTheme initTheme)
{
    HtmlDocument doc = Utility.downLoadHtmlDoc(
        string.Format(PTT_THEME_URL_FORMAT, initTheme.boardName, initTheme.code), Encoding.UTF8);

    if (doc == null)
    {
        return initTheme;
    }

    try
    {
        HtmlNode mainNode = doc.DocumentNode.SelectSingleNode("//*[@id=\"main-content\"]");

        // Header block: metaline 1 = author, 2 = title, 3 = time.
        if (mainNode.SelectNodes("div[@class='article-metaline']") != null)
        {
            initTheme.author = mainNode.SelectSingleNode("div[@class='article-metaline'][1]/span[@class='article-meta-value']").InnerText;
            initTheme.title = mainNode.SelectSingleNode("div[@class='article-metaline'][2]/span[@class='article-meta-value']").InnerText;
            initTheme.issueDate = parseExact(mainNode.SelectSingleNode("div[@class='article-metaline'][3]/span[@class='article-meta-value']").InnerText);
        }

        // Content: join the text of every child node that checkNode does not flag.
        // string.Join yields an empty string for zero nodes, where a seedless Aggregate
        // would throw InvalidOperationException past the catch below.
        var nos = mainNode.ChildNodes.Where(x => !checkNode(x));
        initTheme.content = string.Join(
            System.Environment.NewLine,
            nos.Select(x => x.InnerText).ToArray());

        if (mainNode.SelectNodes("span[@class='f2'][a]") != null)
        {
            initTheme.url = mainNode.SelectSingleNode("span[@class='f2'][a]/a").InnerText;
        }

        // Push comments. SelectNodes yields null when the article has none; that must not
        // discard the article that was just parsed.
        var pushNodes = mainNode.SelectNodes("div[@class='push']");
        if (pushNodes != null)
        {
            foreach (HtmlNode pushNode in pushNodes.AsEnumerable())
            {
                PttThemePush push = new PttThemePush();
                push.author = pushNode.SelectSingleNode("span[contains(@class,'push-userid')]").InnerText;
                push.content = pushNode.SelectSingleNode("span[contains(@class,'push-content')]").InnerText;
                push.pushType = choosePushType(pushNode.SelectSingleNode("span[contains(@class,'push-tag')]").InnerText);
                push.pushDate = parseExact(pushNode.SelectSingleNode("span[contains(@class,'push-ipdatetime')]").InnerText);
                initTheme.pushContents.Add(push);
            }
        }
    }
    catch (NullReferenceException)
    {
        // A required node was absent from the page; callers receive null for such pages.
        return null;
    }

    return initTheme;
}
/// <summary>
/// Builds a <c>PttTheme</c> from one row node of a board index page by reading its
/// <c>div</c> children according to their <c>class</c> attribute.
/// </summary>
/// <param name="node">Row node whose classed <c>div</c> children are inspected.</param>
/// <returns>
/// The populated theme, or <c>null</c> when the title text marks the entry as deleted
/// (contains "本文已被刪除" or matches "已被[0-9A-Za-z]+刪除").
/// </returns>
public PttTheme parseBoard(HtmlNode node)
{
    PttTheme theme = new PttTheme();

    foreach (HtmlNode cNode in node.SelectNodes("div[@class]").AsEnumerable())
    {
        switch (cNode.Attributes["class"].Value)
        {
            case "nrec":
                {
                    // The span is absent when the row shows no popularity counter.
                    HtmlNode countNode = cNode.SelectSingleNode("span");
                    theme.popularity = countNode == null ? string.Empty : countNode.InnerText;
                    break;
                }

            case "mark":
                break;

            case "title":
                {
                    if (cNode.InnerText.Contains("本文已被刪除") || Regex.IsMatch(cNode.InnerText, "已被[0-9A-Za-z]+刪除"))
                    {
                        return null;
                    }

                    // No catch-and-"throw ex" wrapper: it only reset the stack trace.
                    // Failures here propagate unchanged with their original trace.
                    HtmlNode link = cNode.SelectSingleNode("a");

                    // The theme code is the last href path segment without ".html".
                    theme.code = link.Attributes["href"].Value.Split('/').Last().Replace(".html", string.Empty);
                    theme.title = link.InnerText;
                    break;
                }

            case "meta":
                theme.issueDate = DateTime.ParseExact(
                    cNode.SelectSingleNode("div[@class='date']").InnerText,
                    DATETIME_LIST,
                    CultureInfo.InvariantCulture,
                    DateTimeStyles.AllowWhiteSpaces);
                theme.author = cNode.SelectSingleNode("div[@class='author']").InnerText;
                break;
        }
    }

    return theme;
}