Exemplo n.º 1
0
        /// <summary>
        /// Creates a <see cref="PttTheme"/> seeded with the given board name and theme code,
        /// then hands it to the <c>parse(PttTheme)</c> overload.
        /// </summary>
        /// <param name="boardName">Value stored in the new theme's <c>boardName</c>.</param>
        /// <param name="themeId">Value stored in the new theme's <c>code</c>.</param>
        /// <returns>Whatever the <c>parse(PttTheme)</c> overload returns for the seeded theme.</returns>
        public PttTheme parse(string boardName, string themeId)
        {
            PttTheme seed = new PttTheme
            {
                boardName = boardName,
                code      = themeId
            };

            return parse(seed);
        }
Exemplo n.º 2
0
        /// <summary>
        /// Downloads the index pages of a board, from the highest page number down to the lowest,
        /// and collects every theme accepted by <c>isIncludeDateRegion</c>.
        /// </summary>
        /// <param name="boardName">Board name substituted into <c>PTT_BOARD_URL_FORMAT</c> and stamped on each theme.</param>
        /// <param name="fromDate">Passed through to <c>isIncludeDateRegion</c> together with <paramref name="toDate"/>.</param>
        /// <param name="toDate">Passed through to <c>isIncludeDateRegion</c> together with <paramref name="fromDate"/>.</param>
        /// <param name="initPage">Lowest page number to visit; 0 or less means page 1.</param>
        /// <param name="maxPage">Highest page number to visit; 0 or less means the value of <c>getMaxPageInTheBoard</c>.</param>
        /// <returns>
        /// The accepted themes in visiting order: pages descending, and the rows of each page in
        /// reverse document order. Pages that could not be downloaded or that contain no rows
        /// contribute nothing.
        /// </returns>
        private List<PttTheme> parse(string boardName, DateTime? fromDate, DateTime? toDate, int initPage, int maxPage)
        {
            List<PttTheme> themes = new List<PttTheme>();

            // Only ask the board for its page count when the caller gave no upper bound.
            int lastPage  = maxPage > 0 ? maxPage : getMaxPageInTheBoard(boardName);
            int firstPage = initPage > 0 ? initPage : 1;

            // Newest entries first.
            for (int page = lastPage; page >= firstPage; page--)
            {
                HtmlDocument pageDoc = HtmlParser.Core.Utility.downLoadHtmlDoc(string.Format(PTT_BOARD_URL_FORMAT, boardName, page), Encoding.UTF8);

                // The sibling parse(PttTheme) overload treats a null document as possible, so do the same here.
                if (pageDoc == null)
                {
                    continue;
                }

                var rows = pageDoc.DocumentNode.SelectNodes("//*[@id=\"main-container\"]/div[contains(@class,'r-list-container')]/div[@class='r-ent']");

                // SelectNodes yields null (not an empty collection) when nothing matches.
                if (rows == null)
                {
                    continue;
                }

                foreach (HtmlNode row in rows.AsEnumerable().Reverse())
                {
                    PttTheme theme = parseBoard(row);

                    // parseBoard signals an entry to skip (e.g. a deleted article) with null.
                    if (theme == null)
                    {
                        continue;
                    }

                    theme.boardName = boardName;

                    if (isIncludeDateRegion(fromDate, toDate, theme.issueDate))
                    {
                        themes.Add(theme);
                    }
                }
            }

            return themes;
        }
Exemplo n.º 3
0
        /// <summary>
        /// Downloads the article identified by <c>initTheme.boardName</c> and <c>initTheme.code</c>
        /// and fills the theme with the author, title, issue date, content, link and push comments
        /// found under the <c>main-content</c> element.
        /// </summary>
        /// <param name="initTheme">Theme carrying the board name and code to fetch; it is mutated in place.</param>
        /// <returns>
        /// The same <paramref name="initTheme"/> instance once populated; the instance unchanged when
        /// no document was downloaded; or <c>null</c> when the <c>main-content</c> element or an
        /// expected child element is missing.
        /// </returns>
        public PttTheme parse(PttTheme initTheme)
        {
            HtmlDocument doc = Utility.downLoadHtmlDoc(string.Format(PTT_THEME_URL_FORMAT, initTheme.boardName, initTheme.code), Encoding.UTF8);

            // Nothing downloaded: hand the theme back untouched.
            if (doc == null)
            {
                return initTheme;
            }

            try
            {
                HtmlNode mainNode = doc.DocumentNode.SelectSingleNode("//*[@id=\"main-content\"]");
                if (mainNode == null)
                {
                    return null;
                }

                // Header lines: 1 = author, 2 = title, 3 = time.
                if (mainNode.SelectNodes("div[@class='article-metaline']") != null)
                {
                    initTheme.author    = mainNode.SelectSingleNode("div[@class='article-metaline'][1]/span[@class='article-meta-value']").InnerText;
                    initTheme.title     = mainNode.SelectSingleNode("div[@class='article-metaline'][2]/span[@class='article-meta-value']").InnerText;
                    initTheme.issueDate = parseExact(mainNode.SelectSingleNode("div[@class='article-metaline'][3]/span[@class='article-meta-value']").InnerText);
                }

                // Content: text of every child node that checkNode does not flag, one per line.
                // string.Join yields an empty string for an empty sequence, where a seedless
                // Aggregate would throw InvalidOperationException.
                string[] contentParts = mainNode.ChildNodes
                                        .Where(x => !checkNode(x))
                                        .Select(x => x.InnerText)
                                        .ToArray();
                initTheme.content = string.Join(System.Environment.NewLine, contentParts);

                HtmlNode linkNode = mainNode.SelectSingleNode("span[@class='f2'][a]/a");
                if (linkNode != null)
                {
                    initTheme.url = linkNode.InnerText;
                }

                // Push comments. SelectNodes yields null when the article has none; that is a
                // valid article, not a parse failure.
                var pushNodes = mainNode.SelectNodes("div[@class='push']");
                if (pushNodes != null)
                {
                    foreach (HtmlNode pushNode in pushNodes)
                    {
                        PttThemePush push = new PttThemePush();
                        push.author   = pushNode.SelectSingleNode("span[contains(@class,'push-userid')]").InnerText;
                        push.content  = pushNode.SelectSingleNode("span[contains(@class,'push-content')]").InnerText;
                        push.pushType = choosePushType(pushNode.SelectSingleNode("span[contains(@class,'push-tag')]").InnerText);
                        push.pushDate = parseExact(pushNode.SelectSingleNode("span[contains(@class,'push-ipdatetime')]").InnerText);
                        initTheme.pushContents.Add(push);
                    }
                }
            }
            catch (NullReferenceException)
            {
                // An expected element was missing from the markup; callers receive null for such pages.
                return null;
            }

            return initTheme;
        }
Exemplo n.º 4
0
        /// <summary>
        /// Builds a <see cref="PttTheme"/> from one board-index row by reading its child
        /// <c>div</c> elements and dispatching on their <c>class</c> attribute
        /// (<c>nrec</c>, <c>mark</c>, <c>title</c>, <c>meta</c>).
        /// </summary>
        /// <param name="node">Row element whose classed <c>div</c> children describe one article.</param>
        /// <returns>
        /// The populated theme, or <c>null</c> when the row has no classed <c>div</c> children or
        /// its title text marks the article as deleted.
        /// </returns>
        /// <exception cref="NullReferenceException">
        /// Propagates when a non-deleted title has no <c>a</c> element, or the <c>meta</c> cell
        /// lacks its <c>date</c> or <c>author</c> element.
        /// </exception>
        public PttTheme parseBoard(HtmlNode node)
        {
            // SelectNodes yields null (not an empty collection) when nothing matches.
            var cells = node.SelectNodes("div[@class]");
            if (cells == null)
            {
                return null;
            }

            PttTheme theme = new PttTheme();

            foreach (HtmlNode cell in cells)
            {
                switch (cell.Attributes["class"].Value)
                {
                case "nrec":
                {
                    HtmlNode countNode = cell.SelectSingleNode("span");
                    theme.popularity = countNode == null ? string.Empty : countNode.InnerText;
                    break;
                }

                case "mark":
                    break;

                case "title":
                {
                    string titleText = cell.InnerText;

                    // Deleted articles have no link to parse; report them as null so callers skip them.
                    if (titleText.Contains("本文已被刪除") || Regex.IsMatch(titleText, "已被[0-9A-Za-z]+刪除"))
                    {
                        return null;
                    }

                    // No catch-and-rethrow here: letting the exception propagate keeps its stack trace.
                    HtmlNode link = cell.SelectSingleNode("a");
                    theme.code  = link.Attributes["href"].Value.Split('/').Last().Replace(".html", string.Empty);
                    theme.title = link.InnerText;
                    break;
                }

                case "meta":
                    theme.issueDate = DateTime.ParseExact(cell.SelectSingleNode("div[@class='date']").InnerText,
                                                          DATETIME_LIST, CultureInfo.InvariantCulture, DateTimeStyles.AllowWhiteSpaces);

                    theme.author = cell.SelectSingleNode("div[@class='author']").InnerText;
                    break;
                }
            }

            return theme;
        }