Beispiel #1
0
        /// <summary>
        /// Processes a forum topic list: downloads the list page, queues every topic
        /// that has fewer than 500 replies through <c>QueuePage</c>, and then moves on
        /// to the following pages until the page total shown in the pagination block
        /// is reached.
        /// </summary>
        /// <param name="forumUrl">Forum list URL without a page parameter; <c>&amp;p=N</c> is appended for every page other than 1.</param>
        /// <param name="thisPage">Page number to start from (page 1 is requested without the <c>p</c> parameter).</param>
        public static void ProcessForum(string forumUrl, int thisPage)
        {
            // Topics with this many replies or more are not queued.
            const int replyLimit = 500;

            // Walk the pages with a loop rather than one recursive call per page,
            // so the call stack stays flat no matter how many pages the forum has.
            for (var currentPage = thisPage; ; currentPage++)
            {
                var thisUrl = currentPage != 1 ? forumUrl + "&p=" + currentPage : forumUrl;

                //Get Content
                Utility.Logging("Processing Forum = " + thisUrl, Utility.DebugLevel.Info);

                var source = WebTool.GetHtmlAsyncUtf8(thisUrl);
                if (string.IsNullOrEmpty(source))
                {
                    Utility.Logging("ProcessForum Error: empty response for " + thisUrl);
                    return;
                }

                //Get Page: the pagination block carries the total as "共 N 頁"
                var page      = WebTool.GetContent("<p class=\"numbers\">", "</p>", source);
                var pageTotal = WebTool.GetContent("共", "頁", page);

                //Topic list: body of the table whose summary is "文章列表", flattened to one line
                var table = WebTool.GetContent("<table summary=\"文章列表\">", "</table>", source);
                var body  = WebTool.GetContent("<tbody>", "</tbody>", table).Replace("\n", string.Empty);

                // Fragments of 10 characters or fewer are leftovers of the split, not topic rows.
                var rows = body.Split(new[] { "<tr>" }, StringSplitOptions.RemoveEmptyEntries)
                               .Where(x => x.Trim().Length > 10);

                foreach (var row in rows)
                {
                    // Reply counts are rendered with thousands separators (e.g. "1,234").
                    var reply = WebTool.GetContent("<td width=\"7%\" class=\"reply\">", "</td>", row).Replace(",", string.Empty);

                    int replyCount;
                    if (!int.TryParse(reply, out replyCount))
                    {
                        // A single malformed row must not abort the whole crawl.
                        Utility.Logging("ProcessForum: skipping row with unreadable reply count '" + reply + "'");
                        continue;
                    }

                    if (replyCount >= replyLimit)
                    {
                        continue;
                    }

                    var subject = WebTool.GetContent("<td class=\"subject\">", "</a>", row);
                    if (string.IsNullOrEmpty(subject))
                    {
                        // Without a subject cell there is neither a link nor a title to queue.
                        Utility.Logging("ProcessForum: skipping row without a subject cell");
                        continue;
                    }

                    var topicUrl   = WebTool.GetContent("<a href=\"", "\"", subject);
                    var titleHtml  = subject.Split(new[] { "</td>" }, StringSplitOptions.RemoveEmptyEntries)[0];
                    var topicTitle = WebTool.StripTagsCharArray(titleHtml).Trim();

                    QueuePage(topicUrl, topicTitle);
                }

                //go to next page
                int lastPage;
                if (!int.TryParse(pageTotal, out lastPage))
                {
                    Utility.Logging("ProcessForum: could not read the page total on " + thisUrl + ", stopping");
                    return;
                }

                if (currentPage >= lastPage)
                {
                    return;
                }

                // Pause between page requests.
                Thread.Sleep(ChangePage);
            }
        }
Beispiel #2
0
        /// <summary>
        /// Processes a topic: downloads the topic page, hands every post on it to
        /// <c>ProcessPost</c>, and then continues with the following pages until the
        /// page total shown in the pagination block is reached.
        /// </summary>
        /// <param name="url">Topic URL without a page parameter; it must contain an <c>f=</c> query value, which is passed to <c>ProcessPost</c> as the forum id. <c>&amp;p=N</c> is appended for every page other than 1.</param>
        /// <param name="thisPage">Page number to start from (page 1 is requested without the <c>p</c> parameter).</param>
        /// <param name="aid">Passed by reference to <c>ProcessPost</c> for every post; any value it writes back is carried over to later posts and pages.</param>
        /// <param name="aowner">Passed by reference to <c>ProcessPost</c> for every post; any value it writes back is carried over to later posts and pages.</param>
        /// <returns>
        /// <c>true</c> when the last page has been processed; <c>false</c> when a page
        /// could not be fetched or parsed, or any exception occurred (the failure is logged).
        /// </returns>
        public static bool ProcessUrl(string url, int thisPage, string aid, string aowner)
        {
            // Walk the pages with a loop rather than one recursive call per page,
            // so neither the call stack nor the try/catch nesting grows with the topic length.
            for (var currentPage = thisPage; ; currentPage++)
            {
                var thisUrl = currentPage != 1 ? url + "&p=" + currentPage : url;

                Utility.Logging("Processing Url = " + thisUrl, Utility.DebugLevel.Info);

                try
                {
                    // Forum id = value of the "f=" query parameter (throws, and is caught below, when absent).
                    var forum = thisUrl.Split(new[] { "f=" }, StringSplitOptions.RemoveEmptyEntries)[1].Split('&')[0];

                    //Get Content
                    var source = WebTool.GetHtmlAsyncUtf8(thisUrl);
                    if (string.IsNullOrEmpty(source))
                    {
                        Utility.Logging("ProcessUrl Error:empty response for " + thisUrl);
                        return(false);
                    }

                    source = WebTool.GetContent("<div class=\"forum-content\">", "<div class=\"sidebar\">", source);

                    //Get Title
                    var title = WebTool.GetContent("<h2 class=\"topic\">", "</h2>", source);

                    //Get Page: the pagination block carries the total as "共 N 頁"
                    var page      = WebTool.GetContent("<p class=\"numbers\">", "</p>", source);
                    var pageTotal = WebTool.GetContent("共", "頁", page);

                    //process post -begin-
                    var posts = WebTool.GetContent("<div class=\"single-post\">", "<div class=\"pagination\">", source)
                                       .Split(new[] { "<div class=\"single-post\">" }, StringSplitOptions.RemoveEmptyEntries);

                    foreach (var post in posts)
                    {
                        ProcessPost(post, ref aid, ref aowner, forum, title);
                    }
                    //process post -end-

                    int lastPage;
                    if (!int.TryParse(pageTotal, out lastPage))
                    {
                        // Reported as a failure, but with an explicit message instead of a caught FormatException.
                        Utility.Logging("ProcessUrl Error:could not read the page total on " + thisUrl);
                        return(false);
                    }

                    if (currentPage >= lastPage)
                    {
                        return(true);
                    }

                    // Pause between page requests.
                    Thread.Sleep(ChangePage);
                }
                catch (Exception ex)
                {
                    Utility.Logging("ProcessUrl Error:" + ex.Message);
                    return(false);
                }
            }
        }