/// <summary>
/// Processes a forum topic list. Starting at <paramref name="thisPage"/>, each list page is
/// downloaded, every topic row with fewer than 500 replies is handed to QueuePage, and the
/// next page is fetched (after sleeping for ChangePage) until the page total shown in the
/// page's pagination text is reached.
/// </summary>
/// <param name="forumUrl">Forum list URL. Page 1 uses it unchanged; other pages append "&amp;p=N".</param>
/// <param name="thisPage">Number of the first page to process.</param>
public static void ProcessForum(string forumUrl, int thisPage)
{
    // Pages are walked with a loop instead of self-recursion so that a long forum does not
    // grow the call stack or keep every previously downloaded page alive.
    while (true)
    {
        var thisUrl = thisPage != 1 ? forumUrl + "&p=" + thisPage : forumUrl;

        // Download the list page.
        Utility.Logging("Processing Forum = " + thisUrl, Utility.DebugLevel.Info);
        var source = WebTool.GetHtmlAsyncUtf8(thisUrl);

        // Pagination text: the page total sits between "共" and "頁".
        var page = WebTool.GetContent("<p class=\"numbers\">", "</p>", source);
        var pageTotal = WebTool.GetContent("共", "頁", page);

        // Narrow the HTML down to the body of the topic table.
        source = WebTool.GetContent("<table summary=\"文章列表\">", "</table>", source);
        source = WebTool.GetContent("<tbody>", "</tbody>", source).Replace("\n", string.Empty);

        // One entry per table row; fragments of 10 characters or fewer are dropped.
        var rows = source.Split(new[] { "<tr>" }, StringSplitOptions.RemoveEmptyEntries)
                         .Where(x => x.Trim().Length > 10);

        foreach (var row in rows)
        {
            var reply = WebTool.GetContent("<td width=\"7%\" class=\"reply\">", "</td>", row)
                               .Replace(",", string.Empty);

            // A row whose reply count is not numeric is skipped instead of aborting the crawl.
            int replyCount;
            if (!int.TryParse(reply, out replyCount))
            {
                Utility.Logging("ProcessForum: skipping row with unreadable reply count '" + reply + "'");
                continue;
            }

            // Topics with 500 or more replies are not queued.
            if (replyCount >= 500)
            {
                continue;
            }

            var subject = WebTool.GetContent("<td class=\"subject\">", "</a>", row);
            var topicUrl = WebTool.GetContent("<a href=\"", "\"", subject);
            var topicTitle = WebTool.StripTagsCharArray(
                subject.Split(new[] { "</td>" }, StringSplitOptions.RemoveEmptyEntries)[0]).Trim();

            QueuePage(topicUrl, topicTitle);
        }

        // Stop when the page total cannot be read or the last page has been handled.
        int totalPages;
        if (!int.TryParse(pageTotal, out totalPages))
        {
            Utility.Logging("ProcessForum: unreadable page total '" + pageTotal + "' at " + thisUrl);
            return;
        }

        if (thisPage >= totalPages)
        {
            return;
        }

        // Pause between page requests, then move on to the next page.
        Thread.Sleep(ChangePage);
        thisPage++;
    }
}
/// <summary>
/// Processes a topic (article). Starting at <paramref name="thisPage"/>, each page of the topic
/// is downloaded, split into its "single-post" sections, and every section is passed to
/// ProcessPost; the next page is fetched (after sleeping for ChangePage) until the page total
/// shown in the page's pagination text is reached.
/// </summary>
/// <param name="url">Topic URL. Page 1 uses it unchanged; other pages append "&amp;p=N". The forum id is read from its "f=" query value.</param>
/// <param name="thisPage">Number of the first page to process.</param>
/// <param name="aid">Passed by reference to ProcessPost for every post; the value it leaves is carried over to later pages. Presumably the article id - confirm against ProcessPost.</param>
/// <param name="aowner">Passed by reference to ProcessPost for every post; the value it leaves is carried over to later pages. Presumably the article owner - confirm against ProcessPost.</param>
/// <returns>
/// <c>true</c> when the last page has been processed; <c>false</c> when an exception was caught
/// or the page total could not be read.
/// </returns>
public static bool ProcessUrl(string url, int thisPage, string aid, string aowner)
{
    try
    {
        // Pages are walked with a loop instead of self-recursion so that a long topic does not
        // grow the call stack or keep every previously downloaded page alive.
        while (true)
        {
            var thisUrl = thisPage != 1 ? url + "&p=" + thisPage : url;
            Utility.Logging("Processing Url = " + thisUrl, Utility.DebugLevel.Info);

            // Forum id: the text after "f=" up to the next '&'.
            var forum = thisUrl.Split(new[] { "f=" }, StringSplitOptions.RemoveEmptyEntries)[1].Split('&')[0];

            // Download the page and keep only the forum content area.
            var source = WebTool.GetHtmlAsyncUtf8(thisUrl);
            source = WebTool.GetContent("<div class=\"forum-content\">", "<div class=\"sidebar\">", source);

            // Topic title.
            var title = WebTool.GetContent("<h2 class=\"topic\">", "</h2>", source);

            // Pagination text: the page total sits between "共" and "頁".
            var page = WebTool.GetContent("<p class=\"numbers\">", "</p>", source);
            var pageTotal = WebTool.GetContent("共", "頁", page);

            // Cut out the run of posts and split it into one fragment per post.
            source = WebTool.GetContent("<div class=\"single-post\">", "<div class=\"pagination\">", source);
            var posts = source.Split(new[] { "<div class=\"single-post\">" }, StringSplitOptions.RemoveEmptyEntries);

            foreach (var post in posts)
            {
                ProcessPost(post, ref aid, ref aowner, forum, title);
            }

            // An unreadable page total is reported as a failure, as before, but without
            // relying on a thrown FormatException to get there.
            int totalPages;
            if (!int.TryParse(pageTotal, out totalPages))
            {
                Utility.Logging("ProcessUrl Error: unreadable page total '" + pageTotal + "' at " + thisUrl);
                return false;
            }

            if (thisPage >= totalPages)
            {
                return true;
            }

            // Pause between page requests, then move on to the next page.
            Thread.Sleep(ChangePage);
            thisPage++;
        }
    }
    catch (Exception ex)
    {
        Utility.Logging("ProcessUrl Error:" + ex.Message);
        return false;
    }
}